author     Peter Maydell <peter.maydell@linaro.org>   2020-05-11 14:34:27 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2020-05-11 14:34:27 +0100
commit     de2f658b6bb422ec0e0fa94a49e476018602eeea (patch)
tree       fdf6331408573ce7e328e44524f9edc9d7ee7214
parent     c88f1ffc19e38008a1c33ae039482a860aa7418c (diff)
parent     7e17d50ebd359ee5fa3d65d7fdc0fe0336d60694 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200511' into staging
target-arm queue:
aspeed: Add boot stub for smp booting
target/arm: Drop access_el3_aa32ns_aa64any()
aspeed: Support AST2600A1 silicon revision
aspeed: sdmc: Implement AST2600 locking behaviour
nrf51: Tracing cleanups
target/arm: Improve handling of SVE loads and stores
target/arm: Don't show TCG-only CPUs in KVM-only QEMU builds
hw/arm/musicpal: Map the UART devices unconditionally
target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed)
target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA
# gpg: Signature made Mon 11 May 2020 14:33:14 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20200511: (34 commits)
target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed)
target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA
hw/arm/musicpal: Map the UART devices unconditionally
target/arm: Restrict TCG cpus to TCG accel
target/arm/cpu: Restrict v8M IDAU interface to Aarch32 CPUs
target/arm/cpu: Use ARRAY_SIZE() to iterate over ARMCPUInfo[]
target/arm: Make set_feature() available for other files
target/arm/kvm: Inline set_feature() calls
target/arm: Remove sve_memopidx
target/arm: Reuse sve_probe_page for gather loads
target/arm: Reuse sve_probe_page for scatter stores
target/arm: Reuse sve_probe_page for gather first-fault loads
target/arm: Use SVEContLdSt for contiguous stores
target/arm: Update contiguous first-fault and no-fault loads
target/arm: Use SVEContLdSt for multi-register contiguous loads
target/arm: Handle watchpoints in sve_ld1_r
target/arm: Use SVEContLdSt in sve_ld1_r
target/arm: Adjust interface of sve_ld1_host_fn
target/arm: Add sve infrastructure for page lookup
target/arm: Drop manual handling of set/clear_helper_retaddr
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
33 files changed, 2979 insertions, 2252 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e3b5750c3b..eb2cf9de5e 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1231,131 +1231,134 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
     }
 }
 
-/*
- * Probe for whether the specified guest access is permitted. If it is not
- * permitted then an exception will be taken in the same way as if this
- * were a real access (and we will not return).
- * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
- * returns the address of the host page similar to tlb_vaddr_to_host().
- */
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
-                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+static int probe_access_internal(CPUArchState *env, target_ulong addr,
+                                 int fault_size, MMUAccessType access_type,
+                                 int mmu_idx, bool nonfault,
+                                 void **phost, uintptr_t retaddr)
 {
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr;
+    target_ulong tlb_addr, page_addr;
     size_t elt_ofs;
-    int wp_access;
-
-    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+    int flags;
 
     switch (access_type) {
     case MMU_DATA_LOAD:
         elt_ofs = offsetof(CPUTLBEntry, addr_read);
-        wp_access = BP_MEM_READ;
         break;
     case MMU_DATA_STORE:
         elt_ofs = offsetof(CPUTLBEntry, addr_write);
-        wp_access = BP_MEM_WRITE;
         break;
     case MMU_INST_FETCH:
         elt_ofs = offsetof(CPUTLBEntry, addr_code);
-        wp_access = BP_MEM_READ;
         break;
     default:
         g_assert_not_reached();
     }
     tlb_addr = tlb_read_ofs(entry, elt_ofs);
 
-    if (unlikely(!tlb_hit(tlb_addr, addr))) {
-        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
-                            addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
-            /* TLB resize via tlb_fill may have moved the entry. */
-            index = tlb_index(env, mmu_idx, addr);
+    page_addr = addr & TARGET_PAGE_MASK;
+    if (!tlb_hit_page(tlb_addr, page_addr)) {
+        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
+            CPUState *cs = env_cpu(env);
+            CPUClass *cc = CPU_GET_CLASS(cs);
+
+            if (!cc->tlb_fill(cs, addr, fault_size, access_type,
+                              mmu_idx, nonfault, retaddr)) {
+                /* Non-faulting page table read failed. */
+                *phost = NULL;
+                return TLB_INVALID_MASK;
+            }
+
+            /* TLB resize via tlb_fill may have moved the entry. */
             entry = tlb_entry(env, mmu_idx, addr);
         }
         tlb_addr = tlb_read_ofs(entry, elt_ofs);
     }
+    flags = tlb_addr & TLB_FLAGS_MASK;
 
-    if (!size) {
-        return NULL;
+    /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM. */
+    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
+        *phost = NULL;
+        return TLB_MMIO;
     }
 
-    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
+    /* Everything else is RAM. */
+    *phost = (void *)((uintptr_t)addr + entry->addend);
+    return flags;
+}
+
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t retaddr)
+{
+    int flags;
+
+    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
+                                  nonfault, phost, retaddr);
+
+    /* Handle clean RAM pages. */
+    if (unlikely(flags & TLB_NOTDIRTY)) {
+        uintptr_t index = tlb_index(env, mmu_idx, addr);
         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
-        /* Reject I/O access, or other required slow-path. */
-        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
-            return NULL;
-        }
+        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+        flags &= ~TLB_NOTDIRTY;
+    }
+
+    return flags;
+}
+
+void *probe_access(CPUArchState *env, target_ulong addr, int size,
+                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+{
+    void *host;
+    int flags;
+
+    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+
+    flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
+                                  false, &host, retaddr);
+
+    /* Per the interface, size == 0 merely faults the access. */
+    if (size == 0) {
+        return NULL;
+    }
+
+    if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
+        uintptr_t index = tlb_index(env, mmu_idx, addr);
+        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 
         /* Handle watchpoints.  */
-        if (tlb_addr & TLB_WATCHPOINT) {
+        if (flags & TLB_WATCHPOINT) {
+            int wp_access = (access_type == MMU_DATA_STORE
+                             ? BP_MEM_WRITE : BP_MEM_READ);
             cpu_check_watchpoint(env_cpu(env), addr, size,
                                  iotlbentry->attrs, wp_access, retaddr);
         }
 
         /* Handle clean RAM pages.  */
-        if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+        if (flags & TLB_NOTDIRTY) {
+            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
         }
     }
 
-    return (void *)((uintptr_t)addr + entry->addend);
+    return host;
 }
 
 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                         MMUAccessType access_type, int mmu_idx)
 {
-    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr, page;
-    size_t elt_ofs;
+    void *host;
+    int flags;
 
-    switch (access_type) {
-    case MMU_DATA_LOAD:
-        elt_ofs = offsetof(CPUTLBEntry, addr_read);
-        break;
-    case MMU_DATA_STORE:
-        elt_ofs = offsetof(CPUTLBEntry, addr_write);
-        break;
-    case MMU_INST_FETCH:
-        elt_ofs = offsetof(CPUTLBEntry, addr_code);
-        break;
-    default:
-        g_assert_not_reached();
-    }
+    flags = probe_access_internal(env, addr, 0, access_type,
+                                  mmu_idx, true, &host, 0);
 
-    page = addr & TARGET_PAGE_MASK;
-    tlb_addr = tlb_read_ofs(entry, elt_ofs);
-
-    if (!tlb_hit_page(tlb_addr, page)) {
-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-
-        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
-            CPUState *cs = env_cpu(env);
-            CPUClass *cc = CPU_GET_CLASS(cs);
-
-            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
-                /* Non-faulting page table read failed. */
-                return NULL;
-            }
-
-            /* TLB resize via tlb_fill may have moved the entry. */
-            entry = tlb_entry(env, mmu_idx, addr);
-        }
-        tlb_addr = tlb_read_ofs(entry, elt_ofs);
-    }
-
-    if (tlb_addr & ~TARGET_PAGE_MASK) {
-        /* IO access */
-        return NULL;
-    }
-
-    return (void *)((uintptr_t)addr + entry->addend);
+    /* No combination of flags are expected by the caller. */
+    return flags ? NULL : host;
 }
-
 #ifdef CONFIG_PLUGIN
 /*
  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
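The probe_access_flags() interface introduced above lets a helper translate a page once, without faulting, and then branch on the returned TLB flags. A minimal sketch of a caller — the helper name try_read_page is invented for illustration and is not part of this series:

    /* Sketch: non-faulting probe of one page for a load. */
    static bool try_read_page(CPUArchState *env, target_ulong addr,
                              int mmu_idx, uintptr_t ra)
    {
        void *host;
        int flags = probe_access_flags(env, addr, MMU_DATA_LOAD, mmu_idx,
                                       true /* nonfault */, &host, ra);

        if (flags & TLB_INVALID_MASK) {
            return false;   /* page is unmapped; no exception was raised */
        }
        if (flags & (TLB_MMIO | TLB_WATCHPOINT)) {
            return false;   /* not plain dirty RAM; take the slow path */
        }
        /* host now points at the access location within guest RAM */
        return true;
    }

This is the pattern the SVE first-fault and no-fault loads in this queue rely on: probe first, fall back to a slow path only when the flags demand it.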
@@ -1769,36 +1772,54 @@ int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            full_ldub_mmu);
 }
 
-uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                            int mmu_idx, uintptr_t ra)
+uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                               int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
-                           MO_TE == MO_LE
-                           ? full_le_lduw_mmu : full_be_lduw_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
 }
 
-int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                       int mmu_idx, uintptr_t ra)
+int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                          int mmu_idx, uintptr_t ra)
+{
+    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
+                                    full_be_lduw_mmu);
+}
+
+uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
+{
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
+}
+
+uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
 {
-    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
-                                    MO_TE == MO_LE
-                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
 }
 
-uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                           int mmu_idx, uintptr_t ra)
+uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                               int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
-                           MO_TE == MO_LE
-                           ? full_le_ldul_mmu : full_be_ldul_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
 }
 
-uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                           int mmu_idx, uintptr_t ra)
+int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                          int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
-                           MO_TE == MO_LE
-                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);
+    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
+                                    full_le_lduw_mmu);
+}
+
+uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
+{
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
+}
+
+uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
+{
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
 }
 
 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
@@ -1812,25 +1833,50 @@ int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
     return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
 }
 
-uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
-                          uintptr_t retaddr)
+uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
+                             uintptr_t retaddr)
 {
-    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+    return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
 }
 
-int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
 {
-    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+    return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
 }
 
-uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
 {
-    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+    return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
 }
 
-uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
 {
-    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+    return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
+                             uintptr_t retaddr)
+{
+    return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+{
+    return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
+{
+    return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
+{
+    return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
 }
 
 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
@@ -1843,24 +1889,44 @@ int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
     return cpu_ldsb_data_ra(env, ptr, 0);
 }
 
-uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
+uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
 {
-    return cpu_lduw_data_ra(env, ptr, 0);
+    return cpu_lduw_be_data_ra(env, ptr, 0);
 }
 
-int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
+int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
 {
-    return cpu_ldsw_data_ra(env, ptr, 0);
+    return cpu_ldsw_be_data_ra(env, ptr, 0);
 }
 
-uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
+uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
 {
-    return cpu_ldl_data_ra(env, ptr, 0);
+    return cpu_ldl_be_data_ra(env, ptr, 0);
 }
 
-uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
+uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
 {
-    return cpu_ldq_data_ra(env, ptr, 0);
+    return cpu_ldq_be_data_ra(env, ptr, 0);
+}
+
+uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_lduw_le_data_ra(env, ptr, 0);
+}
+
+int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldsw_le_data_ra(env, ptr, 0);
+}
+
+uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldl_le_data_ra(env, ptr, 0);
+}
+
+uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldq_le_data_ra(env, ptr, 0);
 }
 
 /*
@@ -2118,22 +2184,40 @@ void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
 }
 
-void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
-                       int mmu_idx, uintptr_t retaddr)
+void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
 }
 
-void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
-                       int mmu_idx, uintptr_t retaddr)
+void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
 }
 
-void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
-                       int mmu_idx, uintptr_t retaddr)
+void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
+                          int mmu_idx, uintptr_t retaddr)
+{
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
+}
+
+void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
+}
+
+void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
+{
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
+}
+
+void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
+                          int mmu_idx, uintptr_t retaddr)
+{
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
 }
 
 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
@@ -2142,22 +2226,40 @@ void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
     cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
 }
 
-void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
-                     uint32_t val, uintptr_t retaddr)
+void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
 {
-    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+    cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
 }
 
-void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
-                     uint32_t val, uintptr_t retaddr)
+void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
 {
-    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+    cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
 }
 
-void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
-                     uint64_t val, uintptr_t retaddr)
+void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint64_t val, uintptr_t retaddr)
 {
-    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+    cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint64_t val, uintptr_t retaddr)
+{
+    cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
 }
 
 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
@@ -2165,19 +2267,34 @@ void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
     cpu_stb_data_ra(env, ptr, val, 0);
 }
 
-void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stw_be_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stl_be_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
+{
+    cpu_stq_be_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
 {
-    cpu_stw_data_ra(env, ptr, val, 0);
+    cpu_stw_le_data_ra(env, ptr, val, 0);
 }
 
-void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
 {
-    cpu_stl_data_ra(env, ptr, val, 0);
+    cpu_stl_le_data_ra(env, ptr, val, 0);
 }
 
-void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
+void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
 {
-    cpu_stq_data_ra(env, ptr, val, 0);
+    cpu_stq_le_data_ra(env, ptr, val, 0);
 }
 
 /* First set of helpers allows passing in of OI and RETADDR.  This makes
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 4be78eb9b3..52359949df 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -190,13 +190,12 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
     g_assert_not_reached();
 }
 
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
-                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+static int probe_access_internal(CPUArchState *env, target_ulong addr,
+                                 int fault_size, MMUAccessType access_type,
+                                 bool nonfault, uintptr_t ra)
 {
     int flags;
 
-    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
-
     switch (access_type) {
     case MMU_DATA_STORE:
         flags = PAGE_WRITE;
@@ -211,13 +210,39 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
         g_assert_not_reached();
     }
 
-    if (!guest_addr_valid(addr) || page_check_range(addr, size, flags) < 0) {
-        CPUState *cpu = env_cpu(env);
-        CPUClass *cc = CPU_GET_CLASS(cpu);
-        cc->tlb_fill(cpu, addr, size, access_type, MMU_USER_IDX, false,
-                     retaddr);
-        g_assert_not_reached();
+    if (!guest_addr_valid(addr) || page_check_range(addr, 1, flags) < 0) {
+        if (nonfault) {
+            return TLB_INVALID_MASK;
+        } else {
+            CPUState *cpu = env_cpu(env);
+            CPUClass *cc = CPU_GET_CLASS(cpu);
+            cc->tlb_fill(cpu, addr, fault_size, access_type,
+                         MMU_USER_IDX, false, ra);
+            g_assert_not_reached();
+        }
     }
+    return 0;
+}
+
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t ra)
+{
+    int flags;
+
+    flags = probe_access_internal(env, addr, 0, access_type, nonfault, ra);
+    *phost = flags ? NULL : g2h(addr);
+    return flags;
+}
+
+void *probe_access(CPUArchState *env, target_ulong addr, int size,
+                   MMUAccessType access_type, int mmu_idx, uintptr_t ra)
+{
+    int flags;
+
+    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+    flags = probe_access_internal(env, addr, size, access_type, false, ra);
+    g_assert(flags == 0);
 
     return size ? g2h(addr) : NULL;
 }
@@ -758,46 +783,90 @@ int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr)
     return ret;
 }
 
-uint32_t cpu_lduw_data(CPUArchState *env, abi_ptr ptr)
+uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr)
 {
     uint32_t ret;
-    uint16_t meminfo = trace_mem_get_info(MO_TEUW, MMU_USER_IDX, false);
+    uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, false);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    ret = lduw_p(g2h(ptr));
+    ret = lduw_be_p(g2h(ptr));
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
     return ret;
 }
 
-int cpu_ldsw_data(CPUArchState *env, abi_ptr ptr)
+int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr)
 {
     int ret;
-    uint16_t meminfo = trace_mem_get_info(MO_TESW, MMU_USER_IDX, false);
+    uint16_t meminfo = trace_mem_get_info(MO_BESW, MMU_USER_IDX, false);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    ret = ldsw_p(g2h(ptr));
+    ret = ldsw_be_p(g2h(ptr));
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
     return ret;
 }
 
-uint32_t cpu_ldl_data(CPUArchState *env, abi_ptr ptr)
+uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr)
 {
     uint32_t ret;
-    uint16_t meminfo = trace_mem_get_info(MO_TEUL, MMU_USER_IDX, false);
+    uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, false);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    ret = ldl_p(g2h(ptr));
+    ret = ldl_be_p(g2h(ptr));
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
     return ret;
 }
 
-uint64_t cpu_ldq_data(CPUArchState *env, abi_ptr ptr)
+uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr)
 {
     uint64_t ret;
-    uint16_t meminfo = trace_mem_get_info(MO_TEQ, MMU_USER_IDX, false);
+    uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, false);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    ret = ldq_p(g2h(ptr));
+    ret = ldq_be_p(g2h(ptr));
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+    return ret;
+}
+
+uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr)
+{
+    uint32_t ret;
+    uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, false);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    ret = lduw_le_p(g2h(ptr));
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+    return ret;
+}
+
+int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr)
+{
+    int ret;
+    uint16_t meminfo = trace_mem_get_info(MO_LESW, MMU_USER_IDX, false);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    ret = ldsw_le_p(g2h(ptr));
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+    return ret;
+}
+
+uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr)
+{
+    uint32_t ret;
+    uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, false);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    ret = ldl_le_p(g2h(ptr));
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+    return ret;
+}
+
+uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr)
+{
+    uint64_t ret;
+    uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, false);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    ret = ldq_le_p(g2h(ptr));
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
     return ret;
 }
@@ -822,42 +891,82 @@ int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
     return ret;
 }
 
-uint32_t cpu_lduw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+{
+    uint32_t ret;
+
+    set_helper_retaddr(retaddr);
+    ret = cpu_lduw_be_data(env, ptr);
+    clear_helper_retaddr();
+    return ret;
+}
+
+int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+{
+    int ret;
+
+    set_helper_retaddr(retaddr);
+    ret = cpu_ldsw_be_data(env, ptr);
+    clear_helper_retaddr();
+    return ret;
+}
+
+uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+{
+    uint32_t ret;
+
+    set_helper_retaddr(retaddr);
+    ret = cpu_ldl_be_data(env, ptr);
+    clear_helper_retaddr();
+    return ret;
+}
+
+uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+{
+    uint64_t ret;
+
+    set_helper_retaddr(retaddr);
+    ret = cpu_ldq_be_data(env, ptr);
+    clear_helper_retaddr();
+    return ret;
+}
+
+uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
 {
     uint32_t ret;
 
     set_helper_retaddr(retaddr);
-    ret = cpu_lduw_data(env, ptr);
+    ret = cpu_lduw_le_data(env, ptr);
     clear_helper_retaddr();
     return ret;
 }
 
-int cpu_ldsw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
 {
     int ret;
 
     set_helper_retaddr(retaddr);
-    ret = cpu_ldsw_data(env, ptr);
+    ret = cpu_ldsw_le_data(env, ptr);
     clear_helper_retaddr();
     return ret;
 }
 
-uint32_t cpu_ldl_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
 {
     uint32_t ret;
 
     set_helper_retaddr(retaddr);
-    ret = cpu_ldl_data(env, ptr);
+    ret = cpu_ldl_le_data(env, ptr);
    clear_helper_retaddr();
     return ret;
 }
 
-uint64_t cpu_ldq_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
+uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
 {
     uint64_t ret;
 
     set_helper_retaddr(retaddr);
-    ret = cpu_ldq_data(env, ptr);
+    ret = cpu_ldq_le_data(env, ptr);
     clear_helper_retaddr();
     return ret;
 }
@@ -871,30 +980,57 @@ void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
 }
 
-void cpu_stw_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
+void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
+{
+    uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, true);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    stw_be_p(g2h(ptr), val);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+}
+
+void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
+{
+    uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, true);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    stl_be_p(g2h(ptr), val);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+}
+
+void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
 {
-    uint16_t meminfo = trace_mem_get_info(MO_TEUW, MMU_USER_IDX, true);
+    uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, true);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    stw_p(g2h(ptr), val);
+    stq_be_p(g2h(ptr), val);
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
 }
 
-void cpu_stl_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
+void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
 {
-    uint16_t meminfo = trace_mem_get_info(MO_TEUL, MMU_USER_IDX, true);
+    uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, true);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    stl_p(g2h(ptr), val);
+    stw_le_p(g2h(ptr), val);
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
 }
 
-void cpu_stq_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
+void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
 {
-    uint16_t meminfo = trace_mem_get_info(MO_TEQ, MMU_USER_IDX, true);
+    uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, true);
 
     trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
-    stq_p(g2h(ptr), val);
+    stl_le_p(g2h(ptr), val);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+}
+
+void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
+{
+    uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, true);
+
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    stq_le_p(g2h(ptr), val);
     qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
 }
 
@@ -906,27 +1042,51 @@ void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr,
     clear_helper_retaddr();
 }
 
-void cpu_stw_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint32_t val, uintptr_t retaddr)
+void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t retaddr)
 {
     set_helper_retaddr(retaddr);
-    cpu_stw_data(env, ptr, val);
+    cpu_stw_be_data(env, ptr, val);
     clear_helper_retaddr();
 }
 
-void cpu_stl_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint32_t val, uintptr_t retaddr)
+void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    set_helper_retaddr(retaddr);
+    cpu_stl_be_data(env, ptr, val);
+    clear_helper_retaddr();
+}
+
+void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint64_t val, uintptr_t retaddr)
+{
+    set_helper_retaddr(retaddr);
+    cpu_stq_be_data(env, ptr, val);
+    clear_helper_retaddr();
+}
+
+void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    set_helper_retaddr(retaddr);
+    cpu_stw_le_data(env, ptr, val);
+    clear_helper_retaddr();
+}
+
+void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t retaddr)
 {
     set_helper_retaddr(retaddr);
-    cpu_stl_data(env, ptr, val);
+    cpu_stl_le_data(env, ptr, val);
     clear_helper_retaddr();
 }
 
-void cpu_stq_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint64_t val, uintptr_t retaddr)
+void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint64_t val, uintptr_t retaddr)
 {
     set_helper_retaddr(retaddr);
-    cpu_stq_data(env, ptr, val);
+    cpu_stq_le_data(env, ptr, val);
     clear_helper_retaddr();
 }
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
index 0d99eb24c1..9a944ef1af 100644
--- a/docs/devel/loads-stores.rst
+++ b/docs/devel/loads-stores.rst
@@ -97,9 +97,9 @@ function, which is a return address into the generated code.
 
 Function names follow the pattern:
 
-load: ``cpu_ld{sign}{size}_mmuidx_ra(env, ptr, mmuidx, retaddr)``
+load: ``cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmuidx, retaddr)``
 
-store: ``cpu_st{size}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)``
+store: ``cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)``
 
 ``sign``
  - (empty) : for 32 or 64 bit sizes
@@ -112,9 +112,14 @@ store: ``cpu_st{size}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)``
  - ``l`` : 32 bits
  - ``q`` : 64 bits
 
+``end``
+ - (empty) : for target endian, or 8 bit sizes
+ - ``_be`` : big endian
+ - ``_le`` : little endian
+
 Regexes for git grep:
- - ``\<cpu_ld[us]\?[bwlq]_mmuidx_ra\>``
- - ``\<cpu_st[bwlq]_mmuidx_ra\>``
+ - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_mmuidx_ra\>``
+ - ``\<cpu_st[bwlq](_[bl]e)\?_mmuidx_ra\>``
 
 ``cpu_{ld,st}*_data_ra``
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -129,9 +134,9 @@ be performed with a context other than the default.
 
 Function names follow the pattern:
 
-load: ``cpu_ld{sign}{size}_data_ra(env, ptr, ra)``
+load: ``cpu_ld{sign}{size}{end}_data_ra(env, ptr, ra)``
 
-store: ``cpu_st{size}_data_ra(env, ptr, val, ra)``
+store: ``cpu_st{size}{end}_data_ra(env, ptr, val, ra)``
 
 ``sign``
  - (empty) : for 32 or 64 bit sizes
@@ -144,9 +149,14 @@ store: ``cpu_st{size}_data_ra(env, ptr, val, ra)``
  - ``l`` : 32 bits
  - ``q`` : 64 bits
 
+``end``
+ - (empty) : for target endian, or 8 bit sizes
+ - ``_be`` : big endian
+ - ``_le`` : little endian
+
 Regexes for git grep:
- - ``\<cpu_ld[us]\?[bwlq]_data_ra\>``
- - ``\<cpu_st[bwlq]_data_ra\>``
+ - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_data_ra\>``
+ - ``\<cpu_st[bwlq](_[bl]e)\?_data_ra\>``
 
 ``cpu_{ld,st}*_data``
 ~~~~~~~~~~~~~~~~~~~~~
@@ -163,9 +173,9 @@ the CPU state anyway.
 
 Function names follow the pattern:
 
-load: ``cpu_ld{sign}{size}_data(env, ptr)``
+load: ``cpu_ld{sign}{size}{end}_data(env, ptr)``
 
-store: ``cpu_st{size}_data(env, ptr, val)``
+store: ``cpu_st{size}{end}_data(env, ptr, val)``
 
 ``sign``
  - (empty) : for 32 or 64 bit sizes
@@ -178,9 +188,14 @@ store: ``cpu_st{size}_data(env, ptr, val)``
  - ``l`` : 32 bits
  - ``q`` : 64 bits
 
+``end``
+ - (empty) : for target endian, or 8 bit sizes
+ - ``_be`` : big endian
+ - ``_le`` : little endian
+
 Regexes for git grep
- - ``\<cpu_ld[us]\?[bwlq]_data\>``
- - ``\<cpu_st[bwlq]_data\+\>``
+ - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_data\>``
+ - ``\<cpu_st[bwlq](_[bl]e)\?_data\+\>``
 
 ``cpu_ld*_code``
 ~~~~~~~~~~~~~~~~
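The ``{end}`` infix documented above gives callers an explicit byte order, independent of the guest's native one, while the plain spellings keep following the target's endianness. A short illustration — the helper name read_desc_word is invented for this example, and big-endian is chosen arbitrarily:

    /* Sketch: load a 32-bit big-endian descriptor word on any target,
     * attributing a possible fault to the caller's return address. */
    static uint32_t read_desc_word(CPUArchState *env, target_ulong desc,
                                   uintptr_t ra)
    {
        return cpu_ldl_be_data_ra(env, desc, ra);
    }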
diff --git a/exec.c b/exec.c
--- a/exec.c
+++ b/exec.c
@@ -1127,7 +1127,7 @@ int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
     int ret = 0;
 
     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
-        if (watchpoint_address_matches(wp, addr, TARGET_PAGE_SIZE)) {
+        if (watchpoint_address_matches(wp, addr, len)) {
             ret |= wp->flags;
         }
     }
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index a6a2102a93..1eacb2fc17 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -93,7 +93,7 @@ struct AspeedBoardState {
 
 /* Tacoma hardware value */
 #define TACOMA_BMC_HW_STRAP1  0x00000000
-#define TACOMA_BMC_HW_STRAP2  0x00000000
+#define TACOMA_BMC_HW_STRAP2  0x00000040
 
 /*
  * The max ram region is for firmwares that scan the address space
@@ -116,6 +116,58 @@ static const MemoryRegionOps max_ram_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+#define AST_SMP_MAILBOX_BASE            0x1e6e2180
+#define AST_SMP_MBOX_FIELD_ENTRY        (AST_SMP_MAILBOX_BASE + 0x0)
+#define AST_SMP_MBOX_FIELD_GOSIGN       (AST_SMP_MAILBOX_BASE + 0x4)
+#define AST_SMP_MBOX_FIELD_READY        (AST_SMP_MAILBOX_BASE + 0x8)
+#define AST_SMP_MBOX_FIELD_POLLINSN     (AST_SMP_MAILBOX_BASE + 0xc)
+#define AST_SMP_MBOX_CODE               (AST_SMP_MAILBOX_BASE + 0x10)
+#define AST_SMP_MBOX_GOSIGN             0xabbaab00
+
+static void aspeed_write_smpboot(ARMCPU *cpu,
+                                 const struct arm_boot_info *info)
+{
+    static const uint32_t poll_mailbox_ready[] = {
+        /*
+         * r2 = per-cpu go sign value
+         * r1 = AST_SMP_MBOX_FIELD_ENTRY
+         * r0 = AST_SMP_MBOX_FIELD_GOSIGN
+         */
+        0xee100fb0,  /* mrc     p15, 0, r0, c0, c0, 5 */
+        0xe21000ff,  /* ands    r0, r0, #255          */
+        0xe59f201c,  /* ldr     r2, [pc, #28]         */
+        0xe1822000,  /* orr     r2, r2, r0            */
+
+        0xe59f1018,  /* ldr     r1, [pc, #24]         */
+        0xe59f0018,  /* ldr     r0, [pc, #24]         */
+
+        0xe320f002,  /* wfe                           */
+        0xe5904000,  /* ldr     r4, [r0]              */
+        0xe1520004,  /* cmp     r2, r4                */
+        0x1afffffb,  /* bne     <wfe>                 */
+        0xe591f000,  /* ldr     pc, [r1]              */
+        AST_SMP_MBOX_GOSIGN,
+        AST_SMP_MBOX_FIELD_ENTRY,
+        AST_SMP_MBOX_FIELD_GOSIGN,
+    };
+
+    rom_add_blob_fixed("aspeed.smpboot", poll_mailbox_ready,
+                       sizeof(poll_mailbox_ready),
+                       info->smp_loader_start);
+}
+
+static void aspeed_reset_secondary(ARMCPU *cpu,
+                                   const struct arm_boot_info *info)
+{
+    AddressSpace *as = arm_boot_address_space(cpu, info);
+    CPUState *cs = CPU(cpu);
+
+    /* info->smp_bootreg_addr */
+    address_space_stl_notdirty(as, AST_SMP_MBOX_FIELD_GOSIGN, 0,
+                               MEMTXATTRS_UNSPECIFIED, NULL);
+    cpu_set_pc(cs, info->smp_loader_start);
+}
+
 #define FIRMWARE_ADDR 0x0
 
 static void write_boot_rom(DriveInfo *dinfo, hwaddr addr, size_t rom_size,
@@ -270,6 +322,19 @@ static void aspeed_machine_init(MachineState *machine)
         }
     }
 
+    if (machine->kernel_filename && bmc->soc.num_cpus > 1) {
+        /* With no u-boot we must set up a boot stub for the secondary CPU */
+        MemoryRegion *smpboot = g_new(MemoryRegion, 1);
+        memory_region_init_ram(smpboot, OBJECT(bmc), "aspeed.smpboot",
+                               0x80, &error_abort);
+        memory_region_add_subregion(get_system_memory(),
+                                    AST_SMP_MAILBOX_BASE, smpboot);
+
+        aspeed_board_binfo.write_secondary_boot = aspeed_write_smpboot;
+        aspeed_board_binfo.secondary_cpu_reset_hook = aspeed_reset_secondary;
+        aspeed_board_binfo.smp_loader_start = AST_SMP_MBOX_CODE;
+    }
+
     aspeed_board_binfo.ram_size = ram_size;
     aspeed_board_binfo.loader_start = sc->memmap[ASPEED_SDRAM];
     aspeed_board_binfo.nb_cpus = bmc->soc.num_cpus;
@@ -520,7 +585,7 @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data)
     AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
 
     mc->desc       = "Aspeed AST2600 EVB (Cortex A7)";
-    amc->soc_name  = "ast2600-a0";
+    amc->soc_name  = "ast2600-a1";
     amc->hw_strap1 = AST2600_EVB_HW_STRAP1;
     amc->hw_strap2 = AST2600_EVB_HW_STRAP2;
     amc->fmc_model = "w25q512jv";
@@ -535,8 +600,8 @@ static void aspeed_machine_tacoma_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
     AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
 
-    mc->desc       = "Aspeed AST2600 EVB (Cortex A7)";
-    amc->soc_name  = "ast2600-a0";
+    mc->desc       = "OpenPOWER Tacoma BMC (Cortex A7)";
+    amc->soc_name  = "ast2600-a1";
     amc->hw_strap1 = TACOMA_BMC_HW_STRAP1;
     amc->hw_strap2 = TACOMA_BMC_HW_STRAP2;
     amc->fmc_model = "mx66l1g45g";
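The boot stub above makes each secondary CPU spin in wfe until the GOSIGN mailbox word carries the magic value tagged with its own CPU id, then jump through the ENTRY word. The matching release sequence a guest kernel would perform on the primary CPU, inferred from the stub (illustrative only, not part of this patch):

    /* Sketch of the guest side: release secondary CPU 'cpu' to 'entry'. */
    static void release_secondary(unsigned cpu, uint32_t entry)
    {
        volatile uint32_t *mbox = (volatile uint32_t *)AST_SMP_MAILBOX_BASE;

        mbox[0] = entry;                      /* AST_SMP_MBOX_FIELD_ENTRY  */
        mbox[1] = AST_SMP_MBOX_GOSIGN | cpu;  /* AST_SMP_MBOX_FIELD_GOSIGN */
        __asm__ volatile("sev");              /* wake the stub's wfe loop  */
    }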
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index 1a869e09b9..c6e0ab84ac 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -557,9 +557,9 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data)
 
     dc->realize      = aspeed_soc_ast2600_realize;
 
-    sc->name         = "ast2600-a0";
+    sc->name         = "ast2600-a1";
     sc->cpu_type     = ARM_CPU_TYPE_NAME("cortex-a7");
-    sc->silicon_rev  = AST2600_A0_SILICON_REV;
+    sc->silicon_rev  = AST2600_A1_SILICON_REV;
     sc->sram_size    = 0x10000;
     sc->spis_num     = 2;
     sc->ehcis_num    = 2;
@@ -571,7 +571,7 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data)
 }
 
 static const TypeInfo aspeed_soc_ast2600_type_info = {
-    .name           = "ast2600-a0",
+    .name           = "ast2600-a1",
     .parent         = TYPE_ASPEED_SOC,
     .instance_size  = sizeof(AspeedSoCState),
     .instance_init  = aspeed_soc_ast2600_init,
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index b2d0cfdac8..92f33ed87e 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -1619,14 +1619,10 @@ static void musicpal_init(MachineState *machine)
                           pic[MP_TIMER2_IRQ], pic[MP_TIMER3_IRQ],
                           pic[MP_TIMER4_IRQ], NULL);
 
-    if (serial_hd(0)) {
-        serial_mm_init(address_space_mem, MP_UART1_BASE, 2, pic[MP_UART1_IRQ],
-                       1825000, serial_hd(0), DEVICE_NATIVE_ENDIAN);
-    }
-    if (serial_hd(1)) {
-        serial_mm_init(address_space_mem, MP_UART2_BASE, 2, pic[MP_UART2_IRQ],
-                       1825000, serial_hd(1), DEVICE_NATIVE_ENDIAN);
-    }
+    serial_mm_init(address_space_mem, MP_UART1_BASE, 2, pic[MP_UART1_IRQ],
+                   1825000, serial_hd(0), DEVICE_NATIVE_ENDIAN);
+    serial_mm_init(address_space_mem, MP_UART2_BASE, 2, pic[MP_UART2_IRQ],
+                   1825000, serial_hd(1), DEVICE_NATIVE_ENDIAN);
 
     /* Register flash */
     dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c
index 57eff63f0d..71309e53cc 100644
--- a/hw/arm/nrf51_soc.c
+++ b/hw/arm/nrf51_soc.c
@@ -150,13 +150,18 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
 
     /* TIMER */
     for (i = 0; i < NRF51_NUM_TIMERS; i++) {
+        object_property_set_uint(OBJECT(&s->timer[i]), i, "id", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
         object_property_set_bool(OBJECT(&s->timer[i]), true, "realized", &err);
         if (err) {
             error_propagate(errp, err);
             return;
         }
 
-        base_addr = NRF51_TIMER_BASE + i * NRF51_TIMER_SIZE;
+        base_addr = NRF51_TIMER_BASE + i * NRF51_PERIPHERAL_SIZE;
 
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->timer[i]), 0, base_addr);
         sysbus_connect_irq(SYS_BUS_DEVICE(&s->timer[i]), 0,
@@ -166,7 +171,7 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
 
     /* STUB Peripherals */
     memory_region_init_io(&s->clock, OBJECT(dev_soc), &clock_ops, NULL,
-                          "nrf51_soc.clock", 0x1000);
+                          "nrf51_soc.clock", NRF51_PERIPHERAL_SIZE);
     memory_region_add_subregion_overlap(&s->container,
                                         NRF51_IOMEM_BASE, &s->clock, -1);
diff --git a/hw/i2c/microbit_i2c.c b/hw/i2c/microbit_i2c.c
index 4661f05253..8024739820 100644
--- a/hw/i2c/microbit_i2c.c
+++ b/hw/i2c/microbit_i2c.c
@@ -100,7 +100,7 @@ static void microbit_i2c_realize(DeviceState *dev, Error **errp)
     MicrobitI2CState *s = MICROBIT_I2C(dev);
 
     memory_region_init_io(&s->iomem, OBJECT(s), &microbit_i2c_ops, s,
-                          "microbit.twi", NRF51_TWI_SIZE);
+                          "microbit.twi", NRF51_PERIPHERAL_SIZE);
     sysbus_init_mmio(sbd, &s->iomem);
 }
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 9d7482a9df..ec4fef900e 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -431,6 +431,7 @@ static uint32_t aspeed_silicon_revs[] = {
     AST2500_A0_SILICON_REV,
     AST2500_A1_SILICON_REV,
     AST2600_A0_SILICON_REV,
+    AST2600_A1_SILICON_REV,
 };
 
 bool is_supported_silicon_rev(uint32_t silicon_rev)
@@ -649,12 +650,10 @@ static const MemoryRegionOps aspeed_ast2600_scu_ops = {
     .valid.unaligned = false,
 };
 
-static const uint32_t ast2600_a0_resets[ASPEED_AST2600_SCU_NR_REGS] = {
-    [AST2600_SILICON_REV]       = AST2600_SILICON_REV,
-    [AST2600_SILICON_REV2]      = AST2600_SILICON_REV,
-    [AST2600_SYS_RST_CTRL]      = 0xF7CFFEDC | 0x100,
+static const uint32_t ast2600_a1_resets[ASPEED_AST2600_SCU_NR_REGS] = {
+    [AST2600_SYS_RST_CTRL]      = 0xF7C3FED8,
     [AST2600_SYS_RST_CTRL2]     = 0xFFFFFFFC,
-    [AST2600_CLK_STOP_CTRL]     = 0xEFF43E8B,
+    [AST2600_CLK_STOP_CTRL]     = 0xFFFF7F8A,
     [AST2600_CLK_STOP_CTRL2]    = 0xFFF0FFF0,
     [AST2600_SDRAM_HANDSHAKE]   = 0x00000040,  /* SoC completed DRAM init */
     [AST2600_HPLL_PARAM]        = 0x1000405F,
@@ -684,7 +683,7 @@ static void aspeed_2600_scu_class_init(ObjectClass *klass, void *data)
 
     dc->desc = "ASPEED 2600 System Control Unit";
     dc->reset = aspeed_ast2600_scu_reset;
-    asc->resets = ast2600_a0_resets;
+    asc->resets = ast2600_a1_resets;
    asc->calc_hpll = aspeed_2500_scu_calc_hpll; /* No change since AST2500 */
     asc->apb_divider = 4;
     asc->nr_regs = ASPEED_AST2600_SCU_NR_REGS;
diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index 7b466bf19a..14db9cfc1f 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -23,7 +23,12 @@
 
 /* Protection Key Register */
 #define R_PROT            (0x00 / 4)
+#define   PROT_UNLOCKED      0x01
+#define   PROT_HARDLOCKED    0x10  /* AST2600 */
+#define   PROT_SOFTLOCKED    0x00
+
 #define   PROT_KEY_UNLOCK    0xFC600309
+#define   PROT_KEY_HARDLOCK  0xDEADDEAD /* AST2600 */
 
 /* Configuration Register */
 #define R_CONF            (0x04 / 4)
@@ -130,16 +135,6 @@ static void aspeed_sdmc_write(void *opaque, hwaddr addr, uint64_t data,
         return;
     }
 
-    if (addr == R_PROT) {
-        s->regs[addr] = (data == PROT_KEY_UNLOCK) ? 1 : 0;
-        return;
-    }
-
-    if (!s->regs[R_PROT]) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked!\n", __func__);
-        return;
-    }
-
     asc->write(s, addr, data);
 }
 
@@ -320,6 +315,16 @@ static uint32_t aspeed_2400_sdmc_compute_conf(AspeedSDMCState *s, uint32_t data)
 static void aspeed_2400_sdmc_write(AspeedSDMCState *s, uint32_t reg,
                                    uint32_t data)
 {
+    if (reg == R_PROT) {
+        s->regs[reg] = (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
+        return;
+    }
+
+    if (!s->regs[R_PROT]) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked!\n", __func__);
+        return;
+    }
+
     switch (reg) {
     case R_CONF:
         data = aspeed_2400_sdmc_compute_conf(s, data);
@@ -368,6 +373,16 @@ static uint32_t aspeed_2500_sdmc_compute_conf(AspeedSDMCState *s, uint32_t data)
 static void aspeed_2500_sdmc_write(AspeedSDMCState *s, uint32_t reg,
                                    uint32_t data)
 {
+    if (reg == R_PROT) {
+        s->regs[reg] = (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
+        return;
+    }
+
+    if (!s->regs[R_PROT]) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked!\n", __func__);
+        return;
+    }
+
     switch (reg) {
     case R_CONF:
         data = aspeed_2500_sdmc_compute_conf(s, data);
@@ -424,7 +439,27 @@ static uint32_t aspeed_2600_sdmc_compute_conf(AspeedSDMCState *s, uint32_t data)
 static void aspeed_2600_sdmc_write(AspeedSDMCState *s, uint32_t reg,
                                    uint32_t data)
 {
+    if (s->regs[R_PROT] == PROT_HARDLOCKED) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked until system reset!\n",
+                      __func__);
+        return;
+    }
+
+    if (reg != R_PROT && s->regs[R_PROT] == PROT_SOFTLOCKED) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked!\n", __func__);
+        return;
+    }
+
     switch (reg) {
+    case R_PROT:
+        if (data == PROT_KEY_UNLOCK) {
+            data = PROT_UNLOCKED;
+        } else if (data == PROT_KEY_HARDLOCK) {
+            data = PROT_HARDLOCKED;
+        } else {
+            data = PROT_SOFTLOCKED;
+        }
+        break;
     case R_CONF:
         data = aspeed_2600_sdmc_compute_conf(s, data);
         break;
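The AST2600 write path above models a three-state protection key. The guest-visible protocol, summarized as a sketch (sdmc_write stands in for a 32-bit MMIO store to the controller and is not a real function):

    sdmc_write(R_PROT, PROT_KEY_UNLOCK);    /* 0xFC600309: unlock            */
    sdmc_write(R_CONF, conf);               /* accepted                      */
    sdmc_write(R_PROT, 0);                  /* any other value: soft lock    */
    sdmc_write(R_CONF, conf);               /* rejected, logged guest error  */
    sdmc_write(R_PROT, PROT_KEY_HARDLOCK);  /* 0xDEADDEAD: locked until      */
                                            /* system reset (AST2600 only)   */

Note that R_PROT itself stays writable while soft-locked, so a soft-locked controller can still be unlocked or hard-locked; a hard-locked one ignores everything.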
diff --git a/hw/timer/nrf51_timer.c b/hw/timer/nrf51_timer.c
index e04046eb15..42be79c736 100644
--- a/hw/timer/nrf51_timer.c
+++ b/hw/timer/nrf51_timer.c
@@ -17,6 +17,7 @@
 #include "hw/arm/nrf51.h"
 #include "hw/irq.h"
 #include "hw/timer/nrf51_timer.h"
+#include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "trace.h"
 
@@ -185,7 +186,7 @@ static uint64_t nrf51_timer_read(void *opaque, hwaddr offset, unsigned int size)
                       __func__, offset);
     }
 
-    trace_nrf51_timer_read(offset, r, size);
+    trace_nrf51_timer_read(s->id, offset, r, size);
 
     return r;
 }
@@ -197,7 +198,7 @@ static void nrf51_timer_write(void *opaque, hwaddr offset,
     uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     size_t idx;
 
-    trace_nrf51_timer_write(offset, value, size);
+    trace_nrf51_timer_write(s->id, offset, value, size);
 
     switch (offset) {
     case NRF51_TIMER_TASK_START:
@@ -239,6 +240,7 @@ static void nrf51_timer_write(void *opaque, hwaddr offset,
 
             idx = (offset - NRF51_TIMER_TASK_CAPTURE_0) / 4;
             s->cc[idx] = s->counter;
+            trace_nrf51_timer_set_count(s->id, idx, s->counter);
         }
         break;
     case NRF51_TIMER_EVENT_COMPARE_0 ... NRF51_TIMER_EVENT_COMPARE_3:
@@ -313,7 +315,7 @@ static void nrf51_timer_init(Object *obj)
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
     memory_region_init_io(&s->iomem, obj, &rng_ops, s,
-                          TYPE_NRF51_TIMER, NRF51_TIMER_SIZE);
+                          TYPE_NRF51_TIMER, NRF51_PERIPHERAL_SIZE);
     sysbus_init_mmio(sbd, &s->iomem);
     sysbus_init_irq(sbd, &s->irq);
 
@@ -372,12 +374,18 @@ static const VMStateDescription vmstate_nrf51_timer = {
     }
 };
 
+static Property nrf51_timer_properties[] = {
+    DEFINE_PROP_UINT8("id", NRF51TimerState, id, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void nrf51_timer_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->reset = nrf51_timer_reset;
     dc->vmsd = &vmstate_nrf51_timer;
+    device_class_set_props(dc, nrf51_timer_properties);
 }
 
 static const TypeInfo nrf51_timer_info = {
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index 29fda7870e..80ea197594 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -67,8 +67,9 @@ cmsdk_apb_dualtimer_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK
 cmsdk_apb_dualtimer_reset(void) "CMSDK APB dualtimer: reset"
 
 # nrf51_timer.c
-nrf51_timer_read(uint64_t addr, uint32_t value, unsigned size) "read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
-nrf51_timer_write(uint64_t addr, uint32_t value, unsigned size) "write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+nrf51_timer_read(uint8_t timer_id, uint64_t addr, uint32_t value, unsigned size) "timer %u read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+nrf51_timer_write(uint8_t timer_id, uint64_t addr, uint32_t value, unsigned size) "timer %u write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+nrf51_timer_set_count(uint8_t timer_id, uint8_t counter_id, uint32_t value) "timer %u counter %u count 0x%" PRIx32
 
 # bcm2835_systmr.c
 bcm2835_systmr_irq(bool enable) "timer irq state %u"
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 49384bb66a..43ddcf024c 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -328,7 +328,18 @@ CPUArchState *cpu_copy(CPUArchState *env);
                                  | CPU_INTERRUPT_TGT_EXT_3   \
                                  | CPU_INTERRUPT_TGT_EXT_4)
 
-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+
+/*
+ * Allow some level of source compatibility with softmmu.  We do not
+ * support any of the more exotic features, so only invalid pages may
+ * be signaled by probe_access_flags().
+ */
+#define TLB_INVALID_MASK    (1 << (TARGET_PAGE_BITS_MIN - 1))
+#define TLB_MMIO            0
+#define TLB_WATCHPOINT      0
+
+#else
 
 /*
  * Flags stored in the low bits of the TLB virtual address.
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 53de19753a..c14a48f65e 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -25,13 +25,13 @@
 *
 * The syntax for the accessors is:
 *
- * load:  cpu_ld{sign}{size}_{mmusuffix}(env, ptr)
- *        cpu_ld{sign}{size}_{mmusuffix}_ra(env, ptr, retaddr)
- *        cpu_ld{sign}{size}_mmuidx_ra(env, ptr, mmu_idx, retaddr)
+ * load:  cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr)
+ *        cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr)
+ *        cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr)
 *
- * store: cpu_st{size}_{mmusuffix}(env, ptr, val)
- *        cpu_st{size}_{mmusuffix}_ra(env, ptr, val, retaddr)
- *        cpu_st{size}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr)
+ * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val)
+ *        cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr)
+ *        cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr)
 *
 * sign is:
 * (empty): for 32 and 64 bit sizes
@@ -44,6 +44,11 @@
 * l: 32 bits
 * q: 64 bits
 *
+ * end is:
+ * (empty): for target native endian, or for 8 bit access
+ * _be: for forced big endian
+ * _le: for forced little endian
+ *
 * mmusuffix is one of the generic suffixes "data" or "code", or "mmuidx".
 * The "mmuidx" suffix carries an extra mmu_idx argument that specifies
 * the index to use; the "data" and "code" suffixes take the index from
@@ -95,32 +100,57 @@ typedef target_ulong abi_ptr;
 #endif
 
 uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr);
-uint32_t cpu_lduw_data(CPUArchState *env, abi_ptr ptr);
-uint32_t cpu_ldl_data(CPUArchState *env, abi_ptr ptr);
-uint64_t cpu_ldq_data(CPUArchState *env, abi_ptr ptr);
 int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr);
-int cpu_ldsw_data(CPUArchState *env, abi_ptr ptr);
-uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr);
-uint32_t cpu_lduw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr);
-uint32_t cpu_ldl_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr);
-uint64_t cpu_ldq_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr);
-int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr);
-int cpu_ldsw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr);
+
+uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr);
+int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr);
+uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr);
+uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr);
+
+uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr);
+int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr);
+uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr);
+uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr);
+
+uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+
+uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+
+uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
+uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 
 void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
-void cpu_stw_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
-void cpu_stl_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
-void cpu_stq_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
+
+void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
+void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
+void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
+
+void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
+void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
+void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
 
 void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint32_t val, uintptr_t retaddr);
-void cpu_stw_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint32_t val, uintptr_t retaddr);
-void cpu_stl_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint32_t val, uintptr_t retaddr);
-void cpu_stq_data_ra(CPUArchState *env, abi_ptr ptr,
-                     uint64_t val, uintptr_t retaddr);
+                     uint32_t val, uintptr_t ra);
+
+void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t ra);
+void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t ra);
+void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint64_t val, uintptr_t ra);
+
+void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t ra);
+void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint32_t val, uintptr_t ra);
+void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr,
+                        uint64_t val, uintptr_t ra);
 
 #if defined(CONFIG_USER_ONLY)
 
@@ -157,34 +187,58 @@ static inline uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
     return cpu_ldub_data_ra(env, addr, ra);
 }
 
-static inline uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                          int mmu_idx, uintptr_t ra)
+static inline int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                     int mmu_idx, uintptr_t ra)
 {
-    return cpu_lduw_data_ra(env, addr, ra);
+    return cpu_ldsb_data_ra(env, addr, ra);
 }
 
-static inline uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                         int mmu_idx, uintptr_t ra)
+static inline uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                             int mmu_idx, uintptr_t ra)
 {
-    return cpu_ldl_data_ra(env, addr, ra);
+    return cpu_lduw_be_data_ra(env, addr, ra);
 }
 
-static inline uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                         int mmu_idx, uintptr_t ra)
+static inline int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        int mmu_idx, uintptr_t ra)
 {
-    return cpu_ldq_data_ra(env, addr, ra);
+    return cpu_ldsw_be_data_ra(env, addr, ra);
 }
 
-static inline int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     int mmu_idx, uintptr_t ra)
+static inline uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                            int mmu_idx, uintptr_t ra)
 {
-    return cpu_ldsb_data_ra(env, addr, ra);
+    return cpu_ldl_be_data_ra(env, addr, ra);
 }
 
-static inline int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     int mmu_idx, uintptr_t ra)
+static inline uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                            int mmu_idx, uintptr_t ra)
+{
+    return cpu_ldq_be_data_ra(env, addr, ra);
+}
+
+static inline uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                             int mmu_idx, uintptr_t ra)
+{
+    return cpu_lduw_le_data_ra(env, addr, ra);
+}
+
+static inline int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        int mmu_idx, uintptr_t ra)
+{
+    return cpu_ldsw_le_data_ra(env, addr, ra);
+}
+
+static inline uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                            int mmu_idx, uintptr_t ra)
+{
+    return cpu_ldl_le_data_ra(env, addr, ra);
+}
+
+static inline uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                            int mmu_idx, uintptr_t ra)
 {
-    return cpu_ldsw_data_ra(env, addr, ra);
+    return cpu_ldq_le_data_ra(env, addr, ra);
 }
 
 static inline void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
@@ -193,22 +247,46 @@ static inline void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
     cpu_stb_data_ra(env, addr, val, ra);
 }
 
-static inline void cpu_stw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     uint32_t val, int mmu_idx, uintptr_t ra)
+static inline void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        uint32_t val, int mmu_idx,
+                                        uintptr_t ra)
 {
-    cpu_stw_data_ra(env, addr, val, ra);
+    cpu_stw_be_data_ra(env, addr, val, ra);
 }
 
-static inline void cpu_stl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     uint32_t val, int mmu_idx, uintptr_t ra)
+static inline void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        uint32_t val, int mmu_idx,
+                                        uintptr_t ra)
+{
+    cpu_stl_be_data_ra(env, addr, val, ra);
+}
+
+static inline void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        uint64_t val, int mmu_idx,
+                                        uintptr_t ra)
+{
+    cpu_stq_be_data_ra(env, addr, val, ra);
+}
+
+static inline void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        uint32_t val, int mmu_idx,
+                                        uintptr_t ra)
+{
+    cpu_stw_le_data_ra(env, addr, val, ra);
+}
+
+static inline void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        uint32_t val, int mmu_idx,
+                                        uintptr_t ra)
 {
-    cpu_stl_data_ra(env, addr, val, ra);
+    cpu_stl_le_data_ra(env, addr, val, ra);
 }
 
-static inline void cpu_stq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     uint64_t val, int mmu_idx, uintptr_t ra)
+static inline void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                                        uint64_t val, int mmu_idx,
+                                        uintptr_t ra)
 {
-    cpu_stq_data_ra(env, addr, val, ra);
+    cpu_stq_le_data_ra(env, addr, val, ra);
 }
 
 #else
@@ -243,29 +321,92 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
 
 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                             int mmu_idx, uintptr_t ra);
-uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                            int mmu_idx, uintptr_t ra);
-uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                           int mmu_idx, uintptr_t ra);
-uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                           int mmu_idx, uintptr_t ra);
-
 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                        int mmu_idx, uintptr_t ra);
-int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                       int mmu_idx, uintptr_t ra);
+
+uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                               int mmu_idx, uintptr_t ra);
+int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                          int mmu_idx, uintptr_t ra);
+uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra);
+uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra);
+
+uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                               int mmu_idx, uintptr_t ra);
+int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                          int mmu_idx, uintptr_t ra);
+uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra);
+uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra);
 
 void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
                        int mmu_idx, uintptr_t retaddr);
-void cpu_stw_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                       int mmu_idx, uintptr_t retaddr);
-void cpu_stl_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                       int mmu_idx, uintptr_t retaddr);
-void cpu_stq_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
-                       int mmu_idx, uintptr_t retaddr);
+
+void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr);
+void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr);
+void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
+                          int mmu_idx, uintptr_t retaddr);
+
+void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr);
+void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr);
+void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
+                          int mmu_idx, uintptr_t retaddr);
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
+#ifdef TARGET_WORDS_BIGENDIAN
+# define cpu_lduw_data        cpu_lduw_be_data
+# define cpu_ldsw_data        cpu_ldsw_be_data
+# define cpu_ldl_data         cpu_ldl_be_data
+# define cpu_ldq_data         cpu_ldq_be_data
+# define cpu_lduw_data_ra     cpu_lduw_be_data_ra
+# define cpu_ldsw_data_ra     cpu_ldsw_be_data_ra
+# define cpu_ldl_data_ra      cpu_ldl_be_data_ra
+# define cpu_ldq_data_ra      cpu_ldq_be_data_ra
+# define cpu_lduw_mmuidx_ra   cpu_lduw_be_mmuidx_ra
+# define cpu_ldsw_mmuidx_ra   cpu_ldsw_be_mmuidx_ra
+# define cpu_ldl_mmuidx_ra    cpu_ldl_be_mmuidx_ra
+# define cpu_ldq_mmuidx_ra    cpu_ldq_be_mmuidx_ra
+# define cpu_stw_data         cpu_stw_be_data
+# define cpu_stl_data         cpu_stl_be_data
+# define cpu_stq_data         cpu_stq_be_data
+# define cpu_stw_data_ra      cpu_stw_be_data_ra
+# define cpu_stl_data_ra      cpu_stl_be_data_ra
+# define cpu_stq_data_ra      cpu_stq_be_data_ra
+# define cpu_stw_mmuidx_ra    cpu_stw_be_mmuidx_ra
+# define cpu_stl_mmuidx_ra    cpu_stl_be_mmuidx_ra
+# define cpu_stq_mmuidx_ra    cpu_stq_be_mmuidx_ra
+#else
+# define cpu_lduw_data        cpu_lduw_le_data
+# define cpu_ldsw_data        cpu_ldsw_le_data
+# define cpu_ldl_data         cpu_ldl_le_data
+# define cpu_ldq_data         cpu_ldq_le_data
+# define cpu_lduw_data_ra     cpu_lduw_le_data_ra
+# define cpu_ldsw_data_ra     cpu_ldsw_le_data_ra
+# define cpu_ldl_data_ra      cpu_ldl_le_data_ra
+# define cpu_ldq_data_ra      cpu_ldq_le_data_ra
+# define cpu_lduw_mmuidx_ra   cpu_lduw_le_mmuidx_ra
+# define cpu_ldsw_mmuidx_ra   cpu_ldsw_le_mmuidx_ra
+# define cpu_ldl_mmuidx_ra    cpu_ldl_le_mmuidx_ra
+# define cpu_ldq_mmuidx_ra    cpu_ldq_le_mmuidx_ra
+# define cpu_stw_data         cpu_stw_le_data
+# define cpu_stl_data         cpu_stl_le_data
+# define cpu_stq_data         cpu_stq_le_data
+# define cpu_stw_data_ra      cpu_stw_le_data_ra
+# define cpu_stl_data_ra      cpu_stl_le_data_ra
+# define cpu_stq_data_ra      cpu_stq_le_data_ra
+# define cpu_stw_mmuidx_ra    cpu_stw_le_mmuidx_ra
+# define cpu_stl_mmuidx_ra    cpu_stl_le_mmuidx_ra
+# define cpu_stq_mmuidx_ra    cpu_stq_le_mmuidx_ra
+#endif
+
 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
Raise an exception if the + * page does not satisfy @access_type. Raise an exception if the + * access (@addr, @size) hits a watchpoint. For writes, mark a clean + * page as dirty. + * + * Finally, return the host address for a page that is backed by RAM, + * or NULL if the page requires I/O. + */ void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); @@ -345,6 +362,28 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); } +/** + * probe_access_flags: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @nonfault: suppress the fault + * @phost: return value for host address + * @retaddr: return address for unwinding + * + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for + * the page, and storing the host address for RAM in @phost. + * + * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK. + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags. + * Do handle clean pages, so exclude TLB_NOTDIRTY from the returned flags. + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. + */ +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr); + #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ /* Estimated block size for TB allocation. */ diff --git a/include/hw/arm/nrf51.h b/include/hw/arm/nrf51.h index 1008fee6c9..de836beaa4 100644 --- a/include/hw/arm/nrf51.h +++ b/include/hw/arm/nrf51.h @@ -24,11 +24,10 @@ #define NRF51_IOMEM_BASE 0x40000000 #define NRF51_IOMEM_SIZE 0x20000000 +#define NRF51_PERIPHERAL_SIZE 0x00001000 #define NRF51_UART_BASE 0x40002000 #define NRF51_TWI_BASE 0x40003000 -#define NRF51_TWI_SIZE 0x00001000 #define NRF51_TIMER_BASE 0x40008000 -#define NRF51_TIMER_SIZE 0x00001000 #define NRF51_RNG_BASE 0x4000D000 #define NRF51_NVMC_BASE 0x4001E000 #define NRF51_GPIO_BASE 0x50000000 diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 5bf94d28cf..07f7698155 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -1100,8 +1100,31 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len, int flags); void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint); void cpu_watchpoint_remove_all(CPUState *cpu, int mask); + +/** + * cpu_check_watchpoint: + * @cpu: cpu context + * @addr: guest virtual address + * @len: access length + * @attrs: memory access attributes + * @flags: watchpoint access type + * @ra: unwind return address + * + * Check for a watchpoint hit in [addr, addr+len) of the type + * specified by @flags. Exit via exception on a hit. + */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra); + +/** + * cpu_watchpoint_address_matches: + * @cpu: cpu context + * @addr: guest virtual address + * @len: access length + * + * Return the watchpoint flags that apply to [addr, addr+len). + * If no watchpoint is registered for the range, the result is 0.
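As a rough sketch of the contract documented above (not part of the patch: the helper name and control flow are invented for illustration; only the probe_access_flags() declaration and the TLB_* flag names come from this series), a no-fault load built on the new API could look like:

/* Hypothetical caller, shown only to illustrate handling of the flags. */
static bool nofault_ldq_sketch(CPUArchState *env, target_ulong addr,
                               int mmu_idx, uint64_t *val, uintptr_t ra)
{
    void *host;
    int flags = probe_access_flags(env, addr, MMU_DATA_LOAD, mmu_idx,
                                   true, &host, ra);

    if (flags & TLB_INVALID_MASK) {
        return false;  /* translation failed; nonfault suppressed the fault */
    }
    if (flags & (TLB_MMIO | TLB_WATCHPOINT)) {
        return false;  /* not plain RAM, or a watchpoint to honour: slow path */
    }
    /* Ordinary RAM: clean pages (TLB_NOTDIRTY) were handled by the probe. */
    *val = ldq_p(host);
    return true;
}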
+ */ int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len); #endif diff --git a/include/hw/i2c/microbit_i2c.h b/include/hw/i2c/microbit_i2c.h index aad636127e..2bff36680c 100644 --- a/include/hw/i2c/microbit_i2c.h +++ b/include/hw/i2c/microbit_i2c.h @@ -29,7 +29,7 @@ #define MICROBIT_I2C(obj) \ OBJECT_CHECK(MicrobitI2CState, (obj), TYPE_MICROBIT_I2C) -#define MICROBIT_I2C_NREGS (NRF51_TWI_SIZE / sizeof(uint32_t)) +#define MICROBIT_I2C_NREGS (NRF51_PERIPHERAL_SIZE / sizeof(uint32_t)) typedef struct { SysBusDevice parent_obj; diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h index 1d7f7ffc15..a6739bb846 100644 --- a/include/hw/misc/aspeed_scu.h +++ b/include/hw/misc/aspeed_scu.h @@ -41,6 +41,7 @@ typedef struct AspeedSCUState { #define AST2500_A0_SILICON_REV 0x04000303U #define AST2500_A1_SILICON_REV 0x04010303U #define AST2600_A0_SILICON_REV 0x05000303U +#define AST2600_A1_SILICON_REV 0x05010303U #define ASPEED_IS_AST2500(si_rev) ((((si_rev) >> 24) & 0xff) == 0x04) diff --git a/include/hw/timer/nrf51_timer.h b/include/hw/timer/nrf51_timer.h index 85cad2300d..eb6815f21d 100644 --- a/include/hw/timer/nrf51_timer.h +++ b/include/hw/timer/nrf51_timer.h @@ -59,6 +59,7 @@ typedef struct NRF51TimerState { MemoryRegion iomem; qemu_irq irq; + uint8_t id; QEMUTimer timer; int64_t timer_start_ns; int64_t update_counter_ns; diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs index 775b3e24f2..83febd232c 100644 --- a/target/arm/Makefile.objs +++ b/target/arm/Makefile.objs @@ -79,6 +79,7 @@ obj-y += translate.o op_helper.o obj-y += crypto_helper.o obj-y += iwmmxt_helper.o vec_helper.o neon_helper.o obj-y += m_helper.o +obj-y += cpu_tcg.o obj-$(CONFIG_SOFTMMU) += psci.o diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 5d64adfe76..3794f0dbc4 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -574,32 +574,6 @@ bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) return true; } -#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) -{ - CPUClass *cc = CPU_GET_CLASS(cs); - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - bool ret = false; - - /* - * ARMv7-M interrupt masking works differently than -A or -R. - * There is no FIQ/IRQ distinction. Instead of I and F bits - * masking FIQ and IRQ interrupts, an exception is taken only - * if it is higher priority than the current execution priority - * (which depends on state like BASEPRI, FAULTMASK and the - * currently active exception). - */ - if (interrupt_request & CPU_INTERRUPT_HARD - && (armv7m_nvic_can_take_pending_exception(env->nvic))) { - cs->exception_index = EXCP_IRQ; - cc->do_interrupt(cs); - ret = true; - } - return ret; -} -#endif - void arm_cpu_update_virq(ARMCPU *cpu) { /* @@ -725,16 +699,6 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs) #endif -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - -static inline void unset_feature(CPUARMState *env, int feature) -{ - env->features &= ~(1ULL << feature); -} - static int print_insn_thumb1(bfd_vma pc, disassemble_info *info) { @@ -1830,406 +1794,6 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) /* CPU models. These are not needed for the AArch64 linux-user build. 
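One consequence of the constant consolidation above: MICROBIT_I2C_NREGS keeps its old value, since NRF51_TWI_SIZE and the new NRF51_PERIPHERAL_SIZE are both 0x1000. A compile-time check (illustrative, not in the patch) makes the arithmetic explicit:

/* One 4 KiB nRF51 peripheral window holds 0x1000 / 4 = 1024 registers. */
QEMU_BUILD_BUG_ON(NRF51_PERIPHERAL_SIZE / sizeof(uint32_t) != 1024);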
*/ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -static void arm926_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,arm926"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - cpu->midr = 0x41069265; - cpu->reset_fpsid = 0x41011090; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00090078; - - /* - * ARMv5 does not have the ID_ISAR registers, but we can still - * set the field to indicate Jazelle support within QEMU. - */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); - /* - * Similarly, we need to set MVFR0 fields to enable vfp and short vector - * support even though ARMv5 doesn't have this register. - */ - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1); - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); -} - -static void arm946_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,arm946"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_PMSA); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - cpu->midr = 0x41059461; - cpu->ctr = 0x0f004006; - cpu->reset_sctlr = 0x00000078; -} - -static void arm1026_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,arm1026"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_AUXCR); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - cpu->midr = 0x4106a262; - cpu->reset_fpsid = 0x410110a0; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00090078; - cpu->reset_auxcr = 1; - - /* - * ARMv5 does not have the ID_ISAR registers, but we can still - * set the field to indicate Jazelle support within QEMU. - */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); - /* - * Similarly, we need to set MVFR0 fields to enable vfp and short vector - * support even though ARMv5 doesn't have this register. - */ - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1); - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); - - { - /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ - ARMCPRegInfo ifar = { - .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, - .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.ifar_ns), - .resetvalue = 0 - }; - define_one_arm_cp_reg(cpu, &ifar); - } -} - -static void arm1136_r2_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - /* - * What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an - * older core than plain "arm1136". In particular this does not - * have the v6K features. - * These ID register values are correct for 1136 but may be wrong - * for 1136_r2 (in particular r0p2 does not actually implement most - * of the ID registers). 
- */ - - cpu->dtb_compatible = "arm,arm1136"; - set_feature(&cpu->env, ARM_FEATURE_V6); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); - set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); - cpu->midr = 0x4107b362; - cpu->reset_fpsid = 0x410120b4; - cpu->isar.mvfr0 = 0x11111111; - cpu->isar.mvfr1 = 0x00000000; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; - cpu->reset_auxcr = 7; -} - -static void arm1136_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,arm1136"; - set_feature(&cpu->env, ARM_FEATURE_V6K); - set_feature(&cpu->env, ARM_FEATURE_V6); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); - set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); - cpu->midr = 0x4117b363; - cpu->reset_fpsid = 0x410120b4; - cpu->isar.mvfr0 = 0x11111111; - cpu->isar.mvfr1 = 0x00000000; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; - cpu->reset_auxcr = 7; -} - -static void arm1176_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,arm1176"; - set_feature(&cpu->env, ARM_FEATURE_V6K); - set_feature(&cpu->env, ARM_FEATURE_VAPA); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); - set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); - set_feature(&cpu->env, ARM_FEATURE_EL3); - cpu->midr = 0x410fb767; - cpu->reset_fpsid = 0x410120b5; - cpu->isar.mvfr0 = 0x11111111; - cpu->isar.mvfr1 = 0x00000000; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x33; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222100; - cpu->isar.id_isar0 = 0x0140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231121; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x01141; - cpu->reset_auxcr = 7; -} - -static void arm11mpcore_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,arm11mpcore"; - set_feature(&cpu->env, ARM_FEATURE_V6K); - set_feature(&cpu->env, ARM_FEATURE_VAPA); - set_feature(&cpu->env, ARM_FEATURE_MPIDR); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - cpu->midr = 0x410fb022; - cpu->reset_fpsid = 0x410120b4; - cpu->isar.mvfr0 = 0x11111111; - cpu->isar.mvfr1 = 0x00000000; - cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0; - cpu->id_afr0 = 0x2; - cpu->isar.id_mmfr0 = 0x01100103; - cpu->isar.id_mmfr1 = 0x10020302; - cpu->isar.id_mmfr2 = 0x01222000; - cpu->isar.id_isar0 = 0x00100011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11221011; - 
cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; - cpu->reset_auxcr = 1; -} - -static void cortex_m0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - set_feature(&cpu->env, ARM_FEATURE_V6); - set_feature(&cpu->env, ARM_FEATURE_M); - - cpu->midr = 0x410cc200; -} - -static void cortex_m3_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - cpu->midr = 0x410fc231; - cpu->pmsav7_dregion = 8; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m4_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x410fc240; /* r0p0 */ - cpu->pmsav7_dregion = 8; - cpu->isar.mvfr0 = 0x10110021; - cpu->isar.mvfr1 = 0x11000011; - cpu->isar.mvfr2 = 0x00000000; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m7_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x411fc272; /* r1p2 */ - cpu->pmsav7_dregion = 8; - cpu->isar.mvfr0 = 0x10110221; - cpu->isar.mvfr1 = 0x12000011; - cpu->isar.mvfr2 = 0x00000040; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00100030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02112000; - cpu->isar.id_isar2 = 0x20232231; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m33_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x410fd213; /* r0p3 */ - cpu->pmsav7_dregion = 16; - cpu->sau_sregion = 8; - cpu->isar.mvfr0 = 0x10110021; - cpu->isar.mvfr1 = 0x11000011; - cpu->isar.mvfr2 = 0x00000040; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000210; - cpu->isar.id_dfr0 = 0x00200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00101F40; 
- cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; - cpu->ctr = 0x8000c000; -} - -static void arm_v7m_class_init(ObjectClass *oc, void *data) -{ - ARMCPUClass *acc = ARM_CPU_CLASS(oc); - CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -#ifndef CONFIG_USER_ONLY - cc->do_interrupt = arm_v7m_cpu_do_interrupt; -#endif - - cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt; -} - -static const ARMCPRegInfo cortexr5_cp_reginfo[] = { - /* Dummy the TCM region regs for the moment */ - { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_CONST }, - { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, - .access = PL1_RW, .type = ARM_CP_CONST }, - { .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5, - .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP }, - REGINFO_SENTINEL -}; - -static void cortex_r5_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_V7MP); - set_feature(&cpu->env, ARM_FEATURE_PMSA); - set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->midr = 0x411fc153; /* r1p3 */ - cpu->id_pfr0 = 0x0131; - cpu->id_pfr1 = 0x001; - cpu->isar.id_dfr0 = 0x010400; - cpu->id_afr0 = 0x0; - cpu->isar.id_mmfr0 = 0x0210030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0x0211; - cpu->isar.id_isar0 = 0x02101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232141; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x0010142; - cpu->isar.id_isar5 = 0x0; - cpu->isar.id_isar6 = 0x0; - cpu->mp_is_up = true; - cpu->pmsav7_dregion = 16; - define_arm_cp_regs(cpu, cortexr5_cp_reginfo); -} - -static void cortex_r5f_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cortex_r5_initfn(obj); - cpu->isar.mvfr0 = 0x10110221; - cpu->isar.mvfr1 = 0x00000011; -} - static const ARMCPRegInfo cortexa8_cp_reginfo[] = { { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, @@ -2456,174 +2020,6 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } -static void ti925t_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - set_feature(&cpu->env, ARM_FEATURE_V4T); - set_feature(&cpu->env, ARM_FEATURE_OMAPCP); - cpu->midr = ARM_CPUID_TI925T; - cpu->ctr = 0x5109149; - cpu->reset_sctlr = 0x00000070; -} - -static void sa1100_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "intel,sa1100"; - set_feature(&cpu->env, ARM_FEATURE_STRONGARM); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - cpu->midr = 0x4401A11B; - cpu->reset_sctlr = 0x00000070; -} - -static void sa1110_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - set_feature(&cpu->env, ARM_FEATURE_STRONGARM); - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - cpu->midr = 0x6901B119; - cpu->reset_sctlr = 0x00000070; -} - -static void pxa250_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052100; - cpu->ctr = 
0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa255_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d00; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa260_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052903; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa261_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d05; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa262_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d06; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270a0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054110; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270a1_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054111; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270b0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054112; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270b1_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054113; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270c0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054114; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270c5_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054117; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - #ifndef TARGET_AARCH64 /* -cpu max: if KVM is enabled, like -cpu host (best possible with this host); * otherwise, a CPU with as many features enabled as our emulation supports. 
@@ -2698,50 +2094,10 @@ static void arm_max_initfn(Object *obj) static const ARMCPUInfo arm_cpus[] = { #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, - /* - * What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an - * older core than plain "arm1136". In particular this does not - * have the v6K features. - */ - { .name = "arm1136-r2", .initfn = arm1136_r2_initfn }, - { .name = "arm1136", .initfn = arm1136_initfn }, - { .name = "arm1176", .initfn = arm1176_initfn }, - { .name = "arm11mpcore", .initfn = arm11mpcore_initfn }, - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m4", .initfn = cortex_m4_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m7", .initfn = cortex_m7_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m33", .initfn = cortex_m33_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-r5", .initfn = cortex_r5_initfn }, - { .name = "cortex-r5f", .initfn = cortex_r5f_initfn }, { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, { .name = "cortex-a15", .initfn = cortex_a15_initfn }, - { .name = "ti925t", .initfn = ti925t_initfn }, - { .name = "sa1100", .initfn = sa1100_initfn }, - { .name = "sa1110", .initfn = sa1110_initfn }, - { .name = "pxa250", .initfn = pxa250_initfn }, - { .name = "pxa255", .initfn = pxa255_initfn }, - { .name = "pxa260", .initfn = pxa260_initfn }, - { .name = "pxa261", .initfn = pxa261_initfn }, - { .name = "pxa262", .initfn = pxa262_initfn }, - /* "pxa270" is an alias for "pxa270-a0" */ - { .name = "pxa270", .initfn = pxa270a0_initfn }, - { .name = "pxa270-a0", .initfn = pxa270a0_initfn }, - { .name = "pxa270-a1", .initfn = pxa270a1_initfn }, - { .name = "pxa270-b0", .initfn = pxa270b0_initfn }, - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif @@ -2749,7 +2105,6 @@ static const ARMCPUInfo arm_cpus[] = { { .name = "any", .initfn = arm_max_initfn }, #endif #endif - { .name = NULL } }; static Property arm_cpu_properties[] = { @@ -2897,19 +2252,22 @@ static const TypeInfo idau_interface_type_info = { static void arm_cpu_register_types(void) { - const ARMCPUInfo *info = arm_cpus; + const size_t cpu_count = ARRAY_SIZE(arm_cpus); type_register_static(&arm_cpu_type_info); - type_register_static(&idau_interface_type_info); - - while (info->name) { - arm_cpu_register(info); - info++; - } #ifdef CONFIG_KVM type_register_static(&host_arm_cpu_type_info); #endif + + if (cpu_count) { + size_t i; + + type_register_static(&idau_interface_type_info); + for (i = 0; i < cpu_count; ++i) { + arm_cpu_register(&arm_cpus[i]); + } + } } type_init(arm_cpu_register_types) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 8608da6b6f..676f216b67 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -696,6 +696,16 @@ typedef struct CPUARMState { void *gicv3state; } CPUARMState; +static inline void set_feature(CPUARMState *env, int feature) +{ + env->features |= 1ULL << feature; +} + +static inline void unset_feature(CPUARMState 
*env, int feature) +{ + env->features &= ~(1ULL << feature); +} + /** * ARMELChangeHookFn: * type of a function which can be registered via arm_register_el_change_hook() diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 9bdf75b1ab..f5c49ee32d 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -29,16 +29,6 @@ #include "kvm_arm.h" #include "qapi/visitor.h" -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - -static inline void unset_feature(CPUARMState *env, int feature) -{ - env->features &= ~(1ULL << feature); -} - #ifndef CONFIG_USER_ONLY static uint64_t a57_a53_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) { @@ -744,7 +734,6 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "max", .initfn = aarch64_max_initfn }, - { .name = NULL } }; static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) @@ -850,13 +839,12 @@ static const TypeInfo aarch64_cpu_type_info = { static void aarch64_cpu_register_types(void) { - const ARMCPUInfo *info = aarch64_cpus; + size_t i; type_register_static(&aarch64_cpu_type_info); - while (info->name) { - aarch64_cpu_register(info); - info++; + for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { + aarch64_cpu_register(&aarch64_cpus[i]); } } diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c new file mode 100644 index 0000000000..591baef535 --- /dev/null +++ b/target/arm/cpu_tcg.c @@ -0,0 +1,664 @@ +/* + * QEMU ARM TCG CPUs. + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" + +/* CPU models. These are not needed for the AArch64 linux-user build. */ +#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUClass *cc = CPU_GET_CLASS(cs); + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + bool ret = false; + + /* + * ARMv7-M interrupt masking works differently than -A or -R. + * There is no FIQ/IRQ distinction. Instead of I and F bits + * masking FIQ and IRQ interrupts, an exception is taken only + * if it is higher priority than the current execution priority + * (which depends on state like BASEPRI, FAULTMASK and the + * currently active exception). + */ + if (interrupt_request & CPU_INTERRUPT_HARD + && (armv7m_nvic_can_take_pending_exception(env->nvic))) { + cs->exception_index = EXCP_IRQ; + cc->do_interrupt(cs); + ret = true; + } + return ret; +} + +static void arm926_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm926"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); + cpu->midr = 0x41069265; + cpu->reset_fpsid = 0x41011090; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00090078; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + /* + * Similarly, we need to set MVFR0 fields to enable vfp and short vector + * support even though ARMv5 doesn't have this register. 
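Both registration paths now iterate with ARRAY_SIZE() rather than walking to a NULL-name sentinel, which is why the { .name = NULL } terminators disappear from the tables above. Schematically (simplified, not the literal QEMU code), the two idioms compare as:

/* Old: sentinel-terminated walk; the table had to end in { .name = NULL }. */
for (const ARMCPUInfo *info = arm_cpus; info->name; info++) {
    arm_cpu_register(info);
}

/* New: bounded walk; the element count comes from the array type itself,
 * so no sentinel entry is needed and an empty table is handled naturally. */
for (size_t i = 0; i < ARRAY_SIZE(arm_cpus); ++i) {
    arm_cpu_register(&arm_cpus[i]);
}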
+ */ + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); +} + +static void arm946_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm946"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_PMSA); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x41059461; + cpu->ctr = 0x0f004006; + cpu->reset_sctlr = 0x00000078; +} + +static void arm1026_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm1026"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_AUXCR); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); + cpu->midr = 0x4106a262; + cpu->reset_fpsid = 0x410110a0; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00090078; + cpu->reset_auxcr = 1; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + /* + * Similarly, we need to set MVFR0 fields to enable vfp and short vector + * support even though ARMv5 doesn't have this register. + */ + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); + + { + /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ + ARMCPRegInfo ifar = { + .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.ifar_ns), + .resetvalue = 0 + }; + define_one_arm_cp_reg(cpu, &ifar); + } +} + +static void arm1136_r2_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + /* + * What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an + * older core than plain "arm1136". In particular this does not + * have the v6K features. + * These ID register values are correct for 1136 but may be wrong + * for 1136_r2 (in particular r0p2 does not actually implement most + * of the ID registers). 
+ */ + + cpu->dtb_compatible = "arm,arm1136"; + set_feature(&cpu->env, ARM_FEATURE_V6); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + cpu->midr = 0x4107b362; + cpu->reset_fpsid = 0x410120b4; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; + cpu->isar.id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; + cpu->isar.id_mmfr0 = 0x01130003; + cpu->isar.id_mmfr1 = 0x10030302; + cpu->isar.id_mmfr2 = 0x01222110; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; + cpu->reset_auxcr = 7; +} + +static void arm1136_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm1136"; + set_feature(&cpu->env, ARM_FEATURE_V6K); + set_feature(&cpu->env, ARM_FEATURE_V6); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + cpu->midr = 0x4117b363; + cpu->reset_fpsid = 0x410120b4; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; + cpu->isar.id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; + cpu->isar.id_mmfr0 = 0x01130003; + cpu->isar.id_mmfr1 = 0x10030302; + cpu->isar.id_mmfr2 = 0x01222110; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; + cpu->reset_auxcr = 7; +} + +static void arm1176_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm1176"; + set_feature(&cpu->env, ARM_FEATURE_V6K); + set_feature(&cpu->env, ARM_FEATURE_VAPA); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->midr = 0x410fb767; + cpu->reset_fpsid = 0x410120b5; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x11; + cpu->isar.id_dfr0 = 0x33; + cpu->id_afr0 = 0; + cpu->isar.id_mmfr0 = 0x01130003; + cpu->isar.id_mmfr1 = 0x10030302; + cpu->isar.id_mmfr2 = 0x01222100; + cpu->isar.id_isar0 = 0x0140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231121; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x01141; + cpu->reset_auxcr = 7; +} + +static void arm11mpcore_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm11mpcore"; + set_feature(&cpu->env, ARM_FEATURE_V6K); + set_feature(&cpu->env, ARM_FEATURE_VAPA); + set_feature(&cpu->env, ARM_FEATURE_MPIDR); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x410fb022; + cpu->reset_fpsid = 0x410120b4; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; + cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; + cpu->isar.id_dfr0 = 0; + cpu->id_afr0 = 0x2; + cpu->isar.id_mmfr0 = 0x01100103; + cpu->isar.id_mmfr1 = 0x10020302; + cpu->isar.id_mmfr2 = 0x01222000; + cpu->isar.id_isar0 = 0x00100011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11221011; + 
cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; + cpu->reset_auxcr = 1; +} + +static void cortex_m0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V6); + set_feature(&cpu->env, ARM_FEATURE_M); + + cpu->midr = 0x410cc200; +} + +static void cortex_m3_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + cpu->midr = 0x410fc231; + cpu->pmsav7_dregion = 8; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m4_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x410fc240; /* r0p0 */ + cpu->pmsav7_dregion = 8; + cpu->isar.mvfr0 = 0x10110021; + cpu->isar.mvfr1 = 0x11000011; + cpu->isar.mvfr2 = 0x00000000; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m7_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x411fc272; /* r1p2 */ + cpu->pmsav7_dregion = 8; + cpu->isar.mvfr0 = 0x10110221; + cpu->isar.mvfr1 = 0x12000011; + cpu->isar.mvfr2 = 0x00000040; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00100030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02112000; + cpu->isar.id_isar2 = 0x20232231; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m33_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x410fd213; /* r0p3 */ + cpu->pmsav7_dregion = 16; + cpu->sau_sregion = 8; + cpu->isar.mvfr0 = 0x10110021; + cpu->isar.mvfr1 = 0x11000011; + cpu->isar.mvfr2 = 0x00000040; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000210; + cpu->isar.id_dfr0 = 0x00200000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00101F40; 
+ cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; + cpu->clidr = 0x00000000; + cpu->ctr = 0x8000c000; +} + +static const ARMCPRegInfo cortexr5_cp_reginfo[] = { + /* Dummy the TCM region regs for the moment */ + { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST }, + { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST }, + { .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5, + .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP }, + REGINFO_SENTINEL +}; + +static void cortex_r5_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_V7MP); + set_feature(&cpu->env, ARM_FEATURE_PMSA); + set_feature(&cpu->env, ARM_FEATURE_PMU); + cpu->midr = 0x411fc153; /* r1p3 */ + cpu->id_pfr0 = 0x0131; + cpu->id_pfr1 = 0x001; + cpu->isar.id_dfr0 = 0x010400; + cpu->id_afr0 = 0x0; + cpu->isar.id_mmfr0 = 0x0210030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01200000; + cpu->isar.id_mmfr3 = 0x0211; + cpu->isar.id_isar0 = 0x02101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232141; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x0010142; + cpu->isar.id_isar5 = 0x0; + cpu->isar.id_isar6 = 0x0; + cpu->mp_is_up = true; + cpu->pmsav7_dregion = 16; + define_arm_cp_regs(cpu, cortexr5_cp_reginfo); +} + +static void cortex_r5f_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cortex_r5_initfn(obj); + cpu->isar.mvfr0 = 0x10110221; + cpu->isar.mvfr1 = 0x00000011; +} + +static void ti925t_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V4T); + set_feature(&cpu->env, ARM_FEATURE_OMAPCP); + cpu->midr = ARM_CPUID_TI925T; + cpu->ctr = 0x5109149; + cpu->reset_sctlr = 0x00000070; +} + +static void sa1100_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "intel,sa1100"; + set_feature(&cpu->env, ARM_FEATURE_STRONGARM); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x4401A11B; + cpu->reset_sctlr = 0x00000070; +} + +static void sa1110_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_STRONGARM); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x6901B119; + cpu->reset_sctlr = 0x00000070; +} + +static void pxa250_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052100; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa255_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052d00; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa260_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052903; + cpu->ctr = 0xd172172; + 
cpu->reset_sctlr = 0x00000078; +} + +static void pxa261_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052d05; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa262_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052d06; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270a0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054110; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270a1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054111; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270b0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054112; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270b1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054113; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270c0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054114; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270c5_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054117; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void arm_v7m_class_init(ObjectClass *oc, void *data) +{ + ARMCPUClass *acc = ARM_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; +#ifndef CONFIG_USER_ONLY + cc->do_interrupt = arm_v7m_cpu_do_interrupt; +#endif + + cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt; +} + +static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, + /* + * What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an + * older core than plain "arm1136". In particular this does not + * have the v6K features. 
+ */ + { .name = "arm1136-r2", .initfn = arm1136_r2_initfn }, + { .name = "arm1136", .initfn = arm1136_initfn }, + { .name = "arm1176", .initfn = arm1176_initfn }, + { .name = "arm11mpcore", .initfn = arm11mpcore_initfn }, + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m4", .initfn = cortex_m4_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m7", .initfn = cortex_m7_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m33", .initfn = cortex_m33_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-r5", .initfn = cortex_r5_initfn }, + { .name = "cortex-r5f", .initfn = cortex_r5f_initfn }, + { .name = "ti925t", .initfn = ti925t_initfn }, + { .name = "sa1100", .initfn = sa1100_initfn }, + { .name = "sa1110", .initfn = sa1110_initfn }, + { .name = "pxa250", .initfn = pxa250_initfn }, + { .name = "pxa255", .initfn = pxa255_initfn }, + { .name = "pxa260", .initfn = pxa260_initfn }, + { .name = "pxa261", .initfn = pxa261_initfn }, + { .name = "pxa262", .initfn = pxa262_initfn }, + /* "pxa270" is an alias for "pxa270-a0" */ + { .name = "pxa270", .initfn = pxa270a0_initfn }, + { .name = "pxa270-a0", .initfn = pxa270a0_initfn }, + { .name = "pxa270-a1", .initfn = pxa270a1_initfn }, + { .name = "pxa270-b0", .initfn = pxa270b0_initfn }, + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +}; + +static void arm_tcg_cpu_register_types(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(arm_tcg_cpus); ++i) { + arm_cpu_register(&arm_tcg_cpus[i]); + } +} + +type_init(arm_tcg_cpu_register_types) + +#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 2f47279155..7a200755ac 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1099,25 +1099,40 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/helper.c b/target/arm/helper.c index a94f650795..b88d27819d 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -503,35 +503,19 @@ void init_cpreg_list(ARMCPU *cpu) } /* - * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but - * they are accessible when EL3 is using AArch64 regardless of EL3.NS. - * - * access_el3_aa32ns: Used to check AArch32 register views. - * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views. + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. */ static CPAccessResult access_el3_aa32ns(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - bool secure = arm_is_secure_below_el3(env); - - assert(!arm_el_is_aa64(env, 3)); - if (secure) { + if (!is_a64(env) && arm_current_el(env) == 3 && + arm_is_secure_below_el3(env)) { return CP_ACCESS_TRAP_UNCATEGORIZED; } return CP_ACCESS_OK; } -static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - if (!arm_el_is_aa64(env, 3)) { - return access_el3_aa32ns(env, ri, isread); - } - return CP_ACCESS_OK; -} - /* Some secure-only AArch32 registers trap to EL3 if used from * Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts). * Note that an access from Secure EL1 can only happen if EL3 is AArch64. 
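Collapsing the two access functions works because the new predicate is self-qualifying; spelled out as a comment (an explanatory gloss, not patch content):

/* access_el3_aa32ns now traps only when all three conditions hold:
 *   !is_a64(env)                  -- the CPU is executing in AArch32 state
 *   arm_current_el(env) == 3      -- it is running at EL3 (monitor mode)
 *   arm_is_secure_below_el3(env)  -- SCR.NS == 0
 * Any AArch64 access, including from AArch64 EL3, falls through to
 * CP_ACCESS_OK, which is exactly the case the deleted
 * access_el3_aa32ns_aa64any() used to special-case. */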
@@ -5147,7 +5131,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTTBR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 6, .crm = 2, @@ -5195,7 +5179,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3, @@ -7537,12 +7521,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo vpidr_regs[] = { { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = cpu->midr, .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) }, { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_NO_RAW, .writefn = arm_cp_write_ignore, .readfn = mpidr_read }, REGINFO_SENTINEL diff --git a/target/arm/internals.h b/target/arm/internals.h index e633aff36e..a833e3941d 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -979,11 +979,6 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu) } } -/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. - * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. - */ -#define MEMOPIDX_SHIFT 8 - /** * v7m_using_psp: Return true if using process stack pointer * Return true if the CPU is currently using the process stack diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index f271181ab8..7b3a19e9ae 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -22,11 +22,6 @@ #include "internals.h" #include "qemu/log.h" -static inline void set_feature(uint64_t *features, int feature) -{ - *features |= 1ULL << feature; -} - static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) { struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret }; @@ -146,14 +141,14 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * timers; this in turn implies most of the other feature * bits, but a few must be tested. 
*/ - set_feature(&features, ARM_FEATURE_V7VE); - set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + features |= 1ULL << ARM_FEATURE_V7VE; + features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; if (extract32(id_pfr0, 12, 4) == 1) { - set_feature(&features, ARM_FEATURE_THUMB2EE); + features |= 1ULL << ARM_FEATURE_THUMB2EE; } if (extract32(ahcf->isar.mvfr1, 12, 4) == 1) { - set_feature(&features, ARM_FEATURE_NEON); + features |= 1ULL << ARM_FEATURE_NEON; } ahcf->features = features; diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index be5b31c2b0..cd8ab6b8ae 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -447,16 +447,6 @@ void kvm_arm_pmu_set_irq(CPUState *cs, int irq) } } -static inline void set_feature(uint64_t *features, int feature) -{ - *features |= 1ULL << feature; -} - -static inline void unset_feature(uint64_t *features, int feature) -{ - *features &= ~(1ULL << feature); -} - static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) { uint64_t ret; @@ -648,11 +638,11 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * with VFPv4+Neon; this in turn implies most of the other * feature bits. */ - set_feature(&features, ARM_FEATURE_V8); - set_feature(&features, ARM_FEATURE_NEON); - set_feature(&features, ARM_FEATURE_AARCH64); - set_feature(&features, ARM_FEATURE_PMU); - set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + features |= 1ULL << ARM_FEATURE_V8; + features |= 1ULL << ARM_FEATURE_NEON; + features |= 1ULL << ARM_FEATURE_AARCH64; + features |= 1ULL << ARM_FEATURE_PMU; + features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; ahcf->features = features; @@ -802,7 +792,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (cpu->has_pmu) { cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; } else { - unset_feature(&env->features, ARM_FEATURE_PMU); + env->features &= ~(1ULL << ARM_FEATURE_PMU); } if (cpu_isar_feature(aa64_sve, cpu)) { assert(kvm_arm_sve_supported(cs)); diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index fdfa652094..0da254d402 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -1630,7 +1630,7 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) } } -/* Big-endian hosts need to frob the byte indicies. If the copy +/* Big-endian hosts need to frob the byte indices. If the copy * happens to be 8-byte aligned, then no frobbing necessary. */ static void swap_memmove(void *vd, void *vs, size_t n) @@ -3372,23 +3372,11 @@ DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) #undef DO_ZPZ_FP -/* 4-operand predicated multiply-add. This requires 7 operands to pass - * "properly", so we need to encode some of the registers into DESC. 
- */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32); - -static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3401,45 +3389,42 @@ static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; e2 = *(uint16_t *)(vm + H1_2(i)); e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; - r = float16_muladd(e1, e2, e3, 0, &env->vfp.fp_status_f16); + r = float16_muladd(e1, e2, e3, 0, status); *(uint16_t *)(vd + H1_2(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); } -void HELPER(sve_fnmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); } -void HELPER(sve_fnmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); } -static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint32_t neg1, uint32_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3452,45 +3437,42 @@ static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; e2 = *(uint32_t *)(vm + H1_4(i)); e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; - r = float32_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float32_muladd(e1, e2, e3, 0, status); *(uint32_t *)(vd + H1_4(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0); + 
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); } -void HELPER(sve_fnmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); } -void HELPER(sve_fnmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); } -static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint64_t neg1, uint64_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3503,31 +3485,35 @@ static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint64_t *)(vn + i) ^ neg1; e2 = *(uint64_t *)(vm + i); e3 = *(uint64_t *)(va + i) ^ neg3; - r = float64_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float64_muladd(e1, e2, e3, 0, status); *(uint64_t *)(vd + i) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); } -void HELPER(sve_fnmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); } -void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); } /* Two operand floating-point comparison controlled by a predicate. 
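All four flavours per element size above share a single body; only the neg1/neg3 constants differ, XOR-ing the IEEE sign bit into the multiplicand (vn) and/or the addend (va) before the fused multiply-add. A minimal standalone sketch of the trick for the 32-bit case, with plain C float arithmetic standing in for float32_muladd and hypothetical names:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Flip the chosen IEEE sign bits, then multiply-add. */
    static float fmla32(float n, float m, float a,
                        uint32_t neg1, uint32_t neg3)
    {
        uint32_t e1, e3;
        memcpy(&e1, &n, 4);
        memcpy(&e3, &a, 4);
        e1 ^= neg1;
        e3 ^= neg3;
        memcpy(&n, &e1, 4);
        memcpy(&a, &e3, 4);
        return n * m + a;   /* stands in for float32_muladd() */
    }

    int main(void)
    {
        float n = 2.0f, m = 3.0f, a = 10.0f;
        printf("fmla  %g\n", fmla32(n, m, a, 0, 0));                   /*  16 */
        printf("fmls  %g\n", fmla32(n, m, a, 0x80000000u, 0));         /*   4 */
        printf("fnmla %g\n", fmla32(n, m, a, 0x80000000u,
                                    0x80000000u));                     /* -16 */
        printf("fnmls %g\n", fmla32(n, m, a, 0, 0x80000000u));         /*  -4 */
        return 0;
    }

The same masks appear in the patch as 0x8000 for halfwords and INT64_MIN for doublewords; flipping signs up front is what lets one loop serve FMLA (a + n*m), FMLS (a - n*m), FNMLA (-a - n*m) and FNMLS (-a + n*m).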
@@ -3809,22 +3795,13 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, * FP Complex Multiply */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 22 > 32); - -void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float16 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float16_set_sign(0, (rot & 2) != 0); @@ -3851,32 +3828,25 @@ void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float16 *)(va + H1_2(i)); - d = float16_muladd(e2, e1, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e2, e1, d, 0, status); *(float16 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float16 *)(va + H1_2(j)); - d = float16_muladd(e4, e3, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e4, e3, d, 0, status); *(float16 *)(vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float32 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float32_set_sign(0, (rot & 2) != 0); @@ -3903,32 +3873,25 @@ void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float32 *)(va + H1_2(i)); - d = float32_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float32_muladd(e2, e1, d, 0, status); *(float32 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float32 *)(va + H1_2(j)); - d = float32_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float32_muladd(e4, e3, d, 0, status); *(float32 *)(vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float64 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = 
&env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float64_set_sign(0, (rot & 2) != 0); @@ -3955,12 +3918,12 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float64 *)(va + H1_2(i)); - d = float64_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float64_muladd(e2, e1, d, 0, status); *(float64 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float64 *)(va + H1_2(j)); - d = float64_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float64_muladd(e4, e3, d, 0, status); *(float64 *)(vd + H1_2(j)) = d; } } while (i & 63); @@ -3972,71 +3935,50 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) */ /* - * Load elements into @vd, controlled by @vg, from @host + @mem_ofs. - * Memory is valid through @host + @mem_max. The register element - * indicies are inferred from @mem_ofs, as modified by the types for - * which the helper is built. Return the @mem_ofs of the first element - * not loaded (which is @mem_max if they are all loaded). - * - * For softmmu, we have fully validated the guest page. For user-only, - * we cannot fully validate without taking the mmap lock, but since we - * know the access is within one host page, if any access is valid they - * all must be valid. However, when @vg is all false, it may be that - * no access is valid. + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. */ -typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host, - intptr_t mem_ofs, intptr_t mem_max); +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); /* * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). * The controlling predicate is known to be true. */ -typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra); -typedef sve_ld1_tlb_fn sve_st1_tlb_fn; +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); /* * Generate the above primitives. 
*/ #define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ -static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \ - intptr_t mem_off, const intptr_t mem_max) \ -{ \ - intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \ - uint64_t *pg = vg; \ - while (mem_off + sizeof(TYPEM) <= mem_max) { \ - TYPEM val = 0; \ - if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \ - val = HOST(host + mem_off); \ - } \ - *(TYPEE *)(vd + H(reg_off)) = val; \ - mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \ - } \ - return mem_off; \ -} - -#ifdef CONFIG_SOFTMMU -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ \ + TYPEM val = HOST(host); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} + +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); } + +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ + target_ulong addr, uintptr_t ra) \ { \ - TYPEM val = TLB(env, addr, oi, ra); \ - *(TYPEE *)(vd + H(reg_off)) = val; \ + *(TYPEE *)(vd + H(reg_off)) = (TYPEM)TLB(env, addr, ra); \ } -#else -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ + +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ + target_ulong addr, uintptr_t ra) \ { \ - TYPEM val = HOST(g2h(addr)); \ - *(TYPEE *)(vd + H(reg_off)) = val; \ + TLB(env, addr, (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ } -#endif #define DO_LD_PRIM_1(NAME, H, TE, TM) \ DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ - DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu) + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) @@ -4046,39 +3988,54 @@ DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) -#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \ - DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \ - DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \ - MOEND, helper_##end##_##PT##_mmu) +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) -DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw) +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) -DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul) +#define DO_ST_PRIM_2(NAME, H, 
TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) -DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq) +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) -DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw) +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) -DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul) +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) -DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq) +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) #undef DO_LD_TLB +#undef DO_ST_TLB #undef DO_LD_HOST #undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 #undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 /* * Skip through a sequence of inactive elements in the guarding predicate @vg, @@ -4115,139 +4072,471 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, } /* - * Return the maximum offset <= @mem_max which is still within the page - * referenced by @base + @mem_off. + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. */ -static intptr_t max_for_page(target_ulong base, intptr_t mem_off, - intptr_t mem_max) -{ - target_ulong addr = base + mem_off; - intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); - return MIN(split, mem_max - mem_off) + mem_off; -} -#ifndef CONFIG_USER_ONLY -/* These are normally defined only for CONFIG_USER_ONLY in <exec/cpu_ldst.h> */ -static inline void set_helper_retaddr(uintptr_t ra) { } -static inline void clear_helper_retaddr(void) { } -#endif +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; -/* - * The result of tlb_vaddr_to_host for user-only is just g2h(x), - * which is always non-null. Elide the useless test. - */ -static inline bool test_host_page(void *host) +static bool sve_probe_page(SVEHostPage *info, bool nofault, + CPUARMState *env, target_ulong addr, + int mem_off, MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) { + int flags; + + addr += mem_off; + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. 
*/ + info->host -= mem_off; + #ifdef CONFIG_USER_ONLY - return true; + memset(&info->attrs, 0, sizeof(info->attrs)); #else - return likely(host != NULL); + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +# ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = (access_type == MMU_DATA_LOAD + ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +# endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } #endif + + return true; } + /* - * Common helper for all contiguous one-register predicated loads. + * Analyse contiguous data, protected by a governing predicate. */ -static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - ARMVectorReg scratch; - void *host; - intptr_t split, reg_off, mem_off; - /* Find the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ - memset(vd, 0, reg_max); - return; - } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; +typedef struct { /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. + * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. */ - split = max_for_page(addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After having taken any fault, zero leading inactive elements. */ - swap_memzero(vd, reg_off); - return; + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. 
+ */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Find first active element on each page, and a loose bound for the + * final element on each page. Identify any single element that spans + * the page boundary. Return true if there are any active elements. + */ +static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, + uint64_t *vg, intptr_t reg_max, + int esz, int msize) +{ + const int esize = 1 << esz; + const uint64_t pg_mask = pred_esz_masks[esz]; + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; + intptr_t mem_off_last, mem_off_split; + intptr_t page_split, elt_split; + intptr_t i; + + /* Set all of the element indices to -1, and the TLB data to 0. */ + memset(info, -1, offsetof(SVEContLdSt, page)); + memset(info->page, 0, sizeof(info->page)); + + /* Gross scan over the entire predicate to find bounds. */ + i = 0; + do { + uint64_t pg = vg[i] & pg_mask; + if (pg) { + reg_off_last = i * 64 + 63 - clz64(pg); + if (reg_off_first < 0) { + reg_off_first = i * 64 + ctz64(pg); + } } + } while (++i * 64 < reg_max); + + if (unlikely(reg_off_first < 0)) { + /* No active elements, no pages touched. */ + return false; } + tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max); + + info->reg_off_first[0] = reg_off_first; + info->mem_off_first[0] = (reg_off_first >> esz) * msize; + mem_off_last = (reg_off_last >> esz) * msize; + + page_split = -(addr | TARGET_PAGE_MASK); + if (likely(mem_off_last + msize <= page_split)) { + /* The entire operation fits within a single page. */ + info->reg_off_last[0] = reg_off_last; + return true; + } + + info->page_split = page_split; + elt_split = page_split / msize; + reg_off_split = elt_split << esz; + mem_off_split = elt_split * msize; /* - * Perform the predicated read into a temporary, thus ensuring - * if the load of the last element faults, Vd is not modified. + * This is the last full element on the first page, but it is not + * necessarily active. If there is no full element, i.e. the first + * active element is the one that's split, this value remains -1. + * It is useful as iteration bounds. */ -#ifdef CONFIG_USER_ONLY - swap_memzero(&scratch, reg_off); - host_fn(&scratch, vg, g2h(addr), mem_off, mem_max); -#else - memset(&scratch, 0, reg_max); - goto start; - while (1) { - reg_off = find_next_active(vg, reg_off, reg_max, esz); - if (reg_off >= reg_max) { - break; - } - mem_off = reg_off >> diffsz; - split = max_for_page(addr, mem_off, mem_max); - - start: - if (split - mem_off >= (1 << msz)) { - /* At least one whole element on this page. */ - host = tlb_vaddr_to_host(env, addr + mem_off, - MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(&scratch, vg, host - mem_off, - mem_off, split); - reg_off = mem_off << diffsz; - continue; + if (elt_split != 0) { + info->reg_off_last[0] = reg_off_split - esize; + } + + /* Determine if an unaligned element spans the pages. */ + if (page_split % msize != 0) { + /* It is helpful to know if the split element is active. */ + if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) { + info->reg_off_split = reg_off_split; + info->mem_off_split = mem_off_split; + + if (reg_off_split == reg_off_last) { + /* The page crossing element is last. 
*/ + return true; } } + reg_off_split += esize; + mem_off_split += msize; + } + + /* + * We do want the first active element on the second page, because + * this may affect the address reported in an exception. + */ + reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz); + tcg_debug_assert(reg_off_split <= reg_off_last); + info->reg_off_first[1] = reg_off_split; + info->mem_off_first[1] = (reg_off_split >> esz) * msize; + info->reg_off_last[1] = reg_off_last; + return true; +} + +/* + * Resolve the guest virtual addresses to info->page[]. + * Control the generation of page faults with @fault. Return false if + * there is no work to do, which can only happen with @fault == FAULT_NO. + */ +static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, + CPUARMState *env, target_ulong addr, + MMUAccessType access_type, uintptr_t retaddr) +{ + int mmu_idx = cpu_mmu_index(env, false); + int mem_off = info->mem_off_first[0]; + bool nofault = fault == FAULT_NO; + bool have_work = true; + + if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr)) { + /* No work to be done. */ + return false; + } + if (likely(info->page_split < 0)) { + /* The entire operation was on the one page. */ + return true; + } + + /* + * If the second page is invalid, then we want the fault address to be + * the first byte on that page which is accessed. + */ + if (info->mem_off_split >= 0) { + /* + * There is an element split across the pages. The fault address + * should be the first byte of the second page. + */ + mem_off = info->page_split; + /* + * If the split element is also the first active element + * of the vector, then: For first-fault we should continue + * to generate faults for the second page. For no-fault, + * we have work only if the second page is valid. + */ + if (info->mem_off_first[0] < info->mem_off_split) { + nofault = FAULT_FIRST; + have_work = false; + } + } else { + /* + * There is no element split across the pages. The fault address + * should be the first active element on the second page. + */ + mem_off = info->mem_off_first[1]; /* - * Perform one normal read. This may fault, longjmping out to the - * main loop in order to raise an exception. It may succeed, and - * as a side-effect load the TLB entry for the next round. Finally, - * in the extremely unlikely case we're performing this operation - * on I/O memory, it may succeed but not bring in the TLB entry. - * But even then we have still made forward progress. + * There must have been one active element on the first page, + * so we're out of first-fault territory. */ - tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); - reg_off += 1 << esz; + nofault = fault != FAULT_ALL; + } + + have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr); + return have_work; +} + +static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, int wp_access, + uintptr_t retaddr) +{ +#ifndef CONFIG_USER_ONLY + intptr_t mem_off, reg_off, reg_last; + int flags0 = info->page[0].flags; + int flags1 = info->page[1].flags; + + if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) { + return; + } + + /* Indicate that watchpoints are handled. 
*/ + info->page[0].flags = flags0 & ~TLB_WATCHPOINT; + info->page[1].flags = flags1 & ~TLB_WATCHPOINT; + + if (flags0 & TLB_WATCHPOINT) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_last[0]; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, + msize, info->page[0].attrs, + wp_access, retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } + } + + mem_off = info->mem_off_split; + if (mem_off >= 0) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, retaddr); + } + + mem_off = info->mem_off_first[1]; + if ((flags1 & TLB_WATCHPOINT) && mem_off >= 0) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, + msize, info->page[1].attrs, + wp_access, retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); } #endif +} - clear_helper_retaddr(); - memcpy(vd, &scratch, reg_max); +/* + * Common helper for all contiguous 1,2,3,4-register predicated loads. + */ +static inline QEMU_ALWAYS_INLINE +void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int flags, i; + + /* Find the active elements. */ + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no load occurs. */ + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_READ, retaddr); + + /* TODO: MTE check. */ + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. + */ + ARMVectorReg scratch[4] = { }; + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &scratch[i], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + + for (i = 0; i < N; ++i) { + memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max); + } + return; +#endif + } + + /* The entire operation is in RAM, on valid pages.
*/ + + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); + } + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } #define DO_LD1_1(NAME, ESZ) \ void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ sve_##NAME##_host, sve_##NAME##_tlb); \ } @@ -4255,168 +4544,76 @@ void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ } -DO_LD1_1(ld1bb, 0) -DO_LD1_1(ld1bhu, 1) -DO_LD1_1(ld1bhs, 1) -DO_LD1_1(ld1bsu, 2) -DO_LD1_1(ld1bss, 2) -DO_LD1_1(ld1bdu, 3) -DO_LD1_1(ld1bds, 3) +DO_LD1_1(ld1bb, MO_8) +DO_LD1_1(ld1bhu, MO_16) +DO_LD1_1(ld1bhs, MO_16) +DO_LD1_1(ld1bsu, MO_32) +DO_LD1_1(ld1bss, MO_32) +DO_LD1_1(ld1bdu, MO_64) +DO_LD1_1(ld1bds, MO_64) -DO_LD1_2(ld1hh, 1, 1) -DO_LD1_2(ld1hsu, 2, 1) -DO_LD1_2(ld1hss, 2, 1) -DO_LD1_2(ld1hdu, 3, 1) -DO_LD1_2(ld1hds, 3, 1) +DO_LD1_2(ld1hh, MO_16, MO_16) +DO_LD1_2(ld1hsu, MO_32, MO_16) +DO_LD1_2(ld1hss, MO_32, MO_16) +DO_LD1_2(ld1hdu, MO_64, MO_16) +DO_LD1_2(ld1hds, MO_64, MO_16) -DO_LD1_2(ld1ss, 2, 2) -DO_LD1_2(ld1sdu, 3, 2) -DO_LD1_2(ld1sds, 3, 2) +DO_LD1_2(ld1ss, MO_32, MO_32) +DO_LD1_2(ld1sdu, MO_64, MO_32) +DO_LD1_2(ld1sds, MO_64, MO_32) -DO_LD1_2(ld1dd, 3, 3) +DO_LD1_2(ld1dd, MO_64, MO_64) #undef DO_LD1_1 #undef DO_LD1_2 -/* - * Common helpers for all contiguous 2,3,4-register predicated loads. 
- */ -static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[2] = { }; - - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - } - i += size, pg >>= size; - addr += 2 * size; - } while (i & 15); - } - clear_helper_retaddr(); - - /* Wait until all exceptions have been raised to write back. */ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); -} - -static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[3] = { }; - - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - } - i += size, pg >>= size; - addr += 3 * size; - } while (i & 15); - } - clear_helper_retaddr(); - - /* Wait until all exceptions have been raised to write back. */ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); -} - -static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[4] = { }; - - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); - } - i += size, pg >>= size; - addr += 4 * size; - } while (i & 15); - } - clear_helper_retaddr(); - - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); - memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); -} - #define DO_LDN_1(N) \ -void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ +void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ + sve_ld1bb_host, sve_ld1bb_tlb); \ } -#define DO_LDN_2(N, SUFF, SIZE) \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +#define DO_LDN_2(N, SUFF, ESZ) \ +void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ { \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_le_tlb); \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ } \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ { \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_be_tlb); \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ } DO_LDN_1(2) DO_LDN_1(3) DO_LDN_1(4) -DO_LDN_2(2, hh, 2) -DO_LDN_2(3, hh, 2) -DO_LDN_2(4, hh, 2) +DO_LDN_2(2, hh, MO_16) +DO_LDN_2(3, hh, MO_16) +DO_LDN_2(4, hh, MO_16) -DO_LDN_2(2, ss, 4) -DO_LDN_2(3, ss, 4) -DO_LDN_2(4, ss, 4) +DO_LDN_2(2, ss, MO_32) +DO_LDN_2(3, ss, MO_32) +DO_LDN_2(4, ss, MO_32) -DO_LDN_2(2, dd, 8) -DO_LDN_2(3, dd, 8) -DO_LDN_2(4, dd, 8) +DO_LDN_2(2, dd, MO_64) +DO_LDN_2(3, dd, MO_64) +DO_LDN_2(4, dd, MO_64) #undef DO_LDN_1 #undef DO_LDN_2 @@ -4453,161 +4650,167 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) } /* - * Common helper for all contiguous first-fault loads. + * Common helper for all contiguous no-fault and first-fault loads. */ -static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); +static inline QEMU_ALWAYS_INLINE +void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, const SVEContFault fault, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + const unsigned rd = simd_data(desc); void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - intptr_t split, reg_off, mem_off; + intptr_t reg_off, mem_off, reg_last; + SVEContLdSt info; + int flags; void *host; - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { + /* Find the active elements. */ + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, 1 << msz)) { /* The entire predicate was false; no load occurs. 
*/ memset(vd, 0, reg_max); return; } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); + reg_off = info.reg_off_first[0]; - /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. - */ - split = max_for_page(addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After any fault, zero any leading inactive elements. */ + /* Probe the page(s). */ + if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) { + /* Fault on first element. */ + tcg_debug_assert(fault == FAULT_NO); + memset(vd, 0, reg_max); + goto do_fault; + } + + mem_off = info.mem_off_first[0]; + flags = info.page[0].flags; + + if (fault == FAULT_FIRST) { + /* + * Special handling of the first active element, + * if it crosses a page boundary or is MMIO. + */ + bool is_split = mem_off == info.mem_off_split; + /* TODO: MTE check. */ + if (unlikely(flags != 0) || unlikely(is_split)) { + /* + * Use the slow path for cross-page handling. + * Might trap for MMIO or watchpoints. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); - return; + reg_off += 1 << esz; + mem_off += 1 << msz; + swap_memzero(vd + reg_off, reg_max - reg_off); + + if (is_split) { + goto second_page; + } + } else { + memset(vd, 0, reg_max); + } + } else { + memset(vd, 0, reg_max); + if (unlikely(mem_off == info.mem_off_split)) { + /* The first active element crosses a page boundary. */ + flags |= info.page[1].flags; + if (unlikely(flags & TLB_MMIO)) { + /* Some page is MMIO, see below. */ + goto do_fault; + } + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches + (env_cpu(env), addr + mem_off, 1 << msz) + & BP_MEM_READ)) { + /* Watchpoint hit, see below. */ + goto do_fault; + } + /* TODO: MTE check. */ + /* + * Use the slow path for cross-page handling. + * This is RAM, without a watchpoint, and will not trap. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + goto second_page; } } -#ifdef CONFIG_USER_ONLY - /* - * The page(s) containing this first element at ADDR+MEM_OFF must - * be valid. Considering that this first element may be misaligned - * and cross a page boundary itself, take the rest of the page from - * the last byte of the element. - */ - split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max); - mem_off = host_fn(vd, vg, g2h(addr), mem_off, split); - - /* After any fault, zero any leading inactive elements. */ - swap_memzero(vd, reg_off); - reg_off = mem_off << diffsz; -#else /* - * Perform one normal read, which will fault or not. - * But it is likely to bring the page into the tlb. + * From this point on, all memory operations are MemSingleNF. + * + * Per the MemSingleNF pseudocode, a no-fault load from Device memory + * must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead. + * + * Unfortunately we do not have access to the memory attributes from the + * PTE to tell Device memory from Normal memory. So we make a mostly + * correct check, and indicate (UNKNOWN, FAULT) for any MMIO.
+ * This gives the right answer for the common cases of "Normal memory, + * backed by host RAM" and "Device memory, backed by MMIO". + * The architecture allows us to suppress an NF load and return + * (UNKNOWN, FAULT) for any reason, so our behaviour for the corner + * case of "Normal memory, backed by MMIO" is permitted. The case we + * get wrong is "Device memory, backed by host RAM", for which we + * should return (UNKNOWN, FAULT) but do not. + * + * Similarly, CPU_BP breakpoints would raise exceptions, and so + * return (UNKNOWN, FAULT). For simplicity, we consider gdb and + * architectural breakpoints the same. */ - tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); - - /* After any fault, zero any leading predicated false elts. */ - swap_memzero(vd, reg_off); - mem_off += 1 << msz; - reg_off += 1 << esz; - - /* Try again to read the balance of the page. */ - split = max_for_page(addr, mem_off - 1, mem_max); - if (split >= (1 << msz)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(vd, vg, host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } + if (unlikely(flags & TLB_MMIO)) { + goto do_fault; } -#endif - clear_helper_retaddr(); - record_fault(env, reg_off, reg_max); -} + reg_last = info.reg_off_last[0]; + host = info.page[0].host; -/* - * Common helper for all contiguous no-fault loads. - */ -static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const int esz, const int msz, - sve_ld1_host_fn *host_fn) -{ - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - const int mmu_idx = cpu_mmu_index(env, false); - intptr_t split, reg_off, mem_off; - void *host; + do { + uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3)); + do { + if ((pg >> (reg_off & 63)) & 1) { + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches + (env_cpu(env), addr + mem_off, 1 << msz) + & BP_MEM_READ)) { + goto do_fault; + } + /* TODO: MTE check. */ + host_fn(vd, reg_off, host + mem_off); + } + reg_off += 1 << esz; + mem_off += 1 << msz; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); -#ifdef CONFIG_USER_ONLY - host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); - if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) { - /* The entire operation is valid and will not fault. */ - host_fn(vd, vg, host, 0, mem_max); - return; + /* + * MemSingleNF is allowed to fail for any reason. We have special + * code above to handle the first element crossing a page boundary. + * As an implementation choice, decline to handle a cross-page element + * in any other position. + */ + reg_off = info.reg_off_split; + if (reg_off >= 0) { + goto do_fault; } -#endif - - /* There will be no fault, so we may modify in advance. */ - memset(vd, 0, reg_max); - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ + second_page: + reg_off = info.reg_off_first[1]; + if (likely(reg_off < 0)) { + /* No active elements on the second page. All done. */ return; } - mem_off = reg_off >> diffsz; -#ifdef CONFIG_USER_ONLY - if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) { - /* At least one load is valid; take the rest of the page.
*/ - split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max); - mem_off = host_fn(vd, vg, host, mem_off, split); - reg_off = mem_off << diffsz; - } -#else /* - * If the address is not in the TLB, we have no way to bring the - * entry into the TLB without also risking a fault. Note that - * the corollary is that we never load from an address not in RAM. - * - * This last is out of spec, in a weird corner case. - * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory - * must not actually hit the bus -- it returns UNKNOWN data instead. - * But if you map non-RAM with Normal memory attributes and do a NF - * load then it should access the bus. (Nobody ought actually do this - * in the real world, obviously.) - * - * Then there are the annoying special cases with watchpoints... - * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + * MemSingleNF is allowed to fail for any reason. As an implementation + * choice, decline to handle elements on the second page. This should + * be low frequency as the guest walks through memory -- the next + * iteration of the guest's loop should be aligned on the page boundary, + * and then all following iterations will stay aligned. */ - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - split = max_for_page(addr, mem_off, mem_max); - if (host && split >= (1 << msz)) { - mem_off = host_fn(vd, vg, host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } -#endif + do_fault: record_fault(env, reg_off, reg_max); } @@ -4615,265 +4818,237 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } #define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + 
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } -DO_LDFF1_LDNF1_1(bb, 0) -DO_LDFF1_LDNF1_1(bhu, 1) -DO_LDFF1_LDNF1_1(bhs, 1) -DO_LDFF1_LDNF1_1(bsu, 2) -DO_LDFF1_LDNF1_1(bss, 2) -DO_LDFF1_LDNF1_1(bdu, 3) -DO_LDFF1_LDNF1_1(bds, 3) +DO_LDFF1_LDNF1_1(bb, MO_8) +DO_LDFF1_LDNF1_1(bhu, MO_16) +DO_LDFF1_LDNF1_1(bhs, MO_16) +DO_LDFF1_LDNF1_1(bsu, MO_32) +DO_LDFF1_LDNF1_1(bss, MO_32) +DO_LDFF1_LDNF1_1(bdu, MO_64) +DO_LDFF1_LDNF1_1(bds, MO_64) -DO_LDFF1_LDNF1_2(hh, 1, 1) -DO_LDFF1_LDNF1_2(hsu, 2, 1) -DO_LDFF1_LDNF1_2(hss, 2, 1) -DO_LDFF1_LDNF1_2(hdu, 3, 1) -DO_LDFF1_LDNF1_2(hds, 3, 1) +DO_LDFF1_LDNF1_2(hh, MO_16, MO_16) +DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hss, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16) +DO_LDFF1_LDNF1_2(hds, MO_64, MO_16) -DO_LDFF1_LDNF1_2(ss, 2, 2) -DO_LDFF1_LDNF1_2(sdu, 3, 2) -DO_LDFF1_LDNF1_2(sds, 3, 2) +DO_LDFF1_LDNF1_2(ss, MO_32, MO_32) +DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32) +DO_LDFF1_LDNF1_2(sds, MO_64, MO_32) -DO_LDFF1_LDNF1_2(dd, 3, 3) +DO_LDFF1_LDNF1_2(dd, MO_64, MO_64) #undef DO_LDFF1_LDNF1_1 #undef DO_LDFF1_LDNF1_2 /* - * Store contiguous data, protected by a governing predicate. + * Common helper for all contiguous 1,2,3,4-register predicated stores. */ -#ifdef CONFIG_SOFTMMU -#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra); \ -} -#else -#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off))); \ -} -#endif - -DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) - -DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) - -DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) -DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) - -DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) +static inline QEMU_ALWAYS_INLINE +void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, + const int msz, const int N, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int i, flags; -DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) + /* Find the active elements. */ + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no store occurs. */ + return; + } -DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) -DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) + /* Probe the page(s). Exit with exception for any invalid page. 
*/ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, retaddr); -DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_WRITE, retaddr); -#undef DO_ST_TLB + /* TODO: MTE check. */ -/* - * Common helpers for all contiguous 1,2,3,4-register predicated stores. - */ -static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *vd = &env->vfp.zregs[rd]; + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. We cannot avoid + * this fault and will leave with the store incomplete. + */ + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, vd, i, addr, oi, ra); - } - i += esize, pg >>= esize; - addr += msize; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + return; +#endif } - clear_helper_retaddr(); -} -static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + host + mem_off + (i << msz)); + } } - i += esize, pg >>= esize; - addr += 2 * msize; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); -} -static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = 
simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; - - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 3 * msize; - } while (i & 15); + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } } - clear_helper_retaddr(); -} -static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 4 * msize; - } while (i & 15); - } - clear_helper_retaddr(); -} - -#define DO_STN_1(N, NAME, ESIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } +} + +#define DO_STN_1(N, NAME, ESZ) \ +void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ { \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ - sve_st1##NAME##_tlb); \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ } -#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +#define DO_STN_2(N, NAME, ESZ, MSZ) \ +void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ { \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_le_tlb); \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ } \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ { \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - 
sve_st1##NAME##_be_tlb); \ -} - -DO_STN_1(1, bb, 1) -DO_STN_1(1, bh, 2) -DO_STN_1(1, bs, 4) -DO_STN_1(1, bd, 8) -DO_STN_1(2, bb, 1) -DO_STN_1(3, bb, 1) -DO_STN_1(4, bb, 1) - -DO_STN_2(1, hh, 2, 2) -DO_STN_2(1, hs, 4, 2) -DO_STN_2(1, hd, 8, 2) -DO_STN_2(2, hh, 2, 2) -DO_STN_2(3, hh, 2, 2) -DO_STN_2(4, hh, 2, 2) - -DO_STN_2(1, ss, 4, 4) -DO_STN_2(1, sd, 8, 4) -DO_STN_2(2, ss, 4, 4) -DO_STN_2(3, ss, 4, 4) -DO_STN_2(4, ss, 4, 4) - -DO_STN_2(1, dd, 8, 8) -DO_STN_2(2, dd, 8, 8) -DO_STN_2(3, dd, 8, 8) -DO_STN_2(4, dd, 8, 8) + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ +} + +DO_STN_1(1, bb, MO_8) +DO_STN_1(1, bh, MO_16) +DO_STN_1(1, bs, MO_32) +DO_STN_1(1, bd, MO_64) +DO_STN_1(2, bb, MO_8) +DO_STN_1(3, bb, MO_8) +DO_STN_1(4, bb, MO_8) + +DO_STN_2(1, hh, MO_16, MO_16) +DO_STN_2(1, hs, MO_32, MO_16) +DO_STN_2(1, hd, MO_64, MO_16) +DO_STN_2(2, hh, MO_16, MO_16) +DO_STN_2(3, hh, MO_16, MO_16) +DO_STN_2(4, hh, MO_16, MO_16) + +DO_STN_2(1, ss, MO_32, MO_32) +DO_STN_2(1, sd, MO_64, MO_32) +DO_STN_2(2, ss, MO_32, MO_32) +DO_STN_2(3, ss, MO_32, MO_32) +DO_STN_2(4, ss, MO_32, MO_32) + +DO_STN_2(1, dd, MO_64, MO_64) +DO_STN_2(2, dd, MO_64, MO_64) +DO_STN_2(3, dd, MO_64, MO_64) +DO_STN_2(4, dd, MO_64, MO_64) #undef DO_STN_1 #undef DO_STN_2 @@ -4912,468 +5087,433 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) return *(uint64_t *)(reg + reg_ofs); } -static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE +void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + int esize, int msize, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch = { }; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + ARMVectorReg scratch; + intptr_t reg_off; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + memset(&scratch, 0, reg_max); + reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + + if (likely(in_page >= msize)) { + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + /* TODO: MTE check */ + host_fn(&scratch, reg_off, info.host); + } else { + /* Element crosses the page boundary. 
*/ + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_LOAD, mmu_idx, retaddr); + if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, + msize, info.attrs, + BP_MEM_READ, retaddr); + } + /* TODO: MTE check */ + tlb_fn(env, &scratch, reg_off, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + reg_off += esize; + pg >>= esize; + } while (reg_off & 63); + } while (reg_off < reg_max); /* Wait until all exceptions have been raised to write back. */ - memcpy(vd, &scratch, oprsz); + memcpy(vd, &scratch, reg_max); } -static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; - ARMVectorReg scratch = { }; - - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)(vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); - } - } - clear_helper_retaddr(); +#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} - /* Wait until all exceptions have been raised to write back. */ - memcpy(vd, &scratch, oprsz * 8); -} - -#define DO_LD1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb); \ -} - -#define DO_LD1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb); \ -} - -DO_LD1_ZPZ_S(bsu, zsu) -DO_LD1_ZPZ_S(bsu, zss) -DO_LD1_ZPZ_D(bdu, zsu) -DO_LD1_ZPZ_D(bdu, zss) -DO_LD1_ZPZ_D(bdu, zd) - -DO_LD1_ZPZ_S(bss, zsu) -DO_LD1_ZPZ_S(bss, zss) -DO_LD1_ZPZ_D(bds, zsu) -DO_LD1_ZPZ_D(bds, zss) -DO_LD1_ZPZ_D(bds, zd) - -DO_LD1_ZPZ_S(hsu_le, zsu) -DO_LD1_ZPZ_S(hsu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zsu) -DO_LD1_ZPZ_D(hdu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zd) - -DO_LD1_ZPZ_S(hsu_be, zsu) -DO_LD1_ZPZ_S(hsu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zsu) -DO_LD1_ZPZ_D(hdu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zd) - -DO_LD1_ZPZ_S(hss_le, zsu) -DO_LD1_ZPZ_S(hss_le, zss) -DO_LD1_ZPZ_D(hds_le, zsu) -DO_LD1_ZPZ_D(hds_le, zss) -DO_LD1_ZPZ_D(hds_le, zd) - -DO_LD1_ZPZ_S(hss_be, zsu) -DO_LD1_ZPZ_S(hss_be, zss) -DO_LD1_ZPZ_D(hds_be, zsu) -DO_LD1_ZPZ_D(hds_be, zss) -DO_LD1_ZPZ_D(hds_be, zd) - -DO_LD1_ZPZ_S(ss_le, zsu) -DO_LD1_ZPZ_S(ss_le, zss) -DO_LD1_ZPZ_D(sdu_le, zsu) -DO_LD1_ZPZ_D(sdu_le, zss) -DO_LD1_ZPZ_D(sdu_le, zd) - -DO_LD1_ZPZ_S(ss_be, zsu) -DO_LD1_ZPZ_S(ss_be, zss) -DO_LD1_ZPZ_D(sdu_be, zsu) -DO_LD1_ZPZ_D(sdu_be, zss) -DO_LD1_ZPZ_D(sdu_be, zd) - -DO_LD1_ZPZ_D(sds_le, zsu) -DO_LD1_ZPZ_D(sds_le, zss) -DO_LD1_ZPZ_D(sds_le, zd) - -DO_LD1_ZPZ_D(sds_be, zsu) -DO_LD1_ZPZ_D(sds_be, zss) -DO_LD1_ZPZ_D(sds_be, zd) - -DO_LD1_ZPZ_D(dd_le, zsu) -DO_LD1_ZPZ_D(dd_le, zss) -DO_LD1_ZPZ_D(dd_le, zd) - -DO_LD1_ZPZ_D(dd_be, 
zsu) -DO_LD1_ZPZ_D(dd_be, zss) -DO_LD1_ZPZ_D(dd_be, zd) +#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + +DO_LD1_ZPZ_S(bsu, zsu, MO_8) +DO_LD1_ZPZ_S(bsu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zsu, MO_8) +DO_LD1_ZPZ_D(bdu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zd, MO_8) + +DO_LD1_ZPZ_S(bss, zsu, MO_8) +DO_LD1_ZPZ_S(bss, zss, MO_8) +DO_LD1_ZPZ_D(bds, zsu, MO_8) +DO_LD1_ZPZ_D(bds, zss, MO_8) +DO_LD1_ZPZ_D(bds, zd, MO_8) + +DO_LD1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zd, MO_16) + +DO_LD1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LD1_ZPZ_S(hss_le, zsu, MO_16) +DO_LD1_ZPZ_S(hss_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zsu, MO_16) +DO_LD1_ZPZ_D(hds_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zd, MO_16) + +DO_LD1_ZPZ_S(hss_be, zsu, MO_16) +DO_LD1_ZPZ_S(hss_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zsu, MO_16) +DO_LD1_ZPZ_D(hds_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zd, MO_16) + +DO_LD1_ZPZ_S(ss_le, zsu, MO_32) +DO_LD1_ZPZ_S(ss_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LD1_ZPZ_S(ss_be, zsu, MO_32) +DO_LD1_ZPZ_S(ss_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LD1_ZPZ_D(sds_le, zsu, MO_32) +DO_LD1_ZPZ_D(sds_le, zss, MO_32) +DO_LD1_ZPZ_D(sds_le, zd, MO_32) + +DO_LD1_ZPZ_D(sds_be, zsu, MO_32) +DO_LD1_ZPZ_D(sds_be, zss, MO_32) +DO_LD1_ZPZ_D(sds_be, zd, MO_32) + +DO_LD1_ZPZ_D(dd_le, zsu, MO_64) +DO_LD1_ZPZ_D(dd_le, zss, MO_64) +DO_LD1_ZPZ_D(dd_le, zd, MO_64) + +DO_LD1_ZPZ_D(dd_be, zsu, MO_64) +DO_LD1_ZPZ_D(dd_be, zss, MO_64) +DO_LD1_ZPZ_D(dd_be, zd, MO_64) #undef DO_LD1_ZPZ_S #undef DO_LD1_ZPZ_D /* First fault loads with a vector index. */ -/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. - * The controlling predicate is known to be true. Return true if the - * load was successful. 
- */ -typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, int mmu_idx); - -#ifdef CONFIG_SOFTMMU -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)(vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#else -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - if (likely(page_check_range(addr, sizeof(TYPEM), PAGE_READ))) { \ - TYPEM val = HOST(g2h(addr)); \ - *(TYPEE *)(vd + H(reg_off)) = val; \ - return true; \ - } \ - return false; \ -} -#endif - -DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) -DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) -DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) -DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) - -DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) -DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) -DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) -DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) -DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) -DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) -DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) -DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) - -DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) -DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) -DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) -DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) -DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) -DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) - -DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) -DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) - /* - * Common helper for all gather first-faulting loads. + * Common helpers for all gather first-faulting loads. */ -static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; - - /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_32); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); - - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); - } - /* After any fault, zero the leading predicated false elements. 
*/ - swap_memzero(vd, reg_off); - - while (likely((reg_off += 4) < reg_max)) { - uint64_t pg = *(uint64_t *)(vg + (reg_off >> 6) * 8); - if (likely((pg >> (reg_off & 63)) & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; - } - } else { - *(uint32_t *)(vd + H1_4(reg_off)) = 0; - } - } -} - -static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) +static inline QEMU_ALWAYS_INLINE +void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + const int esize = 1 << esz; + const int msize = 1 << msz; + intptr_t reg_off; + SVEHostPage info; + target_ulong addr, in_page; /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_64); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); - - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; } - /* After any fault, zero the leading predicated false elements. */ + /* + * Probe the first element, allowing faults. + */ + addr = base + (off_fn(vm, reg_off) << scale); + tlb_fn(env, vd, reg_off, addr, retaddr); + + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); + reg_off += esize; + swap_memzero(vd + reg_off, reg_max - reg_off); - while (likely((reg_off += 8) < reg_max)) { - uint8_t pg = *(uint8_t *)(vg + H1(reg_off >> 3)); - if (likely(pg & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; + /* + * Probe the remaining elements, not allowing faults. + */ + while (reg_off < reg_max) { + uint64_t pg = vg[reg_off >> 6]; + do { + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = base + (off_fn(vm, reg_off) << scale); + in_page = -(addr | TARGET_PAGE_MASK); + + if (unlikely(in_page < msize)) { + /* Stop if the element crosses a page boundary. */ + goto fault; + } + + sve_probe_page(&info, true, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + if (unlikely(info.flags & (TLB_INVALID_MASK | TLB_MMIO))) { + goto fault; + } + if (unlikely(info.flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches + (env_cpu(env), addr, msize) & BP_MEM_READ)) { + goto fault; + } + /* TODO: MTE check. 
*/ + + host_fn(vd, reg_off, info.host); } - } else { - *(uint64_t *)(vd + reg_off) = 0; - } + reg_off += esize; + } while (reg_off & 63); } -} + return; -#define DO_LDFF1_ZPZ_S(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ + fault: + record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_ZPZ_D(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -DO_LDFF1_ZPZ_S(bsu, zsu) -DO_LDFF1_ZPZ_S(bsu, zss) -DO_LDFF1_ZPZ_D(bdu, zsu) -DO_LDFF1_ZPZ_D(bdu, zss) -DO_LDFF1_ZPZ_D(bdu, zd) - -DO_LDFF1_ZPZ_S(bss, zsu) -DO_LDFF1_ZPZ_S(bss, zss) -DO_LDFF1_ZPZ_D(bds, zsu) -DO_LDFF1_ZPZ_D(bds, zss) -DO_LDFF1_ZPZ_D(bds, zd) - -DO_LDFF1_ZPZ_S(hsu_le, zsu) -DO_LDFF1_ZPZ_S(hsu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zsu) -DO_LDFF1_ZPZ_D(hdu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zd) - -DO_LDFF1_ZPZ_S(hsu_be, zsu) -DO_LDFF1_ZPZ_S(hsu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zsu) -DO_LDFF1_ZPZ_D(hdu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zd) - -DO_LDFF1_ZPZ_S(hss_le, zsu) -DO_LDFF1_ZPZ_S(hss_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zsu) -DO_LDFF1_ZPZ_D(hds_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zd) - -DO_LDFF1_ZPZ_S(hss_be, zsu) -DO_LDFF1_ZPZ_S(hss_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zsu) -DO_LDFF1_ZPZ_D(hds_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zd) - -DO_LDFF1_ZPZ_S(ss_le, zsu) -DO_LDFF1_ZPZ_S(ss_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zsu) -DO_LDFF1_ZPZ_D(sdu_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zd) - -DO_LDFF1_ZPZ_S(ss_be, zsu) -DO_LDFF1_ZPZ_S(ss_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zsu) -DO_LDFF1_ZPZ_D(sdu_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zd) - -DO_LDFF1_ZPZ_D(sds_le, zsu) -DO_LDFF1_ZPZ_D(sds_le, zss) -DO_LDFF1_ZPZ_D(sds_le, zd) - -DO_LDFF1_ZPZ_D(sds_be, zsu) -DO_LDFF1_ZPZ_D(sds_be, zss) -DO_LDFF1_ZPZ_D(sds_be, zd) - -DO_LDFF1_ZPZ_D(dd_le, zsu) -DO_LDFF1_ZPZ_D(dd_le, zss) -DO_LDFF1_ZPZ_D(dd_le, zd) - -DO_LDFF1_ZPZ_D(dd_be, zsu) -DO_LDFF1_ZPZ_D(dd_be, zss) -DO_LDFF1_ZPZ_D(dd_be, zd) +#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + +#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + +DO_LDFF1_ZPZ_S(bsu, zsu, MO_8) +DO_LDFF1_ZPZ_S(bsu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zsu, MO_8) +DO_LDFF1_ZPZ_D(bdu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zd, MO_8) + +DO_LDFF1_ZPZ_S(bss, zsu, MO_8) +DO_LDFF1_ZPZ_S(bss, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zsu, MO_8) +DO_LDFF1_ZPZ_D(bds, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zd, MO_8) + +DO_LDFF1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zd, MO_16) + +DO_LDFF1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zd, MO_16) + 
+DO_LDFF1_ZPZ_S(hss_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(ss_le, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LDFF1_ZPZ_S(ss_be, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(dd_le, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zd, MO_64) + +DO_LDFF1_ZPZ_D(dd_be, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zd, MO_64) /* Stores with a vector index. */ -static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE +void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + int esize, int msize, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + void *host[ARM_MAX_VQ * 4]; + intptr_t reg_off, i; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + /* + * Probe all of the elements for host addresses and flags. + */ + i = reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, vd, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + host[i] = NULL; + if (likely((pg >> (reg_off & 63)) & 1)) { + if (likely(in_page >= msize)) { + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + host[i] = info.host; + } else { + /* + * Element crosses the page boundary. + * Probe both pages, but do not record the host address, + * so that we use the slow path. + */ + sve_probe_page(&info, false, env, addr, 0, + MMU_DATA_STORE, mmu_idx, retaddr); + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_STORE, mmu_idx, retaddr); + info.flags |= info2.flags; + } + + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_WRITE, retaddr); + } + /* TODO: MTE check. */ } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + i += 1; + reg_off += esize; + } while (reg_off & 63); + } while (reg_off < reg_max); + + /* + * Now that we have recognized all exceptions except SyncExternal + * (from TLB_MMIO), which we cannot avoid, perform all of the stores. 
+ * + * Note for the common case of an element in RAM, not crossing a page + * boundary, we have stored the host address in host[]. This doubles + * as a first-level check against the predicate, since only enabled + * elements have non-null host addresses. + */ + i = reg_off = 0; + do { + void *h = host[i]; + if (likely(h != NULL)) { + host_fn(vd, reg_off, h); + } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) { + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + tlb_fn(env, vd, reg_off, addr, retaddr); + } + i += 1; + reg_off += esize; + } while (reg_off < reg_max); } -static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; +#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)(vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); - } - } - clear_helper_retaddr(); -} - -#define DO_ST1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_st1##MEM##_tlb); \ -} - -#define DO_ST1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_st1##MEM##_tlb); \ -} - -DO_ST1_ZPZ_S(bs, zsu) -DO_ST1_ZPZ_S(hs_le, zsu) -DO_ST1_ZPZ_S(hs_be, zsu) -DO_ST1_ZPZ_S(ss_le, zsu) -DO_ST1_ZPZ_S(ss_be, zsu) - -DO_ST1_ZPZ_S(bs, zss) -DO_ST1_ZPZ_S(hs_le, zss) -DO_ST1_ZPZ_S(hs_be, zss) -DO_ST1_ZPZ_S(ss_le, zss) -DO_ST1_ZPZ_S(ss_be, zss) - -DO_ST1_ZPZ_D(bd, zsu) -DO_ST1_ZPZ_D(hd_le, zsu) -DO_ST1_ZPZ_D(hd_be, zsu) -DO_ST1_ZPZ_D(sd_le, zsu) -DO_ST1_ZPZ_D(sd_be, zsu) -DO_ST1_ZPZ_D(dd_le, zsu) -DO_ST1_ZPZ_D(dd_be, zsu) - -DO_ST1_ZPZ_D(bd, zss) -DO_ST1_ZPZ_D(hd_le, zss) -DO_ST1_ZPZ_D(hd_be, zss) -DO_ST1_ZPZ_D(sd_le, zss) -DO_ST1_ZPZ_D(sd_be, zss) -DO_ST1_ZPZ_D(dd_le, zss) -DO_ST1_ZPZ_D(dd_be, zss) - -DO_ST1_ZPZ_D(bd, zd) -DO_ST1_ZPZ_D(hd_le, zd) -DO_ST1_ZPZ_D(hd_be, zd) -DO_ST1_ZPZ_D(sd_le, zd) -DO_ST1_ZPZ_D(sd_be, zd) -DO_ST1_ZPZ_D(dd_le, zd) -DO_ST1_ZPZ_D(dd_be, zd) +#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} + +DO_ST1_ZPZ_S(bs, zsu, MO_8) +DO_ST1_ZPZ_S(hs_le, zsu, MO_16) +DO_ST1_ZPZ_S(hs_be, zsu, MO_16) +DO_ST1_ZPZ_S(ss_le, zsu, MO_32) +DO_ST1_ZPZ_S(ss_be, zsu, MO_32) + +DO_ST1_ZPZ_S(bs, zss, MO_8) +DO_ST1_ZPZ_S(hs_le, zss, MO_16) +DO_ST1_ZPZ_S(hs_be, zss, MO_16) +DO_ST1_ZPZ_S(ss_le, zss, MO_32) +DO_ST1_ZPZ_S(ss_be, zss, MO_32) + +DO_ST1_ZPZ_D(bd, zsu, MO_8) +DO_ST1_ZPZ_D(hd_le, zsu, MO_16) 
+DO_ST1_ZPZ_D(hd_be, zsu, MO_16) +DO_ST1_ZPZ_D(sd_le, zsu, MO_32) +DO_ST1_ZPZ_D(sd_be, zsu, MO_32) +DO_ST1_ZPZ_D(dd_le, zsu, MO_64) +DO_ST1_ZPZ_D(dd_be, zsu, MO_64) + +DO_ST1_ZPZ_D(bd, zss, MO_8) +DO_ST1_ZPZ_D(hd_le, zss, MO_16) +DO_ST1_ZPZ_D(hd_be, zss, MO_16) +DO_ST1_ZPZ_D(sd_le, zss, MO_32) +DO_ST1_ZPZ_D(sd_be, zss, MO_32) +DO_ST1_ZPZ_D(dd_le, zss, MO_64) +DO_ST1_ZPZ_D(dd_be, zss, MO_64) + +DO_ST1_ZPZ_D(bd, zd, MO_8) +DO_ST1_ZPZ_D(hd_le, zd, MO_16) +DO_ST1_ZPZ_D(hd_be, zd, MO_16) +DO_ST1_ZPZ_D(sd_le, zd, MO_32) +DO_ST1_ZPZ_D(sd_be, zd, MO_32) +DO_ST1_ZPZ_D(dd_le, zd, MO_64) +DO_ST1_ZPZ_D(dd_be, zd, MO_64) #undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_D diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 6c8bda4e4c..ac7b3119e5 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2044,7 +2044,11 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) unsigned nofs = vec_reg_offset(s, a->rn, index, esz); tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); } else { - tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0); + /* + * While dup_mem handles 128-bit elements, dup_imm does not. + * Thankfully element size doesn't matter for splatting zero. + */ + tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); } } return true; @@ -3946,42 +3950,30 @@ static bool trans_FCADD(DisasContext *s, arg_FCADD *a) return true; } -typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32); - -static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn) +static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) { - if (fn == NULL) { + if (a->esz == 0) { return false; } - if (!sve_access_check(s)) { - return true; + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, 0, fn); + tcg_temp_free_ptr(status); } - - unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. 
- */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(desc); - tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); - fn(cpu_env, pg, t_desc); - tcg_temp_free_i32(t_desc); - tcg_temp_free_ptr(pg); return true; } #define DO_FMLA(NAME, name) \ static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ { \ - static gen_helper_sve_fmla * const fns[4] = { \ + static gen_helper_gvec_5_ptr * const fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ @@ -3997,7 +3989,8 @@ DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) { - static gen_helper_sve_fmla * const fns[3] = { + static gen_helper_gvec_5_ptr * const fns[4] = { + NULL, gen_helper_sve_fcmla_zpzzz_h, gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, @@ -4008,25 +4001,14 @@ static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. - */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = deposit32(desc, 20, 2, a->rot); - desc = sextract32(desc, 0, 22); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(desc); - tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); - fns[a->esz - 1](cpu_env, pg, t_desc); - tcg_temp_free_i32(t_desc); - tcg_temp_free_ptr(pg); + TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, a->rot, fns[a->esz]); + tcg_temp_free_ptr(status); } return true; } @@ -4582,11 +4564,6 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; -static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype) -{ - return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s)); -} - static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, gen_helper_gvec_mem *fn) { @@ -4599,9 +4576,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = sve_memopidx(s, dtype); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + desc = simd_desc(vsz, vsz, zt); t_desc = tcg_const_i32(desc); t_pg = tcg_temp_new_ptr(); @@ -4833,9 +4808,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) int desc, poff; /* Load the first quadword using the normal predicated load helpers. 
*/ - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(16, 16, desc); + desc = simd_desc(16, 16, zt); t_desc = tcg_const_i32(desc); poff = pred_full_reg_offset(s, pg); @@ -5064,9 +5037,7 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_i32 t_desc; int desc; - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= scale << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + desc = simd_desc(vsz, vsz, scale); t_desc = tcg_const_i32(desc); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
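A few standalone notes on the mechanisms the hunks above lean on. The merged FAULT_FIRST/FAULT_NO helper never raises an exception past the first active element: on any later problem it branches to do_fault, and record_fault() clears the first-fault register (FFR) from the faulting element onward so the guest can retry from there. Below is a minimal model of that contract, not QEMU's actual record_fault (which updates the predicate registers in CPUARMState); a single 64-bit word stands in for the FFR and the vector is capped at 64 bytes.

#include <stdint.h>
#include <stdio.h>

/*
 * Model of the first-fault contract: a fault at byte offset reg_off
 * of a reg_max-byte vector does not trap; it clears the FFR bit for
 * that element and every later one.  One FFR bit per vector byte,
 * reg_max capped at 64 so one uint64_t suffices for the sketch.
 */
static uint64_t record_fault_model(uint64_t ffr, unsigned reg_off,
                                   unsigned reg_max)
{
    uint64_t valid = reg_max < 64 ? ((uint64_t)1 << reg_max) - 1
                                  : ~(uint64_t)0;
    uint64_t keep = ((uint64_t)1 << reg_off) - 1;   /* reg_off < 64 */
    return ffr & keep & valid;
}

int main(void)
{
    /* 16-byte vector, fault at the element starting at byte 8. */
    printf("ffr = 0x%llx\n",
           (unsigned long long)record_fault_model(0xffff, 8, 16));
    /* Prints ffr = 0xff: offsets 0..7 survive, 8..15 are cleared. */
    return 0;
}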
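Every rewritten helper walks the governing predicate the same way: fetch one 64-bit predicate word per 64 bytes of vector, then test (pg >> (reg_off & 63)) & 1 per element. That shape, extracted into a self-contained sketch (the callback and the parameter values are invented for illustration):

#include <stdint.h>
#include <stdio.h>

/*
 * Visit every active element of a reg_max-byte vector.  The predicate
 * vg carries one bit per vector byte; an element of 1 << esz bytes is
 * active when the bit for its first byte is set.  This mirrors the
 * nested do { ... } while (reg_off & 63) loops of the new helpers,
 * which reload one predicate word per 64 bytes of vector.
 */
static void for_each_active(const uint64_t *vg, intptr_t reg_max, int esz,
                            void (*fn)(intptr_t reg_off))
{
    intptr_t reg_off = 0;
    do {
        uint64_t pg = vg[reg_off >> 6];
        do {
            if ((pg >> (reg_off & 63)) & 1) {
                fn(reg_off);
            }
            reg_off += (intptr_t)1 << esz;
        } while ((reg_off & 63) && reg_off < reg_max);
    } while (reg_off < reg_max);
}

static void show(intptr_t reg_off)
{
    printf("active element at byte offset %ld\n", (long)reg_off);
}

int main(void)
{
    uint64_t vg[1] = { 0x0101010101010101ull };  /* every 8th byte */
    for_each_active(vg, 64, 3, show);            /* esz = 3: 8-byte elts */
    return 0;
}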
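The macro tables now pass MemOp size constants (MO_8 ... MO_64) where they previously passed raw byte counts. MO_n is the log2 of the access size in bytes, which is why the helpers recover byte counts with 1 << esz and 1 << msz:

#include <stdio.h>

/* The size portion of QEMU's MemOp: a log2 byte count. */
enum { MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3 };

int main(void)
{
    for (int msz = MO_8; msz <= MO_64; msz++) {
        printf("MO_%d -> %d byte(s)\n", 8 << msz, 1 << msz);
    }
    return 0;
}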
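sve_stN_r and sve_st1_z both split a predicated store into a probe phase that may fault and a commit phase that may not: every page (and watchpoint) is checked before the first byte is written, so an exception cannot leave a partial store visible. The pattern in the abstract; the callbacks and the toy demo around them are invented, not QEMU API:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef bool probe_fn(void *ctx, size_t i);
typedef void commit_fn(void *ctx, size_t i);

/*
 * Validate every piece of an operation before applying any of it, so
 * a failure leaves no partial result -- the shape sve_stN_r uses with
 * sve_cont_ldst_pages() (probe) and host_fn()/tlb_fn() (commit).
 */
static bool all_or_nothing(void *ctx, size_t n,
                           probe_fn *probe, commit_fn *commit)
{
    for (size_t i = 0; i < n; i++) {
        if (!probe(ctx, i)) {
            return false;   /* "fault": nothing has been written */
        }
    }
    for (size_t i = 0; i < n; i++) {
        commit(ctx, i);     /* probing guaranteed this cannot fail */
    }
    return true;
}

struct copy { const int *src; int *dst; size_t limit; };

static bool can_write(void *ctx, size_t i)
{
    struct copy *c = ctx;   /* stand-in for a page/watchpoint probe */
    return i < c->limit;
}

static void do_write(void *ctx, size_t i)
{
    struct copy *c = ctx;
    c->dst[i] = c->src[i];
}

int main(void)
{
    const int src[4] = { 1, 2, 3, 4 };
    int dst[4] = { 0 };
    struct copy c = { src, dst, 4 };
    printf("%s\n", all_or_nothing(&c, 4, can_write, do_write)
                   ? "committed" : "faulted, nothing stored");
    return 0;
}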
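The cross-page tests in the gather/scatter helpers use in_page = -(addr | TARGET_PAGE_MASK), which evaluates to the number of bytes from addr to the end of its page; an element fits on one page exactly when in_page >= msize. A quick check of the identity, assuming 4 KiB pages for the demo:

#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12   /* assume 4 KiB pages for this sketch */
#define TARGET_PAGE_MASK ((uint64_t)-1 << TARGET_PAGE_BITS)

int main(void)
{
    uint64_t addrs[] = { 0x1000, 0x1ff8, 0x1fff };
    for (int i = 0; i < 3; i++) {
        uint64_t addr = addrs[i];
        /* OR-ing in the page mask leaves only the in-page offset bits
         * clear; unsigned negation yields bytes up to the page end. */
        uint64_t in_page = -(addr | TARGET_PAGE_MASK);
        printf("addr 0x%llx: %llu byte(s) left in page\n",
               (unsigned long long)addr, (unsigned long long)in_page);
    }
    return 0;   /* prints 4096, 8, 1 */
}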
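Finally, the translate-sve.c hunks delete the hand-rolled descriptor packing (deposit32 of rd/rn/rm/ra) now that tcg_gen_gvec_5_ptr passes the registers as real operands and simd_desc() carries only zt or the scale. For reference, the bitfield insert/extract being removed works like this; these are free-standing reimplementations for the sketch, while QEMU's own versions live in include/qemu/bitops.h:

#include <stdint.h>
#include <stdio.h>

/* Free-standing equivalents of QEMU's deposit32()/extract32(). */
static uint32_t deposit32(uint32_t val, int start, int len, uint32_t field)
{
    uint32_t mask = (~0u >> (32 - len)) << start;
    return (val & ~mask) | ((field << start) & mask);
}

static uint32_t extract32(uint32_t val, int start, int len)
{
    return (val >> start) & (~0u >> (32 - len));
}

int main(void)
{
    /* Pack rd/rn/rm/ra in 5-bit fields, as the deleted FMLA path did. */
    uint32_t desc = 3;                  /* rd in bits [4:0] */
    desc = deposit32(desc, 5, 5, 7);    /* rn */
    desc = deposit32(desc, 10, 5, 12);  /* rm */
    desc = deposit32(desc, 15, 5, 31);  /* ra */
    printf("rm = %u, ra = %u\n", extract32(desc, 10, 5),
           extract32(desc, 15, 5));     /* rm = 12, ra = 31 */
    return 0;
}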