diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2020-03-05 16:09:20 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-03-05 16:09:20 +0000 |
commit | 7b182eb2467af6c47c9c77c64bbbeed8ed53c330 (patch) | |
tree | 6e35185494ddc48b3cdcabe4ac6c2ffdc1d3accf | |
parent | d4564afe14708a09684ca417f7dc8e8c328658f5 (diff) |
target/arm: Move helper_dc_zva to helper-a64.c
This is an aarch64-only function. Move it out of the shared file.
This patch is code movement only.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20200302175829.2183-6-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | target/arm/helper-a64.c | 91 | ||||
-rw-r--r-- | target/arm/helper-a64.h | 1 | ||||
-rw-r--r-- | target/arm/helper.h | 1 | ||||
-rw-r--r-- | target/arm/op_helper.c | 93 |
4 files changed, 92 insertions, 94 deletions
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 123ce50e7a..bc0649a44a 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "cpu.h" #include "exec/gdbstub.h" #include "exec/helper-proto.h" @@ -1109,4 +1110,94 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp) return float16_sqrt(a, s); } +void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) +{ + /* + * Implement DC ZVA, which zeroes a fixed-length block of memory. + * Note that we do not implement the (architecturally mandated) + * alignment fault for attempts to use this on Device memory + * (which matches the usual QEMU behaviour of not implementing either + * alignment faults or any memory attribute handling). + */ + ARMCPU *cpu = env_archcpu(env); + uint64_t blocklen = 4 << cpu->dcz_blocksize; + uint64_t vaddr = vaddr_in & ~(blocklen - 1); + +#ifndef CONFIG_USER_ONLY + { + /* + * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than + * the block size so we might have to do more than one TLB lookup. + * We know that in fact for any v8 CPU the page size is at least 4K + * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only + * 1K as an artefact of legacy v5 subpage support being present in the + * same QEMU executable. So in practice the hostaddr[] array has + * two entries, given the current setting of TARGET_PAGE_BITS_MIN. + */ + int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); + void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; + int try, i; + unsigned mmu_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + + assert(maxidx <= ARRAY_SIZE(hostaddr)); + + for (try = 0; try < 2; try++) { + + for (i = 0; i < maxidx; i++) { + hostaddr[i] = tlb_vaddr_to_host(env, + vaddr + TARGET_PAGE_SIZE * i, + 1, mmu_idx); + if (!hostaddr[i]) { + break; + } + } + if (i == maxidx) { + /* + * If it's all in the TLB it's fair game for just writing to; + * we know we don't need to update dirty status, etc. + */ + for (i = 0; i < maxidx - 1; i++) { + memset(hostaddr[i], 0, TARGET_PAGE_SIZE); + } + memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); + return; + } + /* + * OK, try a store and see if we can populate the tlb. This + * might cause an exception if the memory isn't writable, + * in which case we will longjmp out of here. We must for + * this purpose use the actual register value passed to us + * so that we get the fault address right. + */ + helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); + /* Now we can populate the other TLB entries, if any */ + for (i = 0; i < maxidx; i++) { + uint64_t va = vaddr + TARGET_PAGE_SIZE * i; + if (va != (vaddr_in & TARGET_PAGE_MASK)) { + helper_ret_stb_mmu(env, va, 0, oi, GETPC()); + } + } + } + + /* + * Slow path (probably attempt to do this to an I/O device or + * similar, or clearing of a block of code we have translations + * cached for). Just do a series of byte writes as the architecture + * demands. It's not worth trying to use a cpu_physical_memory_map(), + * memset(), unmap() sequence here because: + * + we'd need to account for the blocksize being larger than a page + * + the direct-RAM access case is almost always going to be dealt + * with in the fastpath code above, so there's no speed benefit + * + we would have to deal with the map returning NULL because the + * bounce buffer was in use + */ + for (i = 0; i < blocklen; i++) { + helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); + } + } +#else + memset(g2h(vaddr), 0, blocklen); +#endif +} diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index a915c1247f..b1a5935f61 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -90,6 +90,7 @@ DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr) DEF_HELPER_2(sqrt_f16, f16, f16, ptr) DEF_HELPER_2(exception_return, void, env, i64) +DEF_HELPER_2(dc_zva, void, env, i64) DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/arm/helper.h b/target/arm/helper.h index fcbf504121..72eb9e6a1a 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -559,7 +559,6 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) -DEF_HELPER_2(dc_zva, void, env, i64) DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index af3020b78f..eb0de080f1 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -17,7 +17,6 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" -#include "qemu/units.h" #include "qemu/log.h" #include "qemu/main-loop.h" #include "cpu.h" @@ -936,95 +935,3 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) return ((uint32_t)x >> shift) | (x << (32 - shift)); } } - -void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) -{ - /* - * Implement DC ZVA, which zeroes a fixed-length block of memory. - * Note that we do not implement the (architecturally mandated) - * alignment fault for attempts to use this on Device memory - * (which matches the usual QEMU behaviour of not implementing either - * alignment faults or any memory attribute handling). - */ - - ARMCPU *cpu = env_archcpu(env); - uint64_t blocklen = 4 << cpu->dcz_blocksize; - uint64_t vaddr = vaddr_in & ~(blocklen - 1); - -#ifndef CONFIG_USER_ONLY - { - /* - * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than - * the block size so we might have to do more than one TLB lookup. - * We know that in fact for any v8 CPU the page size is at least 4K - * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only - * 1K as an artefact of legacy v5 subpage support being present in the - * same QEMU executable. So in practice the hostaddr[] array has - * two entries, given the current setting of TARGET_PAGE_BITS_MIN. - */ - int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); - void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; - int try, i; - unsigned mmu_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - - assert(maxidx <= ARRAY_SIZE(hostaddr)); - - for (try = 0; try < 2; try++) { - - for (i = 0; i < maxidx; i++) { - hostaddr[i] = tlb_vaddr_to_host(env, - vaddr + TARGET_PAGE_SIZE * i, - 1, mmu_idx); - if (!hostaddr[i]) { - break; - } - } - if (i == maxidx) { - /* - * If it's all in the TLB it's fair game for just writing to; - * we know we don't need to update dirty status, etc. - */ - for (i = 0; i < maxidx - 1; i++) { - memset(hostaddr[i], 0, TARGET_PAGE_SIZE); - } - memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); - return; - } - /* - * OK, try a store and see if we can populate the tlb. This - * might cause an exception if the memory isn't writable, - * in which case we will longjmp out of here. We must for - * this purpose use the actual register value passed to us - * so that we get the fault address right. - */ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); - /* Now we can populate the other TLB entries, if any */ - for (i = 0; i < maxidx; i++) { - uint64_t va = vaddr + TARGET_PAGE_SIZE * i; - if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETPC()); - } - } - } - - /* - * Slow path (probably attempt to do this to an I/O device or - * similar, or clearing of a block of code we have translations - * cached for). Just do a series of byte writes as the architecture - * demands. It's not worth trying to use a cpu_physical_memory_map(), - * memset(), unmap() sequence here because: - * + we'd need to account for the blocksize being larger than a page - * + the direct-RAM access case is almost always going to be dealt - * with in the fastpath code above, so there's no speed benefit - * + we would have to deal with the map returning NULL because the - * bounce buffer was in use - */ - for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); - } - } -#else - memset(g2h(vaddr), 0, blocklen); -#endif -} |