aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2020-03-05 16:09:20 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-03-05 16:09:20 +0000
commit7b182eb2467af6c47c9c77c64bbbeed8ed53c330 (patch)
tree6e35185494ddc48b3cdcabe4ac6c2ffdc1d3accf
parentd4564afe14708a09684ca417f7dc8e8c328658f5 (diff)
target/arm: Move helper_dc_zva to helper-a64.c
This is an aarch64-only function. Move it out of the shared file. This patch is code movement only. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Message-id: 20200302175829.2183-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--target/arm/helper-a64.c91
-rw-r--r--target/arm/helper-a64.h1
-rw-r--r--target/arm/helper.h1
-rw-r--r--target/arm/op_helper.c93
4 files changed, 92 insertions, 94 deletions
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 123ce50e7a..bc0649a44a 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
@@ -1109,4 +1110,94 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
return float16_sqrt(a, s);
}
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+ /*
+ * Implement DC ZVA, which zeroes a fixed-length block of memory.
+ * Note that we do not implement the (architecturally mandated)
+ * alignment fault for attempts to use this on Device memory
+ * (which matches the usual QEMU behaviour of not implementing either
+ * alignment faults or any memory attribute handling).
+ */
+ ARMCPU *cpu = env_archcpu(env);
+ uint64_t blocklen = 4 << cpu->dcz_blocksize;
+ uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+ {
+ /*
+ * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+ * the block size so we might have to do more than one TLB lookup.
+ * We know that in fact for any v8 CPU the page size is at least 4K
+ * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+ * 1K as an artefact of legacy v5 subpage support being present in the
+ * same QEMU executable. So in practice the hostaddr[] array has
+ * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+ */
+ int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+ void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
+ int try, i;
+ unsigned mmu_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+
+ assert(maxidx <= ARRAY_SIZE(hostaddr));
+
+ for (try = 0; try < 2; try++) {
+
+ for (i = 0; i < maxidx; i++) {
+ hostaddr[i] = tlb_vaddr_to_host(env,
+ vaddr + TARGET_PAGE_SIZE * i,
+ 1, mmu_idx);
+ if (!hostaddr[i]) {
+ break;
+ }
+ }
+ if (i == maxidx) {
+ /*
+ * If it's all in the TLB it's fair game for just writing to;
+ * we know we don't need to update dirty status, etc.
+ */
+ for (i = 0; i < maxidx - 1; i++) {
+ memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+ }
+ memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+ return;
+ }
+ /*
+ * OK, try a store and see if we can populate the tlb. This
+ * might cause an exception if the memory isn't writable,
+ * in which case we will longjmp out of here. We must for
+ * this purpose use the actual register value passed to us
+ * so that we get the fault address right.
+ */
+ helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
+ /* Now we can populate the other TLB entries, if any */
+ for (i = 0; i < maxidx; i++) {
+ uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+ if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+ helper_ret_stb_mmu(env, va, 0, oi, GETPC());
+ }
+ }
+ }
+
+ /*
+ * Slow path (probably attempt to do this to an I/O device or
+ * similar, or clearing of a block of code we have translations
+ * cached for). Just do a series of byte writes as the architecture
+ * demands. It's not worth trying to use a cpu_physical_memory_map(),
+ * memset(), unmap() sequence here because:
+ * + we'd need to account for the blocksize being larger than a page
+ * + the direct-RAM access case is almost always going to be dealt
+ * with in the fastpath code above, so there's no speed benefit
+ * + we would have to deal with the map returning NULL because the
+ * bounce buffer was in use
+ */
+ for (i = 0; i < blocklen; i++) {
+ helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+ }
+ }
+#else
+ memset(g2h(vaddr), 0, blocklen);
+#endif
+}
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index a915c1247f..b1a5935f61 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -90,6 +90,7 @@ DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
DEF_HELPER_2(exception_return, void, env, i64)
+DEF_HELPER_2(dc_zva, void, env, i64)
DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)
DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index fcbf504121..72eb9e6a1a 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -559,7 +559,6 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-DEF_HELPER_2(dc_zva, void, env, i64)
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index af3020b78f..eb0de080f1 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -17,7 +17,6 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
-#include "qemu/units.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "cpu.h"
@@ -936,95 +935,3 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
return ((uint32_t)x >> shift) | (x << (32 - shift));
}
}
-
-void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
-{
- /*
- * Implement DC ZVA, which zeroes a fixed-length block of memory.
- * Note that we do not implement the (architecturally mandated)
- * alignment fault for attempts to use this on Device memory
- * (which matches the usual QEMU behaviour of not implementing either
- * alignment faults or any memory attribute handling).
- */
-
- ARMCPU *cpu = env_archcpu(env);
- uint64_t blocklen = 4 << cpu->dcz_blocksize;
- uint64_t vaddr = vaddr_in & ~(blocklen - 1);
-
-#ifndef CONFIG_USER_ONLY
- {
- /*
- * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
- * the block size so we might have to do more than one TLB lookup.
- * We know that in fact for any v8 CPU the page size is at least 4K
- * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
- * 1K as an artefact of legacy v5 subpage support being present in the
- * same QEMU executable. So in practice the hostaddr[] array has
- * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
- */
- int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
- void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
- int try, i;
- unsigned mmu_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
- assert(maxidx <= ARRAY_SIZE(hostaddr));
-
- for (try = 0; try < 2; try++) {
-
- for (i = 0; i < maxidx; i++) {
- hostaddr[i] = tlb_vaddr_to_host(env,
- vaddr + TARGET_PAGE_SIZE * i,
- 1, mmu_idx);
- if (!hostaddr[i]) {
- break;
- }
- }
- if (i == maxidx) {
- /*
- * If it's all in the TLB it's fair game for just writing to;
- * we know we don't need to update dirty status, etc.
- */
- for (i = 0; i < maxidx - 1; i++) {
- memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
- }
- memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
- return;
- }
- /*
- * OK, try a store and see if we can populate the tlb. This
- * might cause an exception if the memory isn't writable,
- * in which case we will longjmp out of here. We must for
- * this purpose use the actual register value passed to us
- * so that we get the fault address right.
- */
- helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
- /* Now we can populate the other TLB entries, if any */
- for (i = 0; i < maxidx; i++) {
- uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
- if (va != (vaddr_in & TARGET_PAGE_MASK)) {
- helper_ret_stb_mmu(env, va, 0, oi, GETPC());
- }
- }
- }
-
- /*
- * Slow path (probably attempt to do this to an I/O device or
- * similar, or clearing of a block of code we have translations
- * cached for). Just do a series of byte writes as the architecture
- * demands. It's not worth trying to use a cpu_physical_memory_map(),
- * memset(), unmap() sequence here because:
- * + we'd need to account for the blocksize being larger than a page
- * + the direct-RAM access case is almost always going to be dealt
- * with in the fastpath code above, so there's no speed benefit
- * + we would have to deal with the map returning NULL because the
- * bounce buffer was in use
- */
- for (i = 0; i < blocklen; i++) {
- helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
- }
- }
-#else
- memset(g2h(vaddr), 0, blocklen);
-#endif
-}