target/arm: Move the DC ZVA helper into op_helper

Those helpers are a software implementation of the ARM v8 memory zeroing op code. They should be moved to the op helper file, which is going to eventually be built only when TCG is enabled. Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Robert Bradford <robert.bradford@intel.com> Signed-off-by: Samuel Ortiz <sameo@linux.intel.com> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Message-id: 20190701132516.26392-10-philmd@redhat.com [PMD: Rebased] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Samuel Ortiz <sameo@linux.intel.com> 2019-07-01 17:26:20 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2019-07-01 17:29:00 +0100
commit: 6cdca173ef81a9dbcee9e142f1a5a34ad9c44b75 (patch)
tree: 13c104903986c44ff83a93256c378c4722b89527 /target/arm/op_helper.c
parent: 9798ac7162c8a720c5d28f4d1fc9e03c7ab4f015 (diff)
1 files changed, 93 insertions, 0 deletions
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index b1952486c6..7c835d3ce7 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -17,6 +17,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -1325,3 +1326,95 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
         return ((uint32_t)x >> shift) | (x << (32 - shift));
     }
 }
+
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /*
+     * Implement DC ZVA, which zeroes a fixed-length block of memory.
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory
+     * (which matches the usual QEMU behaviour of not implementing either
+     * alignment faults or any memory attribute handling).
+     */
+
+    ARMCPU *cpu = env_archcpu(env);
+    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+    {
+        /*
+         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+         * the block size so we might have to do more than one TLB lookup.
+         * We know that in fact for any v8 CPU the page size is at least 4K
+         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+         * 1K as an artefact of legacy v5 subpage support being present in the
+         * same QEMU executable. So in practice the hostaddr[] array has
+         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+         */
+        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
+        int try, i;
+        unsigned mmu_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+
+        assert(maxidx <= ARRAY_SIZE(hostaddr));
+
+        for (try = 0; try < 2; try++) {
+
+            for (i = 0; i < maxidx; i++) {
+                hostaddr[i] = tlb_vaddr_to_host(env,
+                                                vaddr + TARGET_PAGE_SIZE * i,
+                                                1, mmu_idx);
+                if (!hostaddr[i]) {
+                    break;
+                }
+            }
+            if (i == maxidx) {
+                /*
+                 * If it's all in the TLB it's fair game for just writing to;
+                 * we know we don't need to update dirty status, etc.
+                 */
+                for (i = 0; i < maxidx - 1; i++) {
+                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+                }
+                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+                return;
+            }
+            /*
+             * OK, try a store and see if we can populate the tlb. This
+             * might cause an exception if the memory isn't writable,
+             * in which case we will longjmp out of here. We must for
+             * this purpose use the actual register value passed to us
+             * so that we get the fault address right.
+             */
+            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
+            /* Now we can populate the other TLB entries, if any */
+            for (i = 0; i < maxidx; i++) {
+                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());
+                }
+            }
+        }
+
+        /*
+         * Slow path (probably attempt to do this to an I/O device or
+         * similar, or clearing of a block of code we have translations
+         * cached for). Just do a series of byte writes as the architecture
+         * demands. It's not worth trying to use a cpu_physical_memory_map(),
+         * memset(), unmap() sequence here because:
+         *  + we'd need to account for the blocksize being larger than a page
+         *  + the direct-RAM access case is almost always going to be dealt
+         *    with in the fastpath code above, so there's no speed benefit
+         *  + we would have to deal with the map returning NULL because the
+         *    bounce buffer was in use
+         */
+        for (i = 0; i < blocklen; i++) {
+            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+        }
+    }
+#else
+    memset(g2h(vaddr), 0, blocklen);
+#endif
+}
author	Samuel Ortiz <sameo@linux.intel.com>	2019-07-01 17:26:20 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2019-07-01 17:29:00 +0100
commit	6cdca173ef81a9dbcee9e142f1a5a34ad9c44b75 (patch)
tree	13c104903986c44ff83a93256c378c4722b89527 /target/arm/op_helper.c
parent	9798ac7162c8a720c5d28f4d1fc9e03c7ab4f015 (diff)