author     Richard Henderson <richard.henderson@linaro.org>   2022-06-21 13:47:20 -0700
committer  Richard Henderson <richard.henderson@linaro.org>   2022-06-21 13:47:20 -0700
commit     f200ff158d5abcb974a6b597a962b6b2fbea2b06 (patch)
tree       0bad1924d9b2c6ccb7c816a530fe7d5ee76c97b0 /util
parent     5cdcfd861e3cdb98d3239ba78c97a1a2b13d2a70 (diff)
parent     c79a8e840c435bc26a251e34b043318e8b2081db (diff)
Merge tag 'pull-tcg-20220621' of https://gitlab.com/rth7680/qemu into staging
Speed up handling of empty timer lists in qemu_clock_deadline_ns_all.
Implement remainder for Power3.1 hosts.
Optimize ppc host icache flushing.
Cleanups to tcg_accel_ops_init.
Fix mmio crash accessing unmapped physical memory.
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmKyLesdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8O1wf5AW6JeeUTs2r3owsK
# UpVaRqjlLpNeuktoOQoG8lbVzm1ulEv7zgXYJTZg4cc/83WQZ2G8WzTj3W+Qr/S9
# ECRd73Kou+fK3jTo8I+wPLQjLjkIV4xSABMGz/onxhoAeyS+xcAI4qGuSGrtIg2r
# sQ61V4fWCwvQJdHMyG756Xsh8Xjf18mrNQZ5PLGkyn/e9UIAc4KH6FsgWJdinGEs
# V/oibY20kCXpLxN0ajNmx3x4/NFs/ymMtn1z9fdhVGjAVPY0N6YsxjsGqd/WP/5U
# ui/x0wAhl/VNK2M2+z3hVGfNlMpkzTVG2A3ndD+tYI3nofwTYb/UiakhID7ZX1cQ
# yKDyAw==
# =3Rhw
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 21 Jun 2022 01:45:31 PM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]
* tag 'pull-tcg-20220621' of https://gitlab.com/rth7680/qemu:
util/cacheflush: Optimize flushing when ppc host has coherent icache
util/cacheflush: Merge aarch64 ctr_el0 usage
util: Merge cacheflush.c and cacheinfo.c
softmmu: Always initialize xlat in address_space_translate_for_iotlb
qemu-timer: Skip empty timer lists before locking in qemu_clock_deadline_ns_all
accel/tcg: Reorganize tcg_accel_ops_init()
accel/tcg: Init TCG cflags in vCPU thread handler
target/avr: Drop avr_cpu_memory_rw_debug()
tcg/ppc: implement rem[u]_i{32,64} with mod[su][wd]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
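
The "remainder for Power3.1 hosts" item corresponds to the tcg/ppc commit in the shortlog above: TCG's rem[u]_i{32,64} ops can be emitted as the mod[su][wd] instructions instead of a divide/multiply-back/subtract sequence. Below is a minimal C sketch of the two equivalent computations; the function names are hypothetical and illustrate only the semantics, not QEMU's actual code emitter.

    #include <stdint.h>

    /*
     * Hypothetical models of what TCG's rem_i32 computes for illustration.
     * Division by zero and INT32_MIN / -1 are handled separately by TCG
     * and are not modelled here.
     */

    /* On a host with the mod instructions: a single modsw does the job. */
    static int32_t rem_i32_mod(int32_t a, int32_t b)
    {
        return a % b;               /* truncated signed remainder */
    }

    /* On older hosts: the classic expansion the backend previously used. */
    static int32_t rem_i32_expanded(int32_t a, int32_t b)
    {
        int32_t q = a / b;          /* divw */
        return a - q * b;           /* mullw + subf */
    }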
Diffstat (limited to 'util')
-rw-r--r--  util/cacheflush.c  | 247
-rw-r--r--  util/cacheinfo.c   | 200
-rw-r--r--  util/meson.build   |   2
-rw-r--r--  util/qemu-timer.c  |   3
4 files changed, 235 insertions, 217 deletions
diff --git a/util/cacheflush.c b/util/cacheflush.c
index 4b57186d89..2c2c73e085 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -1,5 +1,5 @@
 /*
- * Flush the host cpu caches.
+ * Info about, and flushing the host cpu caches.
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -9,8 +9,218 @@
 #include "qemu/cacheflush.h"
 #include "qemu/cacheinfo.h"
 #include "qemu/bitops.h"
+#include "qemu/host-utils.h"
+#include "qemu/atomic.h"
 
 
+int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
+int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
+
+/*
+ * Operating system specific cache detection mechanisms.
+ */
+
+#if defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /*
+     * Check for the required buffer size first.  Note that if the zero
+     * size we use for the probe results in success, then there is no
+     * data available; fail in that case.
+     */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(CONFIG_DARWIN)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes. */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes. */
+    int size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
+# endif
+}
+#endif /* sys_cache_info */
+
+
+/*
+ * Architecture (+ OS) specific cache detection mechanisms.
+ */
+
+#if defined(__powerpc__)
+static bool have_coherent_icache;
+#endif
+
+#if defined(__aarch64__) && !defined(CONFIG_DARWIN)
+/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+static uint64_t save_ctr_el0;
+static void arch_cache_info(int *isize, int *dsize)
+{
+    uint64_t ctr;
+
+    /*
+     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+     * but (at least under Linux) these are marked protected by the
+     * kernel.  However, CTR_EL0 contains the minimum linesize in the
+     * entire hierarchy, and is used by userspace cache flushing.
+     *
+     * We will also use this value in flush_idcache_range.
+     */
+    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+    save_ctr_el0 = ctr;
+
+    if (*isize == 0 || *dsize == 0) {
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+# include "elf.h"
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same. */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon. */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    assert((isize & (isize - 1)) == 0);
+    assert((dsize & (dsize - 1)) == 0);
+
+    qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = ctz32(isize);
+    qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = ctz32(dsize);
+
+    qatomic64_init();
+}
+
+
+/*
+ * Architecture (+ OS) specific cache flushing mechanisms.
+ */
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
 
 /* Caches are coherent and do not require flushing; symbol inline. */
@@ -29,17 +239,6 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 #else
 
 /*
- * TODO: unify this with cacheinfo.c.
- * We want to save the whole contents of CTR_EL0, so that we
- * have more than the linesize, but also IDC and DIC.
- */
-static uint64_t save_ctr_el0;
-static void __attribute__((constructor)) init_ctr_el0(void)
-{
-    asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0));
-}
-
-/*
  * This is a copy of gcc's __aarch64_sync_cache_range, modified
  * to fit this three-operand interface.
  */
@@ -48,8 +247,8 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
     const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
     const uint64_t ctr_el0 = save_ctr_el0;
-    const uintptr_t icache_lsize = 4 << extract64(ctr_el0, 0, 4);
-    const uintptr_t dcache_lsize = 4 << extract64(ctr_el0, 16, 4);
+    const uintptr_t icache_lsize = qemu_icache_linesize;
+    const uintptr_t dcache_lsize = qemu_dcache_linesize;
     uintptr_t p;
 
     /*
@@ -104,8 +303,24 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
     uintptr_t p, b, e;
-    size_t dsize = qemu_dcache_linesize;
-    size_t isize = qemu_icache_linesize;
+    size_t dsize, isize;
+
+    /*
+     * Some processors have coherent caches and support a simplified
+     * flushing procedure.  See
+     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
+     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
+     */
+    if (have_coherent_icache) {
+        asm volatile ("sync\n\t"
+                      "icbi 0,%0\n\t"
+                      "isync"
+                      : : "r"(rx) : "memory");
+        return;
+    }
+
+    dsize = qemu_dcache_linesize;
+    isize = qemu_icache_linesize;
 
     b = rw & ~(dsize - 1);
     e = (rw + len + dsize - 1) & ~(dsize - 1);
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
deleted file mode 100644
index ab1644d490..0000000000
--- a/util/cacheinfo.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * cacheinfo.c - helpers to query the host about its caches
- *
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
- * License: GNU GPL, version 2 or later.
- *   See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/host-utils.h"
-#include "qemu/atomic.h"
-#include "qemu/cacheinfo.h"
-
-int qemu_icache_linesize = 0;
-int qemu_icache_linesize_log;
-int qemu_dcache_linesize = 0;
-int qemu_dcache_linesize_log;
-
-/*
- * Operating system specific detection mechanisms.
- */
-
-#if defined(_WIN32)
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
-    DWORD size = 0;
-    BOOL success;
-    size_t i, n;
-
-    /* Check for the required buffer size first.  Note that if the zero
-       size we use for the probe results in success, then there is no
-       data available; fail in that case.  */
-    success = GetLogicalProcessorInformation(0, &size);
-    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
-        return;
-    }
-
-    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
-    if (!GetLogicalProcessorInformation(buf, &size)) {
-        goto fail;
-    }
-
-    for (i = 0; i < n; i++) {
-        if (buf[i].Relationship == RelationCache
-            && buf[i].Cache.Level == 1) {
-            switch (buf[i].Cache.Type) {
-            case CacheUnified:
-                *isize = *dsize = buf[i].Cache.LineSize;
-                break;
-            case CacheInstruction:
-                *isize = buf[i].Cache.LineSize;
-                break;
-            case CacheData:
-                *dsize = buf[i].Cache.LineSize;
-                break;
-            default:
-                break;
-            }
-        }
-    }
- fail:
-    g_free(buf);
-}
-
-#elif defined(__APPLE__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes. */
-    long size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes. */
-    int size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#else
-/* POSIX */
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-# ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
-    if (tmp_isize > 0) {
-        *isize = tmp_isize;
-    }
-# endif
-# ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-    if (tmp_dsize > 0) {
-        *dsize = tmp_dsize;
-    }
-# endif
-}
-#endif /* sys_cache_info */
-
-/*
- * Architecture (+ OS) specific detection mechanisms.
- */
-
-#if defined(__aarch64__)
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0 || *dsize == 0) {
-        uint64_t ctr;
-
-        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
-           but (at least under Linux) these are marked protected by the
-           kernel.  However, CTR_EL0 contains the minimum linesize in the
-           entire hierarchy, and is used by userspace cache flushing.  */
-        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
-        if (*isize == 0) {
-            *isize = 4 << (ctr & 0xf);
-        }
-        if (*dsize == 0) {
-            *dsize = 4 << ((ctr >> 16) & 0xf);
-        }
-    }
-}
-
-#elif defined(_ARCH_PPC) && defined(__linux__)
-# include "elf.h"
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0) {
-        *isize = qemu_getauxval(AT_ICACHEBSIZE);
-    }
-    if (*dsize == 0) {
-        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
-    }
-}
-
-#else
-static void arch_cache_info(int *isize, int *dsize) { }
-#endif /* arch_cache_info */
-
-/*
- * ... and if all else fails ...
- */
-
-static void fallback_cache_info(int *isize, int *dsize)
-{
-    /* If we can only find one of the two, assume they're the same. */
-    if (*isize) {
-        if (*dsize) {
-            /* Success! */
-        } else {
-            *dsize = *isize;
-        }
-    } else if (*dsize) {
-        *isize = *dsize;
-    } else {
-#if defined(_ARCH_PPC)
-        /*
-         * For PPC, we're going to use the cache sizes computed for
-         * flush_idcache_range.  Which means that we must use the
-         * architecture minimum.
-         */
-        *isize = *dsize = 16;
-#else
-        /* Otherwise, 64 bytes is not uncommon. */
-        *isize = *dsize = 64;
-#endif
-    }
-}
-
-static void __attribute__((constructor)) init_cache_info(void)
-{
-    int isize = 0, dsize = 0;
-
-    sys_cache_info(&isize, &dsize);
-    arch_cache_info(&isize, &dsize);
-    fallback_cache_info(&isize, &dsize);
-
-    assert((isize & (isize - 1)) == 0);
-    assert((dsize & (dsize - 1)) == 0);
-
-    qemu_icache_linesize = isize;
-    qemu_icache_linesize_log = ctz32(isize);
-    qemu_dcache_linesize = dsize;
-    qemu_dcache_linesize_log = ctz32(dsize);
-
-    qatomic64_init();
-}
diff --git a/util/meson.build b/util/meson.build
index 8f16018cd4..4939b0b91c 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -27,7 +27,7 @@ util_ss.add(files('envlist.c', 'path.c', 'module.c'))
 util_ss.add(files('host-utils.c'))
 util_ss.add(files('bitmap.c', 'bitops.c'))
 util_ss.add(files('fifo8.c'))
-util_ss.add(files('cacheinfo.c', 'cacheflush.c'))
+util_ss.add(files('cacheflush.c'))
 util_ss.add(files('error.c', 'error-report.c'))
 util_ss.add(files('qemu-print.c'))
 util_ss.add(files('id.c'))
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index a670a57881..6a0de33dd2 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -261,6 +261,9 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
     }
 
     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+        if (!qatomic_read(&timer_list->active_timers)) {
+            continue;
+        }
         qemu_mutex_lock(&timer_list->active_timers_lock);
         ts = timer_list->active_timers;
         /* Skip all external timers */
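
For context on how the merged cacheflush.c is consumed by the rest of QEMU: after code is emitted through a writable mapping, the caller flushes with the three-operand flush_idcache_range() shown above, and the line sizes are already filled in by the constructor. The following is a hedged usage sketch; example_finish_codegen is a hypothetical caller, not a function in the tree.

    #include "qemu/osdep.h"
    #include "qemu/cacheflush.h"   /* flush_idcache_range() */
    #include "qemu/cacheinfo.h"    /* qemu_icache_linesize, qemu_dcache_linesize */

    /*
     * Hypothetical caller, for illustration only.  'rw' is the writable
     * alias where code was just emitted; 'rx' is the executable alias of
     * the same buffer (the two are equal when the buffer is mapped once).
     */
    void example_finish_codegen(uintptr_t rx, uintptr_t rw, size_t len)
    {
        /*
         * init_cache_info() ran as a constructor, so qemu_icache_linesize
         * and qemu_dcache_linesize are already valid here.
         * flush_idcache_range() picks the host-appropriate strategy:
         * a no-op on coherent hosts such as x86, a single icbi sequence
         * on snooping ppc hosts, or a line-by-line flush elsewhere.
         */
        flush_idcache_range(rx, rw, len);
    }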