diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-10-30 14:10:32 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-10-30 14:10:32 +0000 |
commit | 68d8ef4ec540682c3538d4963e836e43a211dd17 (patch) | |
tree | adb6ef5cec791cdc355280c1564e33d227472567 /accel/tcg | |
parent | 62a23835b7c9019ae502915d5990e150349d5114 (diff) | |
parent | 19633df89bfc609569bb693e2e33eb1a68d35e0e (diff) |
Merge remote-tracking branch 'remotes/stsquad/tags/pull-tcg-plugins-281019-4' into staging
TCG Plugins initial implementation
- use --enable-plugins @ configure
- low impact introspection (-plugin empty.so to measure overhead)
- plugins cannot alter guest state
- example plugins included in source tree (tests/plugins)
- -d plugin to enable plugin output in logs
- check-tcg runs extra tests when plugins enabled
- documentation in docs/devel/plugins.rst
# gpg: Signature made Mon 28 Oct 2019 15:13:23 GMT
# gpg: using RSA key 6685AE99E75167BCAFC8DF35FBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>" [full]
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8 DF35 FBD0 DB09 5A9E 2A44
* remotes/stsquad/tags/pull-tcg-plugins-281019-4: (57 commits)
travis.yml: enable linux-gcc-debug-tcg cache
MAINTAINERS: add me for the TCG plugins code
scripts/checkpatch.pl: don't complain about (foo, /* empty */)
.travis.yml: add --enable-plugins tests
include/exec: wrap cpu_ldst.h in CONFIG_TCG
accel/stubs: reduce headers from tcg-stub
tests/plugin: add hotpages to analyse memory access patterns
tests/plugin: add instruction execution breakdown
tests/plugin: add a hotblocks plugin
tests/tcg: enable plugin testing
tests/tcg: drop test-i386-fprem from TESTS when not SLOW
tests/tcg: move "virtual" tests to EXTRA_TESTS
tests/tcg: set QEMU_OPTS for all cris runs
tests/tcg/Makefile.target: fix path to config-host.mak
tests/plugin: add sample plugins
linux-user: support -plugin option
vl: support -plugin option
plugin: add qemu_plugin_outs helper
plugin: add qemu_plugin_insn_disas helper
plugin: expand the plugin_init function to include an info block
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'accel/tcg')
-rw-r--r-- | accel/tcg/Makefile.objs | 1 | ||||
-rw-r--r-- | accel/tcg/atomic_common.inc.c | 54 | ||||
-rw-r--r-- | accel/tcg/atomic_template.h | 94 | ||||
-rw-r--r-- | accel/tcg/cpu-exec.c | 8 | ||||
-rw-r--r-- | accel/tcg/cputlb.c | 60 | ||||
-rw-r--r-- | accel/tcg/plugin-gen.c | 932 | ||||
-rw-r--r-- | accel/tcg/plugin-helpers.h | 5 | ||||
-rw-r--r-- | accel/tcg/translate-all.c | 15 | ||||
-rw-r--r-- | accel/tcg/translator.c | 20 | ||||
-rw-r--r-- | accel/tcg/user-exec.c | 3 |
10 files changed, 1149 insertions, 43 deletions
diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs index d381a02f34..a92f2c454b 100644 --- a/accel/tcg/Makefile.objs +++ b/accel/tcg/Makefile.objs @@ -6,3 +6,4 @@ obj-y += translator.o obj-$(CONFIG_USER_ONLY) += user-exec.o obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o +obj-$(CONFIG_PLUGIN) += plugin-gen.o diff --git a/accel/tcg/atomic_common.inc.c b/accel/tcg/atomic_common.inc.c new file mode 100644 index 0000000000..344525b0bb --- /dev/null +++ b/accel/tcg/atomic_common.inc.c @@ -0,0 +1,54 @@ +/* + * Common Atomic Helper Functions + * + * This file should be included before the various instantiations of + * the atomic_template.h helpers. + * + * Copyright (c) 2019 Linaro + * Written by Alex Bennée <alex.bennee@linaro.org> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +static inline +void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr, uint16_t info) +{ + CPUState *cpu = env_cpu(env); + + trace_guest_mem_before_exec(cpu, addr, info); + trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST); +} + +static inline void +atomic_trace_rmw_post(CPUArchState *env, target_ulong addr, uint16_t info) +{ + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info | TRACE_MEM_ST); +} + +static inline +void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr, uint16_t info) +{ + trace_guest_mem_before_exec(env_cpu(env), addr, info); +} + +static inline +void atomic_trace_ld_post(CPUArchState *env, target_ulong addr, uint16_t info) +{ + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); +} + +static inline +void atomic_trace_st_pre(CPUArchState *env, target_ulong addr, uint16_t info) +{ + trace_guest_mem_before_exec(env_cpu(env), addr, info); +} + +static inline +void atomic_trace_st_post(CPUArchState *env, target_ulong addr, uint16_t info) +{ + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); +} diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 287433d809..837676231f 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -18,6 +18,7 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ +#include "qemu/plugin.h" #include "trace/mem.h" #if DATA_SIZE == 16 @@ -59,26 +60,6 @@ # define ABI_TYPE uint32_t #endif -#define ATOMIC_TRACE_RMW do { \ - uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false); \ - \ - trace_guest_mem_before_exec(env_cpu(env), addr, info); \ - trace_guest_mem_before_exec(env_cpu(env), addr, \ - info | TRACE_MEM_ST); \ - } while (0) - -#define ATOMIC_TRACE_LD do { \ - uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false); \ - \ - trace_guest_mem_before_exec(env_cpu(env), addr, info); \ - } while (0) - -# define ATOMIC_TRACE_ST do { \ - uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, true); \ - \ - trace_guest_mem_before_exec(env_cpu(env), addr, info); \ - } while (0) - /* Define host-endian atomic operations. Note that END is used within the ATOMIC_NAME macro, and redefined below. */ #if DATA_SIZE == 1 @@ -98,14 +79,17 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; DATA_TYPE ret; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_RMW; + atomic_trace_rmw_pre(env, addr, info); #if DATA_SIZE == 16 ret = atomic16_cmpxchg(haddr, cmpv, newv); #else ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); #endif ATOMIC_MMU_CLEANUP; + atomic_trace_rmw_post(env, addr, info); return ret; } @@ -115,10 +99,13 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_LD; + atomic_trace_ld_pre(env, addr, info); val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; + atomic_trace_ld_post(env, addr, info); return val; } @@ -127,10 +114,13 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, true, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_ST; + atomic_trace_st_pre(env, addr, info); atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; + atomic_trace_st_post(env, addr, info); } #endif #else @@ -140,10 +130,13 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; DATA_TYPE ret; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_RMW; + atomic_trace_rmw_pre(env, addr, info); ret = atomic_xchg__nocheck(haddr, val); ATOMIC_MMU_CLEANUP; + atomic_trace_rmw_post(env, addr, info); return ret; } @@ -154,10 +147,14 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ATOMIC_MMU_DECLS; \ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ DATA_TYPE ret; \ + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, \ + false, \ + ATOMIC_MMU_IDX); \ \ - ATOMIC_TRACE_RMW; \ + atomic_trace_rmw_pre(env, addr, info); \ ret = atomic_##X(haddr, val); \ ATOMIC_MMU_CLEANUP; \ + atomic_trace_rmw_post(env, addr, info); \ return ret; \ } @@ -186,8 +183,11 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ATOMIC_MMU_DECLS; \ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ XDATA_TYPE cmp, old, new, val = xval; \ + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, \ + false, \ + ATOMIC_MMU_IDX); \ \ - ATOMIC_TRACE_RMW; \ + atomic_trace_rmw_pre(env, addr, info); \ smp_mb(); \ cmp = atomic_read__nocheck(haddr); \ do { \ @@ -195,6 +195,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ cmp = atomic_cmpxchg__nocheck(haddr, old, new); \ } while (cmp != old); \ ATOMIC_MMU_CLEANUP; \ + atomic_trace_rmw_post(env, addr, info); \ return RET; \ } @@ -232,14 +233,18 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; DATA_TYPE ret; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, + false, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_RMW; + atomic_trace_rmw_pre(env, addr, info); #if DATA_SIZE == 16 ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); #else ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); #endif ATOMIC_MMU_CLEANUP; + atomic_trace_rmw_post(env, addr, info); return BSWAP(ret); } @@ -249,10 +254,14 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, + false, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_LD; + atomic_trace_ld_pre(env, addr, info); val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; + atomic_trace_ld_post(env, addr, info); return BSWAP(val); } @@ -261,11 +270,16 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, + true, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_ST; + val = BSWAP(val); + atomic_trace_st_pre(env, addr, info); val = BSWAP(val); atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; + atomic_trace_st_post(env, addr, info); } #endif #else @@ -275,10 +289,14 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; ABI_TYPE ret; + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, + false, + ATOMIC_MMU_IDX); - ATOMIC_TRACE_RMW; + atomic_trace_rmw_pre(env, addr, info); ret = atomic_xchg__nocheck(haddr, BSWAP(val)); ATOMIC_MMU_CLEANUP; + atomic_trace_rmw_post(env, addr, info); return BSWAP(ret); } @@ -289,10 +307,14 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ATOMIC_MMU_DECLS; \ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ DATA_TYPE ret; \ + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, \ + false, \ + ATOMIC_MMU_IDX); \ \ - ATOMIC_TRACE_RMW; \ + atomic_trace_rmw_pre(env, addr, info); \ ret = atomic_##X(haddr, BSWAP(val)); \ ATOMIC_MMU_CLEANUP; \ + atomic_trace_rmw_post(env, addr, info); \ return BSWAP(ret); \ } @@ -319,8 +341,11 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ATOMIC_MMU_DECLS; \ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ XDATA_TYPE ldo, ldn, old, new, val = xval; \ + uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, \ + false, \ + ATOMIC_MMU_IDX); \ \ - ATOMIC_TRACE_RMW; \ + atomic_trace_rmw_pre(env, addr, info); \ smp_mb(); \ ldn = atomic_read__nocheck(haddr); \ do { \ @@ -328,6 +353,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \ } while (ldo != ldn); \ ATOMIC_MMU_CLEANUP; \ + atomic_trace_rmw_post(env, addr, info); \ return RET; \ } @@ -355,10 +381,6 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new) #undef MEND #endif /* DATA_SIZE > 1 */ -#undef ATOMIC_TRACE_ST -#undef ATOMIC_TRACE_LD -#undef ATOMIC_TRACE_RMW - #undef BSWAP #undef ABI_TYPE #undef DATA_TYPE diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 48272c781b..c01f59c743 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -238,8 +238,6 @@ void cpu_exec_step_atomic(CPUState *cpu) uint32_t flags; uint32_t cflags = 1; uint32_t cf_mask = cflags & CF_HASH_MASK; - /* volatile because we modify it between setjmp and longjmp */ - volatile bool in_exclusive_region = false; if (sigsetjmp(cpu->jmp_env, 0) == 0) { tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); @@ -253,7 +251,6 @@ void cpu_exec_step_atomic(CPUState *cpu) /* Since we got here, we know that parallel_cpus must be true. */ parallel_cpus = false; - in_exclusive_region = true; cc->cpu_exec_enter(cpu); /* execute the generated code */ trace_exec_tb(tb, pc); @@ -271,9 +268,10 @@ void cpu_exec_step_atomic(CPUState *cpu) qemu_mutex_unlock_iothread(); } assert_no_pages_locked(); + qemu_plugin_disable_mem_helpers(cpu); } - if (in_exclusive_region) { + if (cpu_in_exclusive_context(cpu)) { /* We might longjump out of either the codegen or the * execution, so must make sure we only end the exclusive * region if we started it. @@ -704,6 +702,8 @@ int cpu_exec(CPUState *cpu) if (qemu_mutex_iothread_locked()) { qemu_mutex_unlock_iothread(); } + qemu_plugin_disable_mem_helpers(cpu); + assert_no_pages_locked(); } diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 5eebddcca8..68487dceb5 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -34,6 +34,9 @@ #include "qemu/atomic.h" #include "qemu/atomic128.h" #include "translate-all.h" +#ifdef CONFIG_PLUGIN +#include "qemu/plugin-memory.h" +#endif /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1051,7 +1054,8 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, * NOTE: This function will trigger an exception if the page is * not executable. */ -tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, + void **hostp) { uintptr_t mmu_idx = cpu_mmu_index(env, true); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -1077,13 +1081,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) if (unlikely(entry->addr_code & TLB_MMIO)) { /* The region is not backed by RAM. */ + if (hostp) { + *hostp = NULL; + } return -1; } p = (void *)((uintptr_t)addr + entry->addend); + if (hostp) { + *hostp = p; + } return qemu_ram_addr_from_host_nofail(p); } +tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) +{ + return get_page_addr_code_hostp(env, addr, NULL); +} + static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) { @@ -1235,6 +1250,45 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, return (void *)((uintptr_t)addr + entry->addend); } + +#ifdef CONFIG_PLUGIN +/* + * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. + * This should be a hot path as we will have just looked this path up + * in the softmmu lookup code (or helper). We don't handle re-fills or + * checking the victim table. This is purely informational. + * + * This should never fail as the memory access being instrumented + * should have just filled the TLB. + */ + +bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, + bool is_store, struct qemu_plugin_hwaddr *data) +{ + CPUArchState *env = cpu->env_ptr; + CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); + uintptr_t index = tlb_index(env, mmu_idx, addr); + target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; + + if (likely(tlb_hit(tlb_addr, addr))) { + /* We must have an iotlb entry for MMIO */ + if (tlb_addr & TLB_MMIO) { + CPUIOTLBEntry *iotlbentry; + iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + data->is_io = true; + data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); + data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; + } else { + data->is_io = false; + data->v.ram.hostaddr = addr + tlbe->addend; + } + return true; + } + return false; +} + +#endif + /* Probe for a read-modify-write atomic operation. Do not allow unaligned * operations, or io operations to proceed. Return the host address. */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, @@ -1811,6 +1865,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define ATOMIC_MMU_DECLS #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) #define ATOMIC_MMU_CLEANUP +#define ATOMIC_MMU_IDX get_mmuidx(oi) + +#include "atomic_common.inc.c" #define DATA_SIZE 1 #include "atomic_template.h" @@ -1853,6 +1910,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define DATA_SIZE 8 #include "atomic_template.h" #endif +#undef ATOMIC_MMU_IDX /* Code access functions. */ diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c new file mode 100644 index 0000000000..51580d51a0 --- /dev/null +++ b/accel/tcg/plugin-gen.c @@ -0,0 +1,932 @@ +/* + * plugin-gen.c - TCG-related bits of plugin infrastructure + * + * Copyright (C) 2018, Emilio G. Cota <cota@braap.org> + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * We support instrumentation at an instruction granularity. That is, + * if a plugin wants to instrument the memory accesses performed by a + * particular instruction, it can just do that instead of instrumenting + * all memory accesses. Thus, in order to do this we first have to + * translate a TB, so that plugins can decide what/where to instrument. + * + * Injecting the desired instrumentation could be done with a second + * translation pass that combined the instrumentation requests, but that + * would be ugly and inefficient since we would decode the guest code twice. + * Instead, during TB translation we add "empty" instrumentation calls for all + * possible instrumentation events, and then once we collect the instrumentation + * requests from plugins, we either "fill in" those empty events or remove them + * if they have no requests. + * + * When "filling in" an event we first copy the empty callback's TCG ops. This + * might seem unnecessary, but it is done to support an arbitrary number + * of callbacks per event. Take for example a regular instruction callback. + * We first generate a callback to an empty helper function. Then, if two + * plugins register one callback each for this instruction, we make two copies + * of the TCG ops generated for the empty callback, substituting the function + * pointer that points to the empty helper function with the plugins' desired + * callback functions. After that we remove the empty callback's ops. + * + * Note that the location in TCGOp.args[] of the pointer to a helper function + * varies across different guest and host architectures. Instead of duplicating + * the logic that figures this out, we rely on the fact that the empty + * callbacks point to empty functions that are unique pointers in the program. + * Thus, to find the right location we just have to look for a match in + * TCGOp.args[]. This is the main reason why we first copy an empty callback's + * TCG ops and then fill them in; regardless of whether we have one or many + * callbacks for that event, the logic to add all of them is the same. + * + * When generating more than one callback per event, we make a small + * optimization to avoid generating redundant operations. For instance, for the + * second and all subsequent callbacks of an event, we do not need to reload the + * CPU's index into a TCG temp, since the first callback did it already. + */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "tcg/tcg.h" +#include "tcg/tcg-op.h" +#include "trace/mem.h" +#include "exec/exec-all.h" +#include "exec/plugin-gen.h" +#include "exec/translator.h" + +#ifdef CONFIG_SOFTMMU +# define CONFIG_SOFTMMU_GATE 1 +#else +# define CONFIG_SOFTMMU_GATE 0 +#endif + +/* + * plugin_cb_start TCG op args[]: + * 0: enum plugin_gen_from + * 1: enum plugin_gen_cb + * 2: set to 1 for mem callback that is a write, 0 otherwise. + */ + +enum plugin_gen_from { + PLUGIN_GEN_FROM_TB, + PLUGIN_GEN_FROM_INSN, + PLUGIN_GEN_FROM_MEM, + PLUGIN_GEN_AFTER_INSN, + PLUGIN_GEN_N_FROMS, +}; + +enum plugin_gen_cb { + PLUGIN_GEN_CB_UDATA, + PLUGIN_GEN_CB_INLINE, + PLUGIN_GEN_CB_MEM, + PLUGIN_GEN_ENABLE_MEM_HELPER, + PLUGIN_GEN_DISABLE_MEM_HELPER, + PLUGIN_GEN_N_CBS, +}; + +/* + * These helpers are stubs that get dynamically switched out for calls + * direct to the plugin if they are subscribed to. + */ +void HELPER(plugin_vcpu_udata_cb)(uint32_t cpu_index, void *udata) +{ } + +void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index, + qemu_plugin_meminfo_t info, uint64_t vaddr, + void *userdata) +{ } + +static void do_gen_mem_cb(TCGv vaddr, uint32_t info) +{ + TCGv_i32 cpu_index = tcg_temp_new_i32(); + TCGv_i32 meminfo = tcg_const_i32(info); + TCGv_i64 vaddr64 = tcg_temp_new_i64(); + TCGv_ptr udata = tcg_const_ptr(NULL); + + tcg_gen_ld_i32(cpu_index, cpu_env, + -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); + tcg_gen_extu_tl_i64(vaddr64, vaddr); + + gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata); + + tcg_temp_free_ptr(udata); + tcg_temp_free_i64(vaddr64); + tcg_temp_free_i32(meminfo); + tcg_temp_free_i32(cpu_index); +} + +static void gen_empty_udata_cb(void) +{ + TCGv_i32 cpu_index = tcg_temp_new_i32(); + TCGv_ptr udata = tcg_const_ptr(NULL); /* will be overwritten later */ + + tcg_gen_ld_i32(cpu_index, cpu_env, + -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); + gen_helper_plugin_vcpu_udata_cb(cpu_index, udata); + + tcg_temp_free_ptr(udata); + tcg_temp_free_i32(cpu_index); +} + +/* + * For now we only support addi_i64. + * When we support more ops, we can generate one empty inline cb for each. + */ +static void gen_empty_inline_cb(void) +{ + TCGv_i64 val = tcg_temp_new_i64(); + TCGv_ptr ptr = tcg_const_ptr(NULL); /* overwritten later */ + + tcg_gen_ld_i64(val, ptr, 0); + /* pass an immediate != 0 so that it doesn't get optimized away */ + tcg_gen_addi_i64(val, val, 0xdeadface); + tcg_gen_st_i64(val, ptr, 0); + tcg_temp_free_ptr(ptr); + tcg_temp_free_i64(val); +} + +static void gen_empty_mem_cb(TCGv addr, uint32_t info) +{ + do_gen_mem_cb(addr, info); +} + +/* + * Share the same function for enable/disable. When enabling, the NULL + * pointer will be overwritten later. + */ +static void gen_empty_mem_helper(void) +{ + TCGv_ptr ptr; + + ptr = tcg_const_ptr(NULL); + tcg_gen_st_ptr(ptr, cpu_env, offsetof(CPUState, plugin_mem_cbs) - + offsetof(ArchCPU, env)); + tcg_temp_free_ptr(ptr); +} + +static inline +void gen_plugin_cb_start(enum plugin_gen_from from, + enum plugin_gen_cb type, unsigned wr) +{ + TCGOp *op; + + tcg_gen_plugin_cb_start(from, type, wr); + op = tcg_last_op(); + QSIMPLEQ_INSERT_TAIL(&tcg_ctx->plugin_ops, op, plugin_link); +} + +static void gen_wrapped(enum plugin_gen_from from, + enum plugin_gen_cb type, void (*func)(void)) +{ + gen_plugin_cb_start(from, type, 0); + func(); + tcg_gen_plugin_cb_end(); +} + +static inline void plugin_gen_empty_callback(enum plugin_gen_from from) +{ + switch (from) { + case PLUGIN_GEN_AFTER_INSN: + gen_wrapped(from, PLUGIN_GEN_DISABLE_MEM_HELPER, + gen_empty_mem_helper); + break; + case PLUGIN_GEN_FROM_INSN: + /* + * Note: plugin_gen_inject() relies on ENABLE_MEM_HELPER being + * the first callback of an instruction + */ + gen_wrapped(from, PLUGIN_GEN_ENABLE_MEM_HELPER, + gen_empty_mem_helper); + /* fall through */ + case PLUGIN_GEN_FROM_TB: + gen_wrapped(from, PLUGIN_GEN_CB_UDATA, gen_empty_udata_cb); + gen_wrapped(from, PLUGIN_GEN_CB_INLINE, gen_empty_inline_cb); + break; + default: + g_assert_not_reached(); + } +} + +union mem_gen_fn { + void (*mem_fn)(TCGv, uint32_t); + void (*inline_fn)(void); +}; + +static void gen_mem_wrapped(enum plugin_gen_cb type, + const union mem_gen_fn *f, TCGv addr, + uint32_t info, bool is_mem) +{ + int wr = !!(info & TRACE_MEM_ST); + + gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, wr); + if (is_mem) { + f->mem_fn(addr, info); + } else { + f->inline_fn(); + } + tcg_gen_plugin_cb_end(); +} + +void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info) +{ + union mem_gen_fn fn; + + fn.mem_fn = gen_empty_mem_cb; + gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true); + + fn.inline_fn = gen_empty_inline_cb; + gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false); +} + +static TCGOp *find_op(TCGOp *op, TCGOpcode opc) +{ + while (op) { + if (op->opc == opc) { + return op; + } + op = QTAILQ_NEXT(op, link); + } + return NULL; +} + +static TCGOp *rm_ops_range(TCGOp *begin, TCGOp *end) +{ + TCGOp *ret = QTAILQ_NEXT(end, link); + + QTAILQ_REMOVE_SEVERAL(&tcg_ctx->ops, begin, end, link); + return ret; +} + +/* remove all ops until (and including) plugin_cb_end */ +static TCGOp *rm_ops(TCGOp *op) +{ + TCGOp *end_op = find_op(op, INDEX_op_plugin_cb_end); + + tcg_debug_assert(end_op); + return rm_ops_range(op, end_op); +} + +static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) +{ + *begin_op = QTAILQ_NEXT(*begin_op, link); + tcg_debug_assert(*begin_op); + op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc); + memcpy(op->args, (*begin_op)->args, sizeof(op->args)); + return op; +} + +static TCGOp *copy_op(TCGOp **begin_op, TCGOp *op, TCGOpcode opc) +{ + op = copy_op_nocheck(begin_op, op); + tcg_debug_assert((*begin_op)->opc == opc); + return op; +} + +static TCGOp *copy_extu_i32_i64(TCGOp **begin_op, TCGOp *op) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* mov_i32 */ + op = copy_op(begin_op, op, INDEX_op_mov_i32); + /* movi_i32 */ + op = copy_op(begin_op, op, INDEX_op_movi_i32); + } else { + /* extu_i32_i64 */ + op = copy_op(begin_op, op, INDEX_op_extu_i32_i64); + } + return op; +} + +static TCGOp *copy_mov_i64(TCGOp **begin_op, TCGOp *op) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* 2x mov_i32 */ + op = copy_op(begin_op, op, INDEX_op_mov_i32); + op = copy_op(begin_op, op, INDEX_op_mov_i32); + } else { + /* mov_i64 */ + op = copy_op(begin_op, op, INDEX_op_mov_i64); + } + return op; +} + +static TCGOp *copy_movi_i64(TCGOp **begin_op, TCGOp *op, uint64_t v) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* 2x movi_i32 */ + op = copy_op(begin_op, op, INDEX_op_movi_i32); + op->args[1] = v; + + op = copy_op(begin_op, op, INDEX_op_movi_i32); + op->args[1] = v >> 32; + } else { + /* movi_i64 */ + op = copy_op(begin_op, op, INDEX_op_movi_i64); + op->args[1] = v; + } + return op; +} + +static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr) +{ + if (UINTPTR_MAX == UINT32_MAX) { + /* movi_i32 */ + op = copy_op(begin_op, op, INDEX_op_movi_i32); + op->args[1] = (uintptr_t)ptr; + } else { + /* movi_i64 */ + op = copy_movi_i64(begin_op, op, (uint64_t)(uintptr_t)ptr); + } + return op; +} + +static TCGOp *copy_const_i64(TCGOp **begin_op, TCGOp *op, uint64_t v) +{ + return copy_movi_i64(begin_op, op, v); +} + +static TCGOp *copy_extu_tl_i64(TCGOp **begin_op, TCGOp *op) +{ + if (TARGET_LONG_BITS == 32) { + /* extu_i32_i64 */ + op = copy_extu_i32_i64(begin_op, op); + } else { + /* mov_i64 */ + op = copy_mov_i64(begin_op, op); + } + return op; +} + +static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* 2x ld_i32 */ + op = copy_op(begin_op, op, INDEX_op_ld_i32); + op = copy_op(begin_op, op, INDEX_op_ld_i32); + } else { + /* ld_i64 */ + op = copy_op(begin_op, op, INDEX_op_ld_i64); + } + return op; +} + +static TCGOp *copy_st_i64(TCGOp **begin_op, TCGOp *op) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* 2x st_i32 */ + op = copy_op(begin_op, op, INDEX_op_st_i32); + op = copy_op(begin_op, op, INDEX_op_st_i32); + } else { + /* st_i64 */ + op = copy_op(begin_op, op, INDEX_op_st_i64); + } + return op; +} + +static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* all 32-bit backends must implement add2_i32 */ + g_assert(TCG_TARGET_HAS_add2_i32); + op = copy_op(begin_op, op, INDEX_op_add2_i32); + } else { + op = copy_op(begin_op, op, INDEX_op_add_i64); + } + return op; +} + +static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) +{ + if (UINTPTR_MAX == UINT32_MAX) { + /* st_i32 */ + op = copy_op(begin_op, op, INDEX_op_st_i32); + } else { + /* st_i64 */ + op = copy_st_i64(begin_op, op); + } + return op; +} + +static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, + void *func, unsigned tcg_flags, int *cb_idx) +{ + /* copy all ops until the call */ + do { + op = copy_op_nocheck(begin_op, op); + } while (op->opc != INDEX_op_call); + + /* fill in the op call */ + op->param1 = (*begin_op)->param1; + op->param2 = (*begin_op)->param2; + tcg_debug_assert(op->life == 0); + if (*cb_idx == -1) { + int i; + + /* + * Instead of working out the position of the callback in args[], just + * look for @empty_func, since it should be a unique pointer. + */ + for (i = 0; i < MAX_OPC_PARAM_ARGS; i++) { + if ((uintptr_t)(*begin_op)->args[i] == (uintptr_t)empty_func) { + *cb_idx = i; + break; + } + } + tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); + } + op->args[*cb_idx] = (uintptr_t)func; + op->args[*cb_idx + 1] = tcg_flags; + + return op; +} + +static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, + TCGOp *begin_op, TCGOp *op, int *cb_idx) +{ + /* const_ptr */ + op = copy_const_ptr(&begin_op, op, cb->userp); + + /* copy the ld_i32, but note that we only have to copy it once */ + begin_op = QTAILQ_NEXT(begin_op, link); + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); + if (*cb_idx == -1) { + op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); + memcpy(op->args, begin_op->args, sizeof(op->args)); + } + + /* call */ + op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb), + cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + + return op; +} + +static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb, + TCGOp *begin_op, TCGOp *op, + int *unused) +{ + /* const_ptr */ + op = copy_const_ptr(&begin_op, op, cb->userp); + + /* ld_i64 */ + op = copy_ld_i64(&begin_op, op); + + /* const_i64 */ + op = copy_const_i64(&begin_op, op, cb->inline_insn.imm); + + /* add_i64 */ + op = copy_add_i64(&begin_op, op); + + /* st_i64 */ + op = copy_st_i64(&begin_op, op); + + return op; +} + +static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, + TCGOp *begin_op, TCGOp *op, int *cb_idx) +{ + enum plugin_gen_cb type = begin_op->args[1]; + + tcg_debug_assert(type == PLUGIN_GEN_CB_MEM); + + /* const_i32 == movi_i32 ("info", so it remains as is) */ + op = copy_op(&begin_op, op, INDEX_op_movi_i32); + + /* const_ptr */ + op = copy_const_ptr(&begin_op, op, cb->userp); + + /* copy the ld_i32, but note that we only have to copy it once */ + begin_op = QTAILQ_NEXT(begin_op, link); + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); + if (*cb_idx == -1) { + op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); + memcpy(op->args, begin_op->args, sizeof(op->args)); + } + + /* extu_tl_i64 */ + op = copy_extu_tl_i64(&begin_op, op); + + if (type == PLUGIN_GEN_CB_MEM) { + /* call */ + op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb), + cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + } + + return op; +} + +typedef TCGOp *(*inject_fn)(const struct qemu_plugin_dyn_cb *cb, + TCGOp *begin_op, TCGOp *op, int *intp); +typedef bool (*op_ok_fn)(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb); + +static bool op_ok(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb) +{ + return true; +} + +static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb) +{ + int w; + + w = op->args[2]; + return !!(cb->rw & (w + 1)); +} + +static inline +void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject, + op_ok_fn ok) +{ + TCGOp *end_op; + TCGOp *op; + int cb_idx = -1; + int i; + + if (!cbs || cbs->len == 0) { + rm_ops(begin_op); + return; + } + + end_op = find_op(begin_op, INDEX_op_plugin_cb_end); + tcg_debug_assert(end_op); + + op = end_op; + for (i = 0; i < cbs->len; i++) { + struct qemu_plugin_dyn_cb *cb = + &g_array_index(cbs, struct qemu_plugin_dyn_cb, i); + + if (!ok(begin_op, cb)) { + continue; + } + op = inject(cb, begin_op, op, &cb_idx); + } + rm_ops_range(begin_op, end_op); +} + +static void +inject_udata_cb(const GArray *cbs, TCGOp *begin_op) +{ + inject_cb_type(cbs, begin_op, append_udata_cb, op_ok); +} + +static void +inject_inline_cb(const GArray *cbs, TCGOp *begin_op, op_ok_fn ok) +{ + inject_cb_type(cbs, begin_op, append_inline_cb, ok); +} + +static void +inject_mem_cb(const GArray *cbs, TCGOp *begin_op) +{ + inject_cb_type(cbs, begin_op, append_mem_cb, op_rw); +} + +/* we could change the ops in place, but we can reuse more code by copying */ +static void inject_mem_helper(TCGOp *begin_op, GArray *arr) +{ + TCGOp *orig_op = begin_op; + TCGOp *end_op; + TCGOp *op; + + end_op = find_op(begin_op, INDEX_op_plugin_cb_end); + tcg_debug_assert(end_op); + + /* const ptr */ + op = copy_const_ptr(&begin_op, end_op, arr); + + /* st_ptr */ + op = copy_st_ptr(&begin_op, op); + + rm_ops_range(orig_op, end_op); +} + +/* + * Tracking memory accesses performed from helpers requires extra work. + * If an instruction is emulated with helpers, we do two things: + * (1) copy the CB descriptors, and keep track of it so that they can be + * freed later on, and (2) point CPUState.plugin_mem_cbs to the descriptors, so + * that we can read them at run-time (i.e. when the helper executes). + * This run-time access is performed from qemu_plugin_vcpu_mem_cb. + * + * Note that plugin_gen_disable_mem_helpers undoes (2). Since it + * is possible that the code we generate after the instruction is + * dead, we also add checks before generating tb_exit etc. + */ +static void inject_mem_enable_helper(struct qemu_plugin_insn *plugin_insn, + TCGOp *begin_op) +{ + GArray *cbs[2]; + GArray *arr; + size_t n_cbs, i; + + cbs[0] = plugin_insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR]; + cbs[1] = plugin_insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE]; + + n_cbs = 0; + for (i = 0; i < ARRAY_SIZE(cbs); i++) { + n_cbs += cbs[i]->len; + } + + plugin_insn->mem_helper = plugin_insn->calls_helpers && n_cbs; + if (likely(!plugin_insn->mem_helper)) { + rm_ops(begin_op); + return; + } + + arr = g_array_sized_new(false, false, + sizeof(struct qemu_plugin_dyn_cb), n_cbs); + + for (i = 0; i < ARRAY_SIZE(cbs); i++) { + g_array_append_vals(arr, cbs[i]->data, cbs[i]->len); + } + + qemu_plugin_add_dyn_cb_arr(arr); + inject_mem_helper(begin_op, arr); +} + +static void inject_mem_disable_helper(struct qemu_plugin_insn *plugin_insn, + TCGOp *begin_op) +{ + if (likely(!plugin_insn->mem_helper)) { + rm_ops(begin_op); + return; + } + inject_mem_helper(begin_op, NULL); +} + +/* called before finishing a TB with exit_tb, goto_tb or goto_ptr */ +void plugin_gen_disable_mem_helpers(void) +{ + TCGv_ptr ptr; + + if (likely(tcg_ctx->plugin_insn == NULL || + !tcg_ctx->plugin_insn->mem_helper)) { + return; + } + ptr = tcg_const_ptr(NULL); + tcg_gen_st_ptr(ptr, cpu_env, offsetof(CPUState, plugin_mem_cbs) - + offsetof(ArchCPU, env)); + tcg_temp_free_ptr(ptr); + tcg_ctx->plugin_insn->mem_helper = false; +} + +static void plugin_gen_tb_udata(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op) +{ + inject_udata_cb(ptb->cbs[PLUGIN_CB_REGULAR], begin_op); +} + +static void plugin_gen_tb_inline(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op) +{ + inject_inline_cb(ptb->cbs[PLUGIN_CB_INLINE], begin_op, op_ok); +} + +static void plugin_gen_insn_udata(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op, int insn_idx) +{ + struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx); + + inject_udata_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], begin_op); +} + +static void plugin_gen_insn_inline(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op, int insn_idx) +{ + struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx); + inject_inline_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE], + begin_op, op_ok); +} + +static void plugin_gen_mem_regular(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op, int insn_idx) +{ + struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx); + inject_mem_cb(insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR], begin_op); +} + +static void plugin_gen_mem_inline(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op, int insn_idx) +{ + const GArray *cbs; + struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx); + + cbs = insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE]; + inject_inline_cb(cbs, begin_op, op_rw); +} + +static void plugin_gen_enable_mem_helper(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op, int insn_idx) +{ + struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx); + inject_mem_enable_helper(insn, begin_op); +} + +static void plugin_gen_disable_mem_helper(const struct qemu_plugin_tb *ptb, + TCGOp *begin_op, int insn_idx) +{ + struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx); + inject_mem_disable_helper(insn, begin_op); +} + +static void plugin_inject_cb(const struct qemu_plugin_tb *ptb, TCGOp *begin_op, + int insn_idx) +{ + enum plugin_gen_from from = begin_op->args[0]; + enum plugin_gen_cb type = begin_op->args[1]; + + switch (from) { + case PLUGIN_GEN_FROM_TB: + switch (type) { + case PLUGIN_GEN_CB_UDATA: + plugin_gen_tb_udata(ptb, begin_op); + return; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_tb_inline(ptb, begin_op); + return; + default: + g_assert_not_reached(); + } + case PLUGIN_GEN_FROM_INSN: + switch (type) { + case PLUGIN_GEN_CB_UDATA: + plugin_gen_insn_udata(ptb, begin_op, insn_idx); + return; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_insn_inline(ptb, begin_op, insn_idx); + return; + case PLUGIN_GEN_ENABLE_MEM_HELPER: + plugin_gen_enable_mem_helper(ptb, begin_op, insn_idx); + return; + default: + g_assert_not_reached(); + } + case PLUGIN_GEN_FROM_MEM: + switch (type) { + case PLUGIN_GEN_CB_MEM: + plugin_gen_mem_regular(ptb, begin_op, insn_idx); + return; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_mem_inline(ptb, begin_op, insn_idx); + return; + default: + g_assert_not_reached(); + } + case PLUGIN_GEN_AFTER_INSN: + switch (type) { + case PLUGIN_GEN_DISABLE_MEM_HELPER: + plugin_gen_disable_mem_helper(ptb, begin_op, insn_idx); + return; + default: + g_assert_not_reached(); + } + default: + g_assert_not_reached(); + } +} + +/* #define DEBUG_PLUGIN_GEN_OPS */ +static void pr_ops(void) +{ +#ifdef DEBUG_PLUGIN_GEN_OPS + TCGOp *op; + int i = 0; + + QTAILQ_FOREACH(op, &tcg_ctx->ops, link) { + const char *name = ""; + const char *type = ""; + + if (op->opc == INDEX_op_plugin_cb_start) { + switch (op->args[0]) { + case PLUGIN_GEN_FROM_TB: + name = "tb"; + break; + case PLUGIN_GEN_FROM_INSN: + name = "insn"; + break; + case PLUGIN_GEN_FROM_MEM: + name = "mem"; + break; + case PLUGIN_GEN_AFTER_INSN: + name = "after insn"; + break; + default: + break; + } + switch (op->args[1]) { + case PLUGIN_GEN_CB_UDATA: + type = "udata"; + break; + case PLUGIN_GEN_CB_INLINE: + type = "inline"; + break; + case PLUGIN_GEN_CB_MEM: + type = "mem"; + break; + case PLUGIN_GEN_ENABLE_MEM_HELPER: + type = "enable mem helper"; + break; + case PLUGIN_GEN_DISABLE_MEM_HELPER: + type = "disable mem helper"; + break; + default: + break; + } + } + printf("op[%2i]: %s %s %s\n", i, tcg_op_defs[op->opc].name, name, type); + i++; + } +#endif +} + +static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb) +{ + TCGOp *op; + int insn_idx; + + pr_ops(); + insn_idx = -1; + QSIMPLEQ_FOREACH(op, &tcg_ctx->plugin_ops, plugin_link) { + enum plugin_gen_from from = op->args[0]; + enum plugin_gen_cb type = op->args[1]; + + tcg_debug_assert(op->opc == INDEX_op_plugin_cb_start); + /* ENABLE_MEM_HELPER is the first callback of an instruction */ + if (from == PLUGIN_GEN_FROM_INSN && + type == PLUGIN_GEN_ENABLE_MEM_HELPER) { + insn_idx++; + } + plugin_inject_cb(plugin_tb, op, insn_idx); + } + pr_ops(); +} + +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) +{ + struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; + bool ret = false; + + if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_mask)) { + ret = true; + + QSIMPLEQ_INIT(&tcg_ctx->plugin_ops); + ptb->vaddr = tb->pc; + ptb->vaddr2 = -1; + get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); + ptb->haddr2 = NULL; + + plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB); + } + return ret; +} + +void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) +{ + struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; + struct qemu_plugin_insn *pinsn; + + pinsn = qemu_plugin_tb_insn_get(ptb); + tcg_ctx->plugin_insn = pinsn; + pinsn->vaddr = db->pc_next; + plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN); + + /* + * Detect page crossing to get the new host address. + * Note that we skip this when haddr1 == NULL, e.g. when we're + * fetching instructions from a region not backed by RAM. + */ + if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) && + unlikely((db->pc_next & TARGET_PAGE_MASK) != + (db->pc_first & TARGET_PAGE_MASK))) { + get_page_addr_code_hostp(cpu->env_ptr, db->pc_next, + &ptb->haddr2); + ptb->vaddr2 = db->pc_next; + } + if (likely(ptb->vaddr2 == -1)) { + pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr; + } else { + pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2; + } +} + +void plugin_gen_insn_end(void) +{ + plugin_gen_empty_callback(PLUGIN_GEN_AFTER_INSN); +} + +void plugin_gen_tb_end(CPUState *cpu) +{ + struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; + int i; + + /* collect instrumentation requests */ + qemu_plugin_tb_trans_cb(cpu, ptb); + + /* inject the instrumentation at the appropriate places */ + plugin_gen_inject(ptb); + + /* clean up */ + for (i = 0; i < PLUGIN_N_CB_SUBTYPES; i++) { + if (ptb->cbs[i]) { + g_array_set_size(ptb->cbs[i], 0); + } + } + ptb->n = 0; + tcg_ctx->plugin_insn = NULL; +} diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h new file mode 100644 index 0000000000..1916ee7920 --- /dev/null +++ b/accel/tcg/plugin-helpers.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_PLUGIN +/* Note: no TCG flags because those are overwritten later */ +DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr) +DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr) +#endif diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ae063b53f9..9f48da9472 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1214,6 +1214,8 @@ static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data) /* flush all the translation blocks */ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) { + bool did_flush = false; + mmap_lock(); /* If it is already been done on request of another CPU, * just retry. @@ -1221,6 +1223,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) if (tb_ctx.tb_flush_count != tb_flush_count.host_int) { goto done; } + did_flush = true; if (DEBUG_TB_FLUSH_GATE) { size_t nb_tbs = tcg_nb_tbs(); @@ -1245,14 +1248,22 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) done: mmap_unlock(); + if (did_flush) { + qemu_plugin_flush_cb(); + } } void tb_flush(CPUState *cpu) { if (tcg_enabled()) { unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count); - async_safe_run_on_cpu(cpu, do_tb_flush, - RUN_ON_CPU_HOST_INT(tb_flush_count)); + + if (cpu_in_exclusive_context(cpu)) { + do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count)); + } else { + async_safe_run_on_cpu(cpu, do_tb_flush, + RUN_ON_CPU_HOST_INT(tb_flush_count)); + } } } diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 70c66c538c..f977682be7 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -16,6 +16,7 @@ #include "exec/gen-icount.h" #include "exec/log.h" #include "exec/translator.h" +#include "exec/plugin-gen.h" /* Pairs with tcg_clear_temp_count. To be called by #TranslatorOps.{translate_insn,tb_stop} if @@ -34,6 +35,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, CPUState *cpu, TranslationBlock *tb, int max_insns) { int bp_insn = 0; + bool plugin_enabled; /* Initialize DisasContext */ db->tb = tb; @@ -55,11 +57,17 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ + plugin_enabled = plugin_gen_tb_start(cpu, tb); + while (true) { db->num_insns++; ops->insn_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ + if (plugin_enabled) { + plugin_gen_insn_start(cpu, db); + } + /* Pass breakpoint hits to target for further processing */ if (!db->singlestep_enabled && unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) { @@ -99,6 +107,14 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, break; } + /* + * We can't instrument after instructions that change control + * flow although this only really affects post-load operations. + */ + if (plugin_enabled) { + plugin_gen_insn_end(); + } + /* Stop translation if the output buffer is full, or we have executed all of the allowed instructions. */ if (tcg_op_buf_full() || db->num_insns >= db->max_insns) { @@ -111,6 +127,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, ops->tb_stop(db, cpu); gen_tb_end(db->tb, db->num_insns - bp_insn); + if (plugin_enabled) { + plugin_gen_tb_end(cpu); + } + /* The disas_log hook may use these values rather than recompute. */ db->tb->size = db->pc_next - db->pc_first; db->tb->icount = db->num_insns; diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 71c4bf6477..b09f7a1577 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -751,10 +751,13 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define ATOMIC_MMU_DECLS do {} while (0) #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) +#define ATOMIC_MMU_IDX MMU_USER_IDX #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) #define EXTRA_ARGS +#include "atomic_common.inc.c" + #define DATA_SIZE 1 #include "atomic_template.h" |