diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2022-10-05 10:17:02 -0400 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2022-10-05 10:17:02 -0400 |
commit | 4a9c04672a875ed00ea807ea4d552c01f6440bc7 (patch) | |
tree | 5ae0c7c85d114c608cf6018b8dc50f4feaaa83b8 /include | |
parent | fafd35a6dab8e70a7c395aaa8e1273267cf9f3c8 (diff) | |
parent | ab419fd8a035a65942de4e63effcd55ccbf1a9fe (diff) |
Merge tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu into staging
Cache CPUClass for use in hot code paths.
Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full.
Add generic support for TARGET_TB_PCREL.
tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07
target/sh4: Fix TB_FLAG_UNALIGN
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmM8jXEdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/oEggArAHK8FtydfQ4ZwnF
# SjXfpdP50OC0SZn3uBN93FZOrxz9UYG9t1oDHs39J/+b/u2nwJYch//EH2k+NtOW
# hc3iIgS9bWgs/UWZESkViKQccw7gpYlc21Br38WWwFNEFyecX0p+e9pJgld5rSv1
# mRGvCs5J2svH2tcXl/Sb/JWgcumOJoG7qy2aLyJGolR6UOfwcfFMzQXzq8qjpRKH
# Jh84qusE/rLbzBsdN6snJY4+dyvUo03lT5IJ4d+FQg2tUip+Qqt7pnMbsqq6qF6H
# R6fWU1JTbsh7GxXJwQJ83jLBnUsi8cy6FKrZ3jyiBq76+DIpR0PqoEe+PN/weInU
# TN0z4g==
# =RfXJ
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 04 Oct 2022 15:45:53 EDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu:
target/sh4: Fix TB_FLAG_UNALIGN
tcg/ppc: Optimize 26-bit jumps
accel/tcg: Introduce TARGET_TB_PCREL
accel/tcg: Introduce tb_pc and log_pc
hw/core: Add CPUClass.get_pc
include/hw/core: Create struct CPUJumpCache
accel/tcg: Inline tb_flush_jmp_cache
accel/tcg: Do not align tb->page_addr[0]
accel/tcg: Use DisasContextBase in plugin_gen_tb_start
accel/tcg: Use bool for page_find_alloc
accel/tcg: Remove PageDesc code_bitmap
include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
accel/tcg: Introduce tlb_set_page_full
accel/tcg: Introduce probe_access_full
accel/tcg: Suppress auto-invalidate in probe_access_internal
accel/tcg: Drop addr member from SavedIOTLB
accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
cputlb: used cached CPUClass in our hot-paths
hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
cpu: cache CPUClass in CPUState for hot code paths
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/exec/cpu-common.h | 1 | ||||
-rw-r--r-- | include/exec/cpu-defs.h | 48 | ||||
-rw-r--r-- | include/exec/exec-all.h | 75 | ||||
-rw-r--r-- | include/exec/plugin-gen.h | 7 | ||||
-rw-r--r-- | include/hw/core/cpu.h | 28 | ||||
-rw-r--r-- | include/qemu/typedefs.h | 2 | ||||
-rw-r--r-- | include/tcg/tcg.h | 2 |
7 files changed, 131 insertions, 32 deletions
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index d909429427..c493510ee9 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -38,6 +38,7 @@ void cpu_list_unlock(void); unsigned int cpu_list_generation_id_get(void); void tcg_flush_softmmu_tlb(CPUState *cs); +void tcg_flush_jmp_cache(CPUState *cs); void tcg_iommu_init_notifier_list(CPUState *cpu); void tcg_iommu_free_notifier_list(CPUState *cpu); diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index ba3cd32a1e..21309cf567 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -54,6 +54,9 @@ # error TARGET_PAGE_BITS must be defined in cpu-param.h # endif #endif +#ifndef TARGET_TB_PCREL +# define TARGET_TB_PCREL 0 +#endif #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8) @@ -108,6 +111,7 @@ typedef uint64_t target_ulong; # endif # endif +/* Minimalized TLB entry for use by TCG fast path. */ typedef struct CPUTLBEntry { /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not @@ -131,14 +135,14 @@ typedef struct CPUTLBEntry { QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)); -/* The IOTLB is not accessed directly inline by generated TCG code, - * so the CPUIOTLBEntry layout is not as critical as that of the - * CPUTLBEntry. (This is also why we don't want to combine the two - * structs into one.) +/* + * The full TLB entry, which is not accessed by generated TCG code, + * so the layout is not as critical as that of CPUTLBEntry. This is + * also why we don't want to combine the two structs. */ -typedef struct CPUIOTLBEntry { +typedef struct CPUTLBEntryFull { /* - * @addr contains: + * @xlat_section contains: * - in the lower TARGET_PAGE_BITS, a physical section number * - with the lower TARGET_PAGE_BITS masked off, an offset which * must be added to the virtual address to obtain: @@ -146,9 +150,32 @@ typedef struct CPUIOTLBEntry { * number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM) * + the offset within the target MemoryRegion (otherwise) */ - hwaddr addr; + hwaddr xlat_section; + + /* + * @phys_addr contains the physical address in the address space + * given by cpu_asidx_from_attrs(cpu, @attrs). + */ + hwaddr phys_addr; + + /* @attrs contains the memory transaction attributes for the page. */ MemTxAttrs attrs; -} CPUIOTLBEntry; + + /* @prot contains the complete protections for the page. */ + uint8_t prot; + + /* @lg_page_size contains the log2 of the page size. */ + uint8_t lg_page_size; + + /* + * Allow target-specific additions to this structure. + * This may be used to cache items from the guest cpu + * page tables for later use by the implementation. + */ +#ifdef TARGET_PAGE_ENTRY_EXTRA + TARGET_PAGE_ENTRY_EXTRA +#endif +} CPUTLBEntryFull; /* * Data elements that are per MMU mode, minus the bits accessed by @@ -172,9 +199,8 @@ typedef struct CPUTLBDesc { size_t vindex; /* The tlb victim table, in two parts. */ CPUTLBEntry vtable[CPU_VTLB_SIZE]; - CPUIOTLBEntry viotlb[CPU_VTLB_SIZE]; - /* The iotlb. */ - CPUIOTLBEntry *iotlb; + CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE]; + CPUTLBEntryFull *fulltlb; } CPUTLBDesc; /* diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bcad607c4e..e5f8b224a5 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -258,6 +258,28 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, unsigned bits); /** + * tlb_set_page_full: + * @cpu: CPU context + * @mmu_idx: mmu index of the tlb to modify + * @vaddr: virtual address of the entry to add + * @full: the details of the tlb entry + * + * Add an entry to @cpu tlb index @mmu_idx. All of the fields of + * @full must be filled, except for xlat_section, and constitute + * the complete description of the translated page. + * + * This is generally called by the target tlb_fill function after + * having performed a successful page table walk to find the physical + * address and attributes for the translation. + * + * At most one entry for a given virtual address is permitted. Only a + * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only + * used by tlb_flush_page. + */ +void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr, + CPUTLBEntryFull *full); + +/** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for * @vaddr: virtual address of page to add entry for @@ -434,6 +456,21 @@ int probe_access_flags(CPUArchState *env, target_ulong addr, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t retaddr); +#ifndef CONFIG_USER_ONLY +/** + * probe_access_full: + * Like probe_access_flags, except also return into @pfull. + * + * The CPUTLBEntryFull structure returned via @pfull is transient + * and must be consumed or copied immediately, before any further + * access or changes to TLB @mmu_idx. + */ +int probe_access_full(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, + CPUTLBEntryFull **pfull, uintptr_t retaddr); +#endif + #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ /* Estimated block size for TB allocation. */ @@ -459,8 +496,32 @@ struct tb_tc { }; struct TranslationBlock { - target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ - target_ulong cs_base; /* CS base for this block */ +#if !TARGET_TB_PCREL + /* + * Guest PC corresponding to this block. This must be the true + * virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and + * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or + * privilege, must store those bits elsewhere. + * + * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are + * written such that the TB is associated only with the physical + * page and may be run in any virtual address context. In this case, + * PC must always be taken from ENV in a target-specific manner. + * Unwind information is taken as offsets from the page, to be + * deposited into the "current" PC. + */ + target_ulong pc; +#endif + + /* + * Target-specific data associated with the TranslationBlock, e.g.: + * x86: the original user, the Code Segment virtual base, + * arm: an extension of tb->flags, + * s390x: instruction data for EXECUTE, + * sparc: the next pc of the instruction queue (for delay slots). + */ + target_ulong cs_base; + uint32_t flags; /* flags defining in which context the code was generated */ uint32_t cflags; /* compile flags */ @@ -533,6 +594,16 @@ struct TranslationBlock { uintptr_t jmp_dest[2]; }; +/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */ +static inline target_ulong tb_pc(const TranslationBlock *tb) +{ +#if TARGET_TB_PCREL + qemu_build_not_reached(); +#else + return tb->pc; +#endif +} + /* Hide the qatomic_read to make code a little easier on the eyes */ static inline uint32_t tb_cflags(const TranslationBlock *tb) { diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h index f92f169739..5004728c61 100644 --- a/include/exec/plugin-gen.h +++ b/include/exec/plugin-gen.h @@ -19,7 +19,8 @@ struct DisasContextBase; #ifdef CONFIG_PLUGIN -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress); +bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, + bool supress); void plugin_gen_tb_end(CPUState *cpu); void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db); void plugin_gen_insn_end(void); @@ -48,8 +49,8 @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size) #else /* !CONFIG_PLUGIN */ -static inline -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress) +static inline bool +plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup) { return false; } diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 500503da13..f9b58773f7 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -51,6 +51,13 @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, */ #define CPU(obj) ((CPUState *)(obj)) +/* + * The class checkers bring in CPU_GET_CLASS() which is potentially + * expensive given the eventual call to + * object_class_dynamic_cast_assert(). Because of this the CPUState + * has a cached value for the class in cs->cc which is set up in + * cpu_exec_realizefn() for use in hot code paths. + */ typedef struct CPUClass CPUClass; DECLARE_CLASS_CHECKERS(CPUClass, CPU, TYPE_CPU) @@ -108,6 +115,8 @@ struct SysemuCPUOps; * If the target behaviour here is anything other than "set * the PC register to the value passed in" then the target must * also implement the synchronize_from_tb hook. + * @get_pc: Callback for getting the Program Counter register. + * As above, with the semantics of the target architecture. * @gdb_read_register: Callback for letting GDB read a register. * @gdb_write_register: Callback for letting GDB write a register. * @gdb_adjust_breakpoint: Callback for adjusting the address of a @@ -144,6 +153,7 @@ struct CPUClass { void (*dump_state)(CPUState *cpu, FILE *, int flags); int64_t (*get_arch_id)(CPUState *cpu); void (*set_pc)(CPUState *cpu, vaddr value); + vaddr (*get_pc)(CPUState *cpu); int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg); int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg); vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr); @@ -218,7 +228,6 @@ struct CPUWatchpoint { * the memory regions get moved around by io_writex. */ typedef struct SavedIOTLB { - hwaddr addr; MemoryRegionSection *section; hwaddr mr_offset; } SavedIOTLB; @@ -230,9 +239,6 @@ struct kvm_run; struct hax_vcpu_state; struct hvf_vcpu_state; -#define TB_JMP_CACHE_BITS 12 -#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) - /* work queue */ /* The union type allows passing of 64 bit target pointers on 32 bit @@ -317,6 +323,8 @@ struct qemu_work_item; struct CPUState { /*< private >*/ DeviceState parent_obj; + /* cache to avoid expensive CPU_GET_CLASS */ + CPUClass *cc; /*< public >*/ int nr_cores; @@ -361,8 +369,7 @@ struct CPUState { CPUArchState *env_ptr; IcountDecr *icount_decr_ptr; - /* Accessed in parallel; all accesses must be atomic */ - TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; + CPUJumpCache *tb_jmp_cache; struct GDBRegisterState *gdb_regs; int gdb_num_regs; @@ -448,15 +455,6 @@ extern CPUTailQ cpus; extern __thread CPUState *current_cpu; -static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) -{ - unsigned int i; - - for (i = 0; i < TB_JMP_CACHE_SIZE; i++) { - qatomic_set(&cpu->tb_jmp_cache[i], NULL); - } -} - /** * qemu_tcg_mttcg_enabled: * Check whether we are running MultiThread TCG or not. diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 42f4ceb701..5f95169827 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -41,7 +41,9 @@ typedef struct CoMutex CoMutex; typedef struct ConfidentialGuestSupport ConfidentialGuestSupport; typedef struct CPUAddressSpace CPUAddressSpace; typedef struct CPUArchState CPUArchState; +typedef struct CPUJumpCache CPUJumpCache; typedef struct CPUState CPUState; +typedef struct CPUTLBEntryFull CPUTLBEntryFull; typedef struct DeviceListener DeviceListener; typedef struct DeviceState DeviceState; typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot; diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 26a70526f1..d84bae6e3f 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -840,7 +840,7 @@ void tcg_register_thread(void); void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); -int tcg_gen_code(TCGContext *s, TranslationBlock *tb); +int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start); void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); |