diff options
Diffstat (limited to 'target')
-rw-r--r-- | target/i386/Makefile.objs | 2 | ||||
-rw-r--r-- | target/i386/cpu.c | 9 | ||||
-rw-r--r-- | target/i386/cpu.h | 3 | ||||
-rw-r--r-- | target/ppc/cpu-qom.h | 1 | ||||
-rw-r--r-- | target/ppc/cpu.h | 59 | ||||
-rw-r--r-- | target/ppc/excp_helper.c | 30 | ||||
-rw-r--r-- | target/ppc/internal.h | 27 | ||||
-rw-r--r-- | target/ppc/kvm.c | 206 | ||||
-rw-r--r-- | target/ppc/kvm_ppc.h | 23 | ||||
-rw-r--r-- | target/ppc/machine.c | 8 | ||||
-rw-r--r-- | target/ppc/mmu-hash64.c | 2 | ||||
-rw-r--r-- | target/ppc/translate.c | 22 | ||||
-rw-r--r-- | target/ppc/translate/vmx-impl.inc.c | 27 | ||||
-rw-r--r-- | target/ppc/translate/vsx-impl.inc.c | 65 | ||||
-rw-r--r-- | target/ppc/translate_init.inc.c | 7 | ||||
-rw-r--r-- | target/s390x/Makefile.objs | 1 | ||||
-rw-r--r-- | target/s390x/cpu.h | 7 | ||||
-rw-r--r-- | target/s390x/helper.h | 21 | ||||
-rw-r--r-- | target/s390x/insn-data.def | 82 | ||||
-rw-r--r-- | target/s390x/insn-format.def | 25 | ||||
-rw-r--r-- | target/s390x/internal.h | 43 | ||||
-rw-r--r-- | target/s390x/mem_helper.c | 26 | ||||
-rw-r--r-- | target/s390x/translate.c | 61 | ||||
-rw-r--r-- | target/s390x/translate_vx.inc.c | 935 | ||||
-rw-r--r-- | target/s390x/vec.h | 101 | ||||
-rw-r--r-- | target/s390x/vec_helper.c | 193 |
26 files changed, 1754 insertions, 232 deletions
diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs index cb9c265525..48e0c28434 100644 --- a/target/i386/Makefile.objs +++ b/target/i386/Makefile.objs @@ -3,10 +3,10 @@ obj-$(CONFIG_TCG) += translate.o obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o +obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o ifeq ($(CONFIG_SOFTMMU),y) obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o obj-$(CONFIG_KVM) += kvm.o -obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o obj-$(CONFIG_HYPERV) += hyperv.o obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o ifeq ($(CONFIG_WIN32),y) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index d3aa6a815b..d90c01a059 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5031,6 +5031,13 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_C000_0001_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_SVM); x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); + + /* Intel Processor Trace requires CPUID[0x14] */ + if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && + kvm_enabled() && cpu->intel_pt_auto_level) { + x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); + } + /* SVM requires CPUID[0x8000000A] */ if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A); @@ -5824,6 +5831,8 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1), DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only, false), + DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, + true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 95112b9118..83fb522554 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1454,6 +1454,9 @@ struct X86CPU { /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; + /* Enable auto level-increase for Intel Processor Trace leave */ + bool intel_pt_auto_level; + /* if true fill the top bits of the MTRR_PHYSMASKn variable range */ bool fill_mtrr_mask; diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index ae51fe754e..be9b4c30c3 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -190,6 +190,7 @@ typedef struct PowerPCCPUClass { #endif const PPCHash64Options *hash64_opts; struct ppc_radix_page_info *radix_page_info; + uint32_t lrg_decr_bits; void (*init_proc)(CPUPPCState *env); int (*check_pow)(CPUPPCState *env); int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 26604ddf98..fc12b4688e 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1321,10 +1321,10 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env); void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value); void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value); bool ppc_decr_clear_on_delivery(CPUPPCState *env); -uint32_t cpu_ppc_load_decr (CPUPPCState *env); -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value); -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env); -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value); +target_ulong cpu_ppc_load_decr(CPUPPCState *env); +void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value); +target_ulong cpu_ppc_load_hdecr(CPUPPCState *env); +void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value); uint64_t cpu_ppc_load_purr (CPUPPCState *env); uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env); uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env); @@ -2563,19 +2563,64 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx) } /* Accessors for FP, VMX and VSX registers */ +#if defined(HOST_WORDS_BIGENDIAN) +#define VsrB(i) u8[i] +#define VsrSB(i) s8[i] +#define VsrH(i) u16[i] +#define VsrSH(i) s16[i] +#define VsrW(i) u32[i] +#define VsrSW(i) s32[i] +#define VsrD(i) u64[i] +#define VsrSD(i) s64[i] +#else +#define VsrB(i) u8[15 - (i)] +#define VsrSB(i) s8[15 - (i)] +#define VsrH(i) u16[7 - (i)] +#define VsrSH(i) s16[7 - (i)] +#define VsrW(i) u32[3 - (i)] +#define VsrSW(i) s32[3 - (i)] +#define VsrD(i) u64[1 - (i)] +#define VsrSD(i) s64[1 - (i)] +#endif + +static inline int vsr64_offset(int i, bool high) +{ + return offsetof(CPUPPCState, vsr[i].VsrD(high ? 0 : 1)); +} + +static inline int vsr_full_offset(int i) +{ + return offsetof(CPUPPCState, vsr[i].u64[0]); +} + +static inline int fpr_offset(int i) +{ + return vsr64_offset(i, true); +} + static inline uint64_t *cpu_fpr_ptr(CPUPPCState *env, int i) { - return &env->vsr[i].u64[0]; + return (uint64_t *)((uintptr_t)env + fpr_offset(i)); } static inline uint64_t *cpu_vsrl_ptr(CPUPPCState *env, int i) { - return &env->vsr[i].u64[1]; + return (uint64_t *)((uintptr_t)env + vsr64_offset(i, false)); +} + +static inline long avr64_offset(int i, bool high) +{ + return vsr64_offset(i + 32, high); +} + +static inline int avr_full_offset(int i) +{ + return vsr_full_offset(i + 32); } static inline ppc_avr_t *cpu_avr_ptr(CPUPPCState *env, int i) { - return &env->vsr[32 + i]; + return (ppc_avr_t *)((uintptr_t)env + avr_full_offset(i)); } void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env); diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 39bedbb11d..beafcf1ebd 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -107,6 +107,24 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, return POWERPC_EXCP_RESET; } +static uint64_t ppc_excp_vector_offset(CPUState *cs, int ail) +{ + uint64_t offset = 0; + + switch (ail) { + case AIL_0001_8000: + offset = 0x18000; + break; + case AIL_C000_0000_0000_4000: + offset = 0xc000000000004000ull; + break; + default: + cpu_abort(cs, "Invalid AIL combination %d\n", ail); + break; + } + + return offset; +} /* Note that this function should be greatly optimized * when called with a constant excp, from ppc_hw_interrupt @@ -708,17 +726,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* Handle AIL */ if (ail) { new_msr |= (1 << MSR_IR) | (1 << MSR_DR); - switch(ail) { - case AIL_0001_8000: - vector |= 0x18000; - break; - case AIL_C000_0000_0000_4000: - vector |= 0xc000000000004000ull; - break; - default: - cpu_abort(cs, "Invalid AIL combination %d\n", ail); - break; - } + vector |= ppc_excp_vector_offset(cs, ail); } #if defined(TARGET_PPC64) diff --git a/target/ppc/internal.h b/target/ppc/internal.h index f26a71ffcf..fb6f64ed1e 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -204,35 +204,16 @@ EXTRACT_HELPER(IMM8, 11, 8); EXTRACT_HELPER(DCMX, 16, 7); EXTRACT_HELPER_SPLIT_3(DCMX_XV, 5, 16, 0, 1, 2, 5, 1, 6, 6); -#if defined(HOST_WORDS_BIGENDIAN) -#define VsrB(i) u8[i] -#define VsrSB(i) s8[i] -#define VsrH(i) u16[i] -#define VsrSH(i) s16[i] -#define VsrW(i) u32[i] -#define VsrSW(i) s32[i] -#define VsrD(i) u64[i] -#define VsrSD(i) s64[i] -#else -#define VsrB(i) u8[15 - (i)] -#define VsrSB(i) s8[15 - (i)] -#define VsrH(i) u16[7 - (i)] -#define VsrSH(i) s16[7 - (i)] -#define VsrW(i) u32[3 - (i)] -#define VsrSW(i) s32[3 - (i)] -#define VsrD(i) u64[1 - (i)] -#define VsrSD(i) s64[1 - (i)] -#endif static inline void getVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env) { - vsr->VsrD(0) = env->vsr[n].u64[0]; - vsr->VsrD(1) = env->vsr[n].u64[1]; + vsr->VsrD(0) = env->vsr[n].VsrD(0); + vsr->VsrD(1) = env->vsr[n].VsrD(1); } static inline void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env) { - env->vsr[n].u64[0] = vsr->VsrD(0); - env->vsr[n].u64[1] = vsr->VsrD(1); + env->vsr[n].VsrD(0) = vsr->VsrD(0); + env->vsr[n].VsrD(1) = vsr->VsrD(1); } void helper_compute_fprf_float16(CPUPPCState *env, float16 arg); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index d01852fe31..2427c8ee13 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -90,7 +90,9 @@ static int cap_ppc_pvr_compat; static int cap_ppc_safe_cache; static int cap_ppc_safe_bounds_check; static int cap_ppc_safe_indirect_branch; +static int cap_ppc_count_cache_flush_assist; static int cap_ppc_nested_kvm_hv; +static int cap_large_decr; static uint32_t debug_inst_opcode; @@ -124,6 +126,7 @@ static bool kvmppc_is_pr(KVMState *ks) static int kvm_ppc_register_host_cpu_type(MachineState *ms); static void kvmppc_get_cpu_characteristics(KVMState *s); +static int kvmppc_get_dec_bits(void); int kvm_arch_init(MachineState *ms, KVMState *s) { @@ -151,6 +154,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); + cap_large_decr = kvmppc_get_dec_bits(); /* * Note: setting it to false because there is not such capability * in KVM at this moment. @@ -752,7 +756,7 @@ static int kvm_get_fp(CPUState *cs) static int kvm_get_vpa(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); - sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); struct kvm_one_reg reg; int ret; @@ -792,7 +796,7 @@ static int kvm_get_vpa(CPUState *cs) static int kvm_put_vpa(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); - sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); struct kvm_one_reg reg; int ret; @@ -1593,70 +1597,93 @@ void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) } } -static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) +static int kvm_handle_hw_breakpoint(CPUState *cs, + struct kvm_debug_exit_arch *arch_info) { - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - struct kvm_debug_exit_arch *arch_info = &run->debug.arch; int handle = 0; int n; int flag = 0; - if (cs->singlestep_enabled) { - handle = 1; - } else if (arch_info->status) { - if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { - if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { - n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); - if (n >= 0) { - handle = 1; - } - } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | - KVMPPC_DEBUG_WATCH_WRITE)) { - n = find_hw_watchpoint(arch_info->address, &flag); - if (n >= 0) { - handle = 1; - cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_debug_points[n].addr; - hw_watchpoint.flags = flag; - } + if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { + if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { + n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); + if (n >= 0) { + handle = 1; + } + } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | + KVMPPC_DEBUG_WATCH_WRITE)) { + n = find_hw_watchpoint(arch_info->address, &flag); + if (n >= 0) { + handle = 1; + cs->watchpoint_hit = &hw_watchpoint; + hw_watchpoint.vaddr = hw_debug_points[n].addr; + hw_watchpoint.flags = flag; } } - } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { - handle = 1; - } else { - /* QEMU is not able to handle debug exception, so inject - * program exception to guest; - * Yes program exception NOT debug exception !! - * When QEMU is using debug resources then debug exception must - * be always set. To achieve this we set MSR_DE and also set - * MSRP_DEP so guest cannot change MSR_DE. - * When emulating debug resource for guest we want guest - * to control MSR_DE (enable/disable debug interrupt on need). - * Supporting both configurations are NOT possible. - * So the result is that we cannot share debug resources - * between QEMU and Guest on BOOKE architecture. - * In the current design QEMU gets the priority over guest, - * this means that if QEMU is using debug resources then guest - * cannot use them; - * For software breakpoint QEMU uses a privileged instruction; - * So there cannot be any reason that we are here for guest - * set debug exception, only possibility is guest executed a - * privileged / illegal instruction and that's why we are - * injecting a program interrupt. - */ + } + return handle; +} - cpu_synchronize_state(cs); - /* env->nip is PC, so increment this by 4 to use - * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. - */ - env->nip += 4; - cs->exception_index = POWERPC_EXCP_PROGRAM; - env->error_code = POWERPC_EXCP_INVAL; - ppc_cpu_do_interrupt(cs); +static int kvm_handle_singlestep(void) +{ + return 1; +} + +static int kvm_handle_sw_breakpoint(void) +{ + return 1; +} + +static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + struct kvm_debug_exit_arch *arch_info = &run->debug.arch; + + if (cs->singlestep_enabled) { + return kvm_handle_singlestep(); } - return handle; + if (arch_info->status) { + return kvm_handle_hw_breakpoint(cs, arch_info); + } + + if (kvm_find_sw_breakpoint(cs, arch_info->address)) { + return kvm_handle_sw_breakpoint(); + } + + /* + * QEMU is not able to handle debug exception, so inject + * program exception to guest; + * Yes program exception NOT debug exception !! + * When QEMU is using debug resources then debug exception must + * be always set. To achieve this we set MSR_DE and also set + * MSRP_DEP so guest cannot change MSR_DE. + * When emulating debug resource for guest we want guest + * to control MSR_DE (enable/disable debug interrupt on need). + * Supporting both configurations are NOT possible. + * So the result is that we cannot share debug resources + * between QEMU and Guest on BOOKE architecture. + * In the current design QEMU gets the priority over guest, + * this means that if QEMU is using debug resources then guest + * cannot use them; + * For software breakpoint QEMU uses a privileged instruction; + * So there cannot be any reason that we are here for guest + * set debug exception, only possibility is guest executed a + * privileged / illegal instruction and that's why we are + * injecting a program interrupt. + */ + cpu_synchronize_state(cs); + /* + * env->nip is PC, so increment this by 4 to use + * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. + */ + env->nip += 4; + cs->exception_index = POWERPC_EXCP_PROGRAM; + env->error_code = POWERPC_EXCP_INVAL; + ppc_cpu_do_interrupt(cs); + + return 0; } int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) @@ -1927,6 +1954,16 @@ uint64_t kvmppc_get_clockfreq(void) return kvmppc_read_int_cpu_dt("clock-frequency"); } +static int kvmppc_get_dec_bits(void) +{ + int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits"); + + if (nr_bits > 0) { + return nr_bits; + } + return 0; +} + static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) { PowerPCCPU *cpu = ppc_env_get_cpu(env); @@ -2007,6 +2044,11 @@ void kvmppc_enable_clear_ref_mod_hcalls(void) kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); } +void kvmppc_enable_h_page_init(void) +{ + kvmppc_enable_hcall(kvm_state, H_PAGE_INIT); +} + void kvmppc_set_papr(PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); @@ -2379,7 +2421,13 @@ static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c) static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c) { - if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { + if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) && + (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) && + (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) { + return SPAPR_CAP_FIXED_NA; + } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) { + return SPAPR_CAP_WORKAROUND; + } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { return SPAPR_CAP_FIXED_CCD; } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { return SPAPR_CAP_FIXED_IBS; @@ -2388,6 +2436,14 @@ static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c) return 0; } +static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c) +{ + if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) { + return 1; + } + return 0; +} + static void kvmppc_get_cpu_characteristics(KVMState *s) { struct kvm_ppc_cpu_char c; @@ -2410,6 +2466,8 @@ static void kvmppc_get_cpu_characteristics(KVMState *s) cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c); cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c); cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c); + cap_ppc_count_cache_flush_assist = + parse_cap_ppc_count_cache_flush_assist(c); } int kvmppc_get_cap_safe_cache(void) @@ -2427,6 +2485,11 @@ int kvmppc_get_cap_safe_indirect_branch(void) return cap_ppc_safe_indirect_branch; } +int kvmppc_get_cap_count_cache_flush_assist(void) +{ + return cap_ppc_count_cache_flush_assist; +} + bool kvmppc_has_cap_nested_kvm_hv(void) { return !!cap_ppc_nested_kvm_hv; @@ -2442,6 +2505,35 @@ bool kvmppc_has_cap_spapr_vfio(void) return cap_spapr_vfio; } +int kvmppc_get_cap_large_decr(void) +{ + return cap_large_decr; +} + +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) +{ + CPUState *cs = CPU(cpu); + uint64_t lpcr; + + kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr); + /* Do we need to modify the LPCR? */ + if (!!(lpcr & LPCR_LD) != !!enable) { + if (enable) { + lpcr |= LPCR_LD; + } else { + lpcr &= ~LPCR_LD; + } + kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr); + kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr); + + if (!!(lpcr & LPCR_LD) != !!enable) { + return -1; + } + } + + return 0; +} + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index bdfaa4e70a..2c2ea30e87 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -23,6 +23,7 @@ int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level); void kvmppc_enable_logical_ci_hcalls(void); void kvmppc_enable_set_mode_hcall(void); void kvmppc_enable_clear_ref_mod_hcalls(void); +void kvmppc_enable_h_page_init(void); void kvmppc_set_papr(PowerPCCPU *cpu); int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr); void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); @@ -62,8 +63,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void); int kvmppc_get_cap_safe_cache(void); int kvmppc_get_cap_safe_bounds_check(void); int kvmppc_get_cap_safe_indirect_branch(void); +int kvmppc_get_cap_count_cache_flush_assist(void); bool kvmppc_has_cap_nested_kvm_hv(void); int kvmppc_set_cap_nested_kvm_hv(int enable); +int kvmppc_get_cap_large_decr(void); +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_enable_hwrng(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); @@ -135,6 +139,10 @@ static inline void kvmppc_enable_clear_ref_mod_hcalls(void) { } +static inline void kvmppc_enable_h_page_init(void) +{ +} + static inline void kvmppc_set_papr(PowerPCCPU *cpu) { } @@ -322,6 +330,11 @@ static inline int kvmppc_get_cap_safe_indirect_branch(void) return 0; } +static inline int kvmppc_get_cap_count_cache_flush_assist(void) +{ + return 0; +} + static inline bool kvmppc_has_cap_nested_kvm_hv(void) { return false; @@ -332,6 +345,16 @@ static inline int kvmppc_set_cap_nested_kvm_hv(int enable) return -1; } +static inline int kvmppc_get_cap_large_decr(void) +{ + return 0; +} + +static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) +{ + return -1; +} + static inline int kvmppc_enable_hwrng(void) { return -1; diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 756b6d2971..a92d0ad3a3 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -150,7 +150,7 @@ static int get_fpr(QEMUFile *f, void *pv, size_t size, { ppc_vsr_t *v = pv; - v->u64[0] = qemu_get_be64(f); + v->VsrD(0) = qemu_get_be64(f); return 0; } @@ -160,7 +160,7 @@ static int put_fpr(QEMUFile *f, void *pv, size_t size, { ppc_vsr_t *v = pv; - qemu_put_be64(f, v->u64[0]); + qemu_put_be64(f, v->VsrD(0)); return 0; } @@ -181,7 +181,7 @@ static int get_vsr(QEMUFile *f, void *pv, size_t size, { ppc_vsr_t *v = pv; - v->u64[1] = qemu_get_be64(f); + v->VsrD(1) = qemu_get_be64(f); return 0; } @@ -191,7 +191,7 @@ static int put_vsr(QEMUFile *f, void *pv, size_t size, { ppc_vsr_t *v = pv; - qemu_put_be64(f, v->u64[1]); + qemu_put_be64(f, v->VsrD(1)); return 0; } diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index c431303eff..a2b1ec5040 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -1109,7 +1109,7 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) case POWERPC_MMU_3_00: /* P9 */ lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD | (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL | - LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | + LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | LPCR_LD | (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE | LPCR_TC | LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE | LPCR_HDICE); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 819221f246..98b37cebc2 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6677,34 +6677,22 @@ GEN_TM_PRIV_NOOP(trechkpt); static inline void get_fpr(TCGv_i64 dst, int regno) { - tcg_gen_ld_i64(dst, cpu_env, offsetof(CPUPPCState, vsr[regno].u64[0])); + tcg_gen_ld_i64(dst, cpu_env, fpr_offset(regno)); } static inline void set_fpr(int regno, TCGv_i64 src) { - tcg_gen_st_i64(src, cpu_env, offsetof(CPUPPCState, vsr[regno].u64[0])); + tcg_gen_st_i64(src, cpu_env, fpr_offset(regno)); } static inline void get_avr64(TCGv_i64 dst, int regno, bool high) { -#ifdef HOST_WORDS_BIGENDIAN - tcg_gen_ld_i64(dst, cpu_env, offsetof(CPUPPCState, - vsr[32 + regno].u64[(high ? 0 : 1)])); -#else - tcg_gen_ld_i64(dst, cpu_env, offsetof(CPUPPCState, - vsr[32 + regno].u64[(high ? 1 : 0)])); -#endif + tcg_gen_ld_i64(dst, cpu_env, avr64_offset(regno, high)); } static inline void set_avr64(int regno, TCGv_i64 src, bool high) { -#ifdef HOST_WORDS_BIGENDIAN - tcg_gen_st_i64(src, cpu_env, offsetof(CPUPPCState, - vsr[32 + regno].u64[(high ? 0 : 1)])); -#else - tcg_gen_st_i64(src, cpu_env, offsetof(CPUPPCState, - vsr[32 + regno].u64[(high ? 1 : 0)])); -#endif + tcg_gen_st_i64(src, cpu_env, avr64_offset(regno, high)); } #include "translate/fp-impl.inc.c" @@ -7417,7 +7405,7 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, #if !defined(NO_TIMER_DUMP) cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 #if !defined(CONFIG_USER_ONLY) - " DECR %08" PRIu32 + " DECR " TARGET_FMT_lu #endif "\n", cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env) diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index f1b15ae2cb..eb10c533ca 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -10,15 +10,10 @@ static inline TCGv_ptr gen_avr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(r, cpu_env, offsetof(CPUPPCState, vsr[32 + reg].u64[0])); + tcg_gen_addi_ptr(r, cpu_env, avr_full_offset(reg)); return r; } -static inline long avr64_offset(int reg, bool high) -{ - return offsetof(CPUPPCState, vsr[32 + reg].u64[(high ? 0 : 1)]); -} - #define GEN_VR_LDX(name, opc2, opc3) \ static void glue(gen_, name)(DisasContext *ctx) \ { \ @@ -205,7 +200,7 @@ static void gen_mtvscr(DisasContext *ctx) } val = tcg_temp_new_i32(); - bofs = avr64_offset(rB(ctx->opcode), true); + bofs = avr_full_offset(rB(ctx->opcode)); #ifdef HOST_WORDS_BIGENDIAN bofs += 3 * 4; #endif @@ -284,9 +279,9 @@ static void glue(gen_, name)(DisasContext *ctx) \ } \ \ tcg_op(vece, \ - avr64_offset(rD(ctx->opcode), true), \ - avr64_offset(rA(ctx->opcode), true), \ - avr64_offset(rB(ctx->opcode), true), \ + avr_full_offset(rD(ctx->opcode)), \ + avr_full_offset(rA(ctx->opcode)), \ + avr_full_offset(rB(ctx->opcode)), \ 16, 16); \ } @@ -578,10 +573,10 @@ static void glue(gen_, NAME)(DisasContext *ctx) \ gen_exception(ctx, POWERPC_EXCP_VPU); \ return; \ } \ - tcg_gen_gvec_4(avr64_offset(rD(ctx->opcode), true), \ + tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)), \ offsetof(CPUPPCState, vscr_sat), \ - avr64_offset(rA(ctx->opcode), true), \ - avr64_offset(rB(ctx->opcode), true), \ + avr_full_offset(rA(ctx->opcode)), \ + avr_full_offset(rB(ctx->opcode)), \ 16, 16, &g); \ } @@ -755,7 +750,7 @@ static void glue(gen_, name)(DisasContext *ctx) \ return; \ } \ simm = SIMM5(ctx->opcode); \ - tcg_op(avr64_offset(rD(ctx->opcode), true), 16, 16, simm); \ + tcg_op(avr_full_offset(rD(ctx->opcode)), 16, 16, simm); \ } GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12); @@ -850,8 +845,8 @@ static void gen_vsplt(DisasContext *ctx, int vece) } uimm = UIMM5(ctx->opcode); - bofs = avr64_offset(rB(ctx->opcode), true); - dofs = avr64_offset(rD(ctx->opcode), true); + bofs = avr_full_offset(rB(ctx->opcode)); + dofs = avr_full_offset(rD(ctx->opcode)); /* Experimental testing shows that hardware masks the immediate. */ bofs += (uimm << vece) & 15; diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index e73197e717..508e9199c8 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -1,54 +1,23 @@ /*** VSX extension ***/ -static inline void get_vsr(TCGv_i64 dst, int n) -{ - tcg_gen_ld_i64(dst, cpu_env, offsetof(CPUPPCState, vsr[n].u64[1])); -} - -static inline void set_vsr(int n, TCGv_i64 src) -{ - tcg_gen_st_i64(src, cpu_env, offsetof(CPUPPCState, vsr[n].u64[1])); -} - -static inline int vsr_full_offset(int n) -{ - return offsetof(CPUPPCState, vsr[n].u64[0]); -} - static inline void get_cpu_vsrh(TCGv_i64 dst, int n) { - if (n < 32) { - get_fpr(dst, n); - } else { - get_avr64(dst, n - 32, true); - } + tcg_gen_ld_i64(dst, cpu_env, vsr64_offset(n, true)); } static inline void get_cpu_vsrl(TCGv_i64 dst, int n) { - if (n < 32) { - get_vsr(dst, n); - } else { - get_avr64(dst, n - 32, false); - } + tcg_gen_ld_i64(dst, cpu_env, vsr64_offset(n, false)); } static inline void set_cpu_vsrh(int n, TCGv_i64 src) { - if (n < 32) { - set_fpr(n, src); - } else { - set_avr64(n - 32, src, true); - } + tcg_gen_st_i64(src, cpu_env, vsr64_offset(n, true)); } static inline void set_cpu_vsrl(int n, TCGv_i64 src) { - if (n < 32) { - set_vsr(n, src); - } else { - set_avr64(n - 32, src, false); - } + tcg_gen_st_i64(src, cpu_env, vsr64_offset(n, false)); } #define VSX_LOAD_SCALAR(name, operation) \ @@ -1618,8 +1587,7 @@ static void gen_xsxsigdp(DisasContext *ctx) tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0); get_cpu_vsrh(t1, xB(ctx->opcode)); - tcg_gen_andi_i64(rt, t1, 0x000FFFFFFFFFFFFF); - tcg_gen_or_i64(rt, rt, t0); + tcg_gen_deposit_i64(rt, t0, t1, 0, 52); tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -1655,8 +1623,7 @@ static void gen_xsxsigqp(DisasContext *ctx) tcg_gen_movi_i64(t0, 0x0001000000000000); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0); - tcg_gen_andi_i64(xth, xbh, 0x0000FFFFFFFFFFFF); - tcg_gen_or_i64(xth, xth, t0); + tcg_gen_deposit_i64(xth, t0, xbh, 0, 48); set_cpu_vsrh(rD(ctx->opcode) + 32, xth); tcg_gen_mov_i64(xtl, xbl); set_cpu_vsrl(rD(ctx->opcode) + 32, xtl); @@ -1726,7 +1693,6 @@ static void gen_xviexpdp(DisasContext *ctx) TCGv_i64 xal; TCGv_i64 xbh; TCGv_i64 xbl; - TCGv_i64 t0; if (unlikely(!ctx->vsx_enabled)) { gen_exception(ctx, POWERPC_EXCP_VSXU); @@ -1742,20 +1708,13 @@ static void gen_xviexpdp(DisasContext *ctx) get_cpu_vsrl(xal, xA(ctx->opcode)); get_cpu_vsrh(xbh, xB(ctx->opcode)); get_cpu_vsrl(xbl, xB(ctx->opcode)); - t0 = tcg_temp_new_i64(); - tcg_gen_andi_i64(xth, xah, 0x800FFFFFFFFFFFFF); - tcg_gen_andi_i64(t0, xbh, 0x7FF); - tcg_gen_shli_i64(t0, t0, 52); - tcg_gen_or_i64(xth, xth, t0); + tcg_gen_deposit_i64(xth, xah, xbh, 52, 11); set_cpu_vsrh(xT(ctx->opcode), xth); - tcg_gen_andi_i64(xtl, xal, 0x800FFFFFFFFFFFFF); - tcg_gen_andi_i64(t0, xbl, 0x7FF); - tcg_gen_shli_i64(t0, t0, 52); - tcg_gen_or_i64(xtl, xtl, t0); + + tcg_gen_deposit_i64(xtl, xal, xbl, 52, 11); set_cpu_vsrl(xT(ctx->opcode), xtl); - tcg_temp_free_i64(t0); tcg_temp_free_i64(xth); tcg_temp_free_i64(xtl); tcg_temp_free_i64(xah); @@ -1853,16 +1812,14 @@ static void gen_xvxsigdp(DisasContext *ctx) tcg_gen_movi_i64(t0, 0x0010000000000000); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0); - tcg_gen_andi_i64(xth, xbh, 0x000FFFFFFFFFFFFF); - tcg_gen_or_i64(xth, xth, t0); + tcg_gen_deposit_i64(xth, t0, xbh, 0, 52); set_cpu_vsrh(xT(ctx->opcode), xth); tcg_gen_extract_i64(exp, xbl, 52, 11); tcg_gen_movi_i64(t0, 0x0010000000000000); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0); - tcg_gen_andi_i64(xtl, xbl, 0x000FFFFFFFFFFFFF); - tcg_gen_or_i64(xtl, xtl, t0); + tcg_gen_deposit_i64(xth, t0, xbl, 0, 52); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free_i64(t0); diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 58542c0fe0..0bd555eb19 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8376,6 +8376,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data) #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; pcc->hash64_opts = &ppc_hash64_opts_basic; + pcc->lrg_decr_bits = 32; #endif pcc->excp_model = POWERPC_EXCP_970; pcc->bus_model = PPC_FLAGS_INPUT_970; @@ -8550,6 +8551,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; pcc->hash64_opts = &ppc_hash64_opts_POWER7; + pcc->lrg_decr_bits = 32; #endif pcc->excp_model = POWERPC_EXCP_POWER7; pcc->bus_model = PPC_FLAGS_INPUT_POWER7; @@ -8718,6 +8720,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; pcc->hash64_opts = &ppc_hash64_opts_POWER7; + pcc->lrg_decr_bits = 32; #endif pcc->excp_model = POWERPC_EXCP_POWER8; pcc->bus_model = PPC_FLAGS_INPUT_POWER7; @@ -8892,7 +8895,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | PPC_MEM_TLBSYNC | - PPC_64B | PPC_64BX | PPC_ALTIVEC | + PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC | PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD | PPC_CILDST; @@ -8904,6 +8907,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL; pcc->msr_mask = (1ull << MSR_SF) | + (1ull << MSR_SHV) | (1ull << MSR_TM) | (1ull << MSR_VR) | (1ull << MSR_VSX) | @@ -8926,6 +8930,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) /* segment page size remain the same */ pcc->hash64_opts = &ppc_hash64_opts_POWER7; pcc->radix_page_info = &POWER9_radix_page_info; + pcc->lrg_decr_bits = 56; #endif pcc->excp_model = POWERPC_EXCP_POWER9; pcc->bus_model = PPC_FLAGS_INPUT_POWER9; diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs index 22a9a9927a..68eeee3d2f 100644 --- a/target/s390x/Makefile.objs +++ b/target/s390x/Makefile.objs @@ -1,6 +1,7 @@ obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o +obj-$(CONFIG_TCG) += vec_helper.o obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o obj-$(CONFIG_SOFTMMU) += sigp.o obj-$(CONFIG_KVM) += kvm.o diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index b71ac5183d..cb6d77053a 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -257,6 +257,7 @@ extern const struct VMStateDescription vmstate_s390_cpu; /* PSW defines */ #undef PSW_MASK_PER #undef PSW_MASK_UNUSED_2 +#undef PSW_MASK_UNUSED_3 #undef PSW_MASK_DAT #undef PSW_MASK_IO #undef PSW_MASK_EXT @@ -276,6 +277,7 @@ extern const struct VMStateDescription vmstate_s390_cpu; #define PSW_MASK_PER 0x4000000000000000ULL #define PSW_MASK_UNUSED_2 0x2000000000000000ULL +#define PSW_MASK_UNUSED_3 0x1000000000000000ULL #define PSW_MASK_DAT 0x0400000000000000ULL #define PSW_MASK_IO 0x0200000000000000ULL #define PSW_MASK_EXT 0x0100000000000000ULL @@ -323,12 +325,14 @@ extern const struct VMStateDescription vmstate_s390_cpu; /* we'll use some unused PSW positions to store CR flags in tb flags */ #define FLAG_MASK_AFP (PSW_MASK_UNUSED_2 >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_VECTOR (PSW_MASK_UNUSED_3 >> FLAG_MASK_PSW_SHIFT) /* Control register 0 bits */ #define CR0_LOWPROT 0x0000000010000000ULL #define CR0_SECONDARY 0x0000000004000000ULL #define CR0_EDAT 0x0000000000800000ULL #define CR0_AFP 0x0000000000040000ULL +#define CR0_VECTOR 0x0000000000020000ULL #define CR0_EMERGENCY_SIGNAL_SC 0x0000000000004000ULL #define CR0_EXTERNAL_CALL_SC 0x0000000000002000ULL #define CR0_CKC_SC 0x0000000000000800ULL @@ -373,6 +377,9 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, if (env->cregs[0] & CR0_AFP) { *flags |= FLAG_MASK_AFP; } + if (env->cregs[0] & CR0_VECTOR) { + *flags |= FLAG_MASK_VECTOR; + } } /* PER bits from control register 9 */ diff --git a/target/s390x/helper.h b/target/s390x/helper.h index bb659257f6..0b494a2fd2 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -123,6 +123,27 @@ DEF_HELPER_4(cu42, i32, env, i32, i32, i32) DEF_HELPER_5(msa, i32, env, i32, i32, i32, i32) DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env) DEF_HELPER_FLAGS_1(stck, TCG_CALL_NO_RWG_SE, i64, env) +DEF_HELPER_FLAGS_3(probe_write_access, TCG_CALL_NO_WG, void, env, i64, i64) + +/* === Vector Support Instructions === */ +DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64) +DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpk64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpks16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpks32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpks64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vpks_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpks_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpks_cc64, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vpkls16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpkls32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpkls64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vpkls_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpkls_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index 61b750a855..71fa9b8d6c 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -972,6 +972,88 @@ D(0xb93e, KIMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KIMD) D(0xb93f, KLMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KLMD) +/* === Vector Support Instructions === */ + +/* VECTOR GATHER ELEMENT */ + E(0xe713, VGEF, VRV, V, la2, 0, 0, 0, vge, 0, ES_32, IF_VEC) + E(0xe712, VGEG, VRV, V, la2, 0, 0, 0, vge, 0, ES_64, IF_VEC) +/* VECTOR GENERATE BYTE MASK */ + F(0xe744, VGBM, VRI_a, V, 0, 0, 0, 0, vgbm, 0, IF_VEC) +/* VECTOR GENERATE MASK */ + F(0xe746, VGM, VRI_b, V, 0, 0, 0, 0, vgm, 0, IF_VEC) +/* VECTOR LOAD */ + F(0xe706, VL, VRX, V, la2, 0, 0, 0, vl, 0, IF_VEC) + F(0xe756, VLR, VRR_a, V, 0, 0, 0, 0, vlr, 0, IF_VEC) +/* VECTOR LOAD AND REPLICATE */ + F(0xe705, VLREP, VRX, V, la2, 0, 0, 0, vlrep, 0, IF_VEC) +/* VECTOR LOAD ELEMENT */ + E(0xe700, VLEB, VRX, V, la2, 0, 0, 0, vle, 0, ES_8, IF_VEC) + E(0xe701, VLEH, VRX, V, la2, 0, 0, 0, vle, 0, ES_16, IF_VEC) + E(0xe703, VLEF, VRX, V, la2, 0, 0, 0, vle, 0, ES_32, IF_VEC) + E(0xe702, VLEG, VRX, V, la2, 0, 0, 0, vle, 0, ES_64, IF_VEC) +/* VECTOR LOAD ELEMENT IMMEDIATE */ + E(0xe740, VLEIB, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_8, IF_VEC) + E(0xe741, VLEIH, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_16, IF_VEC) + E(0xe743, VLEIF, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_32, IF_VEC) + E(0xe742, VLEIG, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_64, IF_VEC) +/* VECTOR LOAD GR FROM VR ELEMENT */ + F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC) +/* VECTOR LOAD LOGICAL ELEMENT AND ZERO */ + F(0xe704, VLLEZ, VRX, V, la2, 0, 0, 0, vllez, 0, IF_VEC) +/* VECTOR LOAD MULTIPLE */ + F(0xe736, VLM, VRS_a, V, la2, 0, 0, 0, vlm, 0, IF_VEC) +/* VECTOR LOAD TO BLOCK BOUNDARY */ + F(0xe707, VLBB, VRX, V, la2, 0, 0, 0, vlbb, 0, IF_VEC) +/* VECTOR LOAD VR ELEMENT FROM GR */ + F(0xe722, VLVG, VRS_b, V, la2, r3, 0, 0, vlvg, 0, IF_VEC) +/* VECTOR LOAD VR FROM GRS DISJOINT */ + F(0xe762, VLVGP, VRR_f, V, r2, r3, 0, 0, vlvgp, 0, IF_VEC) +/* VECTOR LOAD WITH LENGTH */ + F(0xe737, VLL, VRS_b, V, la2, r3_32u, 0, 0, vll, 0, IF_VEC) +/* VECTOR MERGE HIGH */ + F(0xe761, VMRH, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC) +/* VECTOR MERGE LOW */ + F(0xe760, VMRL, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC) +/* VECTOR PACK */ + F(0xe794, VPK, VRR_c, V, 0, 0, 0, 0, vpk, 0, IF_VEC) +/* VECTOR PACK SATURATE */ + F(0xe797, VPKS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC) +/* VECTOR PACK LOGICAL SATURATE */ + F(0xe795, VPKLS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC) + F(0xe78c, VPERM, VRR_e, V, 0, 0, 0, 0, vperm, 0, IF_VEC) +/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ + F(0xe784, VPDI, VRR_c, V, 0, 0, 0, 0, vpdi, 0, IF_VEC) +/* VECTOR REPLICATE */ + F(0xe74d, VREP, VRI_c, V, 0, 0, 0, 0, vrep, 0, IF_VEC) +/* VECTOR REPLICATE IMMEDIATE */ + F(0xe745, VREPI, VRI_a, V, 0, 0, 0, 0, vrepi, 0, IF_VEC) +/* VECTOR SCATTER ELEMENT */ + E(0xe71b, VSCEF, VRV, V, la2, 0, 0, 0, vsce, 0, ES_32, IF_VEC) + E(0xe71a, VSCEG, VRV, V, la2, 0, 0, 0, vsce, 0, ES_64, IF_VEC) +/* VECTOR SELECT */ + F(0xe78d, VSEL, VRR_e, V, 0, 0, 0, 0, vsel, 0, IF_VEC) +/* VECTOR SIGN EXTEND TO DOUBLEWORD */ + F(0xe75f, VSEG, VRR_a, V, 0, 0, 0, 0, vseg, 0, IF_VEC) +/* VECTOR STORE */ + F(0xe70e, VST, VRX, V, la2, 0, 0, 0, vst, 0, IF_VEC) +/* VECTOR STORE ELEMENT */ + E(0xe708, VSTEB, VRX, V, la2, 0, 0, 0, vste, 0, ES_8, IF_VEC) + E(0xe709, VSTEH, VRX, V, la2, 0, 0, 0, vste, 0, ES_16, IF_VEC) + E(0xe70b, VSTEF, VRX, V, la2, 0, 0, 0, vste, 0, ES_32, IF_VEC) + E(0xe70a, VSTEG, VRX, V, la2, 0, 0, 0, vste, 0, ES_64, IF_VEC) +/* VECTOR STORE MULTIPLE */ + F(0xe73e, VSTM, VRS_a, V, la2, 0, 0, 0, vstm, 0, IF_VEC) +/* VECTOR STORE WITH LENGTH */ + F(0xe73f, VSTL, VRS_b, V, la2, r3_32u, 0, 0, vstl, 0, IF_VEC) +/* VECTOR UNPACK HIGH */ + F(0xe7d7, VUPH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC) +/* VECTOR UNPACK LOGICAL HIGH */ + F(0xe7d5, VUPLH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC) +/* VECTOR UNPACK LOW */ + F(0xe7d6, VUPL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC) +/* VECTOR UNPACK LOGICAL LOW */ + F(0xe7d4, VUPLL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC) + #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV) diff --git a/target/s390x/insn-format.def b/target/s390x/insn-format.def index 4297ff4165..6253edbd19 100644 --- a/target/s390x/insn-format.def +++ b/target/s390x/insn-format.def @@ -54,3 +54,28 @@ F4(SS_e, R(1, 8), BD(2,16,20), R(3,12), BD(4,32,36)) F3(SS_f, BD(1,16,20), L(2,8,8), BD(2,32,36)) F2(SSE, BD(1,16,20), BD(2,32,36)) F3(SSF, BD(1,16,20), BD(2,32,36), R(3,8)) +F3(VRI_a, V(1,8), I(2,16,16), M(3,32)) +F4(VRI_b, V(1,8), I(2,16,8), I(3,24,8), M(4,32)) +F4(VRI_c, V(1,8), V(3,12), I(2,16,16), M(4,32)) +F5(VRI_d, V(1,8), V(2,12), V(3,16), I(4,24,8), M(5,32)) +F5(VRI_e, V(1,8), V(2,12), I(3,16,12), M(5,28), M(4,32)) +F5(VRI_f, V(1,8), V(2,12), V(3,16), M(5,24), I(4,28,8)) +F5(VRI_g, V(1,8), V(2,12), I(4,16,8), M(5,24), I(3,28,8)) +F3(VRI_h, V(1,8), I(2,16,16), I(3,32,4)) +F4(VRI_i, V(1,8), R(2,12), M(4,24), I(3,28,8)) +F5(VRR_a, V(1,8), V(2,12), M(5,24), M(4,28), M(3,32)) +F5(VRR_b, V(1,8), V(2,12), V(3,16), M(5,24), M(4,32)) +F6(VRR_c, V(1,8), V(2,12), V(3,16), M(6,24), M(5,28), M(4,32)) +F6(VRR_d, V(1,8), V(2,12), V(3,16), M(5,20), M(6,24), V(4,32)) +F6(VRR_e, V(1,8), V(2,12), V(3,16), M(6,20), M(5,28), V(4,32)) +F3(VRR_f, V(1,8), R(2,12), R(3,16)) +F1(VRR_g, V(1,12)) +F3(VRR_h, V(1,12), V(2,16), M(3,24)) +F3(VRR_i, R(1,8), V(2,12), M(3,24)) +F4(VRS_a, V(1,8), V(3,12), BD(2,16,20), M(4,32)) +F4(VRS_b, V(1,8), R(3,12), BD(2,16,20), M(4,32)) +F4(VRS_c, R(1,8), V(3,12), BD(2,16,20), M(4,32)) +F3(VRS_d, R(3,12), BD(2,16,20), V(1,32)) +F4(VRV, V(1,8), V(2,12), BD(2,16,20), M(3,32)) +F3(VRX, V(1,8), BXD(2), M(3,32)) +F3(VSI, I(3,8,8), BD(2,16,20), V(1,32)) diff --git a/target/s390x/internal.h b/target/s390x/internal.h index 7baf0e2404..3b4855c175 100644 --- a/target/s390x/internal.h +++ b/target/s390x/internal.h @@ -63,45 +63,7 @@ typedef struct LowCore { PSW program_new_psw; /* 0x1d0 */ PSW mcck_new_psw; /* 0x1e0 */ PSW io_new_psw; /* 0x1f0 */ - PSW return_psw; /* 0x200 */ - uint8_t irb[64]; /* 0x210 */ - uint64_t sync_enter_timer; /* 0x250 */ - uint64_t async_enter_timer; /* 0x258 */ - uint64_t exit_timer; /* 0x260 */ - uint64_t last_update_timer; /* 0x268 */ - uint64_t user_timer; /* 0x270 */ - uint64_t system_timer; /* 0x278 */ - uint64_t last_update_clock; /* 0x280 */ - uint64_t steal_clock; /* 0x288 */ - PSW return_mcck_psw; /* 0x290 */ - uint8_t pad9[0xc00 - 0x2a0]; /* 0x2a0 */ - /* System info area */ - uint64_t save_area[16]; /* 0xc00 */ - uint8_t pad10[0xd40 - 0xc80]; /* 0xc80 */ - uint64_t kernel_stack; /* 0xd40 */ - uint64_t thread_info; /* 0xd48 */ - uint64_t async_stack; /* 0xd50 */ - uint64_t kernel_asce; /* 0xd58 */ - uint64_t user_asce; /* 0xd60 */ - uint64_t panic_stack; /* 0xd68 */ - uint64_t user_exec_asce; /* 0xd70 */ - uint8_t pad11[0xdc0 - 0xd78]; /* 0xd78 */ - - /* SMP info area: defined by DJB */ - uint64_t clock_comparator; /* 0xdc0 */ - uint64_t ext_call_fast; /* 0xdc8 */ - uint64_t percpu_offset; /* 0xdd0 */ - uint64_t current_task; /* 0xdd8 */ - uint32_t softirq_pending; /* 0xde0 */ - uint32_t pad_0x0de4; /* 0xde4 */ - uint64_t int_clock; /* 0xde8 */ - uint8_t pad12[0xe00 - 0xdf0]; /* 0xdf0 */ - - /* 0xe00 is used as indicator for dump tools */ - /* whether the kernel died with panic() or not */ - uint32_t panic_magic; /* 0xe00 */ - - uint8_t pad13[0x11b0 - 0xe04]; /* 0xe04 */ + uint8_t pad13[0x11b0 - 0x200]; /* 0x200 */ uint64_t mcesad; /* 0x11B0 */ @@ -130,6 +92,7 @@ typedef struct LowCore { uint8_t pad18[0x2000 - 0x1400]; /* 0x1400 */ } QEMU_PACKED LowCore; +QEMU_BUILD_BUG_ON(sizeof(LowCore) != 8192); #endif /* CONFIG_USER_ONLY */ #define MAX_ILEN 6 @@ -386,6 +349,8 @@ void ioinst_handle_sal(S390CPU *cpu, uint64_t reg1, uintptr_t ra); /* mem_helper.c */ target_ulong mmu_real2abs(CPUS390XState *env, target_ulong raddr); +void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len, + uintptr_t ra); /* mmu_helper.c */ diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index a506d9ef99..3f76a8abfd 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -2623,3 +2623,29 @@ uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) return convert_unicode(env, r1, r2, m3, GETPC(), decode_utf32, encode_utf16); } + +void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + if (!h2g_valid(addr) || !h2g_valid(addr + len - 1) || + page_check_range(addr, len, PAGE_WRITE) < 0) { + s390_program_interrupt(env, PGM_ADDRESSING, ILEN_AUTO, ra); + } +#else + /* test the actual access, not just any access to the page due to LAP */ + while (len) { + const uint64_t pagelen = -(addr | -TARGET_PAGE_MASK); + const uint64_t curlen = MIN(pagelen, len); + + probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra); + addr = wrap_address(env, addr + curlen); + len -= curlen; + } +#endif +} + +void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len) +{ + probe_write_access(env, addr, len, GETPC()); +} diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 41fb466bb4..0afa8f7ca5 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -34,6 +34,7 @@ #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg-op.h" +#include "tcg-op-gvec.h" #include "qemu/log.h" #include "qemu/host-utils.h" #include "exec/cpu_ldst.h" @@ -985,6 +986,7 @@ static void free_compare(DisasCompare *c) #define F3(N, X1, X2, X3) F0(N) #define F4(N, X1, X2, X3, X4) F0(N) #define F5(N, X1, X2, X3, X4, X5) F0(N) +#define F6(N, X1, X2, X3, X4, X5, X6) F0(N) typedef enum { #include "insn-format.def" @@ -996,6 +998,7 @@ typedef enum { #undef F3 #undef F4 #undef F5 +#undef F6 /* Define a structure to hold the decoded fields. We'll store each inside an array indexed by an enum. In order to conserve memory, we'll arrange @@ -1010,6 +1013,8 @@ enum DisasFieldIndexO { FLD_O_m1, FLD_O_m3, FLD_O_m4, + FLD_O_m5, + FLD_O_m6, FLD_O_b1, FLD_O_b2, FLD_O_b4, @@ -1023,7 +1028,11 @@ enum DisasFieldIndexO { FLD_O_i2, FLD_O_i3, FLD_O_i4, - FLD_O_i5 + FLD_O_i5, + FLD_O_v1, + FLD_O_v2, + FLD_O_v3, + FLD_O_v4, }; enum DisasFieldIndexC { @@ -1031,6 +1040,7 @@ enum DisasFieldIndexC { FLD_C_m1 = 0, FLD_C_b1 = 0, FLD_C_i1 = 0, + FLD_C_v1 = 0, FLD_C_r2 = 1, FLD_C_b2 = 1, @@ -1039,20 +1049,25 @@ enum DisasFieldIndexC { FLD_C_r3 = 2, FLD_C_m3 = 2, FLD_C_i3 = 2, + FLD_C_v3 = 2, FLD_C_m4 = 3, FLD_C_b4 = 3, FLD_C_i4 = 3, FLD_C_l1 = 3, + FLD_C_v4 = 3, FLD_C_i5 = 4, FLD_C_d1 = 4, + FLD_C_m5 = 4, FLD_C_d2 = 5, + FLD_C_m6 = 5, FLD_C_d4 = 6, FLD_C_x2 = 6, FLD_C_l2 = 6, + FLD_C_v2 = 6, NUM_C_FIELD = 7 }; @@ -1097,6 +1112,7 @@ typedef struct DisasFormatInfo { #define R(N, B) { B, 4, 0, FLD_C_r##N, FLD_O_r##N } #define M(N, B) { B, 4, 0, FLD_C_m##N, FLD_O_m##N } +#define V(N, B) { B, 4, 3, FLD_C_v##N, FLD_O_v##N } #define BD(N, BB, BD) { BB, 4, 0, FLD_C_b##N, FLD_O_b##N }, \ { BD, 12, 0, FLD_C_d##N, FLD_O_d##N } #define BXD(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \ @@ -1116,6 +1132,7 @@ typedef struct DisasFormatInfo { #define F3(N, X1, X2, X3) { { X1, X2, X3 } }, #define F4(N, X1, X2, X3, X4) { { X1, X2, X3, X4 } }, #define F5(N, X1, X2, X3, X4, X5) { { X1, X2, X3, X4, X5 } }, +#define F6(N, X1, X2, X3, X4, X5, X6) { { X1, X2, X3, X4, X5, X6 } }, static const DisasFormatInfo format_info[] = { #include "insn-format.def" @@ -1127,8 +1144,10 @@ static const DisasFormatInfo format_info[] = { #undef F3 #undef F4 #undef F5 +#undef F6 #undef R #undef M +#undef V #undef BD #undef BXD #undef BDL @@ -1185,6 +1204,7 @@ typedef struct { #define IF_BFP 0x0008 /* binary floating point instruction */ #define IF_DFP 0x0010 /* decimal floating point instruction */ #define IF_PRIV 0x0020 /* privileged instruction */ +#define IF_VEC 0x0040 /* vector instruction */ struct DisasInsn { unsigned opc:16; @@ -5101,6 +5121,8 @@ static DisasJumpType op_mpcifc(DisasContext *s, DisasOps *o) } #endif +#include "translate_vx.inc.c" + /* ====================================================================== */ /* The "Cc OUTput" generators. Given the generated output (and in some cases the original inputs), update the various cc data structures in order to @@ -5772,6 +5794,13 @@ static void in2_r3_sr32(DisasContext *s, DisasFields *f, DisasOps *o) } #define SPEC_in2_r3_sr32 0 +static void in2_r3_32u(DisasContext *s, DisasFields *f, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(f, r3)]); +} +#define SPEC_in2_r3_32u 0 + static void in2_r2_32s(DisasContext *s, DisasFields *f, DisasOps *o) { o->in2 = tcg_temp_new_i64(); @@ -6119,6 +6148,25 @@ static void extract_field(DisasFields *o, const DisasField *f, uint64_t insn) case 2: /* dl+dh split, signed 20 bit. */ r = ((int8_t)r << 12) | (r >> 8); break; + case 3: /* MSB stored in RXB */ + g_assert(f->size == 4); + switch (f->beg) { + case 8: + r |= extract64(insn, 63 - 36, 1) << 4; + break; + case 12: + r |= extract64(insn, 63 - 37, 1) << 4; + break; + case 16: + r |= extract64(insn, 63 - 38, 1) << 4; + break; + case 32: + r |= extract64(insn, 63 - 39, 1) << 4; + break; + default: + g_assert_not_reached(); + } + break; default: abort(); } @@ -6300,11 +6348,22 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (insn->flags & IF_DFP) { dxc = 3; } + if (insn->flags & IF_VEC) { + dxc = 0xfe; + } if (dxc) { gen_data_exception(dxc); return DISAS_NORETURN; } } + + /* if vector instructions not enabled, executing them is forbidden */ + if (insn->flags & IF_VEC) { + if (!((s->base.tb->flags & FLAG_MASK_VECTOR))) { + gen_data_exception(0xfe); + return DISAS_NORETURN; + } + } } /* Check for insn specification exceptions. */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c new file mode 100644 index 0000000000..76f9a5d939 --- /dev/null +++ b/target/s390x/translate_vx.inc.c @@ -0,0 +1,935 @@ +/* + * QEMU TCG support -- s390x vector instruction translation functions + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* + * For most instructions that use the same element size for reads and + * writes, we can use real gvec vector expansion, which potantially uses + * real host vector instructions. As they only work up to 64 bit elements, + * 128 bit elements (vector is a single element) have to be handled + * differently. Operations that are too complicated to encode via TCG ops + * are handled via gvec ool (out-of-line) handlers. + * + * As soon as instructions use different element sizes for reads and writes + * or access elements "out of their element scope" we expand them manually + * in fancy loops, as gvec expansion does not deal with actual element + * numbers and does also not support access to other elements. + * + * 128 bit elements: + * As we only have i32/i64, such elements have to be loaded into two + * i64 values and can then be processed e.g. by tcg_gen_add2_i64. + * + * Sizes: + * On s390x, the operand size (oprsz) and the maximum size (maxsz) are + * always 16 (128 bit). What gvec code calls "vece", s390x calls "es", + * a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only + * 128 bit element size has to be treated in a special way (MO_64 + 1). + * We will use ES_* instead of MO_* for this reason in this file. + * + * CC handling: + * As gvec ool-helpers can currently not return values (besides via + * pointers like vectors or cpu_env), whenever we have to set the CC and + * can't conclude the value from the result vector, we will directly + * set it in "env->cc_op" and mark it as static via set_cc_static()". + * Whenever this is done, the helper writes globals (cc_op). + */ + +#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es)) +#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es)) +#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE) + +#define ES_8 MO_8 +#define ES_16 MO_16 +#define ES_32 MO_32 +#define ES_64 MO_64 +#define ES_128 4 + +static inline bool valid_vec_element(uint8_t enr, TCGMemOp es) +{ + return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1)); +} + +static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr, + TCGMemOp memop) +{ + const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE); + + switch (memop) { + case ES_8: + tcg_gen_ld8u_i64(dst, cpu_env, offs); + break; + case ES_16: + tcg_gen_ld16u_i64(dst, cpu_env, offs); + break; + case ES_32: + tcg_gen_ld32u_i64(dst, cpu_env, offs); + break; + case ES_8 | MO_SIGN: + tcg_gen_ld8s_i64(dst, cpu_env, offs); + break; + case ES_16 | MO_SIGN: + tcg_gen_ld16s_i64(dst, cpu_env, offs); + break; + case ES_32 | MO_SIGN: + tcg_gen_ld32s_i64(dst, cpu_env, offs); + break; + case ES_64: + case ES_64 | MO_SIGN: + tcg_gen_ld_i64(dst, cpu_env, offs); + break; + default: + g_assert_not_reached(); + } +} + +static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr, + TCGMemOp memop) +{ + const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE); + + switch (memop) { + case ES_8: + tcg_gen_st8_i64(src, cpu_env, offs); + break; + case ES_16: + tcg_gen_st16_i64(src, cpu_env, offs); + break; + case ES_32: + tcg_gen_st32_i64(src, cpu_env, offs); + break; + case ES_64: + tcg_gen_st_i64(src, cpu_env, offs); + break; + default: + g_assert_not_reached(); + } +} + + +static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, + uint8_t es) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + /* mask off invalid parts from the element nr */ + tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1); + + /* convert it to an element offset relative to cpu_env (vec_reg_offset() */ + tcg_gen_shli_i64(tmp, tmp, es); +#ifndef HOST_WORDS_BIGENDIAN + tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es)); +#endif + tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg)); + + /* generate the final ptr by adding cpu_env */ + tcg_gen_trunc_i64_ptr(ptr, tmp); + tcg_gen_add_ptr(ptr, ptr, cpu_env); + + tcg_temp_free_i64(tmp); +} + +#define gen_gvec_3_ool(v1, v2, v3, data, fn) \ + tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, data, fn) +#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \ + tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), ptr, 16, 16, data, fn) +#define gen_gvec_4(v1, v2, v3, v4, gen) \ + tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + 16, 16, gen) +#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \ + tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + 16, 16, data, fn) +#define gen_gvec_dup_i64(es, v1, c) \ + tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c) +#define gen_gvec_mov(v1, v2) \ + tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ + 16) +#define gen_gvec_dup64i(v1, c) \ + tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c) + +static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c) +{ + switch (es) { + case ES_8: + tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c); + break; + case ES_16: + tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c); + break; + case ES_32: + tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c); + break; + case ES_64: + gen_gvec_dup64i(reg, c); + break; + default: + g_assert_not_reached(); + } +} + +static void zero_vec(uint8_t reg) +{ + tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0); +} + +static DisasJumpType op_vge(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s->fields, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es); + tcg_gen_add_i64(o->addr1, o->addr1, tmp); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0); + + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static uint64_t generate_byte_mask(uint8_t mask) +{ + uint64_t r = 0; + int i; + + for (i = 0; i < 8; i++) { + if ((mask >> i) & 1) { + r |= 0xffull << (i * 8); + } + } + return r; +} + +static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) +{ + const uint16_t i2 = get_field(s->fields, i2); + + if (i2 == (i2 & 0xff) * 0x0101) { + /* + * Masks for both 64 bit elements of the vector are the same. + * Trust tcg to produce a good constant loading. + */ + gen_gvec_dup64i(get_field(s->fields, v1), + generate_byte_mask(i2 & 0xff)); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8)); + write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64); + tcg_gen_movi_i64(t, generate_byte_mask(i2)); + write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64); + tcg_temp_free_i64(t); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t bits = NUM_VEC_ELEMENT_BITS(es); + const uint8_t i2 = get_field(s->fields, i2) & (bits - 1); + const uint8_t i3 = get_field(s->fields, i3) & (bits - 1); + uint64_t mask = 0; + int i; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* generate the mask - take care of wrapping */ + for (i = i2; ; i = (i + 1) % bits) { + mask |= 1ull << (bits - i - 1); + if (i == i3) { + break; + } + } + + gen_gvec_dupi(es, get_field(s->fields, v1), mask); + return DISAS_NEXT; +} + +static DisasJumpType op_vl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64); + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vlr(DisasContext *s, DisasOps *o) +{ + gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m3); + TCGv_i64 tmp; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vle(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s->fields, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlei(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s->fields, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_const_i64((int16_t)get_field(s->fields, i2)); + write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + TCGv_ptr ptr; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* fast path if we don't need the register content */ + if (!get_field(s->fields, b2)) { + uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1); + + read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es); + return DISAS_NEXT; + } + + ptr = tcg_temp_new_ptr(); + get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es); + switch (es) { + case ES_8: + tcg_gen_ld8u_i64(o->out, ptr, 0); + break; + case ES_16: + tcg_gen_ld16u_i64(o->out, ptr, 0); + break; + case ES_32: + tcg_gen_ld32u_i64(o->out, ptr, 0); + break; + case ES_64: + tcg_gen_ld_i64(o->out, ptr, 0); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_ptr(ptr); + + return DISAS_NEXT; +} + +static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) +{ + uint8_t es = get_field(s->fields, m3); + uint8_t enr; + TCGv_i64 t; + + switch (es) { + /* rightmost sub-element of leftmost doubleword */ + case ES_8: + enr = 7; + break; + case ES_16: + enr = 3; + break; + case ES_32: + enr = 1; + break; + case ES_64: + enr = 0; + break; + /* leftmost sub-element of leftmost doubleword */ + case 6: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + es = ES_32; + enr = 0; + break; + } + default: + /* fallthrough */ + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es); + zero_vec(get_field(s->fields, v1)); + write_vec_element_i64(t, get_field(s->fields, v1), enr, es); + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_vlm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s->fields, v3); + uint8_t v1 = get_field(s->fields, v1); + TCGv_i64 t0, t1; + + if (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* + * Check for possible access exceptions by trying to load the last + * element. The first element will be checked first next. + */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8); + tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ); + + for (;; v1++) { + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t1, v1, 0, ES_64); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t1, v1, 1, ES_64); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + + /* Store the last element, loaded first */ + write_vec_element_i64(t0, v1, 1, ES_64); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o) +{ + const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6)); + const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1)); + TCGv_ptr a0; + TCGv_i64 bytes; + + if (get_field(s->fields, m3) > 6) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + bytes = tcg_temp_new_i64(); + a0 = tcg_temp_new_ptr(); + /* calculate the number of bytes until the next block boundary */ + tcg_gen_ori_i64(bytes, o->addr1, -block_size); + tcg_gen_neg_i64(bytes, bytes); + + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vll(cpu_env, a0, o->addr1, bytes); + tcg_temp_free_i64(bytes); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + TCGv_ptr ptr; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* fast path if we don't need the register content */ + if (!get_field(s->fields, b2)) { + uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1); + + write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es); + return DISAS_NEXT; + } + + ptr = tcg_temp_new_ptr(); + get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es); + switch (es) { + case ES_8: + tcg_gen_st8_i64(o->in2, ptr, 0); + break; + case ES_16: + tcg_gen_st16_i64(o->in2, ptr, 0); + break; + case ES_32: + tcg_gen_st32_i64(o->in2, ptr, 0); + break; + case ES_64: + tcg_gen_st_i64(o->in2, ptr, 0); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_ptr(ptr); + + return DISAS_NEXT; +} + +static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o) +{ + write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64); + write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64); + return DISAS_NEXT; +} + +static DisasJumpType op_vll(DisasContext *s, DisasOps *o) +{ + const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1)); + TCGv_ptr a0 = tcg_temp_new_ptr(); + + /* convert highest index into an actual length */ + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vll(cpu_env, a0, o->addr1, o->in2); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vmr(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s->fields, v1); + const uint8_t v2 = get_field(s->fields, v2); + const uint8_t v3 = get_field(s->fields, v3); + const uint8_t es = get_field(s->fields, m4); + int dst_idx, src_idx; + TCGv_i64 tmp; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + if (s->fields->op2 == 0x61) { + /* iterate backwards to avoid overwriting data we might need later */ + for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) { + src_idx = dst_idx / 2; + if (dst_idx % 2 == 0) { + read_vec_element_i64(tmp, v2, src_idx, es); + } else { + read_vec_element_i64(tmp, v3, src_idx, es); + } + write_vec_element_i64(tmp, v1, dst_idx, es); + } + } else { + /* iterate forward to avoid overwriting data we might need later */ + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) { + src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2; + if (dst_idx % 2 == 0) { + read_vec_element_i64(tmp, v2, src_idx, es); + } else { + read_vec_element_i64(tmp, v3, src_idx, es); + } + write_vec_element_i64(tmp, v1, dst_idx, es); + } + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vpk(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s->fields, v1); + const uint8_t v2 = get_field(s->fields, v2); + const uint8_t v3 = get_field(s->fields, v3); + const uint8_t es = get_field(s->fields, m4); + static gen_helper_gvec_3 * const vpk[3] = { + gen_helper_gvec_vpk16, + gen_helper_gvec_vpk32, + gen_helper_gvec_vpk64, + }; + static gen_helper_gvec_3 * const vpks[3] = { + gen_helper_gvec_vpks16, + gen_helper_gvec_vpks32, + gen_helper_gvec_vpks64, + }; + static gen_helper_gvec_3_ptr * const vpks_cc[3] = { + gen_helper_gvec_vpks_cc16, + gen_helper_gvec_vpks_cc32, + gen_helper_gvec_vpks_cc64, + }; + static gen_helper_gvec_3 * const vpkls[3] = { + gen_helper_gvec_vpkls16, + gen_helper_gvec_vpkls32, + gen_helper_gvec_vpkls64, + }; + static gen_helper_gvec_3_ptr * const vpkls_cc[3] = { + gen_helper_gvec_vpkls_cc16, + gen_helper_gvec_vpkls_cc32, + gen_helper_gvec_vpkls_cc64, + }; + + if (es == ES_8 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields->op2) { + case 0x97: + if (get_field(s->fields, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]); + } + break; + case 0x95: + if (get_field(s->fields, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]); + } + break; + case 0x94: + /* If sources and destination dont't overlap -> fast path */ + if (v1 != v2 && v1 != v3) { + const uint8_t src_es = get_field(s->fields, m4); + const uint8_t dst_es = src_es - 1; + TCGv_i64 tmp = tcg_temp_new_i64(); + int dst_idx, src_idx; + + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) { + src_idx = dst_idx; + if (src_idx < NUM_VEC_ELEMENTS(src_es)) { + read_vec_element_i64(tmp, v2, src_idx, src_es); + } else { + src_idx -= NUM_VEC_ELEMENTS(src_es); + read_vec_element_i64(tmp, v3, src_idx, src_es); + } + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + tcg_temp_free_i64(tmp); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]); + } + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vperm(DisasContext *s, DisasOps *o) +{ + gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + 0, gen_helper_gvec_vperm); + return DISAS_NEXT; +} + +static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o) +{ + const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1); + const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64); + read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64); + write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vrep(DisasContext *s, DisasOps *o) +{ + const uint8_t enr = get_field(s->fields, i2); + const uint8_t es = get_field(s->fields, m4); + + if (es > ES_64 || !valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)), + vec_reg_offset(get_field(s->fields, v3), enr, es), + 16, 16); + return DISAS_NEXT; +} + +static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) +{ + const int64_t data = (int16_t)get_field(s->fields, i2); + const uint8_t es = get_field(s->fields, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_dupi(es, get_field(s->fields, v1), data); + return DISAS_NEXT; +} + +static DisasJumpType op_vsce(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s->fields, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es); + tcg_gen_add_i64(o->addr1, o->addr1, tmp); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0); + + read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + /* bit in c not set -> copy bit from b */ + tcg_gen_andc_i64(t, b, c); + /* bit in c set -> copy bit from a */ + tcg_gen_and_i64(d, a, c); + /* merge the results */ + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b, + TCGv_vec c) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_andc_vec(vece, t, b, c); + tcg_gen_and_vec(vece, d, a, c); + tcg_gen_or_vec(vece, d, d, t); + tcg_temp_free_vec(t); +} + +static DisasJumpType op_vsel(DisasContext *s, DisasOps *o) +{ + static const GVecGen4 gvec_op = { + .fni8 = gen_sel_i64, + .fniv = gen_sel_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + + gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op); + return DISAS_NEXT; +} + +static DisasJumpType op_vseg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m3); + int idx1, idx2; + TCGv_i64 tmp; + + switch (es) { + case ES_8: + idx1 = 7; + idx2 = 15; + break; + case ES_16: + idx1 = 3; + idx2 = 7; + break; + case ES_32: + idx1 = 1; + idx2 = 3; + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN); + write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64); + read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN); + write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vst(DisasContext *s, DisasOps *o) +{ + TCGv_i64 tmp = tcg_const_i64(16); + + /* Probe write access before actually modifying memory */ + gen_helper_probe_write_access(cpu_env, o->addr1, tmp); + + read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vste(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s->fields, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s->fields, v3); + uint8_t v1 = get_field(s->fields, v1); + TCGv_i64 tmp; + + while (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* Probe write access before actually modifying memory */ + tmp = tcg_const_i64((v3 - v1 + 1) * 16); + gen_helper_probe_write_access(cpu_env, o->addr1, tmp); + + for (;; v1++) { + read_vec_element_i64(tmp, v1, 0, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + read_vec_element_i64(tmp, v1, 1, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstl(DisasContext *s, DisasOps *o) +{ + const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1)); + TCGv_ptr a0 = tcg_temp_new_ptr(); + + /* convert highest index into an actual length */ + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vstl(cpu_env, a0, o->addr1, o->in2); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vup(DisasContext *s, DisasOps *o) +{ + const bool logical = s->fields->op2 == 0xd4 || s->fields->op2 == 0xd5; + const uint8_t v1 = get_field(s->fields, v1); + const uint8_t v2 = get_field(s->fields, v2); + const uint8_t src_es = get_field(s->fields, m3); + const uint8_t dst_es = src_es + 1; + int dst_idx, src_idx; + TCGv_i64 tmp; + + if (src_es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + if (s->fields->op2 == 0xd7 || s->fields->op2 == 0xd5) { + /* iterate backwards to avoid overwriting data we might need later */ + for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) { + src_idx = dst_idx; + read_vec_element_i64(tmp, v2, src_idx, + src_es | (logical ? 0 : MO_SIGN)); + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + + } else { + /* iterate forward to avoid overwriting data we might need later */ + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) { + src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2; + read_vec_element_i64(tmp, v2, src_idx, + src_es | (logical ? 0 : MO_SIGN)); + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} diff --git a/target/s390x/vec.h b/target/s390x/vec.h new file mode 100644 index 0000000000..3313fb43ee --- /dev/null +++ b/target/s390x/vec.h @@ -0,0 +1,101 @@ +/* + * QEMU TCG support -- s390x vector utilitites + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef S390X_VEC_H +#define S390X_VEC_H + +typedef union S390Vector { + uint64_t doubleword[2]; + uint32_t word[4]; + uint16_t halfword[8]; + uint8_t byte[16]; +} S390Vector; + +/* + * Each vector is stored as two 64bit host values. So when talking about + * byte/halfword/word numbers, we have to take care of proper translation + * between element numbers. + * + * Big Endian (target/possible host) + * B: [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][ 9][10][11][12][13][14][15] + * HW: [ 0][ 1][ 2][ 3] - [ 4][ 5][ 6][ 7] + * W: [ 0][ 1] - [ 2][ 3] + * DW: [ 0] - [ 1] + * + * Little Endian (possible host) + * B: [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][ 8] + * HW: [ 3][ 2][ 1][ 0] - [ 7][ 6][ 5][ 4] + * W: [ 1][ 0] - [ 3][ 2] + * DW: [ 0] - [ 1] + */ +#ifndef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#else +#define H1(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#endif + +static inline uint8_t s390_vec_read_element8(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 16); + return v->byte[H1(enr)]; +} + +static inline uint16_t s390_vec_read_element16(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 8); + return v->halfword[H2(enr)]; +} + +static inline uint32_t s390_vec_read_element32(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 4); + return v->word[H4(enr)]; +} + +static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 2); + return v->doubleword[enr]; +} + +static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr, + uint8_t data) +{ + g_assert(enr < 16); + v->byte[H1(enr)] = data; +} + +static inline void s390_vec_write_element16(S390Vector *v, uint8_t enr, + uint16_t data) +{ + g_assert(enr < 8); + v->halfword[H2(enr)] = data; +} + +static inline void s390_vec_write_element32(S390Vector *v, uint8_t enr, + uint32_t data) +{ + g_assert(enr < 4); + v->word[H4(enr)] = data; +} + +static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr, + uint64_t data) +{ + g_assert(enr < 2); + v->doubleword[enr] = data; +} + +#endif /* S390X_VEC_H */ diff --git a/target/s390x/vec_helper.c b/target/s390x/vec_helper.c new file mode 100644 index 0000000000..bb4c9304f0 --- /dev/null +++ b/target/s390x/vec_helper.c @@ -0,0 +1,193 @@ +/* + * QEMU TCG support -- s390x vector support instructions + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" + +void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes) +{ + if (likely(bytes >= 16)) { + uint64_t t0, t1; + + t0 = cpu_ldq_data_ra(env, addr, GETPC()); + addr = wrap_address(env, addr + 8); + t1 = cpu_ldq_data_ra(env, addr, GETPC()); + s390_vec_write_element64(v1, 0, t0); + s390_vec_write_element64(v1, 1, t1); + } else { + S390Vector tmp = {}; + int i; + + for (i = 0; i < bytes; i++) { + uint8_t byte = cpu_ldub_data_ra(env, addr, GETPC()); + + s390_vec_write_element8(&tmp, i, byte); + addr = wrap_address(env, addr + 1); + } + *(S390Vector *)v1 = tmp; + } +} + +#define DEF_VPK_HFN(BITS, TBITS) \ +typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *); \ +static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2, \ + const S390Vector *v3, vpk##BITS##_fn fn) \ +{ \ + int i, saturated = 0; \ + S390Vector tmp; \ + \ + for (i = 0; i < (128 / TBITS); i++) { \ + uint##BITS##_t src; \ + \ + if (i < (128 / BITS)) { \ + src = s390_vec_read_element##BITS(v2, i); \ + } else { \ + src = s390_vec_read_element##BITS(v3, i - (128 / BITS)); \ + } \ + s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated)); \ + } \ + *v1 = tmp; \ + return saturated; \ +} +DEF_VPK_HFN(64, 32) +DEF_VPK_HFN(32, 16) +DEF_VPK_HFN(16, 8) + +#define DEF_VPK(BITS, TBITS) \ +static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + return src; \ +} \ +void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e); \ +} +DEF_VPK(64, 32) +DEF_VPK(32, 16) +DEF_VPK(16, 8) + +#define DEF_VPKS(BITS, TBITS) \ +static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + if ((int##BITS##_t)src > INT##TBITS##_MAX) { \ + (*saturated)++; \ + return INT##TBITS##_MAX; \ + } else if ((int##BITS##_t)src < INT##TBITS##_MIN) { \ + (*saturated)++; \ + return INT##TBITS##_MIN; \ + } \ + return src; \ +} \ +void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \ +} \ +void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + int saturated = vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \ + \ + if (saturated == (128 / TBITS)) { \ + env->cc_op = 3; \ + } else if (saturated) { \ + env->cc_op = 1; \ + } else { \ + env->cc_op = 0; \ + } \ +} +DEF_VPKS(64, 32) +DEF_VPKS(32, 16) +DEF_VPKS(16, 8) + +#define DEF_VPKLS(BITS, TBITS) \ +static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + if (src > UINT##TBITS##_MAX) { \ + (*saturated)++; \ + return UINT##TBITS##_MAX; \ + } \ + return src; \ +} \ +void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); \ +} \ +void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + int saturated = vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); \ + \ + if (saturated == (128 / TBITS)) { \ + env->cc_op = 3; \ + } else if (saturated) { \ + env->cc_op = 1; \ + } else { \ + env->cc_op = 0; \ + } \ +} +DEF_VPKLS(64, 32) +DEF_VPKLS(32, 16) +DEF_VPKLS(16, 8) + +void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3, + const void *v4, uint32_t desc) +{ + S390Vector tmp; + int i; + + for (i = 0; i < 16; i++) { + const uint8_t selector = s390_vec_read_element8(v4, i) & 0x1f; + uint8_t byte; + + if (selector < 16) { + byte = s390_vec_read_element8(v2, selector); + } else { + byte = s390_vec_read_element8(v3, selector - 16); + } + s390_vec_write_element8(&tmp, i, byte); + } + *(S390Vector *)v1 = tmp; +} + +void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr, + uint64_t bytes) +{ + /* Probe write access before actually modifying memory */ + probe_write_access(env, addr, bytes, GETPC()); + + if (likely(bytes >= 16)) { + cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 0), GETPC()); + addr = wrap_address(env, addr + 8); + cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC()); + } else { + S390Vector tmp = {}; + int i; + + for (i = 0; i < bytes; i++) { + uint8_t byte = s390_vec_read_element8(v1, i); + + cpu_stb_data_ra(env, addr, byte, GETPC()); + addr = wrap_address(env, addr + 1); + } + *(S390Vector *)v1 = tmp; + } +} |