diff options
Diffstat (limited to 'target-arm')
-rw-r--r-- | target-arm/cpu-qom.h | 10 | ||||
-rw-r--r-- | target-arm/cpu.c | 22 | ||||
-rw-r--r-- | target-arm/cpu.h | 187 | ||||
-rw-r--r-- | target-arm/cpu64.c | 1 | ||||
-rw-r--r-- | target-arm/helper-a64.c | 136 | ||||
-rw-r--r-- | target-arm/helper-a64.h | 10 | ||||
-rw-r--r-- | target-arm/helper.c | 1216 | ||||
-rw-r--r-- | target-arm/helper.h | 10 | ||||
-rw-r--r-- | target-arm/kvm-consts.h | 16 | ||||
-rw-r--r-- | target-arm/kvm.c | 55 | ||||
-rw-r--r-- | target-arm/kvm_arm.h | 17 | ||||
-rw-r--r-- | target-arm/neon_helper.c | 28 | ||||
-rw-r--r-- | target-arm/op_helper.c | 69 | ||||
-rw-r--r-- | target-arm/translate-a64.c | 4875 | ||||
-rw-r--r-- | target-arm/translate.c | 418 |
15 files changed, 6441 insertions, 629 deletions
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h index afbd4222c5..00234e1d3d 100644 --- a/target-arm/cpu-qom.h +++ b/target-arm/cpu-qom.h @@ -132,6 +132,16 @@ typedef struct ARMCPU { uint32_t id_isar3; uint32_t id_isar4; uint32_t id_isar5; + uint64_t id_aa64pfr0; + uint64_t id_aa64pfr1; + uint64_t id_aa64dfr0; + uint64_t id_aa64dfr1; + uint64_t id_aa64afr0; + uint64_t id_aa64afr1; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64mmfr0; + uint64_t id_aa64mmfr1; uint32_t clidr; /* The elements of this array are the CCSIDR values for each cache, * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc. diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 52efd5d66f..1ce8a9bc38 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -60,7 +60,7 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) return; } - if (ri->type & ARM_CP_64BIT) { + if (cpreg_field_is_64bit(ri)) { CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue; } else { CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue; @@ -91,9 +91,10 @@ static void arm_cpu_reset(CPUState *s) env->aarch64 = 1; #if defined(CONFIG_USER_ONLY) env->pstate = PSTATE_MODE_EL0t; + /* Userspace expects access to CTL_EL0 and the cache ops */ + env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI; #else - env->pstate = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F - | PSTATE_MODE_EL1h; + env->pstate = PSTATE_MODE_EL1h; #endif } @@ -108,13 +109,14 @@ static void arm_cpu_reset(CPUState *s) } #else /* SVC mode with interrupts disabled. */ - env->uncached_cpsr = ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I; + env->uncached_cpsr = ARM_CPU_MODE_SVC; + env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F; /* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is clear at reset. Initial SP and PC are loaded from ROM. */ if (IS_M(env)) { uint32_t pc; uint8_t *rom; - env->uncached_cpsr &= ~CPSR_I; + env->daif &= ~PSTATE_I; rom = rom_ptr(0); if (rom) { /* We should really use ldl_phys here, in case the guest @@ -128,7 +130,7 @@ static void arm_cpu_reset(CPUState *s) } } - if (env->cp15.c1_sys & (1 << 13)) { + if (env->cp15.c1_sys & SCTLR_V) { env->regs[15] = 0xFFFF0000; } @@ -681,14 +683,12 @@ static void cortex_a9_initfn(Object *obj) } #ifndef CONFIG_USER_ONLY -static int a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) { /* Linux wants the number of processors from here. * Might as well set the interrupt-controller bit too. */ - *value = ((smp_cpus - 1) << 24) | (1 << 23); - return 0; + return ((smp_cpus - 1) << 24) | (1 << 23); } #endif @@ -924,6 +924,7 @@ static void arm_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); set_feature(&cpu->env, ARM_FEATURE_V7MP); + set_feature(&cpu->env, ARM_FEATURE_CRC); #ifdef TARGET_AARCH64 set_feature(&cpu->env, ARM_FEATURE_AARCH64); #endif @@ -982,6 +983,7 @@ static const ARMCPUInfo arm_cpus[] = { static Property arm_cpu_properties[] = { DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), + DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), DEFINE_PROP_END_OF_LIST() }; diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 198b6b8d4e..49fef3fcbe 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -74,8 +74,10 @@ */ #ifdef HOST_WORDS_BIGENDIAN #define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t)) +#define offsetofhigh32(S, M) offsetof(S, M) #else #define offsetoflow32(S, M) offsetof(S, M) +#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t)) #endif /* Meanings of the ARMCPU object's two inbound GPIO lines */ @@ -102,7 +104,7 @@ struct arm_boot_info; /* CPU state for each instance of a generic timer (in cp15 c14) */ typedef struct ARMGenericTimer { uint64_t cval; /* Timer CompareValue register */ - uint32_t ctl; /* Timer Control register */ + uint64_t ctl; /* Timer Control register */ } ARMGenericTimer; #define GTIMER_PHYS 0 @@ -133,6 +135,7 @@ typedef struct CPUARMState { * NZCV are kept in the split out env->CF/VF/NF/ZF, (which have the same * semantics as for AArch32, as described in the comments on each field) * nRW (also known as M[4]) is kept, inverted, in env->aarch64 + * DAIF (exception masks) are kept in env->daif * all other bits are stored in their correct places in env->pstate */ uint32_t pstate; @@ -162,20 +165,19 @@ typedef struct CPUARMState { uint32_t GE; /* cpsr[19:16] */ uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */ uint32_t condexec_bits; /* IT bits. cpsr[15:10,26:25]. */ + uint32_t daif; /* exception masks, in the bits they are in in PSTATE */ /* System control coprocessor (cp15) */ struct { uint32_t c0_cpuid; - uint32_t c0_cssel; /* Cache size selection. */ - uint32_t c1_sys; /* System control register. */ - uint32_t c1_coproc; /* Coprocessor access register. */ + uint64_t c0_cssel; /* Cache size selection. */ + uint64_t c1_sys; /* System control register. */ + uint64_t c1_coproc; /* Coprocessor access register. */ uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */ uint32_t c1_scr; /* secure config register. */ - uint32_t c2_base0; /* MMU translation table base 0. */ - uint32_t c2_base0_hi; /* MMU translation table base 0, high 32 bits */ - uint32_t c2_base1; /* MMU translation table base 0. */ - uint32_t c2_base1_hi; /* MMU translation table base 1, high 32 bits */ - uint32_t c2_control; /* MMU translation table base control. */ + uint64_t ttbr0_el1; /* MMU translation table base 0. */ + uint64_t ttbr1_el1; /* MMU translation table base 1. */ + uint64_t c2_control; /* MMU translation table base control. */ uint32_t c2_mask; /* MMU translation table base selection mask. */ uint32_t c2_base_mask; /* MMU translation table base 0 mask. */ uint32_t c2_data; /* MPU data cachable bits. */ @@ -197,14 +199,15 @@ typedef struct CPUARMState { uint32_t c9_pmxevtyper; /* perf monitor event type */ uint32_t c9_pmuserenr; /* perf monitor user enable */ uint32_t c9_pminten; /* perf monitor interrupt enables */ - uint32_t c12_vbar; /* vector base address register */ + uint64_t mair_el1; + uint64_t c12_vbar; /* vector base address register */ uint32_t c13_fcse; /* FCSE PID. */ uint32_t c13_context; /* Context ID. */ uint64_t tpidr_el0; /* User RW Thread register. */ uint64_t tpidrro_el0; /* User RO Thread register. */ uint64_t tpidr_el1; /* Privileged Thread register. */ - uint32_t c14_cntfrq; /* Counter Frequency register */ - uint32_t c14_cntkctl; /* Timer Control register */ + uint64_t c14_cntfrq; /* Counter Frequency register */ + uint64_t c14_cntkctl; /* Timer Control register */ ARMGenericTimer c14_timer[NUM_GTIMERS]; uint32_t c15_cpar; /* XScale Coprocessor Access Register */ uint32_t c15_ticonfig; /* TI925T configuration byte. */ @@ -215,13 +218,12 @@ typedef struct CPUARMState { uint32_t c15_diagnostic; /* diagnostic register */ uint32_t c15_power_diagnostic; uint32_t c15_power_control; /* power control */ + uint64_t dbgbvr[16]; /* breakpoint value registers */ + uint64_t dbgbcr[16]; /* breakpoint control registers */ + uint64_t dbgwvr[16]; /* watchpoint value registers */ + uint64_t dbgwcr[16]; /* watchpoint control registers */ } cp15; - /* System registers (AArch64) */ - struct { - uint64_t tpidr_el0; - } sr; - struct { uint32_t other_sp; uint32_t vecbase; @@ -337,6 +339,58 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw, int mmu_idx); #define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault +/* SCTLR bit meanings. Several bits have been reused in newer + * versions of the architecture; in that case we define constants + * for both old and new bit meanings. Code which tests against those + * bits should probably check or otherwise arrange that the CPU + * is the architectural version it expects. + */ +#define SCTLR_M (1U << 0) +#define SCTLR_A (1U << 1) +#define SCTLR_C (1U << 2) +#define SCTLR_W (1U << 3) /* up to v6; RAO in v7 */ +#define SCTLR_SA (1U << 3) +#define SCTLR_P (1U << 4) /* up to v5; RAO in v6 and v7 */ +#define SCTLR_SA0 (1U << 4) /* v8 onward, AArch64 only */ +#define SCTLR_D (1U << 5) /* up to v5; RAO in v6 */ +#define SCTLR_CP15BEN (1U << 5) /* v7 onward */ +#define SCTLR_L (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */ +#define SCTLR_B (1U << 7) /* up to v6; RAZ in v7 */ +#define SCTLR_ITD (1U << 7) /* v8 onward */ +#define SCTLR_S (1U << 8) /* up to v6; RAZ in v7 */ +#define SCTLR_SED (1U << 8) /* v8 onward */ +#define SCTLR_R (1U << 9) /* up to v6; RAZ in v7 */ +#define SCTLR_UMA (1U << 9) /* v8 onward, AArch64 only */ +#define SCTLR_F (1U << 10) /* up to v6 */ +#define SCTLR_SW (1U << 10) /* v7 onward */ +#define SCTLR_Z (1U << 11) +#define SCTLR_I (1U << 12) +#define SCTLR_V (1U << 13) +#define SCTLR_RR (1U << 14) /* up to v7 */ +#define SCTLR_DZE (1U << 14) /* v8 onward, AArch64 only */ +#define SCTLR_L4 (1U << 15) /* up to v6; RAZ in v7 */ +#define SCTLR_UCT (1U << 15) /* v8 onward, AArch64 only */ +#define SCTLR_DT (1U << 16) /* up to ??, RAO in v6 and v7 */ +#define SCTLR_nTWI (1U << 16) /* v8 onward */ +#define SCTLR_HA (1U << 17) +#define SCTLR_IT (1U << 18) /* up to ??, RAO in v6 and v7 */ +#define SCTLR_nTWE (1U << 18) /* v8 onward */ +#define SCTLR_WXN (1U << 19) +#define SCTLR_ST (1U << 20) /* up to ??, RAZ in v6 */ +#define SCTLR_UWXN (1U << 20) /* v7 onward */ +#define SCTLR_FI (1U << 21) +#define SCTLR_U (1U << 22) +#define SCTLR_XP (1U << 23) /* up to v6; v7 onward RAO */ +#define SCTLR_VE (1U << 24) /* up to v7 */ +#define SCTLR_E0E (1U << 24) /* v8 onward, AArch64 only */ +#define SCTLR_EE (1U << 25) +#define SCTLR_L2 (1U << 26) /* up to v6, RAZ in v7 */ +#define SCTLR_UCI (1U << 26) /* v8 onward, AArch64 only */ +#define SCTLR_NMFI (1U << 27) +#define SCTLR_TRE (1U << 28) +#define SCTLR_AFE (1U << 29) +#define SCTLR_TE (1U << 30) + #define CPSR_M (0x1fU) #define CPSR_T (1U << 5) #define CPSR_F (1U << 6) @@ -354,9 +408,11 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw, #define CPSR_Z (1U << 30) #define CPSR_N (1U << 31) #define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V) +#define CPSR_AIF (CPSR_A | CPSR_I | CPSR_F) #define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7) -#define CACHED_CPSR_BITS (CPSR_T | CPSR_GE | CPSR_IT | CPSR_Q | CPSR_NZCV) +#define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \ + | CPSR_NZCV) /* Bits writable in user mode. */ #define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE) /* Execution state bits. MRS read as zero, MSR writes ignored. */ @@ -379,7 +435,8 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw, #define PSTATE_Z (1U << 30) #define PSTATE_N (1U << 31) #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V) -#define CACHED_PSTATE_BITS (PSTATE_NZCV) +#define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F) +#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF) /* Mode values for AArch64 */ #define PSTATE_MODE_EL3h 13 #define PSTATE_MODE_EL3t 12 @@ -400,7 +457,7 @@ static inline uint32_t pstate_read(CPUARMState *env) ZF = (env->ZF == 0); return (env->NF & 0x80000000) | (ZF << 30) | (env->CF << 29) | ((env->VF & 0x80000000) >> 3) - | env->pstate; + | env->pstate | env->daif; } static inline void pstate_write(CPUARMState *env, uint32_t val) @@ -409,6 +466,7 @@ static inline void pstate_write(CPUARMState *env, uint32_t val) env->NF = val; env->CF = (val >> 29) & 1; env->VF = (val << 3) & 0x80000000; + env->daif = val & PSTATE_DAIF; env->pstate = val & ~CACHED_PSTATE_BITS; } @@ -496,6 +554,8 @@ enum arm_fprounding { FPROUNDING_ODD }; +int arm_rmode_to_sf(int rmode); + enum arm_cpu_mode { ARM_CPU_MODE_USR = 0x10, ARM_CPU_MODE_FIQ = 0x11, @@ -566,6 +626,7 @@ enum arm_features { ARM_FEATURE_AARCH64, /* supports 64 bit mode */ ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */ ARM_FEATURE_CBAR, /* has cp15 CBAR */ + ARM_FEATURE_CRC, /* ARMv8 CRC instructions */ }; static inline int arm_feature(CPUARMState *env, int feature) @@ -573,6 +634,22 @@ static inline int arm_feature(CPUARMState *env, int feature) return (env->features & (1ULL << feature)) != 0; } +/* Return true if the specified exception level is running in AArch64 state. */ +static inline bool arm_el_is_aa64(CPUARMState *env, int el) +{ + /* We don't currently support EL2 or EL3, and this isn't valid for EL0 + * (if we're in EL0, is_a64() is what you want, and if we're not in EL0 + * then the state of EL0 isn't well defined.) + */ + assert(el == 1); + /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This + * is a QEMU-imposed simplification which we may wish to change later. + * If we in future support EL2 and/or EL3, then the state of lower + * exception levels is controlled by the HCR.RW and SCR.RW bits. + */ + return arm_feature(env, ARM_FEATURE_AARCH64); +} + void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf); /* Interface between CPU and Interrupt controller. */ @@ -682,7 +759,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) #define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8)) #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8)) #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8)) -#define ARM_LAST_SPECIAL ARM_CP_NZCV +#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8)) +#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL /* Used only as a terminator for ARMCPRegInfo lists */ #define ARM_CP_SENTINEL 0xffff /* Mask of only the flag bits in a type field */ @@ -762,14 +840,30 @@ static inline int arm_current_pl(CPUARMState *env) typedef struct ARMCPRegInfo ARMCPRegInfo; -/* Access functions for coprocessor registers. These should return - * 0 on success, or one of the EXCP_* constants if access should cause - * an exception (in which case *value is not written). +typedef enum CPAccessResult { + /* Access is permitted */ + CP_ACCESS_OK = 0, + /* Access fails due to a configurable trap or enable which would + * result in a categorized exception syndrome giving information about + * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6, + * 0xc or 0x18). + */ + CP_ACCESS_TRAP = 1, + /* Access fails and results in an exception syndrome 0x0 ("uncategorized"). + * Note that this is not a catch-all case -- the set of cases which may + * result in this failure is specifically defined by the architecture. + */ + CP_ACCESS_TRAP_UNCATEGORIZED = 2, +} CPAccessResult; + +/* Access functions for coprocessor registers. These cannot fail and + * may not raise exceptions. */ -typedef int CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque, - uint64_t *value); -typedef int CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque, - uint64_t value); +typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque); +typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque, + uint64_t value); +/* Access permission check functions for coprocessor registers. */ +typedef CPAccessResult CPAccessFn(CPUARMState *env, const ARMCPRegInfo *opaque); /* Hook function for register reset */ typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque); @@ -823,6 +917,12 @@ struct ARMCPRegInfo { * 2. both readfn and writefn are specified */ ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */ + /* Function for making any access checks for this register in addition to + * those specified by the 'access' permissions bits. If NULL, no extra + * checks required. The access check is performed at runtime, not at + * translate time. + */ + CPAccessFn *accessfn; /* Function for handling reads of this register. If NULL, then reads * will be done by loading from the offset into CPUARMState specified * by fieldoffset. @@ -836,14 +936,14 @@ struct ARMCPRegInfo { /* Function for doing a "raw" read; used when we need to copy * coprocessor state to the kernel for KVM or out for * migration. This only needs to be provided if there is also a - * readfn and it makes an access permission check. + * readfn and it has side effects (for instance clear-on-read bits). */ CPReadFn *raw_readfn; /* Function for doing a "raw" write; used when we need to copy KVM * kernel coprocessor state into userspace, or for inbound * migration. This only needs to be provided if there is also a - * writefn and it makes an access permission check or masks out - * "unwritable" bits or has write-one-to-clear or similar behaviour. + * writefn and it masks out "unwritable" bits or has write-one-to-clear + * or similar behaviour. */ CPWriteFn *raw_writefn; /* Function for resetting the register. If NULL, then reset will be done @@ -878,16 +978,24 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); /* CPWriteFn that can be used to implement writes-ignored behaviour */ -int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value); +void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value); /* CPReadFn that can be used for read-as-zero behaviour */ -int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value); +uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri); /* CPResetFn that does nothing, for use if no reset is required even * if fieldoffset is non zero. */ void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque); +/* Return true if this reginfo struct's field in the cpu state struct + * is 64 bits wide. + */ +static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri) +{ + return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT); +} + static inline bool cp_access_ok(int current_pl, const ARMCPRegInfo *ri, int isread) { @@ -972,7 +1080,7 @@ static inline CPUARMState *cpu_init(const char *cpu_model) #define MMU_USER_IDX 1 static inline int cpu_mmu_index (CPUARMState *env) { - return (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR ? 1 : 0; + return arm_current_pl(env) ? 0 : 1; } #include "exec/cpu-all.h" @@ -999,7 +1107,9 @@ static inline int cpu_mmu_index (CPUARMState *env) #define ARM_TBFLAG_BSWAP_CODE_SHIFT 16 #define ARM_TBFLAG_BSWAP_CODE_MASK (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT) -/* Bit usage when in AArch64 state: currently no bits defined */ +/* Bit usage when in AArch64 state */ +#define ARM_TBFLAG_AA64_EL_SHIFT 0 +#define ARM_TBFLAG_AA64_EL_MASK (0x3 << ARM_TBFLAG_AA64_EL_SHIFT) /* some convenience accessor macros */ #define ARM_TBFLAG_AARCH64_STATE(F) \ @@ -1018,13 +1128,16 @@ static inline int cpu_mmu_index (CPUARMState *env) (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT) #define ARM_TBFLAG_BSWAP_CODE(F) \ (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT) +#define ARM_TBFLAG_AA64_EL(F) \ + (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT) static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, int *flags) { if (is_a64(env)) { *pc = env->pc; - *flags = ARM_TBFLAG_AARCH64_STATE_MASK; + *flags = ARM_TBFLAG_AARCH64_STATE_MASK + | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT); } else { int privmode; *pc = env->regs[15]; diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c index a639c2e476..8426bf1333 100644 --- a/target-arm/cpu64.c +++ b/target-arm/cpu64.c @@ -45,6 +45,7 @@ static void aarch64_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_AARCH64); + cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */ } #endif diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 4ce0d01a85..c2ce33ee88 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -122,3 +122,139 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status) { return float_rel_to_flags(float64_compare(x, y, fp_status)); } + +float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + if ((float32_is_zero(a) && float32_is_infinity(b)) || + (float32_is_infinity(a) && float32_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float32((1U << 30) | + ((float32_val(a) ^ float32_val(b)) & (1U << 31))); + } + return float32_mul(a, b, fpst); +} + +float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + if ((float64_is_zero(a) && float64_is_infinity(b)) || + (float64_is_infinity(a) && float64_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float64((1ULL << 62) | + ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); + } + return float64_mul(a, b, fpst); +} + +uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, + uint32_t rn, uint32_t numregs) +{ + /* Helper function for SIMD TBL and TBX. We have to do the table + * lookup part for the 64 bits worth of indices we're passed in. + * result is the initial results vector (either zeroes for TBL + * or some guest values for TBX), rn the register number where + * the table starts, and numregs the number of registers in the table. + * We return the results of the lookups. + */ + int shift; + + for (shift = 0; shift < 64; shift += 8) { + int index = extract64(indices, shift, 8); + if (index < 16 * numregs) { + /* Convert index (a byte offset into the virtual table + * which is a series of 128-bit vectors concatenated) + * into the correct vfp.regs[] element plus a bit offset + * into that element, bearing in mind that the table + * can wrap around from V31 to V0. + */ + int elt = (rn * 2 + (index >> 3)) % 64; + int bitidx = (index & 7) * 8; + uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8); + + result = deposit64(result, shift, 8, val); + } + } + return result; +} + +/* 64bit/double versions of the neon float compare functions */ +uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_eq_quiet(a, b, fpst); +} + +uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_le(b, a, fpst); +} + +uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_lt(b, a, fpst); +} + +/* Reciprocal step and sqrt step. Note that unlike the A32/T32 + * versions, these do a fully fused multiply-add or + * multiply-add-and-halve. + */ +#define float32_two make_float32(0x40000000) +#define float32_three make_float32(0x40400000) +#define float32_one_point_five make_float32(0x3fc00000) + +#define float64_two make_float64(0x4000000000000000ULL) +#define float64_three make_float64(0x4008000000000000ULL) +#define float64_one_point_five make_float64(0x3FF8000000000000ULL) + +float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_chs(a); + if ((float32_is_infinity(a) && float32_is_zero(b)) || + (float32_is_infinity(b) && float32_is_zero(a))) { + return float32_two; + } + return float32_muladd(a, b, float32_two, 0, fpst); +} + +float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_chs(a); + if ((float64_is_infinity(a) && float64_is_zero(b)) || + (float64_is_infinity(b) && float64_is_zero(a))) { + return float64_two; + } + return float64_muladd(a, b, float64_two, 0, fpst); +} + +float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_chs(a); + if ((float32_is_infinity(a) && float32_is_zero(b)) || + (float32_is_infinity(b) && float32_is_zero(a))) { + return float32_one_point_five; + } + return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst); +} + +float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_chs(a); + if ((float64_is_infinity(a) && float64_is_zero(b)) || + (float64_is_infinity(b) && float64_is_zero(a))) { + return float64_one_point_five; + } + return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); +} diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index bca19f3dea..ab9933cab0 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -26,3 +26,13 @@ DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) +DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) +DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) +DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) +DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) +DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) +DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) +DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) +DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) +DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) +DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) diff --git a/target-arm/helper.c b/target-arm/helper.c index c708f15e27..90f85f1899 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -5,6 +5,8 @@ #include "sysemu/arch_init.h" #include "sysemu/sysemu.h" #include "qemu/bitops.h" +#include "qemu/crc32c.h" +#include <zlib.h> /* For crc32 */ #ifndef CONFIG_USER_ONLY static inline int get_phys_addr(CPUARMState *env, uint32_t address, @@ -107,65 +109,56 @@ static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) } } -static int raw_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) { - if (ri->type & ARM_CP_64BIT) { - *value = CPREG_FIELD64(env, ri); + if (cpreg_field_is_64bit(ri)) { + return CPREG_FIELD64(env, ri); } else { - *value = CPREG_FIELD32(env, ri); + return CPREG_FIELD32(env, ri); } - return 0; } -static int raw_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - if (ri->type & ARM_CP_64BIT) { + if (cpreg_field_is_64bit(ri)) { CPREG_FIELD64(env, ri) = value; } else { CPREG_FIELD32(env, ri) = value; } - return 0; } -static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *v) +static uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) { - /* Raw read of a coprocessor register (as needed for migration, etc) - * return true on success, false if the read is impossible for some reason. - */ + /* Raw read of a coprocessor register (as needed for migration, etc). */ if (ri->type & ARM_CP_CONST) { - *v = ri->resetvalue; + return ri->resetvalue; } else if (ri->raw_readfn) { - return (ri->raw_readfn(env, ri, v) == 0); + return ri->raw_readfn(env, ri); } else if (ri->readfn) { - return (ri->readfn(env, ri, v) == 0); + return ri->readfn(env, ri); } else { - raw_read(env, ri, v); + return raw_read(env, ri); } - return true; } -static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, - int64_t v) +static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t v) { /* Raw write of a coprocessor register (as needed for migration, etc). - * Return true on success, false if the write is impossible for some reason. * Note that constant registers are treated as write-ignored; the * caller should check for success by whether a readback gives the * value written. */ if (ri->type & ARM_CP_CONST) { - return true; + return; } else if (ri->raw_writefn) { - return (ri->raw_writefn(env, ri, v) == 0); + ri->raw_writefn(env, ri, v); } else if (ri->writefn) { - return (ri->writefn(env, ri, v) == 0); + ri->writefn(env, ri, v); } else { raw_write(env, ri, v); } - return true; } bool write_cpustate_to_list(ARMCPU *cpu) @@ -177,7 +170,7 @@ bool write_cpustate_to_list(ARMCPU *cpu) for (i = 0; i < cpu->cpreg_array_len; i++) { uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); const ARMCPRegInfo *ri; - uint64_t v; + ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); if (!ri) { ok = false; @@ -186,11 +179,7 @@ bool write_cpustate_to_list(ARMCPU *cpu) if (ri->type & ARM_CP_NO_MIGRATE) { continue; } - if (!read_raw_cp_reg(&cpu->env, ri, &v)) { - ok = false; - continue; - } - cpu->cpreg_values[i] = v; + cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri); } return ok; } @@ -203,7 +192,6 @@ bool write_list_to_cpustate(ARMCPU *cpu) for (i = 0; i < cpu->cpreg_array_len; i++) { uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); uint64_t v = cpu->cpreg_values[i]; - uint64_t readback; const ARMCPRegInfo *ri; ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); @@ -218,9 +206,8 @@ bool write_list_to_cpustate(ARMCPU *cpu) * (to catch read-only registers and partially read-only * registers where the incoming migration value doesn't match) */ - if (!write_raw_cp_reg(&cpu->env, ri, v) || - !read_raw_cp_reg(&cpu->env, ri, &readback) || - readback != v) { + write_raw_cp_reg(&cpu->env, ri, v); + if (read_raw_cp_reg(&cpu->env, ri) != v) { ok = false; } } @@ -309,14 +296,13 @@ void init_cpreg_list(ARMCPU *cpu) g_list_free(keys); } -static int dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { env->cp15.c3 = value; tlb_flush(env, 1); /* Flush TLB as domain not tracked in TLB */ - return 0; } -static int fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { if (env->cp15.c13_fcse != value) { /* Unlike real hardware the qemu TLB uses virtual addresses, @@ -325,10 +311,10 @@ static int fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) tlb_flush(env, 1); env->cp15.c13_fcse = value; } - return 0; } -static int contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) + +static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { if (env->cp15.c13_context != value && !arm_feature(env, ARM_FEATURE_MPU)) { /* For VMSA (when not using the LPAE long descriptor page table @@ -338,39 +324,34 @@ static int contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush(env, 1); } env->cp15.c13_context = value; - return 0; } -static int tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* Invalidate all (TLBIALL) */ tlb_flush(env, 1); - return 0; } -static int tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ tlb_flush_page(env, value & TARGET_PAGE_MASK); - return 0; } -static int tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* Invalidate by ASID (TLBIASID) */ tlb_flush(env, value == 0); - return 0; } -static int tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ tlb_flush_page(env, value & TARGET_PAGE_MASK); - return 0; } static const ARMCPRegInfo cp_reginfo[] = { @@ -450,14 +431,14 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { REGINFO_SENTINEL }; -static int cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { if (env->cp15.c1_coproc != value) { env->cp15.c1_coproc = value; /* ??? Is this safe when called from within a TB? */ tb_flush(env); } - return 0; } static const ARMCPRegInfo v6_cp_reginfo[] = { @@ -479,124 +460,101 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { */ { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, }, - { .name = "CPACR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, + { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, + .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_coproc), .resetvalue = 0, .writefn = cpacr_write }, REGINFO_SENTINEL }; - -static int pmreg_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri) { - /* Generic performance monitor register read function for where - * user access may be allowed by PMUSERENR. + /* Perfomance monitor registers user accessibility is controlled + * by PMUSERENR. */ if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) { - return EXCP_UDEF; + return CP_ACCESS_TRAP; } - *value = CPREG_FIELD32(env, ri); - return 0; + return CP_ACCESS_OK; } -static int pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) { - return EXCP_UDEF; - } /* only the DP, X, D and E bits are writable */ env->cp15.c9_pmcr &= ~0x39; env->cp15.c9_pmcr |= (value & 0x39); - return 0; } -static int pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, +static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) { - return EXCP_UDEF; - } value &= (1 << 31); env->cp15.c9_pmcnten |= value; - return 0; } -static int pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) { - return EXCP_UDEF; - } value &= (1 << 31); env->cp15.c9_pmcnten &= ~value; - return 0; } -static int pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) { - return EXCP_UDEF; - } env->cp15.c9_pmovsr &= ~value; - return 0; } -static int pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) { - return EXCP_UDEF; - } env->cp15.c9_pmxevtyper = value & 0xff; - return 0; } -static int pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, +static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { env->cp15.c9_pmuserenr = value & 1; - return 0; } -static int pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* We have no event counters so only the C bit can be changed */ value &= (1 << 31); env->cp15.c9_pminten |= value; - return 0; } -static int pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { value &= (1 << 31); env->cp15.c9_pminten &= ~value; - return 0; } -static int vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { + /* Note that even though the AArch64 view of this register has bits + * [10:0] all RES0 we can only mask the bottom 5, to comply with the + * architectural requirements for bits which are RES0 only in some + * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7 + * requires the bottom five bits to be RAZ/WI because they're UNK/SBZP.) + */ env->cp15.c12_vbar = value & ~0x1Ful; - return 0; } -static int ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = arm_env_get_cpu(env); - *value = cpu->ccsidr[env->cp15.c0_cssel]; - return 0; + return cpu->ccsidr[env->cp15.c0_cssel]; } -static int csselr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c0_cssel = value & 0xf; - return 0; } static const ARMCPRegInfo v7_cp_reginfo[] = { @@ -624,37 +582,41 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, .access = PL0_RW, .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), - .readfn = pmreg_read, .writefn = pmcntenset_write, - .raw_readfn = raw_read, .raw_writefn = raw_write }, + .writefn = pmcntenset_write, + .accessfn = pmreg_access, + .raw_writefn = raw_write }, { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), - .readfn = pmreg_read, .writefn = pmcntenclr_write, + .accessfn = pmreg_access, + .writefn = pmcntenclr_write, .type = ARM_CP_NO_MIGRATE }, { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), - .readfn = pmreg_read, .writefn = pmovsr_write, - .raw_readfn = raw_read, .raw_writefn = raw_write }, - /* Unimplemented so WI. Strictly speaking write accesses in PL0 should - * respect PMUSERENR. - */ + .accessfn = pmreg_access, + .writefn = pmovsr_write, + .raw_writefn = raw_write }, + /* Unimplemented so WI. */ { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, - .access = PL0_W, .type = ARM_CP_NOP }, + .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP }, /* Since we don't implement any events, writing to PMSELR is UNPREDICTABLE. - * We choose to RAZ/WI. XXX should respect PMUSERENR. + * We choose to RAZ/WI. */ { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, - .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* Unimplemented, RAZ/WI. XXX PMUSERENR */ + .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = pmreg_access }, + /* Unimplemented, RAZ/WI. */ { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, - .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = pmreg_access }, { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper), - .readfn = pmreg_read, .writefn = pmxevtyper_write, - .raw_readfn = raw_read, .raw_writefn = raw_write }, - /* Unimplemented, RAZ/WI. XXX PMUSERENR */ + .accessfn = pmreg_access, .writefn = pmxevtyper_write, + .raw_writefn = raw_write }, + /* Unimplemented, RAZ/WI. */ { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, - .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = pmreg_access }, { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, .access = PL0_R | PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr), @@ -669,16 +631,19 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .resetvalue = 0, .writefn = pmintenclr_write, }, - { .name = "VBAR", .cp = 15, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0, + { .name = "VBAR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .writefn = vbar_write, .fieldoffset = offsetof(CPUARMState, cp15.c12_vbar), .resetvalue = 0 }, { .name = "SCR", .cp = 15, .crn = 1, .crm = 1, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_scr), .resetvalue = 0, }, - { .name = "CCSIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, + { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_MIGRATE }, - { .name = "CSSELR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0, + { .name = "CSSELR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c0_cssel), .writefn = csselr_write, .resetvalue = 0 }, /* Auxiliary ID register: this actually has an IMPDEF value but for now @@ -686,37 +651,42 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { */ { .name = "AIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* MAIR can just read-as-written because we don't implement caches + * and so don't need to care about memory attributes. + */ + { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el1), + .resetvalue = 0 }, + /* For non-long-descriptor page tables these are PRRR and NMRR; + * regardless they still act as reads-as-written for QEMU. + * The override is necessary because of the overly-broad TLB_LOCKDOWN + * definition. + */ + { .name = "MAIR0", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE, + .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW, + .fieldoffset = offsetoflow32(CPUARMState, cp15.mair_el1), + .resetfn = arm_cp_reset_ignore }, + { .name = "MAIR1", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE, + .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW, + .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1), + .resetfn = arm_cp_reset_ignore }, REGINFO_SENTINEL }; -static int teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { value &= 1; env->teecr = value; - return 0; } -static int teehbr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri) { - /* This is a helper function because the user access rights - * depend on the value of the TEECR. - */ if (arm_current_pl(env) == 0 && (env->teecr & 1)) { - return EXCP_UDEF; + return CP_ACCESS_TRAP; } - *value = env->teehbr; - return 0; -} - -static int teehbr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - if (arm_current_pl(env) == 0 && (env->teecr & 1)) { - return EXCP_UDEF; - } - env->teehbr = value; - return 0; + return CP_ACCESS_OK; } static const ARMCPRegInfo t2ee_cp_reginfo[] = { @@ -726,8 +696,7 @@ static const ARMCPRegInfo t2ee_cp_reginfo[] = { .writefn = teecr_write }, { .name = "TEEHBR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 6, .opc2 = 0, .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, teehbr), - .resetvalue = 0, .raw_readfn = raw_read, .raw_writefn = raw_write, - .readfn = teehbr_read, .writefn = teehbr_write }, + .accessfn = teehbr_access, .resetvalue = 0 }, REGINFO_SENTINEL }; @@ -757,6 +726,59 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { #ifndef CONFIG_USER_ONLY +static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */ + if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx) +{ + /* CNT[PV]CT: not visible from PL0 if ELO[PV]CTEN is zero */ + if (arm_current_pl(env) == 0 && + !extract32(env->cp15.c14_cntkctl, timeridx, 1)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx) +{ + /* CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from PL0 if + * EL0[PV]TEN is zero. + */ + if (arm_current_pl(env) == 0 && + !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static CPAccessResult gt_pct_access(CPUARMState *env, + const ARMCPRegInfo *ri) +{ + return gt_counter_access(env, GTIMER_PHYS); +} + +static CPAccessResult gt_vct_access(CPUARMState *env, + const ARMCPRegInfo *ri) +{ + return gt_counter_access(env, GTIMER_VIRT); +} + +static CPAccessResult gt_ptimer_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_timer_access(env, GTIMER_PHYS); +} + +static CPAccessResult gt_vtimer_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_timer_access(env, GTIMER_VIRT); +} + static uint64_t gt_get_countervalue(CPUARMState *env) { return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE; @@ -802,17 +824,6 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) } } -static int gt_cntfrq_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - /* Not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */ - if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) { - return EXCP_UDEF; - } - *value = env->cp15.c14_cntfrq; - return 0; -} - static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = arm_env_get_cpu(env); @@ -821,81 +832,40 @@ static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri) timer_del(cpu->gt_timer[timeridx]); } -static int gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - int timeridx = ri->opc1 & 1; - - if (arm_current_pl(env) == 0 && - !extract32(env->cp15.c14_cntkctl, timeridx, 1)) { - return EXCP_UDEF; - } - *value = gt_get_countervalue(env); - return 0; -} - -static int gt_cval_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) { - int timeridx = ri->opc1 & 1; - - if (arm_current_pl(env) == 0 && - !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) { - return EXCP_UDEF; - } - *value = env->cp15.c14_timer[timeridx].cval; - return 0; + return gt_get_countervalue(env); } -static int gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { int timeridx = ri->opc1 & 1; env->cp15.c14_timer[timeridx].cval = value; gt_recalc_timer(arm_env_get_cpu(env), timeridx); - return 0; } -static int gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) + +static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri) { int timeridx = ri->crm & 1; - if (arm_current_pl(env) == 0 && - !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) { - return EXCP_UDEF; - } - *value = (uint32_t)(env->cp15.c14_timer[timeridx].cval - - gt_get_countervalue(env)); - return 0; + return (uint32_t)(env->cp15.c14_timer[timeridx].cval - + gt_get_countervalue(env)); } -static int gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { int timeridx = ri->crm & 1; env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) + + sextract64(value, 0, 32); gt_recalc_timer(arm_env_get_cpu(env), timeridx); - return 0; -} - -static int gt_ctl_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - int timeridx = ri->crm & 1; - - if (arm_current_pl(env) == 0 && - !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) { - return EXCP_UDEF; - } - *value = env->cp15.c14_timer[timeridx].ctl; - return 0; } -static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); int timeridx = ri->crm & 1; @@ -912,7 +882,6 @@ static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, qemu_set_irq(cpu->gt_timer_outputs[timeridx], (oldval & 4) && (value & 2)); } - return 0; } void arm_gt_ptimer_cb(void *opaque) @@ -935,66 +904,131 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { * Our reset value matches the fixed frequency we implement the timer at. */ { .name = "CNTFRQ", .cp = 15, .crn = 14, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL1_RW | PL0_R, + .type = ARM_CP_NO_MIGRATE, + .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_cntfrq), + .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTFRQ_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0, + .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq), .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE, - .readfn = gt_cntfrq_read, .raw_readfn = raw_read, }, /* overall control: mostly access permissions */ - { .name = "CNTKCTL", .cp = 15, .crn = 14, .crm = 1, .opc1 = 0, .opc2 = 0, + { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl), .resetvalue = 0, }, /* per-timer control */ { .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1, + .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, + .fieldoffset = offsetoflow32(CPUARMState, + cp15.c14_timer[GTIMER_PHYS].ctl), + .resetfn = arm_cp_reset_ignore, + .writefn = gt_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1, .type = ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl), .resetvalue = 0, - .readfn = gt_ctl_read, .writefn = gt_ctl_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, + .writefn = gt_ctl_write, .raw_writefn = raw_write, }, { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1, + .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, + .fieldoffset = offsetoflow32(CPUARMState, + cp15.c14_timer[GTIMER_VIRT].ctl), + .resetfn = arm_cp_reset_ignore, + .writefn = gt_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1, .type = ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl), .resetvalue = 0, - .readfn = gt_ctl_read, .writefn = gt_ctl_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, + .writefn = gt_ctl_write, .raw_writefn = raw_write, }, /* TimerValue views: a 32 bit downcounting view of the underlying state */ { .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, + .readfn = gt_tval_read, .writefn = gt_tval_write, + }, + { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0, + .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, .readfn = gt_tval_read, .writefn = gt_tval_write, }, { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, + .readfn = gt_tval_read, .writefn = gt_tval_write, + }, + { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0, + .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R, .readfn = gt_tval_read, .writefn = gt_tval_write, }, /* The counter itself */ { .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0, .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO, + .accessfn = gt_pct_access, + .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTPCT_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 1, + .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, + .accessfn = gt_pct_access, .readfn = gt_cnt_read, .resetfn = gt_cnt_reset, }, { .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1, .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO, + .accessfn = gt_vct_access, + .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2, + .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, + .accessfn = gt_vct_access, .readfn = gt_cnt_read, .resetfn = gt_cnt_reset, }, /* Comparison value, indicating when the timer goes off */ { .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2, .access = PL1_RW | PL0_R, - .type = ARM_CP_64BIT | ARM_CP_IO, + .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), - .resetvalue = 0, - .readfn = gt_cval_read, .writefn = gt_cval_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, + .accessfn = gt_ptimer_access, .resetfn = arm_cp_reset_ignore, + .writefn = gt_cval_write, .raw_writefn = raw_write, + }, + { .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2, + .access = PL1_RW | PL0_R, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), + .resetvalue = 0, .accessfn = gt_vtimer_access, + .writefn = gt_cval_write, .raw_writefn = raw_write, }, { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3, .access = PL1_RW | PL0_R, - .type = ARM_CP_64BIT | ARM_CP_IO, + .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), - .resetvalue = 0, - .readfn = gt_cval_read, .writefn = gt_cval_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, + .accessfn = gt_vtimer_access, .resetfn = arm_cp_reset_ignore, + .writefn = gt_cval_write, .raw_writefn = raw_write, + }, + { .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2, + .access = PL1_RW | PL0_R, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), + .resetvalue = 0, .accessfn = gt_vtimer_access, + .writefn = gt_cval_write, .raw_writefn = raw_write, }, REGINFO_SENTINEL }; @@ -1010,7 +1044,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { #endif -static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { if (arm_feature(env, ARM_FEATURE_LPAE)) { env->cp15.c7_par = value; @@ -1019,7 +1053,6 @@ static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) } else { env->cp15.c7_par = value & 0xfffff1ff; } - return 0; } #ifndef CONFIG_USER_ONLY @@ -1035,7 +1068,20 @@ static inline bool extended_addresses_enabled(CPUARMState *env) && (env->cp15.c2_control & (1U << 31)); } -static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + if (ri->opc2 & 4) { + /* Other states are only available with TrustZone; in + * a non-TZ implementation these registers don't exist + * at all, which is an Uncategorized trap. This underdecoding + * is safe because the reginfo is NO_MIGRATE. + */ + return CP_ACCESS_TRAP_UNCATEGORIZED; + } + return CP_ACCESS_OK; +} + +static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { hwaddr phys_addr; target_ulong page_size; @@ -1043,10 +1089,6 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) int ret, is_user = ri->opc2 & 2; int access_type = ri->opc2 & 1; - if (ri->opc2 & 4) { - /* Other states are only available with TrustZone */ - return EXCP_UDEF; - } ret = get_phys_addr(env, value, access_type, is_user, &phys_addr, &prot, &page_size); if (extended_addresses_enabled(env)) { @@ -1082,13 +1124,12 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) env->cp15.c7_par = phys_addr & 0xfffff000; } } else { - env->cp15.c7_par = ((ret & (10 << 1)) >> 5) | - ((ret & (12 << 1)) >> 6) | + env->cp15.c7_par = ((ret & (1 << 10)) >> 5) | + ((ret & (1 << 12)) >> 6) | ((ret & 0xf) << 1) | 1; } env->cp15.c7_par_hi = 0; } - return 0; } #endif @@ -1099,7 +1140,8 @@ static const ARMCPRegInfo vapa_cp_reginfo[] = { .writefn = par_write }, #ifndef CONFIG_USER_ONLY { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, - .access = PL1_W, .writefn = ats_write, .type = ARM_CP_NO_MIGRATE }, + .access = PL1_W, .accessfn = ats_access, + .writefn = ats_write, .type = ARM_CP_NO_MIGRATE }, #endif REGINFO_SENTINEL }; @@ -1134,52 +1176,26 @@ static uint32_t extended_mpu_ap_bits(uint32_t val) return ret; } -static int pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c5_data = extended_mpu_ap_bits(value); - return 0; } -static int pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) { - *value = simple_mpu_ap_bits(env->cp15.c5_data); - return 0; + return simple_mpu_ap_bits(env->cp15.c5_data); } -static int pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c5_insn = extended_mpu_ap_bits(value); - return 0; -} - -static int pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - *value = simple_mpu_ap_bits(env->cp15.c5_insn); - return 0; -} - -static int arm946_prbs_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - if (ri->crm >= 8) { - return EXCP_UDEF; - } - *value = env->cp15.c6_region[ri->crm]; - return 0; } -static int arm946_prbs_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) { - if (ri->crm >= 8) { - return EXCP_UDEF; - } - env->cp15.c6_region[ri->crm] = value; - return 0; + return simple_mpu_ap_bits(env->cp15.c5_insn); } static const ARMCPRegInfo pmsav5_cp_reginfo[] = { @@ -1204,14 +1220,35 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = { .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c2_insn), .resetvalue = 0, }, /* Protection region base and size registers */ - { .name = "946_PRBS", .cp = 15, .crn = 6, .crm = CP_ANY, .opc1 = 0, - .opc2 = CP_ANY, .access = PL1_RW, - .readfn = arm946_prbs_read, .writefn = arm946_prbs_write, }, + { .name = "946_PRBS0", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[0]) }, + { .name = "946_PRBS1", .cp = 15, .crn = 6, .crm = 1, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[1]) }, + { .name = "946_PRBS2", .cp = 15, .crn = 6, .crm = 2, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[2]) }, + { .name = "946_PRBS3", .cp = 15, .crn = 6, .crm = 3, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[3]) }, + { .name = "946_PRBS4", .cp = 15, .crn = 6, .crm = 4, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[4]) }, + { .name = "946_PRBS5", .cp = 15, .crn = 6, .crm = 5, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[5]) }, + { .name = "946_PRBS6", .cp = 15, .crn = 6, .crm = 6, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[6]) }, + { .name = "946_PRBS7", .cp = 15, .crn = 6, .crm = 7, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[7]) }, REGINFO_SENTINEL }; -static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { int maskshift = extract32(value, 0, 3); @@ -1228,11 +1265,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, env->cp15.c2_control = value; env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift); env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift); - return 0; } -static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { if (arm_feature(env, ARM_FEATURE_LPAE)) { /* With LPAE the TTBCR could result in a change of ASID @@ -1240,7 +1276,7 @@ static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, */ tlb_flush(env, 1); } - return vmsa_ttbcr_raw_write(env, ri, value); + vmsa_ttbcr_raw_write(env, ri, value); } static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) @@ -1250,6 +1286,26 @@ static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) env->cp15.c2_mask = 0; } +static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */ + tlb_flush(env, 1); + env->cp15.c2_control = value; +} + +static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* 64 bit accesses to the TTBRs can change the ASID and so we + * must flush the TLB. + */ + if (cpreg_field_is_64bit(ri)) { + tlb_flush(env, 1); + } + raw_write(env, ri, value); +} + static const ARMCPRegInfo vmsa_cp_reginfo[] = { { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, @@ -1257,56 +1313,59 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, }, - { .name = "TTBR0", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.c2_base0), .resetvalue = 0, }, - { .name = "TTBR1", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1, - .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.c2_base1), .resetvalue = 0, }, - { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, - .access = PL1_RW, .writefn = vmsa_ttbcr_write, - .resetfn = vmsa_ttbcr_reset, .raw_writefn = vmsa_ttbcr_raw_write, + { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1), + .writefn = vmsa_ttbr_write, .resetvalue = 0 }, + { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1), + .writefn = vmsa_ttbr_write, .resetvalue = 0 }, + { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .writefn = vmsa_tcr_el1_write, + .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.c2_control) }, + { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write, + .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) }, { .name = "DFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_data), .resetvalue = 0, }, REGINFO_SENTINEL }; -static int omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c15_ticonfig = value & 0xe7; /* The OS_TYPE bit in this register changes the reported CPUID! */ env->cp15.c0_cpuid = (value & (1 << 5)) ? ARM_CPUID_TI915T : ARM_CPUID_TI925T; - return 0; } -static int omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c15_threadid = value & 0xffff; - return 0; } -static int omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* Wait-for-interrupt (deprecated) */ cpu_interrupt(CPU(arm_env_get_cpu(env)), CPU_INTERRUPT_HALT); - return 0; } -static int omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* On OMAP there are registers indicating the max/min index of dcache lines * containing a dirty line; cache flush operations have to reset these. */ env->cp15.c15_i_max = 0x000; env->cp15.c15_i_min = 0xff0; - return 0; } static const ARMCPRegInfo omap_cp_reginfo[] = { @@ -1348,8 +1407,8 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { REGINFO_SENTINEL }; -static int xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { value &= 0x3fff; if (env->cp15.c15_cpar != value) { @@ -1357,7 +1416,6 @@ static int xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, tb_flush(env); env->cp15.c15_cpar = value; } - return 0; } static const ARMCPRegInfo xscale_cp_reginfo[] = { @@ -1437,12 +1495,12 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = { REGINFO_SENTINEL }; -static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) { CPUState *cs = CPU(arm_env_get_cpu(env)); uint32_t mpidr = cs->cpu_index; - /* We don't support setting cluster ID ([8..11]) + /* We don't support setting cluster ID ([8..11]) (known as Aff1 + * in later ARM ARM versions), or any of the higher affinity level fields, * so these bits always RAZ. */ if (arm_feature(env, ARM_FEATURE_V7MP)) { @@ -1453,27 +1511,26 @@ static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri, * not currently model any of those cores. */ } - *value = mpidr; - return 0; + return mpidr; } static const ARMCPRegInfo mpidr_cp_reginfo[] = { - { .name = "MPIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, + { .name = "MPIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_MIGRATE }, REGINFO_SENTINEL }; -static int par64_read(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value) +static uint64_t par64_read(CPUARMState *env, const ARMCPRegInfo *ri) { - *value = ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par; - return 0; + return ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par; } -static int par64_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void par64_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c7_par_hi = value >> 32; env->cp15.c7_par = value; - return 0; } static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri) @@ -1482,63 +1539,15 @@ static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri) env->cp15.c7_par = 0; } -static int ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - *value = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0; - return 0; -} - -static int ttbr064_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - env->cp15.c2_base0_hi = value >> 32; - env->cp15.c2_base0 = value; - return 0; -} - -static int ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Writes to the 64 bit format TTBRs may change the ASID */ - tlb_flush(env, 1); - return ttbr064_raw_write(env, ri, value); -} - -static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri) -{ - env->cp15.c2_base0_hi = 0; - env->cp15.c2_base0 = 0; -} - -static int ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) -{ - *value = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1; - return 0; -} - -static int ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - env->cp15.c2_base1_hi = value >> 32; - env->cp15.c2_base1 = value; - return 0; -} - -static void ttbr164_reset(CPUARMState *env, const ARMCPRegInfo *ri) -{ - env->cp15.c2_base1_hi = 0; - env->cp15.c2_base1 = 0; -} - static const ARMCPRegInfo lpae_cp_reginfo[] = { /* NOP AMAIR0/1: the override is because these clash with the rather * broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo. */ - { .name = "AMAIR0", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0, + { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 }, + /* AMAIR1 is mapped to AMAIR_EL1[63:32] */ { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 }, @@ -1551,41 +1560,72 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset }, { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0, - .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr064_read, - .writefn = ttbr064_write, .raw_writefn = ttbr064_raw_write, - .resetfn = ttbr064_reset }, + .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE, + .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1), + .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore }, { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1, - .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr164_read, - .writefn = ttbr164_write, .resetfn = ttbr164_reset }, + .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE, + .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1), + .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore }, REGINFO_SENTINEL }; -static int aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri) { - *value = vfp_get_fpcr(env); - return 0; + return vfp_get_fpcr(env); } -static int aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { vfp_set_fpcr(env, value); - return 0; } -static int aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t *value) +static uint64_t aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri) { - *value = vfp_get_fpsr(env); - return 0; + return vfp_get_fpsr(env); } -static int aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { vfp_set_fpsr(env, value); - return 0; +} + +static CPAccessResult aa64_cacheop_access(CPUARMState *env, + const ARMCPRegInfo *ri) +{ + /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless + * SCTLR_EL1.UCI is set. + */ + if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static void tlbi_aa64_va_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA (AArch64 version) */ + uint64_t pageaddr = value << 12; + tlb_flush_page(env, pageaddr); +} + +static void tlbi_aa64_vaa_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA, all ASIDs (AArch64 version) */ + uint64_t pageaddr = value << 12; + tlb_flush_page(env, pageaddr); +} + +static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by ASID (AArch64 version) */ + int asid = extract64(value, 48, 16); + tlb_flush(env, asid == 0); } static const ARMCPRegInfo v8_cp_reginfo[] = { @@ -1601,13 +1641,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "FPSR", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write }, - /* This claims a 32 byte cacheline size for icache and dcache, VIPT icache. - * It will eventually need to have a CPU-specified reset value. - */ - { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0, - .access = PL0_R, .type = ARM_CP_CONST, - .resetvalue = 0x80030003 }, /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use. * For system mode the DZP bit here will need to be computed, not constant. */ @@ -1615,16 +1648,155 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0, .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0x10 }, + { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2, + .access = PL1_R, .type = ARM_CP_CURRENTEL }, + /* Cache ops: all NOPs since we don't emulate caches */ + { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "DC_ISW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_CSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_CISW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NOP }, + /* TLBI operations */ + { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbiall_write }, + { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_va_write }, + { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_asid_write }, + { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_vaa_write }, + { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 5, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_va_write }, + { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 7, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_vaa_write }, + { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbiall_write }, + { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_va_write }, + { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_asid_write }, + { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_vaa_write }, + { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 5, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_va_write }, + { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 7, + .access = PL1_W, .type = ARM_CP_NO_MIGRATE, + .writefn = tlbi_aa64_vaa_write }, + /* Dummy implementation of monitor debug system control register: + * we don't support debug. + */ + { .name = "MDSCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */ + { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4, + .access = PL1_W, .type = ARM_CP_NOP }, REGINFO_SENTINEL }; -static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { env->cp15.c1_sys = value; /* ??? Lots of these bits are not implemented. */ /* This may enable/disable the MMU, so do a TLB flush. */ tlb_flush(env, 1); - return 0; +} + +static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64, + * but the AArch32 CTR has its own reginfo struct) + */ + if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static void define_aarch64_debug_regs(ARMCPU *cpu) +{ + /* Define breakpoint and watchpoint registers. These do nothing + * but read as written, for now. + */ + int i; + + for (i = 0; i < 16; i++) { + ARMCPRegInfo dbgregs[] = { + { .name = "DBGBVR", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]) }, + { .name = "DBGBCR", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]) }, + { .name = "DBGWVR", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]) }, + { .name = "DBGWCR", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]) }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, dbgregs); + } } void register_cp_regs_for_features(ARMCPU *cpu) @@ -1707,11 +1879,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, .access = PL0_RW, .resetvalue = cpu->midr & 0xff000000, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), - .readfn = pmreg_read, .writefn = pmcr_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, + .accessfn = pmreg_access, .writefn = pmcr_write, + .raw_writefn = raw_write, }; ARMCPRegInfo clidr = { - .name = "CLIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, + .name = "CLIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr }; define_one_arm_cp_reg(cpu, &pmcr); @@ -1721,7 +1894,53 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, not_v7_cp_reginfo); } if (arm_feature(env, ARM_FEATURE_V8)) { + /* AArch64 ID registers, which all have impdef reset values */ + ARMCPRegInfo v8_idregs[] = { + { .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64pfr0 }, + { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64pfr1}, + { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64dfr0 }, + { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64dfr1 }, + { .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64afr0 }, + { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64afr1 }, + { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64isar0 }, + { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64isar1 }, + { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64mmfr0 }, + { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64mmfr1 }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, v8_idregs); define_arm_cp_regs(cpu, v8_cp_reginfo); + define_aarch64_debug_regs(cpu); } if (arm_feature(env, ARM_FEATURE_MPU)) { /* These are the MPU registers prior to PMSAv6. Any new @@ -1787,9 +2006,16 @@ void register_cp_regs_for_features(ARMCPU *cpu) .writefn = arm_cp_write_ignore, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid), .type = ARM_CP_OVERRIDE }, + { .name = "MIDR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 0, .crn = 0, .crm = 0, + .access = PL1_R, .resetvalue = cpu->midr, .type = ARM_CP_CONST }, { .name = "CTR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr }, + { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0, + .access = PL0_R, .accessfn = ctr_el0_access, + .type = ARM_CP_CONST, .resetvalue = cpu->ctr }, { .name = "TCMTR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, @@ -1860,7 +2086,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) /* Generic registers whose values depend on the implementation */ { ARMCPRegInfo sctlr = { - .name = "SCTLR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0, + .name = "SCTLR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_sys), .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr, .raw_writefn = raw_write, @@ -2039,6 +2266,10 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, if (opaque) { r2->opaque = opaque; } + /* reginfo passed to helpers is correct for the actual access, + * and is never ARM_CP_STATE_BOTH: + */ + r2->state = state; /* Make sure reginfo passed to helpers for wildcarded regs * has the correct crm/opc1/opc2 for this reg, not CP_ANY: */ @@ -2202,17 +2433,15 @@ const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp) return g_hash_table_lookup(cpregs, &encoded_cp); } -int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { /* Helper coprocessor write function for write-ignore registers */ - return 0; } -int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value) +uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri) { /* Helper coprocessor write function for read-as-zero registers */ - *value = 0; return 0; } @@ -2249,7 +2478,7 @@ uint32_t cpsr_read(CPUARMState *env) (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27) | (env->thumb << 5) | ((env->condexec_bits & 3) << 25) | ((env->condexec_bits & 0xfc) << 8) - | (env->GE << 16); + | (env->GE << 16) | env->daif; } void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) @@ -2276,6 +2505,9 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) env->GE = (val >> 16) & 0xf; } + env->daif &= ~(CPSR_AIF & mask); + env->daif |= val & CPSR_AIF & mask; + if ((env->uncached_cpsr ^ val) & mask & CPSR_M) { if (bad_mode_switch(env, val & CPSR_M)) { /* Attempt to switch to an invalid mode: this is UNPREDICTABLE. @@ -2449,14 +2681,16 @@ void switch_mode(CPUARMState *env, int mode) static void v7m_push(CPUARMState *env, uint32_t val) { + CPUState *cs = ENV_GET_CPU(env); env->regs[13] -= 4; - stl_phys(env->regs[13], val); + stl_phys(cs->as, env->regs[13], val); } static uint32_t v7m_pop(CPUARMState *env) { + CPUState *cs = ENV_GET_CPU(env); uint32_t val; - val = ldl_phys(env->regs[13]); + val = ldl_phys(cs->as, env->regs[13]); env->regs[13] += 4; return val; } @@ -2611,7 +2845,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) /* Clear IT bits */ env->condexec_bits = 0; env->regs[14] = lr; - addr = ldl_phys(env->v7m.vecbase + env->v7m.exception * 4); + addr = ldl_phys(cs->as, env->v7m.vecbase + env->v7m.exception * 4); env->regs[15] = addr & 0xfffffffe; env->thumb = addr & 1; } @@ -2716,7 +2950,7 @@ void arm_cpu_do_interrupt(CPUState *cs) return; /* Never happens. Keep compiler happy. */ } /* High vectors. */ - if (env->cp15.c1_sys & (1 << 13)) { + if (env->cp15.c1_sys & SCTLR_V) { /* when enabled, base address cannot be remapped. */ addr += 0xffff0000; } else { @@ -2735,11 +2969,11 @@ void arm_cpu_do_interrupt(CPUState *cs) env->condexec_bits = 0; /* Switch to the new mode, and to the correct instruction set. */ env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode; - env->uncached_cpsr |= mask; + env->daif |= mask; /* this is a lie, as the was no c1_sys on V4T/V5, but who cares * and we should just guard the thumb mode on V4 */ if (arm_feature(env, ARM_FEATURE_V4T)) { - env->thumb = (env->cp15.c1_sys & (1 << 30)) != 0; + env->thumb = (env->cp15.c1_sys & SCTLR_TE) != 0; } env->regs[14] = env->regs[15] + offset; env->regs[15] = addr; @@ -2765,12 +2999,15 @@ static inline int check_ap(CPUARMState *env, int ap, int domain_prot, switch (ap) { case 0: + if (arm_feature(env, ARM_FEATURE_V7)) { + return 0; + } if (access_type == 1) return 0; - switch ((env->cp15.c1_sys >> 8) & 3) { - case 1: + switch (env->cp15.c1_sys & (SCTLR_S | SCTLR_R)) { + case SCTLR_S: return is_user ? 0 : PAGE_READ; - case 2: + case SCTLR_R: return PAGE_READ; default: return 0; @@ -2804,9 +3041,9 @@ static uint32_t get_level1_table_address(CPUARMState *env, uint32_t address) uint32_t table; if (address & env->cp15.c2_mask) - table = env->cp15.c2_base1 & 0xffffc000; + table = env->cp15.ttbr1_el1 & 0xffffc000; else - table = env->cp15.c2_base0 & env->cp15.c2_base_mask; + table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask; table |= (address >> 18) & 0x3ffc; return table; @@ -2816,6 +3053,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, int is_user, hwaddr *phys_ptr, int *prot, target_ulong *page_size) { + CPUState *cs = ENV_GET_CPU(env); int code; uint32_t table; uint32_t desc; @@ -2828,7 +3066,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, /* Pagetable walk. */ /* Lookup l1 descriptor. */ table = get_level1_table_address(env, address); - desc = ldl_phys(table); + desc = ldl_phys(cs->as, table); type = (desc & 3); domain = (desc >> 5) & 0x0f; domain_prot = (env->cp15.c3 >> (domain * 2)) & 3; @@ -2859,7 +3097,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, /* Fine pagetable. */ table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); } - desc = ldl_phys(table); + desc = ldl_phys(cs->as, table); switch (desc & 3) { case 0: /* Page translation fault. */ code = 7; @@ -2871,7 +3109,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, break; case 2: /* 4k page. */ phys_addr = (desc & 0xfffff000) | (address & 0xfff); - ap = (desc >> (4 + ((address >> 13) & 6))) & 3; + ap = (desc >> (4 + ((address >> 9) & 6))) & 3; *page_size = 0x1000; break; case 3: /* 1k page. */ @@ -2911,6 +3149,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, int is_user, hwaddr *phys_ptr, int *prot, target_ulong *page_size) { + CPUState *cs = ENV_GET_CPU(env); int code; uint32_t table; uint32_t desc; @@ -2925,7 +3164,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, /* Pagetable walk. */ /* Lookup l1 descriptor. */ table = get_level1_table_address(env, address); - desc = ldl_phys(table); + desc = ldl_phys(cs->as, table); type = (desc & 3); if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) { /* Section translation fault, or attempt to use the encoding @@ -2967,7 +3206,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, } /* Lookup l2 entry. */ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); - desc = ldl_phys(table); + desc = ldl_phys(cs->as, table); ap = ((desc >> 4) & 3) | ((desc >> 7) & 4); switch (desc & 3) { case 0: /* Page translation fault. */ @@ -2999,7 +3238,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, goto do_fault; /* The simplified model uses AP[0] as an access control bit. */ - if ((env->cp15.c1_sys & (1 << 29)) && (ap & 1) == 0) { + if ((env->cp15.c1_sys & SCTLR_AFE) && (ap & 1) == 0) { /* Access flag fault. */ code = (code == 15) ? 6 : 3; goto do_fault; @@ -3033,6 +3272,7 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address, hwaddr *phys_ptr, int *prot, target_ulong *page_size_ptr) { + CPUState *cs = ENV_GET_CPU(env); /* Read an LPAE long-descriptor translation table. */ MMUFaultType fault_type = translation_fault; uint32_t level = 1; @@ -3079,11 +3319,11 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address, * we will always flush the TLB any time the ASID is changed). */ if (ttbr_select == 0) { - ttbr = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0; + ttbr = env->cp15.ttbr0_el1; epd = extract32(env->cp15.c2_control, 7, 1); tsz = t0sz; } else { - ttbr = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1; + ttbr = env->cp15.ttbr1_el1; epd = extract32(env->cp15.c2_control, 23, 1); tsz = t1sz; } @@ -3121,7 +3361,7 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address, uint64_t descriptor; descaddr |= ((address >> (9 * (4 - level))) & 0xff8); - descriptor = ldq_phys(descaddr); + descriptor = ldq_phys(cs->as, descaddr); if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) { /* Invalid, or the Reserved level 3 encoding */ @@ -3290,7 +3530,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address, if (address < 0x02000000) address += env->cp15.c13_fcse; - if ((env->cp15.c1_sys & 1) == 0) { + if ((env->cp15.c1_sys & SCTLR_M) == 0) { /* MMU/MPU disabled. */ *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; @@ -3303,7 +3543,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address, } else if (extended_addresses_enabled(env)) { return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr, prot, page_size); - } else if (env->cp15.c1_sys & (1 << 23)) { + } else if (env->cp15.c1_sys & SCTLR_XP) { return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr, prot, page_size); } else { @@ -3402,12 +3642,12 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) case 9: /* PSP */ return env->v7m.current_sp ? env->regs[13] : env->v7m.other_sp; case 16: /* PRIMASK */ - return (env->uncached_cpsr & CPSR_I) != 0; + return (env->daif & PSTATE_I) != 0; case 17: /* BASEPRI */ case 18: /* BASEPRI_MAX */ return env->v7m.basepri; case 19: /* FAULTMASK */ - return (env->uncached_cpsr & CPSR_F) != 0; + return (env->daif & PSTATE_F) != 0; case 20: /* CONTROL */ return env->v7m.control; default: @@ -3454,10 +3694,11 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) env->v7m.other_sp = val; break; case 16: /* PRIMASK */ - if (val & 1) - env->uncached_cpsr |= CPSR_I; - else - env->uncached_cpsr &= ~CPSR_I; + if (val & 1) { + env->daif |= PSTATE_I; + } else { + env->daif &= ~PSTATE_I; + } break; case 17: /* BASEPRI */ env->v7m.basepri = val & 0xff; @@ -3468,10 +3709,11 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) env->v7m.basepri = val; break; case 19: /* FAULTMASK */ - if (val & 1) - env->uncached_cpsr |= CPSR_F; - else - env->uncached_cpsr &= ~CPSR_F; + if (val & 1) { + env->daif |= PSTATE_F; + } else { + env->daif &= ~PSTATE_F; + } break; case 20: /* CONTROL */ env->v7m.control = val & 3; @@ -4048,6 +4290,23 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env) return prev_rmode; } +/* Set the current fp rounding mode in the standard fp status and return + * the old one. This is for NEON instructions that need to change the + * rounding mode but wish to use the standard FPSCR values for everything + * else. Always set the rounding mode back to the correct value after + * modifying it. + * The argument is a softfloat float_round_ value. + */ +uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env) +{ + float_status *fp_status = &env->vfp.standard_fp_status; + + uint32_t prev_rmode = get_float_rounding_mode(fp_status); + set_float_rounding_mode(rmode, fp_status); + + return prev_rmode; +} + /* Half precision conversions. */ static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s) { @@ -4418,3 +4677,68 @@ float64 HELPER(rintd)(float64 x, void *fp_status) return ret; } + +/* Convert ARM rounding mode to softfloat */ +int arm_rmode_to_sf(int rmode) +{ + switch (rmode) { + case FPROUNDING_TIEAWAY: + rmode = float_round_ties_away; + break; + case FPROUNDING_ODD: + /* FIXME: add support for TIEAWAY and ODD */ + qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n", + rmode); + case FPROUNDING_TIEEVEN: + default: + rmode = float_round_nearest_even; + break; + case FPROUNDING_POSINF: + rmode = float_round_up; + break; + case FPROUNDING_NEGINF: + rmode = float_round_down; + break; + case FPROUNDING_ZERO: + rmode = float_round_to_zero; + break; + } + return rmode; +} + +static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes) +{ + memset(buf, 0, 4); + + if (bytes == 1) { + buf[0] = val & 0xff; + } else if (bytes == 2) { + buf[0] = val & 0xff; + buf[1] = (val >> 8) & 0xff; + } else { + buf[0] = val & 0xff; + buf[1] = (val >> 8) & 0xff; + buf[2] = (val >> 16) & 0xff; + buf[3] = (val >> 24) & 0xff; + } +} + +uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) +{ + uint8_t buf[4]; + + crc_init_buffer(buf, val, bytes); + + /* zlib crc32 converts the accumulator and output to one's complement. */ + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; +} + +uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) +{ + uint8_t buf[4]; + + crc_init_buffer(buf, val, bytes); + + /* Linux crc32c converts the output to one's complement. */ + return crc32c(acc, buf, bytes) ^ 0xffffffff; +} diff --git a/target-arm/helper.h b/target-arm/helper.h index 70872dffc6..276f3a9149 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -57,11 +57,14 @@ DEF_HELPER_1(cpsr_read, i32, env) DEF_HELPER_3(v7m_msr, void, env, i32, i32) DEF_HELPER_2(v7m_mrs, i32, env, i32) +DEF_HELPER_2(access_check_cp_reg, void, env, ptr) DEF_HELPER_3(set_cp_reg, void, env, ptr, i32) DEF_HELPER_2(get_cp_reg, i32, env, ptr) DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64) DEF_HELPER_2(get_cp_reg64, i64, env, ptr) +DEF_HELPER_3(msr_i_pstate, void, env, i32, i32) + DEF_HELPER_2(get_r13_banked, i32, env, i32) DEF_HELPER_3(set_r13_banked, void, env, i32, i32) @@ -149,6 +152,7 @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr) DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env) +DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env) DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env) DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env) @@ -319,6 +323,7 @@ DEF_HELPER_1(neon_cls_s8, i32, i32) DEF_HELPER_1(neon_cls_s16, i32, i32) DEF_HELPER_1(neon_cls_s32, i32, i32) DEF_HELPER_1(neon_cnt_u8, i32, i32) +DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) @@ -380,6 +385,8 @@ DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_acge_f64, i64, i64, i64, ptr) +DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, ptr) /* iwmmxt_helper.c */ DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) @@ -492,6 +499,9 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32) DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32) DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) +DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h index 0e7f889cba..6009a33f10 100644 --- a/target-arm/kvm-consts.h +++ b/target-arm/kvm-consts.h @@ -50,15 +50,29 @@ MISMATCH_CHECK(PSCI_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF) MISMATCH_CHECK(PSCI_FN_CPU_ON, KVM_PSCI_FN_CPU_ON) MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE) +/* Note that KVM uses overlapping values for AArch32 and AArch64 + * target CPU numbers. AArch32 targets: + */ #define QEMU_KVM_ARM_TARGET_CORTEX_A15 0 +#define QEMU_KVM_ARM_TARGET_CORTEX_A7 1 + +/* AArch64 targets: */ +#define QEMU_KVM_ARM_TARGET_AEM_V8 0 +#define QEMU_KVM_ARM_TARGET_FOUNDATION_V8 1 +#define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 /* There's no kernel define for this: sentinel value which * matches no KVM target value for either 64 or 32 bit */ #define QEMU_KVM_ARM_TARGET_NONE UINT_MAX -#ifndef TARGET_AARCH64 +#ifdef TARGET_AARCH64 +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_AEM_V8, KVM_ARM_TARGET_AEM_V8) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57) +#else MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7) #endif #define CP_REG_ARM64 0x6000000000000000ULL diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 1d2688dda7..39202d7eea 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -165,8 +165,10 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) */ typedef struct KVMDevice { struct kvm_arm_device_addr kda; + struct kvm_device_attr kdattr; MemoryRegion *mr; QSLIST_ENTRY(KVMDevice) entries; + int dev_fd; } KVMDevice; static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head; @@ -200,6 +202,29 @@ static MemoryListener devlistener = { .region_del = kvm_arm_devlistener_del, }; +static void kvm_arm_set_device_addr(KVMDevice *kd) +{ + struct kvm_device_attr *attr = &kd->kdattr; + int ret; + + /* If the device control API is available and we have a device fd on the + * KVMDevice struct, let's use the newer API + */ + if (kd->dev_fd >= 0) { + uint64_t addr = kd->kda.addr; + attr->addr = (uintptr_t)&addr; + ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); + } else { + ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda); + } + + if (ret < 0) { + fprintf(stderr, "Failed to set device address: %s\n", + strerror(-ret)); + abort(); + } +} + static void kvm_arm_machine_init_done(Notifier *notifier, void *data) { KVMDevice *kd, *tkd; @@ -207,12 +232,7 @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data) memory_listener_unregister(&devlistener); QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) { if (kd->kda.addr != -1) { - if (kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, - &kd->kda) < 0) { - fprintf(stderr, "KVM_ARM_SET_DEVICE_ADDRESS failed: %s\n", - strerror(errno)); - abort(); - } + kvm_arm_set_device_addr(kd); } memory_region_unref(kd->mr); g_free(kd); @@ -223,7 +243,8 @@ static Notifier notify = { .notify = kvm_arm_machine_init_done, }; -void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid) +void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + uint64_t attr, int dev_fd) { KVMDevice *kd; @@ -239,6 +260,10 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid) kd->mr = mr; kd->kda.id = devid; kd->kda.addr = -1; + kd->kdattr.flags = 0; + kd->kdattr.group = group; + kd->kdattr.attr = attr; + kd->dev_fd = dev_fd; QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries); memory_region_ref(kd->mr); } @@ -389,3 +414,19 @@ void kvm_arch_remove_all_hw_breakpoints(void) void kvm_arch_init_irq_routing(KVMState *s) { } + +int kvm_arch_irqchip_create(KVMState *s) +{ + int ret; + + /* If we can create the VGIC using the newer device control API, we + * let the device do this when it initializes itself, otherwise we + * fall back to the old API */ + + ret = kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true); + if (ret == 0) { + return 1; + } + + return 0; +} diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h index cd3d13ca2d..137c5671e9 100644 --- a/target-arm/kvm_arm.h +++ b/target-arm/kvm_arm.h @@ -18,16 +18,21 @@ * kvm_arm_register_device: * @mr: memory region for this device * @devid: the KVM device ID + * @group: device control API group for setting addresses + * @attr: device control API address type + * @dev_fd: device control device file descriptor (or -1 if not supported) * * Remember the memory region @mr, and when it is mapped by the * machine model, tell the kernel that base address using the - * KVM_SET_DEVICE_ADDRESS ioctl. @devid should be the ID of - * the device as defined by KVM_SET_DEVICE_ADDRESS. - * The machine model may map and unmap the device multiple times; - * the kernel will only be told the final address at the point - * where machine init is complete. + * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API. @devid + * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or + * the arm-vgic device in the device control API. + * The machine model may map + * and unmap the device multiple times; the kernel will only be told the final + * address at the point where machine init is complete. */ -void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid); +void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + uint64_t attr, int dev_fd); /** * write_list_to_kvmstate: diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index be6fbd997e..13752baf63 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -1133,6 +1133,18 @@ uint32_t HELPER(neon_cnt_u8)(uint32_t x) return x; } +/* Reverse bits in each 8 bit word */ +uint32_t HELPER(neon_rbit_u8)(uint32_t x) +{ + x = ((x & 0xf0f0f0f0) >> 4) + | ((x & 0x0f0f0f0f) << 4); + x = ((x & 0x88888888) >> 3) + | ((x & 0x44444444) >> 1) + | ((x & 0x22222222) << 1) + | ((x & 0x11111111) << 3); + return x; +} + #define NEON_QDMULH16(dest, src1, src2, round) do { \ uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ @@ -1811,6 +1823,22 @@ uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp) return -float32_lt(f1, f0, fpst); } +uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f0 = float64_abs(make_float64(a)); + float64 f1 = float64_abs(make_float64(b)); + return -float64_le(f1, f0, fpst); +} + +uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f0 = float64_abs(make_float64(a)); + float64 f1 = float64_abs(make_float64(b)); + return -float64_lt(f1, f0, fpst); +} + #define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1)) void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index a918e5b27a..7d06d2f9a5 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -273,44 +273,75 @@ void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val) } } -void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value) +void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip) { const ARMCPRegInfo *ri = rip; - int excp = ri->writefn(env, ri, value); - if (excp) { - raise_exception(env, excp); + switch (ri->accessfn(env, ri)) { + case CP_ACCESS_OK: + return; + case CP_ACCESS_TRAP: + case CP_ACCESS_TRAP_UNCATEGORIZED: + /* These cases will eventually need to generate different + * syndrome information. + */ + break; + default: + g_assert_not_reached(); } + raise_exception(env, EXCP_UDEF); +} + +void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value) +{ + const ARMCPRegInfo *ri = rip; + + ri->writefn(env, ri, value); } uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip) { const ARMCPRegInfo *ri = rip; - uint64_t value; - int excp = ri->readfn(env, ri, &value); - if (excp) { - raise_exception(env, excp); - } - return value; + + return ri->readfn(env, ri); } void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value) { const ARMCPRegInfo *ri = rip; - int excp = ri->writefn(env, ri, value); - if (excp) { - raise_exception(env, excp); - } + + ri->writefn(env, ri, value); } uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip) { const ARMCPRegInfo *ri = rip; - uint64_t value; - int excp = ri->readfn(env, ri, &value); - if (excp) { - raise_exception(env, excp); + + return ri->readfn(env, ri); +} + +void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm) +{ + /* MSR_i to update PSTATE. This is OK from EL0 only if UMA is set. + * Note that SPSel is never OK from EL0; we rely on handle_msr_i() + * to catch that case at translate time. + */ + if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) { + raise_exception(env, EXCP_UDEF); + } + + switch (op) { + case 0x05: /* SPSel */ + env->pstate = deposit32(env->pstate, 0, 1, imm); + break; + case 0x1e: /* DAIFSet */ + env->daif |= (imm << 6) & PSTATE_DAIF; + break; + case 0x1f: /* DAIFClear */ + env->daif &= ~((imm << 6) & PSTATE_DAIF); + break; + default: + g_assert_not_reached(); } - return value; } /* ??? Flag setting arithmetic is awkward because we need to do comparisons. diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index cf80c46b90..08ac6591b6 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -61,6 +61,27 @@ enum a64_shift_type { A64_SHIFT_TYPE_ROR = 3 }; +/* Table based decoder typedefs - used when the relevant bits for decode + * are too awkwardly scattered across the instruction (eg SIMD). + */ +typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); + +typedef struct AArch64DecodeTable { + uint32_t pattern; + uint32_t mask; + AArch64DecodeFn *disas_fn; +} AArch64DecodeTable; + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); +typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); +typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); +typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); +typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); + /* initialize TCG globals. */ void a64_translate_init(void) { @@ -308,6 +329,28 @@ static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) return v; } +/* Return the offset into CPUARMState of an element of specified + * size, 'element' places in from the least significant end of + * the FP/vector register Qn. + */ +static inline int vec_reg_offset(int regno, int element, TCGMemOp size) +{ + int offs = offsetof(CPUARMState, vfp.regs[regno * 2]); +#ifdef HOST_WORDS_BIGENDIAN + /* This is complicated slightly because vfp.regs[2n] is + * still the low half and vfp.regs[2n+1] the high half + * of the 128 bit vector, even on big endian systems. + * Calculate the offset assuming a fully bigendian 128 bits, + * then XOR to account for the order of the two 64 bit halves. + */ + offs += (16 - ((element + 1) * (1 << size))); + offs ^= 8; +#else + offs += element * (1 << size); +#endif + return offs; +} + /* Return the offset into CPUARMState of a slice (from * the least significant end) of FP register Qn (ie * Dn, Sn, Hn or Bn). @@ -575,20 +618,26 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) */ /* - * Store from GPR register to memory + * Store from GPR register to memory. */ +static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, + TCGv_i64 tcg_addr, int size, int memidx) +{ + g_assert(size <= 3); + tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size); +} + static void do_gpr_st(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, int size) { - g_assert(size <= 3); - tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size); + do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s)); } /* * Load from memory to GPR register */ -static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, bool extend) +static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, int memidx) { TCGMemOp memop = MO_TE + size; @@ -598,7 +647,7 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, memop += MO_SIGN; } - tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); if (extend && is_signed) { g_assert(size < 3); @@ -606,6 +655,13 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, } } +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend) +{ + do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend, + get_mem_index(s)); +} + /* * Store from FP register to memory */ @@ -661,6 +717,156 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) } /* + * Vector load/store helpers. + * + * The principal difference between this and a FP load is that we don't + * zero extend as we are filling a partial chunk of the vector register. + * These functions don't support 128 bit loads/stores, which would be + * normal load/store operations. + * + * The _i32 versions are useful when operating on 32 bit quantities + * (eg for floating point single or using Neon helper functions). + */ + +/* Get value of an element within a vector register */ +static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_8|MO_SIGN: + tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_16|MO_SIGN: + tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_32|MO_SIGN: + tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_64: + case MO_64|MO_SIGN: + tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_8|MO_SIGN: + tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_16|MO_SIGN: + tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_32: + case MO_32|MO_SIGN: + tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +/* Set value of an element within a vector register */ +static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_st8_i64(tcg_src, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_st16_i64(tcg_src, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_st32_i64(tcg_src, cpu_env, vect_off); + break; + case MO_64: + tcg_gen_st_i64(tcg_src, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, + int destidx, int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_st8_i32(tcg_src, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_st16_i32(tcg_src, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_st_i32(tcg_src, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +/* Clear the high 64 bits of a 128 bit vector (in general non-quad + * vector ops all need to do this). + */ +static void clear_vec_high(DisasContext *s, int rd) +{ + TCGv_i64 tcg_zero = tcg_const_i64(0); + + write_vec_element(s, tcg_zero, rd, 1, MO_64); + tcg_temp_free_i64(tcg_zero); +} + +/* Store from vector register to memory */ +static void do_vec_st(DisasContext *s, int srcidx, int element, + TCGv_i64 tcg_addr, int size) +{ + TCGMemOp memop = MO_TE + size; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + read_vec_element(s, tcg_tmp, srcidx, element, size); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + + tcg_temp_free_i64(tcg_tmp); +} + +/* Load from memory to vector register */ +static void do_vec_ld(DisasContext *s, int destidx, int element, + TCGv_i64 tcg_addr, int size) +{ + TCGMemOp memop = MO_TE + size; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + write_vec_element(s, tcg_tmp, destidx, element, size); + + tcg_temp_free_i64(tcg_tmp); +} + +/* * This utility function is for doing register extension with an * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. @@ -722,6 +928,31 @@ static inline void gen_check_sp_alignment(DisasContext *s) } /* + * This provides a simple table based table lookup decoder. It is + * intended to be used when the relevant bits for decode are too + * awkwardly placed and switch/if based logic would be confusing and + * deeply nested. Since it's a linear search through the table, tables + * should be kept small. + * + * It returns the first handler where insn & mask == pattern, or + * NULL if there is no match. + * The table is terminated by an empty mask (i.e. 0) + */ +static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, + uint32_t insn) +{ + const AArch64DecodeTable *tptr = table; + + while (tptr->mask) { + if ((insn & tptr->mask) == tptr->pattern) { + return tptr->disas_fn; + } + tptr++; + } + return NULL; +} + +/* * the instruction disassembly implemented here matches * the instruction encoding classifications in chapter 3 (C3) * of the ARM Architecture Reference Manual (DDI0487A_a) @@ -849,9 +1080,11 @@ static void handle_hint(DisasContext *s, uint32_t insn, switch (selector) { case 0: /* NOP */ return; + case 3: /* WFI */ + s->is_jmp = DISAS_WFI; + return; case 1: /* YIELD */ case 2: /* WFE */ - case 3: /* WFI */ case 4: /* SEV */ case 5: /* SEVL */ /* we treat all as NOP at least for now */ @@ -895,7 +1128,30 @@ static void handle_sync(DisasContext *s, uint32_t insn, static void handle_msr_i(DisasContext *s, uint32_t insn, unsigned int op1, unsigned int op2, unsigned int crm) { - unsupported_encoding(s, insn); + int op = op1 << 3 | op2; + switch (op) { + case 0x05: /* SPSel */ + if (s->current_pl == 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x1e: /* DAIFSet */ + case 0x1f: /* DAIFClear */ + { + TCGv_i32 tcg_imm = tcg_const_i32(crm); + TCGv_i32 tcg_op = tcg_const_i32(op); + gen_a64_set_pc_im(s->pc - 4); + gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm); + tcg_temp_free_i32(tcg_imm); + tcg_temp_free_i32(tcg_op); + s->is_jmp = DISAS_UPDATE; + break; + } + default: + unallocated_encoding(s); + return; + } } static void gen_get_nzcv(TCGv_i64 tcg_rt) @@ -961,7 +1217,12 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, crn, crm, op0, op1, op2)); if (!ri) { - /* Unknown register */ + /* Unknown register; this might be a guest error or a QEMU + * unimplemented feature. + */ + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " + "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", + isread ? "read" : "write", op0, op1, crn, crm, op2); unallocated_encoding(s); return; } @@ -972,6 +1233,17 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; } + if (ri->accessfn) { + /* Emit code to perform further access permissions checks at + * runtime; this may result in an exception. + */ + TCGv_ptr tmpptr; + gen_a64_set_pc_im(s->pc - 4); + tmpptr = tcg_const_ptr(ri); + gen_helper_access_check_cp_reg(cpu_env, tmpptr); + tcg_temp_free_ptr(tmpptr); + } + /* Handle special cases first */ switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { case ARM_CP_NOP: @@ -984,6 +1256,13 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, gen_set_nzcv(tcg_rt); } return; + case ARM_CP_CURRENTEL: + /* Reads as current EL value from pstate, which is + * guaranteed to be constant by the tb flags. + */ + tcg_rt = cpu_reg(s, rt); + tcg_gen_movi_i64(tcg_rt, s->current_pl << 2); + return; default: break; } @@ -999,7 +1278,6 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, tcg_gen_movi_i64(tcg_rt, ri->resetvalue); } else if (ri->readfn) { TCGv_ptr tmpptr; - gen_a64_set_pc_im(s->pc - 4); tmpptr = tcg_const_ptr(ri); gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr); tcg_temp_free_ptr(tmpptr); @@ -1012,7 +1290,6 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; } else if (ri->writefn) { TCGv_ptr tmpptr; - gen_a64_set_pc_im(s->pc - 4); tmpptr = tcg_const_ptr(ri); gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt); tcg_temp_free_ptr(tmpptr); @@ -1257,12 +1534,68 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, } #else static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i64 addr, int size, int is_pair) + TCGv_i64 inaddr, int size, int is_pair) { - qemu_log_mask(LOG_UNIMP, - "%s:%d: system mode store_exclusive unsupported " - "at pc=%016" PRIx64 "\n", - __FILE__, __LINE__, s->pc - 4); + /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] + * && (!is_pair || env->exclusive_high == [addr + datasize])) { + * [addr] = {Rt}; + * if (is_pair) { + * [addr + datasize] = {Rt2}; + * } + * {Rd} = 0; + * } else { + * {Rd} = 1; + * } + * env->exclusive_addr = -1; + */ + int fail_label = gen_new_label(); + int done_label = gen_new_label(); + TCGv_i64 addr = tcg_temp_local_new_i64(); + TCGv_i64 tmp; + + /* Copy input into a local temp so it is not trashed when the + * basic block ends at the branch insn. + */ + tcg_gen_mov_i64(addr, inaddr); + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size); + tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label); + tcg_temp_free_i64(tmp); + + if (is_pair) { + TCGv_i64 addrhi = tcg_temp_new_i64(); + TCGv_i64 tmphi = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addrhi, addr, 1 << size); + tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size); + tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label); + + tcg_temp_free_i64(tmphi); + tcg_temp_free_i64(addrhi); + } + + /* We seem to still have the exclusive monitor, so do the store */ + tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size); + if (is_pair) { + TCGv_i64 addrhi = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addrhi, addr, 1 << size); + tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi, + get_mem_index(s), MO_TE + size); + tcg_temp_free_i64(addrhi); + } + + tcg_temp_free_i64(addr); + + tcg_gen_movi_i64(cpu_reg(s, rd), 0); + tcg_gen_br(done_label); + gen_set_label(fail_label); + tcg_gen_movi_i64(cpu_reg(s, rd), 1); + gen_set_label(done_label); + tcg_gen_movi_i64(cpu_exclusive_addr, -1); + } #endif @@ -1536,6 +1869,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) * +----+-------+---+-----+-----+---+--------+-----+------+------+ * * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback) + 10 -> unprivileged * V = 0 -> non-vector * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 @@ -1551,6 +1885,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) bool is_signed = false; bool is_store = false; bool is_extended = false; + bool is_unpriv = (idx == 2); bool is_vector = extract32(insn, 26, 1); bool post_index; bool writeback; @@ -1559,7 +1894,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) if (is_vector) { size |= (opc & 2) << 1; - if (size > 4) { + if (size > 4 || is_unpriv) { unallocated_encoding(s); return; } @@ -1567,6 +1902,10 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) } else { if (size == 3 && opc == 2) { /* PRFM - prefetch */ + if (is_unpriv) { + unallocated_encoding(s); + return; + } return; } if (opc == 3 && size > 1) { @@ -1580,6 +1919,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) switch (idx) { case 0: + case 2: post_index = false; writeback = false; break; @@ -1591,9 +1931,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) post_index = false; writeback = true; break; - case 2: - g_assert(false); - break; } if (rn == 31) { @@ -1613,10 +1950,13 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); + int memidx = is_unpriv ? 1 : get_mem_index(s); + if (is_store) { - do_gpr_st(s, tcg_rt, tcg_addr, size); + do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx); } else { - do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended); + do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size, + is_signed, is_extended, memidx); } } @@ -1796,25 +2136,6 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn) } } -/* Load/store register (immediate forms) */ -static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn) -{ - switch (extract32(insn, 10, 2)) { - case 0: case 1: case 3: - /* Load/store register (unscaled immediate) */ - /* Load/store immediate pre/post-indexed */ - disas_ldst_reg_imm9(s, insn); - break; - case 2: - /* Load/store register unprivileged */ - unsupported_encoding(s, insn); - break; - default: - unallocated_encoding(s); - break; - } -} - /* Load/store register (all forms) */ static void disas_ldst_reg(DisasContext *s, uint32_t insn) { @@ -1823,7 +2144,11 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) { disas_ldst_reg_roffset(s, insn); } else { - disas_ldst_reg_imm(s, insn); + /* Load/store register (unscaled immediate) + * Load/store immediate pre/post-indexed + * Load/store register unprivileged + */ + disas_ldst_reg_imm9(s, insn); } break; case 1: @@ -1835,16 +2160,278 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) } } -/* AdvSIMD load/store multiple structures */ +/* C3.3.1 AdvSIMD load/store multiple structures + * + * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 + * +---+---+---------------+---+-------------+--------+------+------+------+ + * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | + * +---+---+---------------+---+-------------+--------+------+------+------+ + * + * C3.3.2 AdvSIMD load/store multiple structures (post-indexed) + * + * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---------------+---+---+---------+--------+------+------+------+ + * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | + * +---+---+---------------+---+---+---------+--------+------+------+------+ + * + * Rt: first (or only) SIMD&FP register to be transferred + * Rn: base address or SP + * Rm (post-index only): post-index register (when !31) or size dependent #imm + */ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 10, 2); + int opcode = extract32(insn, 12, 4); + bool is_store = !extract32(insn, 22, 1); + bool is_postidx = extract32(insn, 23, 1); + bool is_q = extract32(insn, 30, 1); + TCGv_i64 tcg_addr, tcg_rn; + + int ebytes = 1 << size; + int elements = (is_q ? 128 : 64) / (8 << size); + int rpt; /* num iterations */ + int selem; /* structure elements */ + int r; + + if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { + unallocated_encoding(s); + return; + } + + /* From the shared decode logic */ + switch (opcode) { + case 0x0: + rpt = 1; + selem = 4; + break; + case 0x2: + rpt = 4; + selem = 1; + break; + case 0x4: + rpt = 1; + selem = 3; + break; + case 0x6: + rpt = 3; + selem = 1; + break; + case 0x7: + rpt = 1; + selem = 1; + break; + case 0x8: + rpt = 1; + selem = 2; + break; + case 0xa: + rpt = 2; + selem = 1; + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 3 && !is_q && selem != 1) { + /* reserved */ + unallocated_encoding(s); + return; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_addr = tcg_temp_new_i64(); + tcg_gen_mov_i64(tcg_addr, tcg_rn); + + for (r = 0; r < rpt; r++) { + int e; + for (e = 0; e < elements; e++) { + int tt = (rt + r) % 32; + int xs; + for (xs = 0; xs < selem; xs++) { + if (is_store) { + do_vec_st(s, tt, e, tcg_addr, size); + } else { + do_vec_ld(s, tt, e, tcg_addr, size); + + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). We optimize + * by noticing that we only need to do this the first + * time we touch a register. + */ + if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) { + clear_vec_high(s, tt); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + tt = (tt + 1) % 32; + } + } + } + + if (is_postidx) { + int rm = extract32(insn, 16, 5); + if (rm == 31) { + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + } + } + tcg_temp_free_i64(tcg_addr); } -/* AdvSIMD load/store single structure */ +/* C3.3.3 AdvSIMD load/store single structure + * + * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * + * C3.3.4 AdvSIMD load/store single structure (post-indexed) + * + * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * + * Rt: first (or only) SIMD&FP register to be transferred + * Rn: base address or SP + * Rm (post-index only): post-index register (when !31) or size dependent #imm + * index = encoded in Q:S:size dependent on size + * + * lane_size = encoded in R, opc + * transfer width = encoded in opc, S, size + */ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 10, 2); + int S = extract32(insn, 12, 1); + int opc = extract32(insn, 13, 3); + int R = extract32(insn, 21, 1); + int is_load = extract32(insn, 22, 1); + int is_postidx = extract32(insn, 23, 1); + int is_q = extract32(insn, 30, 1); + + int scale = extract32(opc, 1, 2); + int selem = (extract32(opc, 0, 1) << 1 | R) + 1; + bool replicate = false; + int index = is_q << 3 | S << 2 | size; + int ebytes, xs; + TCGv_i64 tcg_addr, tcg_rn; + + switch (scale) { + case 3: + if (!is_load || S) { + unallocated_encoding(s); + return; + } + scale = size; + replicate = true; + break; + case 0: + break; + case 1: + if (extract32(size, 0, 1)) { + unallocated_encoding(s); + return; + } + index >>= 1; + break; + case 2: + if (extract32(size, 1, 1)) { + unallocated_encoding(s); + return; + } + if (!extract32(size, 0, 1)) { + index >>= 2; + } else { + if (S) { + unallocated_encoding(s); + return; + } + index >>= 3; + scale = 3; + } + break; + default: + g_assert_not_reached(); + } + + ebytes = 1 << scale; + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_addr = tcg_temp_new_i64(); + tcg_gen_mov_i64(tcg_addr, tcg_rn); + + for (xs = 0; xs < selem; xs++) { + if (replicate) { + /* Load and replicate to all elements */ + uint64_t mulconst; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, + get_mem_index(s), MO_TE + scale); + switch (scale) { + case 0: + mulconst = 0x0101010101010101ULL; + break; + case 1: + mulconst = 0x0001000100010001ULL; + break; + case 2: + mulconst = 0x0000000100000001ULL; + break; + case 3: + mulconst = 0; + break; + default: + g_assert_not_reached(); + } + if (mulconst) { + tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst); + } + write_vec_element(s, tcg_tmp, rt, 0, MO_64); + if (is_q) { + write_vec_element(s, tcg_tmp, rt, 1, MO_64); + } else { + clear_vec_high(s, rt); + } + tcg_temp_free_i64(tcg_tmp); + } else { + /* Load/store one element per register */ + if (is_load) { + do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale); + } else { + do_vec_st(s, rt, index, tcg_addr, MO_TE + scale); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + rt = (rt + 1) % 32; + } + + if (is_postidx) { + int rm = extract32(insn, 16, 5); + if (rm == 31) { + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + } + } + tcg_temp_free_i64(tcg_addr); } /* C3.3 Loads and stores */ @@ -3186,34 +3773,6 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } } -/* Convert ARM rounding mode to softfloat */ -static inline int arm_rmode_to_sf(int rmode) -{ - switch (rmode) { - case FPROUNDING_TIEAWAY: - rmode = float_round_ties_away; - break; - case FPROUNDING_ODD: - /* FIXME: add support for TIEAWAY and ODD */ - qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n", - rmode); - case FPROUNDING_TIEEVEN: - default: - rmode = float_round_nearest_even; - break; - case FPROUNDING_POSINF: - rmode = float_round_up; - break; - case FPROUNDING_NEGINF: - rmode = float_round_down; - break; - case FPROUNDING_ZERO: - rmode = float_round_to_zero; - break; - } - return rmode; -} - static void handle_fp_compare(DisasContext *s, bool is_double, unsigned int rn, unsigned int rm, bool cmp_with_zero, bool signal_all_nans) @@ -4224,13 +4783,4174 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn) } } +static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, + int pos) +{ + /* Extract 64 bits from the middle of two concatenated 64 bit + * vector register slices left:right. The extracted bits start + * at 'pos' bits into the right (least significant) side. + * We return the result in tcg_right, and guarantee not to + * trash tcg_left. + */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + assert(pos > 0 && pos < 64); + + tcg_gen_shri_i64(tcg_right, tcg_right, pos); + tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); + tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); + + tcg_temp_free_i64(tcg_tmp); +} + +/* C3.6.1 EXT + * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 + * +---+---+-------------+-----+---+------+---+------+---+------+------+ + * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | + * +---+---+-------------+-----+---+------+---+------+---+------+------+ + */ +static void disas_simd_ext(DisasContext *s, uint32_t insn) +{ + int is_q = extract32(insn, 30, 1); + int op2 = extract32(insn, 22, 2); + int imm4 = extract32(insn, 11, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int pos = imm4 << 3; + TCGv_i64 tcg_resl, tcg_resh; + + if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { + unallocated_encoding(s); + return; + } + + tcg_resh = tcg_temp_new_i64(); + tcg_resl = tcg_temp_new_i64(); + + /* Vd gets bits starting at pos bits into Vm:Vn. This is + * either extracting 128 bits from a 128:128 concatenation, or + * extracting 64 bits from a 64:64 concatenation. + */ + if (!is_q) { + read_vec_element(s, tcg_resl, rn, 0, MO_64); + if (pos != 0) { + read_vec_element(s, tcg_resh, rm, 0, MO_64); + do_ext64(s, tcg_resh, tcg_resl, pos); + } + tcg_gen_movi_i64(tcg_resh, 0); + } else { + TCGv_i64 tcg_hh; + typedef struct { + int reg; + int elt; + } EltPosns; + EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; + EltPosns *elt = eltposns; + + if (pos >= 64) { + elt++; + pos -= 64; + } + + read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); + elt++; + read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); + elt++; + if (pos != 0) { + do_ext64(s, tcg_resh, tcg_resl, pos); + tcg_hh = tcg_temp_new_i64(); + read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); + do_ext64(s, tcg_hh, tcg_resh, pos); + tcg_temp_free_i64(tcg_hh); + } + } + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +/* C3.6.2 TBL/TBX + * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 + * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ + * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | + * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ + */ +static void disas_simd_tb(DisasContext *s, uint32_t insn) +{ + int op2 = extract32(insn, 22, 2); + int is_q = extract32(insn, 30, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int is_tblx = extract32(insn, 12, 1); + int len = extract32(insn, 13, 2); + TCGv_i64 tcg_resl, tcg_resh, tcg_idx; + TCGv_i32 tcg_regno, tcg_numregs; + + if (op2 != 0) { + unallocated_encoding(s); + return; + } + + /* This does a table lookup: for every byte element in the input + * we index into a table formed from up to four vector registers, + * and then the output is the result of the lookups. Our helper + * function does the lookup operation for a single 64 bit part of + * the input. + */ + tcg_resl = tcg_temp_new_i64(); + tcg_resh = tcg_temp_new_i64(); + + if (is_tblx) { + read_vec_element(s, tcg_resl, rd, 0, MO_64); + } else { + tcg_gen_movi_i64(tcg_resl, 0); + } + if (is_tblx && is_q) { + read_vec_element(s, tcg_resh, rd, 1, MO_64); + } else { + tcg_gen_movi_i64(tcg_resh, 0); + } + + tcg_idx = tcg_temp_new_i64(); + tcg_regno = tcg_const_i32(rn); + tcg_numregs = tcg_const_i32(len + 1); + read_vec_element(s, tcg_idx, rm, 0, MO_64); + gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx, + tcg_regno, tcg_numregs); + if (is_q) { + read_vec_element(s, tcg_idx, rm, 1, MO_64); + gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx, + tcg_regno, tcg_numregs); + } + tcg_temp_free_i64(tcg_idx); + tcg_temp_free_i32(tcg_regno); + tcg_temp_free_i32(tcg_numregs); + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +/* C3.6.3 ZIP/UZP/TRN + * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 + * +---+---+-------------+------+---+------+---+------------------+------+ + * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | + * +---+---+-------------+------+---+------+---+------------------+------+ + */ +static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + /* opc field bits [1:0] indicate ZIP/UZP/TRN; + * bit 2 indicates 1 vs 2 variant of the insn. + */ + int opcode = extract32(insn, 12, 2); + bool part = extract32(insn, 14, 1); + bool is_q = extract32(insn, 30, 1); + int esize = 8 << size; + int i, ofs; + int datasize = is_q ? 128 : 64; + int elements = datasize / esize; + TCGv_i64 tcg_res, tcg_resl, tcg_resh; + + if (opcode == 0 || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + + tcg_resl = tcg_const_i64(0); + tcg_resh = tcg_const_i64(0); + tcg_res = tcg_temp_new_i64(); + + for (i = 0; i < elements; i++) { + switch (opcode) { + case 1: /* UZP1/2 */ + { + int midpoint = elements / 2; + if (i < midpoint) { + read_vec_element(s, tcg_res, rn, 2 * i + part, size); + } else { + read_vec_element(s, tcg_res, rm, + 2 * (i - midpoint) + part, size); + } + break; + } + case 2: /* TRN1/2 */ + if (i & 1) { + read_vec_element(s, tcg_res, rm, (i & ~1) + part, size); + } else { + read_vec_element(s, tcg_res, rn, (i & ~1) + part, size); + } + break; + case 3: /* ZIP1/2 */ + { + int base = part * elements / 2; + if (i & 1) { + read_vec_element(s, tcg_res, rm, base + (i >> 1), size); + } else { + read_vec_element(s, tcg_res, rn, base + (i >> 1), size); + } + break; + } + default: + g_assert_not_reached(); + } + + ofs = i * esize; + if (ofs < 64) { + tcg_gen_shli_i64(tcg_res, tcg_res, ofs); + tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res); + } else { + tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64); + tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res); + } + } + + tcg_temp_free_i64(tcg_res); + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2, + int opc, bool is_min, TCGv_ptr fpst) +{ + /* Helper function for disas_simd_across_lanes: do a single precision + * min/max operation on the specified two inputs, + * and return the result in tcg_elt1. + */ + if (opc == 0xc) { + if (is_min) { + gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } else { + gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } + } else { + assert(opc == 0xf); + if (is_min) { + gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } else { + gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } + } +} + +/* C3.6.4 AdvSIMD across lanes + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool is_q = extract32(insn, 30, 1); + bool is_u = extract32(insn, 29, 1); + bool is_fp = false; + bool is_min = false; + int esize; + int elements; + int i; + TCGv_i64 tcg_res, tcg_elt; + + switch (opcode) { + case 0x1b: /* ADDV */ + if (is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x3: /* SADDLV, UADDLV */ + case 0xa: /* SMAXV, UMAXV */ + case 0x1a: /* SMINV, UMINV */ + if (size == 3 || (size == 2 && !is_q)) { + unallocated_encoding(s); + return; + } + break; + case 0xc: /* FMAXNMV, FMINNMV */ + case 0xf: /* FMAXV, FMINV */ + if (!is_u || !is_q || extract32(size, 0, 1)) { + unallocated_encoding(s); + return; + } + /* Bit 1 of size field encodes min vs max, and actual size is always + * 32 bits: adjust the size variable so following code can rely on it + */ + is_min = extract32(size, 1, 1); + is_fp = true; + size = 2; + break; + default: + unallocated_encoding(s); + return; + } + + esize = 8 << size; + elements = (is_q ? 128 : 64) / esize; + + tcg_res = tcg_temp_new_i64(); + tcg_elt = tcg_temp_new_i64(); + + /* These instructions operate across all lanes of a vector + * to produce a single result. We can guarantee that a 64 + * bit intermediate is sufficient: + * + for [US]ADDLV the maximum element size is 32 bits, and + * the result type is 64 bits + * + for FMAX*V, FMIN*V, ADDV the intermediate type is the + * same as the element size, which is 32 bits at most + * For the integer operations we can choose to work at 64 + * or 32 bits and truncate at the end; for simplicity + * we use 64 bits always. The floating point + * ops do require 32 bit intermediates, though. + */ + if (!is_fp) { + read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); + + for (i = 1; i < elements; i++) { + read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); + + switch (opcode) { + case 0x03: /* SADDLV / UADDLV */ + case 0x1b: /* ADDV */ + tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); + break; + case 0x0a: /* SMAXV / UMAXV */ + tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, + tcg_res, + tcg_res, tcg_elt, tcg_res, tcg_elt); + break; + case 0x1a: /* SMINV / UMINV */ + tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE, + tcg_res, + tcg_res, tcg_elt, tcg_res, tcg_elt); + break; + break; + default: + g_assert_not_reached(); + } + + } + } else { + /* Floating point ops which work on 32 bit (single) intermediates. + * Note that correct NaN propagation requires that we do these + * operations in exactly the order specified by the pseudocode. + */ + TCGv_i32 tcg_elt1 = tcg_temp_new_i32(); + TCGv_i32 tcg_elt2 = tcg_temp_new_i32(); + TCGv_i32 tcg_elt3 = tcg_temp_new_i32(); + TCGv_ptr fpst = get_fpstatus_ptr(); + + assert(esize == 32); + assert(elements == 4); + + read_vec_element(s, tcg_elt, rn, 0, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt); + read_vec_element(s, tcg_elt, rn, 1, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + + do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); + + read_vec_element(s, tcg_elt, rn, 2, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + read_vec_element(s, tcg_elt, rn, 3, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt); + + do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst); + + do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); + + tcg_gen_extu_i32_i64(tcg_res, tcg_elt1); + tcg_temp_free_i32(tcg_elt1); + tcg_temp_free_i32(tcg_elt2); + tcg_temp_free_i32(tcg_elt3); + tcg_temp_free_ptr(fpst); + } + + tcg_temp_free_i64(tcg_elt); + + /* Now truncate the result to the width required for the final output */ + if (opcode == 0x03) { + /* SADDLV, UADDLV: result is 2*esize */ + size++; + } + + switch (size) { + case 0: + tcg_gen_ext8u_i64(tcg_res, tcg_res); + break; + case 1: + tcg_gen_ext16u_i64(tcg_res, tcg_res); + break; + case 2: + tcg_gen_ext32u_i64(tcg_res, tcg_res); + break; + case 3: + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + tcg_temp_free_i64(tcg_res); +} + +/* C6.3.31 DUP (Element, Vector) + * + * 31 30 29 21 20 16 15 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int esize = 8 << size; + int elements = (is_q ? 128 : 64) / esize; + int index, i; + TCGv_i64 tmp; + + if (size > 3 || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + + index = imm5 >> (size + 1); + + tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, rn, index, size); + + for (i = 0; i < elements; i++) { + write_vec_element(s, tmp, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tmp); +} + +/* C6.3.31 DUP (element, scalar) + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+--------+-------------+------+------+ + * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | + * +-----------------------+--------+-------------+------+------+ + */ +static void handle_simd_dupes(DisasContext *s, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int index; + TCGv_i64 tmp; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + index = imm5 >> (size + 1); + + /* This instruction just extracts the specified element and + * zero-extends it into the bottom of the destination register. + */ + tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, rn, index, size); + write_fp_dreg(s, rd, tmp); + tcg_temp_free_i64(tmp); +} + +/* C6.3.32 DUP (General) + * + * 31 30 29 21 20 16 15 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int esize = 8 << size; + int elements = (is_q ? 128 : 64)/esize; + int i = 0; + + if (size > 3 || ((size == 3) && !is_q)) { + unallocated_encoding(s); + return; + } + for (i = 0; i < elements; i++) { + write_vec_element(s, cpu_reg(s, rn), rd, i, size); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C6.3.150 INS (Element) + * + * 31 21 20 16 15 14 11 10 9 5 4 0 + * +-----------------------+--------+------------+---+------+------+ + * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +-----------------------+--------+------------+---+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + * index: encoded in imm5<4:size+1> + */ +static void handle_simd_inse(DisasContext *s, int rd, int rn, + int imm4, int imm5) +{ + int size = ctz32(imm5); + int src_index, dst_index; + TCGv_i64 tmp; + + if (size > 3) { + unallocated_encoding(s); + return; + } + dst_index = extract32(imm5, 1+size, 5); + src_index = extract32(imm4, size, 4); + + tmp = tcg_temp_new_i64(); + + read_vec_element(s, tmp, rn, src_index, size); + write_vec_element(s, tmp, rd, dst_index, size); + + tcg_temp_free_i64(tmp); +} + + +/* C6.3.151 INS (General) + * + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+--------+-------------+------+------+ + * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | + * +-----------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + * index: encoded in imm5<4:size+1> + */ +static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) +{ + int size = ctz32(imm5); + int idx; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + idx = extract32(imm5, 1 + size, 4 - size); + write_vec_element(s, cpu_reg(s, rn), rd, idx, size); +} + +/* + * C6.3.321 UMOV (General) + * C6.3.237 SMOV (General) + * + * 31 30 29 21 20 16 15 12 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * U: unsigned when set + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, + int rn, int rd, int imm5) +{ + int size = ctz32(imm5); + int element; + TCGv_i64 tcg_rd; + + /* Check for UnallocatedEncodings */ + if (is_signed) { + if (size > 2 || (size == 2 && !is_q)) { + unallocated_encoding(s); + return; + } + } else { + if (size > 3 + || (size < 3 && is_q) + || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + } + element = extract32(imm5, 1+size, 4); + + tcg_rd = cpu_reg(s, rd); + read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0)); + if (is_signed && !is_q) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } +} + +/* C3.6.5 AdvSIMD copy + * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 + * +---+---+----+-----------------+------+---+------+---+------+------+ + * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +---+---+----+-----------------+------+---+------+---+------+------+ + */ +static void disas_simd_copy(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm4 = extract32(insn, 11, 4); + int op = extract32(insn, 29, 1); + int is_q = extract32(insn, 30, 1); + int imm5 = extract32(insn, 16, 5); + + if (op) { + if (is_q) { + /* INS (element) */ + handle_simd_inse(s, rd, rn, imm4, imm5); + } else { + unallocated_encoding(s); + } + } else { + switch (imm4) { + case 0: + /* DUP (element - vector) */ + handle_simd_dupe(s, is_q, rd, rn, imm5); + break; + case 1: + /* DUP (general) */ + handle_simd_dupg(s, is_q, rd, rn, imm5); + break; + case 3: + if (is_q) { + /* INS (general) */ + handle_simd_insg(s, rd, rn, imm5); + } else { + unallocated_encoding(s); + } + break; + case 5: + case 7: + /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ + handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); + break; + default: + unallocated_encoding(s); + break; + } + } +} + +/* C3.6.6 AdvSIMD modified immediate + * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 + * +---+---+----+---------------------+-----+-------+----+---+-------+------+ + * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | + * +---+---+----+---------------------+-----+-------+----+---+-------+------+ + * + * There are a number of operations that can be carried out here: + * MOVI - move (shifted) imm into register + * MVNI - move inverted (shifted) imm into register + * ORR - bitwise OR of (shifted) imm with register + * BIC - bitwise clear of (shifted) imm with register + */ +static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int cmode = extract32(insn, 12, 4); + int cmode_3_1 = extract32(cmode, 1, 3); + int cmode_0 = extract32(cmode, 0, 1); + int o2 = extract32(insn, 11, 1); + uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); + bool is_neg = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + uint64_t imm = 0; + TCGv_i64 tcg_rd, tcg_imm; + int i; + + if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { + unallocated_encoding(s); + return; + } + + /* See AdvSIMDExpandImm() in ARM ARM */ + switch (cmode_3_1) { + case 0: /* Replicate(Zeros(24):imm8, 2) */ + case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */ + case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */ + case 3: /* Replicate(imm8:Zeros(24), 2) */ + { + int shift = cmode_3_1 * 8; + imm = bitfield_replicate(abcdefgh << shift, 32); + break; + } + case 4: /* Replicate(Zeros(8):imm8, 4) */ + case 5: /* Replicate(imm8:Zeros(8), 4) */ + { + int shift = (cmode_3_1 & 0x1) * 8; + imm = bitfield_replicate(abcdefgh << shift, 16); + break; + } + case 6: + if (cmode_0) { + /* Replicate(Zeros(8):imm8:Ones(16), 2) */ + imm = (abcdefgh << 16) | 0xffff; + } else { + /* Replicate(Zeros(16):imm8:Ones(8), 2) */ + imm = (abcdefgh << 8) | 0xff; + } + imm = bitfield_replicate(imm, 32); + break; + case 7: + if (!cmode_0 && !is_neg) { + imm = bitfield_replicate(abcdefgh, 8); + } else if (!cmode_0 && is_neg) { + int i; + imm = 0; + for (i = 0; i < 8; i++) { + if ((abcdefgh) & (1 << i)) { + imm |= 0xffULL << (i * 8); + } + } + } else if (cmode_0) { + if (is_neg) { + imm = (abcdefgh & 0x3f) << 48; + if (abcdefgh & 0x80) { + imm |= 0x8000000000000000ULL; + } + if (abcdefgh & 0x40) { + imm |= 0x3fc0000000000000ULL; + } else { + imm |= 0x4000000000000000ULL; + } + } else { + imm = (abcdefgh & 0x3f) << 19; + if (abcdefgh & 0x80) { + imm |= 0x80000000; + } + if (abcdefgh & 0x40) { + imm |= 0x3e000000; + } else { + imm |= 0x40000000; + } + imm |= (imm << 32); + } + } + break; + } + + if (cmode_3_1 != 7 && is_neg) { + imm = ~imm; + } + + tcg_imm = tcg_const_i64(imm); + tcg_rd = new_tmp_a64(s); + + for (i = 0; i < 2; i++) { + int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64); + + if (i == 1 && !is_q) { + /* non-quad ops clear high half of vector */ + tcg_gen_movi_i64(tcg_rd, 0); + } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) { + tcg_gen_ld_i64(tcg_rd, cpu_env, foffs); + if (is_neg) { + /* AND (BIC) */ + tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm); + } else { + /* ORR */ + tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm); + } + } else { + /* MOVI */ + tcg_gen_mov_i64(tcg_rd, tcg_imm); + } + tcg_gen_st_i64(tcg_rd, cpu_env, foffs); + } + + tcg_temp_free_i64(tcg_imm); +} + +/* C3.6.7 AdvSIMD scalar copy + * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 + * +-----+----+-----------------+------+---+------+---+------+------+ + * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +-----+----+-----------------+------+---+------+---+------+------+ + */ +static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm4 = extract32(insn, 11, 4); + int imm5 = extract32(insn, 16, 5); + int op = extract32(insn, 29, 1); + + if (op != 0 || imm4 != 0) { + unallocated_encoding(s); + return; + } + + /* DUP (element, scalar) */ + handle_simd_dupes(s, rd, rn, imm5); +} + +/* C3.6.8 AdvSIMD scalar pairwise + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) +{ + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + TCGv_ptr fpst; + + /* For some ops (the FP ones), size[1] is part of the encoding. + * For ADDP strictly it is not but size[1] is always 1 for valid + * encodings. + */ + opcode |= (extract32(size, 1, 1) << 5); + + switch (opcode) { + case 0x3b: /* ADDP */ + if (u || size != 3) { + unallocated_encoding(s); + return; + } + TCGV_UNUSED_PTR(fpst); + break; + case 0xc: /* FMAXNMP */ + case 0xd: /* FADDP */ + case 0xf: /* FMAXP */ + case 0x2c: /* FMINNMP */ + case 0x2f: /* FMINP */ + /* FP op, size[0] is 32 or 64 bit */ + if (!u) { + unallocated_encoding(s); + return; + } + size = extract32(size, 0, 1) ? 3 : 2; + fpst = get_fpstatus_ptr(); + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 3) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, 0, MO_64); + read_vec_element(s, tcg_op2, rn, 1, MO_64); + + switch (opcode) { + case 0x3b: /* ADDP */ + tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); + break; + case 0xc: /* FMAXNMP */ + gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xd: /* FADDP */ + gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xf: /* FMAXP */ + gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2c: /* FMINNMP */ + gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2f: /* FMINP */ + gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op1, rn, 0, MO_32); + read_vec_element_i32(s, tcg_op2, rn, 1, MO_32); + + switch (opcode) { + case 0xc: /* FMAXNMP */ + gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xd: /* FADDP */ + gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xf: /* FMAXP */ + gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2c: /* FMINNMP */ + gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2f: /* FMINP */ + gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_res); + } + + if (!TCGV_IS_UNUSED_PTR(fpst)) { + tcg_temp_free_ptr(fpst); + } +} + +/* + * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) + * + * This code is handles the common shifting code and is used by both + * the vector and scalar code. + */ +static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + TCGv_i64 tcg_rnd, bool accumulate, + bool is_u, int size, int shift) +{ + bool extended_result = false; + bool round = !TCGV_IS_UNUSED_I64(tcg_rnd); + int ext_lshift = 0; + TCGv_i64 tcg_src_hi; + + if (round && size == 3) { + extended_result = true; + ext_lshift = 64 - shift; + tcg_src_hi = tcg_temp_new_i64(); + } else if (shift == 64) { + if (!accumulate && is_u) { + /* result is zero */ + tcg_gen_movi_i64(tcg_res, 0); + return; + } + } + + /* Deal with the rounding step */ + if (round) { + if (extended_result) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + if (!is_u) { + /* take care of sign extending tcg_res */ + tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); + tcg_gen_add2_i64(tcg_src, tcg_src_hi, + tcg_src, tcg_src_hi, + tcg_rnd, tcg_zero); + } else { + tcg_gen_add2_i64(tcg_src, tcg_src_hi, + tcg_src, tcg_zero, + tcg_rnd, tcg_zero); + } + tcg_temp_free_i64(tcg_zero); + } else { + tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); + } + } + + /* Now do the shift right */ + if (round && extended_result) { + /* extended case, >64 bit precision required */ + if (ext_lshift == 0) { + /* special case, only high bits matter */ + tcg_gen_mov_i64(tcg_src, tcg_src_hi); + } else { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); + tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); + } + } else { + if (is_u) { + if (shift == 64) { + /* essentially shifting in 64 zeros */ + tcg_gen_movi_i64(tcg_src, 0); + } else { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + } + } else { + if (shift == 64) { + /* effectively extending the sign-bit */ + tcg_gen_sari_i64(tcg_src, tcg_src, 63); + } else { + tcg_gen_sari_i64(tcg_src, tcg_src, shift); + } + } + } + + if (accumulate) { + tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); + } else { + tcg_gen_mov_i64(tcg_res, tcg_src); + } + + if (extended_result) { + tcg_temp_free_i64(tcg_src_hi); + } +} + +/* Common SHL/SLI - Shift left with an optional insert */ +static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + bool insert, int shift) +{ + if (insert) { /* SLI */ + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift); + } else { /* SHL */ + tcg_gen_shli_i64(tcg_res, tcg_src, shift); + } +} + +/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ +static void handle_scalar_simd_shri(DisasContext *s, + bool is_u, int immh, int immb, + int opcode, int rn, int rd) +{ + const int size = 3; + int immhb = immh << 3 | immb; + int shift = 2 * (8 << size) - immhb; + bool accumulate = false; + bool round = false; + TCGv_i64 tcg_rn; + TCGv_i64 tcg_rd; + TCGv_i64 tcg_round; + + if (!extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x02: /* SSRA / USRA (accumulate) */ + accumulate = true; + break; + case 0x04: /* SRSHR / URSHR (rounding) */ + round = true; + break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + accumulate = round = true; + break; + } + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + tcg_rn = read_fp_dreg(s, rn); + tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + if (round) { + tcg_temp_free_i64(tcg_round); + } +} + +/* SHL/SLI - Scalar shift left */ +static void handle_scalar_simd_shli(DisasContext *s, bool insert, + int immh, int immb, int opcode, + int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + + if (!extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + tcg_rn = read_fp_dreg(s, rn); + tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); +} + +/* C3.6.9 AdvSIMD scalar shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +-----+---+-------------+------+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +-----+---+-------------+------+------+--------+---+------+------+ + * + * This is the scalar version so it works on a fixed sized registers + */ +static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + + switch (opcode) { + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA */ + case 0x04: /* SRSHR / URSHR */ + case 0x06: /* SRSRA / URSRA */ + handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); + break; + default: + unsupported_encoding(s, insn); + break; + } +} + +/* C3.6.10 AdvSIMD scalar three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) +{ + bool is_u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + if (is_u) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x9: /* SQDMLAL, SQDMLAL2 */ + case 0xb: /* SQDMLSL, SQDMLSL2 */ + case 0xd: /* SQDMULL, SQDMULL2 */ + if (size == 0 || size == 3) { + unallocated_encoding(s); + return; + } + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 2) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); + read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); + + tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); + + switch (opcode) { + case 0xd: /* SQDMULL, SQDMULL2 */ + break; + case 0xb: /* SQDMLSL, SQDMLSL2 */ + tcg_gen_neg_i64(tcg_res, tcg_res); + /* fall through */ + case 0x9: /* SQDMLAL, SQDMLAL2 */ + read_vec_element(s, tcg_op1, rd, 0, MO_64); + gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, + tcg_res, tcg_op1); + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element_i32(s, tcg_op1, rn, 0, MO_16); + read_vec_element_i32(s, tcg_op2, rm, 0, MO_16); + + gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); + + switch (opcode) { + case 0xd: /* SQDMULL, SQDMULL2 */ + break; + case 0xb: /* SQDMLSL, SQDMLSL2 */ + gen_helper_neon_negl_u32(tcg_res, tcg_res); + /* fall through */ + case 0x9: /* SQDMLAL, SQDMLAL2 */ + { + TCGv_i64 tcg_op3 = tcg_temp_new_i64(); + read_vec_element(s, tcg_op3, rd, 0, MO_32); + gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, + tcg_res, tcg_op3); + tcg_temp_free_i64(tcg_op3); + break; + } + default: + g_assert_not_reached(); + } + + tcg_gen_ext32u_i64(tcg_res, tcg_res); + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i64(tcg_res); + } +} + +static void handle_3same_64(DisasContext *s, int opcode, bool u, + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) +{ + /* Handle 64x64->64 opcodes which are shared between the scalar + * and vector 3-same groups. We cover every opcode where size == 3 + * is valid in either the three-reg-same (integer, not pairwise) + * or scalar-three-reg-same groups. (Some opcodes are not yet + * implemented.) + */ + TCGCond cond; + + switch (opcode) { + case 0x1: /* SQADD */ + if (u) { + gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0x5: /* SQSUB */ + if (u) { + gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0x6: /* CMGT, CMHI */ + /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. + * We implement this using setcond (test) and then negating. + */ + cond = u ? TCG_COND_GTU : TCG_COND_GT; + do_cmop: + tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x7: /* CMGE, CMHS */ + cond = u ? TCG_COND_GEU : TCG_COND_GE; + goto do_cmop; + case 0x11: /* CMTST, CMEQ */ + if (u) { + cond = TCG_COND_EQ; + goto do_cmop; + } + /* CMTST : test is "if (X & Y != 0)". */ + tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); + tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x8: /* SSHL, USHL */ + if (u) { + gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm); + } else { + gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm); + } + break; + case 0x9: /* SQSHL, UQSHL */ + if (u) { + gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0xa: /* SRSHL, URSHL */ + if (u) { + gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); + } else { + gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); + } + break; + case 0xb: /* SQRSHL, UQRSHL */ + if (u) { + gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0x10: /* ADD, SUB */ + if (u) { + tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); + } + break; + default: + g_assert_not_reached(); + } +} + +/* Handle the 3-same-operands float operations; shared by the scalar + * and vector encodings. The caller must filter out any encodings + * not allocated for the encoding it is dealing with. + */ +static void handle_3same_float(DisasContext *s, int size, int elements, + int fpopcode, int rd, int rn, int rm) +{ + int pass; + TCGv_ptr fpst = get_fpstatus_ptr(); + + for (pass = 0; pass < elements; pass++) { + if (size) { + /* Double */ + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + switch (fpopcode) { + case 0x39: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negd(tcg_op1, tcg_op1); + /* fall through */ + case 0x19: /* FMLA */ + read_vec_element(s, tcg_res, rd, pass, MO_64); + gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); + break; + case 0x18: /* FMAXNM */ + gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1a: /* FADD */ + gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1b: /* FMULX */ + gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1c: /* FCMEQ */ + gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1e: /* FMAX */ + gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1f: /* FRECPS */ + gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x38: /* FMINNM */ + gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3a: /* FSUB */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3e: /* FMIN */ + gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3f: /* FRSQRTS */ + gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5b: /* FMUL */ + gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5c: /* FCMGE */ + gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5d: /* FACGE */ + gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5f: /* FDIV */ + gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7a: /* FABD */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_absd(tcg_res, tcg_res); + break; + case 0x7c: /* FCMGT */ + gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7d: /* FACGT */ + gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } else { + /* Single */ + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); + read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); + + switch (fpopcode) { + case 0x39: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negs(tcg_op1, tcg_op1); + /* fall through */ + case 0x19: /* FMLA */ + read_vec_element_i32(s, tcg_res, rd, pass, MO_32); + gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); + break; + case 0x1a: /* FADD */ + gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1b: /* FMULX */ + gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1c: /* FCMEQ */ + gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1e: /* FMAX */ + gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1f: /* FRECPS */ + gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x18: /* FMAXNM */ + gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x38: /* FMINNM */ + gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3a: /* FSUB */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3e: /* FMIN */ + gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3f: /* FRSQRTS */ + gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5b: /* FMUL */ + gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5c: /* FCMGE */ + gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5d: /* FACGE */ + gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5f: /* FDIV */ + gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7a: /* FABD */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_abss(tcg_res, tcg_res); + break; + case 0x7c: /* FCMGT */ + gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7d: /* FACGT */ + gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + if (elements == 1) { + /* scalar single so clear high part */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); + write_vec_element(s, tcg_tmp, rd, pass, MO_64); + tcg_temp_free_i64(tcg_tmp); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + } + + tcg_temp_free_ptr(fpst); + + if ((elements << size) < 4) { + /* scalar, or non-quad vector op */ + clear_vec_high(s, rd); + } +} + +/* C3.6.11 AdvSIMD scalar three same + * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+---+------+------+ + */ +static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + bool u = extract32(insn, 29, 1); + TCGv_i64 tcg_rd; + + if (opcode >= 0x18) { + /* Floating point: U, size[1] and opcode indicate operation */ + int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); + switch (fpopcode) { + case 0x1b: /* FMULX */ + case 0x1f: /* FRECPS */ + case 0x3f: /* FRSQRTS */ + case 0x5d: /* FACGE */ + case 0x7d: /* FACGT */ + case 0x1c: /* FCMEQ */ + case 0x5c: /* FCMGE */ + case 0x7c: /* FCMGT */ + case 0x7a: /* FABD */ + break; + default: + unallocated_encoding(s); + return; + } + + handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); + return; + } + + switch (opcode) { + case 0x1: /* SQADD, UQADD */ + case 0x5: /* SQSUB, UQSUB */ + case 0x9: /* SQSHL, UQSHL */ + case 0xb: /* SQRSHL, UQRSHL */ + break; + case 0x8: /* SSHL, USHL */ + case 0xa: /* SRSHL, URSHL */ + case 0x6: /* CMGT, CMHI */ + case 0x7: /* CMGE, CMHS */ + case 0x11: /* CMTST, CMEQ */ + case 0x10: /* ADD, SUB (vector) */ + if (size != 3) { + unallocated_encoding(s); + return; + } + break; + case 0x16: /* SQDMULH, SQRDMULH (vector) */ + if (size != 1 && size != 2) { + unallocated_encoding(s); + return; + } + break; + default: + unallocated_encoding(s); + return; + } + + tcg_rd = tcg_temp_new_i64(); + + if (size == 3) { + TCGv_i64 tcg_rn = read_fp_dreg(s, rn); + TCGv_i64 tcg_rm = read_fp_dreg(s, rm); + + handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + } else { + /* Do a single operation on the lowest element in the vector. + * We use the standard Neon helpers and rely on 0 OP 0 == 0 with + * no side effects for all these operations. + * OPTME: special-purpose helpers would avoid doing some + * unnecessary work in the helper for the 8 and 16 bit cases. + */ + NeonGenTwoOpEnvFn *genenvfn; + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rm = tcg_temp_new_i32(); + TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_rn, rn, 0, size); + read_vec_element_i32(s, tcg_rm, rm, 0, size); + + switch (opcode) { + case 0x1: /* SQADD, UQADD */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, + { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, + { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x5: /* SQSUB, UQSUB */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, + { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, + { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x9: /* SQSHL, UQSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, + { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, + { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0xb: /* SQRSHL, UQRSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, + { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, + { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x16: /* SQDMULH, SQRDMULH */ + { + static NeonGenTwoOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, + { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + }; + assert(size == 1 || size == 2); + genenvfn = fns[size - 1][u]; + break; + } + default: + g_assert_not_reached(); + } + + genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); + tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); + tcg_temp_free_i32(tcg_rd32); + tcg_temp_free_i32(tcg_rn); + tcg_temp_free_i32(tcg_rm); + } + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rd); +} + +static void handle_2misc_64(DisasContext *s, int opcode, bool u, + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + /* Handle 64->64 opcodes which are shared between the scalar and + * vector 2-reg-misc groups. We cover every integer opcode where size == 3 + * is valid in either group and also the double-precision fp ops. + */ + TCGCond cond; + + switch (opcode) { + case 0x5: /* NOT */ + /* This opcode is shared with CNT and RBIT but we have earlier + * enforced that size == 3 if and only if this is the NOT insn. + */ + tcg_gen_not_i64(tcg_rd, tcg_rn); + break; + case 0xa: /* CMLT */ + /* 64 bit integer comparison against zero, result is + * test ? (2^64 - 1) : 0. We implement via setcond(!test) and + * subtracting 1. + */ + cond = TCG_COND_LT; + do_cmop: + tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x8: /* CMGT, CMGE */ + cond = u ? TCG_COND_GE : TCG_COND_GT; + goto do_cmop; + case 0x9: /* CMEQ, CMLE */ + cond = u ? TCG_COND_LE : TCG_COND_EQ; + goto do_cmop; + case 0xb: /* ABS, NEG */ + if (u) { + tcg_gen_neg_i64(tcg_rd, tcg_rn); + } else { + TCGv_i64 tcg_zero = tcg_const_i64(0); + tcg_gen_neg_i64(tcg_rd, tcg_rn); + tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero, + tcg_rn, tcg_rd); + tcg_temp_free_i64(tcg_zero); + } + break; + case 0x2f: /* FABS */ + gen_helper_vfp_absd(tcg_rd, tcg_rn); + break; + case 0x6f: /* FNEG */ + gen_helper_vfp_negd(tcg_rd, tcg_rn); + break; + default: + g_assert_not_reached(); + } +} + +static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, + bool is_scalar, bool is_u, bool is_q, + int size, int rn, int rd) +{ + bool is_double = (size == 3); + TCGv_ptr fpst = get_fpstatus_ptr(); + + if (is_double) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_zero = tcg_const_i64(0); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + NeonGenTwoDoubleOPFn *genfn; + bool swap = false; + int pass; + + switch (opcode) { + case 0x2e: /* FCMLT (zero) */ + swap = true; + /* fallthrough */ + case 0x2c: /* FCMGT (zero) */ + genfn = gen_helper_neon_cgt_f64; + break; + case 0x2d: /* FCMEQ (zero) */ + genfn = gen_helper_neon_ceq_f64; + break; + case 0x6d: /* FCMLE (zero) */ + swap = true; + /* fall through */ + case 0x6c: /* FCMGE (zero) */ + genfn = gen_helper_neon_cge_f64; + break; + default: + g_assert_not_reached(); + } + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_op, rn, pass, MO_64); + if (swap) { + genfn(tcg_res, tcg_zero, tcg_op, fpst); + } else { + genfn(tcg_res, tcg_op, tcg_zero, fpst); + } + write_vec_element(s, tcg_res, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_zero); + tcg_temp_free_i64(tcg_op); + } else { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_zero = tcg_const_i32(0); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + NeonGenTwoSingleOPFn *genfn; + bool swap = false; + int pass, maxpasses; + + switch (opcode) { + case 0x2e: /* FCMLT (zero) */ + swap = true; + /* fall through */ + case 0x2c: /* FCMGT (zero) */ + genfn = gen_helper_neon_cgt_f32; + break; + case 0x2d: /* FCMEQ (zero) */ + genfn = gen_helper_neon_ceq_f32; + break; + case 0x6d: /* FCMLE (zero) */ + swap = true; + /* fall through */ + case 0x6c: /* FCMGE (zero) */ + genfn = gen_helper_neon_cge_f32; + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + if (swap) { + genfn(tcg_res, tcg_zero, tcg_op, fpst); + } else { + genfn(tcg_res, tcg_op, tcg_zero, fpst); + } + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + } + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_zero); + tcg_temp_free_i32(tcg_op); + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + + tcg_temp_free_ptr(fpst); +} + +/* C3.6.12 AdvSIMD scalar two reg misc + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 12, 5); + int size = extract32(insn, 22, 2); + bool u = extract32(insn, 29, 1); + + switch (opcode) { + case 0xa: /* CMLT */ + if (u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x8: /* CMGT, CMGE */ + case 0x9: /* CMEQ, CMLE */ + case 0xb: /* ABS, NEG */ + if (size != 3) { + unallocated_encoding(s); + return; + } + break; + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = extract32(size, 0, 1) ? 3 : 2; + switch (opcode) { + case 0x2c: /* FCMGT (zero) */ + case 0x2d: /* FCMEQ (zero) */ + case 0x2e: /* FCMLT (zero) */ + case 0x6c: /* FCMGE (zero) */ + case 0x6d: /* FCMLE (zero) */ + handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); + return; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x1d: /* SCVTF */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + case 0x3d: /* FRECPE */ + case 0x3f: /* FRECPX */ + case 0x56: /* FCVTXN, FCVTXN2 */ + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x5d: /* UCVTF */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + case 0x7d: /* FRSQRTE */ + unsupported_encoding(s, insn); + return; + default: + unallocated_encoding(s); + return; + } + break; + default: + /* Other categories of encoding in this class: + * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64 + * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2: + * narrowing saturate ops: size 64/32/16 -> 32/16/8 + */ + unsupported_encoding(s, insn); + return; + } + + if (size == 3) { + TCGv_i64 tcg_rn = read_fp_dreg(s, rn); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + + handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn); + write_fp_dreg(s, rd, tcg_rd); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } else { + /* the 'size might not be 64' ops aren't implemented yet */ + g_assert_not_reached(); + } +} + +/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ +static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = 2 * (8 << size) - immhb; + bool accumulate = false; + bool round = false; + int dsize = is_q ? 128 : 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGMemOp memop = size | (is_u ? 0 : MO_SIGN); + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + TCGv_i64 tcg_round; + int i; + + if (extract32(immh, 3, 1) && !is_q) { + unallocated_encoding(s); + return; + } + + if (size > 3 && !is_q) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x02: /* SSRA / USRA (accumulate) */ + accumulate = true; + break; + case 0x04: /* SRSHR / URSHR (rounding) */ + round = true; + break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + accumulate = round = true; + break; + } + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, memop); + if (accumulate) { + read_vec_element(s, tcg_rd, rd, i, memop); + } + + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + + write_vec_element(s, tcg_rd, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } +} + +/* SHL/SLI - Vector shift left */ +static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + int dsize = is_q ? 128 : 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + int i; + + if (extract32(immh, 3, 1) && !is_q) { + unallocated_encoding(s); + return; + } + + if (size > 3 && !is_q) { + unallocated_encoding(s); + return; + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, size); + if (insert) { + read_vec_element(s, tcg_rd, rd, i, size); + } + + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); + + write_vec_element(s, tcg_rd, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* USHLL/SHLL - Vector shift left with widening */ +static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + int dsize = 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + int i; + + if (size >= 3) { + unallocated_encoding(s); + return; + } + + /* For the LL variants the store is larger than the load, + * so if rd == rn we would overwrite parts of our input. + * So load everything right now and use shifts in the main loop. + */ + read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); + + for (i = 0; i < elements; i++) { + tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); + ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); + tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); + write_vec_element(s, tcg_rd, rd, i, size + 1); + } +} + + +/* C3.6.14 AdvSIMD shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +---+---+---+-------------+------+------+--------+---+------+------+ + * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +---+---+---+-------------+------+------+--------+---+------+------+ + */ +static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + + switch (opcode) { + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA (accumulate) */ + case 0x04: /* SRSHR / URSHR (rounding) */ + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x14: /* SSHLL / USHLL */ + handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + default: + /* We don't currently implement any of the Narrow or saturating shifts; + * nor do we implement the fixed-point conversions in this + * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU). + */ + unsupported_encoding(s, insn); + return; + } +} + +/* Generate code to do a "long" addition or subtraction, ie one done in + * TCGv_i64 on vector lanes twice the width specified by size. + */ +static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, + TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) +{ + static NeonGenTwo64OpFn * const fns[3][2] = { + { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, + { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, + { tcg_gen_add_i64, tcg_gen_sub_i64 }, + }; + NeonGenTwo64OpFn *genfn; + assert(size < 3); + + genfn = fns[size][is_sub]; + genfn(tcg_res, tcg_op1, tcg_op2); +} + +static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + /* 3-reg-different widening insns: 64 x 64 -> 128 */ + TCGv_i64 tcg_res[2]; + int pass, accop; + + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + /* Does this op do an adding accumulate, a subtracting accumulate, + * or no accumulate at all? + */ + switch (opcode) { + case 5: + case 8: + case 9: + accop = 1; + break; + case 10: + case 11: + accop = -1; + break; + default: + accop = 0; + break; + } + + if (accop != 0) { + read_vec_element(s, tcg_res[0], rd, 0, MO_64); + read_vec_element(s, tcg_res[1], rd, 1, MO_64); + } + + /* size == 2 means two 32x32->64 operations; this is worth special + * casing because we can generally handle it inline. + */ + if (size == 2) { + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_passres; + TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); + + int elt = pass + is_q * 2; + + read_vec_element(s, tcg_op1, rn, elt, memop); + read_vec_element(s, tcg_op2, rm, elt, memop); + + if (accop == 0) { + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + switch (opcode) { + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); + break; + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); + break; + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + { + TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); + TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); + tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); + tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, + tcg_passres, + tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); + tcg_temp_free_i64(tcg_tmp1); + tcg_temp_free_i64(tcg_tmp2); + break; + } + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ + tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); + break; + case 9: /* SQDMLAL, SQDMLAL2 */ + case 11: /* SQDMLSL, SQDMLSL2 */ + case 13: /* SQDMULL, SQDMULL2 */ + tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + break; + default: + g_assert_not_reached(); + } + + if (opcode == 9 || opcode == 11) { + /* saturating accumulate ops */ + if (accop < 0) { + tcg_gen_neg_i64(tcg_passres, tcg_passres); + } + gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, + tcg_res[pass], tcg_passres); + } else if (accop > 0) { + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + } else if (accop < 0) { + tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + } + + if (accop != 0) { + tcg_temp_free_i64(tcg_passres); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + /* size 0 or 1, generally helper functions */ + for (pass = 0; pass < 2; pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_passres; + int elt = pass + is_q * 2; + + read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); + read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); + + if (accop == 0) { + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + switch (opcode) { + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + { + TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); + static NeonGenWidenFn * const widenfns[2][2] = { + { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, + { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + }; + NeonGenWidenFn *widenfn = widenfns[size][is_u]; + + widenfn(tcg_op2_64, tcg_op2); + widenfn(tcg_passres, tcg_op1); + gen_neon_addl(size, (opcode == 2), tcg_passres, + tcg_passres, tcg_op2_64); + tcg_temp_free_i64(tcg_op2_64); + break; + } + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + if (size == 0) { + if (is_u) { + gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); + } + } else { + if (is_u) { + gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); + } + } + break; + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ + if (size == 0) { + if (is_u) { + gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); + } + } else { + if (is_u) { + gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); + } + } + break; + case 9: /* SQDMLAL, SQDMLAL2 */ + case 11: /* SQDMLSL, SQDMLSL2 */ + case 13: /* SQDMULL, SQDMULL2 */ + assert(size == 1); + gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + + if (accop != 0) { + if (opcode == 9 || opcode == 11) { + /* saturating accumulate ops */ + if (accop < 0) { + gen_helper_neon_negl_u32(tcg_passres, tcg_passres); + } + gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, + tcg_res[pass], + tcg_passres); + } else { + gen_neon_addl(size, (accop < 0), tcg_res[pass], + tcg_res[pass], tcg_passres); + } + tcg_temp_free_i64(tcg_passres); + } + } + } + + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + TCGv_i64 tcg_res[2]; + int part = is_q ? 2 : 0; + int pass; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); + static NeonGenWidenFn * const widenfns[3][2] = { + { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, + { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, + }; + NeonGenWidenFn *widenfn = widenfns[size][is_u]; + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); + widenfn(tcg_op2_wide, tcg_op2); + tcg_temp_free_i32(tcg_op2); + tcg_res[pass] = tcg_temp_new_i64(); + gen_neon_addl(size, (opcode == 3), + tcg_res[pass], tcg_op1, tcg_op2_wide); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2_wide); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + +static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in) +{ + tcg_gen_shri_i64(in, in, 32); + tcg_gen_trunc_i64_i32(res, in); +} + +static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) +{ + tcg_gen_addi_i64(in, in, 1U << 31); + do_narrow_high_u32(res, in); +} + +static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + TCGv_i32 tcg_res[2]; + int part = is_q ? 2 : 0; + int pass; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_wideres = tcg_temp_new_i64(); + static NeonGenNarrowFn * const narrowfns[3][2] = { + { gen_helper_neon_narrow_high_u8, + gen_helper_neon_narrow_round_high_u8 }, + { gen_helper_neon_narrow_high_u16, + gen_helper_neon_narrow_round_high_u16 }, + { do_narrow_high_u32, do_narrow_round_high_u32 }, + }; + NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + + tcg_res[pass] = tcg_temp_new_i32(); + gennarrow(tcg_res[pass], tcg_wideres); + tcg_temp_free_i64(tcg_wideres); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C3.6.15 AdvSIMD three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) +{ + /* Instructions in this group fall into three basic classes + * (in each case with the operation working on each element in + * the input vectors): + * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra + * 128 bit input) + * (2) wide 64 x 128 -> 128 + * (3) narrowing 128 x 128 -> 64 + * Here we do initial decode, catch unallocated cases and + * dispatch to separate functions for each class. + */ + int is_q = extract32(insn, 30, 1); + int is_u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + switch (opcode) { + case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ + case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ + /* 64 x 128 -> 128 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ + case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ + /* 128 x 128 -> 64 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + case 14: /* PMULL, PMULL2 */ + if (is_u || size == 1 || size == 2) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + break; + case 9: /* SQDMLAL, SQDMLAL2 */ + case 11: /* SQDMLSL, SQDMLSL2 */ + case 13: /* SQDMULL, SQDMULL2 */ + if (is_u || size == 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ + /* 64 x 64 -> 128 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + default: + /* opcode 15 not allocated */ + unallocated_encoding(s); + break; + } +} + +/* Logic op (opcode == 3) subgroup of C3.6.16. */ +static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + bool is_u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res[2]; + int pass; + + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + for (pass = 0; pass < (is_q ? 2 : 1); pass++) { + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + if (!is_u) { + switch (size) { + case 0: /* AND */ + tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 1: /* BIC */ + tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 2: /* ORR */ + tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 3: /* ORN */ + tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + } + } else { + if (size != 0) { + /* B* ops need res loaded to operate on */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + } + + switch (size) { + case 0: /* EOR */ + tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 1: /* BSL bitwise select */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1); + break; + case 2: /* BIT, bitwise insert if true */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + break; + case 3: /* BIF, bitwise insert if false */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + break; + } + } + } + + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + if (!is_q) { + tcg_gen_movi_i64(tcg_res[1], 0); + } + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +/* Helper functions for 32 bit comparisons */ +static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2); +} + +static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2); +} + +static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2); +} + +static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2); +} + +/* Pairwise op subgroup of C3.6.16. + * + * This is called directly or via the handle_3same_float for float pairwise + * operations where the opcode and size are calculated differently. + */ +static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, + int size, int rn, int rm, int rd) +{ + TCGv_ptr fpst; + int pass; + + /* Floating point operations need fpst */ + if (opcode >= 0x58) { + fpst = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(fpst); + } + + /* These operations work on the concatenated rm:rn, with each pair of + * adjacent elements being operated on to produce an element in the result. + */ + if (size == 3) { + TCGv_i64 tcg_res[2]; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + int passreg = (pass == 0) ? rn : rm; + + read_vec_element(s, tcg_op1, passreg, 0, MO_64); + read_vec_element(s, tcg_op2, passreg, 1, MO_64); + tcg_res[pass] = tcg_temp_new_i64(); + + switch (opcode) { + case 0x17: /* ADDP */ + tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 0x58: /* FMAXNMP */ + gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5a: /* FADDP */ + gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5e: /* FMAXP */ + gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x78: /* FMINNMP */ + gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x7e: /* FMINP */ + gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } + } else { + int maxpass = is_q ? 4 : 2; + TCGv_i32 tcg_res[4]; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + NeonGenTwoOpFn *genfn = NULL; + int passreg = pass < (maxpass / 2) ? rn : rm; + int passelt = (is_q && (pass & 1)) ? 2 : 0; + + read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); + read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); + tcg_res[pass] = tcg_temp_new_i32(); + + switch (opcode) { + case 0x17: /* ADDP */ + { + static NeonGenTwoOpFn * const fns[3] = { + gen_helper_neon_padd_u8, + gen_helper_neon_padd_u16, + tcg_gen_add_i32, + }; + genfn = fns[size]; + break; + } + case 0x14: /* SMAXP, UMAXP */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, + { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, + { gen_max_s32, gen_max_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x15: /* SMINP, UMINP */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, + { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, + { gen_min_s32, gen_min_u32 }, + }; + genfn = fns[size][u]; + break; + } + /* The FP operations are all on single floats (32 bit) */ + case 0x58: /* FMAXNMP */ + gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5a: /* FADDP */ + gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5e: /* FMAXP */ + gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x78: /* FMINNMP */ + gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x7e: /* FMINP */ + gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + /* FP ops called directly, otherwise call now */ + if (genfn) { + genfn(tcg_res[pass], tcg_op1, tcg_op2); + } + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + + for (pass = 0; pass < maxpass; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } + + if (!TCGV_IS_UNUSED_PTR(fpst)) { + tcg_temp_free_ptr(fpst); + } +} + +/* Floating point op subgroup of C3.6.16. */ +static void disas_simd_3same_float(DisasContext *s, uint32_t insn) +{ + /* For floating point ops, the U, size[1] and opcode bits + * together indicate the operation. size[0] indicates single + * or double. + */ + int fpopcode = extract32(insn, 11, 5) + | (extract32(insn, 23, 1) << 5) + | (extract32(insn, 29, 1) << 6); + int is_q = extract32(insn, 30, 1); + int size = extract32(insn, 22, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + int datasize = is_q ? 128 : 64; + int esize = 32 << size; + int elements = datasize / esize; + + if (size == 1 && !is_q) { + unallocated_encoding(s); + return; + } + + switch (fpopcode) { + case 0x58: /* FMAXNMP */ + case 0x5a: /* FADDP */ + case 0x5e: /* FMAXP */ + case 0x78: /* FMINNMP */ + case 0x7e: /* FMINP */ + if (size && !is_q) { + unallocated_encoding(s); + return; + } + handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, + rn, rm, rd); + return; + case 0x1b: /* FMULX */ + case 0x1f: /* FRECPS */ + case 0x3f: /* FRSQRTS */ + case 0x5d: /* FACGE */ + case 0x7d: /* FACGT */ + case 0x19: /* FMLA */ + case 0x39: /* FMLS */ + case 0x18: /* FMAXNM */ + case 0x1a: /* FADD */ + case 0x1c: /* FCMEQ */ + case 0x1e: /* FMAX */ + case 0x38: /* FMINNM */ + case 0x3a: /* FSUB */ + case 0x3e: /* FMIN */ + case 0x5b: /* FMUL */ + case 0x5c: /* FCMGE */ + case 0x5f: /* FDIV */ + case 0x7a: /* FABD */ + case 0x7c: /* FCMGT */ + handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); + return; + default: + unallocated_encoding(s); + return; + } +} + +/* Integer op subgroup of C3.6.16. */ +static void disas_simd_3same_int(DisasContext *s, uint32_t insn) +{ + int is_q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int pass; + + switch (opcode) { + case 0x13: /* MUL, PMUL */ + if (u && size != 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x0: /* SHADD, UHADD */ + case 0x2: /* SRHADD, URHADD */ + case 0x4: /* SHSUB, UHSUB */ + case 0xc: /* SMAX, UMAX */ + case 0xd: /* SMIN, UMIN */ + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ + case 0x12: /* MLA, MLS */ + if (size == 3) { + unallocated_encoding(s); + return; + } + break; + case 0x16: /* SQDMULH, SQRDMULH */ + if (size == 0 || size == 3) { + unallocated_encoding(s); + return; + } + break; + default: + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + } + + if (size == 3) { + for (pass = 0; pass < (is_q ? 2 : 1); pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + for (pass = 0; pass < (is_q ? 4 : 2); pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + NeonGenTwoOpFn *genfn = NULL; + NeonGenTwoOpEnvFn *genenvfn = NULL; + + read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); + read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); + + switch (opcode) { + case 0x0: /* SHADD, UHADD */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, + { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, + { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x1: /* SQADD, UQADD */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, + { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, + { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x2: /* SRHADD, URHADD */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, + { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, + { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x4: /* SHSUB, UHSUB */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, + { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, + { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x5: /* SQSUB, UQSUB */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, + { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, + { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x6: /* CMGT, CMHI */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 }, + { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 }, + { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x7: /* CMGE, CMHS */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 }, + { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 }, + { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x8: /* SSHL, USHL */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 }, + { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 }, + { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x9: /* SQSHL, UQSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, + { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, + { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0xa: /* SRSHL, URSHL */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, + { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, + { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0xb: /* SQRSHL, UQRSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, + { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, + { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0xc: /* SMAX, UMAX */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_max_s8, gen_helper_neon_max_u8 }, + { gen_helper_neon_max_s16, gen_helper_neon_max_u16 }, + { gen_max_s32, gen_max_u32 }, + }; + genfn = fns[size][u]; + break; + } + + case 0xd: /* SMIN, UMIN */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_min_s8, gen_helper_neon_min_u8 }, + { gen_helper_neon_min_s16, gen_helper_neon_min_u16 }, + { gen_min_s32, gen_min_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 }, + { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 }, + { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x10: /* ADD, SUB */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 }, + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x11: /* CMTST, CMEQ */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 }, + { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 }, + { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x13: /* MUL, PMUL */ + if (u) { + /* PMUL */ + assert(size == 0); + genfn = gen_helper_neon_mul_p8; + break; + } + /* fall through : MUL */ + case 0x12: /* MLA, MLS */ + { + static NeonGenTwoOpFn * const fns[3] = { + gen_helper_neon_mul_u8, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + }; + genfn = fns[size]; + break; + } + case 0x16: /* SQDMULH, SQRDMULH */ + { + static NeonGenTwoOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, + { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + }; + assert(size == 1 || size == 2); + genenvfn = fns[size - 1][u]; + break; + } + default: + g_assert_not_reached(); + } + + if (genenvfn) { + genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); + } else { + genfn(tcg_res, tcg_op1, tcg_op2); + } + + if (opcode == 0xf || opcode == 0x12) { + /* SABA, UABA, MLA, MLS: accumulating ops */ + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 }, + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + bool is_sub = (opcode == 0x12 && u); /* MLS */ + + genfn = fns[size][is_sub]; + read_vec_element_i32(s, tcg_op1, rd, pass, MO_32); + genfn(tcg_res, tcg_res, tcg_op1); + } + + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + } + + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C3.6.16 AdvSIMD three same + * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+---+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+---+------+------+ + */ +static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) +{ + int opcode = extract32(insn, 11, 5); + + switch (opcode) { + case 0x3: /* logic ops */ + disas_simd_3same_logic(s, insn); + break; + case 0x17: /* ADDP */ + case 0x14: /* SMAXP, UMAXP */ + case 0x15: /* SMINP, UMINP */ + { + /* Pairwise operations */ + int is_q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + if (opcode == 0x17) { + if (u || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + } else { + if (size == 3) { + unallocated_encoding(s); + return; + } + } + handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); + break; + } + case 0x18 ... 0x31: + /* floating point ops, sz[1] and U are part of opcode */ + disas_simd_3same_float(s, insn); + break; + default: + disas_simd_3same_int(s, insn); + break; + } +} + +static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q, + int size, int rn, int rd) +{ + /* Handle 2-reg-misc ops which are narrowing (so each 2*size element + * in the source becomes a size element in the destination). + */ + int pass; + TCGv_i32 tcg_res[2]; + int destelt = is_q ? 2 : 0; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenNarrowFn *genfn = NULL; + NeonGenNarrowEnvFn *genenvfn = NULL; + + read_vec_element(s, tcg_op, rn, pass, MO_64); + tcg_res[pass] = tcg_temp_new_i32(); + + switch (opcode) { + case 0x12: /* XTN, SQXTUN */ + { + static NeonGenNarrowFn * const xtnfns[3] = { + gen_helper_neon_narrow_u8, + gen_helper_neon_narrow_u16, + tcg_gen_trunc_i64_i32, + }; + static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + gen_helper_neon_unarrow_sat8, + gen_helper_neon_unarrow_sat16, + gen_helper_neon_unarrow_sat32, + }; + if (u) { + genenvfn = sqxtunfns[size]; + } else { + genfn = xtnfns[size]; + } + break; + } + case 0x14: /* SQXTN, UQXTN */ + { + static NeonGenNarrowEnvFn * const fns[3][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_narrow_sat_u8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + default: + g_assert_not_reached(); + } + + if (genfn) { + genfn(tcg_res[pass], tcg_op); + } else { + genenvfn(tcg_res[pass], cpu_env, tcg_op); + } + + tcg_temp_free_i64(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +static void handle_rev(DisasContext *s, int opcode, bool u, + bool is_q, int size, int rn, int rd) +{ + int op = (opcode << 1) | u; + int opsz = op + size; + int grp_size = 3 - opsz; + int dsize = is_q ? 128 : 64; + int i; + + if (opsz >= 3) { + unallocated_encoding(s); + return; + } + + if (size == 0) { + /* Special case bytes, use bswap op on each group of elements */ + int groups = dsize / (8 << grp_size); + + for (i = 0; i < groups; i++) { + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + read_vec_element(s, tcg_tmp, rn, i, grp_size); + switch (grp_size) { + case MO_16: + tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); + break; + case MO_32: + tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp); + break; + case MO_64: + tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); + break; + default: + g_assert_not_reached(); + } + write_vec_element(s, tcg_tmp, rd, i, grp_size); + tcg_temp_free_i64(tcg_tmp); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + int revmask = (1 << grp_size) - 1; + int esize = 8 << size; + int elements = dsize / esize; + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_const_i64(0); + TCGv_i64 tcg_rd_hi = tcg_const_i64(0); + + for (i = 0; i < elements; i++) { + int e_rev = (i & 0xf) ^ revmask; + int off = e_rev * esize; + read_vec_element(s, tcg_rn, rn, i, size); + if (off >= 64) { + tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi, + tcg_rn, off - 64, esize); + } else { + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize); + } + } + write_vec_element(s, tcg_rd, rd, 0, MO_64); + write_vec_element(s, tcg_rd_hi, rd, 1, MO_64); + + tcg_temp_free_i64(tcg_rd_hi); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } +} + +/* C3.6.17 AdvSIMD two reg misc + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) +{ + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + switch (opcode) { + case 0x0: /* REV64, REV32 */ + case 0x1: /* REV16 */ + handle_rev(s, opcode, u, is_q, size, rn, rd); + return; + case 0x5: /* CNT, NOT, RBIT */ + if (u && size == 0) { + /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */ + size = 3; + break; + } else if (u && size == 1) { + /* RBIT */ + break; + } else if (!u && size == 0) { + /* CNT */ + break; + } + unallocated_encoding(s); + return; + case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ + case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd); + return; + case 0x2: /* SADDLP, UADDLP */ + case 0x4: /* CLS, CLZ */ + case 0x6: /* SADALP, UADALP */ + if (size == 3) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + case 0x13: /* SHLL, SHLL2 */ + if (u == 0 || size == 3) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + case 0xa: /* CMLT */ + if (u == 1) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x8: /* CMGT, CMGE */ + case 0x9: /* CMEQ, CMLE */ + case 0xb: /* ABS, NEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x3: /* SUQADD, USQADD */ + case 0x7: /* SQABS, SQNEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + { + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = extract32(size, 0, 1) ? 3 : 2; + switch (opcode) { + case 0x2f: /* FABS */ + case 0x6f: /* FNEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x2c: /* FCMGT (zero) */ + case 0x2d: /* FCMEQ (zero) */ + case 0x2e: /* FCMLT (zero) */ + case 0x6c: /* FCMGE (zero) */ + case 0x6d: /* FCMLE (zero) */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); + return; + case 0x16: /* FCVTN, FCVTN2 */ + case 0x17: /* FCVTL, FCVTL2 */ + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x1d: /* SCVTF */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + case 0x3c: /* URECPE */ + case 0x3d: /* FRECPE */ + case 0x56: /* FCVTXN, FCVTXN2 */ + case 0x58: /* FRINTA */ + case 0x59: /* FRINTX */ + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x5d: /* UCVTF */ + case 0x79: /* FRINTI */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + case 0x7c: /* URSQRTE */ + case 0x7d: /* FRSQRTE */ + case 0x7f: /* FSQRT */ + unsupported_encoding(s, insn); + return; + default: + unallocated_encoding(s); + return; + } + break; + } + default: + unallocated_encoding(s); + return; + } + + if (size == 3) { + /* All 64-bit element operations can be shared with scalar 2misc */ + int pass; + + for (pass = 0; pass < (is_q ? 2 : 1); pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + + handle_2misc_64(s, opcode, u, tcg_res, tcg_op); + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op); + } + } else { + int pass; + + for (pass = 0; pass < (is_q ? 4 : 2); pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + TCGCond cond; + + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + + if (size == 2) { + /* Special cases for 32 bit elements */ + switch (opcode) { + case 0xa: /* CMLT */ + /* 32 bit integer comparison against zero, result is + * test ? (2^32 - 1) : 0. We implement via setcond(test) + * and inverting. + */ + cond = TCG_COND_LT; + do_cmop: + tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0); + tcg_gen_neg_i32(tcg_res, tcg_res); + break; + case 0x8: /* CMGT, CMGE */ + cond = u ? TCG_COND_GE : TCG_COND_GT; + goto do_cmop; + case 0x9: /* CMEQ, CMLE */ + cond = u ? TCG_COND_LE : TCG_COND_EQ; + goto do_cmop; + case 0xb: /* ABS, NEG */ + if (u) { + tcg_gen_neg_i32(tcg_res, tcg_op); + } else { + TCGv_i32 tcg_zero = tcg_const_i32(0); + tcg_gen_neg_i32(tcg_res, tcg_op); + tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op, + tcg_zero, tcg_op, tcg_res); + tcg_temp_free_i32(tcg_zero); + } + break; + case 0x2f: /* FABS */ + gen_helper_vfp_abss(tcg_res, tcg_op); + break; + case 0x6f: /* FNEG */ + gen_helper_vfp_negs(tcg_res, tcg_op); + break; + default: + g_assert_not_reached(); + } + } else { + /* Use helpers for 8 and 16 bit elements */ + switch (opcode) { + case 0x5: /* CNT, RBIT */ + /* For these two insns size is part of the opcode specifier + * (handled earlier); they always operate on byte elements. + */ + if (u) { + gen_helper_neon_rbit_u8(tcg_res, tcg_op); + } else { + gen_helper_neon_cnt_u8(tcg_res, tcg_op); + } + break; + case 0x8: /* CMGT, CMGE */ + case 0x9: /* CMEQ, CMLE */ + case 0xa: /* CMLT */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 }, + { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 }, + { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 }, + }; + NeonGenTwoOpFn *genfn; + int comp; + bool reverse; + TCGv_i32 tcg_zero = tcg_const_i32(0); + + /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */ + comp = (opcode - 0x8) * 2 + u; + /* ...but LE, LT are implemented as reverse GE, GT */ + reverse = (comp > 2); + if (reverse) { + comp = 4 - comp; + } + genfn = fns[comp][size]; + if (reverse) { + genfn(tcg_res, tcg_zero, tcg_op); + } else { + genfn(tcg_res, tcg_op, tcg_zero); + } + tcg_temp_free_i32(tcg_zero); + break; + } + case 0xb: /* ABS, NEG */ + if (u) { + TCGv_i32 tcg_zero = tcg_const_i32(0); + if (size) { + gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op); + } else { + gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op); + } + tcg_temp_free_i32(tcg_zero); + } else { + if (size) { + gen_helper_neon_abs_s16(tcg_res, tcg_op); + } else { + gen_helper_neon_abs_s8(tcg_res, tcg_op); + } + } + break; + default: + g_assert_not_reached(); + } + } + + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op); + } + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C3.6.13 AdvSIMD scalar x indexed element + * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | + * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ + * C3.6.18 AdvSIMD vector x indexed element + * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ + * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | + * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ + */ +static void disas_simd_indexed(DisasContext *s, uint32_t insn) +{ + /* This encoding has two kinds of instruction: + * normal, where we perform elt x idxelt => elt for each + * element in the vector + * long, where we perform elt x idxelt and generate a result of + * double the width of the input element + * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). + */ + bool is_scalar = extract32(insn, 28, 1); + bool is_q = extract32(insn, 30, 1); + bool u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int l = extract32(insn, 21, 1); + int m = extract32(insn, 20, 1); + /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ + int rm = extract32(insn, 16, 4); + int opcode = extract32(insn, 12, 4); + int h = extract32(insn, 11, 1); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + bool is_long = false; + bool is_fp = false; + int index; + TCGv_ptr fpst; + + switch (opcode) { + case 0x0: /* MLA */ + case 0x4: /* MLS */ + if (!u || is_scalar) { + unallocated_encoding(s); + return; + } + break; + case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */ + if (is_scalar) { + unallocated_encoding(s); + return; + } + is_long = true; + break; + case 0x3: /* SQDMLAL, SQDMLAL2 */ + case 0x7: /* SQDMLSL, SQDMLSL2 */ + case 0xb: /* SQDMULL, SQDMULL2 */ + is_long = true; + /* fall through */ + case 0xc: /* SQDMULH */ + case 0xd: /* SQRDMULH */ + if (u) { + unallocated_encoding(s); + return; + } + break; + case 0x8: /* MUL */ + if (u || is_scalar) { + unallocated_encoding(s); + return; + } + break; + case 0x1: /* FMLA */ + case 0x5: /* FMLS */ + if (u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x9: /* FMUL, FMULX */ + if (!extract32(size, 1, 1)) { + unallocated_encoding(s); + return; + } + is_fp = true; + break; + default: + unallocated_encoding(s); + return; + } + + if (is_fp) { + /* low bit of size indicates single/double */ + size = extract32(size, 0, 1) ? 3 : 2; + if (size == 2) { + index = h << 1 | l; + } else { + if (l || !is_q) { + unallocated_encoding(s); + return; + } + index = h; + } + rm |= (m << 4); + } else { + switch (size) { + case 1: + index = h << 2 | l << 1 | m; + break; + case 2: + index = h << 1 | l; + rm |= (m << 4); + break; + default: + unallocated_encoding(s); + return; + } + } + + if (is_fp) { + fpst = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(fpst); + } + + if (size == 3) { + TCGv_i64 tcg_idx = tcg_temp_new_i64(); + int pass; + + assert(is_fp && is_q && !is_long); + + read_vec_element(s, tcg_idx, rm, index, MO_64); + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + + switch (opcode) { + case 0x5: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negd(tcg_op, tcg_op); + /* fall through */ + case 0x1: /* FMLA */ + read_vec_element(s, tcg_res, rd, pass, MO_64); + gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + break; + case 0x9: /* FMUL, FMULX */ + if (u) { + gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); + } else { + gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); + } + break; + default: + g_assert_not_reached(); + } + + write_vec_element(s, tcg_res, rd, pass, MO_64); + tcg_temp_free_i64(tcg_op); + tcg_temp_free_i64(tcg_res); + } + + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_idx); + } else if (!is_long) { + /* 32 bit floating point, or 16 or 32 bit integer. + * For the 16 bit scalar case we use the usual Neon helpers and + * rely on the fact that 0 op 0 == 0 with no side effects. + */ + TCGv_i32 tcg_idx = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + read_vec_element_i32(s, tcg_idx, rm, index, size); + + if (size == 1 && !is_scalar) { + /* The simplest way to handle the 16x16 indexed ops is to duplicate + * the index into both halves of the 32 bit tcg_idx and then use + * the usual Neon helpers. + */ + tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); + } + + for (pass = 0; pass < maxpasses; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); + + switch (opcode) { + case 0x0: /* MLA */ + case 0x4: /* MLS */ + case 0x8: /* MUL */ + { + static NeonGenTwoOpFn * const fns[2][2] = { + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + NeonGenTwoOpFn *genfn; + bool is_sub = opcode == 0x4; + + if (size == 1) { + gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); + } else { + tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); + } + if (opcode == 0x8) { + break; + } + read_vec_element_i32(s, tcg_op, rd, pass, MO_32); + genfn = fns[size - 1][is_sub]; + genfn(tcg_res, tcg_op, tcg_res); + break; + } + case 0x5: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negs(tcg_op, tcg_op); + /* fall through */ + case 0x1: /* FMLA */ + read_vec_element_i32(s, tcg_res, rd, pass, MO_32); + gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + break; + case 0x9: /* FMUL, FMULX */ + if (u) { + gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); + } else { + gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); + } + break; + case 0xc: /* SQDMULH */ + if (size == 1) { + gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, + tcg_op, tcg_idx); + } else { + gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, + tcg_op, tcg_idx); + } + break; + case 0xd: /* SQRDMULH */ + if (size == 1) { + gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env, + tcg_op, tcg_idx); + } else { + gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, + tcg_op, tcg_idx); + } + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_op); + tcg_temp_free_i32(tcg_res); + } + + tcg_temp_free_i32(tcg_idx); + + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + /* long ops: 16x16->32 or 32x32->64 */ + TCGv_i64 tcg_res[2]; + int pass; + bool satop = extract32(opcode, 0, 1); + TCGMemOp memop = MO_32; + + if (satop || !u) { + memop |= MO_SIGN; + } + + if (size == 2) { + TCGv_i64 tcg_idx = tcg_temp_new_i64(); + + read_vec_element(s, tcg_idx, rm, index, memop); + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_passres; + int passelt; + + if (is_scalar) { + passelt = 0; + } else { + passelt = pass + (is_q * 2); + } + + read_vec_element(s, tcg_op, rn, passelt, memop); + + tcg_res[pass] = tcg_temp_new_i64(); + + if (opcode == 0xa || opcode == 0xb) { + /* Non-accumulating ops */ + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); + tcg_temp_free_i64(tcg_op); + + if (satop) { + /* saturating, doubling */ + gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + } + + if (opcode == 0xa || opcode == 0xb) { + continue; + } + + /* Accumulating op: handle accumulate step */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + + switch (opcode) { + case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + break; + case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + break; + case 0x7: /* SQDMLSL, SQDMLSL2 */ + tcg_gen_neg_i64(tcg_passres, tcg_passres); + /* fall through */ + case 0x3: /* SQDMLAL, SQDMLAL2 */ + gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, + tcg_res[pass], + tcg_passres); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i64(tcg_passres); + } + tcg_temp_free_i64(tcg_idx); + + if (is_scalar) { + clear_vec_high(s, rd); + } + } else { + TCGv_i32 tcg_idx = tcg_temp_new_i32(); + + assert(size == 1); + read_vec_element_i32(s, tcg_idx, rm, index, size); + + if (!is_scalar) { + /* The simplest way to handle the 16x16 indexed ops is to + * duplicate the index into both halves of the 32 bit tcg_idx + * and then use the usual Neon helpers. + */ + tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); + } + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i64 tcg_passres; + + if (is_scalar) { + read_vec_element_i32(s, tcg_op, rn, pass, size); + } else { + read_vec_element_i32(s, tcg_op, rn, + pass + (is_q * 2), MO_32); + } + + tcg_res[pass] = tcg_temp_new_i64(); + + if (opcode == 0xa || opcode == 0xb) { + /* Non-accumulating ops */ + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + if (memop & MO_SIGN) { + gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); + } else { + gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); + } + if (satop) { + gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + } + tcg_temp_free_i32(tcg_op); + + if (opcode == 0xa || opcode == 0xb) { + continue; + } + + /* Accumulating op: handle accumulate step */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + + switch (opcode) { + case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], + tcg_passres); + break; + case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], + tcg_passres); + break; + case 0x7: /* SQDMLSL, SQDMLSL2 */ + gen_helper_neon_negl_u32(tcg_passres, tcg_passres); + /* fall through */ + case 0x3: /* SQDMLAL, SQDMLAL2 */ + gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, + tcg_res[pass], + tcg_passres); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i64(tcg_passres); + } + tcg_temp_free_i32(tcg_idx); + + if (is_scalar) { + tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); + } + } + + if (is_scalar) { + tcg_res[1] = tcg_const_i64(0); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } + } + + if (!TCGV_IS_UNUSED_PTR(fpst)) { + tcg_temp_free_ptr(fpst); + } +} + +/* C3.6.19 Crypto AES + * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----------------+------+-----------+--------+-----+------+------+ + * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | + * +-----------------+------+-----------+--------+-----+------+------+ + */ +static void disas_crypto_aes(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.20 Crypto three-reg SHA + * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 + * +-----------------+------+---+------+---+--------+-----+------+------+ + * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | + * +-----------------+------+---+------+---+--------+-----+------+------+ + */ +static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.21 Crypto two-reg SHA + * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----------------+------+-----------+--------+-----+------+------+ + * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | + * +-----------------+------+-----------+--------+-----+------+------+ + */ +static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6 Data processing - SIMD, inc Crypto + * + * As the decode gets a little complex we are using a table based + * approach for this part of the decode. + */ +static const AArch64DecodeTable data_proc_simd[] = { + /* pattern , mask , fn */ + { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, + { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, + { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, + { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, + { 0x0e000400, 0x9fe08400, disas_simd_copy }, + { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ + /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ + { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, + { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, + { 0x0e000000, 0xbf208c00, disas_simd_tb }, + { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, + { 0x2e000000, 0xbf208400, disas_simd_ext }, + { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, + { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, + { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, + { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, + { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, + { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ + { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, + { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, + { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, + { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, + { 0x00000000, 0x00000000, NULL } +}; + static void disas_data_proc_simd(DisasContext *s, uint32_t insn) { /* Note that this is called with all non-FP cases from * table C3-6 so it must UNDEF for entries not specifically * allocated to instructions in that table. */ - unsupported_encoding(s, insn); + AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); + if (fn) { + fn(s, insn); + } else { + unallocated_encoding(s); + } } /* C3.6 Data processing - SIMD and floating point */ @@ -4318,7 +9038,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, dc->condexec_mask = 0; dc->condexec_cond = 0; #if !defined(CONFIG_USER_ONLY) - dc->user = 0; + dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0); #endif dc->vfp_enabled = 0; dc->vec_len = 0; @@ -4429,6 +9149,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, /* This is a special case because we don't want to just halt the CPU * if trying to debug across a WFI. */ + gen_a64_set_pc_im(dc->pc); gen_helper_wfi(cpu_env); break; } @@ -4443,7 +9164,7 @@ done_generating: qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(env, pc_start, dc->pc - pc_start, - dc->thumb | (dc->bswap_code << 1)); + 4 | (dc->bswap_code << 1)); qemu_log("\n"); } #endif diff --git a/target-arm/translate.c b/target-arm/translate.c index 8d240e160d..253d2a13eb 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2759,6 +2759,113 @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn, return 0; } +static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, + int rounding) +{ + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode; + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (dp) { + TCGv_i64 tcg_op; + TCGv_i64 tcg_res; + tcg_op = tcg_temp_new_i64(); + tcg_res = tcg_temp_new_i64(); + tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm)); + gen_helper_rintd(tcg_res, tcg_op, fpst); + tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i64(tcg_op); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op; + TCGv_i32 tcg_res; + tcg_op = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm)); + gen_helper_rints(tcg_res, tcg_op, fpst); + tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i32(tcg_op); + tcg_temp_free_i32(tcg_res); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + tcg_temp_free_ptr(fpst); + return 0; +} + +static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, + int rounding) +{ + bool is_signed = extract32(insn, 7, 1); + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode, tcg_shift; + + tcg_shift = tcg_const_i32(0); + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (dp) { + TCGv_i64 tcg_double, tcg_res; + TCGv_i32 tcg_tmp; + /* Rd is encoded as a single precision register even when the source + * is double precision. + */ + rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1); + tcg_double = tcg_temp_new_i64(); + tcg_res = tcg_temp_new_i64(); + tcg_tmp = tcg_temp_new_i32(); + tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm)); + if (is_signed) { + gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); + } else { + gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); + } + tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res); + tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd)); + tcg_temp_free_i32(tcg_tmp); + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single, tcg_res; + tcg_single = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm)); + if (is_signed) { + gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); + } + tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd)); + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_single); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + tcg_temp_free_i32(tcg_shift); + + tcg_temp_free_ptr(fpst); + + return 0; +} + +/* Table for converting the most common AArch32 encoding of + * rounding mode to arm_fprounding order (which matches the + * common AArch64 order); see ARM ARM pseudocode FPDecodeRM(). + */ +static const uint8_t fp_decode_rm[] = { + FPROUNDING_TIEAWAY, + FPROUNDING_TIEEVEN, + FPROUNDING_POSINF, + FPROUNDING_NEGINF, +}; + static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) { uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); @@ -2781,6 +2888,14 @@ static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) return handle_vsel(insn, rd, rn, rm, dp); } else if ((insn & 0x0fb00e10) == 0x0e800a00) { return handle_vminmaxnm(insn, rd, rn, rm, dp); + } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) { + /* VRINTA, VRINTN, VRINTP, VRINTM */ + int rounding = fp_decode_rm[extract32(insn, 16, 2)]; + return handle_vrint(insn, rd, rm, dp, rounding); + } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) { + /* VCVTA, VCVTN, VCVTP, VCVTM */ + int rounding = fp_decode_rm[extract32(insn, 16, 2)]; + return handle_vcvt(insn, rd, rm, dp, rounding); } return 1; } @@ -3027,16 +3142,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); } - if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) { - /* Integer or single precision destination. */ + if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) || + ((rn & 0x1e) == 0x6))) { + /* Integer or single/half precision destination. */ rd = VFP_SREG_D(insn); } else { VFP_DREG_D(rd, insn); } if (op == 15 && - (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) { - /* VCVT from int is always from S reg regardless of dp bit. - * VCVT with immediate frac_bits has same format as SREG_M + (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) || + ((rn & 0x1e) == 0x4))) { + /* VCVT from int or half precision is always from S reg + * regardless of dp bit. VCVT with immediate frac_bits + * has same format as SREG_M. */ rm = VFP_SREG_M(insn); } else { @@ -3126,12 +3244,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) case 5: case 6: case 7: - /* VCVTB, VCVTT: only present with the halfprec extension, - * UNPREDICTABLE if bit 8 is set (we choose to UNDEF) + /* VCVTB, VCVTT: only present with the halfprec extension + * UNPREDICTABLE if bit 8 is set prior to ARMv8 + * (we choose to UNDEF) */ - if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) { + if ((dp && !arm_feature(env, ARM_FEATURE_V8)) || + !arm_feature(env, ARM_FEATURE_VFP_FP16)) { return 1; } + if (!extract32(rn, 1, 1)) { + /* Half precision source. */ + gen_mov_F0_vreg(0, rm); + break; + } /* Otherwise fall through */ default: /* One source operand. */ @@ -3279,21 +3404,39 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) case 3: /* sqrt */ gen_vfp_sqrt(dp); break; - case 4: /* vcvtb.f32.f16 */ + case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */ tmp = gen_vfp_mrs(); tcg_gen_ext16u_i32(tmp, tmp); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, + cpu_env); + } else { + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, + cpu_env); + } tcg_temp_free_i32(tmp); break; - case 5: /* vcvtt.f32.f16 */ + case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */ tmp = gen_vfp_mrs(); tcg_gen_shri_i32(tmp, tmp, 16); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, + cpu_env); + } else { + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, + cpu_env); + } tcg_temp_free_i32(tmp); break; - case 6: /* vcvtb.f16.f32 */ + case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */ tmp = tcg_temp_new_i32(); - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, + cpu_env); + } else { + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, + cpu_env); + } gen_mov_F0_vreg(0, rd); tmp2 = gen_vfp_mrs(); tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); @@ -3301,9 +3444,15 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp2); gen_vfp_msr(tmp); break; - case 7: /* vcvtt.f16.f32 */ + case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */ tmp = tcg_temp_new_i32(); - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, + cpu_env); + } else { + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, + cpu_env); + } tcg_gen_shli_i32(tmp, tmp, 16); gen_mov_F0_vreg(0, rd); tmp2 = gen_vfp_mrs(); @@ -3325,6 +3474,44 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) gen_vfp_F1_ld0(dp); gen_vfp_cmpe(dp); break; + case 12: /* vrintr */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_rintd(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints(cpu_F0s, cpu_F0s, fpst); + } + tcg_temp_free_ptr(fpst); + break; + } + case 13: /* vrintz */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode; + tcg_rmode = tcg_const_i32(float_round_to_zero); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + if (dp) { + gen_helper_rintd(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints(cpu_F0s, cpu_F0s, fpst); + } + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(fpst); + break; + } + case 14: /* vrintx */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst); + } + tcg_temp_free_ptr(fpst); + break; + } case 15: /* single<->double conversion */ if (dp) gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env); @@ -3398,16 +3585,21 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) } /* Write back the result. */ - if (op == 15 && (rn >= 8 && rn <= 11)) - ; /* Comparison, do nothing. */ - else if (op == 15 && dp && ((rn & 0x1c) == 0x18)) - /* VCVT double to int: always integer result. */ + if (op == 15 && (rn >= 8 && rn <= 11)) { + /* Comparison, do nothing. */ + } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 || + (rn & 0x1e) == 0x6)) { + /* VCVT double to int: always integer result. + * VCVT double to half precision is always a single + * precision result. + */ gen_mov_vreg_F0(0, rd); - else if (op == 15 && rn == 15) + } else if (op == 15 && rn == 15) { /* conversion */ gen_mov_vreg_F0(!dp, rd); - else + } else { gen_mov_vreg_F0(dp, rd); + } /* break out of the loop if we have finished */ if (veclen == 0) @@ -4617,8 +4809,22 @@ static const uint8_t neon_3r_sizes[] = { #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ #define NEON_2RM_VSHLL 38 +#define NEON_2RM_VRINTN 40 +#define NEON_2RM_VRINTX 41 +#define NEON_2RM_VRINTA 42 +#define NEON_2RM_VRINTZ 43 #define NEON_2RM_VCVT_F16_F32 44 +#define NEON_2RM_VRINTM 45 #define NEON_2RM_VCVT_F32_F16 46 +#define NEON_2RM_VRINTP 47 +#define NEON_2RM_VCVTAU 48 +#define NEON_2RM_VCVTAS 49 +#define NEON_2RM_VCVTNU 50 +#define NEON_2RM_VCVTNS 51 +#define NEON_2RM_VCVTPU 52 +#define NEON_2RM_VCVTPS 53 +#define NEON_2RM_VCVTMU 54 +#define NEON_2RM_VCVTMS 55 #define NEON_2RM_VRECPE 56 #define NEON_2RM_VRSQRTE 57 #define NEON_2RM_VRECPE_F 58 @@ -4632,6 +4838,9 @@ static int neon_2rm_is_float_op(int op) { /* Return true if this neon 2reg-misc op is float-to-float */ return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F || + (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) || + op == NEON_2RM_VRINTM || + (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) || op >= NEON_2RM_VRECPE_F); } @@ -4676,8 +4885,22 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VMOVN] = 0x7, [NEON_2RM_VQMOVN] = 0x7, [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_VRINTN] = 0x4, + [NEON_2RM_VRINTX] = 0x4, + [NEON_2RM_VRINTA] = 0x4, + [NEON_2RM_VRINTZ] = 0x4, [NEON_2RM_VCVT_F16_F32] = 0x2, + [NEON_2RM_VRINTM] = 0x4, [NEON_2RM_VCVT_F32_F16] = 0x2, + [NEON_2RM_VRINTP] = 0x4, + [NEON_2RM_VCVTAU] = 0x4, + [NEON_2RM_VCVTAS] = 0x4, + [NEON_2RM_VCVTNU] = 0x4, + [NEON_2RM_VCVTNS] = 0x4, + [NEON_2RM_VCVTPU] = 0x4, + [NEON_2RM_VCVTPS] = 0x4, + [NEON_2RM_VCVTMU] = 0x4, + [NEON_2RM_VCVTMS] = 0x4, [NEON_2RM_VRECPE] = 0x4, [NEON_2RM_VRSQRTE] = 0x4, [NEON_2RM_VRECPE_F] = 0x4, @@ -6388,6 +6611,73 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins } neon_store_reg(rm, pass, tmp2); break; + case NEON_2RM_VRINTN: + case NEON_2RM_VRINTA: + case NEON_2RM_VRINTM: + case NEON_2RM_VRINTP: + case NEON_2RM_VRINTZ: + { + TCGv_i32 tcg_rmode; + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + int rmode; + + if (op == NEON_2RM_VRINTZ) { + rmode = FPROUNDING_ZERO; + } else { + rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; + } + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + tcg_temp_free_ptr(fpstatus); + tcg_temp_free_i32(tcg_rmode); + break; + } + case NEON_2RM_VRINTX: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCVTAU: + case NEON_2RM_VCVTAS: + case NEON_2RM_VCVTNU: + case NEON_2RM_VCVTNS: + case NEON_2RM_VCVTPU: + case NEON_2RM_VCVTPS: + case NEON_2RM_VCVTMU: + case NEON_2RM_VCVTMS: + { + bool is_signed = !extract32(insn, 7, 1); + TCGv_ptr fpst = get_fpstatus_ptr(1); + TCGv_i32 tcg_rmode, tcg_shift; + int rmode = fp_decode_rm[extract32(insn, 8, 2)]; + + tcg_shift = tcg_const_i32(0); + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + + if (is_signed) { + gen_helper_vfp_tosls(cpu_F0s, cpu_F0s, + tcg_shift, fpst); + } else { + gen_helper_vfp_touls(cpu_F0s, cpu_F0s, + tcg_shift, fpst); + } + + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_i32(tcg_shift); + tcg_temp_free_ptr(fpst); + break; + } case NEON_2RM_VRECPE: gen_helper_recpe_u32(tmp, tmp, cpu_env); break; @@ -6547,6 +6837,17 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) return 1; } + if (ri->accessfn) { + /* Emit code to perform further access permissions checks at + * runtime; this may result in an exception. + */ + TCGv_ptr tmpptr; + gen_set_pc_im(s, s->pc); + tmpptr = tcg_const_ptr(ri); + gen_helper_access_check_cp_reg(cpu_env, tmpptr); + tcg_temp_free_ptr(tmpptr); + } + /* Handle special cases first */ switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { case ARM_CP_NOP: @@ -6575,7 +6876,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) tmp64 = tcg_const_i64(ri->resetvalue); } else if (ri->readfn) { TCGv_ptr tmpptr; - gen_set_pc_im(s, s->pc); tmp64 = tcg_temp_new_i64(); tmpptr = tcg_const_ptr(ri); gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr); @@ -6598,7 +6898,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) tmp = tcg_const_i32(ri->resetvalue); } else if (ri->readfn) { TCGv_ptr tmpptr; - gen_set_pc_im(s, s->pc); tmp = tcg_temp_new_i32(); tmpptr = tcg_const_ptr(ri); gen_helper_get_cp_reg(tmp, cpu_env, tmpptr); @@ -6633,7 +6932,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmphi); if (ri->writefn) { TCGv_ptr tmpptr = tcg_const_ptr(ri); - gen_set_pc_im(s, s->pc); gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64); tcg_temp_free_ptr(tmpptr); } else { @@ -6644,7 +6942,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) if (ri->writefn) { TCGv_i32 tmp; TCGv_ptr tmpptr; - gen_set_pc_im(s, s->pc); tmp = load_reg(s, rt); tmpptr = tcg_const_ptr(ri); gen_helper_set_cp_reg(cpu_env, tmpptr, tmp); @@ -6672,6 +6969,19 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) return 0; } + /* Unknown register; this might be a guest error or a QEMU + * unimplemented feature. + */ + if (is64) { + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " + "64 bit system register cp:%d opc1: %d crm:%d\n", + isread ? "read" : "write", cpnum, opc1, crm); + } else { + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " + "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d\n", + isread ? "read" : "write", cpnum, opc1, crn, crm, opc2); + } + return 1; } @@ -7251,6 +7561,36 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) store_reg(s, 14, tmp2); gen_bx(s, tmp); break; + case 0x4: + { + /* crc32/crc32c */ + uint32_t c = extract32(insn, 8, 4); + + /* Check this CPU supports ARMv8 CRC instructions. + * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED. + * Bits 8, 10 and 11 should be zero. + */ + if (!arm_feature(env, ARM_FEATURE_CRC) || op1 == 0x3 || + (c & 0xd) != 0) { + goto illegal_op; + } + + rn = extract32(insn, 16, 4); + rd = extract32(insn, 12, 4); + + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + tmp3 = tcg_const_i32(1 << op1); + if (c & 0x2) { + gen_helper_crc32c(tmp, tmp, tmp2, tmp3); + } else { + gen_helper_crc32(tmp, tmp, tmp2, tmp3); + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + store_reg(s, rd, tmp); + break; + } case 0x5: /* saturating add/subtract */ ARCH(5TE); rd = (insn >> 12) & 0xf; @@ -8835,6 +9175,32 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw case 0x18: /* clz */ gen_helper_clz(tmp, tmp); break; + case 0x20: + case 0x21: + case 0x22: + case 0x28: + case 0x29: + case 0x2a: + { + /* crc32/crc32c */ + uint32_t sz = op & 0x3; + uint32_t c = op & 0x8; + + if (!arm_feature(env, ARM_FEATURE_CRC)) { + goto illegal_op; + } + + tmp2 = load_reg(s, rm); + tmp3 = tcg_const_i32(1 << sz); + if (c) { + gen_helper_crc32c(tmp, tmp, tmp2, tmp3); + } else { + gen_helper_crc32(tmp, tmp, tmp2, tmp3); + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + break; + } default: goto illegal_op; } |