Diffstat (limited to 'target-arm')
-rw-r--r--  target-arm/cpu-qom.h          10
-rw-r--r--  target-arm/cpu.c              22
-rw-r--r--  target-arm/cpu.h             187
-rw-r--r--  target-arm/cpu64.c             1
-rw-r--r--  target-arm/helper-a64.c      136
-rw-r--r--  target-arm/helper-a64.h       10
-rw-r--r--  target-arm/helper.c         1216
-rw-r--r--  target-arm/helper.h           10
-rw-r--r--  target-arm/kvm-consts.h       16
-rw-r--r--  target-arm/kvm.c              55
-rw-r--r--  target-arm/kvm_arm.h          17
-rw-r--r--  target-arm/neon_helper.c      28
-rw-r--r--  target-arm/op_helper.c        69
-rw-r--r--  target-arm/translate-a64.c  4875
-rw-r--r--  target-arm/translate.c       418
15 files changed, 6441 insertions, 629 deletions
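
The core of the series is a change to the coprocessor register hook API in cpu.h: read hooks now return the value directly, write hooks return void, and permission checking moves out of the read/write functions into a separate accessfn returning a CPAccessResult. A minimal sketch of a register converted to the new style (the FOO register and the foo_* helpers are invented for illustration; they are not part of the patch, though the types, fields and state they touch all appear in the diff below):

static CPAccessResult foo_access(CPUARMState *env, const ARMCPRegInfo *ri)
{
    /* Runtime permission check, evaluated when the instruction executes */
    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
        return CP_ACCESS_TRAP;
    }
    return CP_ACCESS_OK;
}

static uint64_t foo_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
    return env->cp15.c9_pmcr;            /* reads can no longer fail */
}

static void foo_write(CPUARMState *env, const ARMCPRegInfo *ri,
                      uint64_t value)
{
    env->cp15.c9_pmcr = value & 0x39;    /* writes can no longer fail */
}

static const ARMCPRegInfo foo_reginfo = {
    .name = "FOO", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
    .access = PL0_RW, .accessfn = foo_access,
    .readfn = foo_read, .writefn = foo_write,
};
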
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index afbd4222c5..00234e1d3d 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -132,6 +132,16 @@ typedef struct ARMCPU {
uint32_t id_isar3;
uint32_t id_isar4;
uint32_t id_isar5;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64afr0;
+ uint64_t id_aa64afr1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
uint32_t clidr;
/* The elements of this array are the CCSIDR values for each cache,
* in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 52efd5d66f..1ce8a9bc38 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -60,7 +60,7 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
return;
}
- if (ri->type & ARM_CP_64BIT) {
+ if (cpreg_field_is_64bit(ri)) {
CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue;
} else {
CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue;
@@ -91,9 +91,10 @@ static void arm_cpu_reset(CPUState *s)
env->aarch64 = 1;
#if defined(CONFIG_USER_ONLY)
env->pstate = PSTATE_MODE_EL0t;
+ /* Userspace expects access to CTR_EL0 and the cache ops */
+ env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
#else
- env->pstate = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F
- | PSTATE_MODE_EL1h;
+ env->pstate = PSTATE_MODE_EL1h;
#endif
}
@@ -108,13 +109,14 @@ static void arm_cpu_reset(CPUState *s)
}
#else
/* SVC mode with interrupts disabled. */
- env->uncached_cpsr = ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
+ env->uncached_cpsr = ARM_CPU_MODE_SVC;
+ env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
/* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is
clear at reset. Initial SP and PC are loaded from ROM. */
if (IS_M(env)) {
uint32_t pc;
uint8_t *rom;
- env->uncached_cpsr &= ~CPSR_I;
+ env->daif &= ~PSTATE_I;
rom = rom_ptr(0);
if (rom) {
/* We should really use ldl_phys here, in case the guest
@@ -128,7 +130,7 @@ static void arm_cpu_reset(CPUState *s)
}
}
- if (env->cp15.c1_sys & (1 << 13)) {
+ if (env->cp15.c1_sys & SCTLR_V) {
env->regs[15] = 0xFFFF0000;
}
@@ -681,14 +683,12 @@ static void cortex_a9_initfn(Object *obj)
}
#ifndef CONFIG_USER_ONLY
-static int a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
/* Linux wants the number of processors from here.
* Might as well set the interrupt-controller bit too.
*/
- *value = ((smp_cpus - 1) << 24) | (1 << 23);
- return 0;
+ return ((smp_cpus - 1) << 24) | (1 << 23);
}
#endif
@@ -924,6 +924,7 @@ static void arm_any_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
set_feature(&cpu->env, ARM_FEATURE_V7MP);
+ set_feature(&cpu->env, ARM_FEATURE_CRC);
#ifdef TARGET_AARCH64
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
#endif
@@ -982,6 +983,7 @@ static const ARMCPUInfo arm_cpus[] = {
static Property arm_cpu_properties[] = {
DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
+ DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 198b6b8d4e..49fef3fcbe 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -74,8 +74,10 @@
*/
#ifdef HOST_WORDS_BIGENDIAN
#define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
+#define offsetofhigh32(S, M) offsetof(S, M)
#else
#define offsetoflow32(S, M) offsetof(S, M)
+#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
#endif
/* Meanings of the ARMCPU object's two inbound GPIO lines */
@@ -102,7 +104,7 @@ struct arm_boot_info;
/* CPU state for each instance of a generic timer (in cp15 c14) */
typedef struct ARMGenericTimer {
uint64_t cval; /* Timer CompareValue register */
- uint32_t ctl; /* Timer Control register */
+ uint64_t ctl; /* Timer Control register */
} ARMGenericTimer;
#define GTIMER_PHYS 0
@@ -133,6 +135,7 @@ typedef struct CPUARMState {
* NZCV are kept in the split out env->CF/VF/NF/ZF, (which have the same
* semantics as for AArch32, as described in the comments on each field)
* nRW (also known as M[4]) is kept, inverted, in env->aarch64
+ * DAIF (exception masks) are kept in env->daif
* all other bits are stored in their correct places in env->pstate
*/
uint32_t pstate;
@@ -162,20 +165,19 @@ typedef struct CPUARMState {
uint32_t GE; /* cpsr[19:16] */
uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
uint32_t condexec_bits; /* IT bits. cpsr[15:10,26:25]. */
+ uint32_t daif; /* exception masks, in the bit positions they occupy in PSTATE */
/* System control coprocessor (cp15) */
struct {
uint32_t c0_cpuid;
- uint32_t c0_cssel; /* Cache size selection. */
- uint32_t c1_sys; /* System control register. */
- uint32_t c1_coproc; /* Coprocessor access register. */
+ uint64_t c0_cssel; /* Cache size selection. */
+ uint64_t c1_sys; /* System control register. */
+ uint64_t c1_coproc; /* Coprocessor access register. */
uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */
uint32_t c1_scr; /* secure config register. */
- uint32_t c2_base0; /* MMU translation table base 0. */
- uint32_t c2_base0_hi; /* MMU translation table base 0, high 32 bits */
- uint32_t c2_base1; /* MMU translation table base 0. */
- uint32_t c2_base1_hi; /* MMU translation table base 1, high 32 bits */
- uint32_t c2_control; /* MMU translation table base control. */
+ uint64_t ttbr0_el1; /* MMU translation table base 0. */
+ uint64_t ttbr1_el1; /* MMU translation table base 1. */
+ uint64_t c2_control; /* MMU translation table base control. */
uint32_t c2_mask; /* MMU translation table base selection mask. */
uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
uint32_t c2_data; /* MPU data cachable bits. */
@@ -197,14 +199,15 @@ typedef struct CPUARMState {
uint32_t c9_pmxevtyper; /* perf monitor event type */
uint32_t c9_pmuserenr; /* perf monitor user enable */
uint32_t c9_pminten; /* perf monitor interrupt enables */
- uint32_t c12_vbar; /* vector base address register */
+ uint64_t mair_el1;
+ uint64_t c12_vbar; /* vector base address register */
uint32_t c13_fcse; /* FCSE PID. */
uint32_t c13_context; /* Context ID. */
uint64_t tpidr_el0; /* User RW Thread register. */
uint64_t tpidrro_el0; /* User RO Thread register. */
uint64_t tpidr_el1; /* Privileged Thread register. */
- uint32_t c14_cntfrq; /* Counter Frequency register */
- uint32_t c14_cntkctl; /* Timer Control register */
+ uint64_t c14_cntfrq; /* Counter Frequency register */
+ uint64_t c14_cntkctl; /* Timer Control register */
ARMGenericTimer c14_timer[NUM_GTIMERS];
uint32_t c15_cpar; /* XScale Coprocessor Access Register */
uint32_t c15_ticonfig; /* TI925T configuration byte. */
@@ -215,13 +218,12 @@ typedef struct CPUARMState {
uint32_t c15_diagnostic; /* diagnostic register */
uint32_t c15_power_diagnostic;
uint32_t c15_power_control; /* power control */
+ uint64_t dbgbvr[16]; /* breakpoint value registers */
+ uint64_t dbgbcr[16]; /* breakpoint control registers */
+ uint64_t dbgwvr[16]; /* watchpoint value registers */
+ uint64_t dbgwcr[16]; /* watchpoint control registers */
} cp15;
- /* System registers (AArch64) */
- struct {
- uint64_t tpidr_el0;
- } sr;
-
struct {
uint32_t other_sp;
uint32_t vecbase;
@@ -337,6 +339,58 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
int mmu_idx);
#define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
+/* SCTLR bit meanings. Several bits have been reused in newer
+ * versions of the architecture; in that case we define constants
+ * for both old and new bit meanings. Code which tests against those
+ * bits should probably check or otherwise arrange that the CPU
+ * is the architectural version it expects.
+ */
+#define SCTLR_M (1U << 0)
+#define SCTLR_A (1U << 1)
+#define SCTLR_C (1U << 2)
+#define SCTLR_W (1U << 3) /* up to v6; RAO in v7 */
+#define SCTLR_SA (1U << 3)
+#define SCTLR_P (1U << 4) /* up to v5; RAO in v6 and v7 */
+#define SCTLR_SA0 (1U << 4) /* v8 onward, AArch64 only */
+#define SCTLR_D (1U << 5) /* up to v5; RAO in v6 */
+#define SCTLR_CP15BEN (1U << 5) /* v7 onward */
+#define SCTLR_L (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */
+#define SCTLR_B (1U << 7) /* up to v6; RAZ in v7 */
+#define SCTLR_ITD (1U << 7) /* v8 onward */
+#define SCTLR_S (1U << 8) /* up to v6; RAZ in v7 */
+#define SCTLR_SED (1U << 8) /* v8 onward */
+#define SCTLR_R (1U << 9) /* up to v6; RAZ in v7 */
+#define SCTLR_UMA (1U << 9) /* v8 onward, AArch64 only */
+#define SCTLR_F (1U << 10) /* up to v6 */
+#define SCTLR_SW (1U << 10) /* v7 onward */
+#define SCTLR_Z (1U << 11)
+#define SCTLR_I (1U << 12)
+#define SCTLR_V (1U << 13)
+#define SCTLR_RR (1U << 14) /* up to v7 */
+#define SCTLR_DZE (1U << 14) /* v8 onward, AArch64 only */
+#define SCTLR_L4 (1U << 15) /* up to v6; RAZ in v7 */
+#define SCTLR_UCT (1U << 15) /* v8 onward, AArch64 only */
+#define SCTLR_DT (1U << 16) /* up to ??, RAO in v6 and v7 */
+#define SCTLR_nTWI (1U << 16) /* v8 onward */
+#define SCTLR_HA (1U << 17)
+#define SCTLR_IT (1U << 18) /* up to ??, RAO in v6 and v7 */
+#define SCTLR_nTWE (1U << 18) /* v8 onward */
+#define SCTLR_WXN (1U << 19)
+#define SCTLR_ST (1U << 20) /* up to ??, RAZ in v6 */
+#define SCTLR_UWXN (1U << 20) /* v7 onward */
+#define SCTLR_FI (1U << 21)
+#define SCTLR_U (1U << 22)
+#define SCTLR_XP (1U << 23) /* up to v6; v7 onward RAO */
+#define SCTLR_VE (1U << 24) /* up to v7 */
+#define SCTLR_E0E (1U << 24) /* v8 onward, AArch64 only */
+#define SCTLR_EE (1U << 25)
+#define SCTLR_L2 (1U << 26) /* up to v6, RAZ in v7 */
+#define SCTLR_UCI (1U << 26) /* v8 onward, AArch64 only */
+#define SCTLR_NMFI (1U << 27)
+#define SCTLR_TRE (1U << 28)
+#define SCTLR_AFE (1U << 29)
+#define SCTLR_TE (1U << 30)
+
#define CPSR_M (0x1fU)
#define CPSR_T (1U << 5)
#define CPSR_F (1U << 6)
@@ -354,9 +408,11 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
#define CPSR_Z (1U << 30)
#define CPSR_N (1U << 31)
#define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
+#define CPSR_AIF (CPSR_A | CPSR_I | CPSR_F)
#define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7)
-#define CACHED_CPSR_BITS (CPSR_T | CPSR_GE | CPSR_IT | CPSR_Q | CPSR_NZCV)
+#define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \
+ | CPSR_NZCV)
/* Bits writable in user mode. */
#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE)
/* Execution state bits. MRS read as zero, MSR writes ignored. */
@@ -379,7 +435,8 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
#define PSTATE_Z (1U << 30)
#define PSTATE_N (1U << 31)
#define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V)
-#define CACHED_PSTATE_BITS (PSTATE_NZCV)
+#define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)
+#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF)
/* Mode values for AArch64 */
#define PSTATE_MODE_EL3h 13
#define PSTATE_MODE_EL3t 12
@@ -400,7 +457,7 @@ static inline uint32_t pstate_read(CPUARMState *env)
ZF = (env->ZF == 0);
return (env->NF & 0x80000000) | (ZF << 30)
| (env->CF << 29) | ((env->VF & 0x80000000) >> 3)
- | env->pstate;
+ | env->pstate | env->daif;
}
static inline void pstate_write(CPUARMState *env, uint32_t val)
@@ -409,6 +466,7 @@ static inline void pstate_write(CPUARMState *env, uint32_t val)
env->NF = val;
env->CF = (val >> 29) & 1;
env->VF = (val << 3) & 0x80000000;
+ env->daif = val & PSTATE_DAIF;
env->pstate = val & ~CACHED_PSTATE_BITS;
}
@@ -496,6 +554,8 @@ enum arm_fprounding {
FPROUNDING_ODD
};
+int arm_rmode_to_sf(int rmode);
+
enum arm_cpu_mode {
ARM_CPU_MODE_USR = 0x10,
ARM_CPU_MODE_FIQ = 0x11,
@@ -566,6 +626,7 @@ enum arm_features {
ARM_FEATURE_AARCH64, /* supports 64 bit mode */
ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
ARM_FEATURE_CBAR, /* has cp15 CBAR */
+ ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
};
static inline int arm_feature(CPUARMState *env, int feature)
@@ -573,6 +634,22 @@ static inline int arm_feature(CPUARMState *env, int feature)
return (env->features & (1ULL << feature)) != 0;
}
+/* Return true if the specified exception level is running in AArch64 state. */
+static inline bool arm_el_is_aa64(CPUARMState *env, int el)
+{
+ /* We don't currently support EL2 or EL3, and this isn't valid for EL0
+ * (if we're in EL0, is_a64() is what you want, and if we're not in EL0
+ * then the state of EL0 isn't well defined.)
+ */
+ assert(el == 1);
+ /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This
+ * is a QEMU-imposed simplification which we may wish to change later.
+ * If we in future support EL2 and/or EL3, then the state of lower
+ * exception levels is controlled by the HCR.RW and SCR.RW bits.
+ */
+ return arm_feature(env, ARM_FEATURE_AARCH64);
+}
+
void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
/* Interface between CPU and Interrupt controller. */
@@ -682,7 +759,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
#define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8))
#define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
#define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_NZCV
+#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
/* Used only as a terminator for ARMCPRegInfo lists */
#define ARM_CP_SENTINEL 0xffff
/* Mask of only the flag bits in a type field */
@@ -762,14 +840,30 @@ static inline int arm_current_pl(CPUARMState *env)
typedef struct ARMCPRegInfo ARMCPRegInfo;
-/* Access functions for coprocessor registers. These should return
- * 0 on success, or one of the EXCP_* constants if access should cause
- * an exception (in which case *value is not written).
+typedef enum CPAccessResult {
+ /* Access is permitted */
+ CP_ACCESS_OK = 0,
+ /* Access fails due to a configurable trap or enable which would
+ * result in a categorized exception syndrome giving information about
+ * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
+ * 0xc or 0x18).
+ */
+ CP_ACCESS_TRAP = 1,
+ /* Access fails and results in an exception syndrome 0x0 ("uncategorized").
+ * Note that this is not a catch-all case -- the set of cases which may
+ * result in this failure is specifically defined by the architecture.
+ */
+ CP_ACCESS_TRAP_UNCATEGORIZED = 2,
+} CPAccessResult;
+
+/* Access functions for coprocessor registers. These cannot fail and
+ * may not raise exceptions.
*/
-typedef int CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque,
- uint64_t *value);
-typedef int CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
- uint64_t value);
+typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
+ uint64_t value);
+/* Access permission check functions for coprocessor registers. */
+typedef CPAccessResult CPAccessFn(CPUARMState *env, const ARMCPRegInfo *opaque);
/* Hook function for register reset */
typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque);
@@ -823,6 +917,12 @@ struct ARMCPRegInfo {
* 2. both readfn and writefn are specified
*/
ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */
+ /* Function for making any access checks for this register in addition to
+ * those specified by the 'access' permissions bits. If NULL, no extra
+ * checks required. The access check is performed at runtime, not at
+ * translate time.
+ */
+ CPAccessFn *accessfn;
/* Function for handling reads of this register. If NULL, then reads
* will be done by loading from the offset into CPUARMState specified
* by fieldoffset.
@@ -836,14 +936,14 @@ struct ARMCPRegInfo {
/* Function for doing a "raw" read; used when we need to copy
* coprocessor state to the kernel for KVM or out for
* migration. This only needs to be provided if there is also a
- * readfn and it makes an access permission check.
+ * readfn and it has side effects (for instance clear-on-read bits).
*/
CPReadFn *raw_readfn;
/* Function for doing a "raw" write; used when we need to copy KVM
* kernel coprocessor state into userspace, or for inbound
* migration. This only needs to be provided if there is also a
- * writefn and it makes an access permission check or masks out
- * "unwritable" bits or has write-one-to-clear or similar behaviour.
+ * writefn and it masks out "unwritable" bits or has write-one-to-clear
+ * or similar behaviour.
*/
CPWriteFn *raw_writefn;
/* Function for resetting the register. If NULL, then reset will be done
@@ -878,16 +978,24 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
/* CPWriteFn that can be used to implement writes-ignored behaviour */
-int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value);
+void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value);
/* CPReadFn that can be used for read-as-zero behaviour */
-int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value);
+uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri);
/* CPResetFn that does nothing, for use if no reset is required even
* if fieldoffset is non zero.
*/
void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
+/* Return true if this reginfo struct's field in the cpu state struct
+ * is 64 bits wide.
+ */
+static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
+{
+ return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
+}
+
static inline bool cp_access_ok(int current_pl,
const ARMCPRegInfo *ri, int isread)
{
@@ -972,7 +1080,7 @@ static inline CPUARMState *cpu_init(const char *cpu_model)
#define MMU_USER_IDX 1
static inline int cpu_mmu_index (CPUARMState *env)
{
- return (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR ? 1 : 0;
+ return arm_current_pl(env) ? 0 : 1;
}
#include "exec/cpu-all.h"
@@ -999,7 +1107,9 @@ static inline int cpu_mmu_index (CPUARMState *env)
#define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
#define ARM_TBFLAG_BSWAP_CODE_MASK (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
-/* Bit usage when in AArch64 state: currently no bits defined */
+/* Bit usage when in AArch64 state */
+#define ARM_TBFLAG_AA64_EL_SHIFT 0
+#define ARM_TBFLAG_AA64_EL_MASK (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
/* some convenience accessor macros */
#define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1018,13 +1128,16 @@ static inline int cpu_mmu_index (CPUARMState *env)
(((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
#define ARM_TBFLAG_BSWAP_CODE(F) \
(((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_AA64_EL(F) \
+ (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
if (is_a64(env)) {
*pc = env->pc;
- *flags = ARM_TBFLAG_AARCH64_STATE_MASK;
+ *flags = ARM_TBFLAG_AARCH64_STATE_MASK
+ | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
} else {
int privmode;
*pc = env->regs[15];
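
To make the PSTATE/DAIF split above concrete: pstate_write() now stores the exception-mask bits only in env->daif, and pstate_read() folds them back in together with the cached NZCV flags. A small round-trip sketch using the macros and accessors shown above (the example function itself is not part of the patch; assert() needs <assert.h>):

static void example_pstate_roundtrip(CPUARMState *env)
{
    uint32_t val = PSTATE_MODE_EL1h | PSTATE_I | PSTATE_F | PSTATE_N;

    pstate_write(env, val);
    /* env->daif now holds PSTATE_I | PSTATE_F, the N flag lives in
     * env->NF, and env->pstate keeps only the uncached bits (here the
     * EL1h mode field).
     */
    assert(pstate_read(env) == val);  /* cached bits are folded back in */
}
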
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index a639c2e476..8426bf1333 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -45,6 +45,7 @@ static void aarch64_any_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
set_feature(&cpu->env, ARM_FEATURE_V7MP);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
}
#endif
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 4ce0d01a85..c2ce33ee88 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -122,3 +122,139 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
return float_rel_to_flags(float64_compare(x, y, fp_status));
}
+
+float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ if ((float32_is_zero(a) && float32_is_infinity(b)) ||
+ (float32_is_infinity(a) && float32_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float32((1U << 30) |
+ ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
+ }
+ return float32_mul(a, b, fpst);
+}
+
+float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ if ((float64_is_zero(a) && float64_is_infinity(b)) ||
+ (float64_is_infinity(a) && float64_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float64((1ULL << 62) |
+ ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
+ }
+ return float64_mul(a, b, fpst);
+}
+
+uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
+ uint32_t rn, uint32_t numregs)
+{
+ /* Helper function for SIMD TBL and TBX. We have to do the table
+ * lookup part for the 64 bits worth of indices we're passed in.
+ * result is the initial results vector (either zeroes for TBL
+ * or some guest values for TBX), rn the register number where
+ * the table starts, and numregs the number of registers in the table.
+ * We return the results of the lookups.
+ */
+ int shift;
+
+ for (shift = 0; shift < 64; shift += 8) {
+ int index = extract64(indices, shift, 8);
+ if (index < 16 * numregs) {
+ /* Convert index (a byte offset into the virtual table
+ * which is a series of 128-bit vectors concatenated)
+ * into the correct vfp.regs[] element plus a bit offset
+ * into that element, bearing in mind that the table
+ * can wrap around from V31 to V0.
+ */
+ int elt = (rn * 2 + (index >> 3)) % 64;
+ int bitidx = (index & 7) * 8;
+ uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8);
+
+ result = deposit64(result, shift, 8, val);
+ }
+ }
+ return result;
+}
+
+/* 64bit/double versions of the neon float compare functions */
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_eq_quiet(a, b, fpst);
+}
+
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_le(b, a, fpst);
+}
+
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_lt(b, a, fpst);
+}
+
+/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+ * versions, these do a fully fused multiply-add or
+ * multiply-add-and-halve.
+ */
+#define float32_two make_float32(0x40000000)
+#define float32_three make_float32(0x40400000)
+#define float32_one_point_five make_float32(0x3fc00000)
+
+#define float64_two make_float64(0x4000000000000000ULL)
+#define float64_three make_float64(0x4008000000000000ULL)
+#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
+
+float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float32_chs(a);
+ if ((float32_is_infinity(a) && float32_is_zero(b)) ||
+ (float32_is_infinity(b) && float32_is_zero(a))) {
+ return float32_two;
+ }
+ return float32_muladd(a, b, float32_two, 0, fpst);
+}
+
+float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float64_chs(a);
+ if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+ (float64_is_infinity(b) && float64_is_zero(a))) {
+ return float64_two;
+ }
+ return float64_muladd(a, b, float64_two, 0, fpst);
+}
+
+float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float32_chs(a);
+ if ((float32_is_infinity(a) && float32_is_zero(b)) ||
+ (float32_is_infinity(b) && float32_is_zero(a))) {
+ return float32_one_point_five;
+ }
+ return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
+}
+
+float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float64_chs(a);
+ if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+ (float64_is_infinity(b) && float64_is_zero(a))) {
+ return float64_one_point_five;
+ }
+ return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
+}
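
For reference, the new step helpers above implement the architectural formulas: FRECPS computes 2 - a*b and FRSQRTS computes (3 - a*b) / 2, which the code obtains by negating a and using a fused multiply-add (with float_muladd_halve_result doing the halving), while FMULX returns 2.0 with sign(a) XOR sign(b) for the zero-times-infinity case. A plain, non-fused reference model, for illustration only:

/* Reference arithmetic only; the real helpers use softfloat types and
 * fuse the multiply-add.
 */
static double recps_step(double a, double b)
{
    return 2.0 - a * b;              /* e.g. recps_step(0.5, 1.5) == 1.25 */
}

static double rsqrts_step(double a, double b)
{
    return (3.0 - a * b) / 2.0;      /* e.g. rsqrts_step(0.5, 1.5) == 1.125 */
}
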
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index bca19f3dea..ab9933cab0 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -26,3 +26,13 @@ DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
+DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index c708f15e27..90f85f1899 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5,6 +5,8 @@
#include "sysemu/arch_init.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* For crc32 */
#ifndef CONFIG_USER_ONLY
static inline int get_phys_addr(CPUARMState *env, uint32_t address,
@@ -107,65 +109,56 @@ static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
}
}
-static int raw_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- if (ri->type & ARM_CP_64BIT) {
- *value = CPREG_FIELD64(env, ri);
+ if (cpreg_field_is_64bit(ri)) {
+ return CPREG_FIELD64(env, ri);
} else {
- *value = CPREG_FIELD32(env, ri);
+ return CPREG_FIELD32(env, ri);
}
- return 0;
}
-static int raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- if (ri->type & ARM_CP_64BIT) {
+ if (cpreg_field_is_64bit(ri)) {
CPREG_FIELD64(env, ri) = value;
} else {
CPREG_FIELD32(env, ri) = value;
}
- return 0;
}
-static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *v)
+static uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* Raw read of a coprocessor register (as needed for migration, etc)
- * return true on success, false if the read is impossible for some reason.
- */
+ /* Raw read of a coprocessor register (as needed for migration, etc). */
if (ri->type & ARM_CP_CONST) {
- *v = ri->resetvalue;
+ return ri->resetvalue;
} else if (ri->raw_readfn) {
- return (ri->raw_readfn(env, ri, v) == 0);
+ return ri->raw_readfn(env, ri);
} else if (ri->readfn) {
- return (ri->readfn(env, ri, v) == 0);
+ return ri->readfn(env, ri);
} else {
- raw_read(env, ri, v);
+ return raw_read(env, ri);
}
- return true;
}
-static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
- int64_t v)
+static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t v)
{
/* Raw write of a coprocessor register (as needed for migration, etc).
- * Return true on success, false if the write is impossible for some reason.
* Note that constant registers are treated as write-ignored; the
* caller should check for success by whether a readback gives the
* value written.
*/
if (ri->type & ARM_CP_CONST) {
- return true;
+ return;
} else if (ri->raw_writefn) {
- return (ri->raw_writefn(env, ri, v) == 0);
+ ri->raw_writefn(env, ri, v);
} else if (ri->writefn) {
- return (ri->writefn(env, ri, v) == 0);
+ ri->writefn(env, ri, v);
} else {
raw_write(env, ri, v);
}
- return true;
}
bool write_cpustate_to_list(ARMCPU *cpu)
@@ -177,7 +170,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
for (i = 0; i < cpu->cpreg_array_len; i++) {
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
const ARMCPRegInfo *ri;
- uint64_t v;
+
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
if (!ri) {
ok = false;
@@ -186,11 +179,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
if (ri->type & ARM_CP_NO_MIGRATE) {
continue;
}
- if (!read_raw_cp_reg(&cpu->env, ri, &v)) {
- ok = false;
- continue;
- }
- cpu->cpreg_values[i] = v;
+ cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
}
return ok;
}
@@ -203,7 +192,6 @@ bool write_list_to_cpustate(ARMCPU *cpu)
for (i = 0; i < cpu->cpreg_array_len; i++) {
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
uint64_t v = cpu->cpreg_values[i];
- uint64_t readback;
const ARMCPRegInfo *ri;
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
@@ -218,9 +206,8 @@ bool write_list_to_cpustate(ARMCPU *cpu)
* (to catch read-only registers and partially read-only
* registers where the incoming migration value doesn't match)
*/
- if (!write_raw_cp_reg(&cpu->env, ri, v) ||
- !read_raw_cp_reg(&cpu->env, ri, &readback) ||
- readback != v) {
+ write_raw_cp_reg(&cpu->env, ri, v);
+ if (read_raw_cp_reg(&cpu->env, ri) != v) {
ok = false;
}
}
@@ -309,14 +296,13 @@ void init_cpreg_list(ARMCPU *cpu)
g_list_free(keys);
}
-static int dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
env->cp15.c3 = value;
tlb_flush(env, 1); /* Flush TLB as domain not tracked in TLB */
- return 0;
}
-static int fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
if (env->cp15.c13_fcse != value) {
/* Unlike real hardware the qemu TLB uses virtual addresses,
@@ -325,10 +311,10 @@ static int fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
tlb_flush(env, 1);
env->cp15.c13_fcse = value;
}
- return 0;
}
-static int contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+
+static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
if (env->cp15.c13_context != value && !arm_feature(env, ARM_FEATURE_MPU)) {
/* For VMSA (when not using the LPAE long descriptor page table
@@ -338,39 +324,34 @@ static int contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush(env, 1);
}
env->cp15.c13_context = value;
- return 0;
}
-static int tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* Invalidate all (TLBIALL) */
tlb_flush(env, 1);
- return 0;
}
-static int tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
tlb_flush_page(env, value & TARGET_PAGE_MASK);
- return 0;
}
-static int tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* Invalidate by ASID (TLBIASID) */
tlb_flush(env, value == 0);
- return 0;
}
-static int tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
tlb_flush_page(env, value & TARGET_PAGE_MASK);
- return 0;
}
static const ARMCPRegInfo cp_reginfo[] = {
@@ -450,14 +431,14 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
REGINFO_SENTINEL
};
-static int cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
if (env->cp15.c1_coproc != value) {
env->cp15.c1_coproc = value;
/* ??? Is this safe when called from within a TB? */
tb_flush(env);
}
- return 0;
}
static const ARMCPRegInfo v6_cp_reginfo[] = {
@@ -479,124 +460,101 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
*/
{ .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, },
- { .name = "CPACR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
+ { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
+ .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_coproc),
.resetvalue = 0, .writefn = cpacr_write },
REGINFO_SENTINEL
};
-
-static int pmreg_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* Generic performance monitor register read function for where
- * user access may be allowed by PMUSERENR.
+ /* Performance monitor registers' user accessibility is controlled
+ * by PMUSERENR.
*/
if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
- return EXCP_UDEF;
+ return CP_ACCESS_TRAP;
}
- *value = CPREG_FIELD32(env, ri);
- return 0;
+ return CP_ACCESS_OK;
}
-static int pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
- return EXCP_UDEF;
- }
/* only the DP, X, D and E bits are writable */
env->cp15.c9_pmcr &= ~0x39;
env->cp15.c9_pmcr |= (value & 0x39);
- return 0;
}
-static int pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
- return EXCP_UDEF;
- }
value &= (1 << 31);
env->cp15.c9_pmcnten |= value;
- return 0;
}
-static int pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
- return EXCP_UDEF;
- }
value &= (1 << 31);
env->cp15.c9_pmcnten &= ~value;
- return 0;
}
-static int pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
- return EXCP_UDEF;
- }
env->cp15.c9_pmovsr &= ~value;
- return 0;
}
-static int pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
- return EXCP_UDEF;
- }
env->cp15.c9_pmxevtyper = value & 0xff;
- return 0;
}
-static int pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
env->cp15.c9_pmuserenr = value & 1;
- return 0;
}
-static int pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* We have no event counters so only the C bit can be changed */
value &= (1 << 31);
env->cp15.c9_pminten |= value;
- return 0;
}
-static int pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
value &= (1 << 31);
env->cp15.c9_pminten &= ~value;
- return 0;
}
-static int vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
+ /* Note that even though the AArch64 view of this register has bits
+ * [10:0] all RES0 we can only mask the bottom 5, to comply with the
+ * architectural requirements for bits which are RES0 only in some
+ * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7
+ * requires the bottom five bits to be RAZ/WI because they're UNK/SBZP.)
+ */
env->cp15.c12_vbar = value & ~0x1Ful;
- return 0;
}
-static int ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu = arm_env_get_cpu(env);
- *value = cpu->ccsidr[env->cp15.c0_cssel];
- return 0;
+ return cpu->ccsidr[env->cp15.c0_cssel];
}
-static int csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c0_cssel = value & 0xf;
- return 0;
}
static const ARMCPRegInfo v7_cp_reginfo[] = {
@@ -624,37 +582,41 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
{ .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1,
.access = PL0_RW, .resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
- .readfn = pmreg_read, .writefn = pmcntenset_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write },
+ .writefn = pmcntenset_write,
+ .accessfn = pmreg_access,
+ .raw_writefn = raw_write },
{ .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2,
.access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
- .readfn = pmreg_read, .writefn = pmcntenclr_write,
+ .accessfn = pmreg_access,
+ .writefn = pmcntenclr_write,
.type = ARM_CP_NO_MIGRATE },
{ .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3,
.access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
- .readfn = pmreg_read, .writefn = pmovsr_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write },
- /* Unimplemented so WI. Strictly speaking write accesses in PL0 should
- * respect PMUSERENR.
- */
+ .accessfn = pmreg_access,
+ .writefn = pmovsr_write,
+ .raw_writefn = raw_write },
+ /* Unimplemented so WI. */
{ .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
- .access = PL0_W, .type = ARM_CP_NOP },
+ .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP },
/* Since we don't implement any events, writing to PMSELR is UNPREDICTABLE.
- * We choose to RAZ/WI. XXX should respect PMUSERENR.
+ * We choose to RAZ/WI.
*/
{ .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
- .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- /* Unimplemented, RAZ/WI. XXX PMUSERENR */
+ .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = pmreg_access },
+ /* Unimplemented, RAZ/WI. */
{ .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
- .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = pmreg_access },
{ .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
.access = PL0_RW,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
- .readfn = pmreg_read, .writefn = pmxevtyper_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write },
- /* Unimplemented, RAZ/WI. XXX PMUSERENR */
+ .accessfn = pmreg_access, .writefn = pmxevtyper_write,
+ .raw_writefn = raw_write },
+ /* Unimplemented, RAZ/WI. */
{ .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2,
- .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = pmreg_access },
{ .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0,
.access = PL0_R | PL1_RW,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr),
@@ -669,16 +631,19 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
.resetvalue = 0, .writefn = pmintenclr_write, },
- { .name = "VBAR", .cp = 15, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
+ { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .writefn = vbar_write,
.fieldoffset = offsetof(CPUARMState, cp15.c12_vbar),
.resetvalue = 0 },
{ .name = "SCR", .cp = 15, .crn = 1, .crm = 1, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_scr),
.resetvalue = 0, },
- { .name = "CCSIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
+ { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
.access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_MIGRATE },
- { .name = "CSSELR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
+ { .name = "CSSELR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c0_cssel),
.writefn = csselr_write, .resetvalue = 0 },
/* Auxiliary ID register: this actually has an IMPDEF value but for now
@@ -686,37 +651,42 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
*/
{ .name = "AIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ /* MAIR can just read-as-written because we don't implement caches
+ * and so don't need to care about memory attributes.
+ */
+ { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el1),
+ .resetvalue = 0 },
+ /* For non-long-descriptor page tables these are PRRR and NMRR;
+ * regardless they still act as reads-as-written for QEMU.
+ * The override is necessary because of the overly-broad TLB_LOCKDOWN
+ * definition.
+ */
+ { .name = "MAIR0", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
+ .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.mair_el1),
+ .resetfn = arm_cp_reset_ignore },
+ { .name = "MAIR1", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
+ .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
+ .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1),
+ .resetfn = arm_cp_reset_ignore },
REGINFO_SENTINEL
};
-static int teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
value &= 1;
env->teecr = value;
- return 0;
}
-static int teehbr_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* This is a helper function because the user access rights
- * depend on the value of the TEECR.
- */
if (arm_current_pl(env) == 0 && (env->teecr & 1)) {
- return EXCP_UDEF;
+ return CP_ACCESS_TRAP;
}
- *value = env->teehbr;
- return 0;
-}
-
-static int teehbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- if (arm_current_pl(env) == 0 && (env->teecr & 1)) {
- return EXCP_UDEF;
- }
- env->teehbr = value;
- return 0;
+ return CP_ACCESS_OK;
}
static const ARMCPRegInfo t2ee_cp_reginfo[] = {
@@ -726,8 +696,7 @@ static const ARMCPRegInfo t2ee_cp_reginfo[] = {
.writefn = teecr_write },
{ .name = "TEEHBR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 6, .opc2 = 0,
.access = PL0_RW, .fieldoffset = offsetof(CPUARMState, teehbr),
- .resetvalue = 0, .raw_readfn = raw_read, .raw_writefn = raw_write,
- .readfn = teehbr_read, .writefn = teehbr_write },
+ .accessfn = teehbr_access, .resetvalue = 0 },
REGINFO_SENTINEL
};
@@ -757,6 +726,59 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = {
#ifndef CONFIG_USER_ONLY
+static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */
+ if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) {
+ return CP_ACCESS_TRAP;
+ }
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx)
+{
+ /* CNT[PV]CT: not visible from PL0 if EL0[PV]CTEN is zero */
+ if (arm_current_pl(env) == 0 &&
+ !extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
+ return CP_ACCESS_TRAP;
+ }
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx)
+{
+ /* CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from PL0 if
+ * EL0[PV]TEN is zero.
+ */
+ if (arm_current_pl(env) == 0 &&
+ !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
+ return CP_ACCESS_TRAP;
+ }
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult gt_pct_access(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ return gt_counter_access(env, GTIMER_PHYS);
+}
+
+static CPAccessResult gt_vct_access(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ return gt_counter_access(env, GTIMER_VIRT);
+}
+
+static CPAccessResult gt_ptimer_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return gt_timer_access(env, GTIMER_PHYS);
+}
+
+static CPAccessResult gt_vtimer_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return gt_timer_access(env, GTIMER_VIRT);
+}
+
static uint64_t gt_get_countervalue(CPUARMState *env)
{
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE;
@@ -802,17 +824,6 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
}
}
-static int gt_cntfrq_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- /* Not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */
- if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) {
- return EXCP_UDEF;
- }
- *value = env->cp15.c14_cntfrq;
- return 0;
-}
-
static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu = arm_env_get_cpu(env);
@@ -821,81 +832,40 @@ static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri)
timer_del(cpu->gt_timer[timeridx]);
}
-static int gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- int timeridx = ri->opc1 & 1;
-
- if (arm_current_pl(env) == 0 &&
- !extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
- return EXCP_UDEF;
- }
- *value = gt_get_countervalue(env);
- return 0;
-}
-
-static int gt_cval_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- int timeridx = ri->opc1 & 1;
-
- if (arm_current_pl(env) == 0 &&
- !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
- return EXCP_UDEF;
- }
- *value = env->cp15.c14_timer[timeridx].cval;
- return 0;
+ return gt_get_countervalue(env);
}
-static int gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
int timeridx = ri->opc1 & 1;
env->cp15.c14_timer[timeridx].cval = value;
gt_recalc_timer(arm_env_get_cpu(env), timeridx);
- return 0;
}
-static int gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+
+static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
int timeridx = ri->crm & 1;
- if (arm_current_pl(env) == 0 &&
- !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
- return EXCP_UDEF;
- }
- *value = (uint32_t)(env->cp15.c14_timer[timeridx].cval -
- gt_get_countervalue(env));
- return 0;
+ return (uint32_t)(env->cp15.c14_timer[timeridx].cval -
+ gt_get_countervalue(env));
}
-static int gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
int timeridx = ri->crm & 1;
env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) +
+ sextract64(value, 0, 32);
gt_recalc_timer(arm_env_get_cpu(env), timeridx);
- return 0;
-}
-
-static int gt_ctl_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- int timeridx = ri->crm & 1;
-
- if (arm_current_pl(env) == 0 &&
- !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
- return EXCP_UDEF;
- }
- *value = env->cp15.c14_timer[timeridx].ctl;
- return 0;
}
-static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
int timeridx = ri->crm & 1;
@@ -912,7 +882,6 @@ static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
qemu_set_irq(cpu->gt_timer_outputs[timeridx],
(oldval & 4) && (value & 2));
}
- return 0;
}
void arm_gt_ptimer_cb(void *opaque)
@@ -935,66 +904,131 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
* Our reset value matches the fixed frequency we implement the timer at.
*/
{ .name = "CNTFRQ", .cp = 15, .crn = 14, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW | PL0_R,
+ .type = ARM_CP_NO_MIGRATE,
+ .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_cntfrq),
+ .resetfn = arm_cp_reset_ignore,
+ },
+ { .name = "CNTFRQ_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
+ .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
.fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
.resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
- .readfn = gt_cntfrq_read, .raw_readfn = raw_read,
},
/* overall control: mostly access permissions */
- { .name = "CNTKCTL", .cp = 15, .crn = 14, .crm = 1, .opc1 = 0, .opc2 = 0,
+ { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0,
.access = PL1_RW,
.fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl),
.resetvalue = 0,
},
/* per-timer control */
{ .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
+ .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R,
+ .accessfn = gt_ptimer_access,
+ .fieldoffset = offsetoflow32(CPUARMState,
+ cp15.c14_timer[GTIMER_PHYS].ctl),
+ .resetfn = arm_cp_reset_ignore,
+ .writefn = gt_ctl_write, .raw_writefn = raw_write,
+ },
+ { .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1,
.type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .accessfn = gt_ptimer_access,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
.resetvalue = 0,
- .readfn = gt_ctl_read, .writefn = gt_ctl_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write,
+ .writefn = gt_ctl_write, .raw_writefn = raw_write,
},
{ .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1,
+ .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R,
+ .accessfn = gt_vtimer_access,
+ .fieldoffset = offsetoflow32(CPUARMState,
+ cp15.c14_timer[GTIMER_VIRT].ctl),
+ .resetfn = arm_cp_reset_ignore,
+ .writefn = gt_ctl_write, .raw_writefn = raw_write,
+ },
+ { .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1,
.type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .accessfn = gt_vtimer_access,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
.resetvalue = 0,
- .readfn = gt_ctl_read, .writefn = gt_ctl_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write,
+ .writefn = gt_ctl_write, .raw_writefn = raw_write,
},
/* TimerValue views: a 32 bit downcounting view of the underlying state */
{ .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
.type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .accessfn = gt_ptimer_access,
+ .readfn = gt_tval_read, .writefn = gt_tval_write,
+ },
+ { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0,
+ .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
.readfn = gt_tval_read, .writefn = gt_tval_write,
},
{ .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0,
.type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .accessfn = gt_vtimer_access,
+ .readfn = gt_tval_read, .writefn = gt_tval_write,
+ },
+ { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0,
+ .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
.readfn = gt_tval_read, .writefn = gt_tval_write,
},
/* The counter itself */
{ .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0,
.access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
+ .accessfn = gt_pct_access,
+ .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+ },
+ { .name = "CNTPCT_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 1,
+ .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO,
+ .accessfn = gt_pct_access,
.readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
},
{ .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1,
.access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
+ .accessfn = gt_vct_access,
+ .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+ },
+ { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2,
+ .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO,
+ .accessfn = gt_vct_access,
.readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
},
/* Comparison value, indicating when the timer goes off */
{ .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2,
.access = PL1_RW | PL0_R,
- .type = ARM_CP_64BIT | ARM_CP_IO,
+ .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
- .resetvalue = 0,
- .readfn = gt_cval_read, .writefn = gt_cval_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write,
+ .accessfn = gt_ptimer_access, .resetfn = arm_cp_reset_ignore,
+ .writefn = gt_cval_write, .raw_writefn = raw_write,
+ },
+ { .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2,
+ .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
+ .resetvalue = 0, .accessfn = gt_vtimer_access,
+ .writefn = gt_cval_write, .raw_writefn = raw_write,
},
{ .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3,
.access = PL1_RW | PL0_R,
- .type = ARM_CP_64BIT | ARM_CP_IO,
+ .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
- .resetvalue = 0,
- .readfn = gt_cval_read, .writefn = gt_cval_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write,
+ .accessfn = gt_vtimer_access, .resetfn = arm_cp_reset_ignore,
+ .writefn = gt_cval_write, .raw_writefn = raw_write,
+ },
+ { .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2,
+ .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
+ .resetvalue = 0, .accessfn = gt_vtimer_access,
+ .writefn = gt_cval_write, .raw_writefn = raw_write,
},
REGINFO_SENTINEL
};
@@ -1010,7 +1044,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
#endif
-static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
if (arm_feature(env, ARM_FEATURE_LPAE)) {
env->cp15.c7_par = value;
@@ -1019,7 +1053,6 @@ static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
} else {
env->cp15.c7_par = value & 0xfffff1ff;
}
- return 0;
}
#ifndef CONFIG_USER_ONLY
@@ -1035,7 +1068,20 @@ static inline bool extended_addresses_enabled(CPUARMState *env)
&& (env->cp15.c2_control & (1U << 31));
}
-static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ if (ri->opc2 & 4) {
+ /* Other states are only available with TrustZone; in
+ * a non-TZ implementation these registers don't exist
+ * at all, which is an Uncategorized trap. This underdecoding
+ * is safe because the reginfo is NO_MIGRATE.
+ */
+ return CP_ACCESS_TRAP_UNCATEGORIZED;
+ }
+ return CP_ACCESS_OK;
+}
+
+static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
hwaddr phys_addr;
target_ulong page_size;
@@ -1043,10 +1089,6 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
int ret, is_user = ri->opc2 & 2;
int access_type = ri->opc2 & 1;
- if (ri->opc2 & 4) {
- /* Other states are only available with TrustZone */
- return EXCP_UDEF;
- }
ret = get_phys_addr(env, value, access_type, is_user,
&phys_addr, &prot, &page_size);
if (extended_addresses_enabled(env)) {
@@ -1082,13 +1124,12 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
env->cp15.c7_par = phys_addr & 0xfffff000;
}
} else {
- env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
- ((ret & (12 << 1)) >> 6) |
+ env->cp15.c7_par = ((ret & (1 << 10)) >> 5) |
+ ((ret & (1 << 12)) >> 6) |
((ret & 0xf) << 1) | 1;
}
env->cp15.c7_par_hi = 0;
}
- return 0;
}
#endif
@@ -1099,7 +1140,8 @@ static const ARMCPRegInfo vapa_cp_reginfo[] = {
.writefn = par_write },
#ifndef CONFIG_USER_ONLY
{ .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
- .access = PL1_W, .writefn = ats_write, .type = ARM_CP_NO_MIGRATE },
+ .access = PL1_W, .accessfn = ats_access,
+ .writefn = ats_write, .type = ARM_CP_NO_MIGRATE },
#endif
REGINFO_SENTINEL
};
@@ -1134,52 +1176,26 @@ static uint32_t extended_mpu_ap_bits(uint32_t val)
return ret;
}
-static int pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c5_data = extended_mpu_ap_bits(value);
- return 0;
}
-static int pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- *value = simple_mpu_ap_bits(env->cp15.c5_data);
- return 0;
+ return simple_mpu_ap_bits(env->cp15.c5_data);
}
-static int pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c5_insn = extended_mpu_ap_bits(value);
- return 0;
-}
-
-static int pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- *value = simple_mpu_ap_bits(env->cp15.c5_insn);
- return 0;
-}
-
-static int arm946_prbs_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- if (ri->crm >= 8) {
- return EXCP_UDEF;
- }
- *value = env->cp15.c6_region[ri->crm];
- return 0;
}
-static int arm946_prbs_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- if (ri->crm >= 8) {
- return EXCP_UDEF;
- }
- env->cp15.c6_region[ri->crm] = value;
- return 0;
+ return simple_mpu_ap_bits(env->cp15.c5_insn);
}
static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
@@ -1204,14 +1220,35 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
.access = PL1_RW,
.fieldoffset = offsetof(CPUARMState, cp15.c2_insn), .resetvalue = 0, },
/* Protection region base and size registers */
- { .name = "946_PRBS", .cp = 15, .crn = 6, .crm = CP_ANY, .opc1 = 0,
- .opc2 = CP_ANY, .access = PL1_RW,
- .readfn = arm946_prbs_read, .writefn = arm946_prbs_write, },
+ { .name = "946_PRBS0", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[0]) },
+ { .name = "946_PRBS1", .cp = 15, .crn = 6, .crm = 1, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[1]) },
+ { .name = "946_PRBS2", .cp = 15, .crn = 6, .crm = 2, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[2]) },
+ { .name = "946_PRBS3", .cp = 15, .crn = 6, .crm = 3, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[3]) },
+ { .name = "946_PRBS4", .cp = 15, .crn = 6, .crm = 4, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[4]) },
+ { .name = "946_PRBS5", .cp = 15, .crn = 6, .crm = 5, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[5]) },
+ { .name = "946_PRBS6", .cp = 15, .crn = 6, .crm = 6, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[6]) },
+ { .name = "946_PRBS7", .cp = 15, .crn = 6, .crm = 7, .opc1 = 0,
+ .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c6_region[7]) },
REGINFO_SENTINEL
};
-static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
int maskshift = extract32(value, 0, 3);
@@ -1228,11 +1265,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
env->cp15.c2_control = value;
env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
- return 0;
}
-static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
if (arm_feature(env, ARM_FEATURE_LPAE)) {
/* With LPAE the TTBCR could result in a change of ASID
@@ -1240,7 +1276,7 @@ static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
*/
tlb_flush(env, 1);
}
- return vmsa_ttbcr_raw_write(env, ri, value);
+ vmsa_ttbcr_raw_write(env, ri, value);
}
static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1250,6 +1286,26 @@ static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
env->cp15.c2_mask = 0;
}
+static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
+ tlb_flush(env, 1);
+ env->cp15.c2_control = value;
+}
+
+static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* 64 bit accesses to the TTBRs can change the ASID and so we
+ * must flush the TLB.
+ */
+ if (cpreg_field_is_64bit(ri)) {
+ tlb_flush(env, 1);
+ }
+ raw_write(env, ri, value);
+}
+
static const ARMCPRegInfo vmsa_cp_reginfo[] = {
{ .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL1_RW,
@@ -1257,56 +1313,59 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
{ .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
.access = PL1_RW,
.fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
- { .name = "TTBR0", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW,
- .fieldoffset = offsetof(CPUARMState, cp15.c2_base0), .resetvalue = 0, },
- { .name = "TTBR1", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
- .access = PL1_RW,
- .fieldoffset = offsetof(CPUARMState, cp15.c2_base1), .resetvalue = 0, },
- { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
- .access = PL1_RW, .writefn = vmsa_ttbcr_write,
- .resetfn = vmsa_ttbcr_reset, .raw_writefn = vmsa_ttbcr_raw_write,
+ { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+ .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+ .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+ { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
+ .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+ .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+ { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
+ .access = PL1_RW, .writefn = vmsa_tcr_el1_write,
+ .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.c2_control) },
+ { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write,
+ .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) },
{ .name = "DFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_data),
.resetvalue = 0, },
REGINFO_SENTINEL
};
-static int omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c15_ticonfig = value & 0xe7;
/* The OS_TYPE bit in this register changes the reported CPUID! */
env->cp15.c0_cpuid = (value & (1 << 5)) ?
ARM_CPUID_TI915T : ARM_CPUID_TI925T;
- return 0;
}
-static int omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c15_threadid = value & 0xffff;
- return 0;
}
-static int omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* Wait-for-interrupt (deprecated) */
cpu_interrupt(CPU(arm_env_get_cpu(env)), CPU_INTERRUPT_HALT);
- return 0;
}
-static int omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* On OMAP there are registers indicating the max/min index of dcache lines
* containing a dirty line; cache flush operations have to reset these.
*/
env->cp15.c15_i_max = 0x000;
env->cp15.c15_i_min = 0xff0;
- return 0;
}
static const ARMCPRegInfo omap_cp_reginfo[] = {
@@ -1348,8 +1407,8 @@ static const ARMCPRegInfo omap_cp_reginfo[] = {
REGINFO_SENTINEL
};
-static int xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
value &= 0x3fff;
if (env->cp15.c15_cpar != value) {
@@ -1357,7 +1416,6 @@ static int xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
tb_flush(env);
env->cp15.c15_cpar = value;
}
- return 0;
}
static const ARMCPRegInfo xscale_cp_reginfo[] = {
@@ -1437,12 +1495,12 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = {
REGINFO_SENTINEL
};
-static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
CPUState *cs = CPU(arm_env_get_cpu(env));
uint32_t mpidr = cs->cpu_index;
- /* We don't support setting cluster ID ([8..11])
+ /* We don't support setting cluster ID ([8..11]) (known as Aff1
+ * in later ARM ARM versions), or any of the higher affinity level fields,
* so these bits always RAZ.
*/
if (arm_feature(env, ARM_FEATURE_V7MP)) {
@@ -1453,27 +1511,26 @@ static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
* not currently model any of those cores.
*/
}
- *value = mpidr;
- return 0;
+ return mpidr;
}
static const ARMCPRegInfo mpidr_cp_reginfo[] = {
- { .name = "MPIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
+ { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
.access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_MIGRATE },
REGINFO_SENTINEL
};
-static int par64_read(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value)
+static uint64_t par64_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- *value = ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
- return 0;
+ return ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
}
-static int par64_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void par64_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c7_par_hi = value >> 32;
env->cp15.c7_par = value;
- return 0;
}
static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1482,63 +1539,15 @@ static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
env->cp15.c7_par = 0;
}
-static int ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- *value = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
- return 0;
-}
-
-static int ttbr064_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- env->cp15.c2_base0_hi = value >> 32;
- env->cp15.c2_base0 = value;
- return 0;
-}
-
-static int ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Writes to the 64 bit format TTBRs may change the ASID */
- tlb_flush(env, 1);
- return ttbr064_raw_write(env, ri, value);
-}
-
-static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
- env->cp15.c2_base0_hi = 0;
- env->cp15.c2_base0 = 0;
-}
-
-static int ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
-{
- *value = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
- return 0;
-}
-
-static int ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- env->cp15.c2_base1_hi = value >> 32;
- env->cp15.c2_base1 = value;
- return 0;
-}
-
-static void ttbr164_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
- env->cp15.c2_base1_hi = 0;
- env->cp15.c2_base1 = 0;
-}
-
static const ARMCPRegInfo lpae_cp_reginfo[] = {
/* NOP AMAIR0/1: the override is because these clash with the rather
* broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo.
*/
- { .name = "AMAIR0", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
+ { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
.resetvalue = 0 },
+ /* AMAIR1 is mapped to AMAIR_EL1[63:32] */
{ .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
.access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
.resetvalue = 0 },
@@ -1551,41 +1560,72 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_64BIT,
.readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset },
{ .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
- .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr064_read,
- .writefn = ttbr064_write, .raw_writefn = ttbr064_raw_write,
- .resetfn = ttbr064_reset },
+ .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
+ .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+ .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
{ .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
- .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr164_read,
- .writefn = ttbr164_write, .resetfn = ttbr164_reset },
+ .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
+ .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+ .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
REGINFO_SENTINEL
};
-static int aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- *value = vfp_get_fpcr(env);
- return 0;
+ return vfp_get_fpcr(env);
}
-static int aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
vfp_set_fpcr(env, value);
- return 0;
}
-static int aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t *value)
+static uint64_t aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- *value = vfp_get_fpsr(env);
- return 0;
+ return vfp_get_fpsr(env);
}
-static int aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
vfp_set_fpsr(env, value);
- return 0;
+}
+
+static CPAccessResult aa64_cacheop_access(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
+ * SCTLR_EL1.UCI is set.
+ */
+ if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) {
+ return CP_ACCESS_TRAP;
+ }
+ return CP_ACCESS_OK;
+}
+
+static void tlbi_aa64_va_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate by VA (AArch64 version) */
+ uint64_t pageaddr = value << 12;
+ tlb_flush_page(env, pageaddr);
+}
+
+static void tlbi_aa64_vaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate by VA, all ASIDs (AArch64 version) */
+ uint64_t pageaddr = value << 12;
+ tlb_flush_page(env, pageaddr);
+}
+
+static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate by ASID (AArch64 version) */
+ int asid = extract64(value, 48, 16);
+ tlb_flush(env, asid == 0);
}
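
For reference, these TLBI writefns decode only two fields of the Xt operand: the VA forms recover a page address by shifting the register value left by 12 (the operand carries VA[55:12] in its low bits), and the ASID form takes bits [63:48]. A small standalone sketch of that unpacking; the operand layout and the example values are assumptions drawn from the code above, and extract_bits is a stand-in for QEMU's extract64():

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for extract64(): pull <length> bits starting at bit <start> */
    static uint64_t extract_bits(uint64_t value, int start, int length)
    {
        return (value >> start) & (~0ULL >> (64 - length));
    }

    int main(void)
    {
        /* Hypothetical TLBI VAAE1 operand: low bits hold VA[55:12] */
        uint64_t xt_va = 0x7fb7c03000ULL >> 12;
        uint64_t pageaddr = xt_va << 12;               /* as in tlbi_aa64_vaa_write */

        /* Hypothetical TLBI ASIDE1 operand: bits [63:48] hold the ASID */
        uint64_t xt_asid = 0x1234ULL << 48;
        uint64_t asid = extract_bits(xt_asid, 48, 16); /* as in tlbi_aa64_asid_write */

        printf("page address 0x%llx, asid 0x%llx\n",
               (unsigned long long)pageaddr, (unsigned long long)asid);
        return 0;
    }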
static const ARMCPRegInfo v8_cp_reginfo[] = {
@@ -1601,13 +1641,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
.access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
- /* This claims a 32 byte cacheline size for icache and dcache, VIPT icache.
- * It will eventually need to have a CPU-specified reset value.
- */
- { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
- .access = PL0_R, .type = ARM_CP_CONST,
- .resetvalue = 0x80030003 },
/* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
* For system mode the DZP bit here will need to be computed, not constant.
*/
@@ -1615,16 +1648,155 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
.access = PL0_R, .type = ARM_CP_CONST,
.resetvalue = 0x10 },
+ { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
+ .access = PL1_R, .type = ARM_CP_CURRENTEL },
+ /* Cache ops: all NOPs since we don't emulate caches */
+ { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NOP },
+ { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NOP },
+ { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1,
+ .access = PL0_W, .type = ARM_CP_NOP,
+ .accessfn = aa64_cacheop_access },
+ { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NOP },
+ { .name = "DC_ISW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
+ .access = PL1_W, .type = ARM_CP_NOP },
+ { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
+ .access = PL0_W, .type = ARM_CP_NOP,
+ .accessfn = aa64_cacheop_access },
+ { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
+ .access = PL1_W, .type = ARM_CP_NOP },
+ { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1,
+ .access = PL0_W, .type = ARM_CP_NOP,
+ .accessfn = aa64_cacheop_access },
+ { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1,
+ .access = PL0_W, .type = ARM_CP_NOP,
+ .accessfn = aa64_cacheop_access },
+ { .name = "DC_CISW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
+ .access = PL1_W, .type = ARM_CP_NOP },
+ /* TLBI operations */
+ { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbiall_write },
+ { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_va_write },
+ { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_asid_write },
+ { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_vaa_write },
+ { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_va_write },
+ { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_vaa_write },
+ { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbiall_write },
+ { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_va_write },
+ { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_asid_write },
+ { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_vaa_write },
+ { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_va_write },
+ { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+ .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+ .writefn = tlbi_aa64_vaa_write },
+ /* Dummy implementation of monitor debug system control register:
+ * we don't support debug.
+ */
+ { .name = "MDSCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */
+ { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
+ .access = PL1_W, .type = ARM_CP_NOP },
REGINFO_SENTINEL
};
-static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
env->cp15.c1_sys = value;
/* ??? Lots of these bits are not implemented. */
/* This may enable/disable the MMU, so do a TLB flush. */
tlb_flush(env, 1);
- return 0;
+}
+
+static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64,
+ * but the AArch32 CTR has its own reginfo struct)
+ */
+ if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) {
+ return CP_ACCESS_TRAP;
+ }
+ return CP_ACCESS_OK;
+}
+
+static void define_aarch64_debug_regs(ARMCPU *cpu)
+{
+ /* Define breakpoint and watchpoint registers. These do nothing
+ * but read as written, for now.
+ */
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ ARMCPRegInfo dbgregs[] = {
+ { .name = "DBGBVR", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
+ .access = PL1_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]) },
+ { .name = "DBGBCR", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
+ .access = PL1_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]) },
+ { .name = "DBGWVR", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
+ .access = PL1_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]) },
+ { .name = "DBGWCR", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
+ .access = PL1_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]) },
+ REGINFO_SENTINEL
+ };
+ define_arm_cp_regs(cpu, dbgregs);
+ }
}
void register_cp_regs_for_features(ARMCPU *cpu)
@@ -1707,11 +1879,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
.access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
- .readfn = pmreg_read, .writefn = pmcr_write,
- .raw_readfn = raw_read, .raw_writefn = raw_write,
+ .accessfn = pmreg_access, .writefn = pmcr_write,
+ .raw_writefn = raw_write,
};
ARMCPRegInfo clidr = {
- .name = "CLIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
+ .name = "CLIDR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr
};
define_one_arm_cp_reg(cpu, &pmcr);
@@ -1721,7 +1894,53 @@ void register_cp_regs_for_features(ARMCPU *cpu)
define_arm_cp_regs(cpu, not_v7_cp_reginfo);
}
if (arm_feature(env, ARM_FEATURE_V8)) {
+ /* AArch64 ID registers, which all have impdef reset values */
+ ARMCPRegInfo v8_idregs[] = {
+ { .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64pfr0 },
+ { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64pfr1},
+ { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64dfr0 },
+ { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64dfr1 },
+ { .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64afr0 },
+ { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64afr1 },
+ { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64isar0 },
+ { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64isar1 },
+ { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64mmfr0 },
+ { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->id_aa64mmfr1 },
+ REGINFO_SENTINEL
+ };
+ define_arm_cp_regs(cpu, v8_idregs);
define_arm_cp_regs(cpu, v8_cp_reginfo);
+ define_aarch64_debug_regs(cpu);
}
if (arm_feature(env, ARM_FEATURE_MPU)) {
/* These are the MPU registers prior to PMSAv6. Any new
@@ -1787,9 +2006,16 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
.type = ARM_CP_OVERRIDE },
+ { .name = "MIDR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .opc2 = 0, .crn = 0, .crm = 0,
+ .access = PL1_R, .resetvalue = cpu->midr, .type = ARM_CP_CONST },
{ .name = "CTR",
.cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+ { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
+ .access = PL0_R, .accessfn = ctr_el0_access,
+ .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
{ .name = "TCMTR",
.cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -1860,7 +2086,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
/* Generic registers whose values depend on the implementation */
{
ARMCPRegInfo sctlr = {
- .name = "SCTLR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
+ .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_sys),
.writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
.raw_writefn = raw_write,
@@ -2039,6 +2266,10 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
if (opaque) {
r2->opaque = opaque;
}
+ /* reginfo passed to helpers is correct for the actual access,
+ * and is never ARM_CP_STATE_BOTH:
+ */
+ r2->state = state;
/* Make sure reginfo passed to helpers for wildcarded regs
* has the correct crm/opc1/opc2 for this reg, not CP_ANY:
*/
@@ -2202,17 +2433,15 @@ const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
return g_hash_table_lookup(cpregs, &encoded_cp);
}
-int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
/* Helper coprocessor write function for write-ignore registers */
- return 0;
}
-int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value)
+uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri)
{
/* Helper coprocessor read function for read-as-zero registers */
- *value = 0;
return 0;
}
@@ -2249,7 +2478,7 @@ uint32_t cpsr_read(CPUARMState *env)
(env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27)
| (env->thumb << 5) | ((env->condexec_bits & 3) << 25)
| ((env->condexec_bits & 0xfc) << 8)
- | (env->GE << 16);
+ | (env->GE << 16) | env->daif;
}
void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
@@ -2276,6 +2505,9 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
env->GE = (val >> 16) & 0xf;
}
+ env->daif &= ~(CPSR_AIF & mask);
+ env->daif |= val & CPSR_AIF & mask;
+
if ((env->uncached_cpsr ^ val) & mask & CPSR_M) {
if (bad_mode_switch(env, val & CPSR_M)) {
/* Attempt to switch to an invalid mode: this is UNPREDICTABLE.
@@ -2449,14 +2681,16 @@ void switch_mode(CPUARMState *env, int mode)
static void v7m_push(CPUARMState *env, uint32_t val)
{
+ CPUState *cs = ENV_GET_CPU(env);
env->regs[13] -= 4;
- stl_phys(env->regs[13], val);
+ stl_phys(cs->as, env->regs[13], val);
}
static uint32_t v7m_pop(CPUARMState *env)
{
+ CPUState *cs = ENV_GET_CPU(env);
uint32_t val;
- val = ldl_phys(env->regs[13]);
+ val = ldl_phys(cs->as, env->regs[13]);
env->regs[13] += 4;
return val;
}
@@ -2611,7 +2845,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
/* Clear IT bits */
env->condexec_bits = 0;
env->regs[14] = lr;
- addr = ldl_phys(env->v7m.vecbase + env->v7m.exception * 4);
+ addr = ldl_phys(cs->as, env->v7m.vecbase + env->v7m.exception * 4);
env->regs[15] = addr & 0xfffffffe;
env->thumb = addr & 1;
}
@@ -2716,7 +2950,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
return; /* Never happens. Keep compiler happy. */
}
/* High vectors. */
- if (env->cp15.c1_sys & (1 << 13)) {
+ if (env->cp15.c1_sys & SCTLR_V) {
/* when enabled, base address cannot be remapped. */
addr += 0xffff0000;
} else {
@@ -2735,11 +2969,11 @@ void arm_cpu_do_interrupt(CPUState *cs)
env->condexec_bits = 0;
/* Switch to the new mode, and to the correct instruction set. */
env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
- env->uncached_cpsr |= mask;
+ env->daif |= mask;
/* this is a lie, as there was no c1_sys on V4T/V5, but who cares
* and we should just guard the thumb mode on V4 */
if (arm_feature(env, ARM_FEATURE_V4T)) {
- env->thumb = (env->cp15.c1_sys & (1 << 30)) != 0;
+ env->thumb = (env->cp15.c1_sys & SCTLR_TE) != 0;
}
env->regs[14] = env->regs[15] + offset;
env->regs[15] = addr;
@@ -2765,12 +2999,15 @@ static inline int check_ap(CPUARMState *env, int ap, int domain_prot,
switch (ap) {
case 0:
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ return 0;
+ }
if (access_type == 1)
return 0;
- switch ((env->cp15.c1_sys >> 8) & 3) {
- case 1:
+ switch (env->cp15.c1_sys & (SCTLR_S | SCTLR_R)) {
+ case SCTLR_S:
return is_user ? 0 : PAGE_READ;
- case 2:
+ case SCTLR_R:
return PAGE_READ;
default:
return 0;
@@ -2804,9 +3041,9 @@ static uint32_t get_level1_table_address(CPUARMState *env, uint32_t address)
uint32_t table;
if (address & env->cp15.c2_mask)
- table = env->cp15.c2_base1 & 0xffffc000;
+ table = env->cp15.ttbr1_el1 & 0xffffc000;
else
- table = env->cp15.c2_base0 & env->cp15.c2_base_mask;
+ table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask;
table |= (address >> 18) & 0x3ffc;
return table;
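
A quick worked example of the descriptor address computed above, assuming TTBCR.N = 0 so that TTBR0 is always selected, c2_base_mask is 0xffffc000 (a 16KB-aligned level 1 table) and the index is VA[31:20] with four bytes per entry; the concrete base address and VA are illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t ttbr0 = 0x40000000;        /* hypothetical 16KB-aligned L1 table */
        uint32_t va    = 0x12345678;

        uint32_t table = ttbr0 & 0xffffc000;
        table |= (va >> 18) & 0x3ffc;       /* equivalent to VA[31:20] * 4 */

        printf("L1 descriptor at 0x%08x\n", table);   /* prints 0x4000048c */
        return 0;
    }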
@@ -2816,6 +3053,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
int is_user, hwaddr *phys_ptr,
int *prot, target_ulong *page_size)
{
+ CPUState *cs = ENV_GET_CPU(env);
int code;
uint32_t table;
uint32_t desc;
@@ -2828,7 +3066,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
/* Pagetable walk. */
/* Lookup l1 descriptor. */
table = get_level1_table_address(env, address);
- desc = ldl_phys(table);
+ desc = ldl_phys(cs->as, table);
type = (desc & 3);
domain = (desc >> 5) & 0x0f;
domain_prot = (env->cp15.c3 >> (domain * 2)) & 3;
@@ -2859,7 +3097,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
/* Fine pagetable. */
table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
}
- desc = ldl_phys(table);
+ desc = ldl_phys(cs->as, table);
switch (desc & 3) {
case 0: /* Page translation fault. */
code = 7;
@@ -2871,7 +3109,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
break;
case 2: /* 4k page. */
phys_addr = (desc & 0xfffff000) | (address & 0xfff);
- ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+ ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
*page_size = 0x1000;
break;
case 3: /* 1k page. */
@@ -2911,6 +3149,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
int is_user, hwaddr *phys_ptr,
int *prot, target_ulong *page_size)
{
+ CPUState *cs = ENV_GET_CPU(env);
int code;
uint32_t table;
uint32_t desc;
@@ -2925,7 +3164,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
/* Pagetable walk. */
/* Lookup l1 descriptor. */
table = get_level1_table_address(env, address);
- desc = ldl_phys(table);
+ desc = ldl_phys(cs->as, table);
type = (desc & 3);
if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
/* Section translation fault, or attempt to use the encoding
@@ -2967,7 +3206,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
}
/* Lookup l2 entry. */
table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
- desc = ldl_phys(table);
+ desc = ldl_phys(cs->as, table);
ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
switch (desc & 3) {
case 0: /* Page translation fault. */
@@ -2999,7 +3238,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
goto do_fault;
/* The simplified model uses AP[0] as an access control bit. */
- if ((env->cp15.c1_sys & (1 << 29)) && (ap & 1) == 0) {
+ if ((env->cp15.c1_sys & SCTLR_AFE) && (ap & 1) == 0) {
/* Access flag fault. */
code = (code == 15) ? 6 : 3;
goto do_fault;
@@ -3033,6 +3272,7 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
hwaddr *phys_ptr, int *prot,
target_ulong *page_size_ptr)
{
+ CPUState *cs = ENV_GET_CPU(env);
/* Read an LPAE long-descriptor translation table. */
MMUFaultType fault_type = translation_fault;
uint32_t level = 1;
@@ -3079,11 +3319,11 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
* we will always flush the TLB any time the ASID is changed).
*/
if (ttbr_select == 0) {
- ttbr = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
+ ttbr = env->cp15.ttbr0_el1;
epd = extract32(env->cp15.c2_control, 7, 1);
tsz = t0sz;
} else {
- ttbr = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
+ ttbr = env->cp15.ttbr1_el1;
epd = extract32(env->cp15.c2_control, 23, 1);
tsz = t1sz;
}
@@ -3121,7 +3361,7 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
uint64_t descriptor;
descaddr |= ((address >> (9 * (4 - level))) & 0xff8);
- descriptor = ldq_phys(descaddr);
+ descriptor = ldq_phys(cs->as, descaddr);
if (!(descriptor & 1) ||
(!(descriptor & 2) && (level == 3))) {
/* Invalid, or the Reserved level 3 encoding */
@@ -3290,7 +3530,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address,
if (address < 0x02000000)
address += env->cp15.c13_fcse;
- if ((env->cp15.c1_sys & 1) == 0) {
+ if ((env->cp15.c1_sys & SCTLR_M) == 0) {
/* MMU/MPU disabled. */
*phys_ptr = address;
*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
@@ -3303,7 +3543,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address,
} else if (extended_addresses_enabled(env)) {
return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr,
prot, page_size);
- } else if (env->cp15.c1_sys & (1 << 23)) {
+ } else if (env->cp15.c1_sys & SCTLR_XP) {
return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
prot, page_size);
} else {
@@ -3402,12 +3642,12 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
case 9: /* PSP */
return env->v7m.current_sp ? env->regs[13] : env->v7m.other_sp;
case 16: /* PRIMASK */
- return (env->uncached_cpsr & CPSR_I) != 0;
+ return (env->daif & PSTATE_I) != 0;
case 17: /* BASEPRI */
case 18: /* BASEPRI_MAX */
return env->v7m.basepri;
case 19: /* FAULTMASK */
- return (env->uncached_cpsr & CPSR_F) != 0;
+ return (env->daif & PSTATE_F) != 0;
case 20: /* CONTROL */
return env->v7m.control;
default:
@@ -3454,10 +3694,11 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
env->v7m.other_sp = val;
break;
case 16: /* PRIMASK */
- if (val & 1)
- env->uncached_cpsr |= CPSR_I;
- else
- env->uncached_cpsr &= ~CPSR_I;
+ if (val & 1) {
+ env->daif |= PSTATE_I;
+ } else {
+ env->daif &= ~PSTATE_I;
+ }
break;
case 17: /* BASEPRI */
env->v7m.basepri = val & 0xff;
@@ -3468,10 +3709,11 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
env->v7m.basepri = val;
break;
case 19: /* FAULTMASK */
- if (val & 1)
- env->uncached_cpsr |= CPSR_F;
- else
- env->uncached_cpsr &= ~CPSR_F;
+ if (val & 1) {
+ env->daif |= PSTATE_F;
+ } else {
+ env->daif &= ~PSTATE_F;
+ }
break;
case 20: /* CONTROL */
env->v7m.control = val & 3;
@@ -4048,6 +4290,23 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env)
return prev_rmode;
}
+/* Set the current fp rounding mode in the standard fp status and return
+ * the old one. This is for NEON instructions that need to change the
+ * rounding mode but wish to use the standard FPSCR values for everything
+ * else. Always set the rounding mode back to the correct value after
+ * modifying it.
+ * The argument is a softfloat float_round_ value.
+ */
+uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
+{
+ float_status *fp_status = &env->vfp.standard_fp_status;
+
+ uint32_t prev_rmode = get_float_rounding_mode(fp_status);
+ set_float_rounding_mode(rmode, fp_status);
+
+ return prev_rmode;
+}
+
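The comment above asks callers to restore the rounding mode once the operation is done. The same save/switch/restore discipline is shown below with the standard C fenv interface rather than QEMU's softfloat state; this is only an analogy to illustrate the pattern, not how the translator invokes the helper:

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        int old = fegetround();          /* save the previous rounding mode */
        fesetround(FE_UPWARD);
        printf("rint(2.5) rounding up:  %.1f\n", rint(2.5));  /* 3.0 */
        fesetround(old);                 /* always restore it afterwards */
        printf("rint(2.5) nearest-even: %.1f\n", rint(2.5));  /* 2.0 */
        return 0;
    }
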
/* Half precision conversions. */
static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s)
{
@@ -4418,3 +4677,68 @@ float64 HELPER(rintd)(float64 x, void *fp_status)
return ret;
}
+
+/* Convert ARM rounding mode to softfloat */
+int arm_rmode_to_sf(int rmode)
+{
+ switch (rmode) {
+ case FPROUNDING_TIEAWAY:
+ rmode = float_round_ties_away;
+ break;
+ case FPROUNDING_ODD:
+ /* FIXME: add support for TIEAWAY and ODD */
+ qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
+ rmode);
+ case FPROUNDING_TIEEVEN:
+ default:
+ rmode = float_round_nearest_even;
+ break;
+ case FPROUNDING_POSINF:
+ rmode = float_round_up;
+ break;
+ case FPROUNDING_NEGINF:
+ rmode = float_round_down;
+ break;
+ case FPROUNDING_ZERO:
+ rmode = float_round_to_zero;
+ break;
+ }
+ return rmode;
+}
+
+static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
+{
+ memset(buf, 0, 4);
+
+ if (bytes == 1) {
+ buf[0] = val & 0xff;
+ } else if (bytes == 2) {
+ buf[0] = val & 0xff;
+ buf[1] = (val >> 8) & 0xff;
+ } else {
+ buf[0] = val & 0xff;
+ buf[1] = (val >> 8) & 0xff;
+ buf[2] = (val >> 16) & 0xff;
+ buf[3] = (val >> 24) & 0xff;
+ }
+}
+
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+ uint8_t buf[4];
+
+ crc_init_buffer(buf, val, bytes);
+
+ /* zlib crc32 converts the accumulator and output to one's complement. */
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+ uint8_t buf[4];
+
+ crc_init_buffer(buf, val, bytes);
+
+ /* Linux crc32c converts the output to one's complement. */
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
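The two explicit XORs with 0xffffffff cancel zlib's internal pre- and post-inversion, so the helper ends up computing a plain reflected CRC-32 update over 1, 2 or 4 little-endian bytes, which matches what the CRC32/CRC32C instructions describe. A bit-level sketch of that reduction for a single byte, assuming the usual reflected polynomials (0xEDB88320 for CRC32, 0x82F63B78 for CRC32C); the values and function names are illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    /* One byte of a reflected CRC-32 update, with no pre/post inversion */
    static uint32_t crc32_byte(uint32_t acc, uint8_t byte, uint32_t poly)
    {
        acc ^= byte;
        for (int i = 0; i < 8; i++) {
            acc = (acc >> 1) ^ (poly & -(acc & 1u));
        }
        return acc;
    }

    int main(void)
    {
        /* Roughly what the CRC32B case of HELPER(crc32)(0, 'a', 1) reduces to */
        printf("CRC32B(0, 'a')  = 0x%08x\n", crc32_byte(0, 'a', 0xEDB88320u));
        printf("CRC32CB(0, 'a') = 0x%08x\n", crc32_byte(0, 'a', 0x82F63B78u));
        return 0;
    }
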
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 70872dffc6..276f3a9149 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -57,11 +57,14 @@ DEF_HELPER_1(cpsr_read, i32, env)
DEF_HELPER_3(v7m_msr, void, env, i32, i32)
DEF_HELPER_2(v7m_mrs, i32, env, i32)
+DEF_HELPER_2(access_check_cp_reg, void, env, ptr)
DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
DEF_HELPER_2(get_cp_reg, i32, env, ptr)
DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
+DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
+
DEF_HELPER_2(get_r13_banked, i32, env, i32)
DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
@@ -149,6 +152,7 @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env)
+DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
@@ -319,6 +323,7 @@ DEF_HELPER_1(neon_cls_s8, i32, i32)
DEF_HELPER_1(neon_cls_s16, i32, i32)
DEF_HELPER_1(neon_cls_s32, i32, i32)
DEF_HELPER_1(neon_cnt_u8, i32, i32)
+DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
@@ -380,6 +385,8 @@ DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_acge_f64, i64, i64, i64, ptr)
+DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, ptr)
/* iwmmxt_helper.c */
DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64)
@@ -492,6 +499,9 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
index 0e7f889cba..6009a33f10 100644
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -50,15 +50,29 @@ MISMATCH_CHECK(PSCI_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
MISMATCH_CHECK(PSCI_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
+/* Note that KVM uses overlapping values for AArch32 and AArch64
+ * target CPU numbers. AArch32 targets:
+ */
#define QEMU_KVM_ARM_TARGET_CORTEX_A15 0
+#define QEMU_KVM_ARM_TARGET_CORTEX_A7 1
+
+/* AArch64 targets: */
+#define QEMU_KVM_ARM_TARGET_AEM_V8 0
+#define QEMU_KVM_ARM_TARGET_FOUNDATION_V8 1
+#define QEMU_KVM_ARM_TARGET_CORTEX_A57 2
/* There's no kernel define for this: sentinel value which
* matches no KVM target value for either 64 or 32 bit
*/
#define QEMU_KVM_ARM_TARGET_NONE UINT_MAX
-#ifndef TARGET_AARCH64
+#ifdef TARGET_AARCH64
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_AEM_V8, KVM_ARM_TARGET_AEM_V8)
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8)
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57)
+#else
MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15)
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7)
#endif
#define CP_REG_ARM64 0x6000000000000000ULL
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 1d2688dda7..39202d7eea 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -165,8 +165,10 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
*/
typedef struct KVMDevice {
struct kvm_arm_device_addr kda;
+ struct kvm_device_attr kdattr;
MemoryRegion *mr;
QSLIST_ENTRY(KVMDevice) entries;
+ int dev_fd;
} KVMDevice;
static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
@@ -200,6 +202,29 @@ static MemoryListener devlistener = {
.region_del = kvm_arm_devlistener_del,
};
+static void kvm_arm_set_device_addr(KVMDevice *kd)
+{
+ struct kvm_device_attr *attr = &kd->kdattr;
+ int ret;
+
+ /* If the device control API is available and we have a device fd on the
+ * KVMDevice struct, let's use the newer API
+ */
+ if (kd->dev_fd >= 0) {
+ uint64_t addr = kd->kda.addr;
+ attr->addr = (uintptr_t)&addr;
+ ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
+ } else {
+ ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
+ }
+
+ if (ret < 0) {
+ fprintf(stderr, "Failed to set device address: %s\n",
+ strerror(-ret));
+ abort();
+ }
+}
+
static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
{
KVMDevice *kd, *tkd;
@@ -207,12 +232,7 @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
memory_listener_unregister(&devlistener);
QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
if (kd->kda.addr != -1) {
- if (kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR,
- &kd->kda) < 0) {
- fprintf(stderr, "KVM_ARM_SET_DEVICE_ADDRESS failed: %s\n",
- strerror(errno));
- abort();
- }
+ kvm_arm_set_device_addr(kd);
}
memory_region_unref(kd->mr);
g_free(kd);
@@ -223,7 +243,8 @@ static Notifier notify = {
.notify = kvm_arm_machine_init_done,
};
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+ uint64_t attr, int dev_fd)
{
KVMDevice *kd;
@@ -239,6 +260,10 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
kd->mr = mr;
kd->kda.id = devid;
kd->kda.addr = -1;
+ kd->kdattr.flags = 0;
+ kd->kdattr.group = group;
+ kd->kdattr.attr = attr;
+ kd->dev_fd = dev_fd;
QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
memory_region_ref(kd->mr);
}
@@ -389,3 +414,19 @@ void kvm_arch_remove_all_hw_breakpoints(void)
void kvm_arch_init_irq_routing(KVMState *s)
{
}
+
+int kvm_arch_irqchip_create(KVMState *s)
+{
+ int ret;
+
+ /* If we can create the VGIC using the newer device control API, we
+ * let the device do this when it initializes itself, otherwise we
+ * fall back to the old API */
+
+ ret = kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true);
+ if (ret == 0) {
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index cd3d13ca2d..137c5671e9 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -18,16 +18,21 @@
* kvm_arm_register_device:
* @mr: memory region for this device
* @devid: the KVM device ID
+ * @group: device control API group for setting addresses
+ * @attr: device control API address type
+ * @dev_fd: device control device file descriptor (or -1 if not supported)
*
* Remember the memory region @mr, and when it is mapped by the
* machine model, tell the kernel that base address using the
- * KVM_SET_DEVICE_ADDRESS ioctl. @devid should be the ID of
- * the device as defined by KVM_SET_DEVICE_ADDRESS.
- * The machine model may map and unmap the device multiple times;
- * the kernel will only be told the final address at the point
- * where machine init is complete.
+ * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API. @devid
+ * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or
+ * the arm-vgic device in the device control API.
+ * The machine model may map
+ * and unmap the device multiple times; the kernel will only be told the final
+ * address at the point where machine init is complete.
*/
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid);
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+ uint64_t attr, int dev_fd);
/**
* write_list_to_kvmstate:
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index be6fbd997e..13752baf63 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1133,6 +1133,18 @@ uint32_t HELPER(neon_cnt_u8)(uint32_t x)
return x;
}
+/* Reverse bits in each 8 bit word */
+uint32_t HELPER(neon_rbit_u8)(uint32_t x)
+{
+ x = ((x & 0xf0f0f0f0) >> 4)
+ | ((x & 0x0f0f0f0f) << 4);
+ x = ((x & 0x88888888) >> 3)
+ | ((x & 0x44444444) >> 1)
+ | ((x & 0x22222222) << 1)
+ | ((x & 0x11111111) << 3);
+ return x;
+}
+
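The new helper reverses the bit order within each byte by first swapping the two nibbles and then permuting the bits inside each nibble. A standalone sketch that cross-checks the trick against a naive per-bit loop; the test value is arbitrary:

    #include <stdint.h>
    #include <stdio.h>

    /* Same trick as neon_rbit_u8: swap nibbles, then reverse within nibbles */
    static uint32_t rbit_u8(uint32_t x)
    {
        x = ((x & 0xf0f0f0f0u) >> 4) | ((x & 0x0f0f0f0fu) << 4);
        x = ((x & 0x88888888u) >> 3) | ((x & 0x44444444u) >> 1)
          | ((x & 0x22222222u) << 1) | ((x & 0x11111111u) << 3);
        return x;
    }

    /* Naive per-bit reference for one byte */
    static uint8_t rbit_byte_ref(uint8_t b)
    {
        uint8_t r = 0;
        for (int i = 0; i < 8; i++) {
            r |= ((b >> i) & 1) << (7 - i);
        }
        return r;
    }

    int main(void)
    {
        uint32_t x = 0x12345678;
        uint32_t fast = rbit_u8(x);
        for (int i = 0; i < 4; i++) {
            uint8_t in  = (x >> (8 * i)) & 0xff;
            uint8_t out = (fast >> (8 * i)) & 0xff;
            printf("byte %d: %02x -> %02x (reference %02x)\n",
                   i, in, out, rbit_byte_ref(in));
        }
        return 0;
    }
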
#define NEON_QDMULH16(dest, src1, src2, round) do { \
uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
@@ -1811,6 +1823,22 @@ uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
return -float32_lt(f1, f0, fpst);
}
+uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float64 f0 = float64_abs(make_float64(a));
+ float64 f1 = float64_abs(make_float64(b));
+ return -float64_le(f1, f0, fpst);
+}
+
+uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float64 f0 = float64_abs(make_float64(a));
+ float64 f1 = float64_abs(make_float64(b));
+ return -float64_lt(f1, f0, fpst);
+}
+
#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index a918e5b27a..7d06d2f9a5 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -273,44 +273,75 @@ void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
}
}
-void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
+void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip)
{
const ARMCPRegInfo *ri = rip;
- int excp = ri->writefn(env, ri, value);
- if (excp) {
- raise_exception(env, excp);
+ switch (ri->accessfn(env, ri)) {
+ case CP_ACCESS_OK:
+ return;
+ case CP_ACCESS_TRAP:
+ case CP_ACCESS_TRAP_UNCATEGORIZED:
+ /* These cases will eventually need to generate different
+ * syndrome information.
+ */
+ break;
+ default:
+ g_assert_not_reached();
}
+ raise_exception(env, EXCP_UDEF);
+}
+
+void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
+{
+ const ARMCPRegInfo *ri = rip;
+
+ ri->writefn(env, ri, value);
}
uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip)
{
const ARMCPRegInfo *ri = rip;
- uint64_t value;
- int excp = ri->readfn(env, ri, &value);
- if (excp) {
- raise_exception(env, excp);
- }
- return value;
+
+ return ri->readfn(env, ri);
}
void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value)
{
const ARMCPRegInfo *ri = rip;
- int excp = ri->writefn(env, ri, value);
- if (excp) {
- raise_exception(env, excp);
- }
+
+ ri->writefn(env, ri, value);
}
uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
{
const ARMCPRegInfo *ri = rip;
- uint64_t value;
- int excp = ri->readfn(env, ri, &value);
- if (excp) {
- raise_exception(env, excp);
+
+ return ri->readfn(env, ri);
+}
+
+void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
+{
+ /* MSR_i to update PSTATE. This is OK from EL0 only if UMA is set.
+ * Note that SPSel is never OK from EL0; we rely on handle_msr_i()
+ * to catch that case at translate time.
+ */
+ if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+ raise_exception(env, EXCP_UDEF);
+ }
+
+ switch (op) {
+ case 0x05: /* SPSel */
+ env->pstate = deposit32(env->pstate, 0, 1, imm);
+ break;
+ case 0x1e: /* DAIFSet */
+ env->daif |= (imm << 6) & PSTATE_DAIF;
+ break;
+ case 0x1f: /* DAIFClear */
+ env->daif &= ~((imm << 6) & PSTATE_DAIF);
+ break;
+ default:
+ g_assert_not_reached();
}
- return value;
}
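
DAIFSet and DAIFClear carry a 4-bit immediate ordered D:A:I:F, and shifting it left by 6 lines it up with the PSTATE.{D,A,I,F} bits (bits 9..6) that env->daif stores. A small sketch of the two cases, using the same bit positions the helper assumes; the immediates are just example values:

    #include <stdint.h>
    #include <stdio.h>

    #define PSTATE_F    (1u << 6)
    #define PSTATE_I    (1u << 7)
    #define PSTATE_A    (1u << 8)
    #define PSTATE_D    (1u << 9)
    #define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)

    int main(void)
    {
        uint32_t daif = 0;

        uint32_t imm = 0x3;                   /* MSR DAIFSet, #3 -> mask I and F */
        daif |= (imm << 6) & PSTATE_DAIF;
        printf("after DAIFSet #3:   0x%03x\n", daif);   /* 0x0c0 */

        imm = 0x2;                            /* MSR DAIFClear, #2 -> unmask I */
        daif &= ~((imm << 6) & PSTATE_DAIF);
        printf("after DAIFClear #2: 0x%03x\n", daif);   /* 0x040 */
        return 0;
    }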
/* ??? Flag setting arithmetic is awkward because we need to do comparisons.
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index cf80c46b90..08ac6591b6 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -61,6 +61,27 @@ enum a64_shift_type {
A64_SHIFT_TYPE_ROR = 3
};
+/* Table based decoder typedefs - used when the relevant bits for decode
+ * are too awkwardly scattered across the instruction (eg SIMD).
+ */
+typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
+
+typedef struct AArch64DecodeTable {
+ uint32_t pattern;
+ uint32_t mask;
+ AArch64DecodeFn *disas_fn;
+} AArch64DecodeTable;
+
+/* Function prototype for gen_ functions for calling Neon helpers */
+typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
+typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
+typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+
/* initialize TCG globals. */
void a64_translate_init(void)
{
@@ -308,6 +329,28 @@ static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
return v;
}
+/* Return the offset into CPUARMState of an element of specified
+ * size, 'element' places in from the least significant end of
+ * the FP/vector register Qn.
+ */
+static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
+{
+ int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
+#ifdef HOST_WORDS_BIGENDIAN
+ /* This is complicated slightly because vfp.regs[2n] is
+ * still the low half and vfp.regs[2n+1] the high half
+ * of the 128 bit vector, even on big endian systems.
+ * Calculate the offset assuming a fully bigendian 128 bits,
+ * then XOR to account for the order of the two 64 bit halves.
+ */
+ offs += (16 - ((element + 1) * (1 << size)));
+ offs ^= 8;
+#else
+ offs += element * (1 << size);
+#endif
+ return offs;
+}
+
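The XOR-with-8 in the big-endian branch above is easiest to see with concrete numbers: within each 64-bit half of the Q register the byte offsets are mirrored on a big-endian host, but the two halves keep their low/high placement. A sketch with the CPUARMState base offset dropped, shown for the four 32-bit elements of a Q register:

    #include <stdio.h>

    /* Relative offset of an element inside a 16-byte Q register,
     * mirroring the arithmetic in vec_reg_offset() above.
     */
    static int elem_off(int element, int esize, int big_endian)
    {
        int offs;
        if (big_endian) {
            offs = 16 - (element + 1) * esize;
            offs ^= 8;        /* undo the swap of the two 64-bit halves */
        } else {
            offs = element * esize;
        }
        return offs;
    }

    int main(void)
    {
        for (int e = 0; e < 4; e++) {          /* 32-bit elements, esize = 4 */
            printf("element %d: LE offset %2d, BE offset %2d\n",
                   e, elem_off(e, 4, 0), elem_off(e, 4, 1));
        }
        return 0;
    }
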
/* Return the offset into CPUARMState of a slice (from
* the least significant end) of FP register Qn (ie
* Dn, Sn, Hn or Bn).
@@ -575,20 +618,26 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
*/
/*
- * Store from GPR register to memory
+ * Store from GPR register to memory.
*/
+static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
+ TCGv_i64 tcg_addr, int size, int memidx)
+{
+ g_assert(size <= 3);
+ tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
+}
+
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
TCGv_i64 tcg_addr, int size)
{
- g_assert(size <= 3);
- tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
+ do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}
/*
* Load from memory to GPR register
*/
-static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
- int size, bool is_signed, bool extend)
+static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+ int size, bool is_signed, bool extend, int memidx)
{
TCGMemOp memop = MO_TE + size;
@@ -598,7 +647,7 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
memop += MO_SIGN;
}
- tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
+ tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
if (extend && is_signed) {
g_assert(size < 3);
@@ -606,6 +655,13 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
}
}
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+ int size, bool is_signed, bool extend)
+{
+ do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
+ get_mem_index(s));
+}
+
/*
* Store from FP register to memory
*/
@@ -661,6 +717,156 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
}
/*
+ * Vector load/store helpers.
+ *
+ * The principal difference between this and a FP load is that we don't
+ * zero extend as we are filling a partial chunk of the vector register.
+ * These functions don't support 128 bit loads/stores, which would be
+ * normal load/store operations.
+ *
+ * The _i32 versions are useful when operating on 32 bit quantities
+ * (eg for floating point single or using Neon helper functions).
+ */
+
+/* Get value of an element within a vector register */
+static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
+ int element, TCGMemOp memop)
+{
+ int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
+ switch (memop) {
+ case MO_8:
+ tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_16:
+ tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_32:
+ tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_8|MO_SIGN:
+ tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_16|MO_SIGN:
+ tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_32|MO_SIGN:
+ tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_64:
+ case MO_64|MO_SIGN:
+ tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
+ int element, TCGMemOp memop)
+{
+ int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
+ switch (memop) {
+ case MO_8:
+ tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_16:
+ tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_8|MO_SIGN:
+ tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_16|MO_SIGN:
+ tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
+ break;
+ case MO_32:
+ case MO_32|MO_SIGN:
+ tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Set value of an element within a vector register */
+static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
+ int element, TCGMemOp memop)
+{
+ int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
+ switch (memop) {
+ case MO_8:
+ tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
+ break;
+ case MO_16:
+ tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
+ break;
+ case MO_32:
+ tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
+ break;
+ case MO_64:
+ tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
+ int destidx, int element, TCGMemOp memop)
+{
+ int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
+ switch (memop) {
+ case MO_8:
+ tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
+ break;
+ case MO_16:
+ tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
+ break;
+ case MO_32:
+ tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Clear the high 64 bits of a 128 bit vector (in general non-quad
+ * vector ops all need to do this).
+ */
+static void clear_vec_high(DisasContext *s, int rd)
+{
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+
+ write_vec_element(s, tcg_zero, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_zero);
+}
+
+/* Store from vector register to memory */
+static void do_vec_st(DisasContext *s, int srcidx, int element,
+ TCGv_i64 tcg_addr, int size)
+{
+ TCGMemOp memop = MO_TE + size;
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_tmp, srcidx, element, size);
+ tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+
+ tcg_temp_free_i64(tcg_tmp);
+}
+
+/* Load from memory to vector register */
+static void do_vec_ld(DisasContext *s, int destidx, int element,
+ TCGv_i64 tcg_addr, int size)
+{
+ TCGMemOp memop = MO_TE + size;
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+ write_vec_element(s, tcg_tmp, destidx, element, size);
+
+ tcg_temp_free_i64(tcg_tmp);
+}
+
+/*
* This utility function is for doing register extension with an
* optional shift. You will likely want to pass a temporary for the
* destination register. See DecodeRegExtend() in the ARM ARM.
@@ -722,6 +928,31 @@ static inline void gen_check_sp_alignment(DisasContext *s)
}
/*
+ * This provides a simple table-based lookup decoder. It is
+ * intended to be used when the relevant bits for decode are too
+ * awkwardly placed and switch/if based logic would be confusing and
+ * deeply nested. Since it's a linear search through the table, tables
+ * should be kept small.
+ *
+ * It returns the first handler where insn & mask == pattern, or
+ * NULL if there is no match.
+ * The table is terminated by an empty mask (i.e. 0)
+ */
+static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
+ uint32_t insn)
+{
+ const AArch64DecodeTable *tptr = table;
+
+ while (tptr->mask) {
+ if ((insn & tptr->mask) == tptr->pattern) {
+ return tptr->disas_fn;
+ }
+ tptr++;
+ }
+ return NULL;
+}
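To illustrate the intended shape, a decode table and lookup might be written roughly as follows (the table name and the pattern/mask values here are purely illustrative, not taken from this patch):

    static const AArch64DecodeTable example_table[] = {
        /* { pattern, mask, disas_fn } */
        { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
        { 0x00000000, 0x00000000, NULL }  /* terminator: mask == 0 */
    };

    AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }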
+
+/*
* the instruction disassembly implemented here matches
* the instruction encoding classifications in chapter 3 (C3)
* of the ARM Architecture Reference Manual (DDI0487A_a)
@@ -849,9 +1080,11 @@ static void handle_hint(DisasContext *s, uint32_t insn,
switch (selector) {
case 0: /* NOP */
return;
+ case 3: /* WFI */
+ s->is_jmp = DISAS_WFI;
+ return;
case 1: /* YIELD */
case 2: /* WFE */
- case 3: /* WFI */
case 4: /* SEV */
case 5: /* SEVL */
/* we treat all as NOP at least for now */
@@ -895,7 +1128,30 @@ static void handle_sync(DisasContext *s, uint32_t insn,
static void handle_msr_i(DisasContext *s, uint32_t insn,
unsigned int op1, unsigned int op2, unsigned int crm)
{
- unsupported_encoding(s, insn);
+ int op = op1 << 3 | op2;
+ switch (op) {
+ case 0x05: /* SPSel */
+ if (s->current_pl == 0) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x1e: /* DAIFSet */
+ case 0x1f: /* DAIFClear */
+ {
+ TCGv_i32 tcg_imm = tcg_const_i32(crm);
+ TCGv_i32 tcg_op = tcg_const_i32(op);
+ gen_a64_set_pc_im(s->pc - 4);
+ gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
+ tcg_temp_free_i32(tcg_imm);
+ tcg_temp_free_i32(tcg_op);
+ s->is_jmp = DISAS_UPDATE;
+ break;
+ }
+ default:
+ unallocated_encoding(s);
+ return;
+ }
}
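The op values above follow directly from op = op1 << 3 | op2: MSR SPSel, #imm encodes op1=0b000, op2=0b101, giving 0x05; MSR DAIFSet, #imm encodes op1=0b011, op2=0b110 (0x1e); and MSR DAIFClr, #imm encodes op1=0b011, op2=0b111 (0x1f).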
static void gen_get_nzcv(TCGv_i64 tcg_rt)
@@ -961,7 +1217,12 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
crn, crm, op0, op1, op2));
if (!ri) {
- /* Unknown register */
+ /* Unknown register; this might be a guest error or a QEMU
+ * unimplemented feature.
+ */
+ qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
+ "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
+ isread ? "read" : "write", op0, op1, crn, crm, op2);
unallocated_encoding(s);
return;
}
@@ -972,6 +1233,17 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
return;
}
+ if (ri->accessfn) {
+ /* Emit code to perform further access permissions checks at
+ * runtime; this may result in an exception.
+ */
+ TCGv_ptr tmpptr;
+ gen_a64_set_pc_im(s->pc - 4);
+ tmpptr = tcg_const_ptr(ri);
+ gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+ tcg_temp_free_ptr(tmpptr);
+ }
+
/* Handle special cases first */
switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
case ARM_CP_NOP:
@@ -984,6 +1256,13 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
gen_set_nzcv(tcg_rt);
}
return;
+ case ARM_CP_CURRENTEL:
+ /* Reads as current EL value from pstate, which is
+ * guaranteed to be constant by the tb flags.
+ */
+ tcg_rt = cpu_reg(s, rt);
+ tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
+ return;
default:
break;
}
@@ -999,7 +1278,6 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
} else if (ri->readfn) {
TCGv_ptr tmpptr;
- gen_a64_set_pc_im(s->pc - 4);
tmpptr = tcg_const_ptr(ri);
gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
tcg_temp_free_ptr(tmpptr);
@@ -1012,7 +1290,6 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
return;
} else if (ri->writefn) {
TCGv_ptr tmpptr;
- gen_a64_set_pc_im(s->pc - 4);
tmpptr = tcg_const_ptr(ri);
gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
tcg_temp_free_ptr(tmpptr);
@@ -1257,12 +1534,68 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
}
#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
- TCGv_i64 addr, int size, int is_pair)
+ TCGv_i64 inaddr, int size, int is_pair)
{
- qemu_log_mask(LOG_UNIMP,
- "%s:%d: system mode store_exclusive unsupported "
- "at pc=%016" PRIx64 "\n",
- __FILE__, __LINE__, s->pc - 4);
+ /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
+ * && (!is_pair || env->exclusive_high == [addr + datasize])) {
+ * [addr] = {Rt};
+ * if (is_pair) {
+ * [addr + datasize] = {Rt2};
+ * }
+ * {Rd} = 0;
+ * } else {
+ * {Rd} = 1;
+ * }
+ * env->exclusive_addr = -1;
+ */
+ int fail_label = gen_new_label();
+ int done_label = gen_new_label();
+ TCGv_i64 addr = tcg_temp_local_new_i64();
+ TCGv_i64 tmp;
+
+ /* Copy input into a local temp so it is not trashed when the
+ * basic block ends at the branch insn.
+ */
+ tcg_gen_mov_i64(addr, inaddr);
+ tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+
+ tmp = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
+ tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
+ tcg_temp_free_i64(tmp);
+
+ if (is_pair) {
+ TCGv_i64 addrhi = tcg_temp_new_i64();
+ TCGv_i64 tmphi = tcg_temp_new_i64();
+
+ tcg_gen_addi_i64(addrhi, addr, 1 << size);
+ tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
+ tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
+
+ tcg_temp_free_i64(tmphi);
+ tcg_temp_free_i64(addrhi);
+ }
+
+ /* We seem to still have the exclusive monitor, so do the store */
+ tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
+ if (is_pair) {
+ TCGv_i64 addrhi = tcg_temp_new_i64();
+
+ tcg_gen_addi_i64(addrhi, addr, 1 << size);
+ tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
+ get_mem_index(s), MO_TE + size);
+ tcg_temp_free_i64(addrhi);
+ }
+
+ tcg_temp_free_i64(addr);
+
+ tcg_gen_movi_i64(cpu_reg(s, rd), 0);
+ tcg_gen_br(done_label);
+ gen_set_label(fail_label);
+ tcg_gen_movi_i64(cpu_reg(s, rd), 1);
+ gen_set_label(done_label);
+ tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
#endif
@@ -1536,6 +1869,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
* +----+-------+---+-----+-----+---+--------+-----+------+------+
*
* idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
+ *        10 -> unprivileged
* V = 0 -> non-vector
* size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
* opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
@@ -1551,6 +1885,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
bool is_signed = false;
bool is_store = false;
bool is_extended = false;
+ bool is_unpriv = (idx == 2);
bool is_vector = extract32(insn, 26, 1);
bool post_index;
bool writeback;
@@ -1559,7 +1894,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
if (is_vector) {
size |= (opc & 2) << 1;
- if (size > 4) {
+ if (size > 4 || is_unpriv) {
unallocated_encoding(s);
return;
}
@@ -1567,6 +1902,10 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
} else {
if (size == 3 && opc == 2) {
/* PRFM - prefetch */
+ if (is_unpriv) {
+ unallocated_encoding(s);
+ return;
+ }
return;
}
if (opc == 3 && size > 1) {
@@ -1580,6 +1919,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
switch (idx) {
case 0:
+ case 2:
post_index = false;
writeback = false;
break;
@@ -1591,9 +1931,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
post_index = false;
writeback = true;
break;
- case 2:
- g_assert(false);
- break;
}
if (rn == 31) {
@@ -1613,10 +1950,13 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
+ int memidx = is_unpriv ? 1 : get_mem_index(s);
+
if (is_store) {
- do_gpr_st(s, tcg_rt, tcg_addr, size);
+ do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
} else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+ do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
+ is_signed, is_extended, memidx);
}
}
@@ -1796,25 +2136,6 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
}
}
-/* Load/store register (immediate forms) */
-static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
-{
- switch (extract32(insn, 10, 2)) {
- case 0: case 1: case 3:
- /* Load/store register (unscaled immediate) */
- /* Load/store immediate pre/post-indexed */
- disas_ldst_reg_imm9(s, insn);
- break;
- case 2:
- /* Load/store register unprivileged */
- unsupported_encoding(s, insn);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
-}
-
/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
@@ -1823,7 +2144,11 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
disas_ldst_reg_roffset(s, insn);
} else {
- disas_ldst_reg_imm(s, insn);
+ /* Load/store register (unscaled immediate)
+ * Load/store immediate pre/post-indexed
+ * Load/store register unprivileged
+ */
+ disas_ldst_reg_imm9(s, insn);
}
break;
case 1:
@@ -1835,16 +2160,278 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
}
}
-/* AdvSIMD load/store multiple structures */
+/* C3.3.1 AdvSIMD load/store multiple structures
+ *
+ * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
+ * +---+---+---------------+---+-------------+--------+------+------+------+
+ * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
+ * +---+---+---------------+---+-------------+--------+------+------+------+
+ *
+ * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
+ *
+ * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
+ * +---+---+---------------+---+---+---------+--------+------+------+------+
+ * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
+ * +---+---+---------------+---+---+---------+--------+------+------+------+
+ *
+ * Rt: first (or only) SIMD&FP register to be transferred
+ * Rn: base address or SP
+ * Rm (post-index only): post-index register (when !31) or size dependent #imm
+ */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int rt = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int size = extract32(insn, 10, 2);
+ int opcode = extract32(insn, 12, 4);
+ bool is_store = !extract32(insn, 22, 1);
+ bool is_postidx = extract32(insn, 23, 1);
+ bool is_q = extract32(insn, 30, 1);
+ TCGv_i64 tcg_addr, tcg_rn;
+
+ int ebytes = 1 << size;
+ int elements = (is_q ? 128 : 64) / (8 << size);
+ int rpt; /* num iterations */
+ int selem; /* structure elements */
+ int r;
+
+ if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* From the shared decode logic */
+ switch (opcode) {
+ case 0x0:
+ rpt = 1;
+ selem = 4;
+ break;
+ case 0x2:
+ rpt = 4;
+ selem = 1;
+ break;
+ case 0x4:
+ rpt = 1;
+ selem = 3;
+ break;
+ case 0x6:
+ rpt = 3;
+ selem = 1;
+ break;
+ case 0x7:
+ rpt = 1;
+ selem = 1;
+ break;
+ case 0x8:
+ rpt = 1;
+ selem = 2;
+ break;
+ case 0xa:
+ rpt = 2;
+ selem = 1;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 3 && !is_q && selem != 1) {
+ /* reserved */
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_addr = tcg_temp_new_i64();
+ tcg_gen_mov_i64(tcg_addr, tcg_rn);
+
+ for (r = 0; r < rpt; r++) {
+ int e;
+ for (e = 0; e < elements; e++) {
+ int tt = (rt + r) % 32;
+ int xs;
+ for (xs = 0; xs < selem; xs++) {
+ if (is_store) {
+ do_vec_st(s, tt, e, tcg_addr, size);
+ } else {
+ do_vec_ld(s, tt, e, tcg_addr, size);
+
+ /* For non-quad operations, setting a slice of the low
+ * 64 bits of the register clears the high 64 bits (in
+ * the ARM ARM pseudocode this is implicit in the fact
+ * that 'rval' is a 64 bit wide variable). We optimize
+ * by noticing that we only need to do this the first
+ * time we touch a register.
+ */
+ if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
+ clear_vec_high(s, tt);
+ }
+ }
+ tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
+ tt = (tt + 1) % 32;
+ }
+ }
+ }
+
+ if (is_postidx) {
+ int rm = extract32(insn, 16, 5);
+ if (rm == 31) {
+ tcg_gen_mov_i64(tcg_rn, tcg_addr);
+ } else {
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
+ }
+ }
+ tcg_temp_free_i64(tcg_addr);
}
-/* AdvSIMD load/store single structure */
+/* C3.3.3 AdvSIMD load/store single structure
+ *
+ * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
+ * +---+---+---------------+-----+-----------+-----+---+------+------+------+
+ * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
+ * +---+---+---------------+-----+-----------+-----+---+------+------+------+
+ *
+ * C3.3.4 AdvSIMD load/store single structure (post-indexed)
+ *
+ * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
+ * +---+---+---------------+-----+-----------+-----+---+------+------+------+
+ * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
+ * +---+---+---------------+-----+-----------+-----+---+------+------+------+
+ *
+ * Rt: first (or only) SIMD&FP register to be transferred
+ * Rn: base address or SP
+ * Rm (post-index only): post-index register (when !31) or size dependent #imm
+ * index = encoded in Q:S:size dependent on size
+ *
+ * lane_size = encoded in R, opc
+ * transfer width = encoded in opc, S, size
+ */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int rt = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int size = extract32(insn, 10, 2);
+ int S = extract32(insn, 12, 1);
+ int opc = extract32(insn, 13, 3);
+ int R = extract32(insn, 21, 1);
+ int is_load = extract32(insn, 22, 1);
+ int is_postidx = extract32(insn, 23, 1);
+ int is_q = extract32(insn, 30, 1);
+
+ int scale = extract32(opc, 1, 2);
+ int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
+ bool replicate = false;
+ int index = is_q << 3 | S << 2 | size;
+ int ebytes, xs;
+ TCGv_i64 tcg_addr, tcg_rn;
+
+ switch (scale) {
+ case 3:
+ if (!is_load || S) {
+ unallocated_encoding(s);
+ return;
+ }
+ scale = size;
+ replicate = true;
+ break;
+ case 0:
+ break;
+ case 1:
+ if (extract32(size, 0, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+ index >>= 1;
+ break;
+ case 2:
+ if (extract32(size, 1, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (!extract32(size, 0, 1)) {
+ index >>= 2;
+ } else {
+ if (S) {
+ unallocated_encoding(s);
+ return;
+ }
+ index >>= 3;
+ scale = 3;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ ebytes = 1 << scale;
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_addr = tcg_temp_new_i64();
+ tcg_gen_mov_i64(tcg_addr, tcg_rn);
+
+ for (xs = 0; xs < selem; xs++) {
+ if (replicate) {
+ /* Load and replicate to all elements */
+ uint64_t mulconst;
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
+ get_mem_index(s), MO_TE + scale);
+ switch (scale) {
+ case 0:
+ mulconst = 0x0101010101010101ULL;
+ break;
+ case 1:
+ mulconst = 0x0001000100010001ULL;
+ break;
+ case 2:
+ mulconst = 0x0000000100000001ULL;
+ break;
+ case 3:
+ mulconst = 0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (mulconst) {
+ tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
+ }
+ write_vec_element(s, tcg_tmp, rt, 0, MO_64);
+ if (is_q) {
+ write_vec_element(s, tcg_tmp, rt, 1, MO_64);
+ } else {
+ clear_vec_high(s, rt);
+ }
+ tcg_temp_free_i64(tcg_tmp);
+ } else {
+ /* Load/store one element per register */
+ if (is_load) {
+ do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
+ } else {
+ do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
+ }
+ }
+ tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
+ rt = (rt + 1) % 32;
+ }
+
+ if (is_postidx) {
+ int rm = extract32(insn, 16, 5);
+ if (rm == 31) {
+ tcg_gen_mov_i64(tcg_rn, tcg_addr);
+ } else {
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
+ }
+ }
+ tcg_temp_free_i64(tcg_addr);
}
/* C3.3 Loads and stores */
@@ -3186,34 +3773,6 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
}
}
-/* Convert ARM rounding mode to softfloat */
-static inline int arm_rmode_to_sf(int rmode)
-{
- switch (rmode) {
- case FPROUNDING_TIEAWAY:
- rmode = float_round_ties_away;
- break;
- case FPROUNDING_ODD:
- /* FIXME: add support for TIEAWAY and ODD */
- qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
- rmode);
- case FPROUNDING_TIEEVEN:
- default:
- rmode = float_round_nearest_even;
- break;
- case FPROUNDING_POSINF:
- rmode = float_round_up;
- break;
- case FPROUNDING_NEGINF:
- rmode = float_round_down;
- break;
- case FPROUNDING_ZERO:
- rmode = float_round_to_zero;
- break;
- }
- return rmode;
-}
-
static void handle_fp_compare(DisasContext *s, bool is_double,
unsigned int rn, unsigned int rm,
bool cmp_with_zero, bool signal_all_nans)
@@ -4224,13 +4783,4174 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
}
}
+static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
+ int pos)
+{
+ /* Extract 64 bits from the middle of two concatenated 64 bit
+ * vector register slices left:right. The extracted bits start
+ * at 'pos' bits into the right (least significant) side.
+ * We return the result in tcg_right, and guarantee not to
+ * trash tcg_left.
+ */
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+ assert(pos > 0 && pos < 64);
+
+ tcg_gen_shri_i64(tcg_right, tcg_right, pos);
+ tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
+ tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
+
+ tcg_temp_free_i64(tcg_tmp);
+}
+
+/* C3.6.1 EXT
+ * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
+ * +---+---+-------------+-----+---+------+---+------+---+------+------+
+ * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
+ * +---+---+-------------+-----+---+------+---+------+---+------+------+
+ */
+static void disas_simd_ext(DisasContext *s, uint32_t insn)
+{
+ int is_q = extract32(insn, 30, 1);
+ int op2 = extract32(insn, 22, 2);
+ int imm4 = extract32(insn, 11, 4);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int pos = imm4 << 3;
+ TCGv_i64 tcg_resl, tcg_resh;
+
+ if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_resh = tcg_temp_new_i64();
+ tcg_resl = tcg_temp_new_i64();
+
+ /* Vd gets bits starting at pos bits into Vm:Vn. This is
+ * either extracting 128 bits from a 128:128 concatenation, or
+ * extracting 64 bits from a 64:64 concatenation.
+ */
+ if (!is_q) {
+ read_vec_element(s, tcg_resl, rn, 0, MO_64);
+ if (pos != 0) {
+ read_vec_element(s, tcg_resh, rm, 0, MO_64);
+ do_ext64(s, tcg_resh, tcg_resl, pos);
+ }
+ tcg_gen_movi_i64(tcg_resh, 0);
+ } else {
+ TCGv_i64 tcg_hh;
+ typedef struct {
+ int reg;
+ int elt;
+ } EltPosns;
+ EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
+ EltPosns *elt = eltposns;
+
+ if (pos >= 64) {
+ elt++;
+ pos -= 64;
+ }
+
+ read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
+ elt++;
+ read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
+ elt++;
+ if (pos != 0) {
+ do_ext64(s, tcg_resh, tcg_resl, pos);
+ tcg_hh = tcg_temp_new_i64();
+ read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
+ do_ext64(s, tcg_hh, tcg_resh, pos);
+ tcg_temp_free_i64(tcg_hh);
+ }
+ }
+
+ write_vec_element(s, tcg_resl, rd, 0, MO_64);
+ tcg_temp_free_i64(tcg_resl);
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+}
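Worked example: EXT V0.8B, V1.8B, V2.8B, #3 has is_q = 0 and pos = 3 * 8 = 24, so the result low half is built as (V1.D[0] >> 24) | (V2.D[0] << 40) and the high half is zeroed; the result bytes are V1 bytes 3..7 followed by V2 bytes 0..2.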
+
+/* C3.6.2 TBL/TBX
+ * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
+ * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
+ * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
+ * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
+ */
+static void disas_simd_tb(DisasContext *s, uint32_t insn)
+{
+ int op2 = extract32(insn, 22, 2);
+ int is_q = extract32(insn, 30, 1);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int is_tblx = extract32(insn, 12, 1);
+ int len = extract32(insn, 13, 2);
+ TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
+ TCGv_i32 tcg_regno, tcg_numregs;
+
+ if (op2 != 0) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* This does a table lookup: for every byte element in the input
+ * we index into a table formed from up to four vector registers,
+ * and then the output is the result of the lookups. Our helper
+ * function does the lookup operation for a single 64 bit part of
+ * the input.
+ */
+ tcg_resl = tcg_temp_new_i64();
+ tcg_resh = tcg_temp_new_i64();
+
+ if (is_tblx) {
+ read_vec_element(s, tcg_resl, rd, 0, MO_64);
+ } else {
+ tcg_gen_movi_i64(tcg_resl, 0);
+ }
+ if (is_tblx && is_q) {
+ read_vec_element(s, tcg_resh, rd, 1, MO_64);
+ } else {
+ tcg_gen_movi_i64(tcg_resh, 0);
+ }
+
+ tcg_idx = tcg_temp_new_i64();
+ tcg_regno = tcg_const_i32(rn);
+ tcg_numregs = tcg_const_i32(len + 1);
+ read_vec_element(s, tcg_idx, rm, 0, MO_64);
+ gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
+ tcg_regno, tcg_numregs);
+ if (is_q) {
+ read_vec_element(s, tcg_idx, rm, 1, MO_64);
+ gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
+ tcg_regno, tcg_numregs);
+ }
+ tcg_temp_free_i64(tcg_idx);
+ tcg_temp_free_i32(tcg_regno);
+ tcg_temp_free_i32(tcg_numregs);
+
+ write_vec_element(s, tcg_resl, rd, 0, MO_64);
+ tcg_temp_free_i64(tcg_resl);
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+}
+
+/* C3.6.3 ZIP/UZP/TRN
+ * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
+ * +---+---+-------------+------+---+------+---+------------------+------+
+ * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
+ * +---+---+-------------+------+---+------+---+------------------+------+
+ */
+static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int rm = extract32(insn, 16, 5);
+ int size = extract32(insn, 22, 2);
+ /* opc field bits [1:0] indicate ZIP/UZP/TRN;
+ * bit 2 indicates 1 vs 2 variant of the insn.
+ */
+ int opcode = extract32(insn, 12, 2);
+ bool part = extract32(insn, 14, 1);
+ bool is_q = extract32(insn, 30, 1);
+ int esize = 8 << size;
+ int i, ofs;
+ int datasize = is_q ? 128 : 64;
+ int elements = datasize / esize;
+ TCGv_i64 tcg_res, tcg_resl, tcg_resh;
+
+ if (opcode == 0 || (size == 3 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_resl = tcg_const_i64(0);
+ tcg_resh = tcg_const_i64(0);
+ tcg_res = tcg_temp_new_i64();
+
+ for (i = 0; i < elements; i++) {
+ switch (opcode) {
+ case 1: /* UZP1/2 */
+ {
+ int midpoint = elements / 2;
+ if (i < midpoint) {
+ read_vec_element(s, tcg_res, rn, 2 * i + part, size);
+ } else {
+ read_vec_element(s, tcg_res, rm,
+ 2 * (i - midpoint) + part, size);
+ }
+ break;
+ }
+ case 2: /* TRN1/2 */
+ if (i & 1) {
+ read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
+ } else {
+ read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
+ }
+ break;
+ case 3: /* ZIP1/2 */
+ {
+ int base = part * elements / 2;
+ if (i & 1) {
+ read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
+ } else {
+ read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
+ }
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ ofs = i * esize;
+ if (ofs < 64) {
+ tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
+ tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
+ } else {
+ tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
+ tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
+ }
+ }
+
+ tcg_temp_free_i64(tcg_res);
+
+ write_vec_element(s, tcg_resl, rd, 0, MO_64);
+ tcg_temp_free_i64(tcg_resl);
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+}
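For instance, ZIP1 V0.8B, V1.8B, V2.8B (size = 0, part = 0, elements = 8) assembles the result as V1.B[0], V2.B[0], V1.B[1], V2.B[1], ..., V1.B[3], V2.B[3], interleaving the low halves of the two sources.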
+
+static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
+ int opc, bool is_min, TCGv_ptr fpst)
+{
+ /* Helper function for disas_simd_across_lanes: do a single precision
+ * min/max operation on the specified two inputs,
+ * and return the result in tcg_elt1.
+ */
+ if (opc == 0xc) {
+ if (is_min) {
+ gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
+ } else {
+ gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
+ }
+ } else {
+ assert(opc == 0xf);
+ if (is_min) {
+ gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
+ } else {
+ gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
+ }
+ }
+}
+
+/* C3.6.4 AdvSIMD across lanes
+ * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +---+---+---+-----------+------+-----------+--------+-----+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
+ * +---+---+---+-----------+------+-----------+--------+-----+------+------+
+ */
+static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ bool is_q = extract32(insn, 30, 1);
+ bool is_u = extract32(insn, 29, 1);
+ bool is_fp = false;
+ bool is_min = false;
+ int esize;
+ int elements;
+ int i;
+ TCGv_i64 tcg_res, tcg_elt;
+
+ switch (opcode) {
+ case 0x1b: /* ADDV */
+ if (is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x3: /* SADDLV, UADDLV */
+ case 0xa: /* SMAXV, UMAXV */
+ case 0x1a: /* SMINV, UMINV */
+ if (size == 3 || (size == 2 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0xc: /* FMAXNMV, FMINNMV */
+ case 0xf: /* FMAXV, FMINV */
+ if (!is_u || !is_q || extract32(size, 0, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* Bit 1 of size field encodes min vs max, and actual size is always
+ * 32 bits: adjust the size variable so the following code can rely on it.
+ */
+ is_min = extract32(size, 1, 1);
+ is_fp = true;
+ size = 2;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ esize = 8 << size;
+ elements = (is_q ? 128 : 64) / esize;
+
+ tcg_res = tcg_temp_new_i64();
+ tcg_elt = tcg_temp_new_i64();
+
+ /* These instructions operate across all lanes of a vector
+ * to produce a single result. We can guarantee that a 64
+ * bit intermediate is sufficient:
+ * + for [US]ADDLV the maximum element size is 32 bits, and
+ * the result type is 64 bits
+ * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
+ * same as the element size, which is 32 bits at most
+ * For the integer operations we can choose to work at 64
+ * or 32 bits and truncate at the end; for simplicity
+ * we use 64 bits always. The floating point
+ * ops do require 32 bit intermediates, though.
+ */
+ if (!is_fp) {
+ read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
+
+ for (i = 1; i < elements; i++) {
+ read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
+
+ switch (opcode) {
+ case 0x03: /* SADDLV / UADDLV */
+ case 0x1b: /* ADDV */
+ tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
+ break;
+ case 0x0a: /* SMAXV / UMAXV */
+ tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
+ tcg_res,
+ tcg_res, tcg_elt, tcg_res, tcg_elt);
+ break;
+ case 0x1a: /* SMINV / UMINV */
+ tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
+ tcg_res,
+ tcg_res, tcg_elt, tcg_res, tcg_elt);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ }
+ } else {
+ /* Floating point ops which work on 32 bit (single) intermediates.
+ * Note that correct NaN propagation requires that we do these
+ * operations in exactly the order specified by the pseudocode.
+ */
+ TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
+ TCGv_ptr fpst = get_fpstatus_ptr();
+
+ assert(esize == 32);
+ assert(elements == 4);
+
+ read_vec_element(s, tcg_elt, rn, 0, MO_32);
+ tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt);
+ read_vec_element(s, tcg_elt, rn, 1, MO_32);
+ tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
+
+ do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
+
+ read_vec_element(s, tcg_elt, rn, 2, MO_32);
+ tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
+ read_vec_element(s, tcg_elt, rn, 3, MO_32);
+ tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt);
+
+ do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
+
+ do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
+
+ tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
+ tcg_temp_free_i32(tcg_elt1);
+ tcg_temp_free_i32(tcg_elt2);
+ tcg_temp_free_i32(tcg_elt3);
+ tcg_temp_free_ptr(fpst);
+ }
+
+ tcg_temp_free_i64(tcg_elt);
+
+ /* Now truncate the result to the width required for the final output */
+ if (opcode == 0x03) {
+ /* SADDLV, UADDLV: result is 2*esize */
+ size++;
+ }
+
+ switch (size) {
+ case 0:
+ tcg_gen_ext8u_i64(tcg_res, tcg_res);
+ break;
+ case 1:
+ tcg_gen_ext16u_i64(tcg_res, tcg_res);
+ break;
+ case 2:
+ tcg_gen_ext32u_i64(tcg_res, tcg_res);
+ break;
+ case 3:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_dreg(s, rd, tcg_res);
+ tcg_temp_free_i64(tcg_res);
+}
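For example, ADDV B0, V1.8B (is_q = 0, size = 0) takes the integer path: the eight byte lanes of V1 are summed in a 64-bit accumulator, the ext8u step truncates the sum to 8 bits, and write_fp_dreg stores it to B0 with the remainder of the register zeroed.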
+
+/* C6.3.31 DUP (Element, Vector)
+ *
+ * 31 30 29 21 20 16 15 10 9 5 4 0
+ * +---+---+-------------------+--------+-------------+------+------+
+ * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
+ * +---+---+-------------------+--------+-------------+------+------+
+ *
+ * size: encoded in imm5 (see ARM ARM LowestSetBit())
+ */
+static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
+ int imm5)
+{
+ int size = ctz32(imm5);
+ int esize = 8 << size;
+ int elements = (is_q ? 128 : 64) / esize;
+ int index, i;
+ TCGv_i64 tmp;
+
+ if (size > 3 || (size == 3 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ index = imm5 >> (size + 1);
+
+ tmp = tcg_temp_new_i64();
+ read_vec_element(s, tmp, rn, index, size);
+
+ for (i = 0; i < elements; i++) {
+ write_vec_element(s, tmp, rd, i, size);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+
+ tcg_temp_free_i64(tmp);
+}
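Worked example of the imm5 encoding: DUP V0.4S, V1.S[2] uses imm5 = 0b10100 (the element-size marker in bit 2, the index in bits <4:3>), so ctz32(imm5) = 2 selects 32-bit elements and imm5 >> (size + 1) = 2 recovers the index.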
+
+/* C6.3.31 DUP (element, scalar)
+ * 31 21 20 16 15 10 9 5 4 0
+ * +-----------------------+--------+-------------+------+------+
+ * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
+ * +-----------------------+--------+-------------+------+------+
+ */
+static void handle_simd_dupes(DisasContext *s, int rd, int rn,
+ int imm5)
+{
+ int size = ctz32(imm5);
+ int index;
+ TCGv_i64 tmp;
+
+ if (size > 3) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ index = imm5 >> (size + 1);
+
+ /* This instruction just extracts the specified element and
+ * zero-extends it into the bottom of the destination register.
+ */
+ tmp = tcg_temp_new_i64();
+ read_vec_element(s, tmp, rn, index, size);
+ write_fp_dreg(s, rd, tmp);
+ tcg_temp_free_i64(tmp);
+}
+
+/* C6.3.32 DUP (General)
+ *
+ * 31 30 29 21 20 16 15 10 9 5 4 0
+ * +---+---+-------------------+--------+-------------+------+------+
+ * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
+ * +---+---+-------------------+--------+-------------+------+------+
+ *
+ * size: encoded in imm5 (see ARM ARM LowestSetBit())
+ */
+static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
+ int imm5)
+{
+ int size = ctz32(imm5);
+ int esize = 8 << size;
+ int elements = (is_q ? 128 : 64)/esize;
+ int i = 0;
+
+ if (size > 3 || ((size == 3) && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ for (i = 0; i < elements; i++) {
+ write_vec_element(s, cpu_reg(s, rn), rd, i, size);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C6.3.150 INS (Element)
+ *
+ * 31 21 20 16 15 14 11 10 9 5 4 0
+ * +-----------------------+--------+------------+---+------+------+
+ * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
+ * +-----------------------+--------+------------+---+------+------+
+ *
+ * size: encoded in imm5 (see ARM ARM LowestSetBit())
+ * index: encoded in imm5<4:size+1>
+ */
+static void handle_simd_inse(DisasContext *s, int rd, int rn,
+ int imm4, int imm5)
+{
+ int size = ctz32(imm5);
+ int src_index, dst_index;
+ TCGv_i64 tmp;
+
+ if (size > 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ dst_index = extract32(imm5, 1+size, 5);
+ src_index = extract32(imm4, size, 4);
+
+ tmp = tcg_temp_new_i64();
+
+ read_vec_element(s, tmp, rn, src_index, size);
+ write_vec_element(s, tmp, rd, dst_index, size);
+
+ tcg_temp_free_i64(tmp);
+}
+
+
+/* C6.3.151 INS (General)
+ *
+ * 31 21 20 16 15 10 9 5 4 0
+ * +-----------------------+--------+-------------+------+------+
+ * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
+ * +-----------------------+--------+-------------+------+------+
+ *
+ * size: encoded in imm5 (see ARM ARM LowestSetBit())
+ * index: encoded in imm5<4:size+1>
+ */
+static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
+{
+ int size = ctz32(imm5);
+ int idx;
+
+ if (size > 3) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ idx = extract32(imm5, 1 + size, 4 - size);
+ write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
+}
+
+/*
+ * C6.3.321 UMOV (General)
+ * C6.3.237 SMOV (General)
+ *
+ * 31 30 29 21 20 16 15 12 10 9 5 4 0
+ * +---+---+-------------------+--------+-------------+------+------+
+ * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
+ * +---+---+-------------------+--------+-------------+------+------+
+ *
+ * U: unsigned when set
+ * size: encoded in imm5 (see ARM ARM LowestSetBit())
+ */
+static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
+ int rn, int rd, int imm5)
+{
+ int size = ctz32(imm5);
+ int element;
+ TCGv_i64 tcg_rd;
+
+ /* Check for UnallocatedEncodings */
+ if (is_signed) {
+ if (size > 2 || (size == 2 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else {
+ if (size > 3
+ || (size < 3 && is_q)
+ || (size == 3 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+ element = extract32(imm5, 1+size, 4);
+
+ tcg_rd = cpu_reg(s, rd);
+ read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
+ if (is_signed && !is_q) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ }
+}
+
+/* C3.6.5 AdvSIMD copy
+ * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
+ * +---+---+----+-----------------+------+---+------+---+------+------+
+ * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
+ * +---+---+----+-----------------+------+---+------+---+------+------+
+ */
+static void disas_simd_copy(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int imm4 = extract32(insn, 11, 4);
+ int op = extract32(insn, 29, 1);
+ int is_q = extract32(insn, 30, 1);
+ int imm5 = extract32(insn, 16, 5);
+
+ if (op) {
+ if (is_q) {
+ /* INS (element) */
+ handle_simd_inse(s, rd, rn, imm4, imm5);
+ } else {
+ unallocated_encoding(s);
+ }
+ } else {
+ switch (imm4) {
+ case 0:
+ /* DUP (element - vector) */
+ handle_simd_dupe(s, is_q, rd, rn, imm5);
+ break;
+ case 1:
+ /* DUP (general) */
+ handle_simd_dupg(s, is_q, rd, rn, imm5);
+ break;
+ case 3:
+ if (is_q) {
+ /* INS (general) */
+ handle_simd_insg(s, rd, rn, imm5);
+ } else {
+ unallocated_encoding(s);
+ }
+ break;
+ case 5:
+ case 7:
+ /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
+ handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
+ break;
+ default:
+ unallocated_encoding(s);
+ break;
+ }
+ }
+}
+
+/* C3.6.6 AdvSIMD modified immediate
+ * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
+ * +---+---+----+---------------------+-----+-------+----+---+-------+------+
+ * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
+ * +---+---+----+---------------------+-----+-------+----+---+-------+------+
+ *
+ * There are a number of operations that can be carried out here:
+ * MOVI - move (shifted) imm into register
+ * MVNI - move inverted (shifted) imm into register
+ * ORR - bitwise OR of (shifted) imm with register
+ * BIC - bitwise clear of (shifted) imm with register
+ */
+static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int cmode = extract32(insn, 12, 4);
+ int cmode_3_1 = extract32(cmode, 1, 3);
+ int cmode_0 = extract32(cmode, 0, 1);
+ int o2 = extract32(insn, 11, 1);
+ uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
+ bool is_neg = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+ uint64_t imm = 0;
+ TCGv_i64 tcg_rd, tcg_imm;
+ int i;
+
+ if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* See AdvSIMDExpandImm() in ARM ARM */
+ switch (cmode_3_1) {
+ case 0: /* Replicate(Zeros(24):imm8, 2) */
+ case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
+ case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
+ case 3: /* Replicate(imm8:Zeros(24), 2) */
+ {
+ int shift = cmode_3_1 * 8;
+ imm = bitfield_replicate(abcdefgh << shift, 32);
+ break;
+ }
+ case 4: /* Replicate(Zeros(8):imm8, 4) */
+ case 5: /* Replicate(imm8:Zeros(8), 4) */
+ {
+ int shift = (cmode_3_1 & 0x1) * 8;
+ imm = bitfield_replicate(abcdefgh << shift, 16);
+ break;
+ }
+ case 6:
+ if (cmode_0) {
+ /* Replicate(Zeros(8):imm8:Ones(16), 2) */
+ imm = (abcdefgh << 16) | 0xffff;
+ } else {
+ /* Replicate(Zeros(16):imm8:Ones(8), 2) */
+ imm = (abcdefgh << 8) | 0xff;
+ }
+ imm = bitfield_replicate(imm, 32);
+ break;
+ case 7:
+ if (!cmode_0 && !is_neg) {
+ imm = bitfield_replicate(abcdefgh, 8);
+ } else if (!cmode_0 && is_neg) {
+ int i;
+ imm = 0;
+ for (i = 0; i < 8; i++) {
+ if ((abcdefgh) & (1 << i)) {
+ imm |= 0xffULL << (i * 8);
+ }
+ }
+ } else if (cmode_0) {
+ if (is_neg) {
+ imm = (abcdefgh & 0x3f) << 48;
+ if (abcdefgh & 0x80) {
+ imm |= 0x8000000000000000ULL;
+ }
+ if (abcdefgh & 0x40) {
+ imm |= 0x3fc0000000000000ULL;
+ } else {
+ imm |= 0x4000000000000000ULL;
+ }
+ } else {
+ imm = (abcdefgh & 0x3f) << 19;
+ if (abcdefgh & 0x80) {
+ imm |= 0x80000000;
+ }
+ if (abcdefgh & 0x40) {
+ imm |= 0x3e000000;
+ } else {
+ imm |= 0x40000000;
+ }
+ imm |= (imm << 32);
+ }
+ }
+ break;
+ }
+
+ if (cmode_3_1 != 7 && is_neg) {
+ imm = ~imm;
+ }
+
+ tcg_imm = tcg_const_i64(imm);
+ tcg_rd = new_tmp_a64(s);
+
+ for (i = 0; i < 2; i++) {
+ int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);
+
+ if (i == 1 && !is_q) {
+ /* non-quad ops clear high half of vector */
+ tcg_gen_movi_i64(tcg_rd, 0);
+ } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
+ tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
+ if (is_neg) {
+ /* AND (BIC) */
+ tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
+ } else {
+ /* ORR */
+ tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
+ }
+ } else {
+ /* MOVI */
+ tcg_gen_mov_i64(tcg_rd, tcg_imm);
+ }
+ tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
+ }
+
+ tcg_temp_free_i64(tcg_imm);
+}
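Expansion example: MOVI V0.4S, #0xab, LSL #8 has cmode = 0b0010 (cmode_3_1 = 1), so shift = 8 and imm = bitfield_replicate(0xab << 8, 32) = 0x0000ab000000ab00; the MVNI form follows the same path and then inverts the value, giving 0xffff54ffffff54ff.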
+
+/* C3.6.7 AdvSIMD scalar copy
+ * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
+ * +-----+----+-----------------+------+---+------+---+------+------+
+ * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
+ * +-----+----+-----------------+------+---+------+---+------+------+
+ */
+static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int imm4 = extract32(insn, 11, 4);
+ int imm5 = extract32(insn, 16, 5);
+ int op = extract32(insn, 29, 1);
+
+ if (op != 0 || imm4 != 0) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* DUP (element, scalar) */
+ handle_simd_dupes(s, rd, rn, imm5);
+}
+
+/* C3.6.8 AdvSIMD scalar pairwise
+ * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +-----+---+-----------+------+-----------+--------+-----+------+------+
+ * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
+ * +-----+---+-----------+------+-----------+--------+-----+------+------+
+ */
+static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
+{
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ TCGv_ptr fpst;
+
+ /* For some ops (the FP ones), size[1] is part of the encoding.
+ * For ADDP strictly it is not but size[1] is always 1 for valid
+ * encodings.
+ */
+ opcode |= (extract32(size, 1, 1) << 5);
+
+ switch (opcode) {
+ case 0x3b: /* ADDP */
+ if (u || size != 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ TCGV_UNUSED_PTR(fpst);
+ break;
+ case 0xc: /* FMAXNMP */
+ case 0xd: /* FADDP */
+ case 0xf: /* FMAXP */
+ case 0x2c: /* FMINNMP */
+ case 0x2f: /* FMINP */
+ /* FP op, size[0] is 32 or 64 bit */
+ if (!u) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = extract32(size, 0, 1) ? 3 : 2;
+ fpst = get_fpstatus_ptr();
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, 0, MO_64);
+ read_vec_element(s, tcg_op2, rn, 1, MO_64);
+
+ switch (opcode) {
+ case 0x3b: /* ADDP */
+ tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
+ break;
+ case 0xc: /* FMAXNMP */
+ gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_dreg(s, rd, tcg_res);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+ } else {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
+ read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
+
+ switch (opcode) {
+ case 0xc: /* FMAXNMP */
+ gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i32(tcg_res);
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
+}
+
+/*
+ * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
+ *
+ * This handles the common shift logic and is used by both
+ * the vector and scalar code.
+ */
+static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+ TCGv_i64 tcg_rnd, bool accumulate,
+ bool is_u, int size, int shift)
+{
+ bool extended_result = false;
+ bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
+ int ext_lshift = 0;
+ TCGv_i64 tcg_src_hi;
+
+ if (round && size == 3) {
+ extended_result = true;
+ ext_lshift = 64 - shift;
+ tcg_src_hi = tcg_temp_new_i64();
+ } else if (shift == 64) {
+ if (!accumulate && is_u) {
+ /* result is zero */
+ tcg_gen_movi_i64(tcg_res, 0);
+ return;
+ }
+ }
+
+ /* Deal with the rounding step */
+ if (round) {
+ if (extended_result) {
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ if (!is_u) {
+ /* take care of sign extending tcg_res */
+ tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
+ tcg_gen_add2_i64(tcg_src, tcg_src_hi,
+ tcg_src, tcg_src_hi,
+ tcg_rnd, tcg_zero);
+ } else {
+ tcg_gen_add2_i64(tcg_src, tcg_src_hi,
+ tcg_src, tcg_zero,
+ tcg_rnd, tcg_zero);
+ }
+ tcg_temp_free_i64(tcg_zero);
+ } else {
+ tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
+ }
+ }
+
+ /* Now do the shift right */
+ if (round && extended_result) {
+ /* extended case, >64 bit precision required */
+ if (ext_lshift == 0) {
+ /* special case, only high bits matter */
+ tcg_gen_mov_i64(tcg_src, tcg_src_hi);
+ } else {
+ tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+ tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
+ tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
+ }
+ } else {
+ if (is_u) {
+ if (shift == 64) {
+ /* essentially shifting in 64 zeros */
+ tcg_gen_movi_i64(tcg_src, 0);
+ } else {
+ tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+ }
+ } else {
+ if (shift == 64) {
+ /* effectively extending the sign-bit */
+ tcg_gen_sari_i64(tcg_src, tcg_src, 63);
+ } else {
+ tcg_gen_sari_i64(tcg_src, tcg_src, shift);
+ }
+ }
+ }
+
+ if (accumulate) {
+ tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
+ } else {
+ tcg_gen_mov_i64(tcg_res, tcg_src);
+ }
+
+ if (extended_result) {
+ tcg_temp_free_i64(tcg_src_hi);
+ }
+}
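A quick numeric check of the rounding step: a rounding right shift by 3 of the value 13 adds the round constant 1 << (3 - 1) = 4 and then shifts, (13 + 4) >> 3 = 2, i.e. 13/8 = 1.625 rounded to nearest; the extended-result path exists because for size == 3 that addition can carry out of 64 bits.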
+
+/* Common SHL/SLI - Shift left with an optional insert */
+static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+ bool insert, int shift)
+{
+ if (insert) { /* SLI */
+ tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
+ } else { /* SHL */
+ tcg_gen_shli_i64(tcg_res, tcg_src, shift);
+ }
+}
+
+/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
+static void handle_scalar_simd_shri(DisasContext *s,
+ bool is_u, int immh, int immb,
+ int opcode, int rn, int rd)
+{
+ const int size = 3;
+ int immhb = immh << 3 | immb;
+ int shift = 2 * (8 << size) - immhb;
+ bool accumulate = false;
+ bool round = false;
+ TCGv_i64 tcg_rn;
+ TCGv_i64 tcg_rd;
+ TCGv_i64 tcg_round;
+
+ if (!extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x02: /* SSRA / USRA (accumulate) */
+ accumulate = true;
+ break;
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ round = true;
+ break;
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ accumulate = round = true;
+ break;
+ }
+
+ if (round) {
+ uint64_t round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
+ } else {
+ TCGV_UNUSED_I64(tcg_round);
+ }
+
+ tcg_rn = read_fp_dreg(s, rn);
+ tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
+
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
+
+ write_fp_dreg(s, rd, tcg_rd);
+
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rd);
+ if (round) {
+ tcg_temp_free_i64(tcg_round);
+ }
+}
+
+/* SHL/SLI - Scalar shift left */
+static void handle_scalar_simd_shli(DisasContext *s, bool insert,
+ int immh, int immb, int opcode,
+ int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = immhb - (8 << size);
+ TCGv_i64 tcg_rn;
+ TCGv_i64 tcg_rd;
+
+ if (!extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_rn = read_fp_dreg(s, rn);
+ tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
+
+ handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
+
+ write_fp_dreg(s, rd, tcg_rd);
+
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rd);
+}
+
+/* C3.6.9 AdvSIMD scalar shift by immediate
+ * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
+ * +-----+---+-------------+------+------+--------+---+------+------+
+ * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
+ * +-----+---+-------------+------+------+--------+---+------+------+
+ *
+ * This is the scalar version, so it operates on fixed-size registers.
+ */
+static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 5);
+ int immb = extract32(insn, 16, 3);
+ int immh = extract32(insn, 19, 4);
+ bool is_u = extract32(insn, 29, 1);
+
+ switch (opcode) {
+ case 0x00: /* SSHR / USHR */
+ case 0x02: /* SSRA / USRA */
+ case 0x04: /* SRSHR / URSHR */
+ case 0x06: /* SRSRA / URSRA */
+ handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
+ break;
+ case 0x0a: /* SHL / SLI */
+ handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
+ break;
+ default:
+ unsupported_encoding(s, insn);
+ break;
+ }
+}
+
+/* C3.6.10 AdvSIMD scalar three different
+ * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
+ * +-----+---+-----------+------+---+------+--------+-----+------+------+
+ * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
+ * +-----+---+-----------+------+---+------+--------+-----+------+------+
+ */
+static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
+{
+ bool is_u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 4);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ if (is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x9: /* SQDMLAL, SQDMLAL2 */
+ case 0xb: /* SQDMLSL, SQDMLSL2 */
+ case 0xd: /* SQDMULL, SQDMULL2 */
+ if (size == 0 || size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 2) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
+ read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
+
+ tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
+
+ switch (opcode) {
+ case 0xd: /* SQDMULL, SQDMULL2 */
+ break;
+ case 0xb: /* SQDMLSL, SQDMLSL2 */
+ tcg_gen_neg_i64(tcg_res, tcg_res);
+ /* fall through */
+ case 0x9: /* SQDMLAL, SQDMLAL2 */
+ read_vec_element(s, tcg_op1, rd, 0, MO_64);
+ gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
+ tcg_res, tcg_op1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_dreg(s, rd, tcg_res);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+ } else {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
+ read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
+
+ gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
+
+ switch (opcode) {
+ case 0xd: /* SQDMULL, SQDMULL2 */
+ break;
+ case 0xb: /* SQDMLSL, SQDMLSL2 */
+ gen_helper_neon_negl_u32(tcg_res, tcg_res);
+ /* fall through */
+ case 0x9: /* SQDMLAL, SQDMLAL2 */
+ {
+ TCGv_i64 tcg_op3 = tcg_temp_new_i64();
+ read_vec_element(s, tcg_op3, rd, 0, MO_32);
+ gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
+ tcg_res, tcg_op3);
+ tcg_temp_free_i64(tcg_op3);
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_gen_ext32u_i64(tcg_res, tcg_res);
+ write_fp_dreg(s, rd, tcg_res);
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+ }
+}
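Saturation example: SQDMULL D0, S1, S2 with both inputs 0x80000000 (INT32_MIN) produces the 64-bit product 0x4000000000000000; doubling it via neon_addl_saturate_s64 would reach 2^63, so the result saturates to 0x7fffffffffffffff and the helper records the saturation.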
+
+static void handle_3same_64(DisasContext *s, int opcode, bool u,
+ TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
+{
+ /* Handle 64x64->64 opcodes which are shared between the scalar
+ * and vector 3-same groups. We cover every opcode where size == 3
+ * is valid in either the three-reg-same (integer, not pairwise)
+ * or scalar-three-reg-same groups. (Some opcodes are not yet
+ * implemented.)
+ */
+ TCGCond cond;
+
+ switch (opcode) {
+ case 0x1: /* SQADD */
+ if (u) {
+ gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0x5: /* SQSUB */
+ if (u) {
+ gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0x6: /* CMGT, CMHI */
+ /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
+ * We implement this using setcond (test) and then negating.
+ */
+ cond = u ? TCG_COND_GTU : TCG_COND_GT;
+ do_cmop:
+ tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
+ tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ break;
+ case 0x7: /* CMGE, CMHS */
+ cond = u ? TCG_COND_GEU : TCG_COND_GE;
+ goto do_cmop;
+ case 0x11: /* CMTST, CMEQ */
+ if (u) {
+ cond = TCG_COND_EQ;
+ goto do_cmop;
+ }
+ /* CMTST : test is "if (X & Y != 0)". */
+ tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
+ tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
+ tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ break;
+ case 0x8: /* SSHL, USHL */
+ if (u) {
+ gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0x9: /* SQSHL, UQSHL */
+ if (u) {
+ gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0xa: /* SRSHL, URSHL */
+ if (u) {
+ gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0xb: /* SQRSHL, UQRSHL */
+ if (u) {
+ gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0x10: /* ADD, SUB */
+ if (u) {
+ tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
+ } else {
+ tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Handle the 3-same-operands float operations; shared by the scalar
+ * and vector encodings. The caller must filter out any encodings
+ * not allocated for the encoding it is dealing with.
+ */
+static void handle_3same_float(DisasContext *s, int size, int elements,
+ int fpopcode, int rd, int rn, int rm)
+{
+ int pass;
+ TCGv_ptr fpst = get_fpstatus_ptr();
+
+ for (pass = 0; pass < elements; pass++) {
+ if (size) {
+ /* Double */
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ switch (fpopcode) {
+ case 0x39: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negd(tcg_op1, tcg_op1);
+ /* fall through */
+ case 0x19: /* FMLA */
+ read_vec_element(s, tcg_res, rd, pass, MO_64);
+ gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
+ tcg_res, fpst);
+ break;
+ case 0x18: /* FMAXNM */
+ gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FADD */
+ gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1b: /* FMULX */
+ gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1c: /* FCMEQ */
+ gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1e: /* FMAX */
+ gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1f: /* FRECPS */
+ gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x38: /* FMINNM */
+ gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3a: /* FSUB */
+ gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3e: /* FMIN */
+ gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3f: /* FRSQRTS */
+ gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5b: /* FMUL */
+ gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5c: /* FCMGE */
+ gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5d: /* FACGE */
+ gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5f: /* FDIV */
+ gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7a: /* FABD */
+ gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
+ gen_helper_vfp_absd(tcg_res, tcg_res);
+ break;
+ case 0x7c: /* FCMGT */
+ gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7d: /* FACGT */
+ gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ } else {
+ /* Single */
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
+ read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
+
+ switch (fpopcode) {
+ case 0x39: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negs(tcg_op1, tcg_op1);
+ /* fall through */
+ case 0x19: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
+ tcg_res, fpst);
+ break;
+ case 0x1a: /* FADD */
+ gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1b: /* FMULX */
+ gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1c: /* FCMEQ */
+ gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1e: /* FMAX */
+ gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1f: /* FRECPS */
+ gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x18: /* FMAXNM */
+ gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x38: /* FMINNM */
+ gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3a: /* FSUB */
+ gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3e: /* FMIN */
+ gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3f: /* FRSQRTS */
+ gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5b: /* FMUL */
+ gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5c: /* FCMGE */
+ gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5d: /* FACGE */
+ gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5f: /* FDIV */
+ gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7a: /* FABD */
+ gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
+ gen_helper_vfp_abss(tcg_res, tcg_res);
+ break;
+ case 0x7c: /* FCMGT */
+ gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7d: /* FACGT */
+ gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (elements == 1) {
+ /* scalar single so clear high part */
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
+ write_vec_element(s, tcg_tmp, rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_tmp);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+ }
+
+ tcg_temp_free_ptr(fpst);
+
+ if ((elements << size) < 4) {
+ /* scalar, or non-quad vector op */
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C3.6.11 AdvSIMD scalar three same
+ * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
+ * +-----+---+-----------+------+---+------+--------+---+------+------+
+ * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
+ * +-----+---+-----------+------+---+------+--------+---+------+------+
+ */
+static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 5);
+ int rm = extract32(insn, 16, 5);
+ int size = extract32(insn, 22, 2);
+ bool u = extract32(insn, 29, 1);
+ TCGv_i64 tcg_rd;
+
+ if (opcode >= 0x18) {
+ /* Floating point: U, size[1] and opcode indicate operation */
+ int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
+ switch (fpopcode) {
+ case 0x1b: /* FMULX */
+ case 0x1f: /* FRECPS */
+ case 0x3f: /* FRSQRTS */
+ case 0x5d: /* FACGE */
+ case 0x7d: /* FACGT */
+ case 0x1c: /* FCMEQ */
+ case 0x5c: /* FCMGE */
+ case 0x7c: /* FCMGT */
+ case 0x7a: /* FABD */
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x1: /* SQADD, UQADD */
+ case 0x5: /* SQSUB, UQSUB */
+ case 0x9: /* SQSHL, UQSHL */
+ case 0xb: /* SQRSHL, UQRSHL */
+ break;
+ case 0x8: /* SSHL, USHL */
+ case 0xa: /* SRSHL, URSHL */
+ case 0x6: /* CMGT, CMHI */
+ case 0x7: /* CMGE, CMHS */
+ case 0x11: /* CMTST, CMEQ */
+ case 0x10: /* ADD, SUB (vector) */
+ if (size != 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x16: /* SQDMULH, SQRDMULH (vector) */
+ if (size != 1 && size != 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_rd = tcg_temp_new_i64();
+
+ if (size == 3) {
+ TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
+ TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
+
+ handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rm);
+ } else {
+ /* Do a single operation on the lowest element in the vector.
+ * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
+ * no side effects for all these operations.
+ * OPTME: special-purpose helpers would avoid doing some
+ * unnecessary work in the helper for the 8 and 16 bit cases.
+ */
+ NeonGenTwoOpEnvFn *genenvfn;
+ TCGv_i32 tcg_rn = tcg_temp_new_i32();
+ TCGv_i32 tcg_rm = tcg_temp_new_i32();
+ TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_rn, rn, 0, size);
+ read_vec_element_i32(s, tcg_rm, rm, 0, size);
+
+ switch (opcode) {
+ case 0x1: /* SQADD, UQADD */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
+ { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
+ { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x5: /* SQSUB, UQSUB */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
+ { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
+ { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x9: /* SQSHL, UQSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
+ { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
+ { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0xb: /* SQRSHL, UQRSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
+ { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
+ { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static NeonGenTwoOpEnvFn * const fns[2][2] = {
+ { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
+ { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
+ };
+ assert(size == 1 || size == 2);
+ genenvfn = fns[size - 1][u];
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
+ tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
+ tcg_temp_free_i32(tcg_rd32);
+ tcg_temp_free_i32(tcg_rn);
+ tcg_temp_free_i32(tcg_rm);
+ }
+
+ write_fp_dreg(s, rd, tcg_rd);
+
+ tcg_temp_free_i64(tcg_rd);
+}
+
+static void handle_2misc_64(DisasContext *s, int opcode, bool u,
+ TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+ /* Handle 64->64 opcodes which are shared between the scalar and
+ * vector 2-reg-misc groups. We cover every integer opcode where size == 3
+ * is valid in either group and also the double-precision fp ops.
+ */
+ TCGCond cond;
+
+ switch (opcode) {
+ case 0x5: /* NOT */
+ /* This opcode is shared with CNT and RBIT but we have earlier
+ * enforced that size == 3 if and only if this is the NOT insn.
+ */
+ tcg_gen_not_i64(tcg_rd, tcg_rn);
+ break;
+ case 0xa: /* CMLT */
+ /* 64 bit integer comparison against zero, result is
+ * test ? (2^64 - 1) : 0. We implement via setcond(test) and
+ * negating.
+ */
+ cond = TCG_COND_LT;
+ do_cmop:
+ tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
+ tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ break;
+ case 0x8: /* CMGT, CMGE */
+ cond = u ? TCG_COND_GE : TCG_COND_GT;
+ goto do_cmop;
+ case 0x9: /* CMEQ, CMLE */
+ cond = u ? TCG_COND_LE : TCG_COND_EQ;
+ goto do_cmop;
+ case 0xb: /* ABS, NEG */
+ if (u) {
+ tcg_gen_neg_i64(tcg_rd, tcg_rn);
+ } else {
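+ /* ABS: negate, then movcond picks the original value when it is > 0 */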
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ tcg_gen_neg_i64(tcg_rd, tcg_rn);
+ tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
+ tcg_rn, tcg_rd);
+ tcg_temp_free_i64(tcg_zero);
+ }
+ break;
+ case 0x2f: /* FABS */
+ gen_helper_vfp_absd(tcg_rd, tcg_rn);
+ break;
+ case 0x6f: /* FNEG */
+ gen_helper_vfp_negd(tcg_rd, tcg_rn);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
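+/* Floating point compare against zero (FCMEQ/FCMGE/FCMGT/FCMLE/FCMLT),
+ * shared by the scalar and vector 2-reg-misc groups.
+ */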
+static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
+ bool is_scalar, bool is_u, bool is_q,
+ int size, int rn, int rd)
+{
+ bool is_double = (size == 3);
+ TCGv_ptr fpst = get_fpstatus_ptr();
+
+ if (is_double) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+ NeonGenTwoDoubleOPFn *genfn;
+ bool swap = false;
+ int pass;
+
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fallthrough */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_neon_cgt_f64;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_neon_ceq_f64;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_neon_cge_f64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ if (swap) {
+ genfn(tcg_res, tcg_zero, tcg_op, fpst);
+ } else {
+ genfn(tcg_res, tcg_op, tcg_zero, fpst);
+ }
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+ }
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_zero);
+ tcg_temp_free_i64(tcg_op);
+ } else {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ NeonGenTwoSingleOPFn *genfn;
+ bool swap = false;
+ int pass, maxpasses;
+
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fall through */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_neon_cgt_f32;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_neon_ceq_f32;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_neon_cge_f32;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (is_scalar) {
+ maxpasses = 1;
+ } else {
+ maxpasses = is_q ? 4 : 2;
+ }
+
+ for (pass = 0; pass < maxpasses; pass++) {
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+ if (swap) {
+ genfn(tcg_res, tcg_zero, tcg_op, fpst);
+ } else {
+ genfn(tcg_res, tcg_op, tcg_zero, fpst);
+ }
+ if (is_scalar) {
+ write_fp_sreg(s, rd, tcg_res);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+ }
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_zero);
+ tcg_temp_free_i32(tcg_op);
+ if (!is_q && !is_scalar) {
+ clear_vec_high(s, rd);
+ }
+ }
+
+ tcg_temp_free_ptr(fpst);
+}
+
+/* C3.6.12 AdvSIMD scalar two reg misc
+ * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +-----+---+-----------+------+-----------+--------+-----+------+------+
+ * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
+ * +-----+---+-----------+------+-----------+--------+-----+------+------+
+ */
+static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 12, 5);
+ int size = extract32(insn, 22, 2);
+ bool u = extract32(insn, 29, 1);
+
+ switch (opcode) {
+ case 0xa: /* CMLT */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x8: /* CMGT, CMGE */
+ case 0x9: /* CMEQ, CMLE */
+ case 0xb: /* ABS, NEG */
+ if (size != 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0xc ... 0xf:
+ case 0x16 ... 0x1d:
+ case 0x1f:
+ /* Floating point: U, size[1] and opcode indicate operation;
+ * size[0] indicates single or double precision.
+ */
+ opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+ size = extract32(size, 0, 1) ? 3 : 2;
+ switch (opcode) {
+ case 0x2c: /* FCMGT (zero) */
+ case 0x2d: /* FCMEQ (zero) */
+ case 0x2e: /* FCMLT (zero) */
+ case 0x6c: /* FCMGE (zero) */
+ case 0x6d: /* FCMLE (zero) */
+ handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
+ return;
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x1d: /* SCVTF */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ case 0x3d: /* FRECPE */
+ case 0x3f: /* FRECPX */
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x5d: /* UCVTF */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ case 0x7d: /* FRSQRTE */
+ unsupported_encoding(s, insn);
+ return;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ default:
+ /* Other categories of encoding in this class:
+ * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
+ * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
+ * narrowing saturate ops: size 64/32/16 -> 32/16/8
+ */
+ unsupported_encoding(s, insn);
+ return;
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
+ TCGv_i64 tcg_rd = tcg_temp_new_i64();
+
+ handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn);
+ write_fp_dreg(s, rd, tcg_rd);
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i64(tcg_rn);
+ } else {
+ /* ops with element sizes other than 64 bits aren't implemented yet */
+ g_assert_not_reached();
+ }
+}
+
+/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
+static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = 2 * (8 << size) - immhb;
+ bool accumulate = false;
+ bool round = false;
+ int dsize = is_q ? 128 : 64;
+ int esize = 8 << size;
+ int elements = dsize/esize;
+ TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
+ TCGv_i64 tcg_rn = new_tmp_a64(s);
+ TCGv_i64 tcg_rd = new_tmp_a64(s);
+ TCGv_i64 tcg_round;
+ int i;
+
+ if (extract32(immh, 3, 1) && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size > 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x02: /* SSRA / USRA (accumulate) */
+ accumulate = true;
+ break;
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ round = true;
+ break;
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ accumulate = round = true;
+ break;
+ }
+
+ if (round) {
+ uint64_t round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
+ } else {
+ TCGV_UNUSED_I64(tcg_round);
+ }
+
+ for (i = 0; i < elements; i++) {
+ read_vec_element(s, tcg_rn, rn, i, memop);
+ if (accumulate) {
+ read_vec_element(s, tcg_rd, rd, i, memop);
+ }
+
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
+
+ write_vec_element(s, tcg_rd, rd, i, size);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+
+ if (round) {
+ tcg_temp_free_i64(tcg_round);
+ }
+}
+
+/* SHL/SLI - Vector shift left */
+static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = immhb - (8 << size);
+ int dsize = is_q ? 128 : 64;
+ int esize = 8 << size;
+ int elements = dsize/esize;
+ TCGv_i64 tcg_rn = new_tmp_a64(s);
+ TCGv_i64 tcg_rd = new_tmp_a64(s);
+ int i;
+
+ if (extract32(immh, 3, 1) && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size > 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ for (i = 0; i < elements; i++) {
+ read_vec_element(s, tcg_rn, rn, i, size);
+ if (insert) {
+ read_vec_element(s, tcg_rd, rd, i, size);
+ }
+
+ handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
+
+ write_vec_element(s, tcg_rd, rd, i, size);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* SSHLL/USHLL - Vector shift left with widening */
+static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = immhb - (8 << size);
+ int dsize = 64;
+ int esize = 8 << size;
+ int elements = dsize/esize;
+ TCGv_i64 tcg_rn = new_tmp_a64(s);
+ TCGv_i64 tcg_rd = new_tmp_a64(s);
+ int i;
+
+ if (size >= 3) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* For the LL variants the store is larger than the load,
+ * so if rd == rn we would overwrite parts of our input.
+ * So load everything right now and use shifts in the main loop.
+ */
+ read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
+
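+ /* Each pass extracts one source element, sign- or zero-extends it,
+ * shifts it and writes it back at double the element width.
+ */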
+ for (i = 0; i < elements; i++) {
+ tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
+ ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
+ tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
+ write_vec_element(s, tcg_rd, rd, i, size + 1);
+ }
+}
+
+/* C3.6.14 AdvSIMD shift by immediate
+ * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
+ * +---+---+---+-------------+------+------+--------+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
+ * +---+---+---+-------------+------+------+--------+---+------+------+
+ */
+static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 5);
+ int immb = extract32(insn, 16, 3);
+ int immh = extract32(insn, 19, 4);
+ bool is_u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+
+ switch (opcode) {
+ case 0x00: /* SSHR / USHR */
+ case 0x02: /* SSRA / USRA (accumulate) */
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
+ break;
+ case 0x0a: /* SHL / SLI */
+ handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
+ break;
+ case 0x14: /* SSHLL / USHLL */
+ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
+ break;
+ default:
+ /* We don't currently implement any of the Narrow or saturating shifts;
+ * nor do we implement the fixed-point conversions in this
+ * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
+ */
+ unsupported_encoding(s, insn);
+ return;
+ }
+}
+
+/* Generate code to do a "long" addition or subtraction, i.e. one done in
+ * TCGv_i64 on vector lanes twice the width specified by size.
+ */
+static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
+ TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
+{
+ static NeonGenTwo64OpFn * const fns[3][2] = {
+ { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
+ { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
+ { tcg_gen_add_i64, tcg_gen_sub_i64 },
+ };
+ NeonGenTwo64OpFn *genfn;
+ assert(size < 3);
+
+ genfn = fns[size][is_sub];
+ genfn(tcg_res, tcg_op1, tcg_op2);
+}
+
+static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
+ int opcode, int rd, int rn, int rm)
+{
+ /* 3-reg-different widening insns: 64 x 64 -> 128 */
+ TCGv_i64 tcg_res[2];
+ int pass, accop;
+
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = tcg_temp_new_i64();
+
+ /* Does this op do an adding accumulate, a subtracting accumulate,
+ * or no accumulate at all?
+ */
+ switch (opcode) {
+ case 5:
+ case 8:
+ case 9:
+ accop = 1;
+ break;
+ case 10:
+ case 11:
+ accop = -1;
+ break;
+ default:
+ accop = 0;
+ break;
+ }
+
+ if (accop != 0) {
+ read_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ read_vec_element(s, tcg_res[1], rd, 1, MO_64);
+ }
+
+ /* size == 2 means two 32x32->64 operations; this is worth special
+ * casing because we can generally handle it inline.
+ */
+ if (size == 2) {
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_passres;
+ TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
+
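+ /* The '2' (second-half) forms have is_q set and operate on the
+ * upper 32-bit elements of the source registers.
+ */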
+ int elt = pass + is_q * 2;
+
+ read_vec_element(s, tcg_op1, rn, elt, memop);
+ read_vec_element(s, tcg_op2, rm, elt, memop);
+
+ if (accop == 0) {
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ switch (opcode) {
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
+ case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+ case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
+ {
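+ /* Absolute difference: compute both op1 - op2 and op2 - op1 and
+ * use movcond to select whichever is non-negative.
+ */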
+ TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
+ tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
+ tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
+ tcg_passres,
+ tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
+ tcg_temp_free_i64(tcg_tmp1);
+ tcg_temp_free_i64(tcg_tmp2);
+ break;
+ }
+ case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
+ tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
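+ /* The 'doubling' in these insns is done by saturating-adding
+ * the product to itself.
+ */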
+ tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (opcode == 9 || opcode == 11) {
+ /* saturating accumulate ops */
+ if (accop < 0) {
+ tcg_gen_neg_i64(tcg_passres, tcg_passres);
+ }
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ tcg_res[pass], tcg_passres);
+ } else if (accop > 0) {
+ tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ } else if (accop < 0) {
+ tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ }
+
+ if (accop != 0) {
+ tcg_temp_free_i64(tcg_passres);
+ }
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ }
+ } else {
+ /* size 0 or 1, generally helper functions */
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i64 tcg_passres;
+ int elt = pass + is_q * 2;
+
+ read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
+ read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
+
+ if (accop == 0) {
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ switch (opcode) {
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ {
+ TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
+ static NeonGenWidenFn * const widenfns[2][2] = {
+ { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+ { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+ };
+ NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+ widenfn(tcg_op2_64, tcg_op2);
+ widenfn(tcg_passres, tcg_op1);
+ gen_neon_addl(size, (opcode == 2), tcg_passres,
+ tcg_passres, tcg_op2_64);
+ tcg_temp_free_i64(tcg_op2_64);
+ break;
+ }
+ case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+ case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
+ if (size == 0) {
+ if (is_u) {
+ gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
+ } else {
+ gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
+ }
+ } else {
+ if (is_u) {
+ gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
+ } else {
+ gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
+ }
+ }
+ break;
+ case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
+ if (size == 0) {
+ if (is_u) {
+ gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
+ } else {
+ gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
+ }
+ } else {
+ if (is_u) {
+ gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
+ } else {
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
+ }
+ }
+ break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ assert(size == 1);
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+
+ if (accop != 0) {
+ if (opcode == 9 || opcode == 11) {
+ /* saturating accumulate ops */
+ if (accop < 0) {
+ gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+ }
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ } else {
+ gen_neon_addl(size, (accop < 0), tcg_res[pass],
+ tcg_res[pass], tcg_passres);
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ }
+ }
+
+ write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_res[0]);
+ tcg_temp_free_i64(tcg_res[1]);
+}
+
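+/* 3-reg-different 'wide' insns (SADDW etc): the narrow Rm elements are
+ * widened and then added to or subtracted from the wide Rn elements.
+ */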
+static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
+ int opcode, int rd, int rn, int rm)
+{
+ TCGv_i64 tcg_res[2];
+ int part = is_q ? 2 : 0;
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
+ static NeonGenWidenFn * const widenfns[3][2] = {
+ { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+ { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+ { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
+ };
+ NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
+ widenfn(tcg_op2_wide, tcg_op2);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_res[pass] = tcg_temp_new_i64();
+ gen_neon_addl(size, (opcode == 3),
+ tcg_res[pass], tcg_op1, tcg_op2_wide);
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2_wide);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+}
+
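+/* Narrowing helpers for 32-bit result elements: take the high half of
+ * each 64-bit input, optionally rounding by adding 1 << 31 first.
+ */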
+static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
+{
+ tcg_gen_shri_i64(in, in, 32);
+ tcg_gen_trunc_i64_i32(res, in);
+}
+
+static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
+{
+ tcg_gen_addi_i64(in, in, 1U << 31);
+ do_narrow_high_u32(res, in);
+}
+
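+/* 3-reg-different narrowing insns (ADDHN etc): add or subtract the wide
+ * operands and write the high half of each result element to Rd.
+ */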
+static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
+ int opcode, int rd, int rn, int rm)
+{
+ TCGv_i32 tcg_res[2];
+ int part = is_q ? 2 : 0;
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_wideres = tcg_temp_new_i64();
+ static NeonGenNarrowFn * const narrowfns[3][2] = {
+ { gen_helper_neon_narrow_high_u8,
+ gen_helper_neon_narrow_round_high_u8 },
+ { gen_helper_neon_narrow_high_u16,
+ gen_helper_neon_narrow_round_high_u16 },
+ { do_narrow_high_u32, do_narrow_round_high_u32 },
+ };
+ NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+
+ tcg_res[pass] = tcg_temp_new_i32();
+ gennarrow(tcg_res[pass], tcg_wideres);
+ tcg_temp_free_i64(tcg_wideres);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C3.6.15 AdvSIMD three different
+ * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
+ * +---+---+---+-----------+------+---+------+--------+-----+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
+ * +---+---+---+-----------+------+---+------+--------+-----+------+------+
+ */
+static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
+{
+ /* Instructions in this group fall into three basic classes
+ * (in each case with the operation working on each element in
+ * the input vectors):
+ * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
+ * 128 bit input)
+ * (2) wide 64 x 128 -> 128
+ * (3) narrowing 128 x 128 -> 64
+ * Here we do initial decode, catch unallocated cases and
+ * dispatch to separate functions for each class.
+ */
+ int is_q = extract32(insn, 30, 1);
+ int is_u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 4);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ switch (opcode) {
+ case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
+ case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
+ /* 64 x 128 -> 128 */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
+ break;
+ case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
+ case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
+ /* 128 x 128 -> 64 */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
+ break;
+ case 14: /* PMULL, PMULL2 */
+ if (is_u || size == 1 || size == 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ if (is_u || size == 0) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+ case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
+ case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
+ /* 64 x 64 -> 128 */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
+ break;
+ default:
+ /* opcode 15 not allocated */
+ unallocated_encoding(s);
+ break;
+ }
+}
+
+/* Logic op (opcode == 3) subgroup of C3.6.16. */
+static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int rm = extract32(insn, 16, 5);
+ int size = extract32(insn, 22, 2);
+ bool is_u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res[2];
+ int pass;
+
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = tcg_temp_new_i64();
+
+ for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
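+ /* With U clear, size selects AND/BIC/ORR/ORN; with U set it
+ * selects EOR/BSL/BIT/BIF.
+ */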
+ if (!is_u) {
+ switch (size) {
+ case 0: /* AND */
+ tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
+ case 1: /* BIC */
+ tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
+ case 2: /* ORR */
+ tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
+ case 3: /* ORN */
+ tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
+ }
+ } else {
+ if (size != 0) {
+ /* B* ops need res loaded to operate on */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ }
+
+ switch (size) {
+ case 0: /* EOR */
+ tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
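+ /* BSL, BIT and BIF are all bitwise selects, computed as
+ * b ^ ((a ^ b) & mask): bits come from a where the mask is
+ * set and from b where it is clear, with no extra temporary.
+ */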
+ case 1: /* BSL bitwise select */
+ tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
+ tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
+ tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
+ break;
+ case 2: /* BIT, bitwise insert if true */
+ tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
+ tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
+ tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+ break;
+ case 3: /* BIF, bitwise insert if false */
+ tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
+ tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
+ tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+ break;
+ }
+ }
+ }
+
+ write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ if (!is_q) {
+ tcg_gen_movi_i64(tcg_res[1], 0);
+ }
+ write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res[0]);
+ tcg_temp_free_i64(tcg_res[1]);
+}
+
+/* Helper functions for 32 bit comparisons */
+static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
+{
+ tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
+}
+
+static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
+{
+ tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
+}
+
+static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
+{
+ tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
+}
+
+static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
+{
+ tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
+}
+
+/* Pairwise op subgroup of C3.6.16.
+ *
+ * This is called directly from disas_simd_three_reg_same and also via
+ * disas_simd_3same_float for the float pairwise operations, where the
+ * opcode and size are calculated differently.
+ */
+static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
+ int size, int rn, int rm, int rd)
+{
+ TCGv_ptr fpst;
+ int pass;
+
+ /* Floating point operations need fpst */
+ if (opcode >= 0x58) {
+ fpst = get_fpstatus_ptr();
+ } else {
+ TCGV_UNUSED_PTR(fpst);
+ }
+
+ /* These operations work on the concatenated rm:rn, with each pair of
+ * adjacent elements being operated on to produce an element in the result.
+ */
+ if (size == 3) {
+ TCGv_i64 tcg_res[2];
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ int passreg = (pass == 0) ? rn : rm;
+
+ read_vec_element(s, tcg_op1, passreg, 0, MO_64);
+ read_vec_element(s, tcg_op2, passreg, 1, MO_64);
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ switch (opcode) {
+ case 0x17: /* ADDP */
+ tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
+ case 0x58: /* FMAXNMP */
+ gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5a: /* FADDP */
+ gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5e: /* FMAXP */
+ gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x78: /* FMINNMP */
+ gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7e: /* FMINP */
+ gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+ } else {
+ int maxpass = is_q ? 4 : 2;
+ TCGv_i32 tcg_res[4];
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ NeonGenTwoOpFn *genfn = NULL;
+ int passreg = pass < (maxpass / 2) ? rn : rm;
+ int passelt = (is_q && (pass & 1)) ? 2 : 0;
+
+ read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
+ read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x17: /* ADDP */
+ {
+ static NeonGenTwoOpFn * const fns[3] = {
+ gen_helper_neon_padd_u8,
+ gen_helper_neon_padd_u16,
+ tcg_gen_add_i32,
+ };
+ genfn = fns[size];
+ break;
+ }
+ case 0x14: /* SMAXP, UMAXP */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
+ { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
+ { gen_max_s32, gen_max_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x15: /* SMINP, UMINP */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
+ { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
+ { gen_min_s32, gen_min_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ /* The FP operations are all on single floats (32 bit) */
+ case 0x58: /* FMAXNMP */
+ gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5a: /* FADDP */
+ gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5e: /* FMAXP */
+ gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x78: /* FMINNMP */
+ gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7e: /* FMINP */
+ gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* The FP ops were emitted directly above; call the integer helper now */
+ if (genfn) {
+ genfn(tcg_res[pass], tcg_op1, tcg_op2);
+ }
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+
+ for (pass = 0; pass < maxpass; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
+}
+
+/* Floating point op subgroup of C3.6.16. */
+static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
+{
+ /* For floating point ops, the U, size[1] and opcode bits
+ * together indicate the operation. size[0] indicates single
+ * or double.
+ */
+ int fpopcode = extract32(insn, 11, 5)
+ | (extract32(insn, 23, 1) << 5)
+ | (extract32(insn, 29, 1) << 6);
+ int is_q = extract32(insn, 30, 1);
+ int size = extract32(insn, 22, 1);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ int datasize = is_q ? 128 : 64;
+ int esize = 32 << size;
+ int elements = datasize / esize;
+
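+ /* Double precision (size set) is only available in the Q form */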
+ if (size == 1 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (fpopcode) {
+ case 0x58: /* FMAXNMP */
+ case 0x5a: /* FADDP */
+ case 0x5e: /* FMAXP */
+ case 0x78: /* FMINNMP */
+ case 0x7e: /* FMINP */
+ if (size && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
+ rn, rm, rd);
+ return;
+ case 0x1b: /* FMULX */
+ case 0x1f: /* FRECPS */
+ case 0x3f: /* FRSQRTS */
+ case 0x5d: /* FACGE */
+ case 0x7d: /* FACGT */
+ case 0x19: /* FMLA */
+ case 0x39: /* FMLS */
+ case 0x18: /* FMAXNM */
+ case 0x1a: /* FADD */
+ case 0x1c: /* FCMEQ */
+ case 0x1e: /* FMAX */
+ case 0x38: /* FMINNM */
+ case 0x3a: /* FSUB */
+ case 0x3e: /* FMIN */
+ case 0x5b: /* FMUL */
+ case 0x5c: /* FCMGE */
+ case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
+ case 0x7c: /* FCMGT */
+ handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
+ return;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+}
+
+/* Integer op subgroup of C3.6.16. */
+static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
+{
+ int is_q = extract32(insn, 30, 1);
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 11, 5);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int pass;
+
+ switch (opcode) {
+ case 0x13: /* MUL, PMUL */
+ if (u && size != 0) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x0: /* SHADD, UHADD */
+ case 0x2: /* SRHADD, URHADD */
+ case 0x4: /* SHSUB, UHSUB */
+ case 0xc: /* SMAX, UMAX */
+ case 0xd: /* SMIN, UMIN */
+ case 0xe: /* SABD, UABD */
+ case 0xf: /* SABA, UABA */
+ case 0x12: /* MLA, MLS */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x16: /* SQDMULH, SQRDMULH */
+ if (size == 0 || size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ default:
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ }
+
+ if (size == 3) {
+ for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ }
+ } else {
+ for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ NeonGenTwoOpFn *genfn = NULL;
+ NeonGenTwoOpEnvFn *genenvfn = NULL;
+
+ read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
+ read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
+
+ switch (opcode) {
+ case 0x0: /* SHADD, UHADD */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
+ { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
+ { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x1: /* SQADD, UQADD */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
+ { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
+ { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x2: /* SRHADD, URHADD */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
+ { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
+ { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x4: /* SHSUB, UHSUB */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
+ { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
+ { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x5: /* SQSUB, UQSUB */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
+ { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
+ { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x6: /* CMGT, CMHI */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
+ { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
+ { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x7: /* CMGE, CMHS */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
+ { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
+ { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x8: /* SSHL, USHL */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
+ { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
+ { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x9: /* SQSHL, UQSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
+ { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
+ { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0xa: /* SRSHL, URSHL */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
+ { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
+ { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0xb: /* SQRSHL, UQRSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
+ { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
+ { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0xc: /* SMAX, UMAX */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
+ { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
+ { gen_max_s32, gen_max_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+
+ case 0xd: /* SMIN, UMIN */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
+ { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
+ { gen_min_s32, gen_min_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0xe: /* SABD, UABD */
+ case 0xf: /* SABA, UABA */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
+ { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
+ { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x10: /* ADD, SUB */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
+ { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+ { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x11: /* CMTST, CMEQ */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
+ { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
+ { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x13: /* MUL, PMUL */
+ if (u) {
+ /* PMUL */
+ assert(size == 0);
+ genfn = gen_helper_neon_mul_p8;
+ break;
+ }
+ /* fall through : MUL */
+ case 0x12: /* MLA, MLS */
+ {
+ static NeonGenTwoOpFn * const fns[3] = {
+ gen_helper_neon_mul_u8,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ };
+ genfn = fns[size];
+ break;
+ }
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static NeonGenTwoOpEnvFn * const fns[2][2] = {
+ { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
+ { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
+ };
+ assert(size == 1 || size == 2);
+ genenvfn = fns[size - 1][u];
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ if (genenvfn) {
+ genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
+ } else {
+ genfn(tcg_res, tcg_op1, tcg_op2);
+ }
+
+ if (opcode == 0xf || opcode == 0x12) {
+ /* SABA, UABA, MLA, MLS: accumulating ops */
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
+ { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+ { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ };
+ bool is_sub = (opcode == 0x12 && u); /* MLS */
+
+ genfn = fns[size][is_sub];
+ read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
+ genfn(tcg_res, tcg_res, tcg_op1);
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C3.6.16 AdvSIMD three same
+ * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
+ * +---+---+---+-----------+------+---+------+--------+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
+ * +---+---+---+-----------+------+---+------+--------+---+------+------+
+ */
+static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
+{
+ int opcode = extract32(insn, 11, 5);
+
+ switch (opcode) {
+ case 0x3: /* logic ops */
+ disas_simd_3same_logic(s, insn);
+ break;
+ case 0x17: /* ADDP */
+ case 0x14: /* SMAXP, UMAXP */
+ case 0x15: /* SMINP, UMINP */
+ {
+ /* Pairwise operations */
+ int is_q = extract32(insn, 30, 1);
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ if (opcode == 0x17) {
+ if (u || (size == 3 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else {
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+ handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
+ break;
+ }
+ case 0x18 ... 0x31:
+ /* floating point ops, sz[1] and U are part of opcode */
+ disas_simd_3same_float(s, insn);
+ break;
+ default:
+ disas_simd_3same_int(s, insn);
+ break;
+ }
+}
+
+static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q,
+ int size, int rn, int rd)
+{
+ /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
+ * in the source becomes a size element in the destination).
+ */
+ int pass;
+ TCGv_i32 tcg_res[2];
+ int destelt = is_q ? 2 : 0;
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ NeonGenNarrowFn *genfn = NULL;
+ NeonGenNarrowEnvFn *genenvfn = NULL;
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x12: /* XTN, SQXTUN */
+ {
+ static NeonGenNarrowFn * const xtnfns[3] = {
+ gen_helper_neon_narrow_u8,
+ gen_helper_neon_narrow_u16,
+ tcg_gen_trunc_i64_i32,
+ };
+ static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
+ gen_helper_neon_unarrow_sat8,
+ gen_helper_neon_unarrow_sat16,
+ gen_helper_neon_unarrow_sat32,
+ };
+ if (u) {
+ genenvfn = sqxtunfns[size];
+ } else {
+ genfn = xtnfns[size];
+ }
+ break;
+ }
+ case 0x14: /* SQXTN, UQXTN */
+ {
+ static NeonGenNarrowEnvFn * const fns[3][2] = {
+ { gen_helper_neon_narrow_sat_s8,
+ gen_helper_neon_narrow_sat_u8 },
+ { gen_helper_neon_narrow_sat_s16,
+ gen_helper_neon_narrow_sat_u16 },
+ { gen_helper_neon_narrow_sat_s32,
+ gen_helper_neon_narrow_sat_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ if (genfn) {
+ genfn(tcg_res[pass], tcg_op);
+ } else {
+ genenvfn(tcg_res[pass], cpu_env, tcg_op);
+ }
+
+ tcg_temp_free_i64(tcg_op);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+static void handle_rev(DisasContext *s, int opcode, bool u,
+ bool is_q, int size, int rn, int rd)
+{
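+ /* op distinguishes REV64 (0), REV32 (1) and REV16 (2); grp_size is
+ * the log2 byte size of the region being reversed, so opsz >= 3
+ * means the element is at least as wide as that region and the
+ * encoding is unallocated.
+ */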
+ int op = (opcode << 1) | u;
+ int opsz = op + size;
+ int grp_size = 3 - opsz;
+ int dsize = is_q ? 128 : 64;
+ int i;
+
+ if (opsz >= 3) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 0) {
+ /* Special case bytes, use bswap op on each group of elements */
+ int groups = dsize / (8 << grp_size);
+
+ for (i = 0; i < groups; i++) {
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_tmp, rn, i, grp_size);
+ switch (grp_size) {
+ case MO_16:
+ tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
+ break;
+ case MO_32:
+ tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
+ break;
+ case MO_64:
+ tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ write_vec_element(s, tcg_tmp, rd, i, grp_size);
+ tcg_temp_free_i64(tcg_tmp);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ int revmask = (1 << grp_size) - 1;
+ int esize = 8 << size;
+ int elements = dsize / esize;
+ TCGv_i64 tcg_rn = tcg_temp_new_i64();
+ TCGv_i64 tcg_rd = tcg_const_i64(0);
+ TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
+
+ for (i = 0; i < elements; i++) {
+ int e_rev = (i & 0xf) ^ revmask;
+ int off = e_rev * esize;
+ read_vec_element(s, tcg_rn, rn, i, size);
+ if (off >= 64) {
+ tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
+ tcg_rn, off - 64, esize);
+ } else {
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
+ }
+ }
+ write_vec_element(s, tcg_rd, rd, 0, MO_64);
+ write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_rd_hi);
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i64(tcg_rn);
+ }
+}
+
+/* C3.6.17 AdvSIMD two reg misc
+ * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +---+---+---+-----------+------+-----------+--------+-----+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
+ * +---+---+---+-----------+------+-----------+--------+-----+------+------+
+ */
+static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
+{
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ bool u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ switch (opcode) {
+ case 0x0: /* REV64, REV32 */
+ case 0x1: /* REV16 */
+ handle_rev(s, opcode, u, is_q, size, rn, rd);
+ return;
+ case 0x5: /* CNT, NOT, RBIT */
+ if (u && size == 0) {
+ /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
+ size = 3;
+ break;
+ } else if (u && size == 1) {
+ /* RBIT */
+ break;
+ } else if (!u && size == 0) {
+ /* CNT */
+ break;
+ }
+ unallocated_encoding(s);
+ return;
+ case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
+ case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
+ return;
+ case 0x2: /* SADDLP, UADDLP */
+ case 0x4: /* CLS, CLZ */
+ case 0x6: /* SADALP, UADALP */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ return;
+ case 0x13: /* SHLL, SHLL2 */
+ if (u == 0 || size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ return;
+ case 0xa: /* CMLT */
+ if (u == 1) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x8: /* CMGT, CMGE */
+ case 0x9: /* CMEQ, CMLE */
+ case 0xb: /* ABS, NEG */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x3: /* SUQADD, USQADD */
+ case 0x7: /* SQABS, SQNEG */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ return;
+ case 0xc ... 0xf:
+ case 0x16 ... 0x1d:
+ case 0x1f:
+ {
+ /* Floating point: U, size[1] and opcode indicate operation;
+ * size[0] indicates single or double precision.
+ */
+ opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+ size = extract32(size, 0, 1) ? 3 : 2;
+ switch (opcode) {
+ case 0x2f: /* FABS */
+ case 0x6f: /* FNEG */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x2c: /* FCMGT (zero) */
+ case 0x2d: /* FCMEQ (zero) */
+ case 0x2e: /* FCMLT (zero) */
+ case 0x6c: /* FCMGE (zero) */
+ case 0x6d: /* FCMLE (zero) */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
+ return;
+ case 0x16: /* FCVTN, FCVTN2 */
+ case 0x17: /* FCVTL, FCVTL2 */
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x1d: /* SCVTF */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ case 0x3c: /* URECPE */
+ case 0x3d: /* FRECPE */
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ case 0x58: /* FRINTA */
+ case 0x59: /* FRINTX */
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x5d: /* UCVTF */
+ case 0x79: /* FRINTI */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ case 0x7c: /* URSQRTE */
+ case 0x7d: /* FRSQRTE */
+ case 0x7f: /* FSQRT */
+ unsupported_encoding(s, insn);
+ return;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ }
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 3) {
+ /* All 64-bit element operations can be shared with scalar 2misc */
+ int pass;
+
+ for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+
+ handle_2misc_64(s, opcode, u, tcg_res, tcg_op);
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_op);
+ }
+ } else {
+ int pass;
+
+ for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ TCGCond cond;
+
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+
+ if (size == 2) {
+ /* Special cases for 32 bit elements */
+ switch (opcode) {
+ case 0xa: /* CMLT */
+ /* 32 bit integer comparison against zero, result is
+ * test ? (2^32 - 1) : 0. We implement via setcond(test)
+ * and then negating the result.
+ */
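+ /* For example, an input of -1 gives setcond(LT, -1, 0) = 1,
+ * and neg(1) is 0xffffffff, i.e. all bits set.
+ */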
+ cond = TCG_COND_LT;
+ do_cmop:
+ tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
+ tcg_gen_neg_i32(tcg_res, tcg_res);
+ break;
+ case 0x8: /* CMGT, CMGE */
+ cond = u ? TCG_COND_GE : TCG_COND_GT;
+ goto do_cmop;
+ case 0x9: /* CMEQ, CMLE */
+ cond = u ? TCG_COND_LE : TCG_COND_EQ;
+ goto do_cmop;
+ case 0xb: /* ABS, NEG */
+ if (u) {
+ tcg_gen_neg_i32(tcg_res, tcg_op);
+ } else {
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ tcg_gen_neg_i32(tcg_res, tcg_op);
+ tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
+ tcg_zero, tcg_op, tcg_res);
+ tcg_temp_free_i32(tcg_zero);
+ }
+ break;
+ case 0x2f: /* FABS */
+ gen_helper_vfp_abss(tcg_res, tcg_op);
+ break;
+ case 0x6f: /* FNEG */
+ gen_helper_vfp_negs(tcg_res, tcg_op);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ /* Use helpers for 8 and 16 bit elements */
+ switch (opcode) {
+ case 0x5: /* CNT, RBIT */
+ /* For these two insns size is part of the opcode specifier
+ * (handled earlier); they always operate on byte elements.
+ */
+ if (u) {
+ gen_helper_neon_rbit_u8(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_cnt_u8(tcg_res, tcg_op);
+ }
+ break;
+ case 0x8: /* CMGT, CMGE */
+ case 0x9: /* CMEQ, CMLE */
+ case 0xa: /* CMLT */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
+ { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
+ { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
+ };
+ NeonGenTwoOpFn *genfn;
+ int comp;
+ bool reverse;
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+
+ /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
+ comp = (opcode - 0x8) * 2 + u;
+ /* ...but LE, LT are implemented as reverse GE, GT */
+ reverse = (comp > 2);
+ if (reverse) {
+ comp = 4 - comp;
+ }
+ genfn = fns[comp][size];
+ if (reverse) {
+ genfn(tcg_res, tcg_zero, tcg_op);
+ } else {
+ genfn(tcg_res, tcg_op, tcg_zero);
+ }
+ tcg_temp_free_i32(tcg_zero);
+ break;
+ }
+ case 0xb: /* ABS, NEG */
+ if (u) {
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ if (size) {
+ gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
+ } else {
+ gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
+ }
+ tcg_temp_free_i32(tcg_zero);
+ } else {
+ if (size) {
+ gen_helper_neon_abs_s16(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_abs_s8(tcg_res, tcg_op);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ }
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C3.6.13 AdvSIMD scalar x indexed element
+ * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * C3.6.18 AdvSIMD vector x indexed element
+ * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
+ * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
+ * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
+ */
+static void disas_simd_indexed(DisasContext *s, uint32_t insn)
+{
+ /* This encoding has two kinds of instruction:
+ * normal, where we perform elt x idxelt => elt for each
+ * element in the vector
+ * long, where we perform elt x idxelt and generate a result of
+ * double the width of the input element
+ * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
+ */
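+ /* For example MUL V0.4S, V1.4S, V2.S[1] is a "normal" op (32x32->32
+ * per element), while SMULL V0.2D, V1.2S, V2.S[1] is a "long" op
+ * (32x32->64); SMULL2 operates on the upper half of Vn.
+ */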
+ bool is_scalar = extract32(insn, 28, 1);
+ bool is_q = extract32(insn, 30, 1);
+ bool u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int l = extract32(insn, 21, 1);
+ int m = extract32(insn, 20, 1);
+ /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
+ int rm = extract32(insn, 16, 4);
+ int opcode = extract32(insn, 12, 4);
+ int h = extract32(insn, 11, 1);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ bool is_long = false;
+ bool is_fp = false;
+ int index;
+ TCGv_ptr fpst;
+
+ switch (opcode) {
+ case 0x0: /* MLA */
+ case 0x4: /* MLS */
+ if (!u || is_scalar) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+ if (is_scalar) {
+ unallocated_encoding(s);
+ return;
+ }
+ is_long = true;
+ break;
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ case 0xb: /* SQDMULL, SQDMULL2 */
+ is_long = true;
+ /* fall through */
+ case 0xc: /* SQDMULH */
+ case 0xd: /* SQRDMULH */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x8: /* MUL */
+ if (u || is_scalar) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x1: /* FMLA */
+ case 0x5: /* FMLS */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x9: /* FMUL, FMULX */
+ if (!extract32(size, 1, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+ is_fp = true;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (is_fp) {
+ /* low bit of size indicates single/double */
+ size = extract32(size, 0, 1) ? 3 : 2;
+ if (size == 2) {
+ index = h << 1 | l;
+ } else {
+ if (l || !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ index = h;
+ }
+ rm |= (m << 4);
+ } else {
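+ /* For 16 bit elements the index is H:L:M (8 positions) and Rm is
+ * limited to V0-V15; for 32 bit elements the index is H:L and M
+ * becomes bit 4 of Rm.
+ */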
+ switch (size) {
+ case 1:
+ index = h << 2 | l << 1 | m;
+ break;
+ case 2:
+ index = h << 1 | l;
+ rm |= (m << 4);
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (is_fp) {
+ fpst = get_fpstatus_ptr();
+ } else {
+ TCGV_UNUSED_PTR(fpst);
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_idx = tcg_temp_new_i64();
+ int pass;
+
+ assert(is_fp && is_q && !is_long);
+
+ read_vec_element(s, tcg_idx, rm, index, MO_64);
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+
+ switch (opcode) {
+ case 0x5: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negd(tcg_op, tcg_op);
+ /* fall through */
+ case 0x1: /* FMLA */
+ read_vec_element(s, tcg_res, rd, pass, MO_64);
+ gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ break;
+ case 0x9: /* FMUL, FMULX */
+ if (u) {
+ gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_op);
+ tcg_temp_free_i64(tcg_res);
+ }
+
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
+
+ tcg_temp_free_i64(tcg_idx);
+ } else if (!is_long) {
+ /* 32 bit floating point, or 16 or 32 bit integer.
+ * For the 16 bit scalar case we use the usual Neon helpers and
+ * rely on the fact that 0 op 0 == 0 with no side effects.
+ */
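+ /* (The Neon helpers operate on two 16 bit lanes packed into a 32 bit
+ * value; in the scalar case the unused upper lane of both operands is
+ * zero, so it merely computes a harmless 0 op 0 result.)
+ */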
+ TCGv_i32 tcg_idx = tcg_temp_new_i32();
+ int pass, maxpasses;
+
+ if (is_scalar) {
+ maxpasses = 1;
+ } else {
+ maxpasses = is_q ? 4 : 2;
+ }
+
+ read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+ if (size == 1 && !is_scalar) {
+ /* The simplest way to handle the 16x16 indexed ops is to duplicate
+ * the index into both halves of the 32 bit tcg_idx and then use
+ * the usual Neon helpers.
+ */
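+ /* e.g. 0x0000abcd becomes 0xabcdabcd, so both 16 bit lanes of each
+ * packed operand pair see the same indexed element.
+ */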
+ tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+ }
+
+ for (pass = 0; pass < maxpasses; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
+
+ switch (opcode) {
+ case 0x0: /* MLA */
+ case 0x4: /* MLS */
+ case 0x8: /* MUL */
+ {
+ static NeonGenTwoOpFn * const fns[2][2] = {
+ { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+ { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ };
+ NeonGenTwoOpFn *genfn;
+ bool is_sub = opcode == 0x4;
+
+ if (size == 1) {
+ gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
+ } else {
+ tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
+ }
+ if (opcode == 0x8) {
+ break;
+ }
+ read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ genfn = fns[size - 1][is_sub];
+ genfn(tcg_res, tcg_op, tcg_res);
+ break;
+ }
+ case 0x5: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negs(tcg_op, tcg_op);
+ /* fall through */
+ case 0x1: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ break;
+ case 0x9: /* FMUL, FMULX */
+ if (u) {
+ gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ case 0xc: /* SQDMULH */
+ if (size == 1) {
+ gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ }
+ break;
+ case 0xd: /* SQRDMULH */
+ if (size == 1) {
+ gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (is_scalar) {
+ write_fp_sreg(s, rd, tcg_res);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+
+ tcg_temp_free_i32(tcg_op);
+ tcg_temp_free_i32(tcg_res);
+ }
+
+ tcg_temp_free_i32(tcg_idx);
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ /* long ops: 16x16->32 or 32x32->64 */
+ TCGv_i64 tcg_res[2];
+ int pass;
+ bool satop = extract32(opcode, 0, 1);
+ TCGMemOp memop = MO_32;
+
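+ /* Opcodes with bit 0 set (SQDMLAL, SQDMLSL, SQDMULL) are the
+ * saturating-doubling ops and are always signed; for the others the
+ * U bit selects unsigned.
+ */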
+ if (satop || !u) {
+ memop |= MO_SIGN;
+ }
+
+ if (size == 2) {
+ TCGv_i64 tcg_idx = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_idx, rm, index, memop);
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_passres;
+ int passelt;
+
+ if (is_scalar) {
+ passelt = 0;
+ } else {
+ passelt = pass + (is_q * 2);
+ }
+
+ read_vec_element(s, tcg_op, rn, passelt, memop);
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ if (opcode == 0xa || opcode == 0xb) {
+ /* Non-accumulating ops */
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
+ tcg_temp_free_i64(tcg_op);
+
+ if (satop) {
+ /* saturating, doubling */
+ gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ }
+
+ if (opcode == 0xa || opcode == 0xb) {
+ continue;
+ }
+
+ /* Accumulating op: handle accumulate step */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+ switch (opcode) {
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ break;
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ break;
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ tcg_gen_neg_i64(tcg_passres, tcg_passres);
+ /* fall through */
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ tcg_temp_free_i64(tcg_idx);
+
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ TCGv_i32 tcg_idx = tcg_temp_new_i32();
+
+ assert(size == 1);
+ read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+ if (!is_scalar) {
+ /* The simplest way to handle the 16x16 indexed ops is to
+ * duplicate the index into both halves of the 32 bit tcg_idx
+ * and then use the usual Neon helpers.
+ */
+ tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+ }
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i64 tcg_passres;
+
+ if (is_scalar) {
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
+ } else {
+ read_vec_element_i32(s, tcg_op, rn,
+ pass + (is_q * 2), MO_32);
+ }
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ if (opcode == 0xa || opcode == 0xb) {
+ /* Non-accumulating ops */
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ if (memop & MO_SIGN) {
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
+ }
+ if (satop) {
+ gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ }
+ tcg_temp_free_i32(tcg_op);
+
+ if (opcode == 0xa || opcode == 0xb) {
+ continue;
+ }
+
+ /* Accumulating op: handle accumulate step */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+ switch (opcode) {
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
+ tcg_passres);
+ break;
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
+ tcg_passres);
+ break;
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+ /* fall through */
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ tcg_temp_free_i32(tcg_idx);
+
+ if (is_scalar) {
+ tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
+ }
+ }
+
+ if (is_scalar) {
+ tcg_res[1] = tcg_const_i64(0);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
+}
+
+/* C3.6.19 Crypto AES
+ * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +-----------------+------+-----------+--------+-----+------+------+
+ * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
+ * +-----------------+------+-----------+--------+-----+------+------+
+ */
+static void disas_crypto_aes(DisasContext *s, uint32_t insn)
+{
+ unsupported_encoding(s, insn);
+}
+
+/* C3.6.20 Crypto three-reg SHA
+ * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
+ * +-----------------+------+---+------+---+--------+-----+------+------+
+ * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
+ * +-----------------+------+---+------+---+--------+-----+------+------+
+ */
+static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
+{
+ unsupported_encoding(s, insn);
+}
+
+/* C3.6.21 Crypto two-reg SHA
+ * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +-----------------+------+-----------+--------+-----+------+------+
+ * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
+ * +-----------------+------+-----------+--------+-----+------+------+
+ */
+static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
+{
+ unsupported_encoding(s, insn);
+}
+
+/* C3.6 Data processing - SIMD, inc Crypto
+ *
+ * As the decode gets a little complex we are using a table based
+ * approach for this part of the decode.
+ */
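+/* An instruction matches a table entry when (insn & mask) == pattern;
+ * lookup_disas_fn() returns the fn of the first matching entry, or NULL
+ * if nothing matches, which we treat as an unallocated encoding.
+ */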
+static const AArch64DecodeTable data_proc_simd[] = {
+ /* pattern , mask , fn */
+ { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
+ { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
+ { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
+ { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
+ { 0x0e000400, 0x9fe08400, disas_simd_copy },
+ { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
+ /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
+ { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
+ { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
+ { 0x0e000000, 0xbf208c00, disas_simd_tb },
+ { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
+ { 0x2e000000, 0xbf208400, disas_simd_ext },
+ { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
+ { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
+ { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
+ { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
+ { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
+ { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
+ { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
+ { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
+ { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
+ { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
+ { 0x00000000, 0x00000000, NULL }
+};
+
static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
/* Note that this is called with all non-FP cases from
* table C3-6 so it must UNDEF for entries not specifically
* allocated to instructions in that table.
*/
- unsupported_encoding(s, insn);
+ AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
+ if (fn) {
+ fn(s, insn);
+ } else {
+ unallocated_encoding(s);
+ }
}
/* C3.6 Data processing - SIMD and floating point */
@@ -4318,7 +9038,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
dc->condexec_mask = 0;
dc->condexec_cond = 0;
#if !defined(CONFIG_USER_ONLY)
- dc->user = 0;
+ dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
#endif
dc->vfp_enabled = 0;
dc->vec_len = 0;
@@ -4429,6 +9149,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
/* This is a special case because we don't want to just halt the CPU
* if trying to debug across a WFI.
*/
+ gen_a64_set_pc_im(dc->pc);
gen_helper_wfi(cpu_env);
break;
}
@@ -4443,7 +9164,7 @@ done_generating:
qemu_log("----------------\n");
qemu_log("IN: %s\n", lookup_symbol(pc_start));
log_target_disas(env, pc_start, dc->pc - pc_start,
- dc->thumb | (dc->bswap_code << 1));
+ 4 | (dc->bswap_code << 1));
qemu_log("\n");
}
#endif
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 8d240e160d..253d2a13eb 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2759,6 +2759,113 @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
return 0;
}
+static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
+ int rounding)
+{
+ TCGv_ptr fpst = get_fpstatus_ptr(0);
+ TCGv_i32 tcg_rmode;
+
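+ /* set_rmode installs the new rounding mode in the fp status and
+ * returns the previous one, so the second call below restores it.
+ */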
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+
+ if (dp) {
+ TCGv_i64 tcg_op;
+ TCGv_i64 tcg_res;
+ tcg_op = tcg_temp_new_i64();
+ tcg_res = tcg_temp_new_i64();
+ tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
+ gen_helper_rintd(tcg_res, tcg_op, fpst);
+ tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
+ tcg_temp_free_i64(tcg_op);
+ tcg_temp_free_i64(tcg_res);
+ } else {
+ TCGv_i32 tcg_op;
+ TCGv_i32 tcg_res;
+ tcg_op = tcg_temp_new_i32();
+ tcg_res = tcg_temp_new_i32();
+ tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+ tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
+ tcg_temp_free_i32(tcg_op);
+ tcg_temp_free_i32(tcg_res);
+ }
+
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+
+ tcg_temp_free_ptr(fpst);
+ return 0;
+}
+
+static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
+ int rounding)
+{
+ bool is_signed = extract32(insn, 7, 1);
+ TCGv_ptr fpst = get_fpstatus_ptr(0);
+ TCGv_i32 tcg_rmode, tcg_shift;
+
+ tcg_shift = tcg_const_i32(0);
+
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+
+ if (dp) {
+ TCGv_i64 tcg_double, tcg_res;
+ TCGv_i32 tcg_tmp;
+ /* Rd is encoded as a single precision register even when the source
+ * is double precision.
+ */
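+ /* rd was decoded as a D register (insn[15:12] plus D as bit 4);
+ * remap it to S register numbering (insn[15:12] << 1 | D).
+ */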
+ rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
+ tcg_double = tcg_temp_new_i64();
+ tcg_res = tcg_temp_new_i64();
+ tcg_tmp = tcg_temp_new_i32();
+ tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
+ if (is_signed) {
+ gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
+ }
+ tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res);
+ tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
+ tcg_temp_free_i32(tcg_tmp);
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_double);
+ } else {
+ TCGv_i32 tcg_single, tcg_res;
+ tcg_single = tcg_temp_new_i32();
+ tcg_res = tcg_temp_new_i32();
+ tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
+ if (is_signed) {
+ gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
+ }
+ tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_single);
+ }
+
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+
+ tcg_temp_free_i32(tcg_shift);
+
+ tcg_temp_free_ptr(fpst);
+
+ return 0;
+}
+
+/* Table for converting the most common AArch32 encoding of
+ * rounding mode to arm_fprounding order (which matches the
+ * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
+ */
+static const uint8_t fp_decode_rm[] = {
+ FPROUNDING_TIEAWAY,
+ FPROUNDING_TIEEVEN,
+ FPROUNDING_POSINF,
+ FPROUNDING_NEGINF,
+};
+
static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
@@ -2781,6 +2888,14 @@ static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
return handle_vsel(insn, rd, rn, rm, dp);
} else if ((insn & 0x0fb00e10) == 0x0e800a00) {
return handle_vminmaxnm(insn, rd, rn, rm, dp);
+ } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
+ /* VRINTA, VRINTN, VRINTP, VRINTM */
+ int rounding = fp_decode_rm[extract32(insn, 16, 2)];
+ return handle_vrint(insn, rd, rm, dp, rounding);
+ } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
+ /* VCVTA, VCVTN, VCVTP, VCVTM */
+ int rounding = fp_decode_rm[extract32(insn, 16, 2)];
+ return handle_vcvt(insn, rd, rm, dp, rounding);
}
return 1;
}
@@ -3027,16 +3142,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
VFP_DREG_N(rn, insn);
}
- if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
- /* Integer or single precision destination. */
+ if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
+ ((rn & 0x1e) == 0x6))) {
+ /* Integer or single/half precision destination. */
rd = VFP_SREG_D(insn);
} else {
VFP_DREG_D(rd, insn);
}
if (op == 15 &&
- (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
- /* VCVT from int is always from S reg regardless of dp bit.
- * VCVT with immediate frac_bits has same format as SREG_M
+ (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
+ ((rn & 0x1e) == 0x4))) {
+ /* VCVT from int or half precision is always from S reg
+ * regardless of dp bit. VCVT with immediate frac_bits
+ * has same format as SREG_M.
*/
rm = VFP_SREG_M(insn);
} else {
@@ -3126,12 +3244,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
case 5:
case 6:
case 7:
- /* VCVTB, VCVTT: only present with the halfprec extension,
- * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
+ /* VCVTB, VCVTT: only present with the halfprec extension;
+ * UNPREDICTABLE if bit 8 is set prior to ARMv8
+ * (we choose to UNDEF)
*/
- if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
+ if ((dp && !arm_feature(env, ARM_FEATURE_V8)) ||
+ !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
return 1;
}
+ if (!extract32(rn, 1, 1)) {
+ /* Half precision source. */
+ gen_mov_F0_vreg(0, rm);
+ break;
+ }
/* Otherwise fall through */
default:
/* One source operand. */
@@ -3279,21 +3404,39 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
case 3: /* sqrt */
gen_vfp_sqrt(dp);
break;
- case 4: /* vcvtb.f32.f16 */
+ case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
tmp = gen_vfp_mrs();
tcg_gen_ext16u_i32(tmp, tmp);
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
+ if (dp) {
+ gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
+ cpu_env);
+ } else {
+ gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
+ cpu_env);
+ }
tcg_temp_free_i32(tmp);
break;
- case 5: /* vcvtt.f32.f16 */
+ case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
tmp = gen_vfp_mrs();
tcg_gen_shri_i32(tmp, tmp, 16);
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
+ if (dp) {
+ gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
+ cpu_env);
+ } else {
+ gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
+ cpu_env);
+ }
tcg_temp_free_i32(tmp);
break;
- case 6: /* vcvtb.f16.f32 */
+ case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
tmp = tcg_temp_new_i32();
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+ if (dp) {
+ gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
+ cpu_env);
+ } else {
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
+ cpu_env);
+ }
gen_mov_F0_vreg(0, rd);
tmp2 = gen_vfp_mrs();
tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
@@ -3301,9 +3444,15 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
tcg_temp_free_i32(tmp2);
gen_vfp_msr(tmp);
break;
- case 7: /* vcvtt.f16.f32 */
+ case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
tmp = tcg_temp_new_i32();
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+ if (dp) {
+ gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
+ cpu_env);
+ } else {
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
+ cpu_env);
+ }
tcg_gen_shli_i32(tmp, tmp, 16);
gen_mov_F0_vreg(0, rd);
tmp2 = gen_vfp_mrs();
@@ -3325,6 +3474,44 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
gen_vfp_F1_ld0(dp);
gen_vfp_cmpe(dp);
break;
+ case 12: /* vrintr */
+ {
+ TCGv_ptr fpst = get_fpstatus_ptr(0);
+ if (dp) {
+ gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
+ } else {
+ gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
+ }
+ tcg_temp_free_ptr(fpst);
+ break;
+ }
+ case 13: /* vrintz */
+ {
+ TCGv_ptr fpst = get_fpstatus_ptr(0);
+ TCGv_i32 tcg_rmode;
+ tcg_rmode = tcg_const_i32(float_round_to_zero);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ if (dp) {
+ gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
+ } else {
+ gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
+ }
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_ptr(fpst);
+ break;
+ }
+ case 14: /* vrintx */
+ {
+ TCGv_ptr fpst = get_fpstatus_ptr(0);
+ if (dp) {
+ gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
+ } else {
+ gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
+ }
+ tcg_temp_free_ptr(fpst);
+ break;
+ }
case 15: /* single<->double conversion */
if (dp)
gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
@@ -3398,16 +3585,21 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
}
/* Write back the result. */
- if (op == 15 && (rn >= 8 && rn <= 11))
- ; /* Comparison, do nothing. */
- else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
- /* VCVT double to int: always integer result. */
+ if (op == 15 && (rn >= 8 && rn <= 11)) {
+ /* Comparison, do nothing. */
+ } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
+ (rn & 0x1e) == 0x6)) {
+ /* VCVT double to int: always integer result.
+ * VCVT double to half precision always yields a
+ * single precision result.
+ */
gen_mov_vreg_F0(0, rd);
- else if (op == 15 && rn == 15)
+ } else if (op == 15 && rn == 15) {
/* conversion */
gen_mov_vreg_F0(!dp, rd);
- else
+ } else {
gen_mov_vreg_F0(dp, rd);
+ }
/* break out of the loop if we have finished */
if (veclen == 0)
@@ -4617,8 +4809,22 @@ static const uint8_t neon_3r_sizes[] = {
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
+#define NEON_2RM_VRINTN 40
+#define NEON_2RM_VRINTX 41
+#define NEON_2RM_VRINTA 42
+#define NEON_2RM_VRINTZ 43
#define NEON_2RM_VCVT_F16_F32 44
+#define NEON_2RM_VRINTM 45
#define NEON_2RM_VCVT_F32_F16 46
+#define NEON_2RM_VRINTP 47
+#define NEON_2RM_VCVTAU 48
+#define NEON_2RM_VCVTAS 49
+#define NEON_2RM_VCVTNU 50
+#define NEON_2RM_VCVTNS 51
+#define NEON_2RM_VCVTPU 52
+#define NEON_2RM_VCVTPS 53
+#define NEON_2RM_VCVTMU 54
+#define NEON_2RM_VCVTMS 55
#define NEON_2RM_VRECPE 56
#define NEON_2RM_VRSQRTE 57
#define NEON_2RM_VRECPE_F 58
@@ -4632,6 +4838,9 @@ static int neon_2rm_is_float_op(int op)
{
/* Return true if this neon 2reg-misc op is float-to-float */
return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
+ (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
+ op == NEON_2RM_VRINTM ||
+ (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
op >= NEON_2RM_VRECPE_F);
}
@@ -4676,8 +4885,22 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VMOVN] = 0x7,
[NEON_2RM_VQMOVN] = 0x7,
[NEON_2RM_VSHLL] = 0x7,
+ [NEON_2RM_VRINTN] = 0x4,
+ [NEON_2RM_VRINTX] = 0x4,
+ [NEON_2RM_VRINTA] = 0x4,
+ [NEON_2RM_VRINTZ] = 0x4,
[NEON_2RM_VCVT_F16_F32] = 0x2,
+ [NEON_2RM_VRINTM] = 0x4,
[NEON_2RM_VCVT_F32_F16] = 0x2,
+ [NEON_2RM_VRINTP] = 0x4,
+ [NEON_2RM_VCVTAU] = 0x4,
+ [NEON_2RM_VCVTAS] = 0x4,
+ [NEON_2RM_VCVTNU] = 0x4,
+ [NEON_2RM_VCVTNS] = 0x4,
+ [NEON_2RM_VCVTPU] = 0x4,
+ [NEON_2RM_VCVTPS] = 0x4,
+ [NEON_2RM_VCVTMU] = 0x4,
+ [NEON_2RM_VCVTMS] = 0x4,
[NEON_2RM_VRECPE] = 0x4,
[NEON_2RM_VRSQRTE] = 0x4,
[NEON_2RM_VRECPE_F] = 0x4,
@@ -6388,6 +6611,73 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
}
neon_store_reg(rm, pass, tmp2);
break;
+ case NEON_2RM_VRINTN:
+ case NEON_2RM_VRINTA:
+ case NEON_2RM_VRINTM:
+ case NEON_2RM_VRINTP:
+ case NEON_2RM_VRINTZ:
+ {
+ TCGv_i32 tcg_rmode;
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ int rmode;
+
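+ /* For VRINTN/A/M/P the rounding mode comes from op bits [2:1];
+ * XOR with 1 maps them onto fp_decode_rm indices, giving
+ * N->nearest-even, A->ties-away, M->-inf, P->+inf.
+ */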
+ if (op == NEON_2RM_VRINTZ) {
+ rmode = FPROUNDING_ZERO;
+ } else {
+ rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
+ }
+
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+ gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+ cpu_env);
+ gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
+ gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+ cpu_env);
+ tcg_temp_free_ptr(fpstatus);
+ tcg_temp_free_i32(tcg_rmode);
+ break;
+ }
+ case NEON_2RM_VRINTX:
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
+ break;
+ }
+ case NEON_2RM_VCVTAU:
+ case NEON_2RM_VCVTAS:
+ case NEON_2RM_VCVTNU:
+ case NEON_2RM_VCVTNS:
+ case NEON_2RM_VCVTPU:
+ case NEON_2RM_VCVTPS:
+ case NEON_2RM_VCVTMU:
+ case NEON_2RM_VCVTMS:
+ {
+ bool is_signed = !extract32(insn, 7, 1);
+ TCGv_ptr fpst = get_fpstatus_ptr(1);
+ TCGv_i32 tcg_rmode, tcg_shift;
+ int rmode = fp_decode_rm[extract32(insn, 8, 2)];
+
+ tcg_shift = tcg_const_i32(0);
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+ gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+ cpu_env);
+
+ if (is_signed) {
+ gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
+ tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
+ tcg_shift, fpst);
+ }
+
+ gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+ cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_i32(tcg_shift);
+ tcg_temp_free_ptr(fpst);
+ break;
+ }
case NEON_2RM_VRECPE:
gen_helper_recpe_u32(tmp, tmp, cpu_env);
break;
@@ -6547,6 +6837,17 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
return 1;
}
+ if (ri->accessfn) {
+ /* Emit code to perform further access permissions checks at
+ * runtime; this may result in an exception.
+ */
+ TCGv_ptr tmpptr;
+ gen_set_pc_im(s, s->pc);
+ tmpptr = tcg_const_ptr(ri);
+ gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+ tcg_temp_free_ptr(tmpptr);
+ }
+
/* Handle special cases first */
switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
case ARM_CP_NOP:
@@ -6575,7 +6876,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
tmp64 = tcg_const_i64(ri->resetvalue);
} else if (ri->readfn) {
TCGv_ptr tmpptr;
- gen_set_pc_im(s, s->pc);
tmp64 = tcg_temp_new_i64();
tmpptr = tcg_const_ptr(ri);
gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
@@ -6598,7 +6898,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
tmp = tcg_const_i32(ri->resetvalue);
} else if (ri->readfn) {
TCGv_ptr tmpptr;
- gen_set_pc_im(s, s->pc);
tmp = tcg_temp_new_i32();
tmpptr = tcg_const_ptr(ri);
gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
@@ -6633,7 +6932,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
tcg_temp_free_i32(tmphi);
if (ri->writefn) {
TCGv_ptr tmpptr = tcg_const_ptr(ri);
- gen_set_pc_im(s, s->pc);
gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
tcg_temp_free_ptr(tmpptr);
} else {
@@ -6644,7 +6942,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
if (ri->writefn) {
TCGv_i32 tmp;
TCGv_ptr tmpptr;
- gen_set_pc_im(s, s->pc);
tmp = load_reg(s, rt);
tmpptr = tcg_const_ptr(ri);
gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
@@ -6672,6 +6969,19 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
return 0;
}
+ /* Unknown register; this might be a guest error or a QEMU
+ * unimplemented feature.
+ */
+ if (is64) {
+ qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
+ "64 bit system register cp:%d opc1: %d crm:%d\n",
+ isread ? "read" : "write", cpnum, opc1, crm);
+ } else {
+ qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
+ "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d\n",
+ isread ? "read" : "write", cpnum, opc1, crn, crm, opc2);
+ }
+
return 1;
}
@@ -7251,6 +7561,36 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
store_reg(s, 14, tmp2);
gen_bx(s, tmp);
break;
+ case 0x4:
+ {
+ /* crc32/crc32c */
+ uint32_t c = extract32(insn, 8, 4);
+
+ /* Check this CPU supports ARMv8 CRC instructions.
+ * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
+ * Bits 8, 10 and 11 should be zero.
+ */
+ if (!arm_feature(env, ARM_FEATURE_CRC) || op1 == 0x3 ||
+ (c & 0xd) != 0) {
+ goto illegal_op;
+ }
+
+ rn = extract32(insn, 16, 4);
+ rd = extract32(insn, 12, 4);
+
+ tmp = load_reg(s, rn);
+ tmp2 = load_reg(s, rm);
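+ /* op1 encodes the operand size: 1 << op1 gives 1, 2 or 4 bytes
+ * for CRC32B/H/W, passed to the helper as the byte count.
+ */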
+ tmp3 = tcg_const_i32(1 << op1);
+ if (c & 0x2) {
+ gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
+ } else {
+ gen_helper_crc32(tmp, tmp, tmp2, tmp3);
+ }
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ store_reg(s, rd, tmp);
+ break;
+ }
case 0x5: /* saturating add/subtract */
ARCH(5TE);
rd = (insn >> 12) & 0xf;
@@ -8835,6 +9175,32 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
case 0x18: /* clz */
gen_helper_clz(tmp, tmp);
break;
+ case 0x20:
+ case 0x21:
+ case 0x22:
+ case 0x28:
+ case 0x29:
+ case 0x2a:
+ {
+ /* crc32/crc32c */
+ uint32_t sz = op & 0x3;
+ uint32_t c = op & 0x8;
+
+ if (!arm_feature(env, ARM_FEATURE_CRC)) {
+ goto illegal_op;
+ }
+
+ tmp2 = load_reg(s, rm);
+ tmp3 = tcg_const_i32(1 << sz);
+ if (c) {
+ gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
+ } else {
+ gen_helper_crc32(tmp, tmp, tmp2, tmp3);
+ }
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ break;
+ }
default:
goto illegal_op;
}