aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2019-02-15 10:20:04 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2019-02-15 10:20:04 +0000
commit 81dbcfa9e1d8bab3f7c4cc923c0b40cd666f374f (patch)
tree 2bb4b8e5422dc39a98281abe7017003f5335a69c
parent 0266c739abbed804deabb4ccde2aa449466ac3b4 (diff)
parent 0f8b09b22234460cb5b8766a25066cf6b5f06842 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190215' into staging
target-arm queue:
 * gdbstub: Send a reply to the vKill packet
 * Improve codegen for neon min/max and saturating arithmetic
 * Fix a bug in clearing FPSCR exception status bits
 * hw/arm/armsse: Fix miswiring of expansion IRQs
 * hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
 * MAINTAINERS: Remove Peter Crosthwaite from various entries
 * arm: Allow system registers for KVM guests to be changed by QEMU code
 * linux-user: support HWCAP_CPUID which exposes ID registers to user code
 * Fix bug in 128-bit cmpxchg for BE Arm guests
 * Implement (no-op) HACR_EL2
 * Fix CRn to be 14 for PMEVTYPER/PMEVCNTR

# gpg: Signature made Fri 15 Feb 2019 10:19:14 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20190215: (25 commits)
  gdbstub: Send a reply to the vKill packet.
  target/arm: Add missing clear_tail calls
  target/arm: Use vector operations for saturation
  target/arm: Split out FPSCR.QC to a vector field
  target/arm: Fix set of bits kept in xregs[ARM_VFP_FPSCR]
  target/arm: Split out flags setting from vfp compares
  target/arm: Fix arm_cpu_dump_state vs FPSCR
  target/arm: Fix vfp_gdb_get/set_reg vs FPSCR
  target/arm: Remove neon min/max helpers
  target/arm: Use tcg integer min/max primitives for neon
  target/arm: Use vector minmax expanders for aarch32
  target/arm: Use vector minmax expanders for aarch64
  target/arm: Rely on optimization within tcg_gen_gvec_or
  hw/arm/armsse: Fix miswiring of expansion IRQs
  hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
  MAINTAINERS: Remove Peter Crosthwaite from various entries
  arm: Allow system registers for KVM guests to be changed by QEMU code
  linux-user/elfload: enable HWCAP_CPUID for AArch64
  target/arm: expose remaining CPUID registers as RAZ
  target/arm: expose MPIDR_EL1 to userspace
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- MAINTAINERS 4
-rw-r--r-- gdbstub.c 1
-rw-r--r-- hw/arm/armsse.c 2
-rw-r--r-- hw/intc/armv7m_nvic.c 4
-rw-r--r-- linux-user/elfload.c 1
-rw-r--r-- target/arm/cpu.h 50
-rw-r--r-- target/arm/helper-a64.c 4
-rw-r--r-- target/arm/helper.c 228
-rw-r--r-- target/arm/helper.h 45
-rw-r--r-- target/arm/kvm32.c 20
-rw-r--r-- target/arm/kvm64.c 2
-rw-r--r-- target/arm/machine.c 2
-rw-r--r-- target/arm/neon_helper.c 14
-rw-r--r-- target/arm/translate-a64.c 77
-rw-r--r-- target/arm/translate-sve.c 6
-rw-r--r-- target/arm/translate.c 219
-rw-r--r-- target/arm/translate.h 4
-rw-r--r-- target/arm/vec_helper.c 134
18 files changed, 622 insertions, 195 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index e170a4c733..ffb029f63a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -110,7 +110,6 @@ Guest CPU cores (TCG):
----------------------
Overall
L: qemu-devel@nongnu.org
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Richard Henderson <rth@twiddle.net>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
@@ -1345,7 +1344,6 @@ F: tests/virtio-scsi-test.c
T: git https://github.com/bonzini/qemu.git scsi-next
SSI
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/ssi/*
@@ -1356,7 +1354,6 @@ F: tests/m25p80-test.c
Xilinx SPI
M: Alistair Francis <alistair@alistair23.me>
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/ssi/xilinx_*
@@ -1766,7 +1763,6 @@ F: qom/cpu.c
F: include/qom/cpu.h
Device Tree
-M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: device_tree.c
diff --git a/gdbstub.c b/gdbstub.c
index ff19579452..bc774ae992 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1361,6 +1361,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
break;
} else if (strncmp(p, "Kill;", 5) == 0) {
/* Kill the target */
+ put_packet(s, "OK");
error_report("QEMU: Terminated via GDBstub");
exit(0);
} else {
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
index 5d53071a5a..9a8c49547d 100644
--- a/hw/arm/armsse.c
+++ b/hw/arm/armsse.c
@@ -565,7 +565,7 @@ static void armsse_realize(DeviceState *dev, Error **errp)
/* Connect EXP_IRQ/EXP_CPUn_IRQ GPIOs to the NVIC's lines 32 and up */
s->exp_irqs[i] = g_new(qemu_irq, s->exp_numirq);
for (j = 0; j < s->exp_numirq; j++) {
- s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, i + 32);
+ s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + 32);
}
if (i == 0) {
gpioname = g_strdup("EXP_IRQ");
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 790a3d9584..ab822f4251 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -1841,7 +1841,7 @@ static MemTxResult nvic_sysreg_read(void *opaque, hwaddr addr,
}
}
break;
- case 0xd18: /* System Handler Priority (SHPR1) */
+ case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
val = 0;
break;
@@ -1956,7 +1956,7 @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
}
nvic_irq_update(s);
return MEMTX_OK;
- case 0xd18: /* System Handler Priority (SHPR1) */
+ case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
return MEMTX_OK;
}
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 775a36ccdd..3a50d587ff 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -580,6 +580,7 @@ static uint32_t get_elf_hwcap(void)
hwcaps |= ARM_HWCAP_A64_FP;
hwcaps |= ARM_HWCAP_A64_ASIMD;
+ hwcaps |= ARM_HWCAP_A64_CPUID;
/* probe for the extra features */
#define GET_FEATURE_ID(feat, hwcap) \
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 47238e4245..84ae6849c2 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -577,11 +577,13 @@ typedef struct CPUARMState {
ARMPredicateReg preg_tmp;
#endif
- uint32_t xregs[16];
/* We store these fpscr fields separately for convenience. */
+ uint32_t qc[4] QEMU_ALIGNED(16);
int vec_len;
int vec_stride;
+ uint32_t xregs[16];
+
/* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];
@@ -1427,6 +1429,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
#define FPCR_DN (1 << 25) /* Default NaN enable bit */
+#define FPCR_QC (1 << 27) /* Cumulative saturation bit */
static inline uint32_t vfp_get_fpsr(CPUARMState *env)
{
@@ -2226,6 +2229,18 @@ static inline bool cptype_valid(int cptype)
#define PL0_R (0x02 | PL1_R)
#define PL0_W (0x01 | PL1_W)
+/*
+ * For user-mode some registers are accessible to EL0 via a kernel
+ * trap-and-emulate ABI. In this case we define the read permissions
+ * as actually being PL0_R. However some bits of any given register
+ * may still be masked.
+ */
+#ifdef CONFIG_USER_ONLY
+#define PL0U_R PL0_R
+#else
+#define PL0U_R PL1_R
+#endif
+
#define PL3_RW (PL3_R | PL3_W)
#define PL2_RW (PL2_R | PL2_W)
#define PL1_RW (PL1_R | PL1_W)
@@ -2452,6 +2467,30 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
}
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
+/*
+ * Definition of an ARM co-processor register as viewed from
+ * userspace. This is used for presenting sanitised versions of
+ * registers to userspace when emulating the Linux AArch64 CPU
+ * ID/feature ABI (advertised as HWCAP_CPUID).
+ */
+typedef struct ARMCPRegUserSpaceInfo {
+ /* Name of register */
+ const char *name;
+
+ /* Is the name actually a glob pattern */
+ bool is_glob;
+
+ /* Only some bits are exported to user space */
+ uint64_t exported_bits;
+
+ /* Fixed bits are applied after the mask */
+ uint64_t fixed_bits;
+} ARMCPRegUserSpaceInfo;
+
+#define REGUSERINFO_SENTINEL { .name = NULL }
+
+void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods);
+
/* CPWriteFn that can be used to implement writes-ignored behaviour */
void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value);
@@ -2499,18 +2538,25 @@ bool write_list_to_cpustate(ARMCPU *cpu);
/**
* write_cpustate_to_list:
* @cpu: ARMCPU
+ * @kvm_sync: true if this is for syncing back to KVM
*
* For each register listed in the ARMCPU cpreg_indexes list, write
* its value from the ARMCPUState structure into the cpreg_values list.
* This is used to copy info from TCG's working data structures into
* KVM or for outbound migration.
*
+ * @kvm_sync is true if we are doing this in order to sync the
+ * register state back to KVM. In this case we will only update
+ * values in the list if the previous list->cpustate sync actually
+ * successfully wrote the CPU state. Otherwise we will keep the value
+ * that is in the list.
+ *
* Returns: true if all register values were read correctly,
* false if some register was unknown or could not be read.
* Note that we do not stop early on failure -- we will attempt
* reading all registers in the list.
*/
-bool write_cpustate_to_list(ARMCPU *cpu);
+bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 101fa6d3ea..70850e564d 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -583,8 +583,8 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
* High and low need to be switched here because this is not actually a
* 128bit store but two doublewords stored consecutively
*/
- Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
- Int128 newv = int128_make128(new_lo, new_hi);
+ Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+ Int128 newv = int128_make128(new_hi, new_lo);
Int128 oldv;
uintptr_t ra = GETPC();
uint64_t o0, o1;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 520ceea7a4..55e9b77bb1 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -81,7 +81,7 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
}
switch (reg - nregs) {
case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4;
- case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4;
+ case 1: stl_p(buf, vfp_get_fpscr(env)); return 4;
case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4;
}
return 0;
@@ -107,7 +107,7 @@ static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
}
switch (reg - nregs) {
case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4;
- case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4;
+ case 1: vfp_set_fpscr(env, ldl_p(buf)); return 4;
case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4;
}
return 0;
@@ -264,7 +264,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
return true;
}
-bool write_cpustate_to_list(ARMCPU *cpu)
+bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
{
/* Write the coprocessor state from cpu->env to the (index,value) list. */
int i;
@@ -273,6 +273,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
for (i = 0; i < cpu->cpreg_array_len; i++) {
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
const ARMCPRegInfo *ri;
+ uint64_t newval;
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
if (!ri) {
@@ -282,7 +283,29 @@ bool write_cpustate_to_list(ARMCPU *cpu)
if (ri->type & ARM_CP_NO_RAW) {
continue;
}
- cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
+
+ newval = read_raw_cp_reg(&cpu->env, ri);
+ if (kvm_sync) {
+ /*
+ * Only sync if the previous list->cpustate sync succeeded.
+ * Rather than tracking the success/failure state for every
+ * item in the list, we just recheck "does the raw write we must
+ * have made in write_list_to_cpustate() read back OK" here.
+ */
+ uint64_t oldval = cpu->cpreg_values[i];
+
+ if (oldval == newval) {
+ continue;
+ }
+
+ write_raw_cp_reg(&cpu->env, ri, oldval);
+ if (read_raw_cp_reg(&cpu->env, ri) != oldval) {
+ continue;
+ }
+
+ write_raw_cp_reg(&cpu->env, ri, newval);
+ }
+ cpu->cpreg_values[i] = newval;
}
return ok;
}
@@ -3657,13 +3680,6 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
return mpidr_read_val(env);
}
-static const ARMCPRegInfo mpidr_cp_reginfo[] = {
- { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
- .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
- REGINFO_SENTINEL
-};
-
static const ARMCPRegInfo lpae_cp_reginfo[] = {
/* NOP AMAIR0/1 */
{ .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
@@ -4434,6 +4450,9 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW,
.type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
.access = PL2_RW,
@@ -4666,6 +4685,9 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
.writefn = hcr_writelow },
+ { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
@@ -5855,25 +5877,25 @@ void register_cp_regs_for_features(ARMCPU *cpu)
char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i);
char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i);
ARMCPRegInfo pmev_regs[] = {
- { .name = pmevcntr_name, .cp = 15, .crn = 15,
+ { .name = pmevcntr_name, .cp = 15, .crn = 14,
.crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
.access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
.readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
.accessfn = pmreg_access },
{ .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .crn = 15, .crm = 8 | (3 & (i >> 3)),
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)),
.opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
.type = ARM_CP_IO,
.readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
.raw_readfn = pmevcntr_rawread,
.raw_writefn = pmevcntr_rawwrite },
- { .name = pmevtyper_name, .cp = 15, .crn = 15,
+ { .name = pmevtyper_name, .cp = 15, .crn = 14,
.crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
.access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
.readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
.accessfn = pmreg_access },
{ .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .crn = 15, .crm = 12 | (3 & (i >> 3)),
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)),
.opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
.type = ARM_CP_IO,
.readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
@@ -6103,6 +6125,38 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.resetvalue = cpu->pmceid1 },
REGINFO_SENTINEL
};
+#ifdef CONFIG_USER_ONLY
+ ARMCPRegUserSpaceInfo v8_user_idregs[] = {
+ { .name = "ID_AA64PFR0_EL1",
+ .exported_bits = 0x000f000f00ff0000,
+ .fixed_bits = 0x0000000000000011 },
+ { .name = "ID_AA64PFR1_EL1",
+ .exported_bits = 0x00000000000000f0 },
+ { .name = "ID_AA64PFR*_EL1_RESERVED",
+ .is_glob = true },
+ { .name = "ID_AA64ZFR0_EL1" },
+ { .name = "ID_AA64MMFR0_EL1",
+ .fixed_bits = 0x00000000ff000000 },
+ { .name = "ID_AA64MMFR1_EL1" },
+ { .name = "ID_AA64MMFR*_EL1_RESERVED",
+ .is_glob = true },
+ { .name = "ID_AA64DFR0_EL1",
+ .fixed_bits = 0x0000000000000006 },
+ { .name = "ID_AA64DFR1_EL1" },
+ { .name = "ID_AA64DFR*_EL1_RESERVED",
+ .is_glob = true },
+ { .name = "ID_AA64AFR*",
+ .is_glob = true },
+ { .name = "ID_AA64ISAR0_EL1",
+ .exported_bits = 0x00fffffff0fffff0 },
+ { .name = "ID_AA64ISAR1_EL1",
+ .exported_bits = 0x000000f0ffffffff },
+ { .name = "ID_AA64ISAR*_EL1_RESERVED",
+ .is_glob = true },
+ REGUSERINFO_SENTINEL
+ };
+ modify_arm_cp_regs(v8_idregs, v8_user_idregs);
+#endif
/* RVBAR_EL1 is only implemented if EL1 is the highest EL */
if (!arm_feature(env, ARM_FEATURE_EL3) &&
!arm_feature(env, ARM_FEATURE_EL2)) {
@@ -6379,6 +6433,15 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W,
.type = ARM_CP_NOP | ARM_CP_OVERRIDE
};
+#ifdef CONFIG_USER_ONLY
+ ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = {
+ { .name = "MIDR_EL1",
+ .exported_bits = 0x00000000ffffffff },
+ { .name = "REVIDR_EL1" },
+ REGUSERINFO_SENTINEL
+ };
+ modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo);
+#endif
if (arm_feature(env, ARM_FEATURE_OMAPCP) ||
arm_feature(env, ARM_FEATURE_STRONGARM)) {
ARMCPRegInfo *r;
@@ -6412,6 +6475,20 @@ void register_cp_regs_for_features(ARMCPU *cpu)
}
if (arm_feature(env, ARM_FEATURE_MPIDR)) {
+ ARMCPRegInfo mpidr_cp_reginfo[] = {
+ { .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
+ .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
+ REGINFO_SENTINEL
+ };
+#ifdef CONFIG_USER_ONLY
+ ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = {
+ { .name = "MPIDR_EL1",
+ .fixed_bits = 0x0000000080000000 },
+ REGUSERINFO_SENTINEL
+ };
+ modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo);
+#endif
define_arm_cp_regs(cpu, mpidr_cp_reginfo);
}
@@ -6851,7 +6928,11 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
if (r->state != ARM_CP_STATE_AA32) {
int mask = 0;
switch (r->opc1) {
- case 0: case 1: case 2:
+ case 0:
+ /* min_EL EL1, but some accessible to EL0 via kernel ABI */
+ mask = PL0U_R | PL1_RW;
+ break;
+ case 1: case 2:
/* min_EL EL1 */
mask = PL1_RW;
break;
@@ -6956,6 +7037,44 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu,
}
}
+/*
+ * Modify ARMCPRegInfo for access from userspace.
+ *
+ * This is a data driven modification directed by
+ * ARMCPRegUserSpaceInfo. All registers become ARM_CP_CONST as
+ * user-space cannot alter any values and dynamic values pertaining to
+ * execution state are hidden from user space view anyway.
+ */
+void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods)
+{
+ const ARMCPRegUserSpaceInfo *m;
+ ARMCPRegInfo *r;
+
+ for (m = mods; m->name; m++) {
+ GPatternSpec *pat = NULL;
+ if (m->is_glob) {
+ pat = g_pattern_spec_new(m->name);
+ }
+ for (r = regs; r->type != ARM_CP_SENTINEL; r++) {
+ if (pat && g_pattern_match_string(pat, r->name)) {
+ r->type = ARM_CP_CONST;
+ r->access = PL0U_R;
+ r->resetvalue = 0;
+ /* continue */
+ } else if (strcmp(r->name, m->name) == 0) {
+ r->type = ARM_CP_CONST;
+ r->access = PL0U_R;
+ r->resetvalue &= m->exported_bits;
+ r->resetvalue |= m->fixed_bits;
+ break;
+ }
+ }
+ if (pat) {
+ g_pattern_spec_free(pat);
+ }
+ }
+}
+
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
{
return g_hash_table_lookup(cpregs, &encoded_cp);
@@ -12585,10 +12704,9 @@ static inline int vfp_exceptbits_from_host(int host_bits)
uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
{
- int i;
- uint32_t fpscr;
+ uint32_t i, fpscr;
- fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff)
+ fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
| (env->vfp.vec_len << 16)
| (env->vfp.vec_stride << 20);
@@ -12597,8 +12715,11 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
/* FZ16 does not generate an input denormal exception. */
i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
& ~float_flag_input_denormal);
-
fpscr |= vfp_exceptbits_from_host(i);
+
+ i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
+ fpscr |= i ? FPCR_QC : 0;
+
return fpscr;
}
@@ -12630,7 +12751,7 @@ static inline int vfp_exceptbits_to_host(int target_bits)
void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
{
int i;
- uint32_t changed;
+ uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
/* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
@@ -12639,15 +12760,25 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
/*
* We don't implement trapped exception handling, so the
- * trap enable bits are all RAZ/WI (not RES0!)
+ * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
+ *
+ * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
+ * (which are stored in fp_status), and the other RES0 bits
+ * in between, then we clear all of the low 16 bits.
*/
- val &= ~(FPCR_IDE | FPCR_IXE | FPCR_UFE | FPCR_OFE | FPCR_DZE | FPCR_IOE);
-
- changed = env->vfp.xregs[ARM_VFP_FPSCR];
- env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
+ env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
env->vfp.vec_len = (val >> 16) & 7;
env->vfp.vec_stride = (val >> 20) & 3;
+ /*
+ * The bit we set within vfp.qc[] is arbitrary; the register as a
+ * whole being zero/non-zero is what counts.
+ */
+ env->vfp.qc[0] = val & FPCR_QC;
+ env->vfp.qc[1] = 0;
+ env->vfp.qc[2] = 0;
+ env->vfp.qc[3] = 0;
+
changed ^= val;
if (changed & (3 << 22)) {
i = (val >> 22) & 3;
@@ -12752,31 +12883,40 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
return float64_sqrt(a, &env->vfp.fp_status);
}
+static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
+{
+ uint32_t flags;
+ switch (cmp) {
+ case float_relation_equal:
+ flags = 0x6;
+ break;
+ case float_relation_less:
+ flags = 0x8;
+ break;
+ case float_relation_greater:
+ flags = 0x2;
+ break;
+ case float_relation_unordered:
+ flags = 0x3;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ env->vfp.xregs[ARM_VFP_FPSCR] =
+ deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
+}
+
/* XXX: check quiet/signaling case */
#define DO_VFP_cmp(p, type) \
void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
{ \
- uint32_t flags; \
- switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \
- case 0: flags = 0x6; break; \
- case -1: flags = 0x8; break; \
- case 1: flags = 0x2; break; \
- default: case 2: flags = 0x3; break; \
- } \
- env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
- | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
+ softfloat_to_vfp_compare(env, \
+ type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
} \
void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
{ \
- uint32_t flags; \
- switch(type ## _compare(a, b, &env->vfp.fp_status)) { \
- case 0: flags = 0x6; break; \
- case -1: flags = 0x8; break; \
- case 1: flags = 0x2; break; \
- default: case 2: flags = 0x3; break; \
- } \
- env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
- | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
+ softfloat_to_vfp_compare(env, \
+ type ## _compare(a, b, &env->vfp.fp_status)); \
}
DO_VFP_cmp(s, float32)
DO_VFP_cmp(d, float64)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 53a38188c6..923e8e1525 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -276,18 +276,6 @@ DEF_HELPER_2(neon_cge_s16, i32, i32, i32)
DEF_HELPER_2(neon_cge_u32, i32, i32, i32)
DEF_HELPER_2(neon_cge_s32, i32, i32, i32)
-DEF_HELPER_2(neon_min_u8, i32, i32, i32)
-DEF_HELPER_2(neon_min_s8, i32, i32, i32)
-DEF_HELPER_2(neon_min_u16, i32, i32, i32)
-DEF_HELPER_2(neon_min_s16, i32, i32, i32)
-DEF_HELPER_2(neon_min_u32, i32, i32, i32)
-DEF_HELPER_2(neon_min_s32, i32, i32, i32)
-DEF_HELPER_2(neon_max_u8, i32, i32, i32)
-DEF_HELPER_2(neon_max_s8, i32, i32, i32)
-DEF_HELPER_2(neon_max_u16, i32, i32, i32)
-DEF_HELPER_2(neon_max_s16, i32, i32, i32)
-DEF_HELPER_2(neon_max_u32, i32, i32, i32)
-DEF_HELPER_2(neon_max_s32, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_s8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u16, i32, i32, i32)
@@ -653,6 +641,39 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c
index bd51eb43c8..a75e04cc8f 100644
--- a/target/arm/kvm32.c
+++ b/target/arm/kvm32.c
@@ -387,24 +387,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}
- /* Note that we do not call write_cpustate_to_list()
- * here, so we are only writing the tuple list back to
- * KVM. This is safe because nothing can change the
- * CPUARMState cp15 fields (in particular gdb accesses cannot)
- * and so there are no changes to sync. In fact syncing would
- * be wrong at this point: for a constant register where TCG and
- * KVM disagree about its value, the preceding write_list_to_cpustate()
- * would not have had any effect on the CPUARMState value (since the
- * register is read-only), and a write_cpustate_to_list() here would
- * then try to write the TCG value back into KVM -- this would either
- * fail or incorrectly change the value the guest sees.
- *
- * If we ever want to allow the user to modify cp15 registers via
- * the gdb stub, we would need to be more clever here (for instance
- * tracking the set of registers kvm_arch_get_registers() successfully
- * managed to update the CPUARMState with, and only allowing those
- * to be written back up into the kernel).
- */
+ write_cpustate_to_list(cpu, true);
+
if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 089af9c5f0..e3ba149248 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -838,6 +838,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}
+ write_cpustate_to_list(cpu, true);
+
if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
diff --git a/target/arm/machine.c b/target/arm/machine.c
index b292549614..124192bfc2 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque)
abort();
}
} else {
- if (!write_cpustate_to_list(cpu)) {
+ if (!write_cpustate_to_list(cpu, false)) {
/* This should never fail. */
abort();
}
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index c2c6491a83..ed1c6fc41c 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -15,7 +15,7 @@
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
-#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
+#define SET_QC() env->vfp.qc[0] = 1
#define NEON_TYPE1(name, type) \
typedef struct \
@@ -581,12 +581,6 @@ NEON_VOP(cge_u32, neon_u32, 1)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
-NEON_VOP(min_s8, neon_s8, 4)
-NEON_VOP(min_u8, neon_u8, 4)
-NEON_VOP(min_s16, neon_s16, 2)
-NEON_VOP(min_u16, neon_u16, 2)
-NEON_VOP(min_s32, neon_s32, 1)
-NEON_VOP(min_u32, neon_u32, 1)
NEON_POP(pmin_s8, neon_s8, 4)
NEON_POP(pmin_u8, neon_u8, 4)
NEON_POP(pmin_s16, neon_s16, 2)
@@ -594,12 +588,6 @@ NEON_POP(pmin_u16, neon_u16, 2)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
-NEON_VOP(max_s8, neon_s8, 4)
-NEON_VOP(max_u8, neon_u8, 4)
-NEON_VOP(max_s16, neon_s16, 2)
-NEON_VOP(max_u16, neon_u16, 2)
-NEON_VOP(max_s32, neon_s32, 1)
-NEON_VOP(max_u32, neon_u32, 1)
NEON_POP(pmax_s8, neon_s8, 4)
NEON_POP(pmax_u8, neon_u8, 4)
NEON_POP(pmax_s16, neon_s16, 2)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e002251ac6..af8e4fd4be 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10648,11 +10648,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
return;
case 2: /* ORR */
- if (rn == rm) { /* MOV */
- gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
- } else {
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
return;
case 3: /* ORN */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
@@ -10952,6 +10948,36 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
}
switch (opcode) {
+ case 0x01: /* SQADD, UQADD */
+ tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
+ offsetof(CPUARMState, vfp.qc),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ (u ? uqadd_op : sqadd_op) + size);
+ return;
+ case 0x05: /* SQSUB, UQSUB */
+ tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
+ offsetof(CPUARMState, vfp.qc),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ (u ? uqsub_op : sqsub_op) + size);
+ return;
+ case 0x0c: /* SMAX, UMAX */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
+ }
+ return;
+ case 0x0d: /* SMIN, UMIN */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
+ }
+ return;
case 0x10: /* ADD, SUB */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
@@ -11033,16 +11059,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x1: /* SQADD, UQADD */
- {
- static NeonGenTwoOpEnvFn * const fns[3][2] = {
- { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
- { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
- { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
- };
- genenvfn = fns[size][u];
- break;
- }
case 0x2: /* SRHADD, URHADD */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11063,16 +11079,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x5: /* SQSUB, UQSUB */
- {
- static NeonGenTwoOpEnvFn * const fns[3][2] = {
- { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
- { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
- { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
- };
- genenvfn = fns[size][u];
- break;
- }
case 0x8: /* SSHL, USHL */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11113,27 +11119,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
- case 0xc: /* SMAX, UMAX */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
- { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
- { tcg_gen_smax_i32, tcg_gen_umax_i32 },
- };
- genfn = fns[size][u];
- break;
- }
-
- case 0xd: /* SMIN, UMIN */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
- { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
- { tcg_gen_smin_i32, tcg_gen_umin_i32 },
- };
- genfn = fns[size][u];
- break;
- }
case 0xe: /* SABD, UABD */
case 0xf: /* SABA, UABA */
{
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index b15b615ceb..3a2eb51566 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -280,11 +280,7 @@ static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- if (a->rn == a->rm) { /* MOV */
- return do_mov_z(s, a->rd, a->rn);
- } else {
- return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
- }
+ return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 66cf28c8cb..dac737f6ca 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4760,10 +4760,10 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
}
/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
-#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
-#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
-#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
+#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
+#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
+#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
+#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
switch ((size << 1) | u) { \
@@ -6148,6 +6148,142 @@ const GVecGen3 cmtst_op[4] = {
.vece = MO_64 },
};
+static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_usadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 uqadd_op[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_ssadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 sqadd_op[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_ussub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 uqsub_op[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_sssub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 sqsub_op[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
We process data in a mixture of 32-bit and 64-bit chunks.
@@ -6294,15 +6430,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
- case 2:
- if (rn == rm) {
- /* VMOV */
- tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size);
- } else {
- /* VORR */
- tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
+ case 2: /* VORR */
+ tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
break;
case 3: /* VORN */
tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
@@ -6337,6 +6467,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
return 0;
+ case NEON_3R_VQADD:
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, vec_size, vec_size,
+ (u ? uqadd_op : sqadd_op) + size);
+ break;
+
+ case NEON_3R_VQSUB:
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, vec_size, vec_size,
+ (u ? uqsub_op : sqsub_op) + size);
+ break;
+
case NEON_3R_VMUL: /* VMUL */
if (u) {
/* Polynomial case allows only P8 and is handled below. */
@@ -6374,6 +6516,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
return 0;
+
+ case NEON_3R_VMAX:
+ if (u) {
+ tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ } else {
+ tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ }
+ return 0;
+ case NEON_3R_VMIN:
+ if (u) {
+ tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ } else {
+ tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ }
+ return 0;
}
if (size == 3) {
@@ -6382,24 +6543,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
neon_load_reg64(cpu_V0, rn + pass);
neon_load_reg64(cpu_V1, rm + pass);
switch (op) {
- case NEON_3R_VQADD:
- if (u) {
- gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- case NEON_3R_VQSUB:
- if (u) {
- gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
case NEON_3R_VSHL:
if (u) {
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
@@ -6515,18 +6658,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_3R_VHADD:
GEN_NEON_INTEGER_OP(hadd);
break;
- case NEON_3R_VQADD:
- GEN_NEON_INTEGER_OP_ENV(qadd);
- break;
case NEON_3R_VRHADD:
GEN_NEON_INTEGER_OP(rhadd);
break;
case NEON_3R_VHSUB:
GEN_NEON_INTEGER_OP(hsub);
break;
- case NEON_3R_VQSUB:
- GEN_NEON_INTEGER_OP_ENV(qsub);
- break;
case NEON_3R_VSHL:
GEN_NEON_INTEGER_OP(shl);
break;
@@ -6539,12 +6676,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_3R_VQRSHL:
GEN_NEON_INTEGER_OP_ENV(qrshl);
break;
- case NEON_3R_VMAX:
- GEN_NEON_INTEGER_OP(max);
- break;
- case NEON_3R_VMIN:
- GEN_NEON_INTEGER_OP(min);
- break;
case NEON_3R_VABD:
GEN_NEON_INTEGER_OP(abd);
break;
@@ -13634,7 +13765,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
i * 2 + 1, (uint32_t)(v >> 32),
i, v);
}
- cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
+ cpu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
}
}
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 17748ddfb9..f25fe75685 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -214,6 +214,10 @@ extern const GVecGen2i ssra_op[4];
extern const GVecGen2i usra_op[4];
extern const GVecGen2i sri_op[4];
extern const GVecGen2i sli_op[4];
+extern const GVecGen4 uqadd_op[4];
+extern const GVecGen4 sqadd_op[4];
+extern const GVecGen4 uqsub_op[4];
+extern const GVecGen4 sqsub_op[4];
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
/*
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 37f338732e..dfc635cf9a 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -36,7 +36,7 @@
#define H4(x) (x)
#endif
-#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
+#define SET_QC() env->vfp.qc[0] = 1
static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
@@ -638,6 +638,7 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
d[i] = FUNC(n[i], stat); \
} \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
}
DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
@@ -688,6 +689,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
d[i] = FUNC(n[i], m[i], stat); \
} \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
}
DO_3OP(gvec_fadd_h, float16_add, float16)
@@ -766,3 +768,133 @@ DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
DO_FMLA_IDX(gvec_fmla_idx_d, float64, )
#undef DO_FMLA_IDX
+
+#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \
+void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPEN *d = vd, *n = vn; TYPEM *m = vm; \
+ bool q = false; \
+ for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \
+ WTYPE dd = (WTYPE)n[i] OP m[i]; \
+ if (dd < MIN) { \
+ dd = MIN; \
+ q = true; \
+ } else if (dd > MAX) { \
+ dd = MAX; \
+ q = true; \
+ } \
+ d[i] = dd; \
+ } \
+ if (q) { \
+ uint32_t *qc = vq; \
+ qc[0] = 1; \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX)
+DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX)
+DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX)
+
+DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX)
+DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX)
+DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX)
+
+DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX)
+DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX)
+DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX)
+
+DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX)
+DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX)
+DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX)
+
+#undef DO_SAT
+
+void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ uint64_t nn = n[i], mm = m[i], dd = nn + mm;
+ if (dd < nn) {
+ dd = UINT64_MAX;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ uint64_t nn = n[i], mm = m[i], dd = nn - mm;
+ if (nn < mm) {
+ dd = 0;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ int64_t nn = n[i], mm = m[i], dd = nn + mm;
+ if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) {
+ dd = (nn >> 63) ^ ~INT64_MIN;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ int64_t nn = n[i], mm = m[i], dd = nn - mm;
+ if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) {
+ dd = (nn >> 63) ^ ~INT64_MIN;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}