author    Peter Maydell <peter.maydell@linaro.org>  2014-03-18 14:31:42 +0000
committer Peter Maydell <peter.maydell@linaro.org>  2014-03-18 14:31:42 +0000
commit    2dda43bacc79f8e283702614745cd700c637de64
tree      091425c65ad18714e967d6376cada6a6a6ae9db6
parent    315b59344126beab85a62b53582794b14436a5a4
parent    1ed27a17cd9d9ebec8963bc358d74060b1dd6127
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140317' into staging
target-arm queue:
 * more A64 Neon instructions
 * fixes to reset CBAR values for A9 and A15 boards
 * fix accesses to PMCR register in -icount mode

# gpg: Signature made Mon 17 Mar 2014 22:04:52 GMT using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20140317: (30 commits)
  scripts/qemu-binfmt-conf.sh: Add AArch64 registration
  target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate)
  target-arm: A64: Implement FCVTXN
  target-arm: A64: Implement scalar saturating narrow ops
  target-arm: A64: Move handle_2misc_narrow function
  target-arm: A64: Implement AdvSIMD reciprocal estimate insns URECPE, FRECPE
  softfloat: export squash_input_denormal functions
  target-arm: A64: Implement FCVTZS, FCVTZU in the shift-imm categories
  target-arm: A64: Handle saturating left shifts SQSHL, SQSHLU, UQSHL
  exec-all.h: Increase MAX_OP_PER_INSTR for ARM A64 decoder
  target-arm: A64: Implement FRINT*
  target-arm: A64: Implement SRI
  target-arm: A64: Add FRECPX (reciprocal exponent)
  target-arm: A64: List unsupported shift-imm opcodes
  target-arm: A64: Implement FCVTL
  target-arm: A64: Implement FCVTN
  target-arm: A64: Implement FCVT[NMAPZ][SU] SIMD instructions
  target-arm: A64: Implement SHLL, SHLL2
  target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP
  target-arm: A64: Saturating and narrowing shift ops
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--fpu/softfloat.c4
-rw-r--r--hw/arm/exynos4210.c16
-rw-r--r--hw/arm/realview.c39
-rw-r--r--hw/arm/vexpress.c123
-rw-r--r--hw/arm/virt.c6
-rw-r--r--include/exec/exec-all.h2
-rw-r--r--include/fpu/softfloat.h7
-rw-r--r--scripts/qemu-binfmt-conf.sh3
-rw-r--r--target-arm/helper-a64.c178
-rw-r--r--target-arm/helper-a64.h10
-rw-r--r--target-arm/helper.c332
-rw-r--r--target-arm/helper.h10
-rw-r--r--target-arm/translate-a64.c1357
-rw-r--r--target-arm/translate.c25
-rw-r--r--target-arm/translate.h6
15 files changed, 1855 insertions, 263 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index fc0b179df4..5f02c16d8d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -288,7 +288,7 @@ INLINE flag extractFloat32Sign( float32 a )
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
-static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
+float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
{
if (STATUS(flush_inputs_to_zero)) {
if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
@@ -473,7 +473,7 @@ INLINE flag extractFloat64Sign( float64 a )
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
-static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
+float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
{
if (STATUS(flush_inputs_to_zero)) {
if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
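
Side note: these two squash functions lose their "static" so the new A64 helpers can flush denormal inputs before picking apart raw bit patterns. A standalone sketch of the same flush-to-zero squash on single-precision bits (hypothetical name; the real function also raises float_flag_input_denormal via the float_status):

    #include <stdbool.h>
    #include <stdint.h>

    /* Flush-to-zero squash on raw IEEE-754 single-precision bits
     * (sketch; QEMU's version also raises the input-denormal flag). */
    static uint32_t squash_denormal_bits(uint32_t bits, bool flush_inputs_to_zero)
    {
        uint32_t exp  = (bits >> 23) & 0xff;  /* biased exponent */
        uint32_t frac = bits & 0x7fffff;      /* 23-bit fraction */

        if (flush_inputs_to_zero && exp == 0 && frac != 0) {
            return bits & 0x80000000;         /* keep only the sign bit */
        }
        return bits;
    }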
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index 9f137e9acd..6426d168d2 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -143,11 +143,21 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
unsigned long mem_size;
DeviceState *dev;
SysBusDevice *busdev;
+ ObjectClass *cpu_oc;
+
+ cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, "cortex-a9");
+ assert(cpu_oc);
for (n = 0; n < EXYNOS4210_NCPUS; n++) {
- s->cpu[n] = cpu_arm_init("cortex-a9");
- if (!s->cpu[n]) {
- fprintf(stderr, "Unable to find CPU %d definition\n", n);
+ Object *cpuobj = object_new(object_class_get_name(cpu_oc));
+ Error *err = NULL;
+
+ s->cpu[n] = ARM_CPU(cpuobj);
+ object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR,
+ "reset-cbar", &error_abort);
+ object_property_set_bool(cpuobj, true, "realized", &err);
+ if (err) {
+ error_report("%s", error_get_pretty(err));
exit(1);
}
}
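
Side note: every board conversion in this series follows the same configure-then-realize QOM shape shown in this hunk. A condensed sketch of the pattern (a fragment assuming QEMU's QOM and error headers; 0x10000000 is a placeholder CBAR, not a real board value):

    Object *cpuobj = object_new(object_class_get_name(cpu_oc));
    Error *err = NULL;

    /* QOM properties must be set before the CPU is realized... */
    object_property_set_int(cpuobj, 0x10000000, "reset-cbar", &error_abort);

    /* ...and setting "realized" is what actually brings the CPU up */
    object_property_set_bool(cpuobj, true, "realized", &err);
    if (err) {
        error_report("%s", error_get_pretty(err));
        exit(1);
    }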
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index 6ef7646002..7e04e507f9 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -18,6 +18,7 @@
#include "hw/i2c/i2c.h"
#include "sysemu/blockdev.h"
#include "exec/address-spaces.h"
+#include "qemu/error-report.h"
#define SMP_BOOT_ADDR 0xe0000000
#define SMP_BOOTREG_ADDR 0x10000030
@@ -49,6 +50,7 @@ static void realview_init(QEMUMachineInitArgs *args,
{
ARMCPU *cpu = NULL;
CPUARMState *env;
+ ObjectClass *cpu_oc;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram_lo = g_new(MemoryRegion, 1);
MemoryRegion *ram_hi = g_new(MemoryRegion, 1);
@@ -70,12 +72,14 @@ static void realview_init(QEMUMachineInitArgs *args,
uint32_t sys_id;
ram_addr_t low_ram_size;
ram_addr_t ram_size = args->ram_size;
+ hwaddr periphbase = 0;
switch (board_type) {
case BOARD_EB:
break;
case BOARD_EB_MPCORE:
is_mpcore = 1;
+ periphbase = 0x10100000;
break;
case BOARD_PB_A8:
is_pb = 1;
@@ -83,16 +87,37 @@ static void realview_init(QEMUMachineInitArgs *args,
case BOARD_PBX_A9:
is_mpcore = 1;
is_pb = 1;
+ periphbase = 0x1f000000;
break;
}
+
+ cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, args->cpu_model);
+ if (!cpu_oc) {
+ fprintf(stderr, "Unable to find CPU definition\n");
+ exit(1);
+ }
+
for (n = 0; n < smp_cpus; n++) {
- cpu = cpu_arm_init(args->cpu_model);
- if (!cpu) {
- fprintf(stderr, "Unable to find CPU definition\n");
+ Object *cpuobj = object_new(object_class_get_name(cpu_oc));
+ Error *err = NULL;
+
+ if (is_pb && is_mpcore) {
+ object_property_set_int(cpuobj, periphbase, "reset-cbar", &err);
+ if (err) {
+ error_report("%s", error_get_pretty(err));
+ exit(1);
+ }
+ }
+
+ object_property_set_bool(cpuobj, true, "realized", &err);
+ if (err) {
+ error_report("%s", error_get_pretty(err));
exit(1);
}
- cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
+
+ cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpuobj), ARM_CPU_IRQ);
}
+ cpu = ARM_CPU(first_cpu);
env = &cpu->env;
if (arm_feature(env, ARM_FEATURE_V7)) {
if (is_mpcore) {
@@ -141,16 +166,10 @@ static void realview_init(QEMUMachineInitArgs *args,
sysbus_mmio_map(SYS_BUS_DEVICE(sysctl), 0, 0x10000000);
if (is_mpcore) {
- hwaddr periphbase;
dev = qdev_create(NULL, is_pb ? "a9mpcore_priv": "realview_mpcore");
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
qdev_init_nofail(dev);
busdev = SYS_BUS_DEVICE(dev);
- if (is_pb) {
- periphbase = 0x1f000000;
- } else {
- periphbase = 0x10100000;
- }
sysbus_mmio_map(busdev, 0, periphbase);
for (n = 0; n < smp_cpus; n++) {
sysbus_connect_irq(busdev, n, cpu_irq[n]);
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index ef1707aef0..67628af588 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -32,6 +32,7 @@
#include "sysemu/blockdev.h"
#include "hw/block/flash.h"
#include "sysemu/device_tree.h"
+#include "qemu/error-report.h"
#include <libfdt.h>
#define VEXPRESS_BOARD_ID 0x8e0
@@ -173,6 +174,64 @@ struct VEDBoardInfo {
DBoardInitFn *init;
};
+static void init_cpus(const char *cpu_model, const char *privdev,
+ hwaddr periphbase, qemu_irq *pic)
+{
+ ObjectClass *cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
+ DeviceState *dev;
+ SysBusDevice *busdev;
+ int n;
+
+ if (!cpu_oc) {
+ fprintf(stderr, "Unable to find CPU definition\n");
+ exit(1);
+ }
+
+ /* Create the actual CPUs */
+ for (n = 0; n < smp_cpus; n++) {
+ Object *cpuobj = object_new(object_class_get_name(cpu_oc));
+ Error *err = NULL;
+
+ object_property_set_int(cpuobj, periphbase, "reset-cbar", &err);
+ if (err) {
+ error_report("%s", error_get_pretty(err));
+ exit(1);
+ }
+ object_property_set_bool(cpuobj, true, "realized", &err);
+ if (err) {
+ error_report("%s", error_get_pretty(err));
+ exit(1);
+ }
+ }
+
+ /* Create the private peripheral devices (including the GIC);
+ * this must happen after the CPUs are created because a15mpcore_priv
+ * wires itself up to the CPU's generic_timer gpio out lines.
+ */
+ dev = qdev_create(NULL, privdev);
+ qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
+ qdev_init_nofail(dev);
+ busdev = SYS_BUS_DEVICE(dev);
+ sysbus_mmio_map(busdev, 0, periphbase);
+
+ /* Interrupts [42:0] are from the motherboard;
+ * [47:43] are reserved; [63:48] are daughterboard
+ * peripherals. Note that some documentation numbers
+ * external interrupts starting from 32 (because there
+ * are internal interrupts 0..31).
+ */
+ for (n = 0; n < 64; n++) {
+ pic[n] = qdev_get_gpio_in(dev, n);
+ }
+
+ /* Connect the CPUs to the GIC */
+ for (n = 0; n < smp_cpus; n++) {
+ DeviceState *cpudev = DEVICE(qemu_get_cpu(n));
+
+ sysbus_connect_irq(busdev, n, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+ }
+}
+
static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
ram_addr_t ram_size,
const char *cpu_model,
@@ -181,25 +240,12 @@ static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
MemoryRegion *lowram = g_new(MemoryRegion, 1);
- DeviceState *dev;
- SysBusDevice *busdev;
- int n;
- qemu_irq cpu_irq[4];
ram_addr_t low_ram_size;
if (!cpu_model) {
cpu_model = "cortex-a9";
}
- for (n = 0; n < smp_cpus; n++) {
- ARMCPU *cpu = cpu_arm_init(cpu_model);
- if (!cpu) {
- fprintf(stderr, "Unable to find CPU definition\n");
- exit(1);
- }
- cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
- }
-
if (ram_size > 0x40000000) {
/* 1GB is the maximum the address space permits */
fprintf(stderr, "vexpress-a9: cannot model more than 1GB RAM\n");
@@ -221,23 +267,7 @@ static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
memory_region_add_subregion(sysmem, 0x60000000, ram);
/* 0x1e000000 A9MPCore (SCU) private memory region */
- dev = qdev_create(NULL, "a9mpcore_priv");
- qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
- qdev_init_nofail(dev);
- busdev = SYS_BUS_DEVICE(dev);
- sysbus_mmio_map(busdev, 0, 0x1e000000);
- for (n = 0; n < smp_cpus; n++) {
- sysbus_connect_irq(busdev, n, cpu_irq[n]);
- }
- /* Interrupts [42:0] are from the motherboard;
- * [47:43] are reserved; [63:48] are daughterboard
- * peripherals. Note that some documentation numbers
- * external interrupts starting from 32 (because the
- * A9MP has internal interrupts 0..31).
- */
- for (n = 0; n < 64; n++) {
- pic[n] = qdev_get_gpio_in(dev, n);
- }
+ init_cpus(cpu_model, "a9mpcore_priv", 0x1e000000, pic);
/* Daughterboard peripherals : 0x10020000 .. 0x20000000 */
@@ -296,29 +326,14 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
const char *cpu_model,
qemu_irq *pic)
{
- int n;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
MemoryRegion *sram = g_new(MemoryRegion, 1);
- qemu_irq cpu_irq[4];
- DeviceState *dev;
- SysBusDevice *busdev;
if (!cpu_model) {
cpu_model = "cortex-a15";
}
- for (n = 0; n < smp_cpus; n++) {
- ARMCPU *cpu;
-
- cpu = cpu_arm_init(cpu_model);
- if (!cpu) {
- fprintf(stderr, "Unable to find CPU definition\n");
- exit(1);
- }
- cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
- }
-
{
/* We have to use a separate 64 bit variable here to avoid the gcc
* "comparison is always false due to limited range of data type"
@@ -337,23 +352,7 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
memory_region_add_subregion(sysmem, 0x80000000, ram);
/* 0x2c000000 A15MPCore private memory region (GIC) */
- dev = qdev_create(NULL, "a15mpcore_priv");
- qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
- qdev_init_nofail(dev);
- busdev = SYS_BUS_DEVICE(dev);
- sysbus_mmio_map(busdev, 0, 0x2c000000);
- for (n = 0; n < smp_cpus; n++) {
- sysbus_connect_irq(busdev, n, cpu_irq[n]);
- }
- /* Interrupts [42:0] are from the motherboard;
- * [47:43] are reserved; [63:48] are daughterboard
- * peripherals. Note that some documentation numbers
- * external interrupts starting from 32 (because there
- * are internal interrupts 0..31).
- */
- for (n = 0; n < 64; n++) {
- pic[n] = qdev_get_gpio_in(dev, n);
- }
+ init_cpus(cpu_model, "a15mpcore_priv", 0x2c000000, pic);
/* A15 daughterboard peripherals: */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 517f2fe30f..2bbc9313d2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -390,6 +390,12 @@ static void machvirt_init(QEMUMachineInitArgs *args)
if (n > 0) {
object_property_set_bool(cpuobj, true, "start-powered-off", NULL);
}
+
+ if (object_property_find(cpuobj, "reset-cbar", NULL)) {
+ object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base,
+ "reset-cbar", &error_abort);
+ }
+
object_property_set_bool(cpuobj, true, "realized", NULL);
}
fdt_add_cpu_nodes(vbi);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 502b7aa084..f9ac332f9d 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -44,7 +44,7 @@ struct TranslationBlock;
typedef struct TranslationBlock TranslationBlock;
/* XXX: make safe guess about sizes */
-#define MAX_OP_PER_INSTR 208
+#define MAX_OP_PER_INSTR 266
#if HOST_LONG_BITS == 32
#define MAX_OPC_PARAM_PER_ARG 2
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 4b4df88527..db878c1313 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -245,6 +245,13 @@ INLINE flag get_default_nan_mode(float_status *status)
void float_raise( int8 flags STATUS_PARAM);
/*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+float32 float32_squash_input_denormal(float32 a STATUS_PARAM);
+float64 float64_squash_input_denormal(float64 a STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
| Options to indicate which negations to perform in float*_muladd()
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index 0da2618883..289b1a3963 100644
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -41,6 +41,9 @@ if [ $cpu != "arm" ] ; then
echo ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
echo ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register
fi
+if [ $cpu != "aarch64" ] ; then
+ echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-aarch64:' > /proc/sys/fs/binfmt_misc/register
+fi
if [ $cpu != "sparc" ] ; then
echo ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
fi
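
Side note: the magic/mask pair registered above describes a 20-byte ELF header prefix: ELFCLASS64, little-endian, e_type masked with 0xfe so both ET_EXEC (2) and ET_DYN (3) match, and e_machine 0xb7, i.e. EM_AARCH64 (183). A self-contained C sketch of the same test (hypothetical helper name):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Check an ELF header prefix against the aarch64 binfmt_misc
     * magic/mask registered above. */
    static bool matches_aarch64(const uint8_t *hdr, size_t len)
    {
        static const uint8_t magic[20] = {
            0x7f, 'E', 'L', 'F', 2, 1, 1, 0,   /* ELF64, LE, version 1 */
            0, 0, 0, 0, 0, 0, 0, 0,            /* ident padding */
            2, 0,                              /* e_type: ET_EXEC */
            0xb7, 0                            /* e_machine: EM_AARCH64 */
        };
        static const uint8_t mask[20] = {
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xfe, 0xff,                        /* accept ET_EXEC/ET_DYN */
            0xff, 0xff
        };
        size_t i;

        if (len < sizeof(magic)) {
            return false;
        }
        for (i = 0; i < sizeof(magic); i++) {
            if ((hdr[i] & mask[i]) != magic[i]) {
                return false;
            }
        }
        return true;
    }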
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index c2ce33ee88..ec0258295f 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -60,6 +60,11 @@ uint32_t HELPER(cls32)(uint32_t x)
return clrsb32(x);
}
+uint32_t HELPER(clz32)(uint32_t x)
+{
+ return clz32(x);
+}
+
uint64_t HELPER(rbit64)(uint64_t x)
{
/* assign the correct byte position */
@@ -180,6 +185,36 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result;
}
+/* Helper function for 64 bit polynomial multiply case:
+ * perform PolynomialMult(op1, op2) and return either the top or
+ * bottom half of the 128 bit result.
+ */
+uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ for (bitnum = 0; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 << bitnum;
+ }
+ }
+ return res;
+}
+uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ /* bit 0 of op1 can't influence the high 64 bits at all */
+ for (bitnum = 1; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 >> (64 - bitnum);
+ }
+ }
+ return res;
+}
+
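
Side note: PolynomialMult is carry-less multiplication over GF(2): partial products are XORed rather than added, so no carries propagate. A tiny standalone check on a case where that differs from integer multiply:

    #include <stdint.h>
    #include <stdio.h>

    /* Low half of a carry-less multiply, same loop as the helper above */
    static uint64_t clmul_lo(uint64_t op1, uint64_t op2)
    {
        uint64_t res = 0;
        int bit;

        for (bit = 0; bit < 64; bit++) {
            if (op1 & (1ULL << bit)) {
                res ^= op2 << bit;   /* XOR instead of add: no carries */
            }
        }
        return res;
    }

    int main(void)
    {
        /* 3 * 3 = 9 with carries, but 0b11 clmul 0b11 = 0b101 = 5 */
        printf("%llu\n", (unsigned long long)clmul_lo(3, 3));
        return 0;
    }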
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
@@ -258,3 +293,146 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
}
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
+
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+ uint64_t nsignmask = 0x0080008000800080ULL;
+ uint64_t wsignmask = 0x8000800080008000ULL;
+ uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+ uint64_t tmp1, tmp2;
+ uint64_t res, signres;
+
+ /* Extract odd elements, sign extend each to a 16 bit field */
+ tmp1 = a & elementmask;
+ tmp1 ^= nsignmask;
+ tmp1 |= wsignmask;
+ tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+ /* Ditto for the even elements */
+ tmp2 = (a >> 8) & elementmask;
+ tmp2 ^= nsignmask;
+ tmp2 |= wsignmask;
+ tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+ /* calculate the result by summing bits 0..14, 16..22, etc,
+ * and then adjusting the sign bits 15, 23, etc manually.
+ * This ensures the addition can't overflow the 16 bit field.
+ */
+ signres = (tmp1 ^ tmp2) & wsignmask;
+ res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+ res ^= signres;
+
+ return res;
+}
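
Side note: the bias trick above sign-extends four 8-bit lanes to 16 bits in parallel without letting borrows cross lane boundaries. The same steps on one lane, as a standalone check:

    #include <assert.h>
    #include <stdint.h>

    /* One-lane model of the SWAR sign extension in neon_addlp_s8 */
    static uint16_t sext8to16_swar(uint16_t lane)
    {
        lane ^= 0x0080;                   /* bias: flip the 8-bit sign */
        lane |= 0x8000;                   /* block borrow out of the lane */
        return (lane - 0x0080) ^ 0x8000;  /* unbias, fix the wide sign */
    }

    int main(void)
    {
        assert(sext8to16_swar(0x00ff) == 0xffff);  /* -1   -> -1   */
        assert(sext8to16_swar(0x0001) == 0x0001);  /* +1   -> +1   */
        assert(sext8to16_swar(0x0080) == 0xff80);  /* -128 -> -128 */
        return 0;
    }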
+
+uint64_t HELPER(neon_addlp_u8)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x00ff00ff00ff00ffULL;
+ tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
+ return tmp;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+ int32_t reslo, reshi;
+
+ reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+ reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+ return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
+uint64_t HELPER(neon_addlp_u16)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x0000ffff0000ffffULL;
+ tmp += (a >> 16) & 0x0000ffff0000ffffULL;
+ return tmp;
+}
+
+/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
+float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ uint32_t val32, sbit;
+ int32_t exp;
+
+ if (float32_is_any_nan(a)) {
+ float32 nan = a;
+ if (float32_is_signaling_nan(a)) {
+ float_raise(float_flag_invalid, fpst);
+ nan = float32_maybe_silence_nan(a);
+ }
+ if (fpst->default_nan_mode) {
+ nan = float32_default_nan;
+ }
+ return nan;
+ }
+
+ val32 = float32_val(a);
+ sbit = 0x80000000ULL & val32;
+ exp = extract32(val32, 23, 8);
+
+ if (exp == 0) {
+ return make_float32(sbit | (0xfe << 23));
+ } else {
+ return make_float32(sbit | (~exp & 0xff) << 23);
+ }
+}
+
+float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ uint64_t val64, sbit;
+ int64_t exp;
+
+ if (float64_is_any_nan(a)) {
+ float64 nan = a;
+ if (float64_is_signaling_nan(a)) {
+ float_raise(float_flag_invalid, fpst);
+ nan = float64_maybe_silence_nan(a);
+ }
+ if (fpst->default_nan_mode) {
+ nan = float64_default_nan;
+ }
+ return nan;
+ }
+
+ val64 = float64_val(a);
+ sbit = 0x8000000000000000ULL & val64;
+ exp = extract64(float64_val(a), 52, 11);
+
+ if (exp == 0) {
+ return make_float64(sbit | (0x7feULL << 52));
+ } else {
+ return make_float64(sbit | (~exp & 0x7ffULL) << 52);
+ }
+}
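
Side note: for normal inputs FRECPX just keeps the sign, complements the biased exponent and clears the fraction, so 2.0f (exponent 0x80) maps to 1.0f (exponent 0x7f). A standalone sketch of that fast path:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Normal-input path of frecpx_f32: sign | (~exp & 0xff) << 23 */
    static uint32_t frecpx_bits(uint32_t bits)
    {
        uint32_t sign = bits & 0x80000000;
        uint32_t exp  = (bits >> 23) & 0xff;

        return sign | ((~exp & 0xff) << 23);
    }

    int main(void)
    {
        float in = 2.0f, out;
        uint32_t b;

        memcpy(&b, &in, sizeof(b));
        b = frecpx_bits(b);              /* exp 0x80 -> 0x7f, frac -> 0 */
        memcpy(&out, &b, sizeof(out));
        assert(out == 1.0f);
        return 0;
    }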
+
+float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
+{
+ /* Von Neumann rounding is implemented by using round-to-zero
+ * and then setting the LSB of the result if Inexact was raised.
+ */
+ float32 r;
+ float_status *fpst = &env->vfp.fp_status;
+ float_status tstat = *fpst;
+ int exflags;
+
+ set_float_rounding_mode(float_round_to_zero, &tstat);
+ set_float_exception_flags(0, &tstat);
+ r = float64_to_float32(a, &tstat);
+ r = float32_maybe_silence_nan(r);
+ exflags = get_float_exception_flags(&tstat);
+ if (exflags & float_flag_inexact) {
+ r = make_float32(float32_val(r) | 1);
+ }
+ exflags |= get_float_exception_flags(fpst);
+ set_float_exception_flags(exflags, fpst);
+ return r;
+}
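
Side note: "Von Neumann rounding" (round to odd) truncates and then jams a sticky 1 into the result LSB whenever precision was lost, which keeps a later rounding step from double-rounding incorrectly. The idea in isolation, on plain integers:

    #include <assert.h>
    #include <stdint.h>

    /* Round to odd: drop k low bits; if any dropped bit was set,
     * force the new least significant bit to 1 (the sticky bit). */
    static uint64_t round_to_odd(uint64_t x, unsigned k)
    {
        uint64_t r = x >> k;

        if (x & ((1ULL << k) - 1)) {
            r |= 1;
        }
        return r;
    }

    int main(void)
    {
        assert(round_to_odd(0x100, 4) == 0x10);  /* exact: unchanged    */
        assert(round_to_odd(0x101, 4) == 0x11);  /* inexact: LSB jammed */
        return 0;
    }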
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index ab9933cab0..3f05bedcca 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -21,12 +21,15 @@ DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(clz32, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
@@ -36,3 +39,10 @@ DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index f0a1fd48e6..55077ed1b6 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1983,6 +1983,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
ARMCPRegInfo pmcr = {
.name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
.access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
+ .type = ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
.accessfn = pmreg_access, .writefn = pmcr_write,
.raw_writefn = raw_write,
@@ -4519,16 +4520,21 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
* int->float conversions at run-time. */
#define float64_256 make_float64(0x4070000000000000LL)
#define float64_512 make_float64(0x4080000000000000LL)
+#define float32_maxnorm make_float32(0x7f7fffff)
+#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
-/* The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM.
+/* Reciprocal functions
+ *
+ * The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM, see FPRecipEstimate()
*/
-static float64 recip_estimate(float64 a, CPUARMState *env)
+
+static float64 recip_estimate(float64 a, float_status *real_fp_status)
{
/* These calculations mustn't set any fp exception flags,
* so we use a local copy of the fp_status.
*/
- float_status dummy_status = env->vfp.standard_fp_status;
+ float_status dummy_status = *real_fp_status;
float_status *s = &dummy_status;
/* q = (int)(a * 512.0) */
float64 q = float64_mul(float64_512, a, s);
@@ -4549,56 +4555,178 @@ static float64 recip_estimate(float64 a, CPUARMState *env)
return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
-float32 HELPER(recpe_f32)(float32 a, CPUARMState *env)
+/* Common wrapper to call recip_estimate */
+static float64 call_recip_estimate(float64 num, int off, float_status *fpst)
{
- float_status *s = &env->vfp.standard_fp_status;
- float64 f64;
- uint32_t val32 = float32_val(a);
+ uint64_t val64 = float64_val(num);
+ uint64_t frac = extract64(val64, 0, 52);
+ int64_t exp = extract64(val64, 52, 11);
+ uint64_t sbit;
+ float64 scaled, estimate;
- int result_exp;
- int a_exp = (val32 & 0x7f800000) >> 23;
- int sign = val32 & 0x80000000;
+ /* Generate the scaled number for the estimate function */
+ if (exp == 0) {
+ if (extract64(frac, 51, 1) == 0) {
+ exp = -1;
+ frac = extract64(frac, 0, 50) << 2;
+ } else {
+ frac = extract64(frac, 0, 51) << 1;
+ }
+ }
- if (float32_is_any_nan(a)) {
- if (float32_is_signaling_nan(a)) {
- float_raise(float_flag_invalid, s);
+ /* scaled = '0' : '01111111110' : fraction<51:44> : Zeros(44); */
+ scaled = make_float64((0x3feULL << 52)
+ | extract64(frac, 44, 8) << 44);
+
+ estimate = recip_estimate(scaled, fpst);
+
+ /* Build new result */
+ val64 = float64_val(estimate);
+ sbit = 0x8000000000000000ULL & val64;
+ exp = off - exp;
+ frac = extract64(val64, 0, 52);
+
+ if (exp == 0) {
+ frac = 1ULL << 51 | extract64(frac, 1, 51);
+ } else if (exp == -1) {
+ frac = 1ULL << 50 | extract64(frac, 2, 50);
+ exp = 0;
+ }
+
+ return make_float64(sbit | (exp << 52) | frac);
+}
+
+static bool round_to_inf(float_status *fpst, bool sign_bit)
+{
+ switch (fpst->float_rounding_mode) {
+ case float_round_nearest_even: /* Round to Nearest */
+ return true;
+ case float_round_up: /* Round to +Inf */
+ return !sign_bit;
+ case float_round_down: /* Round to -Inf */
+ return sign_bit;
+ case float_round_to_zero: /* Round to Zero */
+ return false;
+ }
+
+ g_assert_not_reached();
+}
+
+float32 HELPER(recpe_f32)(float32 input, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float32 f32 = float32_squash_input_denormal(input, fpst);
+ uint32_t f32_val = float32_val(f32);
+ uint32_t f32_sbit = 0x80000000ULL & f32_val;
+ int32_t f32_exp = extract32(f32_val, 23, 8);
+ uint32_t f32_frac = extract32(f32_val, 0, 23);
+ float64 f64, r64;
+ uint64_t r64_val;
+ int64_t r64_exp;
+ uint64_t r64_frac;
+
+ if (float32_is_any_nan(f32)) {
+ float32 nan = f32;
+ if (float32_is_signaling_nan(f32)) {
+ float_raise(float_flag_invalid, fpst);
+ nan = float32_maybe_silence_nan(f32);
}
- return float32_default_nan;
- } else if (float32_is_infinity(a)) {
- return float32_set_sign(float32_zero, float32_is_neg(a));
- } else if (float32_is_zero_or_denormal(a)) {
- if (!float32_is_zero(a)) {
- float_raise(float_flag_input_denormal, s);
+ if (fpst->default_nan_mode) {
+ nan = float32_default_nan;
}
- float_raise(float_flag_divbyzero, s);
- return float32_set_sign(float32_infinity, float32_is_neg(a));
- } else if (a_exp >= 253) {
- float_raise(float_flag_underflow, s);
- return float32_set_sign(float32_zero, float32_is_neg(a));
+ return nan;
+ } else if (float32_is_infinity(f32)) {
+ return float32_set_sign(float32_zero, float32_is_neg(f32));
+ } else if (float32_is_zero(f32)) {
+ float_raise(float_flag_divbyzero, fpst);
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ } else if ((f32_val & ~(1ULL << 31)) < (1ULL << 21)) {
+ /* Abs(value) < 2.0^-128 */
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
+ if (round_to_inf(fpst, f32_sbit)) {
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ } else {
+ return float32_set_sign(float32_maxnorm, float32_is_neg(f32));
+ }
+ } else if (f32_exp >= 253 && fpst->flush_to_zero) {
+ float_raise(float_flag_underflow, fpst);
+ return float32_set_sign(float32_zero, float32_is_neg(f32));
}
- f64 = make_float64((0x3feULL << 52)
- | ((int64_t)(val32 & 0x7fffff) << 29));
- result_exp = 253 - a_exp;
+ f64 = make_float64(((int64_t)(f32_exp) << 52) | (int64_t)(f32_frac) << 29);
+ r64 = call_recip_estimate(f64, 253, fpst);
+ r64_val = float64_val(r64);
+ r64_exp = extract64(r64_val, 52, 11);
+ r64_frac = extract64(r64_val, 0, 52);
- f64 = recip_estimate(f64, env);
+ /* result = sign : result_exp<7:0> : fraction<51:29>; */
+ return make_float32(f32_sbit |
+ (r64_exp & 0xff) << 23 |
+ extract64(r64_frac, 29, 24));
+}
- val32 = sign
- | ((result_exp & 0xff) << 23)
- | ((float64_val(f64) >> 29) & 0x7fffff);
- return make_float32(val32);
+float64 HELPER(recpe_f64)(float64 input, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float64 f64 = float64_squash_input_denormal(input, fpst);
+ uint64_t f64_val = float64_val(f64);
+ uint64_t f64_sbit = 0x8000000000000000ULL & f64_val;
+ int64_t f64_exp = extract64(f64_val, 52, 11);
+ float64 r64;
+ uint64_t r64_val;
+ int64_t r64_exp;
+ uint64_t r64_frac;
+
+ /* Deal with any special cases */
+ if (float64_is_any_nan(f64)) {
+ float64 nan = f64;
+ if (float64_is_signaling_nan(f64)) {
+ float_raise(float_flag_invalid, fpst);
+ nan = float64_maybe_silence_nan(f64);
+ }
+ if (fpst->default_nan_mode) {
+ nan = float64_default_nan;
+ }
+ return nan;
+ } else if (float64_is_infinity(f64)) {
+ return float64_set_sign(float64_zero, float64_is_neg(f64));
+ } else if (float64_is_zero(f64)) {
+ float_raise(float_flag_divbyzero, fpst);
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
+ /* Abs(value) < 2.0^-1024 */
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
+ if (round_to_inf(fpst, f64_sbit)) {
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ } else {
+ return float64_set_sign(float64_maxnorm, float64_is_neg(f64));
+ }
+ } else if (f64_exp >= 1023 && fpst->flush_to_zero) {
+ float_raise(float_flag_underflow, fpst);
+ return float64_set_sign(float64_zero, float64_is_neg(f64));
+ }
+
+ r64 = call_recip_estimate(f64, 2045, fpst);
+ r64_val = float64_val(r64);
+ r64_exp = extract64(r64_val, 52, 11);
+ r64_frac = extract64(r64_val, 0, 52);
+
+ /* result = sign : result_exp<10:0> : fraction<51:0> */
+ return make_float64(f64_sbit |
+ ((r64_exp & 0x7ff) << 52) |
+ r64_frac);
}
/* The algorithm that must be used to calculate the estimate
* is specified by the ARM ARM.
*/
-static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
+static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
{
/* These calculations mustn't set any fp exception flags,
* so we use a local copy of the fp_status.
*/
- float_status dummy_status = env->vfp.standard_fp_status;
+ float_status dummy_status = *real_fp_status;
float_status *s = &dummy_status;
float64 q;
int64_t q_int;
@@ -4645,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
-float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
+float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
{
- float_status *s = &env->vfp.standard_fp_status;
+ float_status *s = fpstp;
+ float32 f32 = float32_squash_input_denormal(input, s);
+ uint32_t val = float32_val(f32);
+ uint32_t f32_sbit = 0x80000000 & val;
+ int32_t f32_exp = extract32(val, 23, 8);
+ uint32_t f32_frac = extract32(val, 0, 23);
+ uint64_t f64_frac;
+ uint64_t val64;
int result_exp;
float64 f64;
- uint32_t val;
- uint64_t val64;
-
- val = float32_val(a);
- if (float32_is_any_nan(a)) {
- if (float32_is_signaling_nan(a)) {
+ if (float32_is_any_nan(f32)) {
+ float32 nan = f32;
+ if (float32_is_signaling_nan(f32)) {
float_raise(float_flag_invalid, s);
+ nan = float32_maybe_silence_nan(f32);
}
- return float32_default_nan;
- } else if (float32_is_zero_or_denormal(a)) {
- if (!float32_is_zero(a)) {
- float_raise(float_flag_input_denormal, s);
+ if (s->default_nan_mode) {
+ nan = float32_default_nan;
}
+ return nan;
+ } else if (float32_is_zero(f32)) {
float_raise(float_flag_divbyzero, s);
- return float32_set_sign(float32_infinity, float32_is_neg(a));
- } else if (float32_is_neg(a)) {
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ } else if (float32_is_neg(f32)) {
float_raise(float_flag_invalid, s);
return float32_default_nan;
- } else if (float32_is_infinity(a)) {
+ } else if (float32_is_infinity(f32)) {
return float32_zero;
}
- /* Normalize to a double-precision value between 0.25 and 1.0,
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
* preserving the parity of the exponent. */
- if ((val & 0x800000) == 0) {
- f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+
+ f64_frac = ((uint64_t) f32_frac) << 29;
+ if (f32_exp == 0) {
+ while (extract64(f64_frac, 51, 1) == 0) {
+ f64_frac = f64_frac << 1;
+ f32_exp = f32_exp-1;
+ }
+ f64_frac = extract64(f64_frac, 0, 51) << 1;
+ }
+
+ if (extract64(f32_exp, 0, 1) == 0) {
+ f64 = make_float64(((uint64_t) f32_sbit) << 32
| (0x3feULL << 52)
- | ((uint64_t)(val & 0x7fffff) << 29));
+ | f64_frac);
} else {
- f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+ f64 = make_float64(((uint64_t) f32_sbit) << 32
| (0x3fdULL << 52)
- | ((uint64_t)(val & 0x7fffff) << 29));
+ | f64_frac);
}
- result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+ result_exp = (380 - f32_exp) / 2;
- f64 = recip_sqrt_estimate(f64, env);
+ f64 = recip_sqrt_estimate(f64, s);
val64 = float64_val(f64);
@@ -4696,8 +4839,72 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
return make_float32(val);
}
-uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env)
+float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
+{
+ float_status *s = fpstp;
+ float64 f64 = float64_squash_input_denormal(input, s);
+ uint64_t val = float64_val(f64);
+ uint64_t f64_sbit = 0x8000000000000000ULL & val;
+ int64_t f64_exp = extract64(val, 52, 11);
+ uint64_t f64_frac = extract64(val, 0, 52);
+ int64_t result_exp;
+ uint64_t result_frac;
+
+ if (float64_is_any_nan(f64)) {
+ float64 nan = f64;
+ if (float64_is_signaling_nan(f64)) {
+ float_raise(float_flag_invalid, s);
+ nan = float64_maybe_silence_nan(f64);
+ }
+ if (s->default_nan_mode) {
+ nan = float64_default_nan;
+ }
+ return nan;
+ } else if (float64_is_zero(f64)) {
+ float_raise(float_flag_divbyzero, s);
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ } else if (float64_is_neg(f64)) {
+ float_raise(float_flag_invalid, s);
+ return float64_default_nan;
+ } else if (float64_is_infinity(f64)) {
+ return float64_zero;
+ }
+
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+
+ if (f64_exp == 0) {
+ while (extract64(f64_frac, 51, 1) == 0) {
+ f64_frac = f64_frac << 1;
+ f64_exp = f64_exp - 1;
+ }
+ f64_frac = extract64(f64_frac, 0, 51) << 1;
+ }
+
+ if (extract64(f64_exp, 0, 1) == 0) {
+ f64 = make_float64(f64_sbit
+ | (0x3feULL << 52)
+ | f64_frac);
+ } else {
+ f64 = make_float64(f64_sbit
+ | (0x3fdULL << 52)
+ | f64_frac);
+ }
+
+ result_exp = (3068 - f64_exp) / 2;
+
+ f64 = recip_sqrt_estimate(f64, s);
+
+ result_frac = extract64(float64_val(f64), 0, 52);
+
+ return make_float64(f64_sbit |
+ ((result_exp & 0x7ff) << 52) |
+ result_frac);
+}
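
Side note on the constants: with the input scaled to [0.25, 1.0) and exponent parity preserved, the biased result exponent of 1/sqrt(x) works out to (3*bias - 1 - exp) / 2, i.e. (380 - exp) / 2 for single precision (bias 127) and (3068 - exp) / 2 for double precision (bias 1023), matching the two result_exp computations above.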
+
+uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
{
+ float_status *s = fpstp;
float64 f64;
if ((a & 0x80000000) == 0) {
@@ -4707,13 +4914,14 @@ uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env)
f64 = make_float64((0x3feULL << 52)
| ((int64_t)(a & 0x7fffffff) << 21));
- f64 = recip_estimate (f64, env);
+ f64 = recip_estimate(f64, s);
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
-uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
+uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
{
+ float_status *fpst = fpstp;
float64 f64;
if ((a & 0xc0000000) == 0) {
@@ -4728,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
| ((uint64_t)(a & 0x3fffffff) << 22));
}
- f64 = recip_sqrt_estimate(f64, env);
+ f64 = recip_sqrt_estimate(f64, fpst);
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 8923f8ae71..a3d6f32b06 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -167,10 +167,12 @@ DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
-DEF_HELPER_2(recpe_f32, f32, f32, env)
-DEF_HELPER_2(rsqrte_f32, f32, f32, env)
-DEF_HELPER_2(recpe_u32, i32, i32, env)
-DEF_HELPER_2(rsqrte_u32, i32, i32, env)
+DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_2(recpe_u32, i32, i32, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 2fd9113628..befffac2e3 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -76,11 +76,13 @@ typedef struct AArch64DecodeTable {
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
+typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
/* initialize TCG globals. */
void a64_translate_init(void)
@@ -3096,12 +3098,11 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
/* non-flag setting ops may use SP */
if (!setflags) {
- tcg_rn = read_cpu_reg_sp(s, rn, sf);
tcg_rd = cpu_reg_sp(s, rd);
} else {
- tcg_rn = read_cpu_reg(s, rn, sf);
tcg_rd = cpu_reg(s, rd);
}
+ tcg_rn = read_cpu_reg_sp(s, rn, sf);
tcg_rm = read_cpu_reg(s, rm, sf);
ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
@@ -5828,6 +5829,21 @@ static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
}
}
+/* SRI: shift right with insert */
+static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+ int size, int shift)
+{
+ int esize = 8 << size;
+
+ /* shift count same as element size is valid but does nothing;
+ * special case to avoid potential shift by 64.
+ */
+ if (shift != esize) {
+ tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+ tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
+ }
+}
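
Side note: SRI shifts the source right and deposits it under the destination's top "shift" bits, which are preserved. A standalone one-element model of handle_shri_with_ins (hypothetical name sri64):

    #include <assert.h>
    #include <stdint.h>

    /* SRI on a single 64-bit element; shift is 1..64 for SRI */
    static uint64_t sri64(uint64_t dst, uint64_t src, int shift)
    {
        uint64_t mask;

        if (shift == 64) {
            return dst;              /* shift == esize is a no-op */
        }
        mask = ~0ULL >> shift;       /* low (64 - shift) bits */
        return (dst & ~mask) | ((src >> shift) & mask);
    }

    int main(void)
    {
        /* top 8 bits of dst survive; the rest comes from src >> 8 */
        assert(sri64(0xffffffffffffffffULL, 0, 8) == 0xff00000000000000ULL);
        return 0;
    }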
+
/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
bool is_u, int immh, int immb,
@@ -5838,6 +5854,7 @@ static void handle_scalar_simd_shri(DisasContext *s,
int shift = 2 * (8 << size) - immhb;
bool accumulate = false;
bool round = false;
+ bool insert = false;
TCGv_i64 tcg_rn;
TCGv_i64 tcg_rd;
TCGv_i64 tcg_round;
@@ -5857,6 +5874,9 @@ static void handle_scalar_simd_shri(DisasContext *s,
case 0x06: /* SRSRA / URSRA (accum + rounding) */
accumulate = round = true;
break;
+ case 0x08: /* SRI */
+ insert = true;
+ break;
}
if (round) {
@@ -5867,10 +5887,14 @@ static void handle_scalar_simd_shri(DisasContext *s,
}
tcg_rn = read_fp_dreg(s, rn);
- tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
+ tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
+ if (insert) {
+ handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
+ } else {
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
+ }
write_fp_dreg(s, rd, tcg_rd);
@@ -5908,6 +5932,374 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
tcg_temp_free_i64(tcg_rd);
}
+/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
+ * (signed/unsigned) narrowing */
+static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
+ bool is_u_shift, bool is_u_narrow,
+ int immh, int immb, int opcode,
+ int rn, int rd)
+{
+ int immhb = immh << 3 | immb;
+ int size = 32 - clz32(immh) - 1;
+ int esize = 8 << size;
+ int shift = (2 * esize) - immhb;
+ int elements = is_scalar ? 1 : (64 / esize);
+ bool round = extract32(opcode, 0, 1);
+ TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
+ TCGv_i64 tcg_rn, tcg_rd, tcg_round;
+ TCGv_i32 tcg_rd_narrowed;
+ TCGv_i64 tcg_final;
+
+ static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
+ { gen_helper_neon_narrow_sat_s8,
+ gen_helper_neon_unarrow_sat8 },
+ { gen_helper_neon_narrow_sat_s16,
+ gen_helper_neon_unarrow_sat16 },
+ { gen_helper_neon_narrow_sat_s32,
+ gen_helper_neon_unarrow_sat32 },
+ { NULL, NULL },
+ };
+ static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
+ gen_helper_neon_narrow_sat_u8,
+ gen_helper_neon_narrow_sat_u16,
+ gen_helper_neon_narrow_sat_u32,
+ NULL
+ };
+ NeonGenNarrowEnvFn *narrowfn;
+
+ int i;
+
+ assert(size < 4);
+
+ if (extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (is_u_shift) {
+ narrowfn = unsigned_narrow_fns[size];
+ } else {
+ narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
+ }
+
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ tcg_rd_narrowed = tcg_temp_new_i32();
+ tcg_final = tcg_const_i64(0);
+
+ if (round) {
+ uint64_t round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
+ } else {
+ TCGV_UNUSED_I64(tcg_round);
+ }
+
+ for (i = 0; i < elements; i++) {
+ read_vec_element(s, tcg_rn, rn, i, ldop);
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ false, is_u_shift, size+1, shift);
+ narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
+ tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
+ tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ write_vec_element(s, tcg_final, rd, 0, MO_64);
+ } else {
+ write_vec_element(s, tcg_final, rd, 1, MO_64);
+ }
+
+ if (round) {
+ tcg_temp_free_i64(tcg_round);
+ }
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i32(tcg_rd_narrowed);
+ tcg_temp_free_i64(tcg_final);
+ return;
+}
+
+/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
+static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
+ bool src_unsigned, bool dst_unsigned,
+ int immh, int immb, int rn, int rd)
+{
+ int immhb = immh << 3 | immb;
+ int size = 32 - clz32(immh) - 1;
+ int shift = immhb - (8 << size);
+ int pass;
+
+ assert(immh != 0);
+ assert(!(scalar && is_q));
+
+ if (!scalar) {
+ if (!is_q && extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Since we use the variable-shift helpers we must
+ * replicate the shift count into each element of
+ * the tcg_shift value.
+ */
+ switch (size) {
+ case 0:
+ shift |= shift << 8;
+ /* fall through */
+ case 1:
+ shift |= shift << 16;
+ break;
+ case 2:
+ case 3:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_shift = tcg_const_i64(shift);
+ static NeonGenTwo64OpEnvFn * const fns[2][2] = {
+ { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
+ { NULL, gen_helper_neon_qshl_u64 },
+ };
+ NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
+ int maxpass = is_q ? 2 : 1;
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
+ write_vec_element(s, tcg_op, rd, pass, MO_64);
+
+ tcg_temp_free_i64(tcg_op);
+ }
+ tcg_temp_free_i64(tcg_shift);
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ TCGv_i32 tcg_shift = tcg_const_i32(shift);
+ static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
+ {
+ { gen_helper_neon_qshl_s8,
+ gen_helper_neon_qshl_s16,
+ gen_helper_neon_qshl_s32 },
+ { gen_helper_neon_qshlu_s8,
+ gen_helper_neon_qshlu_s16,
+ gen_helper_neon_qshlu_s32 }
+ }, {
+ { NULL, NULL, NULL },
+ { gen_helper_neon_qshl_u8,
+ gen_helper_neon_qshl_u16,
+ gen_helper_neon_qshl_u32 }
+ }
+ };
+ NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
+ TCGMemOp memop = scalar ? size : MO_32;
+ int maxpass = scalar ? 1 : is_q ? 4 : 2;
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, memop);
+ genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
+ if (scalar) {
+ switch (size) {
+ case 0:
+ tcg_gen_ext8u_i32(tcg_op, tcg_op);
+ break;
+ case 1:
+ tcg_gen_ext16u_i32(tcg_op, tcg_op);
+ break;
+ case 2:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ write_fp_sreg(s, rd, tcg_op);
+ } else {
+ write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ }
+
+ tcg_temp_free_i32(tcg_op);
+ }
+ tcg_temp_free_i32(tcg_shift);
+
+ if (!is_q && !scalar) {
+ clear_vec_high(s, rd);
+ }
+ }
+}
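
Side note: the size switch near the top of handle_simd_qshl broadcasts one scalar shift count into every 8- or 16-bit lane, because the variable-shift neon_qshl_* helpers read a per-element count. The broadcast in isolation:

    #include <assert.h>
    #include <stdint.h>

    /* Replicate a shift count into each lane of a 32-bit value */
    static uint32_t replicate_shift(uint32_t shift, int size)
    {
        switch (size) {
        case 0:                    /* 8-bit lanes */
            shift |= shift << 8;
            /* fall through */
        case 1:                    /* 16-bit lanes */
            shift |= shift << 16;
            break;
        default:                   /* 32-bit lanes: already in place */
            break;
        }
        return shift;
    }

    int main(void)
    {
        assert(replicate_shift(3, 0) == 0x03030303);
        assert(replicate_shift(3, 1) == 0x00030003);
        return 0;
    }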
+
+/* Common vector code for handling integer to FP conversion */
+static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
+ int elements, int is_signed,
+ int fracbits, int size)
+{
+ bool is_double = size == 3 ? true : false;
+ TCGv_ptr tcg_fpst = get_fpstatus_ptr();
+ TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
+ TCGv_i64 tcg_int = tcg_temp_new_i64();
+ TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
+ int pass;
+
+ for (pass = 0; pass < elements; pass++) {
+ read_vec_element(s, tcg_int, rn, pass, mop);
+
+ if (is_double) {
+ TCGv_i64 tcg_double = tcg_temp_new_i64();
+ if (is_signed) {
+ gen_helper_vfp_sqtod(tcg_double, tcg_int,
+ tcg_shift, tcg_fpst);
+ } else {
+ gen_helper_vfp_uqtod(tcg_double, tcg_int,
+ tcg_shift, tcg_fpst);
+ }
+ if (elements == 1) {
+ write_fp_dreg(s, rd, tcg_double);
+ } else {
+ write_vec_element(s, tcg_double, rd, pass, MO_64);
+ }
+ tcg_temp_free_i64(tcg_double);
+ } else {
+ TCGv_i32 tcg_single = tcg_temp_new_i32();
+ if (is_signed) {
+ gen_helper_vfp_sqtos(tcg_single, tcg_int,
+ tcg_shift, tcg_fpst);
+ } else {
+ gen_helper_vfp_uqtos(tcg_single, tcg_int,
+ tcg_shift, tcg_fpst);
+ }
+ if (elements == 1) {
+ write_fp_sreg(s, rd, tcg_single);
+ } else {
+ write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
+ }
+ tcg_temp_free_i32(tcg_single);
+ }
+ }
+
+ if (!is_double && elements == 2) {
+ clear_vec_high(s, rd);
+ }
+
+ tcg_temp_free_i64(tcg_int);
+ tcg_temp_free_ptr(tcg_fpst);
+ tcg_temp_free_i32(tcg_shift);
+}
+
+/* UCVTF/SCVTF - Integer to FP conversion */
+static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
+ bool is_q, bool is_u,
+ int immh, int immb, int opcode,
+ int rn, int rd)
+{
+ bool is_double = extract32(immh, 3, 1);
+ int size = is_double ? MO_64 : MO_32;
+ int elements;
+ int immhb = immh << 3 | immb;
+ int fracbits = (is_double ? 128 : 64) - immhb;
+
+ if (!extract32(immh, 2, 2)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (is_scalar) {
+ elements = 1;
+ } else {
+ elements = is_double ? 2 : is_q ? 4 : 2;
+ if (is_double && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+ /* immh == 0 would be a failure of the decode logic */
+ g_assert(immh);
+
+ handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
+}
+
+/* FCVTZS, FCVTZU - FP to fixed-point conversion */
+static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
+ bool is_q, bool is_u,
+ int immh, int immb, int rn, int rd)
+{
+ bool is_double = extract32(immh, 3, 1);
+ int immhb = immh << 3 | immb;
+ int fracbits = (is_double ? 128 : 64) - immhb;
+ int pass;
+ TCGv_ptr tcg_fpstatus;
+ TCGv_i32 tcg_rmode, tcg_shift;
+
+ if (!extract32(immh, 2, 2)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!is_scalar && !is_q && is_double) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ assert(!(is_scalar && is_q));
+
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_fpstatus = get_fpstatus_ptr();
+ tcg_shift = tcg_const_i32(fracbits);
+
+ if (is_double) {
+ int maxpass = is_scalar ? 1 : is_q ? 2 : 1;
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ if (is_u) {
+ gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+ } else {
+ gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+ }
+ write_vec_element(s, tcg_op, rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_op);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+ if (is_u) {
+ gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+ } else {
+ gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+ }
+ if (is_scalar) {
+ write_fp_sreg(s, rd, tcg_op);
+ } else {
+ write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ }
+ tcg_temp_free_i32(tcg_op);
+ }
+ if (!is_q && !is_scalar) {
+ clear_vec_high(s, rd);
+ }
+ }
+
+ tcg_temp_free_ptr(tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+}
+
/* C3.6.9 AdvSIMD scalar shift by immediate
* 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
* +-----+---+-------------+------+------+--------+---+------+------+
@@ -5925,7 +6317,18 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
int immh = extract32(insn, 19, 4);
bool is_u = extract32(insn, 29, 1);
+ if (immh == 0) {
+ unallocated_encoding(s);
+ return;
+ }
+
switch (opcode) {
+ case 0x08: /* SRI */
+ if (!is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
case 0x00: /* SSHR / USHR */
case 0x02: /* SSRA / USRA */
case 0x04: /* SRSHR / URSHR */
@@ -5935,8 +6338,39 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
case 0x0a: /* SHL / SLI */
handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
break;
+ case 0x1c: /* SCVTF, UCVTF */
+ handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
+ opcode, rn, rd);
+ break;
+ case 0x10: /* SQSHRUN, SQSHRUN2 */
+ case 0x11: /* SQRSHRUN, SQRSHRUN2 */
+ if (!is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_vec_simd_sqshrn(s, true, false, false, true,
+ immh, immb, opcode, rn, rd);
+ break;
+ case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
+ case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
+ handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
+ immh, immb, opcode, rn, rd);
+ break;
+ case 0xc: /* SQSHLU */
+ if (!is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
+ break;
+ case 0xe: /* SQSHL, UQSHL */
+ handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
+ break;
+ case 0x1f: /* FCVTZS, FCVTZU */
+ handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
+ break;
default:
- unsupported_encoding(s, insn);
+ unallocated_encoding(s);
break;
}
}
@@ -6483,15 +6917,25 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
}
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
- TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+ TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
+ TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
/* Handle 64->64 opcodes which are shared between the scalar and
* vector 2-reg-misc groups. We cover every integer opcode where size == 3
* is valid in either group and also the double-precision fp ops.
+ * The caller only need provide tcg_rmode and tcg_fpstatus if the op
+ * requires them.
*/
TCGCond cond;
switch (opcode) {
+ case 0x4: /* CLS, CLZ */
+ if (u) {
+ gen_helper_clz64(tcg_rd, tcg_rn);
+ } else {
+ gen_helper_cls64(tcg_rd, tcg_rn);
+ }
+ break;
case 0x5: /* NOT */
/* This opcode is shared with CNT and RBIT but we have earlier
* enforced that size == 3 if and only if this is the NOT insn.
@@ -6531,6 +6975,42 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
case 0x6f: /* FNEG */
gen_helper_vfp_negd(tcg_rd, tcg_rn);
break;
+ case 0x7f: /* FSQRT */
+ gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
+ break;
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ {
+ TCGv_i32 tcg_shift = tcg_const_i32(0);
+ gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ break;
+ }
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ {
+ TCGv_i32 tcg_shift = tcg_const_i32(0);
+ gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ break;
+ }
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ case 0x58: /* FRINTA */
+ case 0x79: /* FRINTI */
+ gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
+ break;
+ case 0x59: /* FRINTX */
+ gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
+ break;
default:
g_assert_not_reached();
}
@@ -6645,6 +7125,194 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
tcg_temp_free_ptr(fpst);
}
+static void handle_2misc_reciprocal(DisasContext *s, int opcode,
+ bool is_scalar, bool is_u, bool is_q,
+ int size, int rn, int rd)
+{
+ bool is_double = (size == 3);
+ TCGv_ptr fpst = get_fpstatus_ptr();
+
+ if (is_double) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+ int pass;
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ switch (opcode) {
+ case 0x3d: /* FRECPE */
+ gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
+ break;
+ case 0x3f: /* FRECPX */
+ gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
+ break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+ }
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_op);
+ } else {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ int pass, maxpasses;
+
+ if (is_scalar) {
+ maxpasses = 1;
+ } else {
+ maxpasses = is_q ? 4 : 2;
+ }
+
+ for (pass = 0; pass < maxpasses; pass++) {
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+
+ switch (opcode) {
+ case 0x3c: /* URECPE */
+ gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
+ break;
+ case 0x3d: /* FRECPE */
+ gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
+ break;
+ case 0x3f: /* FRECPX */
+ gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
+ break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (is_scalar) {
+ write_fp_sreg(s, rd, tcg_res);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+ }
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ if (!is_q && !is_scalar) {
+ clear_vec_high(s, rd);
+ }
+ }
+ tcg_temp_free_ptr(fpst);
+}
+
+static void handle_2misc_narrow(DisasContext *s, bool scalar,
+ int opcode, bool u, bool is_q,
+ int size, int rn, int rd)
+{
+ /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
+ * in the source becomes a size element in the destination).
+ */
+ int pass;
+ TCGv_i32 tcg_res[2];
+ int destelt = is_q ? 2 : 0;
+ int passes = scalar ? 1 : 2;
+
+ if (scalar) {
+ tcg_res[1] = tcg_const_i32(0);
+ }
+
+ for (pass = 0; pass < passes; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ NeonGenNarrowFn *genfn = NULL;
+ NeonGenNarrowEnvFn *genenvfn = NULL;
+
+ if (scalar) {
+ read_vec_element(s, tcg_op, rn, pass, size + 1);
+ } else {
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ }
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x12: /* XTN, SQXTUN */
+ {
+ static NeonGenNarrowFn * const xtnfns[3] = {
+ gen_helper_neon_narrow_u8,
+ gen_helper_neon_narrow_u16,
+ tcg_gen_trunc_i64_i32,
+ };
+ static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
+ gen_helper_neon_unarrow_sat8,
+ gen_helper_neon_unarrow_sat16,
+ gen_helper_neon_unarrow_sat32,
+ };
+ if (u) {
+ genenvfn = sqxtunfns[size];
+ } else {
+ genfn = xtnfns[size];
+ }
+ break;
+ }
+ case 0x14: /* SQXTN, UQXTN */
+ {
+ static NeonGenNarrowEnvFn * const fns[3][2] = {
+ { gen_helper_neon_narrow_sat_s8,
+ gen_helper_neon_narrow_sat_u8 },
+ { gen_helper_neon_narrow_sat_s16,
+ gen_helper_neon_narrow_sat_u16 },
+ { gen_helper_neon_narrow_sat_s32,
+ gen_helper_neon_narrow_sat_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x16: /* FCVTN, FCVTN2 */
+ /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
+ if (size == 2) {
+ gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
+ } else {
+ TCGv_i32 tcg_lo = tcg_temp_new_i32();
+ TCGv_i32 tcg_hi = tcg_temp_new_i32();
+ tcg_gen_trunc_i64_i32(tcg_lo, tcg_op);
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
+ tcg_gen_shri_i64(tcg_op, tcg_op, 32);
+ tcg_gen_trunc_i64_i32(tcg_hi, tcg_op);
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
+ tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
+ tcg_temp_free_i32(tcg_lo);
+ tcg_temp_free_i32(tcg_hi);
+ }
+ break;
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ /* 64 bit to 32 bit float conversion
+ * with von Neumann rounding (round to odd)
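+ * (an inexact result has its least significant bit forced to 1,
+ * so a subsequent rounding to a narrower format cannot introduce
+ * double-rounding error)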
+ */
+ assert(size == 2);
+ gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (genfn) {
+ genfn(tcg_res[pass], tcg_op);
+ } else if (genenvfn) {
+ genenvfn(tcg_res[pass], cpu_env, tcg_op);
+ }
+
+ tcg_temp_free_i64(tcg_op);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
/* C3.6.12 AdvSIMD scalar two reg misc
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
* +-----+---+-----------+------+-----------+--------+-----+------+------+
@@ -6658,6 +7326,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
int size = extract32(insn, 22, 2);
bool u = extract32(insn, 29, 1);
+ bool is_fcvt = false;
+ int rmode;
+ TCGv_i32 tcg_rmode;
+ TCGv_ptr tcg_fpstatus;
switch (opcode) {
case 0xa: /* CMLT */
@@ -6674,6 +7346,19 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
break;
+ case 0x12: /* SQXTUN */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x14: /* SQXTN, UQXTN */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
+ return;
case 0xc ... 0xf:
case 0x16 ... 0x1d:
case 0x1f:
@@ -6690,23 +7375,41 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x6d: /* FCMLE (zero) */
handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
return;
+ case 0x1d: /* SCVTF */
+ case 0x5d: /* UCVTF */
+ {
+ bool is_signed = (opcode == 0x1d);
+ handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
+ return;
+ }
+ case 0x3d: /* FRECPE */
+ case 0x3f: /* FRECPX */
+ case 0x7d: /* FRSQRTE */
+ handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
+ return;
case 0x1a: /* FCVTNS */
case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x1d: /* SCVTF */
case 0x3a: /* FCVTPS */
case 0x3b: /* FCVTZS */
- case 0x3d: /* FRECPE */
- case 0x3f: /* FRECPX */
- case 0x56: /* FCVTXN, FCVTXN2 */
case 0x5a: /* FCVTNU */
case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x5d: /* UCVTF */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
- case 0x7d: /* FRSQRTE */
- unsupported_encoding(s, insn);
+ is_fcvt = true;
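+ /* Bits 5 and 0 of the opcode encode the rounding mode directly:
+ * N=0 (ties-to-even), P=1 (+inf), M=2 (-inf), Z=3 (to zero),
+ * matching the FPROUNDING_* values.
+ */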
+ rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
+ break;
+ case 0x1c: /* FCVTAS */
+ case 0x5c: /* FCVTAU */
+ /* TIEAWAY doesn't fit in the usual rounding mode encoding */
+ is_fcvt = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ if (size == 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
return;
default:
unallocated_encoding(s);
@@ -6716,25 +7419,71 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
default:
/* Other categories of encoding in this class:
* + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
- * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
- * narrowing saturate ops: size 64/32/16 -> 32/16/8
*/
unsupported_encoding(s, insn);
return;
}
+ if (is_fcvt) {
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
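+ /* set_rmode returns the previous rounding mode in its result,
+ * so after this call tcg_rmode holds the value to restore
+ */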
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_fpstatus = get_fpstatus_ptr();
+ } else {
+ TCGV_UNUSED_I32(tcg_rmode);
+ TCGV_UNUSED_PTR(tcg_fpstatus);
+ }
+
if (size == 3) {
TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
TCGv_i64 tcg_rd = tcg_temp_new_i64();
- handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn);
+ handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
write_fp_dreg(s, rd, tcg_rd);
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i64(tcg_rn);
+ } else if (size == 2) {
+ TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
+ TCGv_i32 tcg_rd = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ {
+ TCGv_i32 tcg_shift = tcg_const_i32(0);
+ gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ break;
+ }
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ {
+ TCGv_i32 tcg_shift = tcg_const_i32(0);
+ gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_sreg(s, rd, tcg_rd);
+ tcg_temp_free_i32(tcg_rd);
+ tcg_temp_free_i32(tcg_rn);
} else {
- /* the 'size might not be 64' ops aren't implemented yet */
g_assert_not_reached();
}
+
+ if (is_fcvt) {
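+ /* restore the original rounding mode */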
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_ptr(tcg_fpstatus);
+ }
}
/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
@@ -6746,6 +7495,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
int shift = 2 * (8 << size) - immhb;
bool accumulate = false;
bool round = false;
+ bool insert = false;
int dsize = is_q ? 128 : 64;
int esize = 8 << size;
int elements = dsize/esize;
@@ -6775,6 +7525,9 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
case 0x06: /* SRSRA / URSRA (accum + rounding) */
accumulate = round = true;
break;
+ case 0x08: /* SRI */
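+ /* Shift right and insert: result bits that no shifted source bit
+ * lands on keep their old destination value, which is why rd is
+ * read below even though we do not accumulate.
+ */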
+ insert = true;
+ break;
}
if (round) {
@@ -6786,12 +7539,16 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
for (i = 0; i < elements; i++) {
read_vec_element(s, tcg_rn, rn, i, memop);
- if (accumulate) {
+ if (accumulate || insert) {
read_vec_element(s, tcg_rd, rd, i, memop);
}
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
+ if (insert) {
+ handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
+ } else {
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
+ }
write_vec_element(s, tcg_rd, rd, i, size);
}
@@ -6878,6 +7635,62 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
}
}
+/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
+static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int immhb = immh << 3 | immb;
+ int size = 32 - clz32(immh) - 1;
+ int dsize = 64;
+ int esize = 8 << size;
+ int elements = dsize/esize;
+ int shift = (2 * esize) - immhb;
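+ /* The position of the leading 1 in immh encodes the element size;
+ * the remaining immh:immb bits give the shift as 2*esize - immhb,
+ * i.e. a right shift of 1..esize on the 2*esize-wide sources.
+ * immh<3> set would mean 64-bit result elements, which no
+ * narrowing shift encodes, hence the unallocated check below.
+ */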
+ bool round = extract32(opcode, 0, 1);
+ TCGv_i64 tcg_rn, tcg_rd, tcg_final;
+ TCGv_i64 tcg_round;
+ int i;
+
+ if (extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ tcg_final = tcg_temp_new_i64();
+ read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
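+ /* The narrowed elements are collected in tcg_final and written
+ * back as one 64-bit element: SHRN fills the low half (and clears
+ * the rest), SHRN2 (is_q) fills the high half.
+ */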
+
+ if (round) {
+ uint64_t round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
+ } else {
+ TCGV_UNUSED_I64(tcg_round);
+ }
+
+ for (i = 0; i < elements; i++) {
+ read_vec_element(s, tcg_rn, rn, i, size+1);
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ false, true, size+1, shift);
+
+ tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ write_vec_element(s, tcg_final, rd, 0, MO_64);
+ } else {
+ write_vec_element(s, tcg_final, rd, 1, MO_64);
+ }
+
+ if (round) {
+ tcg_temp_free_i64(tcg_round);
+ }
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i64(tcg_final);
+}
+
/* C3.6.14 AdvSIMD shift by immediate
* 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
@@ -6896,6 +7709,12 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
bool is_q = extract32(insn, 30, 1);
switch (opcode) {
+ case 0x08: /* SRI */
+ if (!is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
case 0x00: /* SSHR / USHR */
case 0x02: /* SSRA / USRA (accumulate) */
case 0x04: /* SRSHR / URSHR (rounding) */
@@ -6905,15 +7724,42 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
case 0x0a: /* SHL / SLI */
handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
break;
+ case 0x10: /* SHRN */
+ case 0x11: /* RSHRN / SQRSHRUN */
+ if (is_u) {
+ handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
+ opcode, rn, rd);
+ } else {
+ handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
+ }
+ break;
+ case 0x12: /* SQSHRN / UQSHRN */
+ case 0x13: /* SQRSHRN / UQRSHRN */
+ handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
+ opcode, rn, rd);
+ break;
case 0x14: /* SSHLL / USHLL */
handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
break;
+ case 0x1c: /* SCVTF / UCVTF */
+ handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
+ opcode, rn, rd);
+ break;
+ case 0xc: /* SQSHLU */
+ if (!is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
+ break;
+ case 0xe: /* SQSHL, UQSHL */
+ handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
+ break;
+ case 0x1f: /* FCVTZS / FCVTZU */
+ handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
+ return;
default:
- /* We don't currently implement any of the Narrow or saturating shifts;
- * nor do we implement the fixed-point conversions in this
- * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
- */
- unsupported_encoding(s, insn);
+ unallocated_encoding(s);
return;
}
}
@@ -7124,6 +7970,10 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
tcg_passres, tcg_passres);
break;
+ case 14: /* PMULL */
+ assert(size == 0);
+ gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
+ break;
default:
g_assert_not_reached();
}
@@ -7243,6 +8093,30 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
}
}
+static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
+{
+ /* PMULL of 64 x 64 -> 128 is an odd special case because it
+ * is the only three-reg-diff instruction which produces a
+ * 128-bit wide result from a single operation. However, since
+ * it's possible to calculate the two halves more or less
+ * separately, we just use two helper calls.
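+ * PMULL is a carry-less (polynomial, GF(2)) multiply: partial
+ * products are combined with XOR rather than ADD, so no carries
+ * propagate between the two 64-bit halves of the result.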
+ */
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, is_q, MO_64);
+ read_vec_element(s, tcg_op2, rm, is_q, MO_64);
+ gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
+ write_vec_element(s, tcg_res, rd, 0, MO_64);
+ gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
+ write_vec_element(s, tcg_res, rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+}
+
/* C3.6.15 AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -7293,8 +8167,15 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
- break;
+ if (size == 3) {
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_pmull_64(s, is_q, rd, rn, rm);
+ return;
+ }
+ goto is_widening;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
@@ -7315,6 +8196,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ is_widening:
handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
default:
@@ -7991,76 +8873,48 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
}
}
-static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q,
- int size, int rn, int rd)
+static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
+ int size, int rn, int rd)
{
- /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
- * in the source becomes a size element in the destination).
+ /* Handle 2-reg-misc ops which are widening (so each size element
+ * in the source becomes a 2*size element in the destination).
+ * The only instruction like this is FCVTL.
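+ * FCVTL converts the low half of the source; FCVTL2 (is_q) the
+ * high half, hence the srcelt offsets below.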
*/
int pass;
- TCGv_i32 tcg_res[2];
- int destelt = is_q ? 2 : 0;
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenNarrowFn *genfn = NULL;
- NeonGenNarrowEnvFn *genenvfn = NULL;
+ if (size == 3) {
+ /* 32 -> 64 bit fp conversion */
+ TCGv_i64 tcg_res[2];
+ int srcelt = is_q ? 2 : 0;
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- tcg_res[pass] = tcg_temp_new_i32();
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ tcg_res[pass] = tcg_temp_new_i64();
- switch (opcode) {
- case 0x12: /* XTN, SQXTUN */
- {
- static NeonGenNarrowFn * const xtnfns[3] = {
- gen_helper_neon_narrow_u8,
- gen_helper_neon_narrow_u16,
- tcg_gen_trunc_i64_i32,
- };
- static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
- gen_helper_neon_unarrow_sat8,
- gen_helper_neon_unarrow_sat16,
- gen_helper_neon_unarrow_sat32,
- };
- if (u) {
- genenvfn = sqxtunfns[size];
- } else {
- genfn = xtnfns[size];
- }
- break;
- }
- case 0x14: /* SQXTN, UQXTN */
- {
- static NeonGenNarrowEnvFn * const fns[3][2] = {
- { gen_helper_neon_narrow_sat_s8,
- gen_helper_neon_narrow_sat_u8 },
- { gen_helper_neon_narrow_sat_s16,
- gen_helper_neon_narrow_sat_u16 },
- { gen_helper_neon_narrow_sat_s32,
- gen_helper_neon_narrow_sat_u32 },
- };
- genenvfn = fns[size][u];
- break;
- }
- default:
- g_assert_not_reached();
+ read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
+ gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
+ tcg_temp_free_i32(tcg_op);
}
-
- if (genfn) {
- genfn(tcg_res[pass], tcg_op);
- } else {
- genenvfn(tcg_res[pass], cpu_env, tcg_op);
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
}
+ } else {
+ /* 16 -> 32 bit fp conversion */
+ int srcelt = is_q ? 4 : 0;
+ TCGv_i32 tcg_res[4];
- tcg_temp_free_i64(tcg_op);
- }
+ for (pass = 0; pass < 4; pass++) {
+ tcg_res[pass] = tcg_temp_new_i32();
- for (pass = 0; pass < 2; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
- tcg_temp_free_i32(tcg_res[pass]);
- }
- if (!is_q) {
- clear_vec_high(s, rd);
+ read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
+ gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
+ cpu_env);
+ }
+ for (pass = 0; pass < 4; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
}
}
@@ -8133,6 +8987,108 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
}
}
+static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
+ bool is_q, int size, int rn, int rd)
+{
+ /* Implement the pairwise operations from 2-misc:
+ * SADDLP, UADDLP, SADALP, UADALP.
+ * These all add pairs of elements in the input to produce a
+ * double-width result element in the output (possibly accumulating).
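+ * E.g. UADDLP (Q form) with size == 0 turns sixteen bytes into
+ * eight 16-bit sums of adjacent byte pairs; the *ALP forms add
+ * those sums into the existing destination elements.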
+ */
+ bool accum = (opcode == 0x6);
+ int maxpass = is_q ? 2 : 1;
+ int pass;
+ TCGv_i64 tcg_res[2];
+
+ if (size == 2) {
+ /* 32 + 32 -> 64 op */
+ TCGMemOp memop = size + (u ? 0 : MO_SIGN);
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, pass * 2, memop);
+ read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
+ tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ if (accum) {
+ read_vec_element(s, tcg_op1, rd, pass, MO_64);
+ tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+ }
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ }
+ } else {
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ NeonGenOneOpFn *genfn;
+ static NeonGenOneOpFn * const fns[2][2] = {
+ { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
+ { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
+ };
+
+ genfn = fns[size][u];
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ genfn(tcg_res[pass], tcg_op);
+
+ if (accum) {
+ read_vec_element(s, tcg_op, rd, pass, MO_64);
+ if (size == 0) {
+ gen_helper_neon_addl_u16(tcg_res[pass],
+ tcg_res[pass], tcg_op);
+ } else {
+ gen_helper_neon_addl_u32(tcg_res[pass],
+ tcg_res[pass], tcg_op);
+ }
+ }
+ tcg_temp_free_i64(tcg_op);
+ }
+ }
+ if (!is_q) {
+ tcg_res[1] = tcg_const_i64(0);
+ }
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+}
+
+static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
+{
+ /* Implement SHLL and SHLL2 */
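+ /* The shift amount is fixed at the source element size (the only
+ * immediate SHLL accepts), e.g. SHLL Vd.8H, Vn.8B, #8 widens each
+ * byte and shifts it left by 8: widenfn then shli by esize below.
+ */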
+ int pass;
+ int part = is_q ? 2 : 0;
+ TCGv_i64 tcg_res[2];
+
+ for (pass = 0; pass < 2; pass++) {
+ static NeonGenWidenFn * const widenfns[3] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ };
+ NeonGenWidenFn *widenfn = widenfns[size];
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
+ tcg_res[pass] = tcg_temp_new_i64();
+ widenfn(tcg_res[pass], tcg_op);
+ tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
+
+ tcg_temp_free_i32(tcg_op);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+}
+
/* C3.6.17 AdvSIMD two reg misc
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
@@ -8147,6 +9103,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
bool is_q = extract32(insn, 30, 1);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
+ bool need_fpstatus = false;
+ bool need_rmode = false;
+ int rmode = -1;
+ TCGv_i32 tcg_rmode;
+ TCGv_ptr tcg_fpstatus;
switch (opcode) {
case 0x0: /* REV64, REV32 */
@@ -8173,23 +9134,28 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
+ handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
return;
- case 0x2: /* SADDLP, UADDLP */
case 0x4: /* CLS, CLZ */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x2: /* SADDLP, UADDLP */
case 0x6: /* SADALP, UADALP */
if (size == 3) {
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
+ handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
return;
case 0x13: /* SHLL, SHLL2 */
if (u == 0 || size == 3) {
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
+ handle_shll(s, is_q, size, rn, rd);
return;
case 0xa: /* CMLT */
if (u == 1) {
@@ -8220,8 +9186,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
/* Floating point: U, size[1] and opcode indicate operation;
* size[0] indicates single or double precision.
*/
+ int is_double = extract32(size, 0, 1);
opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
- size = extract32(size, 0, 1) ? 3 : 2;
+ size = is_double ? 3 : 2;
switch (opcode) {
case 0x2f: /* FABS */
case 0x6f: /* FNEG */
@@ -8230,6 +9197,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
break;
+ case 0x1d: /* SCVTF */
+ case 0x5d: /* UCVTF */
+ {
+ bool is_signed = (opcode == 0x1d);
+ int elements = is_double ? 2 : is_q ? 4 : 2;
+ if (is_double && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
+ return;
+ }
case 0x2c: /* FCMGT (zero) */
case 0x2d: /* FCMEQ (zero) */
case 0x2e: /* FCMLT (zero) */
@@ -8241,35 +9220,98 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
return;
- case 0x16: /* FCVTN, FCVTN2 */
- case 0x17: /* FCVTL, FCVTL2 */
- case 0x18: /* FRINTN */
- case 0x19: /* FRINTM */
+ case 0x7f: /* FSQRT */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
case 0x1a: /* FCVTNS */
case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x1d: /* SCVTF */
- case 0x38: /* FRINTP */
- case 0x39: /* FRINTZ */
case 0x3a: /* FCVTPS */
case 0x3b: /* FCVTZS */
- case 0x3c: /* URECPE */
- case 0x3d: /* FRECPE */
- case 0x56: /* FCVTXN, FCVTXN2 */
- case 0x58: /* FRINTA */
- case 0x59: /* FRINTX */
case 0x5a: /* FCVTNU */
case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x5d: /* UCVTF */
- case 0x79: /* FRINTI */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
- case 0x7c: /* URSQRTE */
+ need_fpstatus = true;
+ need_rmode = true;
+ rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x5c: /* FCVTAU */
+ case 0x1c: /* FCVTAS */
+ need_fpstatus = true;
+ need_rmode = true;
+ rmode = FPROUNDING_TIEAWAY;
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x3c: /* URECPE */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x3d: /* FRECPE */
case 0x7d: /* FRSQRTE */
- case 0x7f: /* FSQRT */
- unsupported_encoding(s, insn);
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
+ return;
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ if (size == 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x16: /* FCVTN, FCVTN2 */
+ /* handle_2misc_narrow does a 2*size -> size operation, but these
+ * instructions encode the source size rather than dest size.
+ */
+ handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
+ return;
+ case 0x17: /* FCVTL, FCVTL2 */
+ handle_2misc_widening(s, opcode, is_q, size, rn, rd);
return;
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ need_rmode = true;
+ rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
+ /* fall through */
+ case 0x59: /* FRINTX */
+ case 0x79: /* FRINTI */
+ need_fpstatus = true;
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x58: /* FRINTA */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEAWAY;
+ need_fpstatus = true;
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x7c: /* URSQRTE */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ need_fpstatus = true;
+ break;
default:
unallocated_encoding(s);
return;
@@ -8281,6 +9323,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
+ if (need_fpstatus) {
+ tcg_fpstatus = get_fpstatus_ptr();
+ } else {
+ TCGV_UNUSED_PTR(tcg_fpstatus);
+ }
+ if (need_rmode) {
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ } else {
+ TCGV_UNUSED_I32(tcg_rmode);
+ }
+
if (size == 3) {
/* All 64-bit element operations can be shared with scalar 2misc */
int pass;
@@ -8291,7 +9345,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_op, rn, pass, MO_64);
- handle_2misc_64(s, opcode, u, tcg_res, tcg_op);
+ handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
+ tcg_rmode, tcg_fpstatus);
write_vec_element(s, tcg_res, rd, pass, MO_64);
@@ -8327,6 +9382,13 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x9: /* CMEQ, CMLE */
cond = u ? TCG_COND_LE : TCG_COND_EQ;
goto do_cmop;
+ case 0x4: /* CLS */
+ if (u) {
+ gen_helper_clz32(tcg_res, tcg_op);
+ } else {
+ gen_helper_cls32(tcg_res, tcg_op);
+ }
+ break;
case 0xb: /* ABS, NEG */
if (u) {
tcg_gen_neg_i32(tcg_res, tcg_op);
@@ -8344,6 +9406,47 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x6f: /* FNEG */
gen_helper_vfp_negs(tcg_res, tcg_op);
break;
+ case 0x7f: /* FSQRT */
+ gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
+ break;
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ {
+ TCGv_i32 tcg_shift = tcg_const_i32(0);
+ gen_helper_vfp_tosls(tcg_res, tcg_op,
+ tcg_shift, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ break;
+ }
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ {
+ TCGv_i32 tcg_shift = tcg_const_i32(0);
+ gen_helper_vfp_touls(tcg_res, tcg_op,
+ tcg_shift, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_shift);
+ break;
+ }
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ case 0x58: /* FRINTA */
+ case 0x79: /* FRINTI */
+ gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x59: /* FRINTX */
+ gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x7c: /* URSQRTE */
+ gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
+ break;
default:
g_assert_not_reached();
}
@@ -8407,6 +9510,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
}
break;
+ case 0x4: /* CLS, CLZ */
+ if (u) {
+ if (size == 0) {
+ gen_helper_neon_clz_u8(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_clz_u16(tcg_res, tcg_op);
+ }
+ } else {
+ if (size == 0) {
+ gen_helper_neon_cls_s8(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_cls_s16(tcg_res, tcg_op);
+ }
+ }
+ break;
default:
g_assert_not_reached();
}
@@ -8421,6 +9539,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
if (!is_q) {
clear_vec_high(s, rd);
}
+
+ if (need_rmode) {
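+ /* restore the original rounding mode */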
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ tcg_temp_free_i32(tcg_rmode);
+ }
+ if (need_fpstatus) {
+ tcg_temp_free_ptr(tcg_fpstatus);
+ }
}
/* C3.6.13 AdvSIMD scalar x indexed element
@@ -9045,6 +10171,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
dc->vec_stride = 0;
dc->cp_regs = cpu->cp_regs;
dc->current_pl = arm_current_pl(env);
+ dc->features = env->features;
init_tmp_a64_array(dc);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index fbe513b40d..56e3b4bf7f 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6682,17 +6682,33 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
break;
}
case NEON_2RM_VRECPE:
- gen_helper_recpe_u32(tmp, tmp, cpu_env);
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_recpe_u32(tmp, tmp, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
break;
+ }
case NEON_2RM_VRSQRTE:
- gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
break;
+ }
case NEON_2RM_VRECPE_F:
- gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
break;
+ }
case NEON_2RM_VRSQRTE_F:
- gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
break;
+ }
case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
gen_vfp_sito(0, 1);
break;
@@ -10654,6 +10670,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
dc->cp_regs = cpu->cp_regs;
dc->current_pl = arm_current_pl(env);
+ dc->features = env->features;
cpu_F0s = tcg_temp_new_i32();
cpu_F1s = tcg_temp_new_i32();
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 2f491f9ff6..3525ffcecb 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -26,6 +26,7 @@ typedef struct DisasContext {
int aarch64;
int current_pl;
GHashTable *cp_regs;
+ uint64_t features; /* CPU features bits */
#define TMP_A64_MAX 16
int tmp_a64_count;
TCGv_i64 tmp_a64[TMP_A64_MAX];
@@ -33,6 +34,11 @@ typedef struct DisasContext {
extern TCGv_ptr cpu_env;
+static inline int arm_dc_feature(DisasContext *dc, int feature)
+{
+ return (dc->features & (1ULL << feature)) != 0;
+}
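+/* e.g. arm_dc_feature(s, ARM_FEATURE_V8_AES) gates the PMULL (1Q)
+ * encoding in the A64 decoder
+ */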
+
/* target-specific extra values for is_jmp */
/* These instructions trap after executing, so the A32/T32 decoder must
* defer them until after the conditional execution state has been updated.