aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2020-05-04 13:37:17 +0100
committer Peter Maydell <peter.maydell@linaro.org> 2020-05-04 13:37:17 +0100
commit 9af638cc1f665712522608c5d6b8c03d8fa67666 (patch)
tree b04f0d713a642e29f08f66b50b9592ee7529b8fe
parent 2ef486e76d64436be90f7359a3071fb2a56ce835 (diff)
parent 9aefc6cf9b73f66062d2f914a0136756e7a28211 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200504' into staging
target-arm queue:
 * Start of conversion of Neon insns to decodetree
 * versal board: support SD and RTC
 * Implement ARMv8.2-TTS2UXN
 * Make VQDMULL undefined when U=1
 * Some minor code cleanups

# gpg: Signature made Mon 04 May 2020 13:32:08 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20200504: (39 commits)
  target/arm: Move gen_ function typedefs to translate.h
  target/arm: Convert Neon 3-reg-same VMUL, VMLA, VMLS, VSHL to decodetree
  target/arm: Convert Neon 3-reg-same VQADD/VQSUB to decodetree
  target/arm: Convert Neon 3-reg-same comparisons to decodetree
  target/arm: Convert Neon 3-reg-same VMAX/VMIN to decodetree
  target/arm: Convert Neon 3-reg-same logic ops to decodetree
  target/arm: Convert Neon 3-reg-same VADD/VSUB to decodetree
  target/arm: Convert Neon 'load/store single structure' to decodetree
  target/arm: Convert Neon 'load single structure to all lanes' to decodetree
  target/arm: Convert Neon load/store multiple structures to decodetree
  target/arm: Convert VFM[AS]L (scalar) to decodetree
  target/arm: Convert V[US]DOT (scalar) to decodetree
  target/arm: Convert VCMLA (scalar) to decodetree
  target/arm: Convert VFM[AS]L (vector) to decodetree
  target/arm: Convert V[US]DOT (vector) to decodetree
  target/arm: Convert VCADD (vector) to decodetree
  target/arm: Convert VCMLA (vector) to decodetree
  target/arm: Add stubs for AArch32 Neon decodetree
  target/arm: Don't allow Thumb Neon insns without FEATURE_NEON
  target/arm/translate-vfp.inc.c: Remove duplicate simd_r32 check
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- hw/arm/mps2-tz.c 2
-rw-r--r-- hw/arm/xlnx-versal-virt.c 74
-rw-r--r-- hw/arm/xlnx-versal.c 115
-rw-r--r-- include/hw/arm/xlnx-versal.h 31
-rw-r--r-- target/arm/Makefile.objs 18
-rw-r--r-- target/arm/cpu-param.h 2
-rw-r--r-- target/arm/cpu.c 3
-rw-r--r-- target/arm/cpu.h 38
-rw-r--r-- target/arm/cpu64.c 8
-rw-r--r-- target/arm/helper.c 183
-rw-r--r-- target/arm/neon-dp.decode 86
-rw-r--r-- target/arm/neon-ls.decode 52
-rw-r--r-- target/arm/neon-shared.decode 66
-rw-r--r-- target/arm/translate-a64.c 17
-rw-r--r-- target/arm/translate-a64.h 9
-rw-r--r-- target/arm/translate-neon.inc.c 714
-rw-r--r-- target/arm/translate-vfp.inc.c 6
-rw-r--r-- target/arm/translate.c 714
-rw-r--r-- target/arm/translate.h 26
19 files changed, 1301 insertions, 863 deletions
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index a8dea7dde1..2c43041564 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -395,7 +395,7 @@ static void mps2tz_common_init(MachineState *machine)
exit(EXIT_FAILURE);
}
- sysbus_init_child_obj(OBJECT(machine), "iotkit", &mms->iotkit,
+ sysbus_init_child_obj(OBJECT(machine), TYPE_IOTKIT, &mms->iotkit,
sizeof(mms->iotkit), mmc->armsse_type);
iotkitdev = DEVICE(&mms->iotkit);
object_property_set_link(OBJECT(&mms->iotkit), OBJECT(system_memory),
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index 878a275140..7e749e1926 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -20,6 +20,7 @@
#include "hw/arm/sysbus-fdt.h"
#include "hw/arm/fdt.h"
#include "cpu.h"
+#include "hw/qdev-properties.h"
#include "hw/arm/xlnx-versal.h"
#define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt")
@@ -256,6 +257,53 @@ static void fdt_add_zdma_nodes(VersalVirt *s)
}
}
+static void fdt_add_sd_nodes(VersalVirt *s)
+{
+ const char clocknames[] = "clk_xin\0clk_ahb";
+ const char compat[] = "arasan,sdhci-8.9a";
+ int i;
+
+ for (i = ARRAY_SIZE(s->soc.pmc.iou.sd) - 1; i >= 0; i--) {
+ uint64_t addr = MM_PMC_SD0 + MM_PMC_SD0_SIZE * i;
+ char *name = g_strdup_printf("/sdhci@%" PRIx64, addr);
+
+ qemu_fdt_add_subnode(s->fdt, name);
+
+ qemu_fdt_setprop_cells(s->fdt, name, "clocks",
+ s->phandle.clk_25Mhz, s->phandle.clk_25Mhz);
+ qemu_fdt_setprop(s->fdt, name, "clock-names",
+ clocknames, sizeof(clocknames));
+ qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, VERSAL_SD0_IRQ_0 + i * 2,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, addr, 2, MM_PMC_SD0_SIZE);
+ qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
+ g_free(name);
+ }
+}
+
+static void fdt_add_rtc_node(VersalVirt *s)
+{
+ const char compat[] = "xlnx,zynqmp-rtc";
+ const char interrupt_names[] = "alarm\0sec";
+ char *name = g_strdup_printf("/rtc@%x", MM_PMC_RTC);
+
+ qemu_fdt_add_subnode(s->fdt, name);
+
+ qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_ALARM_IRQ,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI,
+ GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_SECONDS_IRQ,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop(s->fdt, name, "interrupt-names",
+ interrupt_names, sizeof(interrupt_names));
+ qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, MM_PMC_RTC, 2, MM_PMC_RTC_SIZE);
+ qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
+ g_free(name);
+}
+
static void fdt_nop_memory_nodes(void *fdt, Error **errp)
{
Error *err = NULL;
@@ -411,10 +459,23 @@ static void create_virtio_regions(VersalVirt *s)
}
}
+static void sd_plugin_card(SDHCIState *sd, DriveInfo *di)
+{
+ BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL;
+ DeviceState *card;
+
+ card = qdev_create(qdev_get_child_bus(DEVICE(sd), "sd-bus"), TYPE_SD_CARD);
+ object_property_add_child(OBJECT(sd), "card[*]", OBJECT(card),
+ &error_fatal);
+ qdev_prop_set_drive(card, "drive", blk, &error_fatal);
+ object_property_set_bool(OBJECT(card), true, "realized", &error_fatal);
+}
+
static void versal_virt_init(MachineState *machine)
{
VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(machine);
int psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
+ int i;
/*
* If the user provides an Operating System to be loaded, we expect them
@@ -440,7 +501,7 @@ static void versal_virt_init(MachineState *machine)
psci_conduit = QEMU_PSCI_CONDUIT_SMC;
}
- sysbus_init_child_obj(OBJECT(machine), "xlnx-ve", &s->soc,
+ sysbus_init_child_obj(OBJECT(machine), "xlnx-versal", &s->soc,
sizeof(s->soc), TYPE_XLNX_VERSAL);
object_property_set_link(OBJECT(&s->soc), OBJECT(machine->ram),
"ddr", &error_abort);
@@ -455,6 +516,8 @@ static void versal_virt_init(MachineState *machine)
fdt_add_gic_nodes(s);
fdt_add_timer_nodes(s);
fdt_add_zdma_nodes(s);
+ fdt_add_sd_nodes(s);
+ fdt_add_rtc_node(s);
fdt_add_cpu_nodes(s, psci_conduit);
fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz);
fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz);
@@ -464,14 +527,19 @@ static void versal_virt_init(MachineState *machine)
memory_region_add_subregion_overlap(get_system_memory(),
0, &s->soc.fpd.apu.mr, 0);
+ /* Plug in SD cards. */
+ for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) {
+ sd_plugin_card(&s->soc.pmc.iou.sd[i], drive_get_next(IF_SD));
+ }
+
s->binfo.ram_size = machine->ram_size;
s->binfo.loader_start = 0x0;
s->binfo.get_dtb = versal_virt_get_dtb;
s->binfo.modify_dtb = versal_virt_modify_dtb;
if (machine->kernel_filename) {
- arm_load_kernel(s->soc.fpd.apu.cpu[0], machine, &s->binfo);
+ arm_load_kernel(&s->soc.fpd.apu.cpu[0], machine, &s->binfo);
} else {
- AddressSpace *as = arm_boot_address_space(s->soc.fpd.apu.cpu[0],
+ AddressSpace *as = arm_boot_address_space(&s->soc.fpd.apu.cpu[0],
&s->binfo);
/* Some boot-loaders (e.g u-boot) don't like blobs at address 0 (NULL).
* Offset things by 4K. */
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index 94460f2343..321171bcce 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -20,9 +20,7 @@
#include "hw/arm/boot.h"
#include "kvm_arm.h"
#include "hw/misc/unimp.h"
-#include "hw/intc/arm_gicv3_common.h"
#include "hw/arm/xlnx-versal.h"
-#include "hw/char/pl011.h"
#define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72")
#define GEM_REVISION 0x40070106
@@ -33,23 +31,15 @@ static void versal_create_apu_cpus(Versal *s)
for (i = 0; i < ARRAY_SIZE(s->fpd.apu.cpu); i++) {
Object *obj;
- char *name;
-
- obj = object_new(XLNX_VERSAL_ACPU_TYPE);
- if (!obj) {
- /* Secondary CPUs start in PSCI powered-down state */
- error_report("Unable to create apu.cpu[%d] of type %s",
- i, XLNX_VERSAL_ACPU_TYPE);
- exit(EXIT_FAILURE);
- }
-
- name = g_strdup_printf("apu-cpu[%d]", i);
- object_property_add_child(OBJECT(s), name, obj, &error_fatal);
- g_free(name);
+ object_initialize_child(OBJECT(s), "apu-cpu[*]",
+ &s->fpd.apu.cpu[i], sizeof(s->fpd.apu.cpu[i]),
+ XLNX_VERSAL_ACPU_TYPE, &error_abort, NULL);
+ obj = OBJECT(&s->fpd.apu.cpu[i]);
object_property_set_int(obj, s->cfg.psci_conduit,
"psci-conduit", &error_abort);
if (i) {
+ /* Secondary CPUs start in PSCI powered-down state */
object_property_set_bool(obj, true,
"start-powered-off", &error_abort);
}
@@ -59,7 +49,6 @@ static void versal_create_apu_cpus(Versal *s)
object_property_set_link(obj, OBJECT(&s->fpd.apu.mr), "memory",
&error_abort);
object_property_set_bool(obj, true, "realized", &error_fatal);
- s->fpd.apu.cpu[i] = ARM_CPU(obj);
}
}
@@ -97,7 +86,7 @@ static void versal_create_apu_gic(Versal *s, qemu_irq *pic)
}
for (i = 0; i < nr_apu_cpus; i++) {
- DeviceState *cpudev = DEVICE(s->fpd.apu.cpu[i]);
+ DeviceState *cpudev = DEVICE(&s->fpd.apu.cpu[i]);
int ppibase = XLNX_VERSAL_NR_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
qemu_irq maint_irq;
int ti;
@@ -145,16 +134,17 @@ static void versal_create_uarts(Versal *s, qemu_irq *pic)
DeviceState *dev;
MemoryRegion *mr;
- dev = qdev_create(NULL, TYPE_PL011);
- s->lpd.iou.uart[i] = SYS_BUS_DEVICE(dev);
+ sysbus_init_child_obj(OBJECT(s), name,
+ &s->lpd.iou.uart[i], sizeof(s->lpd.iou.uart[i]),
+ TYPE_PL011);
+ dev = DEVICE(&s->lpd.iou.uart[i]);
qdev_prop_set_chr(dev, "chardev", serial_hd(i));
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
qdev_init_nofail(dev);
- mr = sysbus_mmio_get_region(s->lpd.iou.uart[i], 0);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
- sysbus_connect_irq(s->lpd.iou.uart[i], 0, pic[irqs[i]]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
g_free(name);
}
}
@@ -171,25 +161,26 @@ static void versal_create_gems(Versal *s, qemu_irq *pic)
DeviceState *dev;
MemoryRegion *mr;
- dev = qdev_create(NULL, "cadence_gem");
- s->lpd.iou.gem[i] = SYS_BUS_DEVICE(dev);
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
+ sysbus_init_child_obj(OBJECT(s), name,
+ &s->lpd.iou.gem[i], sizeof(s->lpd.iou.gem[i]),
+ TYPE_CADENCE_GEM);
+ dev = DEVICE(&s->lpd.iou.gem[i]);
if (nd->used) {
qemu_check_nic_model(nd, "cadence_gem");
qdev_set_nic_properties(dev, nd);
}
- object_property_set_int(OBJECT(s->lpd.iou.gem[i]),
+ object_property_set_int(OBJECT(dev),
2, "num-priority-queues",
&error_abort);
- object_property_set_link(OBJECT(s->lpd.iou.gem[i]),
+ object_property_set_link(OBJECT(dev),
OBJECT(&s->mr_ps), "dma",
&error_abort);
qdev_init_nofail(dev);
- mr = sysbus_mmio_get_region(s->lpd.iou.gem[i], 0);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
- sysbus_connect_irq(s->lpd.iou.gem[i], 0, pic[irqs[i]]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
g_free(name);
}
}
@@ -203,22 +194,72 @@ static void versal_create_admas(Versal *s, qemu_irq *pic)
DeviceState *dev;
MemoryRegion *mr;
- dev = qdev_create(NULL, "xlnx.zdma");
- s->lpd.iou.adma[i] = SYS_BUS_DEVICE(dev);
- object_property_set_int(OBJECT(s->lpd.iou.adma[i]), 128, "bus-width",
- &error_abort);
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
+ sysbus_init_child_obj(OBJECT(s), name,
+ &s->lpd.iou.adma[i], sizeof(s->lpd.iou.adma[i]),
+ TYPE_XLNX_ZDMA);
+ dev = DEVICE(&s->lpd.iou.adma[i]);
+ object_property_set_int(OBJECT(dev), 128, "bus-width", &error_abort);
qdev_init_nofail(dev);
- mr = sysbus_mmio_get_region(s->lpd.iou.adma[i], 0);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps,
MM_ADMA_CH0 + i * MM_ADMA_CH0_SIZE, mr);
- sysbus_connect_irq(s->lpd.iou.adma[i], 0, pic[VERSAL_ADMA_IRQ_0 + i]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[VERSAL_ADMA_IRQ_0 + i]);
g_free(name);
}
}
+#define SDHCI_CAPABILITIES 0x280737ec6481 /* Same as on ZynqMP. */
+static void versal_create_sds(Versal *s, qemu_irq *pic)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(s->pmc.iou.sd); i++) {
+ DeviceState *dev;
+ MemoryRegion *mr;
+
+ sysbus_init_child_obj(OBJECT(s), "sd[*]",
+ &s->pmc.iou.sd[i], sizeof(s->pmc.iou.sd[i]),
+ TYPE_SYSBUS_SDHCI);
+ dev = DEVICE(&s->pmc.iou.sd[i]);
+
+ object_property_set_uint(OBJECT(dev),
+ 3, "sd-spec-version", &error_fatal);
+ object_property_set_uint(OBJECT(dev), SDHCI_CAPABILITIES, "capareg",
+ &error_fatal);
+ object_property_set_uint(OBJECT(dev), UHS_I, "uhs", &error_fatal);
+ qdev_init_nofail(dev);
+
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+ memory_region_add_subregion(&s->mr_ps,
+ MM_PMC_SD0 + i * MM_PMC_SD0_SIZE, mr);
+
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
+ pic[VERSAL_SD0_IRQ_0 + i * 2]);
+ }
+}
+
+static void versal_create_rtc(Versal *s, qemu_irq *pic)
+{
+ SysBusDevice *sbd;
+ MemoryRegion *mr;
+
+ sysbus_init_child_obj(OBJECT(s), "rtc", &s->pmc.rtc, sizeof(s->pmc.rtc),
+ TYPE_XLNX_ZYNQMP_RTC);
+ sbd = SYS_BUS_DEVICE(&s->pmc.rtc);
+ qdev_init_nofail(DEVICE(sbd));
+
+ mr = sysbus_mmio_get_region(sbd, 0);
+ memory_region_add_subregion(&s->mr_ps, MM_PMC_RTC, mr);
+
+ /*
+ * TODO: Connect the ALARM and SECONDS interrupts once our RTC model
+ * supports them.
+ */
+ sysbus_connect_irq(sbd, 1, pic[VERSAL_RTC_APB_ERR_IRQ]);
+}
+
/* This takes the board allocated linear DDR memory and creates aliases
* for each split DDR range/aperture on the Versal address map.
*/
@@ -301,6 +342,8 @@ static void versal_realize(DeviceState *dev, Error **errp)
versal_create_uarts(s, pic);
versal_create_gems(s, pic);
versal_create_admas(s, pic);
+ versal_create_sds(s, pic);
+ versal_create_rtc(s, pic);
versal_map_ddr(s);
versal_unimp(s);
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
index 6c0a692b2f..9c9f47ba9d 100644
--- a/include/hw/arm/xlnx-versal.h
+++ b/include/hw/arm/xlnx-versal.h
@@ -14,7 +14,12 @@
#include "hw/sysbus.h"
#include "hw/arm/boot.h"
+#include "hw/sd/sdhci.h"
#include "hw/intc/arm_gicv3.h"
+#include "hw/char/pl011.h"
+#include "hw/dma/xlnx-zdma.h"
+#include "hw/net/cadence_gem.h"
+#include "hw/rtc/xlnx-zynqmp-rtc.h"
#define TYPE_XLNX_VERSAL "xlnx-versal"
#define XLNX_VERSAL(obj) OBJECT_CHECK(Versal, (obj), TYPE_XLNX_VERSAL)
@@ -23,6 +28,7 @@
#define XLNX_VERSAL_NR_UARTS 2
#define XLNX_VERSAL_NR_GEMS 2
#define XLNX_VERSAL_NR_ADMAS 8
+#define XLNX_VERSAL_NR_SDS 2
#define XLNX_VERSAL_NR_IRQS 192
typedef struct Versal {
@@ -33,7 +39,7 @@ typedef struct Versal {
struct {
struct {
MemoryRegion mr;
- ARMCPU *cpu[XLNX_VERSAL_NR_ACPUS];
+ ARMCPU cpu[XLNX_VERSAL_NR_ACPUS];
GICv3State gic;
} apu;
} fpd;
@@ -49,12 +55,21 @@ typedef struct Versal {
MemoryRegion mr_ocm;
struct {
- SysBusDevice *uart[XLNX_VERSAL_NR_UARTS];
- SysBusDevice *gem[XLNX_VERSAL_NR_GEMS];
- SysBusDevice *adma[XLNX_VERSAL_NR_ADMAS];
+ PL011State uart[XLNX_VERSAL_NR_UARTS];
+ CadenceGEMState gem[XLNX_VERSAL_NR_GEMS];
+ XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS];
} iou;
} lpd;
+ /* The Platform Management Controller subsystem. */
+ struct {
+ struct {
+ SDHCIState sd[XLNX_VERSAL_NR_SDS];
+ } iou;
+
+ XlnxZynqMPRTC rtc;
+ } pmc;
+
struct {
MemoryRegion *mr_ddr;
uint32_t psci_conduit;
@@ -77,6 +92,10 @@ typedef struct Versal {
#define VERSAL_GEM1_IRQ_0 58
#define VERSAL_GEM1_WAKE_IRQ_0 59
#define VERSAL_ADMA_IRQ_0 60
+#define VERSAL_RTC_APB_ERR_IRQ 121
+#define VERSAL_SD0_IRQ_0 126
+#define VERSAL_RTC_ALARM_IRQ 142
+#define VERSAL_RTC_SECONDS_IRQ 143
/* Architecturally reserved IRQs suitable for virtualization. */
#define VERSAL_RSVD_IRQ_FIRST 111
@@ -126,6 +145,10 @@ typedef struct Versal {
#define MM_FPD_CRF 0xfd1a0000U
#define MM_FPD_CRF_SIZE 0x140000
+#define MM_PMC_SD0 0xf1040000U
+#define MM_PMC_SD0_SIZE 0x10000
#define MM_PMC_CRP 0xf1260000U
#define MM_PMC_CRP_SIZE 0x10000
+#define MM_PMC_RTC 0xf12a0000
+#define MM_PMC_RTC_SIZE 0x10000
#endif
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index cf26c16f5f..775b3e24f2 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -18,6 +18,21 @@ target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
$(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
+target/arm/decode-neon-shared.inc.c: $(SRC_PATH)/target/arm/neon-shared.decode $(DECODETREE)
+ $(call quiet-command,\
+ $(PYTHON) $(DECODETREE) --static-decode disas_neon_shared -o $@ $<,\
+ "GEN", $(TARGET_DIR)$@)
+
+target/arm/decode-neon-dp.inc.c: $(SRC_PATH)/target/arm/neon-dp.decode $(DECODETREE)
+ $(call quiet-command,\
+ $(PYTHON) $(DECODETREE) --static-decode disas_neon_dp -o $@ $<,\
+ "GEN", $(TARGET_DIR)$@)
+
+target/arm/decode-neon-ls.inc.c: $(SRC_PATH)/target/arm/neon-ls.decode $(DECODETREE)
+ $(call quiet-command,\
+ $(PYTHON) $(DECODETREE) --static-decode disas_neon_ls -o $@ $<,\
+ "GEN", $(TARGET_DIR)$@)
+
target/arm/decode-vfp.inc.c: $(SRC_PATH)/target/arm/vfp.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_vfp -o $@ $<,\
@@ -49,6 +64,9 @@ target/arm/decode-t16.inc.c: $(SRC_PATH)/target/arm/t16.decode $(DECODETREE)
"GEN", $(TARGET_DIR)$@)
target/arm/translate-sve.o: target/arm/decode-sve.inc.c
+target/arm/translate.o: target/arm/decode-neon-shared.inc.c
+target/arm/translate.o: target/arm/decode-neon-dp.inc.c
+target/arm/translate.o: target/arm/decode-neon-ls.inc.c
target/arm/translate.o: target/arm/decode-vfp.inc.c
target/arm/translate.o: target/arm/decode-vfp-uncond.inc.c
target/arm/translate.o: target/arm/decode-a32.inc.c
diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
index d593b60b28..6321385b46 100644
--- a/target/arm/cpu-param.h
+++ b/target/arm/cpu-param.h
@@ -29,6 +29,6 @@
# define TARGET_PAGE_BITS_MIN 10
#endif
-#define NB_MMU_MODES 12
+#define NB_MMU_MODES 11
#endif
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 141d947775..5d64adfe76 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2686,6 +2686,7 @@ static void arm_max_initfn(Object *obj)
t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */
t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
+ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = t;
}
#endif
@@ -2754,7 +2755,7 @@ static const ARMCPUInfo arm_cpus[] = {
static Property arm_cpu_properties[] = {
DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
- DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
+ DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8b9f2961ba..8608da6b6f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -894,7 +894,7 @@ struct ARMCPU {
uint64_t id_aa64dfr0;
uint64_t id_aa64dfr1;
} isar;
- uint32_t midr;
+ uint64_t midr;
uint32_t revidr;
uint32_t reset_fpsid;
uint32_t ctr;
@@ -2801,6 +2801,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* handling via the TLB. The only way to do a stage 1 translation without
* the immediate stage 2 translation is via the ATS or AT system insns,
* which can be slow-pathed and always do a page table walk.
+ * The only use of stage 2 translations is either as part of an s1+2
+ * lookup or when loading the descriptors during a stage 1 page table walk,
+ * and in both those cases we don't use the TLB.
* 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3"
* translation regimes, because they map reasonably well to each other
* and they can't both be active at the same time.
@@ -2816,15 +2819,15 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* NS EL1 EL1&0 stage 1+2 (aka NS PL1)
* NS EL1 EL1&0 stage 1+2 +PAN
* NS EL0 EL2&0
+ * NS EL2 EL2&0
* NS EL2 EL2&0 +PAN
* NS EL2 (aka NS PL2)
* S EL0 EL1&0 (aka S PL0)
* S EL1 EL1&0 (not used if EL3 is 32 bit)
* S EL1 EL1&0 +PAN
* S EL3 (aka S PL1)
- * NS EL1&0 stage 2
*
- * for a total of 12 different mmu_idx.
+ * for a total of 11 different mmu_idx.
*
* R profile CPUs have an MPU, but can use the same set of MMU indexes
* as A profile. They only need to distinguish NS EL0 and NS EL1 (and
@@ -2846,7 +2849,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* are not quite the same -- different CPU types (most notably M profile
* vs A/R profile) would like to use MMU indexes with different semantics,
* but since we don't ever need to use all of those in a single CPU we
- * can avoid setting NB_MMU_MODES to more than 8. The lower bits of
+ * can avoid having to set NB_MMU_MODES to "total number of A profile MMU
+ * modes + total number of M profile MMU modes". The lower bits of
* ARMMMUIdx are the core TLB mmu index, and the higher bits are always
* the same for any particular CPU.
* Variables of type ARMMUIdx are always full values, and the core
@@ -2894,8 +2898,6 @@ typedef enum ARMMMUIdx {
ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A,
ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A,
- ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A,
-
/*
* These are not allocated TLBs and are used only for AT system
* instructions or for the first stage of an S12 page table walk.
@@ -2903,6 +2905,14 @@ typedef enum ARMMMUIdx {
ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB,
ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB,
ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB,
+ /*
+ * Not allocated a TLB: used only for second stage of an S12 page
+ * table walk, or for descriptor loads during first stage of an S1
+ * page table walk. Note that if we ever want to have a TLB for this
+ * then various TLB flush insns which currently are no-ops or flush
+ * only stage 1 MMU indexes will need to change to flush stage 2.
+ */
+ ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB,
/*
* M-profile.
@@ -2936,7 +2946,6 @@ typedef enum ARMMMUIdxBit {
TO_CORE_BIT(SE10_1),
TO_CORE_BIT(SE10_1_PAN),
TO_CORE_BIT(SE3),
- TO_CORE_BIT(Stage2),
TO_CORE_BIT(MUser),
TO_CORE_BIT(MPriv),
@@ -3601,6 +3610,11 @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id)
return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0;
}
+static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0;
+}
+
/*
* 64-bit feature tests via id registers.
*/
@@ -3813,6 +3827,11 @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
}
+static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
+}
+
/*
* Feature tests for "does this exist in either 32-bit or 64-bit?"
*/
@@ -3841,6 +3860,11 @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id)
return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id);
}
+static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id);
+}
+
/*
* Forward to the above feature tests given an ARMCPU pointer.
*/
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 74afc28d53..9bdf75b1ab 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -673,6 +673,7 @@ static void aarch64_max_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1);
t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */
t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* VMID16 */
+ t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* TTS2UXN */
cpu->isar.id_aa64mmfr1 = t;
t = cpu->isar.id_aa64mmfr2;
@@ -706,11 +707,12 @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_MMFR4, HPDS, 1); /* AA32HPD */
u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
u = FIELD_DP32(u, ID_MMFR4, CNP, 1); /* TTCNP */
+ u = FIELD_DP32(u, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = u;
- u = cpu->isar.id_aa64dfr0;
- u = FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
- cpu->isar.id_aa64dfr0 = u;
+ t = cpu->isar.id_aa64dfr0;
+ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
+ cpu->isar.id_aa64dfr0 = t;
u = cpu->isar.id_dfr0;
u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index dfefb9b3d9..a94f650795 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -41,6 +41,7 @@
static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
@@ -814,8 +815,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx(cs,
ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2);
+ ARMMMUIdxBit_E10_0);
}
static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -826,46 +826,9 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx_all_cpus_synced(cs,
ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2);
+ ARMMMUIdxBit_E10_0);
}
-static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
- */
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
-
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
-
- pageaddr = sextract64(value << 12, 0, 40);
-
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
-}
-
-static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
-
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
-
- pageaddr = sextract64(value << 12, 0, 40);
-
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_Stage2);
-}
static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
@@ -4055,8 +4018,7 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx(cs,
ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2);
+ ARMMMUIdxBit_E10_0);
raw_write(env, ri, value);
}
}
@@ -4538,11 +4500,6 @@ static int alle1_tlbmask(CPUARMState *env)
return ARMMMUIdxBit_SE10_1 |
ARMMMUIdxBit_SE10_1_PAN |
ARMMMUIdxBit_SE10_0;
- } else if (arm_feature(env, ARM_FEATURE_EL2)) {
- return ARMMMUIdxBit_E10_1 |
- ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2;
} else {
return ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
@@ -4689,44 +4646,6 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMMMUIdxBit_SE3);
}
-static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
- */
- ARMCPU *cpu = env_archcpu(env);
- CPUState *cs = CPU(cpu);
- uint64_t pageaddr;
-
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
-
- pageaddr = sextract64(value << 12, 0, 48);
-
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
-}
-
-static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
-
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
-
- pageaddr = sextract64(value << 12, 0, 48);
-
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_Stage2);
-}
-
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -4965,12 +4884,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
.access = PL2_W, .type = ARM_CP_NO_RAW,
@@ -4981,12 +4898,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbi_aa64_alle1is_write },
{ .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
.access = PL2_W, .type = ARM_CP_NO_RAW,
@@ -5067,20 +4982,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbimva_hyp_is_write },
{ .name = "TLBIIPAS2",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2IS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2L",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2LIS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
/* 32 bit cache operations */
{ .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
.type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access },
@@ -9997,9 +9908,10 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
*
* @env: CPUARMState
* @s2ap: The 2-bit stage2 access permissions (S2AP)
- * @xn: XN (execute-never) bit
+ * @xn: XN (execute-never) bits
+ * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
*/
-static int get_S2prot(CPUARMState *env, int s2ap, int xn)
+static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
{
int prot = 0;
@@ -10009,9 +9921,32 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn)
if (s2ap & 2) {
prot |= PAGE_WRITE;
}
- if (!xn) {
- if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
+
+ if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) {
+ switch (xn) {
+ case 0:
prot |= PAGE_EXEC;
+ break;
+ case 1:
+ if (s1_is_el0) {
+ prot |= PAGE_EXEC;
+ }
+ break;
+ case 2:
+ break;
+ case 3:
+ if (!s1_is_el0) {
+ prot |= PAGE_EXEC;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ if (!extract32(xn, 1, 1)) {
+ if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
+ prot |= PAGE_EXEC;
+ }
}
}
return prot;
@@ -10142,8 +10077,10 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
pcacheattrs = &cacheattrs;
}
- ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa,
- &txattrs, &s2prot, &s2size, fi, pcacheattrs);
+ ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2,
+ false,
+ &s2pa, &txattrs, &s2prot, &s2size, fi,
+ pcacheattrs);
if (ret) {
assert(fi->type != ARMFault_None);
fi->s2addr = addr;
@@ -10744,8 +10681,32 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
};
}
+/**
+ * get_phys_addr_lpae: perform one stage of page table walk, LPAE format
+ *
+ * Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
+ * prot and page_size may not be filled in, and the populated fsr value provides
+ * information on why the translation aborted, in the format of a long-format
+ * DFSR/IFSR fault register, with the following caveats:
+ * * the WnR bit is never set (the caller must do this).
+ *
+ * @env: CPUARMState
+ * @address: virtual address to get physical address for
+ * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
+ * @mmu_idx: MMU index indicating required translation regime
+ * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table
+ * walk), must be true if this is stage 2 of a stage 1+2 walk for an
+ * EL0 access. If @mmu_idx is anything else, @s1_is_el0 is ignored.
+ * @phys_ptr: set to the physical address corresponding to the virtual address
+ * @txattrs: set to the memory transaction attributes to use
+ * @prot: set to the permissions for the page containing phys_ptr
+ * @page_size_ptr: set to the size of the page containing phys_ptr
+ * @fi: set to fault info if the translation fails
+ * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
+ */
static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
@@ -10964,13 +10925,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
}
ap = extract32(attrs, 4, 2);
- xn = extract32(attrs, 12, 1);
if (mmu_idx == ARMMMUIdx_Stage2) {
ns = true;
- *prot = get_S2prot(env, ap, xn);
+ xn = extract32(attrs, 11, 2);
+ *prot = get_S2prot(env, ap, xn, s1_is_el0);
} else {
ns = extract32(attrs, 3, 1);
+ xn = extract32(attrs, 12, 1);
pxn = extract32(attrs, 11, 1);
*prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
}
@@ -11837,6 +11799,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
/* S1 is done. Now do S2 translation. */
ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2,
+ mmu_idx == ARMMMUIdx_E10_0,
phys_ptr, attrs, &s2_prot,
page_size, fi,
cacheattrs != NULL ? &cacheattrs2 : NULL);
@@ -11961,7 +11924,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
}
if (regime_using_lpae_format(env, mmu_idx)) {
- return get_phys_addr_lpae(env, address, access_type, mmu_idx,
+ return get_phys_addr_lpae(env, address, access_type, mmu_idx, false,
phys_ptr, attrs, prot, page_size,
fi, cacheattrs);
} else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
new file mode 100644
index 0000000000..ec3a92fe75
--- /dev/null
+++ b/target/arm/neon-dp.decode
@@ -0,0 +1,86 @@
+# AArch32 Neon data-processing instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# VFP/Neon register fields; same as vfp.decode
+%vm_dp 5:1 0:4
+%vn_dp 7:1 16:4
+%vd_dp 22:1 12:4
+
+# Encodings for Neon data processing instructions where the T32 encoding
+# is a simple transformation of the A32 encoding.
+# More specifically, this file covers instructions where the A32 encoding is
+# 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+# and the T32 encoding is
+# 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+# This file works on the A32 encoding only; calling code for T32 has to
+# transform the insn into the A32 version first.
+
+######################################################################
+# 3-reg-same grouping:
+# 1111 001 U 0 D sz:2 Vn:4 Vd:4 opc:4 N Q M op Vm:4
+######################################################################
+
+&3same vm vn vd q size
+
+@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
+VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
+
+@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
+
+VAND_3s 1111 001 0 0 . 00 .... .... 0001 ... 1 .... @3same_logic
+VBIC_3s 1111 001 0 0 . 01 .... .... 0001 ... 1 .... @3same_logic
+VORR_3s 1111 001 0 0 . 10 .... .... 0001 ... 1 .... @3same_logic
+VORN_3s 1111 001 0 0 . 11 .... .... 0001 ... 1 .... @3same_logic
+VEOR_3s 1111 001 1 0 . 00 .... .... 0001 ... 1 .... @3same_logic
+VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
+VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
+VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
+
+VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
+VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
+
+VCGT_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 0 .... @3same
+VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
+VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
+VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
+
+VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same
+VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same
+
+VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
+VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
+VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
+VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
+
+VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
+VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
+
+VTST_3s 1111 001 0 0 . .. .... .... 1000 . . . 1 .... @3same
+VCEQ_3s 1111 001 1 0 . .. .... .... 1000 . . . 1 .... @3same
+
+VMLA_3s 1111 001 0 0 . .. .... .... 1001 . . . 0 .... @3same
+VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
+
+VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
+VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
new file mode 100644
index 0000000000..c7b03a72e8
--- /dev/null
+++ b/target/arm/neon-ls.decode
@@ -0,0 +1,52 @@
+# AArch32 Neon load/store instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon load/store instructions where the T32 encoding
+# is a simple transformation of the A32 encoding.
+# More specifically, this file covers instructions where the A32 encoding is
+# 0b1111_0100_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
+# and the T32 encoding is
+# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
+# This file works on the A32 encoding only; calling code for T32 has to
+# transform the insn into the A32 version first.
+
+%vd_dp 22:1 12:4
+
+# Neon load/store multiple structures
+
+VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
+ vd=%vd_dp
+
+# Neon load single element to all lanes
+
+VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
+ vd=%vd_dp
+
+# Neon load/store single structure to one lane
+%imm1_5_p1 5:1 !function=plus1
+%imm1_6_p1 6:1 !function=plus1
+
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \
+ vd=%vd_dp size=0 stride=1
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 align:2 rm:4 \
+ vd=%vd_dp size=1 stride=%imm1_5_p1
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 align:3 rm:4 \
+ vd=%vd_dp size=2 stride=%imm1_6_p1
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
new file mode 100644
index 0000000000..f297ba8cdf
--- /dev/null
+++ b/target/arm/neon-shared.decode
@@ -0,0 +1,66 @@
+# AArch32 Neon instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon instructions whose encoding is the same for
+# both A32 and T32.
+
+# More specifically, this covers:
+# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
+# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
+
+# VFP/Neon register fields; same as vfp.decode
+%vm_dp 5:1 0:4
+%vm_sp 0:4 5:1
+%vn_dp 7:1 16:4
+%vn_sp 16:4 7:1
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+# VUDOT and VSDOT
+VDOT 1111 110 00 . 10 .... .... 1101 . q:1 . u:1 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+# VFM[AS]L
+VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0
+VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
+
+VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
+ vn=%vn_dp vd=%vd_dp size=0
+VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=1 index=0
+
+VDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 u:1 rm:4 \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+%vfml_scalar_q0_rm 0:3 5:1
+%vfml_scalar_q1_index 5:1 3:1
+VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \
+ rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0
+VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \
+ index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index efb1c4adc4..a896f9c4b8 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -70,23 +70,6 @@ typedef struct AArch64DecodeTable {
AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
-/* Function prototype for gen_ functions for calling Neon helpers */
-typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
-typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
-typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
-typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
-typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
-typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
-typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
-typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
-typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
-typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
-typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
-typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
-typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
-
/* initialize TCG globals. */
void a64_translate_init(void)
{
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 4c2c91ae1b..f02fbb63a4 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -115,13 +115,4 @@ static inline int vec_full_reg_size(DisasContext *s)
bool disas_sve(DisasContext *, uint32_t);
-/* Note that the gvec expanders operate on offsets + sizes. */
-typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
-typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
- uint32_t, uint32_t);
-typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
-typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
-
#endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
new file mode 100644
index 0000000000..50b77b6d71
--- /dev/null
+++ b/target/arm/translate-neon.inc.c
@@ -0,0 +1,714 @@
+/*
+ * ARM translation: AArch32 Neon instructions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ * Copyright (c) 2020 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This file is intended to be included from translate.c; it uses
+ * some macros and definitions provided by that file.
+ * It might be possible to convert it to a standalone .c file eventually.
+ */
+
+static inline int plus1(DisasContext *s, int x)
+{
+ return x + 1;
+}
+
+/* Include the generated Neon decoder */
+#include "decode-neon-dp.inc.c"
+#include "decode-neon-ls.inc.c"
+#include "decode-neon-shared.inc.c"
+
+static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
+{
+ int opr_sz;
+ TCGv_ptr fpst;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
+
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ fpst = get_fpstatus_ptr(1);
+ fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpst, opr_sz, opr_sz, a->rot,
+ fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
+
+static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
+{
+ int opr_sz;
+ TCGv_ptr fpst;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
+
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ fpst = get_fpstatus_ptr(1);
+ fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpst, opr_sz, opr_sz, a->rot,
+ fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
+
+static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
+{
+ int opr_sz;
+ gen_helper_gvec_3 *fn_gvec;
+
+ if (!dc_isar_feature(aa32_dp, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ opr_sz, opr_sz, 0, fn_gvec);
+ return true;
+}
+
+static bool trans_VFML(DisasContext *s, arg_VFML *a)
+{
+ int opr_sz;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(a->q, a->vn),
+ vfp_reg_offset(a->q, a->vm),
+ cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
+ gen_helper_gvec_fmlal_a32);
+ return true;
+}
+
+static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
+{
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
+ int opr_sz;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_vcma, s)) {
+ return false;
+ }
+ if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd | a->vn) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
+ : gen_helper_gvec_fcmlah_idx);
+ opr_sz = (1 + a->q) * 8;
+ fpst = get_fpstatus_ptr(1);
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpst, opr_sz, opr_sz,
+ (a->index << 2) | a->rot, fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
+
+/*
+ * Translate V[US]DOT (by element).
+ *
+ * Returns false if the insn should UNDEF (feature missing, or an invalid
+ * register number for this CPU / Q setting). Returns true once the insn
+ * has been handled, which includes the case where vfp_access_check()
+ * fails and no operation is emitted here.
+ */
+static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
+{
+    gen_helper_gvec_3 *fn_gvec;
+    int opr_sz;
+
+    if (!dc_isar_feature(aa32_dp, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn) & 0x10)) {
+        return false;
+    }
+
+    /* With Q set, Vd and Vn must be even register numbers. */
+    if ((a->vd | a->vn) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * The dot-product helpers are called through the out-of-line expander
+     * without a float_status pointer, so no fpstatus temporary is created.
+     */
+    fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
+    opr_sz = (1 + a->q) * 8;
+    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
+                       vfp_reg_offset(1, a->vn),
+                       vfp_reg_offset(1, a->rm),
+                       opr_sz, opr_sz, a->index, fn_gvec);
+    return true;
+}
+
+
+static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
+{
+ int opr_sz;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(a->q, a->vn),
+ vfp_reg_offset(a->q, a->rm),
+ cpu_env, opr_sz, opr_sz,
+ (a->index << 2) | a->s, /* is_2 == 0 */
+ gen_helper_gvec_fmlal_idx_a32);
+ return true;
+}
+
+static struct {
+ int nregs;
+ int interleave;
+ int spacing;
+} const neon_ls_element_type[11] = {
+ {1, 4, 1},
+ {1, 4, 2},
+ {4, 1, 1},
+ {2, 2, 2},
+ {1, 3, 1},
+ {1, 3, 2},
+ {3, 1, 1},
+ {1, 1, 1},
+ {1, 2, 1},
+ {1, 2, 2},
+ {2, 1, 1}
+};
+
+static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
+ int stride)
+{
+ if (rm != 15) {
+ TCGv_i32 base;
+
+ base = load_reg(s, rn);
+ if (rm == 13) {
+ tcg_gen_addi_i32(base, base, stride);
+ } else {
+ TCGv_i32 index;
+ index = load_reg(s, rm);
+ tcg_gen_add_i32(base, base, index);
+ tcg_temp_free_i32(index);
+ }
+ store_reg(s, rn, base);
+ }
+}
+
+static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
+{
+ /* Neon load/store multiple structures */
+ int nregs, interleave, spacing, reg, n;
+ MemOp endian = s->be_data;
+ int mmu_idx = get_mem_index(s);
+ int size = a->size;
+ TCGv_i64 tmp64;
+ TCGv_i32 addr, tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+ if (a->itype > 10) {
+ return false;
+ }
+ /* Catch UNDEF cases for bad values of align field */
+ switch (a->itype & 0xc) {
+ case 4:
+ if (a->align >= 2) {
+ return false;
+ }
+ break;
+ case 8:
+ if (a->align == 3) {
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ nregs = neon_ls_element_type[a->itype].nregs;
+ interleave = neon_ls_element_type[a->itype].interleave;
+ spacing = neon_ls_element_type[a->itype].spacing;
+ if (size == 3 && (interleave | spacing) != 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* For our purposes, bytes are always little-endian. */
+ if (size == 0) {
+ endian = MO_LE;
+ }
+ /*
+ * Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ if (interleave == 1 && endian == MO_LE) {
+ size = 3;
+ }
+ tmp64 = tcg_temp_new_i64();
+ addr = tcg_temp_new_i32();
+ tmp = tcg_const_i32(1 << size);
+ load_reg_var(s, addr, a->rn);
+ for (reg = 0; reg < nregs; reg++) {
+ for (n = 0; n < 8 >> size; n++) {
+ int xs;
+ for (xs = 0; xs < interleave; xs++) {
+ int tt = a->vd + reg + spacing * xs;
+
+ if (a->l) {
+ gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
+ neon_store_element64(tt, n, size, tmp64);
+ } else {
+ neon_load_element64(tmp64, tt, n, size);
+ gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
+ }
+ tcg_gen_add_i32(addr, addr, tmp);
+ }
+ }
+ }
+ tcg_temp_free_i32(addr);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i64(tmp64);
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
+ return true;
+}
+
+static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
+{
+ /* Neon load single structure to all lanes */
+ int reg, stride, vec_size;
+ int vd = a->vd;
+ int size = a->size;
+ int nregs = a->n + 1;
+ TCGv_i32 addr, tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (size == 3) {
+ if (nregs != 4 || a->a == 0) {
+ return false;
+ }
+ /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
+ size = 2;
+ }
+ if (nregs == 1 && a->a == 1 && size == 0) {
+ return false;
+ }
+ if (nregs == 3 && a->a == 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * VLD1 to all lanes: T bit indicates how many Dregs to write.
+ * VLD2/3/4 to all lanes: T bit indicates register stride.
+ */
+ stride = a->t ? 2 : 1;
+ vec_size = nregs == 1 ? stride * 8 : 8;
+
+ tmp = tcg_temp_new_i32();
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+ for (reg = 0; reg < nregs; reg++) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | size);
+ if ((vd & 1) && vec_size == 16) {
+ /*
+ * We cannot write 16 bytes at once because the
+ * destination is unaligned.
+ */
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+ 8, 8, tmp);
+ tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
+ neon_reg_offset(vd, 0), 8, 8);
+ } else {
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+ vec_size, vec_size, tmp);
+ }
+ tcg_gen_addi_i32(addr, addr, 1 << size);
+ vd += stride;
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(addr);
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
+
+ return true;
+}
+
+static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
+{
+ /* Neon load/store single structure to one lane */
+ int reg;
+ int nregs = a->n + 1;
+ int vd = a->vd;
+ TCGv_i32 addr, tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ /* Catch the UNDEF cases. This is unavoidably a bit messy. */
+ switch (nregs) {
+ case 1:
+ if (((a->align & (1 << a->size)) != 0) ||
+ (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
+ return false;
+ }
+ break;
+ case 3:
+ if ((a->align & 1) != 0) {
+ return false;
+ }
+ /* fall through */
+ case 2:
+ if (a->size == 2 && (a->align & 2) != 0) {
+ return false;
+ }
+ break;
+ case 4:
+ if ((a->size == 2) && ((a->align & 3) == 3)) {
+ return false;
+ }
+ break;
+ default:
+ abort();
+ }
+ if ((vd + a->stride * (nregs - 1)) > 31) {
+ /*
+ * Attempts to access registers off the end of the register file
+ * are UNPREDICTABLE; we choose to UNDEF because otherwise we would
+ * access off the end of the array that holds the register data.
+ */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+ /*
+ * TODO: if we implemented alignment exceptions, we should check
+ * addr against the alignment encoded in a->align here.
+ */
+ for (reg = 0; reg < nregs; reg++) {
+ if (a->l) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | a->size);
+ neon_store_element(vd, a->reg_idx, a->size, tmp);
+ } else { /* Store */
+ neon_load_element(tmp, vd, a->reg_idx, a->size);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | a->size);
+ }
+ vd += a->stride;
+ tcg_gen_addi_i32(addr, addr, 1 << a->size);
+ }
+ tcg_temp_free_i32(addr);
+ tcg_temp_free_i32(tmp);
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
+
+ return true;
+}
+
+static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
+{
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_reg_offset(a->vd, 0);
+ int rn_ofs = neon_reg_offset(a->vn, 0);
+ int rm_ofs = neon_reg_offset(a->vm, 0);
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+ return true;
+}
+
+#define DO_3SAME(INSN, FUNC) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ return do_3same(s, a, FUNC); \
+ }
+
+DO_3SAME(VADD, tcg_gen_gvec_add)
+DO_3SAME(VSUB, tcg_gen_gvec_sub)
+DO_3SAME(VAND, tcg_gen_gvec_and)
+DO_3SAME(VBIC, tcg_gen_gvec_andc)
+DO_3SAME(VORR, tcg_gen_gvec_or)
+DO_3SAME(VORN, tcg_gen_gvec_orc)
+DO_3SAME(VEOR, tcg_gen_gvec_xor)
+
+/* These insns are all gvec_bitsel but with the inputs in various orders. */
+#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
+ } \
+ DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
+DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
+DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
+
+#define DO_3SAME_NO_SZ_3(INSN, FUNC) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size == 3) { \
+ return false; \
+ } \
+ return do_3same(s, a, FUNC); \
+ }
+
+DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
+DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
+DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
+DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
+DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
+
+#define DO_3SAME_CMP(INSN, COND) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
+ } \
+ DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
+
+DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
+DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
+DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
+DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
+DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
+
+static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
+}
+DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
+
+#define DO_3SAME_GVEC4(INSN, OPARRAY) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), \
+ rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]); \
+ } \
+ DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_GVEC4(VQADD_S, sqadd_op)
+DO_3SAME_GVEC4(VQADD_U, uqadd_op)
+DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
+DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
+
+static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz,
+ 0, gen_helper_gvec_pmul_b);
+}
+
+static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMUL_p_3s);
+}
+
+#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
+ oprsz, maxsz, &OPARRAY[vece]); \
+ } \
+ DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
+
+
+DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op)
+DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
+
+#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ /* Note the operation is vshl vd,vm,vn */ \
+ tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, \
+ oprsz, maxsz, &OPARRAY[vece]); \
+ } \
+ DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
+DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index b087bbd812..e1a9017598 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1872,12 +1872,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
return false;
}
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vn | a->vm) & 0x10)) {
- return false;
- }
-
if (!vfp_access_check(s)) {
return true;
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index d4ad2028f1..025747c0bd 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1313,8 +1313,9 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
#define ARM_CP_RW_BIT (1 << 20)
-/* Include the VFP decoder */
+/* Include the VFP and Neon decoders */
#include "translate-vfp.inc.c"
+#include "translate-neon.inc.c"
static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
@@ -2609,8 +2610,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
}
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
-#define VFP_SREG(insn, bigbit, smallbit) \
- ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
if (dc_isar_feature(aa32_simd_r32, s)) { \
reg = (((insn) >> (bigbit)) & 0x0f) \
@@ -2621,11 +2620,8 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
reg = ((insn) >> (bigbit)) & 0x0f; \
}} while (0)
-#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
-#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
-#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
static void gen_neon_dup_low16(TCGv_i32 var)
@@ -3217,274 +3213,6 @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
tcg_temp_free_i32(rd);
}
-
-static struct {
- int nregs;
- int interleave;
- int spacing;
-} const neon_ls_element_type[11] = {
- {1, 4, 1},
- {1, 4, 2},
- {4, 1, 1},
- {2, 2, 2},
- {1, 3, 1},
- {1, 3, 2},
- {3, 1, 1},
- {1, 1, 1},
- {1, 2, 1},
- {1, 2, 2},
- {2, 1, 1}
-};
-
-/* Translate a NEON load/store element instruction. Return nonzero if the
- instruction is invalid. */
-static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
-{
- int rd, rn, rm;
- int op;
- int nregs;
- int interleave;
- int spacing;
- int stride;
- int size;
- int reg;
- int load;
- int n;
- int vec_size;
- int mmu_idx;
- MemOp endian;
- TCGv_i32 addr;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i64 tmp64;
-
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
-
- if (!s->vfp_enabled)
- return 1;
- VFP_DREG_D(rd, insn);
- rn = (insn >> 16) & 0xf;
- rm = insn & 0xf;
- load = (insn & (1 << 21)) != 0;
- endian = s->be_data;
- mmu_idx = get_mem_index(s);
- if ((insn & (1 << 23)) == 0) {
- /* Load store all elements. */
- op = (insn >> 8) & 0xf;
- size = (insn >> 6) & 3;
- if (op > 10)
- return 1;
- /* Catch UNDEF cases for bad values of align field */
- switch (op & 0xc) {
- case 4:
- if (((insn >> 5) & 1) == 1) {
- return 1;
- }
- break;
- case 8:
- if (((insn >> 4) & 3) == 3) {
- return 1;
- }
- break;
- default:
- break;
- }
- nregs = neon_ls_element_type[op].nregs;
- interleave = neon_ls_element_type[op].interleave;
- spacing = neon_ls_element_type[op].spacing;
- if (size == 3 && (interleave | spacing) != 1) {
- return 1;
- }
- /* For our purposes, bytes are always little-endian. */
- if (size == 0) {
- endian = MO_LE;
- }
- /* Consecutive little-endian elements from a single register
- * can be promoted to a larger little-endian operation.
- */
- if (interleave == 1 && endian == MO_LE) {
- size = 3;
- }
- tmp64 = tcg_temp_new_i64();
- addr = tcg_temp_new_i32();
- tmp2 = tcg_const_i32(1 << size);
- load_reg_var(s, addr, rn);
- for (reg = 0; reg < nregs; reg++) {
- for (n = 0; n < 8 >> size; n++) {
- int xs;
- for (xs = 0; xs < interleave; xs++) {
- int tt = rd + reg + spacing * xs;
-
- if (load) {
- gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
- neon_store_element64(tt, n, size, tmp64);
- } else {
- neon_load_element64(tmp64, tt, n, size);
- gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
- }
- tcg_gen_add_i32(addr, addr, tmp2);
- }
- }
- }
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i64(tmp64);
- stride = nregs * interleave * 8;
- } else {
- size = (insn >> 10) & 3;
- if (size == 3) {
- /* Load single element to all lanes. */
- int a = (insn >> 4) & 1;
- if (!load) {
- return 1;
- }
- size = (insn >> 6) & 3;
- nregs = ((insn >> 8) & 3) + 1;
-
- if (size == 3) {
- if (nregs != 4 || a == 0) {
- return 1;
- }
- /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
- size = 2;
- }
- if (nregs == 1 && a == 1 && size == 0) {
- return 1;
- }
- if (nregs == 3 && a == 1) {
- return 1;
- }
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
-
- /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
- * VLD2/3/4 to all lanes: bit 5 indicates register stride.
- */
- stride = (insn & (1 << 5)) ? 2 : 1;
- vec_size = nregs == 1 ? stride * 8 : 8;
-
- tmp = tcg_temp_new_i32();
- for (reg = 0; reg < nregs; reg++) {
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- if ((rd & 1) && vec_size == 16) {
- /* We cannot write 16 bytes at once because the
- * destination is unaligned.
- */
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
- 8, 8, tmp);
- tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
- neon_reg_offset(rd, 0), 8, 8);
- } else {
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
- vec_size, vec_size, tmp);
- }
- tcg_gen_addi_i32(addr, addr, 1 << size);
- rd += stride;
- }
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(addr);
- stride = (1 << size) * nregs;
- } else {
- /* Single element. */
- int idx = (insn >> 4) & 0xf;
- int reg_idx;
- switch (size) {
- case 0:
- reg_idx = (insn >> 5) & 7;
- stride = 1;
- break;
- case 1:
- reg_idx = (insn >> 6) & 3;
- stride = (insn & (1 << 5)) ? 2 : 1;
- break;
- case 2:
- reg_idx = (insn >> 7) & 1;
- stride = (insn & (1 << 6)) ? 2 : 1;
- break;
- default:
- abort();
- }
- nregs = ((insn >> 8) & 3) + 1;
- /* Catch the UNDEF cases. This is unavoidably a bit messy. */
- switch (nregs) {
- case 1:
- if (((idx & (1 << size)) != 0) ||
- (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
- return 1;
- }
- break;
- case 3:
- if ((idx & 1) != 0) {
- return 1;
- }
- /* fall through */
- case 2:
- if (size == 2 && (idx & 2) != 0) {
- return 1;
- }
- break;
- case 4:
- if ((size == 2) && ((idx & 3) == 3)) {
- return 1;
- }
- break;
- default:
- abort();
- }
- if ((rd + stride * (nregs - 1)) > 31) {
- /* Attempts to write off the end of the register file
- * are UNPREDICTABLE; we choose to UNDEF because otherwise
- * the neon_load_reg() would write off the end of the array.
- */
- return 1;
- }
- tmp = tcg_temp_new_i32();
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
- for (reg = 0; reg < nregs; reg++) {
- if (load) {
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- neon_store_element(rd, reg_idx, size, tmp);
- } else { /* Store */
- neon_load_element(tmp, rd, reg_idx, size);
- gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- }
- rd += stride;
- tcg_gen_addi_i32(addr, addr, 1 << size);
- }
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(tmp);
- stride = nregs * (1 << size);
- }
- }
- if (rm != 15) {
- TCGv_i32 base;
-
- base = load_reg(s, rn);
- if (rm == 13) {
- tcg_gen_addi_i32(base, base, stride);
- } else {
- TCGv_i32 index;
- index = load_reg(s, rm);
- tcg_gen_add_i32(base, base, index);
- tcg_temp_free_i32(index);
- }
- store_reg(s, rn, base);
- }
- return 0;
-}
-
static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
{
switch (size) {
@@ -5002,6 +4730,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
TCGv_ptr ptr1, ptr2, ptr3;
TCGv_i64 tmp64;
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return 1;
+ }
+
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
@@ -5116,128 +4848,20 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
return 1;
- case NEON_3R_LOGIC: /* Logic ops. */
- switch ((u << 2) | size) {
- case 0: /* VAND */
- tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 1: /* VBIC */
- tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 2: /* VORR */
- tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 3: /* VORN */
- tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 4: /* VEOR */
- tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 5: /* VBSL */
- tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 6: /* VBIT */
- tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
- vec_size, vec_size);
- break;
- case 7: /* VBIF */
- tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
- vec_size, vec_size);
- break;
- }
- return 0;
-
case NEON_3R_VADD_VSUB:
- if (u) {
- tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
-
- case NEON_3R_VQADD:
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, vec_size, vec_size,
- (u ? uqadd_op : sqadd_op) + size);
- return 0;
-
- case NEON_3R_VQSUB:
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, vec_size, vec_size,
- (u ? uqsub_op : sqsub_op) + size);
- return 0;
-
- case NEON_3R_VMUL: /* VMUL */
- if (u) {
- /* Polynomial case allows only P8. */
- if (size != 0) {
- return 1;
- }
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
- 0, gen_helper_gvec_pmul_b);
- } else {
- tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
-
- case NEON_3R_VML: /* VMLA, VMLS */
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
- u ? &mls_op[size] : &mla_op[size]);
- return 0;
-
+ case NEON_3R_LOGIC:
+ case NEON_3R_VMAX:
+ case NEON_3R_VMIN:
case NEON_3R_VTST_VCEQ:
- if (u) { /* VCEQ */
- tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else { /* VTST */
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size, &cmtst_op[size]);
- }
- return 0;
-
case NEON_3R_VCGT:
- tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
- rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
- return 0;
-
case NEON_3R_VCGE:
- tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
- rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
- return 0;
-
- case NEON_3R_VMAX:
- if (u) {
- tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
- case NEON_3R_VMIN:
- if (u) {
- tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
-
+ case NEON_3R_VQADD:
+ case NEON_3R_VQSUB:
+ case NEON_3R_VMUL:
+ case NEON_3R_VML:
case NEON_3R_VSHL:
- /* Note the operation is vshl vd,vm,vn */
- tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
- u ? &ushl_op[size] : &sshl_op[size]);
- return 0;
+ /* Already handled by decodetree */
+ return 1;
}
if (size == 3) {
@@ -6016,7 +5640,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{0, 0, 0, 0}, /* VMLSL */
{0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 1}, /* VQDMULL */
+ {0, 0, 0, 9}, /* VQDMULL */
{0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};
@@ -7023,232 +6647,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 0;
}
-/* Advanced SIMD three registers of the same length extension.
- * 31 25 23 22 20 16 12 11 10 9 8 3 0
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
- * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
- */
-static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
-{
- gen_helper_gvec_3 *fn_gvec = NULL;
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
- int rd, rn, rm, opr_sz;
- int data = 0;
- int off_rn, off_rm;
- bool is_long = false, q = extract32(insn, 6, 1);
- bool ptr_is_env = false;
-
- if ((insn & 0xfe200f10) == 0xfc200800) {
- /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 23, 2); /* rot */
- if (!dc_isar_feature(aa32_vcma, s)
- || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
- } else if ((insn & 0xfea00f10) == 0xfc800800) {
- /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 24, 1); /* rot */
- if (!dc_isar_feature(aa32_vcma, s)
- || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
- } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
- /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
- bool u = extract32(insn, 4, 1);
- if (!dc_isar_feature(aa32_dp, s)) {
- return 1;
- }
- fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
- } else if ((insn & 0xff300f10) == 0xfc200810) {
- /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
- int is_s = extract32(insn, 23, 1);
- if (!dc_isar_feature(aa32_fhm, s)) {
- return 1;
- }
- is_long = true;
- data = is_s; /* is_2 == 0 */
- fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
- ptr_is_env = true;
- } else {
- return 1;
- }
-
- VFP_DREG_D(rd, insn);
- if (rd & q) {
- return 1;
- }
- if (q || !is_long) {
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- if ((rn | rm) & q & !is_long) {
- return 1;
- }
- off_rn = vfp_reg_offset(1, rn);
- off_rm = vfp_reg_offset(1, rm);
- } else {
- rn = VFP_SREG_N(insn);
- rm = VFP_SREG_M(insn);
- off_rn = vfp_reg_offset(0, rn);
- off_rm = vfp_reg_offset(0, rm);
- }
-
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
- if (!s->vfp_enabled) {
- return 1;
- }
-
- opr_sz = (1 + q) * 8;
- if (fn_gvec_ptr) {
- TCGv_ptr ptr;
- if (ptr_is_env) {
- ptr = cpu_env;
- } else {
- ptr = get_fpstatus_ptr(1);
- }
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- if (!ptr_is_env) {
- tcg_temp_free_ptr(ptr);
- }
- } else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
- opr_sz, opr_sz, data, fn_gvec);
- }
- return 0;
-}
-
-/* Advanced SIMD two registers and a scalar extension.
- * 31 24 23 22 20 16 12 11 10 9 8 3 0
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
- * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
- *
- */
-
-static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
-{
- gen_helper_gvec_3 *fn_gvec = NULL;
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
- int rd, rn, rm, opr_sz, data;
- int off_rn, off_rm;
- bool is_long = false, q = extract32(insn, 6, 1);
- bool ptr_is_env = false;
-
- if ((insn & 0xff000f10) == 0xfe000800) {
- /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
- int rot = extract32(insn, 20, 2);
- int size = extract32(insn, 23, 1);
- int index;
-
- if (!dc_isar_feature(aa32_vcma, s)) {
- return 1;
- }
- if (size == 0) {
- if (!dc_isar_feature(aa32_fp16_arith, s)) {
- return 1;
- }
- /* For fp16, rm is just Vm, and index is M. */
- rm = extract32(insn, 0, 4);
- index = extract32(insn, 5, 1);
- } else {
- /* For fp32, rm is the usual M:Vm, and index is 0. */
- VFP_DREG_M(rm, insn);
- index = 0;
- }
- data = (index << 2) | rot;
- fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
- : gen_helper_gvec_fcmlah_idx);
- } else if ((insn & 0xffb00f00) == 0xfe200d00) {
- /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
- int u = extract32(insn, 4, 1);
-
- if (!dc_isar_feature(aa32_dp, s)) {
- return 1;
- }
- fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
- /* rm is just Vm, and index is M. */
- data = extract32(insn, 5, 1); /* index */
- rm = extract32(insn, 0, 4);
- } else if ((insn & 0xffa00f10) == 0xfe000810) {
- /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
- int is_s = extract32(insn, 20, 1);
- int vm20 = extract32(insn, 0, 3);
- int vm3 = extract32(insn, 3, 1);
- int m = extract32(insn, 5, 1);
- int index;
-
- if (!dc_isar_feature(aa32_fhm, s)) {
- return 1;
- }
- if (q) {
- rm = vm20;
- index = m * 2 + vm3;
- } else {
- rm = vm20 * 2 + m;
- index = vm3;
- }
- is_long = true;
- data = (index << 2) | is_s; /* is_2 == 0 */
- fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
- ptr_is_env = true;
- } else {
- return 1;
- }
-
- VFP_DREG_D(rd, insn);
- if (rd & q) {
- return 1;
- }
- if (q || !is_long) {
- VFP_DREG_N(rn, insn);
- if (rn & q & !is_long) {
- return 1;
- }
- off_rn = vfp_reg_offset(1, rn);
- off_rm = vfp_reg_offset(1, rm);
- } else {
- rn = VFP_SREG_N(insn);
- off_rn = vfp_reg_offset(0, rn);
- off_rm = vfp_reg_offset(0, rm);
- }
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
- if (!s->vfp_enabled) {
- return 1;
- }
-
- opr_sz = (1 + q) * 8;
- if (fn_gvec_ptr) {
- TCGv_ptr ptr;
- if (ptr_is_env) {
- ptr = cpu_env;
- } else {
- ptr = get_fpstatus_ptr(1);
- }
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- if (!ptr_is_env) {
- tcg_temp_free_ptr(ptr);
- }
- } else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
- opr_sz, opr_sz, data, fn_gvec);
- }
- return 0;
-}
-
static int disas_coproc_insn(DisasContext *s, uint32_t insn)
{
int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
@@ -10941,33 +10339,21 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
/* Unconditional instructions. */
/* TODO: Perhaps merge these into one decodetree output file. */
if (disas_a32_uncond(s, insn) ||
- disas_vfp_uncond(s, insn)) {
+ disas_vfp_uncond(s, insn) ||
+ disas_neon_dp(s, insn) ||
+ disas_neon_ls(s, insn) ||
+ disas_neon_shared(s, insn)) {
return;
}
/* fall back to legacy decoder */
if (((insn >> 25) & 7) == 1) {
/* NEON Data processing. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
if (disas_neon_data_insn(s, insn)) {
goto illegal_op;
}
return;
}
- if ((insn & 0x0f100000) == 0x04000000) {
- /* NEON load/store. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
- if (disas_neon_ls_insn(s, insn)) {
- goto illegal_op;
- }
- return;
- }
if ((insn & 0x0e000f00) == 0x0c000100) {
if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
/* iWMMXt register transfer. */
@@ -10977,18 +10363,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
}
}
}
- } else if ((insn & 0x0e000a00) == 0x0c000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- if (disas_neon_insn_3same_ext(s, insn)) {
- goto illegal_op;
- }
- return;
- } else if ((insn & 0x0f000a00) == 0x0e000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
- goto illegal_op;
- }
- return;
}
goto illegal_op;
}
@@ -11102,6 +10476,33 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
ARCH(6T2);
}
+ if ((insn & 0xef000000) == 0xef000000) {
+ /*
+ * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ */
+ uint32_t a32_insn = (insn & 0xe2ffffff) |
+ ((insn & (1 << 28)) >> 4) | (1 << 28);
+
+ if (disas_neon_dp(s, a32_insn)) {
+ return;
+ }
+ }
+
+ if ((insn & 0xff100000) == 0xf9000000) {
+ /*
+ * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ */
+ uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
+
+ if (disas_neon_ls(s, a32_insn)) {
+ return;
+ }
+ }
+
/*
* TODO: Perhaps merge these into one decodetree output file.
* Note disas_vfp is written for a32 with cond field in the
@@ -11109,6 +10510,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
*/
if (disas_t32(s, insn) ||
disas_vfp_uncond(s, insn) ||
+ disas_neon_shared(s, insn) ||
((insn >> 28) == 0xe && disas_vfp(s, insn))) {
return;
}
@@ -11138,19 +10540,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
}
break;
}
- if ((insn & 0xfe000a00) == 0xfc000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* The Thumb2 and ARM encodings are identical. */
- if (disas_neon_insn_3same_ext(s, insn)) {
- goto illegal_op;
- }
- } else if ((insn & 0xff000a00) == 0xfe000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* The Thumb2 and ARM encodings are identical. */
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
- goto illegal_op;
- }
- } else if (((insn >> 24) & 3) == 3) {
+ if (((insn >> 24) & 3) == 3) {
/* Translate into the equivalent ARM encoding. */
insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
if (disas_neon_data_insn(s, insn)) {
@@ -11168,12 +10558,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
}
break;
case 12:
- if ((insn & 0x01100000) == 0x01000000) {
- if (disas_neon_ls_insn(s, insn)) {
- goto illegal_op;
- }
- break;
- }
goto illegal_op;
default:
illegal_op:
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 98b319f3f6..cb7925ea46 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -305,4 +305,30 @@ void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
#define dc_isar_feature(name, ctx) \
({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })
+/*
+ * Note that the gvec expanders operate on offsets + sizes.
+ * For example, the gen_*_3s() expanders in translate-neon.inc.c have the
+ * GVecGen3Fn shape: (vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz).
+ */
+typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
+ uint32_t, uint32_t);
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t);
+typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t);
+
+/*
+ * Function prototype for gen_ functions for calling Neon helpers.
+ * These are function types, not pointer types, so users declare
+ * pointers to them explicitly (e.g. "NeonGenTwoOpFn *fn").
+ */
+typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
+typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
+typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
+typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
+typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
+typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
+
#endif /* TARGET_ARM_TRANSLATE_H */