diff options
-rw-r--r-- | hw/arm/mps2-tz.c | 2 | ||||
-rw-r--r-- | hw/arm/xlnx-versal-virt.c | 74 | ||||
-rw-r--r-- | hw/arm/xlnx-versal.c | 115 | ||||
-rw-r--r-- | include/hw/arm/xlnx-versal.h | 31 | ||||
-rw-r--r-- | target/arm/Makefile.objs | 18 | ||||
-rw-r--r-- | target/arm/cpu-param.h | 2 | ||||
-rw-r--r-- | target/arm/cpu.c | 3 | ||||
-rw-r--r-- | target/arm/cpu.h | 38 | ||||
-rw-r--r-- | target/arm/cpu64.c | 8 | ||||
-rw-r--r-- | target/arm/helper.c | 183 | ||||
-rw-r--r-- | target/arm/neon-dp.decode | 86 | ||||
-rw-r--r-- | target/arm/neon-ls.decode | 52 | ||||
-rw-r--r-- | target/arm/neon-shared.decode | 66 | ||||
-rw-r--r-- | target/arm/translate-a64.c | 17 | ||||
-rw-r--r-- | target/arm/translate-a64.h | 9 | ||||
-rw-r--r-- | target/arm/translate-neon.inc.c | 714 | ||||
-rw-r--r-- | target/arm/translate-vfp.inc.c | 6 | ||||
-rw-r--r-- | target/arm/translate.c | 714 | ||||
-rw-r--r-- | target/arm/translate.h | 26 |
19 files changed, 1301 insertions, 863 deletions
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index a8dea7dde1..2c43041564 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -395,7 +395,7 @@ static void mps2tz_common_init(MachineState *machine) exit(EXIT_FAILURE); } - sysbus_init_child_obj(OBJECT(machine), "iotkit", &mms->iotkit, + sysbus_init_child_obj(OBJECT(machine), TYPE_IOTKIT, &mms->iotkit, sizeof(mms->iotkit), mmc->armsse_type); iotkitdev = DEVICE(&mms->iotkit); object_property_set_link(OBJECT(&mms->iotkit), OBJECT(system_memory), diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c index 878a275140..7e749e1926 100644 --- a/hw/arm/xlnx-versal-virt.c +++ b/hw/arm/xlnx-versal-virt.c @@ -20,6 +20,7 @@ #include "hw/arm/sysbus-fdt.h" #include "hw/arm/fdt.h" #include "cpu.h" +#include "hw/qdev-properties.h" #include "hw/arm/xlnx-versal.h" #define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt") @@ -256,6 +257,53 @@ static void fdt_add_zdma_nodes(VersalVirt *s) } } +static void fdt_add_sd_nodes(VersalVirt *s) +{ + const char clocknames[] = "clk_xin\0clk_ahb"; + const char compat[] = "arasan,sdhci-8.9a"; + int i; + + for (i = ARRAY_SIZE(s->soc.pmc.iou.sd) - 1; i >= 0; i--) { + uint64_t addr = MM_PMC_SD0 + MM_PMC_SD0_SIZE * i; + char *name = g_strdup_printf("/sdhci@%" PRIx64, addr); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_cells(s->fdt, name, "clocks", + s->phandle.clk_25Mhz, s->phandle.clk_25Mhz); + qemu_fdt_setprop(s->fdt, name, "clock-names", + clocknames, sizeof(clocknames)); + qemu_fdt_setprop_cells(s->fdt, name, "interrupts", + GIC_FDT_IRQ_TYPE_SPI, VERSAL_SD0_IRQ_0 + i * 2, + GIC_FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, addr, 2, MM_PMC_SD0_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); + } +} + +static void fdt_add_rtc_node(VersalVirt *s) +{ + const char compat[] = "xlnx,zynqmp-rtc"; + const char interrupt_names[] = "alarm\0sec"; + char *name = g_strdup_printf("/rtc@%x", MM_PMC_RTC); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_cells(s->fdt, name, "interrupts", + GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_ALARM_IRQ, + GIC_FDT_IRQ_FLAGS_LEVEL_HI, + GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_SECONDS_IRQ, + GIC_FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop(s->fdt, name, "interrupt-names", + interrupt_names, sizeof(interrupt_names)); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_RTC, 2, MM_PMC_RTC_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + static void fdt_nop_memory_nodes(void *fdt, Error **errp) { Error *err = NULL; @@ -411,10 +459,23 @@ static void create_virtio_regions(VersalVirt *s) } } +static void sd_plugin_card(SDHCIState *sd, DriveInfo *di) +{ + BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL; + DeviceState *card; + + card = qdev_create(qdev_get_child_bus(DEVICE(sd), "sd-bus"), TYPE_SD_CARD); + object_property_add_child(OBJECT(sd), "card[*]", OBJECT(card), + &error_fatal); + qdev_prop_set_drive(card, "drive", blk, &error_fatal); + object_property_set_bool(OBJECT(card), true, "realized", &error_fatal); +} + static void versal_virt_init(MachineState *machine) { VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(machine); int psci_conduit = QEMU_PSCI_CONDUIT_DISABLED; + int i; /* * If the user provides an Operating System to be loaded, we expect them @@ -440,7 +501,7 @@ static void versal_virt_init(MachineState *machine) psci_conduit = QEMU_PSCI_CONDUIT_SMC; } - sysbus_init_child_obj(OBJECT(machine), "xlnx-ve", &s->soc, + sysbus_init_child_obj(OBJECT(machine), "xlnx-versal", &s->soc, sizeof(s->soc), TYPE_XLNX_VERSAL); object_property_set_link(OBJECT(&s->soc), OBJECT(machine->ram), "ddr", &error_abort); @@ -455,6 +516,8 @@ static void versal_virt_init(MachineState *machine) fdt_add_gic_nodes(s); fdt_add_timer_nodes(s); fdt_add_zdma_nodes(s); + fdt_add_sd_nodes(s); + fdt_add_rtc_node(s); fdt_add_cpu_nodes(s, psci_conduit); fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz); fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz); @@ -464,14 +527,19 @@ static void versal_virt_init(MachineState *machine) memory_region_add_subregion_overlap(get_system_memory(), 0, &s->soc.fpd.apu.mr, 0); + /* Plugin SD cards. */ + for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) { + sd_plugin_card(&s->soc.pmc.iou.sd[i], drive_get_next(IF_SD)); + } + s->binfo.ram_size = machine->ram_size; s->binfo.loader_start = 0x0; s->binfo.get_dtb = versal_virt_get_dtb; s->binfo.modify_dtb = versal_virt_modify_dtb; if (machine->kernel_filename) { - arm_load_kernel(s->soc.fpd.apu.cpu[0], machine, &s->binfo); + arm_load_kernel(&s->soc.fpd.apu.cpu[0], machine, &s->binfo); } else { - AddressSpace *as = arm_boot_address_space(s->soc.fpd.apu.cpu[0], + AddressSpace *as = arm_boot_address_space(&s->soc.fpd.apu.cpu[0], &s->binfo); /* Some boot-loaders (e.g u-boot) don't like blobs at address 0 (NULL). * Offset things by 4K. */ diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c index 94460f2343..321171bcce 100644 --- a/hw/arm/xlnx-versal.c +++ b/hw/arm/xlnx-versal.c @@ -20,9 +20,7 @@ #include "hw/arm/boot.h" #include "kvm_arm.h" #include "hw/misc/unimp.h" -#include "hw/intc/arm_gicv3_common.h" #include "hw/arm/xlnx-versal.h" -#include "hw/char/pl011.h" #define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72") #define GEM_REVISION 0x40070106 @@ -33,23 +31,15 @@ static void versal_create_apu_cpus(Versal *s) for (i = 0; i < ARRAY_SIZE(s->fpd.apu.cpu); i++) { Object *obj; - char *name; - - obj = object_new(XLNX_VERSAL_ACPU_TYPE); - if (!obj) { - /* Secondary CPUs start in PSCI powered-down state */ - error_report("Unable to create apu.cpu[%d] of type %s", - i, XLNX_VERSAL_ACPU_TYPE); - exit(EXIT_FAILURE); - } - - name = g_strdup_printf("apu-cpu[%d]", i); - object_property_add_child(OBJECT(s), name, obj, &error_fatal); - g_free(name); + object_initialize_child(OBJECT(s), "apu-cpu[*]", + &s->fpd.apu.cpu[i], sizeof(s->fpd.apu.cpu[i]), + XLNX_VERSAL_ACPU_TYPE, &error_abort, NULL); + obj = OBJECT(&s->fpd.apu.cpu[i]); object_property_set_int(obj, s->cfg.psci_conduit, "psci-conduit", &error_abort); if (i) { + /* Secondary CPUs start in PSCI powered-down state */ object_property_set_bool(obj, true, "start-powered-off", &error_abort); } @@ -59,7 +49,6 @@ static void versal_create_apu_cpus(Versal *s) object_property_set_link(obj, OBJECT(&s->fpd.apu.mr), "memory", &error_abort); object_property_set_bool(obj, true, "realized", &error_fatal); - s->fpd.apu.cpu[i] = ARM_CPU(obj); } } @@ -97,7 +86,7 @@ static void versal_create_apu_gic(Versal *s, qemu_irq *pic) } for (i = 0; i < nr_apu_cpus; i++) { - DeviceState *cpudev = DEVICE(s->fpd.apu.cpu[i]); + DeviceState *cpudev = DEVICE(&s->fpd.apu.cpu[i]); int ppibase = XLNX_VERSAL_NR_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; qemu_irq maint_irq; int ti; @@ -145,16 +134,17 @@ static void versal_create_uarts(Versal *s, qemu_irq *pic) DeviceState *dev; MemoryRegion *mr; - dev = qdev_create(NULL, TYPE_PL011); - s->lpd.iou.uart[i] = SYS_BUS_DEVICE(dev); + sysbus_init_child_obj(OBJECT(s), name, + &s->lpd.iou.uart[i], sizeof(s->lpd.iou.uart[i]), + TYPE_PL011); + dev = DEVICE(&s->lpd.iou.uart[i]); qdev_prop_set_chr(dev, "chardev", serial_hd(i)); - object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal); qdev_init_nofail(dev); - mr = sysbus_mmio_get_region(s->lpd.iou.uart[i], 0); + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); memory_region_add_subregion(&s->mr_ps, addrs[i], mr); - sysbus_connect_irq(s->lpd.iou.uart[i], 0, pic[irqs[i]]); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]); g_free(name); } } @@ -171,25 +161,26 @@ static void versal_create_gems(Versal *s, qemu_irq *pic) DeviceState *dev; MemoryRegion *mr; - dev = qdev_create(NULL, "cadence_gem"); - s->lpd.iou.gem[i] = SYS_BUS_DEVICE(dev); - object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal); + sysbus_init_child_obj(OBJECT(s), name, + &s->lpd.iou.gem[i], sizeof(s->lpd.iou.gem[i]), + TYPE_CADENCE_GEM); + dev = DEVICE(&s->lpd.iou.gem[i]); if (nd->used) { qemu_check_nic_model(nd, "cadence_gem"); qdev_set_nic_properties(dev, nd); } - object_property_set_int(OBJECT(s->lpd.iou.gem[i]), + object_property_set_int(OBJECT(dev), 2, "num-priority-queues", &error_abort); - object_property_set_link(OBJECT(s->lpd.iou.gem[i]), + object_property_set_link(OBJECT(dev), OBJECT(&s->mr_ps), "dma", &error_abort); qdev_init_nofail(dev); - mr = sysbus_mmio_get_region(s->lpd.iou.gem[i], 0); + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); memory_region_add_subregion(&s->mr_ps, addrs[i], mr); - sysbus_connect_irq(s->lpd.iou.gem[i], 0, pic[irqs[i]]); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]); g_free(name); } } @@ -203,22 +194,72 @@ static void versal_create_admas(Versal *s, qemu_irq *pic) DeviceState *dev; MemoryRegion *mr; - dev = qdev_create(NULL, "xlnx.zdma"); - s->lpd.iou.adma[i] = SYS_BUS_DEVICE(dev); - object_property_set_int(OBJECT(s->lpd.iou.adma[i]), 128, "bus-width", - &error_abort); - object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal); + sysbus_init_child_obj(OBJECT(s), name, + &s->lpd.iou.adma[i], sizeof(s->lpd.iou.adma[i]), + TYPE_XLNX_ZDMA); + dev = DEVICE(&s->lpd.iou.adma[i]); + object_property_set_int(OBJECT(dev), 128, "bus-width", &error_abort); qdev_init_nofail(dev); - mr = sysbus_mmio_get_region(s->lpd.iou.adma[i], 0); + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); memory_region_add_subregion(&s->mr_ps, MM_ADMA_CH0 + i * MM_ADMA_CH0_SIZE, mr); - sysbus_connect_irq(s->lpd.iou.adma[i], 0, pic[VERSAL_ADMA_IRQ_0 + i]); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[VERSAL_ADMA_IRQ_0 + i]); g_free(name); } } +#define SDHCI_CAPABILITIES 0x280737ec6481 /* Same as on ZynqMP. */ +static void versal_create_sds(Versal *s, qemu_irq *pic) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(s->pmc.iou.sd); i++) { + DeviceState *dev; + MemoryRegion *mr; + + sysbus_init_child_obj(OBJECT(s), "sd[*]", + &s->pmc.iou.sd[i], sizeof(s->pmc.iou.sd[i]), + TYPE_SYSBUS_SDHCI); + dev = DEVICE(&s->pmc.iou.sd[i]); + + object_property_set_uint(OBJECT(dev), + 3, "sd-spec-version", &error_fatal); + object_property_set_uint(OBJECT(dev), SDHCI_CAPABILITIES, "capareg", + &error_fatal); + object_property_set_uint(OBJECT(dev), UHS_I, "uhs", &error_fatal); + qdev_init_nofail(dev); + + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); + memory_region_add_subregion(&s->mr_ps, + MM_PMC_SD0 + i * MM_PMC_SD0_SIZE, mr); + + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, + pic[VERSAL_SD0_IRQ_0 + i * 2]); + } +} + +static void versal_create_rtc(Versal *s, qemu_irq *pic) +{ + SysBusDevice *sbd; + MemoryRegion *mr; + + sysbus_init_child_obj(OBJECT(s), "rtc", &s->pmc.rtc, sizeof(s->pmc.rtc), + TYPE_XLNX_ZYNQMP_RTC); + sbd = SYS_BUS_DEVICE(&s->pmc.rtc); + qdev_init_nofail(DEVICE(sbd)); + + mr = sysbus_mmio_get_region(sbd, 0); + memory_region_add_subregion(&s->mr_ps, MM_PMC_RTC, mr); + + /* + * TODO: Connect the ALARM and SECONDS interrupts once our RTC model + * supports them. + */ + sysbus_connect_irq(sbd, 1, pic[VERSAL_RTC_APB_ERR_IRQ]); +} + /* This takes the board allocated linear DDR memory and creates aliases * for each split DDR range/aperture on the Versal address map. */ @@ -301,6 +342,8 @@ static void versal_realize(DeviceState *dev, Error **errp) versal_create_uarts(s, pic); versal_create_gems(s, pic); versal_create_admas(s, pic); + versal_create_sds(s, pic); + versal_create_rtc(s, pic); versal_map_ddr(s); versal_unimp(s); diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h index 6c0a692b2f..9c9f47ba9d 100644 --- a/include/hw/arm/xlnx-versal.h +++ b/include/hw/arm/xlnx-versal.h @@ -14,7 +14,12 @@ #include "hw/sysbus.h" #include "hw/arm/boot.h" +#include "hw/sd/sdhci.h" #include "hw/intc/arm_gicv3.h" +#include "hw/char/pl011.h" +#include "hw/dma/xlnx-zdma.h" +#include "hw/net/cadence_gem.h" +#include "hw/rtc/xlnx-zynqmp-rtc.h" #define TYPE_XLNX_VERSAL "xlnx-versal" #define XLNX_VERSAL(obj) OBJECT_CHECK(Versal, (obj), TYPE_XLNX_VERSAL) @@ -23,6 +28,7 @@ #define XLNX_VERSAL_NR_UARTS 2 #define XLNX_VERSAL_NR_GEMS 2 #define XLNX_VERSAL_NR_ADMAS 8 +#define XLNX_VERSAL_NR_SDS 2 #define XLNX_VERSAL_NR_IRQS 192 typedef struct Versal { @@ -33,7 +39,7 @@ typedef struct Versal { struct { struct { MemoryRegion mr; - ARMCPU *cpu[XLNX_VERSAL_NR_ACPUS]; + ARMCPU cpu[XLNX_VERSAL_NR_ACPUS]; GICv3State gic; } apu; } fpd; @@ -49,12 +55,21 @@ typedef struct Versal { MemoryRegion mr_ocm; struct { - SysBusDevice *uart[XLNX_VERSAL_NR_UARTS]; - SysBusDevice *gem[XLNX_VERSAL_NR_GEMS]; - SysBusDevice *adma[XLNX_VERSAL_NR_ADMAS]; + PL011State uart[XLNX_VERSAL_NR_UARTS]; + CadenceGEMState gem[XLNX_VERSAL_NR_GEMS]; + XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS]; } iou; } lpd; + /* The Platform Management Controller subsystem. */ + struct { + struct { + SDHCIState sd[XLNX_VERSAL_NR_SDS]; + } iou; + + XlnxZynqMPRTC rtc; + } pmc; + struct { MemoryRegion *mr_ddr; uint32_t psci_conduit; @@ -77,6 +92,10 @@ typedef struct Versal { #define VERSAL_GEM1_IRQ_0 58 #define VERSAL_GEM1_WAKE_IRQ_0 59 #define VERSAL_ADMA_IRQ_0 60 +#define VERSAL_RTC_APB_ERR_IRQ 121 +#define VERSAL_SD0_IRQ_0 126 +#define VERSAL_RTC_ALARM_IRQ 142 +#define VERSAL_RTC_SECONDS_IRQ 143 /* Architecturally reserved IRQs suitable for virtualization. */ #define VERSAL_RSVD_IRQ_FIRST 111 @@ -126,6 +145,10 @@ typedef struct Versal { #define MM_FPD_CRF 0xfd1a0000U #define MM_FPD_CRF_SIZE 0x140000 +#define MM_PMC_SD0 0xf1040000U +#define MM_PMC_SD0_SIZE 0x10000 #define MM_PMC_CRP 0xf1260000U #define MM_PMC_CRP_SIZE 0x10000 +#define MM_PMC_RTC 0xf12a0000 +#define MM_PMC_RTC_SIZE 0x10000 #endif diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs index cf26c16f5f..775b3e24f2 100644 --- a/target/arm/Makefile.objs +++ b/target/arm/Makefile.objs @@ -18,6 +18,21 @@ target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE) $(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\ "GEN", $(TARGET_DIR)$@) +target/arm/decode-neon-shared.inc.c: $(SRC_PATH)/target/arm/neon-shared.decode $(DECODETREE) + $(call quiet-command,\ + $(PYTHON) $(DECODETREE) --static-decode disas_neon_shared -o $@ $<,\ + "GEN", $(TARGET_DIR)$@) + +target/arm/decode-neon-dp.inc.c: $(SRC_PATH)/target/arm/neon-dp.decode $(DECODETREE) + $(call quiet-command,\ + $(PYTHON) $(DECODETREE) --static-decode disas_neon_dp -o $@ $<,\ + "GEN", $(TARGET_DIR)$@) + +target/arm/decode-neon-ls.inc.c: $(SRC_PATH)/target/arm/neon-ls.decode $(DECODETREE) + $(call quiet-command,\ + $(PYTHON) $(DECODETREE) --static-decode disas_neon_ls -o $@ $<,\ + "GEN", $(TARGET_DIR)$@) + target/arm/decode-vfp.inc.c: $(SRC_PATH)/target/arm/vfp.decode $(DECODETREE) $(call quiet-command,\ $(PYTHON) $(DECODETREE) --static-decode disas_vfp -o $@ $<,\ @@ -49,6 +64,9 @@ target/arm/decode-t16.inc.c: $(SRC_PATH)/target/arm/t16.decode $(DECODETREE) "GEN", $(TARGET_DIR)$@) target/arm/translate-sve.o: target/arm/decode-sve.inc.c +target/arm/translate.o: target/arm/decode-neon-shared.inc.c +target/arm/translate.o: target/arm/decode-neon-dp.inc.c +target/arm/translate.o: target/arm/decode-neon-ls.inc.c target/arm/translate.o: target/arm/decode-vfp.inc.c target/arm/translate.o: target/arm/decode-vfp-uncond.inc.c target/arm/translate.o: target/arm/decode-a32.inc.c diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index d593b60b28..6321385b46 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -29,6 +29,6 @@ # define TARGET_PAGE_BITS_MIN 10 #endif -#define NB_MMU_MODES 12 +#define NB_MMU_MODES 11 #endif diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 141d947775..5d64adfe76 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2686,6 +2686,7 @@ static void arm_max_initfn(Object *obj) t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */ + t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */ cpu->isar.id_mmfr4 = t; } #endif @@ -2754,7 +2755,7 @@ static const ARMCPUInfo arm_cpus[] = { static Property arm_cpu_properties[] = { DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0), - DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), + DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0), DEFINE_PROP_UINT64("mp-affinity", ARMCPU, mp_affinity, ARM64_AFFINITY_INVALID), DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 8b9f2961ba..8608da6b6f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -894,7 +894,7 @@ struct ARMCPU { uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; } isar; - uint32_t midr; + uint64_t midr; uint32_t revidr; uint32_t reset_fpsid; uint32_t ctr; @@ -2801,6 +2801,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * handling via the TLB. The only way to do a stage 1 translation without * the immediate stage 2 translation is via the ATS or AT system insns, * which can be slow-pathed and always do a page table walk. + * The only use of stage 2 translations is either as part of an s1+2 + * lookup or when loading the descriptors during a stage 1 page table walk, + * and in both those cases we don't use the TLB. * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" * translation regimes, because they map reasonably well to each other * and they can't both be active at the same time. @@ -2816,15 +2819,15 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * NS EL1 EL1&0 stage 1+2 (aka NS PL1) * NS EL1 EL1&0 stage 1+2 +PAN * NS EL0 EL2&0 + * NS EL2 EL2&0 * NS EL2 EL2&0 +PAN * NS EL2 (aka NS PL2) * S EL0 EL1&0 (aka S PL0) * S EL1 EL1&0 (not used if EL3 is 32 bit) * S EL1 EL1&0 +PAN * S EL3 (aka S PL1) - * NS EL1&0 stage 2 * - * for a total of 12 different mmu_idx. + * for a total of 11 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish NS EL0 and NS EL1 (and @@ -2846,7 +2849,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * are not quite the same -- different CPU types (most notably M profile * vs A/R profile) would like to use MMU indexes with different semantics, * but since we don't ever need to use all of those in a single CPU we - * can avoid setting NB_MMU_MODES to more than 8. The lower bits of + * can avoid having to set NB_MMU_MODES to "total number of A profile MMU + * modes + total number of M profile MMU modes". The lower bits of * ARMMMUIdx are the core TLB mmu index, and the higher bits are always * the same for any particular CPU. * Variables of type ARMMUIdx are always full values, and the core @@ -2894,8 +2898,6 @@ typedef enum ARMMMUIdx { ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A, ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, - /* * These are not allocated TLBs and are used only for AT system * instructions or for the first stage of an S12 page table walk. @@ -2903,6 +2905,14 @@ typedef enum ARMMMUIdx { ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, + /* + * Not allocated a TLB: used only for second stage of an S12 page + * table walk, or for descriptor loads during first stage of an S1 + * page table walk. Note that if we ever want to have a TLB for this + * then various TLB flush insns which currently are no-ops or flush + * only stage 1 MMU indexes will need to change to flush stage 2. + */ + ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB, /* * M-profile. @@ -2936,7 +2946,6 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(SE10_1), TO_CORE_BIT(SE10_1_PAN), TO_CORE_BIT(SE3), - TO_CORE_BIT(Stage2), TO_CORE_BIT(MUser), TO_CORE_BIT(MPriv), @@ -3601,6 +3610,11 @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; } +static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; +} + /* * 64-bit feature tests via id registers. */ @@ -3813,6 +3827,11 @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; } +static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; +} + /* * Feature tests for "does this exist in either 32-bit or 64-bit?" */ @@ -3841,6 +3860,11 @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id) return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id); } +static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id) +{ + return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id); +} + /* * Forward to the above feature tests given an ARMCPU pointer. */ diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 74afc28d53..9bdf75b1ab 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -673,6 +673,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* VMID16 */ + t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* TTS2UXN */ cpu->isar.id_aa64mmfr1 = t; t = cpu->isar.id_aa64mmfr2; @@ -706,11 +707,12 @@ static void aarch64_max_initfn(Object *obj) u = FIELD_DP32(u, ID_MMFR4, HPDS, 1); /* AA32HPD */ u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ u = FIELD_DP32(u, ID_MMFR4, CNP, 1); /* TTCNP */ + u = FIELD_DP32(u, ID_MMFR4, XNX, 1); /* TTS2UXN */ cpu->isar.id_mmfr4 = u; - u = cpu->isar.id_aa64dfr0; - u = FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */ - cpu->isar.id_aa64dfr0 = u; + t = cpu->isar.id_aa64dfr0; + t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */ + cpu->isar.id_aa64dfr0 = t; u = cpu->isar.id_dfr0; u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */ diff --git a/target/arm/helper.c b/target/arm/helper.c index dfefb9b3d9..a94f650795 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -41,6 +41,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); @@ -814,8 +815,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -826,46 +826,9 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } -static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. - */ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); -} static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -4055,8 +4018,7 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); raw_write(env, ri, value); } } @@ -4538,11 +4500,6 @@ static int alle1_tlbmask(CPUARMState *env) return ARMMMUIdxBit_SE10_1 | ARMMMUIdxBit_SE10_1_PAN | ARMMMUIdxBit_SE10_0; - } else if (arm_feature(env, ARM_FEATURE_EL2)) { - return ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2; } else { return ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | @@ -4689,44 +4646,6 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMMMUIdxBit_SE3); } -static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. - */ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); -} - static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -4965,12 +4884,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4981,12 +4898,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_alle1is_write }, { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -5067,20 +4982,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbimva_hyp_is_write }, { .name = "TLBIIPAS2", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2IS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2L", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2LIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, @@ -9997,9 +9908,10 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) * * @env: CPUARMState * @s2ap: The 2-bit stage2 access permissions (S2AP) - * @xn: XN (execute-never) bit + * @xn: XN (execute-never) bits + * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot(CPUARMState *env, int s2ap, int xn) +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) { int prot = 0; @@ -10009,9 +9921,32 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn) if (s2ap & 2) { prot |= PAGE_WRITE; } - if (!xn) { - if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + + if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { + switch (xn) { + case 0: prot |= PAGE_EXEC; + break; + case 1: + if (s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + case 2: + break; + case 3: + if (!s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + default: + g_assert_not_reached(); + } + } else { + if (!extract32(xn, 1, 1)) { + if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + prot |= PAGE_EXEC; + } } } return prot; @@ -10142,8 +10077,10 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, pcacheattrs = &cacheattrs; } - ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa, - &txattrs, &s2prot, &s2size, fi, pcacheattrs); + ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2, + false, + &s2pa, &txattrs, &s2prot, &s2size, fi, + pcacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -10744,8 +10681,32 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, }; } +/** + * get_phys_addr_lpae: perform one stage of page table walk, LPAE format + * + * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, + * prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a long-format + * DFSR/IFSR fault register, with the following caveats: + * * the WnR bit is never set (the caller must do this). + * + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH + * @mmu_idx: MMU index indicating required translation regime + * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table + * walk), must be true if this is stage 2 of a stage 1+2 walk for an + * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored. + * @phys_ptr: set to the physical address corresponding to the virtual address + * @attrs: set to the memory transaction attributes to use + * @prot: set to the permissions for the page containing phys_ptr + * @page_size_ptr: set to the size of the page containing phys_ptr + * @fi: set to fault info if the translation fails + * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes + */ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) @@ -10964,13 +10925,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } ap = extract32(attrs, 4, 2); - xn = extract32(attrs, 12, 1); if (mmu_idx == ARMMMUIdx_Stage2) { ns = true; - *prot = get_S2prot(env, ap, xn); + xn = extract32(attrs, 11, 2); + *prot = get_S2prot(env, ap, xn, s1_is_el0); } else { ns = extract32(attrs, 3, 1); + xn = extract32(attrs, 12, 1); pxn = extract32(attrs, 11, 1); *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } @@ -11837,6 +11799,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* S1 is done. Now do S2 translation. */ ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2, + mmu_idx == ARMMMUIdx_E10_0, phys_ptr, attrs, &s2_prot, page_size, fi, cacheattrs != NULL ? &cacheattrs2 : NULL); @@ -11961,7 +11924,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, } if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, address, access_type, mmu_idx, + return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, phys_ptr, attrs, prot, page_size, fi, cacheattrs); } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode new file mode 100644 index 0000000000..ec3a92fe75 --- /dev/null +++ b/target/arm/neon-dp.decode @@ -0,0 +1,86 @@ +# AArch32 Neon data-processing instruction descriptions +# +# Copyright (c) 2020 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# +# VFP/Neon register fields; same as vfp.decode +%vm_dp 5:1 0:4 +%vn_dp 7:1 16:4 +%vd_dp 22:1 12:4 + +# Encodings for Neon data processing instructions where the T32 encoding +# is a simple transformation of the A32 encoding. +# More specifically, this file covers instructions where the A32 encoding is +# 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq +# and the T32 encoding is +# 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq +# This file works on the A32 encoding only; calling code for T32 has to +# transform the insn into the A32 version first. + +###################################################################### +# 3-reg-same grouping: +# 1111 001 U 0 D sz:2 Vn:4 Vd:4 opc:4 N Q M op Vm:4 +###################################################################### + +&3same vm vn vd q size + +@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \ + &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp + +VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same +VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same + +@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \ + &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 + +VAND_3s 1111 001 0 0 . 00 .... .... 0001 ... 1 .... @3same_logic +VBIC_3s 1111 001 0 0 . 01 .... .... 0001 ... 1 .... @3same_logic +VORR_3s 1111 001 0 0 . 10 .... .... 0001 ... 1 .... @3same_logic +VORN_3s 1111 001 0 0 . 11 .... .... 0001 ... 1 .... @3same_logic +VEOR_3s 1111 001 1 0 . 00 .... .... 0001 ... 1 .... @3same_logic +VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic +VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic +VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic + +VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same +VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same + +VCGT_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 0 .... @3same +VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same +VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same +VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same + +VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same +VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same + +VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same +VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same +VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same +VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same + +VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same +VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same + +VTST_3s 1111 001 0 0 . .. .... .... 1000 . . . 1 .... @3same +VCEQ_3s 1111 001 1 0 . .. .... .... 1000 . . . 1 .... @3same + +VMLA_3s 1111 001 0 0 . .. .... .... 1001 . . . 0 .... @3same +VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same + +VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same +VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode new file mode 100644 index 0000000000..c7b03a72e8 --- /dev/null +++ b/target/arm/neon-ls.decode @@ -0,0 +1,52 @@ +# AArch32 Neon load/store instruction descriptions +# +# Copyright (c) 2020 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# + +# Encodings for Neon load/store instructions where the T32 encoding +# is a simple transformation of the A32 encoding. +# More specifically, this file covers instructions where the A32 encoding is +# 0b1111_0100_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx +# and the T32 encoding is +# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx +# This file works on the A32 encoding only; calling code for T32 has to +# transform the insn into the A32 version first. + +%vd_dp 22:1 12:4 + +# Neon load/store multiple structures + +VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \ + vd=%vd_dp + +# Neon load single element to all lanes + +VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \ + vd=%vd_dp + +# Neon load/store single structure to one lane +%imm1_5_p1 5:1 !function=plus1 +%imm1_6_p1 6:1 !function=plus1 + +VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \ + vd=%vd_dp size=0 stride=1 +VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 align:2 rm:4 \ + vd=%vd_dp size=1 stride=%imm1_5_p1 +VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 align:3 rm:4 \ + vd=%vd_dp size=2 stride=%imm1_6_p1 diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode new file mode 100644 index 0000000000..f297ba8cdf --- /dev/null +++ b/target/arm/neon-shared.decode @@ -0,0 +1,66 @@ +# AArch32 Neon instruction descriptions +# +# Copyright (c) 2020 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# + +# Encodings for Neon instructions whose encoding is the same for +# both A32 and T32. + +# More specifically, this covers: +# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx +# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx + +# VFP/Neon register fields; same as vfp.decode +%vm_dp 5:1 0:4 +%vm_sp 0:4 5:1 +%vn_dp 7:1 16:4 +%vn_sp 16:4 7:1 +%vd_dp 22:1 12:4 +%vd_sp 12:4 22:1 + +VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + +VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + +# VUDOT and VSDOT +VDOT 1111 110 00 . 10 .... .... 1101 . q:1 . u:1 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + +# VFM[AS]L +VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \ + vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0 +VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1 + +VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \ + vn=%vn_dp vd=%vd_dp size=0 +VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp size=1 index=0 + +VDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 u:1 rm:4 \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + +%vfml_scalar_q0_rm 0:3 5:1 +%vfml_scalar_q1_index 5:1 3:1 +VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \ + rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0 +VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \ + index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index efb1c4adc4..a896f9c4b8 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -70,23 +70,6 @@ typedef struct AArch64DecodeTable { AArch64DecodeFn *disas_fn; } AArch64DecodeTable; -/* Function prototype for gen_ functions for calling Neon helpers */ -typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32); -typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); -typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); -typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); -typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); -typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); -typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); -typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); -typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); -typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); -typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); -typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); -typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); -typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); -typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); - /* initialize TCG globals. */ void a64_translate_init(void) { diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 4c2c91ae1b..f02fbb63a4 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -115,13 +115,4 @@ static inline int vec_full_reg_size(DisasContext *s) bool disas_sve(DisasContext *, uint32_t); -/* Note that the gvec expanders operate on offsets + sizes. */ -typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t); -typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t, - uint32_t, uint32_t); -typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); -typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); - #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c new file mode 100644 index 0000000000..50b77b6d71 --- /dev/null +++ b/target/arm/translate-neon.inc.c @@ -0,0 +1,714 @@ +/* + * ARM translation: AArch32 Neon instructions + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This file is intended to be included from translate.c; it uses + * some macros and definitions provided by that file. + * It might be possible to convert it to a standalone .c file eventually. + */ + +static inline int plus1(DisasContext *s, int x) +{ + return x + 1; +} + +/* Include the generated Neon decoder */ +#include "decode-neon-dp.inc.c" +#include "decode-neon-ls.inc.c" +#include "decode-neon-shared.inc.c" + +static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) +{ + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(fpst); + return true; +} + +static bool trans_VCADD(DisasContext *s, arg_VCADD *a) +{ + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(fpst); + return true; +} + +static bool trans_VDOT(DisasContext *s, arg_VDOT *a) +{ + int opr_sz; + gen_helper_gvec_3 *fn_gvec; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; + tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + opr_sz, opr_sz, 0, fn_gvec); + return true; +} + +static bool trans_VFML(DisasContext *s, arg_VFML *a) +{ + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->vm), + cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_a32); + return true; +} + +static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) +{ + gen_helper_gvec_3_ptr *fn_gvec_ptr; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_vcma, s)) { + return false; + } + if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(1); + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, + (a->index << 2) | a->rot, fn_gvec_ptr); + tcg_temp_free_ptr(fpst); + return true; +} + +static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) +{ + gen_helper_gvec_3 *fn_gvec; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(1); + tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->rm), + opr_sz, opr_sz, a->index, fn_gvec); + tcg_temp_free_ptr(fpst); + return true; +} + +static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) +{ + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->rm), + cpu_env, opr_sz, opr_sz, + (a->index << 2) | a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_idx_a32); + return true; +} + +static struct { + int nregs; + int interleave; + int spacing; +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, + {4, 1, 1}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, + {3, 1, 1}, + {1, 1, 1}, + {1, 2, 1}, + {1, 2, 2}, + {2, 1, 1} +}; + +static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, + int stride) +{ + if (rm != 15) { + TCGv_i32 base; + + base = load_reg(s, rn); + if (rm == 13) { + tcg_gen_addi_i32(base, base, stride); + } else { + TCGv_i32 index; + index = load_reg(s, rm); + tcg_gen_add_i32(base, base, index); + tcg_temp_free_i32(index); + } + store_reg(s, rn, base); + } +} + +static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) +{ + /* Neon load/store multiple structures */ + int nregs, interleave, spacing, reg, n; + MemOp endian = s->be_data; + int mmu_idx = get_mem_index(s); + int size = a->size; + TCGv_i64 tmp64; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + if (a->itype > 10) { + return false; + } + /* Catch UNDEF cases for bad values of align field */ + switch (a->itype & 0xc) { + case 4: + if (a->align >= 2) { + return false; + } + break; + case 8: + if (a->align == 3) { + return false; + } + break; + default: + break; + } + nregs = neon_ls_element_type[a->itype].nregs; + interleave = neon_ls_element_type[a->itype].interleave; + spacing = neon_ls_element_type[a->itype].spacing; + if (size == 3 && (interleave | spacing) != 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* + * Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(); + addr = tcg_temp_new_i32(); + tmp = tcg_const_i32(1 << size); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = a->vd + reg + spacing * xs; + + if (a->l) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tt, n, size, tmp64); + } else { + neon_load_element64(tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); + } + tcg_gen_add_i32(addr, addr, tmp); + } + } + } + tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); + tcg_temp_free_i64(tmp64); + + gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8); + return true; +} + +static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) +{ + /* Neon load single structure to all lanes */ + int reg, stride, vec_size; + int vd = a->vd; + int size = a->size; + int nregs = a->n + 1; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (size == 3) { + if (nregs != 4 || a->a == 0) { + return false; + } + /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a->a == 1 && size == 0) { + return false; + } + if (nregs == 3 && a->a == 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * VLD1 to all lanes: T bit indicates how many Dregs to write. + * VLD2/3/4 to all lanes: T bit indicates register stride. + */ + stride = a->t ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(); + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((vd & 1) && vec_size == 16) { + /* + * We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0), + neon_reg_offset(vd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0), + vec_size, vec_size, tmp); + } + tcg_gen_addi_i32(addr, addr, 1 << size); + vd += stride; + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(addr); + + gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); + + return true; +} + +static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) +{ + /* Neon load/store single structure to one lane */ + int reg; + int nregs = a->n + 1; + int vd = a->vd; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + /* Catch the UNDEF cases. This is unavoidably a bit messy. */ + switch (nregs) { + case 1: + if (((a->align & (1 << a->size)) != 0) || + (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { + return false; + } + break; + case 3: + if ((a->align & 1) != 0) { + return false; + } + /* fall through */ + case 2: + if (a->size == 2 && (a->align & 2) != 0) { + return false; + } + break; + case 4: + if ((a->size == 2) && ((a->align & 3) == 3)) { + return false; + } + break; + default: + abort(); + } + if ((vd + a->stride * (nregs - 1)) > 31) { + /* + * Attempts to write off the end of the register file are + * UNPREDICTABLE; we choose to UNDEF because otherwise we would + * access off the end of the array that holds the register data. + */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(); + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, a->rn); + /* + * TODO: if we implemented alignment exceptions, we should check + * addr against the alignment encoded in a->align here. + */ + for (reg = 0; reg < nregs; reg++) { + if (a->l) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + neon_store_element(vd, a->reg_idx, a->size, tmp); + } else { /* Store */ + neon_load_element(tmp, vd, a->reg_idx, a->size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + } + vd += a->stride; + tcg_gen_addi_i32(addr, addr, 1 << a->size); + } + tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); + + gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs); + + return true; +} + +static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) +{ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rn_ofs = neon_reg_offset(a->vn, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return true; +} + +#define DO_3SAME(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME(VADD, tcg_gen_gvec_add) +DO_3SAME(VSUB, tcg_gen_gvec_sub) +DO_3SAME(VAND, tcg_gen_gvec_and) +DO_3SAME(VBIC, tcg_gen_gvec_andc) +DO_3SAME(VORR, tcg_gen_gvec_or) +DO_3SAME(VORN, tcg_gen_gvec_orc) +DO_3SAME(VEOR, tcg_gen_gvec_xor) + +/* These insns are all gvec_bitsel but with the inputs in various orders. */ +#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ + static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) +DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) +DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) + +#define DO_3SAME_NO_SZ_3(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size == 3) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) +DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) +DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) +DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) +DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) + +#define DO_3SAME_CMP(INSN, COND) \ + static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ + } \ + DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) + +DO_3SAME_CMP(VCGT_S, TCG_COND_GT) +DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) +DO_3SAME_CMP(VCGE_S, TCG_COND_GE) +DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) +DO_3SAME_CMP(VCEQ, TCG_COND_EQ) + +static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]); +} +DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s) + +#define DO_3SAME_GVEC4(INSN, OPARRAY) \ + static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), \ + rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +DO_3SAME_GVEC4(VQADD_S, sqadd_op) +DO_3SAME_GVEC4(VQADD_U, uqadd_op) +DO_3SAME_GVEC4(VQSUB_S, sqsub_op) +DO_3SAME_GVEC4(VQSUB_U, uqsub_op) + +static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, + 0, gen_helper_gvec_pmul_b); +} + +static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + return false; + } + return do_3same(s, a, gen_VMUL_p_3s); +} + +#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY) \ + static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \ + oprsz, maxsz, &OPARRAY[vece]); \ + } \ + DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) + + +DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op) +DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op) + +#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \ + static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + /* Note the operation is vshl vd,vm,vn */ \ + tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, \ + oprsz, maxsz, &OPARRAY[vece]); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op) +DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op) diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index b087bbd812..e1a9017598 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -1872,12 +1872,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) return false; } - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - if (!vfp_access_check(s)) { return true; } diff --git a/target/arm/translate.c b/target/arm/translate.c index d4ad2028f1..025747c0bd 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1313,8 +1313,9 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg) #define ARM_CP_RW_BIT (1 << 20) -/* Include the VFP decoder */ +/* Include the VFP and Neon decoders */ #include "translate-vfp.inc.c" +#include "translate-neon.inc.c" static inline void iwmmxt_load_reg(TCGv_i64 var, int reg) { @@ -2609,8 +2610,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) } #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n)) -#define VFP_SREG(insn, bigbit, smallbit) \ - ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #define VFP_DREG(reg, insn, bigbit, smallbit) do { \ if (dc_isar_feature(aa32_simd_r32, s)) { \ reg = (((insn) >> (bigbit)) & 0x0f) \ @@ -2621,11 +2620,8 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) reg = ((insn) >> (bigbit)) & 0x0f; \ }} while (0) -#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) static void gen_neon_dup_low16(TCGv_i32 var) @@ -3217,274 +3213,6 @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) tcg_temp_free_i32(rd); } - -static struct { - int nregs; - int interleave; - int spacing; -} const neon_ls_element_type[11] = { - {1, 4, 1}, - {1, 4, 2}, - {4, 1, 1}, - {2, 2, 2}, - {1, 3, 1}, - {1, 3, 2}, - {3, 1, 1}, - {1, 1, 1}, - {1, 2, 1}, - {1, 2, 2}, - {2, 1, 1} -}; - -/* Translate a NEON load/store element instruction. Return nonzero if the - instruction is invalid. */ -static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) -{ - int rd, rn, rm; - int op; - int nregs; - int interleave; - int spacing; - int stride; - int size; - int reg; - int load; - int n; - int vec_size; - int mmu_idx; - MemOp endian; - TCGv_i32 addr; - TCGv_i32 tmp; - TCGv_i32 tmp2; - TCGv_i64 tmp64; - - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - - if (!s->vfp_enabled) - return 1; - VFP_DREG_D(rd, insn); - rn = (insn >> 16) & 0xf; - rm = insn & 0xf; - load = (insn & (1 << 21)) != 0; - endian = s->be_data; - mmu_idx = get_mem_index(s); - if ((insn & (1 << 23)) == 0) { - /* Load store all elements. */ - op = (insn >> 8) & 0xf; - size = (insn >> 6) & 3; - if (op > 10) - return 1; - /* Catch UNDEF cases for bad values of align field */ - switch (op & 0xc) { - case 4: - if (((insn >> 5) & 1) == 1) { - return 1; - } - break; - case 8: - if (((insn >> 4) & 3) == 3) { - return 1; - } - break; - default: - break; - } - nregs = neon_ls_element_type[op].nregs; - interleave = neon_ls_element_type[op].interleave; - spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) { - return 1; - } - /* For our purposes, bytes are always little-endian. */ - if (size == 0) { - endian = MO_LE; - } - /* Consecutive little-endian elements from a single register - * can be promoted to a larger little-endian operation. - */ - if (interleave == 1 && endian == MO_LE) { - size = 3; - } - tmp64 = tcg_temp_new_i64(); - addr = tcg_temp_new_i32(); - tmp2 = tcg_const_i32(1 << size); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - for (n = 0; n < 8 >> size; n++) { - int xs; - for (xs = 0; xs < interleave; xs++) { - int tt = rd + reg + spacing * xs; - - if (load) { - gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); - neon_store_element64(tt, n, size, tmp64); - } else { - neon_load_element64(tmp64, tt, n, size); - gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); - } - tcg_gen_add_i32(addr, addr, tmp2); - } - } - } - tcg_temp_free_i32(addr); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i64(tmp64); - stride = nregs * interleave * 8; - } else { - size = (insn >> 10) & 3; - if (size == 3) { - /* Load single element to all lanes. */ - int a = (insn >> 4) & 1; - if (!load) { - return 1; - } - size = (insn >> 6) & 3; - nregs = ((insn >> 8) & 3) + 1; - - if (size == 3) { - if (nregs != 4 || a == 0) { - return 1; - } - /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a == 1 && size == 0) { - return 1; - } - if (nregs == 3 && a == 1) { - return 1; - } - addr = tcg_temp_new_i32(); - load_reg_var(s, addr, rn); - - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. - * VLD2/3/4 to all lanes: bit 5 indicates register stride. - */ - stride = (insn & (1 << 5)) ? 2 : 1; - vec_size = nregs == 1 ? stride * 8 : 8; - - tmp = tcg_temp_new_i32(); - for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - if ((rd & 1) && vec_size == 16) { - /* We cannot write 16 bytes at once because the - * destination is unaligned. - */ - tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), - 8, 8, tmp); - tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0), - neon_reg_offset(rd, 0), 8, 8); - } else { - tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), - vec_size, vec_size, tmp); - } - tcg_gen_addi_i32(addr, addr, 1 << size); - rd += stride; - } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(addr); - stride = (1 << size) * nregs; - } else { - /* Single element. */ - int idx = (insn >> 4) & 0xf; - int reg_idx; - switch (size) { - case 0: - reg_idx = (insn >> 5) & 7; - stride = 1; - break; - case 1: - reg_idx = (insn >> 6) & 3; - stride = (insn & (1 << 5)) ? 2 : 1; - break; - case 2: - reg_idx = (insn >> 7) & 1; - stride = (insn & (1 << 6)) ? 2 : 1; - break; - default: - abort(); - } - nregs = ((insn >> 8) & 3) + 1; - /* Catch the UNDEF cases. This is unavoidably a bit messy. */ - switch (nregs) { - case 1: - if (((idx & (1 << size)) != 0) || - (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { - return 1; - } - break; - case 3: - if ((idx & 1) != 0) { - return 1; - } - /* fall through */ - case 2: - if (size == 2 && (idx & 2) != 0) { - return 1; - } - break; - case 4: - if ((size == 2) && ((idx & 3) == 3)) { - return 1; - } - break; - default: - abort(); - } - if ((rd + stride * (nregs - 1)) > 31) { - /* Attempts to write off the end of the register file - * are UNPREDICTABLE; we choose to UNDEF because otherwise - * the neon_load_reg() would write off the end of the array. - */ - return 1; - } - tmp = tcg_temp_new_i32(); - addr = tcg_temp_new_i32(); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - if (load) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - neon_store_element(rd, reg_idx, size, tmp); - } else { /* Store */ - neon_load_element(tmp, rd, reg_idx, size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - } - rd += stride; - tcg_gen_addi_i32(addr, addr, 1 << size); - } - tcg_temp_free_i32(addr); - tcg_temp_free_i32(tmp); - stride = nregs * (1 << size); - } - } - if (rm != 15) { - TCGv_i32 base; - - base = load_reg(s, rn); - if (rm == 13) { - tcg_gen_addi_i32(base, base, stride); - } else { - TCGv_i32 index; - index = load_reg(s, rm); - tcg_gen_add_i32(base, base, index); - tcg_temp_free_i32(index); - } - store_reg(s, rn, base); - } - return 0; -} - static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) { switch (size) { @@ -5002,6 +4730,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) TCGv_ptr ptr1, ptr2, ptr3; TCGv_i64 tmp64; + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return 1; + } + /* FIXME: this access check should not take precedence over UNDEF * for invalid encodings; we will generate incorrect syndrome information * for attempts to execute invalid vfp/neon encodings with FP disabled. @@ -5116,128 +4848,20 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } return 1; - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 1: /* VBIC */ - tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 2: /* VORR */ - tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 3: /* VORN */ - tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 4: /* VEOR */ - tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 5: /* VBSL */ - tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 6: /* VBIT */ - tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs, - vec_size, vec_size); - break; - case 7: /* VBIF */ - tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs, - vec_size, vec_size); - break; - } - return 0; - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VQADD: - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqadd_op : sqadd_op) + size); - return 0; - - case NEON_3R_VQSUB: - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqsub_op : sqsub_op) + size); - return 0; - - case NEON_3R_VMUL: /* VMUL */ - if (u) { - /* Polynomial case allows only P8. */ - if (size != 0) { - return 1; - } - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - 0, gen_helper_gvec_pmul_b); - } else { - tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VML: /* VMLA, VMLS */ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - u ? &mls_op[size] : &mla_op[size]); - return 0; - + case NEON_3R_LOGIC: + case NEON_3R_VMAX: + case NEON_3R_VMIN: case NEON_3R_VTST_VCEQ: - if (u) { /* VCEQ */ - tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { /* VTST */ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size, &cmtst_op[size]); - } - return 0; - case NEON_3R_VCGT: - tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - case NEON_3R_VCGE: - tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VMAX: - if (u) { - tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - case NEON_3R_VMIN: - if (u) { - tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - + case NEON_3R_VQADD: + case NEON_3R_VQSUB: + case NEON_3R_VMUL: + case NEON_3R_VML: case NEON_3R_VSHL: - /* Note the operation is vshl vd,vm,vn */ - tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size, - u ? &ushl_op[size] : &sshl_op[size]); - return 0; + /* Already handled by decodetree */ + return 1; } if (size == 3) { @@ -6016,7 +5640,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) {0, 0, 0, 0}, /* VMLSL */ {0, 0, 0, 9}, /* VQDMLSL */ {0, 0, 0, 0}, /* Integer VMULL */ - {0, 0, 0, 1}, /* VQDMULL */ + {0, 0, 0, 9}, /* VQDMULL */ {0, 0, 0, 0xa}, /* Polynomial VMULL */ {0, 0, 0, 7}, /* Reserved: always UNDEF */ }; @@ -7023,232 +6647,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 0; } -/* Advanced SIMD three registers of the same length extension. - * 31 25 23 22 20 16 12 11 10 9 8 3 0 - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - */ -static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) -{ - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz; - int data = 0; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xfe200f10) == 0xfc200800) { - /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 23, 2); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; - } else if ((insn & 0xfea00f10) == 0xfc800800) { - /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 24, 1); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; - } else if ((insn & 0xfeb00f00) == 0xfc200d00) { - /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ - bool u = extract32(insn, 4, 1); - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; - } else if ((insn & 0xff300f10) == 0xfc200810) { - /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */ - int is_s = extract32(insn, 23, 1); - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - is_long = true; - data = is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_a32; - ptr_is_env = true; - } else { - return 1; - } - - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - if ((rn | rm) & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - rm = VFP_SREG_M(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } - - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } - - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = cpu_env; - } else { - ptr = get_fpstatus_ptr(1); - } - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(ptr); - } - } else { - tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; -} - -/* Advanced SIMD two registers and a scalar extension. - * 31 24 23 22 20 16 12 11 10 9 8 3 0 - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * - */ - -static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) -{ - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz, data; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xff000f10) == 0xfe000800) { - /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */ - int rot = extract32(insn, 20, 2); - int size = extract32(insn, 23, 1); - int index; - - if (!dc_isar_feature(aa32_vcma, s)) { - return 1; - } - if (size == 0) { - if (!dc_isar_feature(aa32_fp16_arith, s)) { - return 1; - } - /* For fp16, rm is just Vm, and index is M. */ - rm = extract32(insn, 0, 4); - index = extract32(insn, 5, 1); - } else { - /* For fp32, rm is the usual M:Vm, and index is 0. */ - VFP_DREG_M(rm, insn); - index = 0; - } - data = (index << 2) | rot; - fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - } else if ((insn & 0xffb00f00) == 0xfe200d00) { - /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ - int u = extract32(insn, 4, 1); - - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; - /* rm is just Vm, and index is M. */ - data = extract32(insn, 5, 1); /* index */ - rm = extract32(insn, 0, 4); - } else if ((insn & 0xffa00f10) == 0xfe000810) { - /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */ - int is_s = extract32(insn, 20, 1); - int vm20 = extract32(insn, 0, 3); - int vm3 = extract32(insn, 3, 1); - int m = extract32(insn, 5, 1); - int index; - - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - if (q) { - rm = vm20; - index = m * 2 + vm3; - } else { - rm = vm20 * 2 + m; - index = vm3; - } - is_long = true; - data = (index << 2) | is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32; - ptr_is_env = true; - } else { - return 1; - } - - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - if (rn & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } - - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = cpu_env; - } else { - ptr = get_fpstatus_ptr(1); - } - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(ptr); - } - } else { - tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; -} - static int disas_coproc_insn(DisasContext *s, uint32_t insn) { int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2; @@ -10941,33 +10339,21 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) /* Unconditional instructions. */ /* TODO: Perhaps merge these into one decodetree output file. */ if (disas_a32_uncond(s, insn) || - disas_vfp_uncond(s, insn)) { + disas_vfp_uncond(s, insn) || + disas_neon_dp(s, insn) || + disas_neon_ls(s, insn) || + disas_neon_shared(s, insn)) { return; } /* fall back to legacy decoder */ if (((insn >> 25) & 7) == 1) { /* NEON Data processing. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - if (disas_neon_data_insn(s, insn)) { goto illegal_op; } return; } - if ((insn & 0x0f100000) == 0x04000000) { - /* NEON load/store. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - return; - } if ((insn & 0x0e000f00) == 0x0c000100) { if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { /* iWMMXt register transfer. */ @@ -10977,18 +10363,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } } } - } else if ((insn & 0x0e000a00) == 0x0c000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - return; - } else if ((insn & 0x0f000a00) == 0x0e000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - return; } goto illegal_op; } @@ -11102,6 +10476,33 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) ARCH(6T2); } + if ((insn & 0xef000000) == 0xef000000) { + /* + * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0xe2ffffff) | + ((insn & (1 << 28)) >> 4) | (1 << 28); + + if (disas_neon_dp(s, a32_insn)) { + return; + } + } + + if ((insn & 0xff100000) == 0xf9000000) { + /* + * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000; + + if (disas_neon_ls(s, a32_insn)) { + return; + } + } + /* * TODO: Perhaps merge these into one decodetree output file. * Note disas_vfp is written for a32 with cond field in the @@ -11109,6 +10510,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) */ if (disas_t32(s, insn) || disas_vfp_uncond(s, insn) || + disas_neon_shared(s, insn) || ((insn >> 28) == 0xe && disas_vfp(s, insn))) { return; } @@ -11138,19 +10540,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; } - if ((insn & 0xfe000a00) == 0xfc000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. */ - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - } else if ((insn & 0xff000a00) == 0xfe000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. */ - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - } else if (((insn >> 24) & 3) == 3) { + if (((insn >> 24) & 3) == 3) { /* Translate into the equivalent ARM encoding. */ insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); if (disas_neon_data_insn(s, insn)) { @@ -11168,12 +10558,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; case 12: - if ((insn & 0x01100000) == 0x01000000) { - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - break; - } goto illegal_op; default: illegal_op: diff --git a/target/arm/translate.h b/target/arm/translate.h index 98b319f3f6..cb7925ea46 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -305,4 +305,30 @@ void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); #define dc_isar_feature(name, ctx) \ ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); }) +/* Note that the gvec expanders operate on offsets + sizes. */ +typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); +typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); +typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32); +typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); +typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); +typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); +typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); +typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); +typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); +typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); +typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); +typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); + #endif /* TARGET_ARM_TRANSLATE_H */ |