diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-02-28 17:35:42 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-02-28 17:35:42 +0000 |
commit | 9403bccfe3e271f04e12c8c64d306e0cff589009 (patch) | |
tree | 060b865d955534528520291bc8bb51701a818385 | |
parent | 21ee7686d7b63c3e58a7ccab33315a8c543c7171 (diff) | |
parent | 1c9af3a9e05c1607a36df4943f8f5393d7621a91 (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190228-1' into staging
target-arm queue:
* add MHU and dual-core support to Musca boards
* refactor some VFP insns to be gated by ID registers
* Revert "arm: Allow system registers for KVM guests to be changed by QEMU code"
* Implement ARMv8.2-FHM extension
* Advertise JSCVT via HWCAP for linux-user
# gpg: Signature made Thu 28 Feb 2019 11:06:55 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20190228-1:
linux-user: Enable HWCAP_ASIMDFHM, HWCAP_JSCVT
target/arm: Enable ARMv8.2-FHM for -cpu max
target/arm: Implement VFMAL and VFMSL for aarch32
target/arm: Implement FMLAL and FMLSL for aarch64
target/arm: Add helpers for FMLAL
Revert "arm: Allow system registers for KVM guests to be changed by QEMU code"
target/arm: Gate "miscellaneous FP" insns by ID register field
target/arm: Use MVFR1 feature bits to gate A32/T32 FP16 instructions
hw/arm/armsse: Unify init-svtor and cpuwait handling
hw/arm/iotkit-sysctl: Implement CPUWAIT and INITSVTOR*
hw/arm/iotkit-sysctl: Add SSE-200 registers
hw/misc/iotkit-sysctl: Correct typo in INITSVTOR0 register name
target/arm/arm-powerctl: Add new arm_set_cpu_on_and_reset()
target/arm/cpu: Allow init-svtor property to be set after realize
hw/arm/armsse: Wire up the MHUs
hw/misc/armsse-mhu.c: Model the SSE-200 Message Handling Unit
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | MAINTAINERS | 2 | ||||
-rw-r--r-- | default-configs/arm-softmmu.mak | 1 | ||||
-rw-r--r-- | hw/arm/armsse.c | 87 | ||||
-rw-r--r-- | hw/misc/Makefile.objs | 1 | ||||
-rw-r--r-- | hw/misc/armsse-mhu.c | 198 | ||||
-rw-r--r-- | hw/misc/iotkit-sysctl.c | 294 | ||||
-rw-r--r-- | hw/misc/trace-events | 4 | ||||
-rw-r--r-- | include/hw/arm/armsse.h | 3 | ||||
-rw-r--r-- | include/hw/misc/armsse-mhu.h | 44 | ||||
-rw-r--r-- | include/hw/misc/iotkit-sysctl.h | 25 | ||||
-rw-r--r-- | linux-user/elfload.c | 2 | ||||
-rw-r--r-- | target/arm/arm-powerctl.c | 56 | ||||
-rw-r--r-- | target/arm/arm-powerctl.h | 16 | ||||
-rw-r--r-- | target/arm/cpu.c | 32 | ||||
-rw-r--r-- | target/arm/cpu.h | 76 | ||||
-rw-r--r-- | target/arm/cpu64.c | 2 | ||||
-rw-r--r-- | target/arm/helper.c | 27 | ||||
-rw-r--r-- | target/arm/helper.h | 9 | ||||
-rw-r--r-- | target/arm/kvm32.c | 23 | ||||
-rw-r--r-- | target/arm/kvm64.c | 2 | ||||
-rw-r--r-- | target/arm/machine.c | 2 | ||||
-rw-r--r-- | target/arm/translate-a64.c | 49 | ||||
-rw-r--r-- | target/arm/translate.c | 180 | ||||
-rw-r--r-- | target/arm/vec_helper.c | 148 |
24 files changed, 1137 insertions, 146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 8f9f9d7c7d..0a6190ecff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -633,6 +633,8 @@ F: hw/misc/iotkit-sysinfo.c F: include/hw/misc/iotkit-sysinfo.h F: hw/misc/armsse-cpuid.c F: include/hw/misc/armsse-cpuid.h +F: hw/misc/armsse-mhu.c +F: include/hw/misc/armsse-mhu.h Musca M: Peter Maydell <peter.maydell@linaro.org> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 87ad267494..bd6943b691 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -119,6 +119,7 @@ CONFIG_IOTKIT_SECCTL=y CONFIG_IOTKIT_SYSCTL=y CONFIG_IOTKIT_SYSINFO=y CONFIG_ARMSSE_CPUID=y +CONFIG_ARMSSE_MHU=y CONFIG_VERSATILE=y CONFIG_VERSATILE_PCI=y diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 129e7ea7fe..76cc690579 100644 --- a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/bitops.h" #include "qapi/error.h" #include "trace.h" #include "hw/sysbus.h" @@ -29,6 +30,7 @@ struct ARMSSEInfo { int sram_banks; int num_cpus; uint32_t sys_version; + uint32_t cpuwait_rst; SysConfigFormat sys_config_format; bool has_mhus; bool has_ppus; @@ -43,6 +45,7 @@ static const ARMSSEInfo armsse_variants[] = { .sram_banks = 1, .num_cpus = 1, .sys_version = 0x41743, + .cpuwait_rst = 0, .sys_config_format = IoTKitFormat, .has_mhus = false, .has_ppus = false, @@ -55,6 +58,7 @@ static const ARMSSEInfo armsse_variants[] = { .sram_banks = 4, .num_cpus = 2, .sys_version = 0x22041743, + .cpuwait_rst = 2, .sys_config_format = SSE200Format, .has_mhus = true, .has_ppus = true, @@ -282,9 +286,9 @@ static void armsse_init(Object *obj) sizeof(s->sysinfo), TYPE_IOTKIT_SYSINFO); if (info->has_mhus) { sysbus_init_child_obj(obj, "mhu0", &s->mhu[0], sizeof(s->mhu[0]), - TYPE_UNIMPLEMENTED_DEVICE); + TYPE_ARMSSE_MHU); sysbus_init_child_obj(obj, "mhu1", &s->mhu[1], sizeof(s->mhu[1]), - TYPE_UNIMPLEMENTED_DEVICE); + TYPE_ARMSSE_MHU); } if (info->has_ppus) { for (i = 0; i < info->num_cpus; i++) { @@ -495,30 +499,33 @@ static void armsse_realize(DeviceState *dev, Error **errp) qdev_prop_set_uint32(cpudev, "num-irq", s->exp_numirq + 32); /* - * In real hardware the initial Secure VTOR is set from the INITSVTOR0 - * register in the IoT Kit System Control Register block, and the - * initial value of that is in turn specifiable by the FPGA that - * instantiates the IoT Kit. In QEMU we don't implement this wrinkle, - * and simply set the CPU's init-svtor to the IoT Kit default value. - * In SSE-200 the situation is similar, except that the default value - * is a reset-time signal input. Typically a board using the SSE-200 - * will have a system control processor whose boot firmware initializes - * the INITSVTOR* registers before powering up the CPUs in any case, - * so the hardware's default value doesn't matter. QEMU doesn't emulate + * In real hardware the initial Secure VTOR is set from the INITSVTOR* + * registers in the IoT Kit System Control Register block. In QEMU + * we set the initial value here, and also the reset value of the + * sysctl register, from this object's QOM init-svtor property. + * If the guest changes the INITSVTOR* registers at runtime then the + * code in iotkit-sysctl.c will update the CPU init-svtor property + * (which will then take effect on the next CPU warm-reset). + * + * Note that typically a board using the SSE-200 will have a system + * control processor whose boot firmware initializes the INITSVTOR* + * registers before powering up the CPUs. QEMU doesn't emulate * the control processor, so instead we behave in the way that the - * firmware does. The initial value is configurable by the board code - * to match whatever its firmware does. + * firmware does: the initial value should be set by the board code + * (using the init-svtor property on the ARMSSE object) to match + * whatever its firmware does. */ qdev_prop_set_uint32(cpudev, "init-svtor", s->init_svtor); /* - * Start all CPUs except CPU0 powered down. In real hardware it is - * a configurable property of the SSE-200 which CPUs start powered up - * (via the CPUWAIT0_RST and CPUWAIT1_RST parameters), but since all - * the boards we care about start CPU0 and leave CPU1 powered off, - * we hard-code that for now. We can add QOM properties for this + * CPUs start powered down if the corresponding bit in the CPUWAIT + * register is 1. In real hardware the CPUWAIT register reset value is + * a configurable property of the SSE-200 (via the CPUWAIT0_RST and + * CPUWAIT1_RST parameters), but since all the boards we care about + * start CPU0 and leave CPU1 powered off, we hard-code that in + * info->cpuwait_rst for now. We can add QOM properties for this * later if necessary. */ - if (i > 0) { + if (extract32(info->cpuwait_rst, i, 1)) { object_property_set_bool(cpuobj, true, "start-powered-off", &err); if (err) { error_propagate(errp, err); @@ -766,22 +773,28 @@ static void armsse_realize(DeviceState *dev, Error **errp) } if (info->has_mhus) { + /* + * An SSE-200 with only one CPU should have only one MHU created, + * with the region where the second MHU usually is being RAZ/WI. + * We don't implement that SSE-200 config; if we want to support + * it then this code needs to be enhanced to handle creating the + * RAZ/WI region instead of the second MHU. + */ + assert(info->num_cpus == ARRAY_SIZE(s->mhu)); + for (i = 0; i < ARRAY_SIZE(s->mhu); i++) { - char *name; char *port; + int cpunum; + SysBusDevice *mhu_sbd = SYS_BUS_DEVICE(&s->mhu[i]); - name = g_strdup_printf("MHU%d", i); - qdev_prop_set_string(DEVICE(&s->mhu[i]), "name", name); - qdev_prop_set_uint64(DEVICE(&s->mhu[i]), "size", 0x1000); object_property_set_bool(OBJECT(&s->mhu[i]), true, "realized", &err); - g_free(name); if (err) { error_propagate(errp, err); return; } port = g_strdup_printf("port[%d]", i + 3); - mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mhu[i]), 0); + mr = sysbus_mmio_get_region(mhu_sbd, 0); object_property_set_link(OBJECT(&s->apb_ppc0), OBJECT(mr), port, &err); g_free(port); @@ -789,6 +802,20 @@ static void armsse_realize(DeviceState *dev, Error **errp) error_propagate(errp, err); return; } + + /* + * Each MHU has an irq line for each CPU: + * MHU 0 irq line 0 -> CPU 0 IRQ 6 + * MHU 0 irq line 1 -> CPU 1 IRQ 6 + * MHU 1 irq line 0 -> CPU 0 IRQ 7 + * MHU 1 irq line 1 -> CPU 1 IRQ 7 + */ + for (cpunum = 0; cpunum < info->num_cpus; cpunum++) { + DeviceState *cpudev = DEVICE(&s->armv7m[cpunum]); + + sysbus_connect_irq(mhu_sbd, cpunum, + qdev_get_gpio_in(cpudev, 6 + i)); + } } } @@ -977,6 +1004,14 @@ static void armsse_realize(DeviceState *dev, Error **errp) /* System information registers */ sysbus_mmio_map(SYS_BUS_DEVICE(&s->sysinfo), 0, 0x40020000); /* System control registers */ + object_property_set_int(OBJECT(&s->sysctl), info->sys_version, + "SYS_VERSION", &err); + object_property_set_int(OBJECT(&s->sysctl), info->cpuwait_rst, + "CPUWAIT_RST", &err); + object_property_set_int(OBJECT(&s->sysctl), s->init_svtor, + "INITSVTOR0_RST", &err); + object_property_set_int(OBJECT(&s->sysctl), s->init_svtor, + "INITSVTOR1_RST", &err); object_property_set_bool(OBJECT(&s->sysctl), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 74c91d250c..c71e07ae35 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -70,6 +70,7 @@ obj-$(CONFIG_IOTKIT_SECCTL) += iotkit-secctl.o obj-$(CONFIG_IOTKIT_SYSCTL) += iotkit-sysctl.o obj-$(CONFIG_IOTKIT_SYSINFO) += iotkit-sysinfo.o obj-$(CONFIG_ARMSSE_CPUID) += armsse-cpuid.o +obj-$(CONFIG_ARMSSE_MHU) += armsse-mhu.o obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_AUX) += auxbus.o diff --git a/hw/misc/armsse-mhu.c b/hw/misc/armsse-mhu.c new file mode 100644 index 0000000000..9ebca32e9a --- /dev/null +++ b/hw/misc/armsse-mhu.c @@ -0,0 +1,198 @@ +/* + * ARM SSE-200 Message Handling Unit (MHU) + * + * Copyright (c) 2019 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the Message Handling Unit (MHU) which is part of the + * Arm SSE-200 and documented in + * http://infocenter.arm.com/help/topic/com.arm.doc.101104_0100_00_en/corelink_sse200_subsystem_for_embedded_technical_reference_manual_101104_0100_00_en.pdf + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "hw/sysbus.h" +#include "hw/registerfields.h" +#include "hw/misc/armsse-mhu.h" + +REG32(CPU0INTR_STAT, 0x0) +REG32(CPU0INTR_SET, 0x4) +REG32(CPU0INTR_CLR, 0x8) +REG32(CPU1INTR_STAT, 0x10) +REG32(CPU1INTR_SET, 0x14) +REG32(CPU1INTR_CLR, 0x18) +REG32(PID4, 0xfd0) +REG32(PID5, 0xfd4) +REG32(PID6, 0xfd8) +REG32(PID7, 0xfdc) +REG32(PID0, 0xfe0) +REG32(PID1, 0xfe4) +REG32(PID2, 0xfe8) +REG32(PID3, 0xfec) +REG32(CID0, 0xff0) +REG32(CID1, 0xff4) +REG32(CID2, 0xff8) +REG32(CID3, 0xffc) + +/* Valid bits in the interrupt registers. If any are set the IRQ is raised */ +#define INTR_MASK 0xf + +/* PID/CID values */ +static const int armsse_mhu_id[] = { + 0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */ + 0x56, 0xb8, 0x0b, 0x00, /* PID0..PID3 */ + 0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */ +}; + +static void armsse_mhu_update(ARMSSEMHU *s) +{ + qemu_set_irq(s->cpu0irq, s->cpu0intr != 0); + qemu_set_irq(s->cpu1irq, s->cpu1intr != 0); +} + +static uint64_t armsse_mhu_read(void *opaque, hwaddr offset, unsigned size) +{ + ARMSSEMHU *s = ARMSSE_MHU(opaque); + uint64_t r; + + switch (offset) { + case A_CPU0INTR_STAT: + r = s->cpu0intr; + break; + + case A_CPU1INTR_STAT: + r = s->cpu1intr; + break; + + case A_PID4 ... A_CID3: + r = armsse_mhu_id[(offset - A_PID4) / 4]; + break; + + case A_CPU0INTR_SET: + case A_CPU0INTR_CLR: + case A_CPU1INTR_SET: + case A_CPU1INTR_CLR: + qemu_log_mask(LOG_GUEST_ERROR, + "SSE MHU: read of write-only register at offset 0x%x\n", + (int)offset); + r = 0; + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "SSE MHU read: bad offset 0x%x\n", (int)offset); + r = 0; + break; + } + trace_armsse_mhu_read(offset, r, size); + return r; +} + +static void armsse_mhu_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + ARMSSEMHU *s = ARMSSE_MHU(opaque); + + trace_armsse_mhu_write(offset, value, size); + + switch (offset) { + case A_CPU0INTR_SET: + s->cpu0intr |= (value & INTR_MASK); + break; + case A_CPU0INTR_CLR: + s->cpu0intr &= ~(value & INTR_MASK); + break; + case A_CPU1INTR_SET: + s->cpu1intr |= (value & INTR_MASK); + break; + case A_CPU1INTR_CLR: + s->cpu1intr &= ~(value & INTR_MASK); + break; + + case A_CPU0INTR_STAT: + case A_CPU1INTR_STAT: + case A_PID4 ... A_CID3: + qemu_log_mask(LOG_GUEST_ERROR, + "SSE MHU: write to read-only register at offset 0x%x\n", + (int)offset); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "SSE MHU write: bad offset 0x%x\n", (int)offset); + break; + } + + armsse_mhu_update(s); +} + +static const MemoryRegionOps armsse_mhu_ops = { + .read = armsse_mhu_read, + .write = armsse_mhu_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static void armsse_mhu_reset(DeviceState *dev) +{ + ARMSSEMHU *s = ARMSSE_MHU(dev); + + s->cpu0intr = 0; + s->cpu1intr = 0; +} + +static const VMStateDescription armsse_mhu_vmstate = { + .name = "armsse-mhu", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(cpu0intr, ARMSSEMHU), + VMSTATE_UINT32(cpu1intr, ARMSSEMHU), + VMSTATE_END_OF_LIST() + }, +}; + +static void armsse_mhu_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + ARMSSEMHU *s = ARMSSE_MHU(obj); + + memory_region_init_io(&s->iomem, obj, &armsse_mhu_ops, + s, "armsse-mhu", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->cpu0irq); + sysbus_init_irq(sbd, &s->cpu1irq); +} + +static void armsse_mhu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = armsse_mhu_reset; + dc->vmsd = &armsse_mhu_vmstate; +} + +static const TypeInfo armsse_mhu_info = { + .name = TYPE_ARMSSE_MHU, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMSSEMHU), + .instance_init = armsse_mhu_init, + .class_init = armsse_mhu_class_init, +}; + +static void armsse_mhu_register_types(void) +{ + type_register_static(&armsse_mhu_info); +} + +type_init(armsse_mhu_register_types); diff --git a/hw/misc/iotkit-sysctl.c b/hw/misc/iotkit-sysctl.c index a21d8bd678..54064a31ef 100644 --- a/hw/misc/iotkit-sysctl.c +++ b/hw/misc/iotkit-sysctl.c @@ -17,6 +17,7 @@ */ #include "qemu/osdep.h" +#include "qemu/bitops.h" #include "qemu/log.h" #include "trace.h" #include "qapi/error.h" @@ -24,19 +25,32 @@ #include "hw/sysbus.h" #include "hw/registerfields.h" #include "hw/misc/iotkit-sysctl.h" +#include "target/arm/arm-powerctl.h" +#include "target/arm/cpu.h" REG32(SECDBGSTAT, 0x0) REG32(SECDBGSET, 0x4) REG32(SECDBGCLR, 0x8) +REG32(SCSECCTRL, 0xc) +REG32(FCLK_DIV, 0x10) +REG32(SYSCLK_DIV, 0x14) +REG32(CLOCK_FORCE, 0x18) REG32(RESET_SYNDROME, 0x100) REG32(RESET_MASK, 0x104) REG32(SWRESET, 0x108) FIELD(SWRESET, SWRESETREQ, 9, 1) REG32(GRETREG, 0x10c) -REG32(INITSVRTOR0, 0x110) +REG32(INITSVTOR0, 0x110) +REG32(INITSVTOR1, 0x114) REG32(CPUWAIT, 0x118) -REG32(BUSWAIT, 0x11c) +REG32(NMI_ENABLE, 0x11c) /* BUSWAIT in IoTKit */ REG32(WICCTRL, 0x120) +REG32(EWCTRL, 0x124) +REG32(PDCM_PD_SYS_SENSE, 0x200) +REG32(PDCM_PD_SRAM0_SENSE, 0x20c) +REG32(PDCM_PD_SRAM1_SENSE, 0x210) +REG32(PDCM_PD_SRAM2_SENSE, 0x214) +REG32(PDCM_PD_SRAM3_SENSE, 0x218) REG32(PID4, 0xfd0) REG32(PID5, 0xfd4) REG32(PID6, 0xfd8) @@ -57,6 +71,21 @@ static const int sysctl_id[] = { 0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */ }; +/* + * Set the initial secure vector table offset address for the core. + * This will take effect when the CPU next resets. + */ +static void set_init_vtor(uint64_t cpuid, uint32_t vtor) +{ + Object *cpuobj = OBJECT(arm_get_cpu_by_id(cpuid)); + + if (cpuobj) { + if (object_property_find(cpuobj, "init-svtor", NULL)) { + object_property_set_uint(cpuobj, vtor, "init-svtor", &error_abort); + } + } +} + static uint64_t iotkit_sysctl_read(void *opaque, hwaddr offset, unsigned size) { @@ -67,6 +96,30 @@ static uint64_t iotkit_sysctl_read(void *opaque, hwaddr offset, case A_SECDBGSTAT: r = s->secure_debug; break; + case A_SCSECCTRL: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->scsecctrl; + break; + case A_FCLK_DIV: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->fclk_div; + break; + case A_SYSCLK_DIV: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->sysclk_div; + break; + case A_CLOCK_FORCE: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->clock_force; + break; case A_RESET_SYNDROME: r = s->reset_syndrome; break; @@ -76,19 +129,65 @@ static uint64_t iotkit_sysctl_read(void *opaque, hwaddr offset, case A_GRETREG: r = s->gretreg; break; - case A_INITSVRTOR0: - r = s->initsvrtor0; + case A_INITSVTOR0: + r = s->initsvtor0; + break; + case A_INITSVTOR1: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->initsvtor1; break; case A_CPUWAIT: r = s->cpuwait; break; - case A_BUSWAIT: - /* In IoTKit BUSWAIT is reserved, R/O, zero */ - r = 0; + case A_NMI_ENABLE: + /* In IoTKit this is named BUSWAIT but is marked reserved, R/O, zero */ + if (!s->is_sse200) { + r = 0; + break; + } + r = s->nmi_enable; break; case A_WICCTRL: r = s->wicctrl; break; + case A_EWCTRL: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->ewctrl; + break; + case A_PDCM_PD_SYS_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->pdcm_pd_sys_sense; + break; + case A_PDCM_PD_SRAM0_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->pdcm_pd_sram0_sense; + break; + case A_PDCM_PD_SRAM1_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->pdcm_pd_sram1_sense; + break; + case A_PDCM_PD_SRAM2_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->pdcm_pd_sram2_sense; + break; + case A_PDCM_PD_SRAM3_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + r = s->pdcm_pd_sram3_sense; + break; case A_PID4 ... A_CID3: r = sysctl_id[(offset - A_PID4) / 4]; break; @@ -101,6 +200,7 @@ static uint64_t iotkit_sysctl_read(void *opaque, hwaddr offset, r = 0; break; default: + bad_offset: qemu_log_mask(LOG_GUEST_ERROR, "IoTKit SysCtl read: bad offset %x\n", (int)offset); r = 0; @@ -145,12 +245,19 @@ static void iotkit_sysctl_write(void *opaque, hwaddr offset, */ s->gretreg = value; break; - case A_INITSVRTOR0: - qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl INITSVRTOR0 unimplemented\n"); - s->initsvrtor0 = value; + case A_INITSVTOR0: + s->initsvtor0 = value; + set_init_vtor(0, s->initsvtor0); break; case A_CPUWAIT: - qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl CPUWAIT unimplemented\n"); + if ((s->cpuwait & 1) && !(value & 1)) { + /* Powering up CPU 0 */ + arm_set_cpu_on_and_reset(0); + } + if ((s->cpuwait & 2) && !(value & 2)) { + /* Powering up CPU 1 */ + arm_set_cpu_on_and_reset(1); + } s->cpuwait = value; break; case A_WICCTRL: @@ -172,14 +279,105 @@ static void iotkit_sysctl_write(void *opaque, hwaddr offset, qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; - case A_BUSWAIT: /* In IoTKit BUSWAIT is reserved, R/O, zero */ + case A_SCSECCTRL: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl SCSECCTRL unimplemented\n"); + s->scsecctrl = value; + break; + case A_FCLK_DIV: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl FCLK_DIV unimplemented\n"); + s->fclk_div = value; + break; + case A_SYSCLK_DIV: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl SYSCLK_DIV unimplemented\n"); + s->sysclk_div = value; + break; + case A_CLOCK_FORCE: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl CLOCK_FORCE unimplemented\n"); + s->clock_force = value; + break; + case A_INITSVTOR1: + if (!s->is_sse200) { + goto bad_offset; + } + s->initsvtor1 = value; + set_init_vtor(1, s->initsvtor1); + break; + case A_EWCTRL: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl EWCTRL unimplemented\n"); + s->ewctrl = value; + break; + case A_PDCM_PD_SYS_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, + "IoTKit SysCtl PDCM_PD_SYS_SENSE unimplemented\n"); + s->pdcm_pd_sys_sense = value; + break; + case A_PDCM_PD_SRAM0_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, + "IoTKit SysCtl PDCM_PD_SRAM0_SENSE unimplemented\n"); + s->pdcm_pd_sram0_sense = value; + break; + case A_PDCM_PD_SRAM1_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, + "IoTKit SysCtl PDCM_PD_SRAM1_SENSE unimplemented\n"); + s->pdcm_pd_sram1_sense = value; + break; + case A_PDCM_PD_SRAM2_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, + "IoTKit SysCtl PDCM_PD_SRAM2_SENSE unimplemented\n"); + s->pdcm_pd_sram2_sense = value; + break; + case A_PDCM_PD_SRAM3_SENSE: + if (!s->is_sse200) { + goto bad_offset; + } + qemu_log_mask(LOG_UNIMP, + "IoTKit SysCtl PDCM_PD_SRAM3_SENSE unimplemented\n"); + s->pdcm_pd_sram3_sense = value; + break; + case A_NMI_ENABLE: + /* In IoTKit this is BUSWAIT: reserved, R/O, zero */ + if (!s->is_sse200) { + goto ro_offset; + } + qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl NMI_ENABLE unimplemented\n"); + s->nmi_enable = value; + break; case A_SECDBGSTAT: case A_PID4 ... A_CID3: + ro_offset: qemu_log_mask(LOG_GUEST_ERROR, "IoTKit SysCtl write: write of RO offset %x\n", (int)offset); break; default: + bad_offset: qemu_log_mask(LOG_GUEST_ERROR, "IoTKit SysCtl write: bad offset %x\n", (int)offset); break; @@ -206,9 +404,21 @@ static void iotkit_sysctl_reset(DeviceState *dev) s->reset_syndrome = 1; s->reset_mask = 0; s->gretreg = 0; - s->initsvrtor0 = 0x10000000; - s->cpuwait = 0; + s->initsvtor0 = s->initsvtor0_rst; + s->initsvtor1 = s->initsvtor1_rst; + s->cpuwait = s->cpuwait_rst; s->wicctrl = 0; + s->scsecctrl = 0; + s->fclk_div = 0; + s->sysclk_div = 0; + s->clock_force = 0; + s->nmi_enable = 0; + s->ewctrl = 0; + s->pdcm_pd_sys_sense = 0x7f; + s->pdcm_pd_sram0_sense = 0; + s->pdcm_pd_sram1_sense = 0; + s->pdcm_pd_sram2_sense = 0; + s->pdcm_pd_sram3_sense = 0; } static void iotkit_sysctl_init(Object *obj) @@ -221,6 +431,44 @@ static void iotkit_sysctl_init(Object *obj) sysbus_init_mmio(sbd, &s->iomem); } +static void iotkit_sysctl_realize(DeviceState *dev, Error **errp) +{ + IoTKitSysCtl *s = IOTKIT_SYSCTL(dev); + + /* The top 4 bits of the SYS_VERSION register tell us if we're an SSE-200 */ + if (extract32(s->sys_version, 28, 4) == 2) { + s->is_sse200 = true; + } +} + +static bool sse200_needed(void *opaque) +{ + IoTKitSysCtl *s = IOTKIT_SYSCTL(opaque); + + return s->is_sse200; +} + +static const VMStateDescription iotkit_sysctl_sse200_vmstate = { + .name = "iotkit-sysctl/sse-200", + .version_id = 1, + .minimum_version_id = 1, + .needed = sse200_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(scsecctrl, IoTKitSysCtl), + VMSTATE_UINT32(fclk_div, IoTKitSysCtl), + VMSTATE_UINT32(sysclk_div, IoTKitSysCtl), + VMSTATE_UINT32(clock_force, IoTKitSysCtl), + VMSTATE_UINT32(initsvtor1, IoTKitSysCtl), + VMSTATE_UINT32(nmi_enable, IoTKitSysCtl), + VMSTATE_UINT32(pdcm_pd_sys_sense, IoTKitSysCtl), + VMSTATE_UINT32(pdcm_pd_sram0_sense, IoTKitSysCtl), + VMSTATE_UINT32(pdcm_pd_sram1_sense, IoTKitSysCtl), + VMSTATE_UINT32(pdcm_pd_sram2_sense, IoTKitSysCtl), + VMSTATE_UINT32(pdcm_pd_sram3_sense, IoTKitSysCtl), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription iotkit_sysctl_vmstate = { .name = "iotkit-sysctl", .version_id = 1, @@ -230,19 +478,35 @@ static const VMStateDescription iotkit_sysctl_vmstate = { VMSTATE_UINT32(reset_syndrome, IoTKitSysCtl), VMSTATE_UINT32(reset_mask, IoTKitSysCtl), VMSTATE_UINT32(gretreg, IoTKitSysCtl), - VMSTATE_UINT32(initsvrtor0, IoTKitSysCtl), + VMSTATE_UINT32(initsvtor0, IoTKitSysCtl), VMSTATE_UINT32(cpuwait, IoTKitSysCtl), VMSTATE_UINT32(wicctrl, IoTKitSysCtl), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &iotkit_sysctl_sse200_vmstate, + NULL } }; +static Property iotkit_sysctl_props[] = { + DEFINE_PROP_UINT32("SYS_VERSION", IoTKitSysCtl, sys_version, 0), + DEFINE_PROP_UINT32("CPUWAIT_RST", IoTKitSysCtl, cpuwait_rst, 0), + DEFINE_PROP_UINT32("INITSVTOR0_RST", IoTKitSysCtl, initsvtor0_rst, + 0x10000000), + DEFINE_PROP_UINT32("INITSVTOR1_RST", IoTKitSysCtl, initsvtor1_rst, + 0x10000000), + DEFINE_PROP_END_OF_LIST() +}; + static void iotkit_sysctl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->vmsd = &iotkit_sysctl_vmstate; dc->reset = iotkit_sysctl_reset; + dc->props = iotkit_sysctl_props; + dc->realize = iotkit_sysctl_realize; } static const TypeInfo iotkit_sysctl_info = { diff --git a/hw/misc/trace-events b/hw/misc/trace-events index b0701bddd3..c1795bb54b 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -136,3 +136,7 @@ iotkit_sysctl_reset(void) "IoTKit SysCtl: reset" # hw/misc/armsse-cpuid.c armsse_cpuid_read(uint64_t offset, uint64_t data, unsigned size) "SSE-200 CPU_IDENTITY read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" armsse_cpuid_write(uint64_t offset, uint64_t data, unsigned size) "SSE-200 CPU_IDENTITY write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" + +# hw/misc/armsse-mhu.c +armsse_mhu_read(uint64_t offset, uint64_t data, unsigned size) "SSE-200 MHU read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" +armsse_mhu_write(uint64_t offset, uint64_t data, unsigned size) "SSE-200 MHU write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h index 7ef871c7df..81e082cccf 100644 --- a/include/hw/arm/armsse.h +++ b/include/hw/arm/armsse.h @@ -95,6 +95,7 @@ #include "hw/misc/iotkit-sysctl.h" #include "hw/misc/iotkit-sysinfo.h" #include "hw/misc/armsse-cpuid.h" +#include "hw/misc/armsse-mhu.h" #include "hw/misc/unimp.h" #include "hw/or-irq.h" #include "hw/core/split-irq.h" @@ -166,7 +167,7 @@ typedef struct ARMSSE { IoTKitSysCtl sysctl; IoTKitSysCtl sysinfo; - UnimplementedDeviceState mhu[2]; + ARMSSEMHU mhu[2]; UnimplementedDeviceState ppu[NUM_PPUS]; UnimplementedDeviceState cachectrl[SSE_MAX_CPUS]; UnimplementedDeviceState cpusecctrl[SSE_MAX_CPUS]; diff --git a/include/hw/misc/armsse-mhu.h b/include/hw/misc/armsse-mhu.h new file mode 100644 index 0000000000..e57eafc252 --- /dev/null +++ b/include/hw/misc/armsse-mhu.h @@ -0,0 +1,44 @@ +/* + * ARM SSE-200 Message Handling Unit (MHU) + * + * Copyright (c) 2019 Linaro Limited + * Written by Peter Maydell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the Message Handling Unit (MHU) which is part of the + * Arm SSE-200 and documented in + * http://infocenter.arm.com/help/topic/com.arm.doc.101104_0100_00_en/corelink_sse200_subsystem_for_embedded_technical_reference_manual_101104_0100_00_en.pdf + * + * QEMU interface: + * + sysbus MMIO region 0: the system information register bank + * + sysbus IRQ 0: interrupt for CPU 0 + * + sysbus IRQ 1: interrupt for CPU 1 + */ + +#ifndef HW_MISC_SSE_MHU_H +#define HW_MISC_SSE_MHU_H + +#include "hw/sysbus.h" + +#define TYPE_ARMSSE_MHU "armsse-mhu" +#define ARMSSE_MHU(obj) OBJECT_CHECK(ARMSSEMHU, (obj), TYPE_ARMSSE_MHU) + +typedef struct ARMSSEMHU { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + qemu_irq cpu0irq; + qemu_irq cpu1irq; + + uint32_t cpu0intr; + uint32_t cpu1intr; +} ARMSSEMHU; + +#endif diff --git a/include/hw/misc/iotkit-sysctl.h b/include/hw/misc/iotkit-sysctl.h index e36613cb5e..601c8ecc0d 100644 --- a/include/hw/misc/iotkit-sysctl.h +++ b/include/hw/misc/iotkit-sysctl.h @@ -17,6 +17,9 @@ * "system control register" blocks. * * QEMU interface: + * + QOM property "SYS_VERSION": value of the SYS_VERSION register of the + * system information block of the SSE + * (used to identify whether to provide SSE-200-only registers) * + sysbus MMIO region 0: the system information register bank * + sysbus MMIO region 1: the system control register bank */ @@ -41,9 +44,29 @@ typedef struct IoTKitSysCtl { uint32_t reset_syndrome; uint32_t reset_mask; uint32_t gretreg; - uint32_t initsvrtor0; + uint32_t initsvtor0; uint32_t cpuwait; uint32_t wicctrl; + uint32_t scsecctrl; + uint32_t fclk_div; + uint32_t sysclk_div; + uint32_t clock_force; + uint32_t initsvtor1; + uint32_t nmi_enable; + uint32_t ewctrl; + uint32_t pdcm_pd_sys_sense; + uint32_t pdcm_pd_sram0_sense; + uint32_t pdcm_pd_sram1_sense; + uint32_t pdcm_pd_sram2_sense; + uint32_t pdcm_pd_sram3_sense; + + /* Properties */ + uint32_t sys_version; + uint32_t cpuwait_rst; + uint32_t initsvtor0_rst; + uint32_t initsvtor1_rst; + + bool is_sse200; } IoTKitSysCtl; #endif diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 3a50d587ff..b9f7cbbdc1 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -602,6 +602,8 @@ static uint32_t get_elf_hwcap(void) GET_FEATURE_ID(aa64_fcma, ARM_HWCAP_A64_FCMA); GET_FEATURE_ID(aa64_sve, ARM_HWCAP_A64_SVE); GET_FEATURE_ID(aa64_pauth, ARM_HWCAP_A64_PACA | ARM_HWCAP_A64_PACG); + GET_FEATURE_ID(aa64_fhm, ARM_HWCAP_A64_ASIMDFHM); + GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT); #undef GET_FEATURE_ID diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index f9de5164e5..f77a950db6 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -228,6 +228,62 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, return QEMU_ARM_POWERCTL_RET_SUCCESS; } +static void arm_set_cpu_on_and_reset_async_work(CPUState *target_cpu_state, + run_on_cpu_data data) +{ + ARMCPU *target_cpu = ARM_CPU(target_cpu_state); + + /* Initialize the cpu we are turning on */ + cpu_reset(target_cpu_state); + target_cpu_state->halted = 0; + + /* Finally set the power status */ + assert(qemu_mutex_iothread_locked()); + target_cpu->power_state = PSCI_ON; +} + +int arm_set_cpu_on_and_reset(uint64_t cpuid) +{ + CPUState *target_cpu_state; + ARMCPU *target_cpu; + + assert(qemu_mutex_iothread_locked()); + + /* Retrieve the cpu we are powering up */ + target_cpu_state = arm_get_cpu_by_id(cpuid); + if (!target_cpu_state) { + /* The cpu was not found */ + return QEMU_ARM_POWERCTL_INVALID_PARAM; + } + + target_cpu = ARM_CPU(target_cpu_state); + if (target_cpu->power_state == PSCI_ON) { + qemu_log_mask(LOG_GUEST_ERROR, + "[ARM]%s: CPU %" PRId64 " is already on\n", + __func__, cpuid); + return QEMU_ARM_POWERCTL_ALREADY_ON; + } + + /* + * If another CPU has powered the target on we are in the state + * ON_PENDING and additional attempts to power on the CPU should + * fail (see 6.6 Implementation CPU_ON/CPU_OFF races in the PSCI + * spec) + */ + if (target_cpu->power_state == PSCI_ON_PENDING) { + qemu_log_mask(LOG_GUEST_ERROR, + "[ARM]%s: CPU %" PRId64 " is already powering on\n", + __func__, cpuid); + return QEMU_ARM_POWERCTL_ON_PENDING; + } + + async_run_on_cpu(target_cpu_state, arm_set_cpu_on_and_reset_async_work, + RUN_ON_CPU_NULL); + + /* We are good to go */ + return QEMU_ARM_POWERCTL_RET_SUCCESS; +} + static void arm_set_cpu_off_async_work(CPUState *target_cpu_state, run_on_cpu_data data) { diff --git a/target/arm/arm-powerctl.h b/target/arm/arm-powerctl.h index 04353923c0..37c8a04f0a 100644 --- a/target/arm/arm-powerctl.h +++ b/target/arm/arm-powerctl.h @@ -74,4 +74,20 @@ int arm_set_cpu_off(uint64_t cpuid); */ int arm_reset_cpu(uint64_t cpuid); +/* + * arm_set_cpu_on_and_reset: + * @cpuid: the id of the CPU we want to star + * + * Start the cpu designated by @cpuid and put it through its normal + * CPU reset process. The CPU will start in the way it is architected + * to start after a power-on reset. + * + * Returns: QEMU_ARM_POWERCTL_RET_SUCCESS on success. + * QEMU_ARM_POWERCTL_INVALID_PARAM if there is no CPU with that ID. + * QEMU_ARM_POWERCTL_ALREADY_ON if the CPU is already on. + * QEMU_ARM_POWERCTL_ON_PENDING if the CPU is already partway through + * powering on. + */ +int arm_set_cpu_on_and_reset(uint64_t cpuid); + #endif diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 8ea6569088..54b61f917b 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -22,6 +22,7 @@ #include "target/arm/idau.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "cpu.h" #include "internals.h" #include "qemu-common.h" @@ -771,9 +772,21 @@ static Property arm_cpu_pmsav7_dregion_property = pmsav7_dregion, qdev_prop_uint32, uint32_t); -/* M profile: initial value of the Secure VTOR */ -static Property arm_cpu_initsvtor_property = - DEFINE_PROP_UINT32("init-svtor", ARMCPU, init_svtor, 0); +static void arm_get_init_svtor(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + visit_type_uint32(v, name, &cpu->init_svtor, errp); +} + +static void arm_set_init_svtor(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + visit_type_uint32(v, name, &cpu->init_svtor, errp); +} void arm_cpu_post_init(Object *obj) { @@ -845,8 +858,14 @@ void arm_cpu_post_init(Object *obj) qdev_prop_allow_set_link_before_realize, OBJ_PROP_LINK_STRONG, &error_abort); - qdev_property_add_static(DEVICE(obj), &arm_cpu_initsvtor_property, - &error_abort); + /* + * M profile: initial value of the Secure VTOR. We can't just use + * a simple DEFINE_PROP_UINT32 for this because we want to permit + * the property to be set after realize. + */ + object_property_add(obj, "init-svtor", "uint32", + arm_get_init_svtor, arm_set_init_svtor, + NULL, NULL, &error_abort); } qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property, @@ -995,7 +1014,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (arm_feature(env, ARM_FEATURE_VFP4)) { set_feature(env, ARM_FEATURE_VFP3); - set_feature(env, ARM_FEATURE_VFP_FP16); } if (arm_feature(env, ARM_FEATURE_VFP3)) { set_feature(env, ARM_FEATURE_VFP); @@ -1656,7 +1674,6 @@ static void cortex_a9_initfn(Object *obj) cpu->dtb_compatible = "arm,cortex-a9"; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_VFP3); - set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_EL3); @@ -2003,6 +2020,7 @@ static void arm_max_initfn(Object *obj) t = cpu->isar.id_isar6; t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); t = FIELD_DP32(t, ID_ISAR6, DP, 1); + t = FIELD_DP32(t, ID_ISAR6, FHM, 1); cpu->isar.id_isar6 = t; t = cpu->id_mmfr4; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 1eea1a408b..36cd365efa 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1730,6 +1730,27 @@ FIELD(ID_DFR0, MPROFDBG, 20, 4) FIELD(ID_DFR0, PERFMON, 24, 4) FIELD(ID_DFR0, TRACEFILT, 28, 4) +FIELD(MVFR0, SIMDREG, 0, 4) +FIELD(MVFR0, FPSP, 4, 4) +FIELD(MVFR0, FPDP, 8, 4) +FIELD(MVFR0, FPTRAP, 12, 4) +FIELD(MVFR0, FPDIVIDE, 16, 4) +FIELD(MVFR0, FPSQRT, 20, 4) +FIELD(MVFR0, FPSHVEC, 24, 4) +FIELD(MVFR0, FPROUND, 28, 4) + +FIELD(MVFR1, FPFTZ, 0, 4) +FIELD(MVFR1, FPDNAN, 4, 4) +FIELD(MVFR1, SIMDLS, 8, 4) +FIELD(MVFR1, SIMDINT, 12, 4) +FIELD(MVFR1, SIMDSP, 16, 4) +FIELD(MVFR1, SIMDHP, 20, 4) +FIELD(MVFR1, FPHP, 24, 4) +FIELD(MVFR1, SIMDFMAC, 28, 4) + +FIELD(MVFR2, SIMDMISC, 0, 4) +FIELD(MVFR2, FPMISC, 4, 4) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF @@ -1747,7 +1768,6 @@ enum arm_features { ARM_FEATURE_THUMB2, ARM_FEATURE_PMSA, /* no MMU; may have Memory Protection Unit */ ARM_FEATURE_VFP3, - ARM_FEATURE_VFP_FP16, ARM_FEATURE_NEON, ARM_FEATURE_M, /* Microcontroller profile. */ ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */ @@ -2538,25 +2558,18 @@ bool write_list_to_cpustate(ARMCPU *cpu); /** * write_cpustate_to_list: * @cpu: ARMCPU - * @kvm_sync: true if this is for syncing back to KVM * * For each register listed in the ARMCPU cpreg_indexes list, write * its value from the ARMCPUState structure into the cpreg_values list. * This is used to copy info from TCG's working data structures into * KVM or for outbound migration. * - * @kvm_sync is true if we are doing this in order to sync the - * register state back to KVM. In this case we will only update - * values in the list if the previous list->cpustate sync actually - * successfully wrote the CPU state. Otherwise we will keep the value - * that is in the list. - * * Returns: true if all register values were read correctly, * false if some register was unknown or could not be read. * Note that we do not stop early on failure -- we will attempt * reading all registers in the list. */ -bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); +bool write_cpustate_to_list(ARMCPU *cpu); #define ARM_CPUID_TI915T 0x54029152 #define ARM_CPUID_TI925T 0x54029252 @@ -3283,6 +3296,11 @@ static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; } +static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; +} + static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) { /* @@ -3294,6 +3312,41 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) } /* + * We always set the FP and SIMD FP16 fields to indicate identical + * levels of support (assuming SIMD is implemented at all), so + * we only need one set of accessors. + */ +static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id) +{ + return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 0; +} + +static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) +{ + return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 1; +} + +static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) +{ + return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 1; +} + +static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id) +{ + return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 2; +} + +static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id) +{ + return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 3; +} + +static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4; +} + +/* * 64-bit feature tests via id registers. */ static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) @@ -3356,6 +3409,11 @@ static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; } +static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; +} + static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 69e4134f79..1b0c427277 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -308,6 +308,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1); cpu->isar.id_aa64isar0 = t; t = cpu->isar.id_aa64isar1; @@ -347,6 +348,7 @@ static void aarch64_max_initfn(Object *obj) u = cpu->isar.id_isar6; u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1); u = FIELD_DP32(u, ID_ISAR6, DP, 1); + u = FIELD_DP32(u, ID_ISAR6, FHM, 1); cpu->isar.id_isar6 = u; /* diff --git a/target/arm/helper.c b/target/arm/helper.c index fbaa801cea..1fa282a7fc 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -265,7 +265,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri) return true; } -bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) +bool write_cpustate_to_list(ARMCPU *cpu) { /* Write the coprocessor state from cpu->env to the (index,value) list. */ int i; @@ -274,7 +274,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) for (i = 0; i < cpu->cpreg_array_len; i++) { uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); const ARMCPRegInfo *ri; - uint64_t newval; ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); if (!ri) { @@ -284,29 +283,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) if (ri->type & ARM_CP_NO_RAW) { continue; } - - newval = read_raw_cp_reg(&cpu->env, ri); - if (kvm_sync) { - /* - * Only sync if the previous list->cpustate sync succeeded. - * Rather than tracking the success/failure state for every - * item in the list, we just recheck "does the raw write we must - * have made in write_list_to_cpustate() read back OK" here. - */ - uint64_t oldval = cpu->cpreg_values[i]; - - if (oldval == newval) { - continue; - } - - write_raw_cp_reg(&cpu->env, ri, oldval); - if (read_raw_cp_reg(&cpu->env, ri) != oldval) { - continue; - } - - write_raw_cp_reg(&cpu->env, ri, newval); - } - cpu->cpreg_values[i] = newval; + cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri); } return ok; } diff --git a/target/arm/helper.h b/target/arm/helper.h index 747cb64d29..d363904278 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -677,6 +677,15 @@ DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index a75e04cc8f..50327989dc 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -125,9 +125,6 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (extract32(id_pfr0, 12, 4) == 1) { set_feature(&features, ARM_FEATURE_THUMB2EE); } - if (extract32(ahcf->isar.mvfr1, 20, 4) == 1) { - set_feature(&features, ARM_FEATURE_VFP_FP16); - } if (extract32(ahcf->isar.mvfr1, 12, 4) == 1) { set_feature(&features, ARM_FEATURE_NEON); } @@ -387,8 +384,24 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - write_cpustate_to_list(cpu, true); - + /* Note that we do not call write_cpustate_to_list() + * here, so we are only writing the tuple list back to + * KVM. This is safe because nothing can change the + * CPUARMState cp15 fields (in particular gdb accesses cannot) + * and so there are no changes to sync. In fact syncing would + * be wrong at this point: for a constant register where TCG and + * KVM disagree about its value, the preceding write_list_to_cpustate() + * would not have had any effect on the CPUARMState value (since the + * register is read-only), and a write_cpustate_to_list() here would + * then try to write the TCG value back into KVM -- this would either + * fail or incorrectly change the value the guest sees. + * + * If we ever want to allow the user to modify cp15 registers via + * the gdb stub, we would need to be more clever here (for instance + * tracking the set of registers kvm_arch_get_registers() successfully + * managed to update the CPUARMState with, and only allowing those + * to be written back up into the kernel). + */ if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index e3ba149248..089af9c5f0 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -838,8 +838,6 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - write_cpustate_to_list(cpu, true); - if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } diff --git a/target/arm/machine.c b/target/arm/machine.c index 124192bfc2..b292549614 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque) abort(); } } else { - if (!write_cpustate_to_list(cpu, false)) { + if (!write_cpustate_to_list(cpu)) { /* This should never fail. */ abort(); } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index c56e878787..d3c8eaf089 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10917,9 +10917,29 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); return; + + case 0x1d: /* FMLAL */ + case 0x3d: /* FMLSL */ + case 0x59: /* FMLAL2 */ + case 0x79: /* FMLSL2 */ + if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { + unallocated_encoding(s); + return; + } + if (fp_access_check(s)) { + int is_s = extract32(insn, 23, 1); + int is_2 = extract32(insn, 29, 1); + int data = (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), cpu_env, + is_q ? 16 : 8, vec_full_reg_size(s), + data, gen_helper_gvec_fmlal_a64); + } + return; + default: unallocated_encoding(s); return; @@ -12739,6 +12759,17 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } is_fp = 2; break; + case 0x00: /* FMLAL */ + case 0x04: /* FMLSL */ + case 0x18: /* FMLAL2 */ + case 0x1c: /* FMLSL2 */ + if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { + unallocated_encoding(s); + return; + } + size = MO_16; + /* is_fp, but we pass cpu_env not fp_status. */ + break; default: unallocated_encoding(s); return; @@ -12849,6 +12880,22 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(fpst); } return; + + case 0x00: /* FMLAL */ + case 0x04: /* FMLSL */ + case 0x18: /* FMLAL2 */ + case 0x1c: /* FMLSL2 */ + { + int is_s = extract32(opcode, 2, 1); + int is_2 = u; + int data = (index << 2) | (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), cpu_env, + is_q ? 16 : 8, vec_full_reg_size(s), + data, gen_helper_gvec_fmlal_idx_a64); + } + return; } if (size == 3) { diff --git a/target/arm/translate.c b/target/arm/translate.c index c1175798ac..8f7f5b95aa 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -3357,14 +3357,10 @@ static const uint8_t fp_decode_rm[] = { FPROUNDING_NEGINF, }; -static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn) +static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn) { uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - if (dp) { VFP_DREG_D(rd, insn); VFP_DREG_N(rn, insn); @@ -3375,15 +3371,18 @@ static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn) rm = VFP_SREG_M(insn); } - if ((insn & 0x0f800e50) == 0x0e000a00) { + if ((insn & 0x0f800e50) == 0x0e000a00 && dc_isar_feature(aa32_vsel, s)) { return handle_vsel(insn, rd, rn, rm, dp); - } else if ((insn & 0x0fb00e10) == 0x0e800a00) { + } else if ((insn & 0x0fb00e10) == 0x0e800a00 && + dc_isar_feature(aa32_vminmaxnm, s)) { return handle_vminmaxnm(insn, rd, rn, rm, dp); - } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) { + } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40 && + dc_isar_feature(aa32_vrint, s)) { /* VRINTA, VRINTN, VRINTP, VRINTM */ int rounding = fp_decode_rm[extract32(insn, 16, 2)]; return handle_vrint(insn, rd, rm, dp, rounding); - } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) { + } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40 && + dc_isar_feature(aa32_vcvt_dr, s)) { /* VCVTA, VCVTN, VCVTP, VCVTM */ int rounding = fp_decode_rm[extract32(insn, 16, 2)]; return handle_vcvt(insn, rd, rm, dp, rounding); @@ -3427,10 +3426,12 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) } if (extract32(insn, 28, 4) == 0xf) { - /* Encodings with T=1 (Thumb) or unconditional (ARM): - * only used in v8 and above. + /* + * Encodings with T=1 (Thumb) or unconditional (ARM): + * only used for the "miscellaneous VFP features" added in v8A + * and v7M (and gated on the MVFR2.FPMisc field). */ - return disas_vfp_v8_insn(s, insn); + return disas_vfp_misc_insn(s, insn); } dp = ((insn & 0xf00) == 0xb00); @@ -3663,17 +3664,27 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) * UNPREDICTABLE if bit 8 is set prior to ARMv8 * (we choose to UNDEF) */ - if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) || - !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) { - return 1; + if (dp) { + if (!dc_isar_feature(aa32_fp16_dpconv, s)) { + return 1; + } + } else { + if (!dc_isar_feature(aa32_fp16_spconv, s)) { + return 1; + } } rm_is_dp = false; break; case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */ case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */ - if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) || - !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) { - return 1; + if (dp) { + if (!dc_isar_feature(aa32_fp16_dpconv, s)) { + return 1; + } + } else { + if (!dc_isar_feature(aa32_fp16_spconv, s)) { + return 1; + } } rd_is_dp = false; break; @@ -7876,7 +7887,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) TCGv_ptr fpst; TCGv_i32 ahp; - if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) || + if (!dc_isar_feature(aa32_fp16_spconv, s) || q || (rm & 1)) { return 1; } @@ -7908,7 +7919,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { TCGv_ptr fpst; TCGv_i32 ahp; - if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) || + if (!dc_isar_feature(aa32_fp16_spconv, s) || q || (rd & 1)) { return 1; } @@ -8372,15 +8383,9 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; int rd, rn, rm, opr_sz; int data = 0; - bool q; - - q = extract32(insn, 6, 1); - VFP_DREG_D(rd, insn); - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - if ((rd | rn | rm) & q) { - return 1; - } + int off_rn, off_rm; + bool is_long = false, q = extract32(insn, 6, 1); + bool ptr_is_env = false; if ((insn & 0xfe200f10) == 0xfc200800) { /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ @@ -8407,10 +8412,39 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) return 1; } fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; + } else if ((insn & 0xff300f10) == 0xfc200810) { + /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */ + int is_s = extract32(insn, 23, 1); + if (!dc_isar_feature(aa32_fhm, s)) { + return 1; + } + is_long = true; + data = is_s; /* is_2 == 0 */ + fn_gvec_ptr = gen_helper_gvec_fmlal_a32; + ptr_is_env = true; } else { return 1; } + VFP_DREG_D(rd, insn); + if (rd & q) { + return 1; + } + if (q || !is_long) { + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + if ((rn | rm) & q & !is_long) { + return 1; + } + off_rn = vfp_reg_offset(1, rn); + off_rm = vfp_reg_offset(1, rm); + } else { + rn = VFP_SREG_N(insn); + rm = VFP_SREG_M(insn); + off_rn = vfp_reg_offset(0, rn); + off_rm = vfp_reg_offset(0, rm); + } + if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); @@ -8422,16 +8456,19 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) opr_sz = (1 + q) * 8; if (fn_gvec_ptr) { - TCGv_ptr fpst = get_fpstatus_ptr(1); - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), fpst, + TCGv_ptr ptr; + if (ptr_is_env) { + ptr = cpu_env; + } else { + ptr = get_fpstatus_ptr(1); + } + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr, opr_sz, opr_sz, data, fn_gvec_ptr); - tcg_temp_free_ptr(fpst); + if (!ptr_is_env) { + tcg_temp_free_ptr(ptr); + } } else { - tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), + tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm, opr_sz, opr_sz, data, fn_gvec); } return 0; @@ -8450,14 +8487,9 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) gen_helper_gvec_3 *fn_gvec = NULL; gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; int rd, rn, rm, opr_sz, data; - bool q; - - q = extract32(insn, 6, 1); - VFP_DREG_D(rd, insn); - VFP_DREG_N(rn, insn); - if ((rd | rn) & q) { - return 1; - } + int off_rn, off_rm; + bool is_long = false, q = extract32(insn, 6, 1); + bool ptr_is_env = false; if ((insn & 0xff000f10) == 0xfe000800) { /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */ @@ -8486,6 +8518,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) } else if ((insn & 0xffb00f00) == 0xfe200d00) { /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ int u = extract32(insn, 4, 1); + if (!dc_isar_feature(aa32_dp, s)) { return 1; } @@ -8493,10 +8526,48 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) /* rm is just Vm, and index is M. */ data = extract32(insn, 5, 1); /* index */ rm = extract32(insn, 0, 4); + } else if ((insn & 0xffa00f10) == 0xfe000810) { + /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */ + int is_s = extract32(insn, 20, 1); + int vm20 = extract32(insn, 0, 3); + int vm3 = extract32(insn, 3, 1); + int m = extract32(insn, 5, 1); + int index; + + if (!dc_isar_feature(aa32_fhm, s)) { + return 1; + } + if (q) { + rm = vm20; + index = m * 2 + vm3; + } else { + rm = vm20 * 2 + m; + index = vm3; + } + is_long = true; + data = (index << 2) | is_s; /* is_2 == 0 */ + fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32; + ptr_is_env = true; } else { return 1; } + VFP_DREG_D(rd, insn); + if (rd & q) { + return 1; + } + if (q || !is_long) { + VFP_DREG_N(rn, insn); + if (rn & q & !is_long) { + return 1; + } + off_rn = vfp_reg_offset(1, rn); + off_rm = vfp_reg_offset(1, rm); + } else { + rn = VFP_SREG_N(insn); + off_rn = vfp_reg_offset(0, rn); + off_rm = vfp_reg_offset(0, rm); + } if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); @@ -8508,16 +8579,19 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) opr_sz = (1 + q) * 8; if (fn_gvec_ptr) { - TCGv_ptr fpst = get_fpstatus_ptr(1); - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), fpst, + TCGv_ptr ptr; + if (ptr_is_env) { + ptr = cpu_env; + } else { + ptr = get_fpstatus_ptr(1); + } + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr, opr_sz, opr_sz, data, fn_gvec_ptr); - tcg_temp_free_ptr(fpst); + if (!ptr_is_env) { + tcg_temp_free_ptr(ptr); + } } else { - tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), + tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm, opr_sz, opr_sz, data, fn_gvec); } return 0; diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index dfc635cf9a..dedef62403 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -898,3 +898,151 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, } clear_tail(d, oprsz, simd_maxsz(desc)); } + +/* + * Convert float16 to float32, raising no exceptions and + * preserving exceptional values, including SNaN. + * This is effectively an unpack+repack operation. + */ +static float32 float16_to_float32_by_bits(uint32_t f16, bool fz16) +{ + const int f16_bias = 15; + const int f32_bias = 127; + uint32_t sign = extract32(f16, 15, 1); + uint32_t exp = extract32(f16, 10, 5); + uint32_t frac = extract32(f16, 0, 10); + + if (exp == 0x1f) { + /* Inf or NaN */ + exp = 0xff; + } else if (exp == 0) { + /* Zero or denormal. */ + if (frac != 0) { + if (fz16) { + frac = 0; + } else { + /* + * Denormal; these are all normal float32. + * Shift the fraction so that the msb is at bit 11, + * then remove bit 11 as the implicit bit of the + * normalized float32. Note that we still go through + * the shift for normal numbers below, to put the + * float32 fraction at the right place. + */ + int shift = clz32(frac) - 21; + frac = (frac << shift) & 0x3ff; + exp = f32_bias - f16_bias - shift + 1; + } + } + } else { + /* Normal number; adjust the bias. */ + exp += f32_bias - f16_bias; + } + sign <<= 31; + exp <<= 23; + frac <<= 23 - 10; + + return sign | exp | frac; +} + +static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) +{ + /* + * Branchless load of u32[0], u64[0], u32[1], or u64[1]. + * Load the 2nd qword iff is_q & is_2. + * Shift to the 2nd dword iff !is_q & is_2. + * For !is_q & !is_2, the upper bits of the result are garbage. + */ + return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5); +} + +/* + * Note that FMLAL requires oprsz == 8 or oprsz == 16, + * as there is not yet SVE versions that might use blocking. + */ + +static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, + uint32_t desc, bool fz16) +{ + intptr_t i, oprsz = simd_oprsz(desc); + int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); + int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + int is_q = oprsz == 16; + uint64_t n_4, m_4; + + /* Pre-load all of the f16 data, avoiding overlap issues. */ + n_4 = load4_f16(vn, is_q, is_2); + m_4 = load4_f16(vm, is_q, is_2); + + /* Negate all inputs for FMLSL at once. */ + if (is_s) { + n_4 ^= 0x8000800080008000ull; + } + + for (i = 0; i < oprsz / 4; i++) { + float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); + float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, + void *venv, uint32_t desc) +{ + CPUARMState *env = venv; + do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, + get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); +} + +void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, + void *venv, uint32_t desc) +{ + CPUARMState *env = venv; + do_fmlal(vd, vn, vm, &env->vfp.fp_status, desc, + get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); +} + +static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, + uint32_t desc, bool fz16) +{ + intptr_t i, oprsz = simd_oprsz(desc); + int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); + int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); + int is_q = oprsz == 16; + uint64_t n_4; + float32 m_1; + + /* Pre-load all of the f16 data, avoiding overlap issues. */ + n_4 = load4_f16(vn, is_q, is_2); + + /* Negate all inputs for FMLSL at once. */ + if (is_s) { + n_4 ^= 0x8000800080008000ull; + } + + m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); + + for (i = 0; i < oprsz / 4; i++) { + float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, + void *venv, uint32_t desc) +{ + CPUARMState *env = venv; + do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, + get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); +} + +void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, + void *venv, uint32_t desc) +{ + CPUARMState *env = venv; + do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc, + get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); +} |