diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-02-21 16:18:38 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-02-21 16:18:38 +0000 |
commit | 9ac5df20f51fabcba0d902025df4bd7ea987c158 (patch) | |
tree | ed11117e803f345aa1e04d050a4c40fb0cc61b85 | |
parent | a8c6af67e1e8d460e2c6e87070807e0a02c0fec2 (diff) | |
parent | 9eb4f58918a851fb46895fd9b7ce579afeac9d02 (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200221-1' into staging
target-arm queue:
* aspeed/scu: Implement chip ID register
* hw/misc/iotkit-secctl: Fix writing to 'PPC Interrupt Clear' register
* mainstone: Make providing flash images non-mandatory
* z2: Make providing flash images non-mandatory
* Fix failures to flush SVE high bits after AdvSIMD INS/ZIP/UZP/TRN/TBL/TBX/EXT
* Minor performance improvement: spend less time recalculating hflags values
* Code cleanup to isar_feature function tests
* Implement ARMv8.1-PMU and ARMv8.4-PMU extensions
* Bugfix: correct handling of PMCR_EL0.LC bit
* Bugfix: correct definition of PMCRDP
* Correctly implement ACTLR2, HACTLR2
* allwinner: Wire up USB ports
* Vectorize emulation of USHL, SSHL, PMUL*
* xilinx_spips: Correct the number of dummy cycles for the FAST_READ_4 cmd
* sh4: Fix PCI ISA IO memory subregion
# gpg: Signature made Fri 21 Feb 2020 16:17:37 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20200221-1: (46 commits)
target/arm: Set MVFR0.FPSP for ARMv5 cpus
target/arm: Use isar_feature_aa32_simd_r32 more places
target/arm: Rename isar_feature_aa32_simd_r32
sh4: Fix PCI ISA IO memory subregion
xilinx_spips: Correct the number of dummy cycles for the FAST_READ_4 cmd
target/arm: Convert PMULL.8 to gvec
target/arm: Convert PMULL.64 to gvec
target/arm: Convert PMUL.8 to gvec
target/arm: Vectorize USHL and SSHL
arm: allwinner: Wire up USB ports
hcd-ehci: Introduce "companion-enable" sysbus property
hw: usb: hcd-ohci: Move OHCISysBusState and TYPE_SYSBUS_OHCI to include file
target/arm: Correctly implement ACTLR2, HACTLR2
target/arm: Use FIELD_EX32 for testing 32-bit fields
target/arm: Use isar_feature function for testing AA32HPD feature
target/arm: Test correct register in aa32_pan and aa32_ats1e1 checks
target/arm: Correct handling of PMCR_EL0.LC bit
target/arm: Correct definition of PMCRDP
target/arm: Provide ARMv8.4-PMU in '-cpu max'
target/arm: Implement ARMv8.4-PMU extension
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31 files changed, 1361 insertions, 655 deletions
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index 1cde165611..2ae9c15311 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -24,11 +24,15 @@ #include "hw/arm/allwinner-a10.h" #include "hw/misc/unimp.h" #include "sysemu/sysemu.h" +#include "hw/boards.h" +#include "hw/usb/hcd-ohci.h" #define AW_A10_PIC_REG_BASE 0x01c20400 #define AW_A10_PIT_REG_BASE 0x01c20c00 #define AW_A10_UART0_REG_BASE 0x01c28000 #define AW_A10_EMAC_BASE 0x01c0b000 +#define AW_A10_EHCI_BASE 0x01c14000 +#define AW_A10_OHCI_BASE 0x01c14400 #define AW_A10_SATA_BASE 0x01c18000 static void aw_a10_init(Object *obj) @@ -49,6 +53,17 @@ static void aw_a10_init(Object *obj) sysbus_init_child_obj(obj, "sata", &s->sata, sizeof(s->sata), TYPE_ALLWINNER_AHCI); + + if (machine_usb(current_machine)) { + int i; + + for (i = 0; i < AW_A10_NUM_USB; i++) { + sysbus_init_child_obj(obj, "ehci[*]", OBJECT(&s->ehci[i]), + sizeof(s->ehci[i]), TYPE_PLATFORM_EHCI); + sysbus_init_child_obj(obj, "ohci[*]", OBJECT(&s->ohci[i]), + sizeof(s->ohci[i]), TYPE_SYSBUS_OHCI); + } + } } static void aw_a10_realize(DeviceState *dev, Error **errp) @@ -121,6 +136,34 @@ static void aw_a10_realize(DeviceState *dev, Error **errp) serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2, qdev_get_gpio_in(dev, 1), 115200, serial_hd(0), DEVICE_NATIVE_ENDIAN); + + if (machine_usb(current_machine)) { + int i; + + for (i = 0; i < AW_A10_NUM_USB; i++) { + char bus[16]; + + sprintf(bus, "usb-bus.%d", i); + + object_property_set_bool(OBJECT(&s->ehci[i]), true, + "companion-enable", &error_fatal); + object_property_set_bool(OBJECT(&s->ehci[i]), true, "realized", + &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0, + AW_A10_EHCI_BASE + i * 0x8000); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0, + qdev_get_gpio_in(dev, 39 + i)); + + object_property_set_str(OBJECT(&s->ohci[i]), bus, "masterbus", + &error_fatal); + object_property_set_bool(OBJECT(&s->ohci[i]), true, "realized", + &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0, + AW_A10_OHCI_BASE + i * 0x8000); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0, + qdev_get_gpio_in(dev, 64 + i)); + } + } } static void aw_a10_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c index b01ce3ce08..6e64dfab50 100644 --- a/hw/arm/mainstone.c +++ b/hw/arm/mainstone.c @@ -138,19 +138,10 @@ static void mainstone_common_init(MemoryRegion *address_space_mem, /* There are two 32MiB flash devices on the board */ for (i = 0; i < 2; i ++) { dinfo = drive_get(IF_PFLASH, 0, i); - if (!dinfo) { - if (qtest_enabled()) { - break; - } - error_report("Two flash images must be given with the " - "'pflash' parameter"); - exit(1); - } - if (!pflash_cfi01_register(mainstone_flash_base[i], i ? "mainstone.flash1" : "mainstone.flash0", MAINSTONE_FLASH, - blk_by_legacy_dinfo(dinfo), + dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, sector_len, 4, 0, 0, 0, 0, be)) { error_report("Error registering flash memory"); exit(1); diff --git a/hw/arm/z2.c b/hw/arm/z2.c index 34794fe3ae..4bb237f22d 100644 --- a/hw/arm/z2.c +++ b/hw/arm/z2.c @@ -314,12 +314,6 @@ static void z2_init(MachineState *machine) be = 0; #endif dinfo = drive_get(IF_PFLASH, 0, 0); - if (!dinfo && !qtest_enabled()) { - error_report("Flash image must be given with the " - "'pflash' parameter"); - exit(1); - } - if (!pflash_cfi01_register(Z2_FLASH_BASE, "z2.flash0", Z2_FLASH_SIZE, dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, sector_len, 4, 0, 0, 0, 0, be)) { diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index f9e0eeaace..22a43e4984 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -1227,17 +1227,17 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) case 0xd44: /* PFR1. */ return cpu->id_pfr1; case 0xd48: /* DFR0. */ - return cpu->id_dfr0; + return cpu->isar.id_dfr0; case 0xd4c: /* AFR0. */ return cpu->id_afr0; case 0xd50: /* MMFR0. */ - return cpu->id_mmfr0; + return cpu->isar.id_mmfr0; case 0xd54: /* MMFR1. */ - return cpu->id_mmfr1; + return cpu->isar.id_mmfr1; case 0xd58: /* MMFR2. */ - return cpu->id_mmfr2; + return cpu->isar.id_mmfr2; case 0xd5c: /* MMFR3. */ - return cpu->id_mmfr3; + return cpu->isar.id_mmfr3; case 0xd60: /* ISAR0. */ return cpu->isar.id_isar0; case 0xd64: /* ISAR1. */ diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index ce2f9562d4..9d7482a9df 100644 --- a/hw/misc/aspeed_scu.c +++ b/hw/misc/aspeed_scu.c @@ -77,6 +77,8 @@ #define CPU2_BASE_SEG4 TO_REG(0x110) #define CPU2_BASE_SEG5 TO_REG(0x114) #define CPU2_CACHE_CTRL TO_REG(0x118) +#define CHIP_ID0 TO_REG(0x150) +#define CHIP_ID1 TO_REG(0x154) #define UART_HPLL_CLK TO_REG(0x160) #define PCIE_CTRL TO_REG(0x180) #define BMC_MMIO_CTRL TO_REG(0x184) @@ -115,6 +117,8 @@ #define AST2600_HW_STRAP2_PROT TO_REG(0x518) #define AST2600_RNG_CTRL TO_REG(0x524) #define AST2600_RNG_DATA TO_REG(0x540) +#define AST2600_CHIP_ID0 TO_REG(0x5B0) +#define AST2600_CHIP_ID1 TO_REG(0x5B4) #define AST2600_CLK TO_REG(0x40) @@ -182,6 +186,8 @@ static const uint32_t ast2500_a1_resets[ASPEED_SCU_NR_REGS] = { [CPU2_BASE_SEG1] = 0x80000000U, [CPU2_BASE_SEG4] = 0x1E600000U, [CPU2_BASE_SEG5] = 0xC0000000U, + [CHIP_ID0] = 0x1234ABCDU, + [CHIP_ID1] = 0x88884444U, [UART_HPLL_CLK] = 0x00001903U, [PCIE_CTRL] = 0x0000007BU, [BMC_DEV_ID] = 0x00002402U @@ -232,8 +238,47 @@ static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size) return s->regs[reg]; } -static void aspeed_scu_write(void *opaque, hwaddr offset, uint64_t data, - unsigned size) +static void aspeed_ast2400_scu_write(void *opaque, hwaddr offset, + uint64_t data, unsigned size) +{ + AspeedSCUState *s = ASPEED_SCU(opaque); + int reg = TO_REG(offset); + + if (reg >= ASPEED_SCU_NR_REGS) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n", + __func__, offset); + return; + } + + if (reg > PROT_KEY && reg < CPU2_BASE_SEG1 && + !s->regs[PROT_KEY]) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__); + } + + trace_aspeed_scu_write(offset, size, data); + + switch (reg) { + case PROT_KEY: + s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0; + return; + case SILICON_REV: + case FREQ_CNTR_EVAL: + case VGA_SCRATCH1 ... VGA_SCRATCH8: + case RNG_DATA: + case FREE_CNTR4: + case FREE_CNTR4_EXT: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Write to read-only offset 0x%" HWADDR_PRIx "\n", + __func__, offset); + return; + } + + s->regs[reg] = data; +} + +static void aspeed_ast2500_scu_write(void *opaque, hwaddr offset, + uint64_t data, unsigned size) { AspeedSCUState *s = ASPEED_SCU(opaque); int reg = TO_REG(offset); @@ -257,31 +302,19 @@ static void aspeed_scu_write(void *opaque, hwaddr offset, uint64_t data, case PROT_KEY: s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0; return; - case CLK_SEL: - s->regs[reg] = data; - break; case HW_STRAP1: - if (ASPEED_IS_AST2500(s->regs[SILICON_REV])) { - s->regs[HW_STRAP1] |= data; - return; - } - /* Jump to assignment below */ - break; + s->regs[HW_STRAP1] |= data; + return; case SILICON_REV: - if (ASPEED_IS_AST2500(s->regs[SILICON_REV])) { - s->regs[HW_STRAP1] &= ~data; - } else { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Write to read-only offset 0x%" HWADDR_PRIx "\n", - __func__, offset); - } - /* Avoid assignment below, we've handled everything */ + s->regs[HW_STRAP1] &= ~data; return; case FREQ_CNTR_EVAL: case VGA_SCRATCH1 ... VGA_SCRATCH8: case RNG_DATA: case FREE_CNTR4: case FREE_CNTR4_EXT: + case CHIP_ID0: + case CHIP_ID1: qemu_log_mask(LOG_GUEST_ERROR, "%s: Write to read-only offset 0x%" HWADDR_PRIx "\n", __func__, offset); @@ -291,9 +324,18 @@ static void aspeed_scu_write(void *opaque, hwaddr offset, uint64_t data, s->regs[reg] = data; } -static const MemoryRegionOps aspeed_scu_ops = { +static const MemoryRegionOps aspeed_ast2400_scu_ops = { + .read = aspeed_scu_read, + .write = aspeed_ast2400_scu_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .valid.unaligned = false, +}; + +static const MemoryRegionOps aspeed_ast2500_scu_ops = { .read = aspeed_scu_read, - .write = aspeed_scu_write, + .write = aspeed_ast2500_scu_write, .endianness = DEVICE_LITTLE_ENDIAN, .valid.min_access_size = 4, .valid.max_access_size = 4, @@ -469,7 +511,7 @@ static void aspeed_2400_scu_class_init(ObjectClass *klass, void *data) asc->calc_hpll = aspeed_2400_scu_calc_hpll; asc->apb_divider = 2; asc->nr_regs = ASPEED_SCU_NR_REGS; - asc->ops = &aspeed_scu_ops; + asc->ops = &aspeed_ast2400_scu_ops; } static const TypeInfo aspeed_2400_scu_info = { @@ -489,7 +531,7 @@ static void aspeed_2500_scu_class_init(ObjectClass *klass, void *data) asc->calc_hpll = aspeed_2500_scu_calc_hpll; asc->apb_divider = 4; asc->nr_regs = ASPEED_SCU_NR_REGS; - asc->ops = &aspeed_scu_ops; + asc->ops = &aspeed_ast2500_scu_ops; } static const TypeInfo aspeed_2500_scu_info = { @@ -586,6 +628,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, case AST2600_RNG_DATA: case AST2600_SILICON_REV: case AST2600_SILICON_REV2: + case AST2600_CHIP_ID0: + case AST2600_CHIP_ID1: /* Add read only registers here */ qemu_log_mask(LOG_GUEST_ERROR, "%s: Write to read-only offset 0x%" HWADDR_PRIx "\n", @@ -614,6 +658,9 @@ static const uint32_t ast2600_a0_resets[ASPEED_AST2600_SCU_NR_REGS] = { [AST2600_CLK_STOP_CTRL2] = 0xFFF0FFF0, [AST2600_SDRAM_HANDSHAKE] = 0x00000040, /* SoC completed DRAM init */ [AST2600_HPLL_PARAM] = 0x1000405F, + [AST2600_CHIP_ID0] = 0x1234ABCD, + [AST2600_CHIP_ID1] = 0x88884444, + }; static void aspeed_ast2600_scu_reset(DeviceState *dev) diff --git a/hw/misc/iotkit-secctl.c b/hw/misc/iotkit-secctl.c index 609869821a..9fdb82056a 100644 --- a/hw/misc/iotkit-secctl.c +++ b/hw/misc/iotkit-secctl.c @@ -340,7 +340,7 @@ static MemTxResult iotkit_secctl_s_write(void *opaque, hwaddr addr, qemu_set_irq(s->sec_resp_cfg, s->secrespcfg); break; case A_SECPPCINTCLR: - value &= 0x00f000f3; + s->secppcintstat &= ~(value & 0x00f000f3); foreach_ppc(s, iotkit_secctl_ppc_update_irq_clear); break; case A_SECPPCINTEN: diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c index 71afd23b67..08f2fc1dde 100644 --- a/hw/sh4/sh_pci.c +++ b/hw/sh4/sh_pci.c @@ -67,12 +67,8 @@ static void sh_pci_reg_write (void *p, hwaddr addr, uint64_t val, pcic->mbr = val & 0xff000001; break; case 0x1c8: - if ((val & 0xfffc0000) != (pcic->iobr & 0xfffc0000)) { - memory_region_del_subregion(get_system_memory(), &pcic->isa); - pcic->iobr = val & 0xfffc0001; - memory_region_add_subregion(get_system_memory(), - pcic->iobr & 0xfffc0000, &pcic->isa); - } + pcic->iobr = val & 0xfffc0001; + memory_region_set_alias_offset(&pcic->isa, val & 0xfffc0000); break; case 0x220: pci_data_write(phb->bus, pcic->par, val, 4); @@ -147,8 +143,7 @@ static void sh_pci_device_realize(DeviceState *dev, Error **errp) get_system_io(), 0, 0x40000); sysbus_init_mmio(sbd, &s->memconfig_p4); sysbus_init_mmio(sbd, &s->memconfig_a7); - s->iobr = 0xfe240000; - memory_region_add_subregion(get_system_memory(), s->iobr, &s->isa); + memory_region_add_subregion(get_system_memory(), 0xfe240000, &s->isa); s->dev = pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "sh_pci_host"); } diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index 6c9ef59779..c57850a505 100644 --- a/hw/ssi/xilinx_spips.c +++ b/hw/ssi/xilinx_spips.c @@ -576,11 +576,11 @@ static int xilinx_spips_num_dummies(XilinxQSPIPS *qs, uint8_t command) case FAST_READ: case DOR: case QOR: + case FAST_READ_4: case DOR_4: case QOR_4: return 1; case DIOR: - case FAST_READ_4: case DIOR_4: return 2; case QIOR: diff --git a/hw/usb/hcd-ehci-sysbus.c b/hw/usb/hcd-ehci-sysbus.c index 8d4738565e..b22fb258be 100644 --- a/hw/usb/hcd-ehci-sysbus.c +++ b/hw/usb/hcd-ehci-sysbus.c @@ -33,6 +33,8 @@ static const VMStateDescription vmstate_ehci_sysbus = { static Property ehci_sysbus_properties[] = { DEFINE_PROP_UINT32("maxframes", EHCISysBusState, ehci.maxframes, 128), + DEFINE_PROP_BOOL("companion-enable", EHCISysBusState, ehci.companion_enable, + false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 8a94bd004a..1e6e85e86a 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -1870,21 +1870,6 @@ void ohci_sysbus_die(struct OHCIState *ohci) ohci_bus_stop(ohci); } -#define TYPE_SYSBUS_OHCI "sysbus-ohci" -#define SYSBUS_OHCI(obj) OBJECT_CHECK(OHCISysBusState, (obj), TYPE_SYSBUS_OHCI) - -typedef struct { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - - OHCIState ohci; - char *masterbus; - uint32_t num_ports; - uint32_t firstport; - dma_addr_t dma_offset; -} OHCISysBusState; - static void ohci_realize_pxa(DeviceState *dev, Error **errp) { OHCISysBusState *s = SYSBUS_OHCI(dev); diff --git a/hw/usb/hcd-ohci.h b/hw/usb/hcd-ohci.h index 16e3f1e13a..5c8819aedf 100644 --- a/hw/usb/hcd-ohci.h +++ b/hw/usb/hcd-ohci.h @@ -22,6 +22,7 @@ #define HCD_OHCI_H #include "sysemu/dma.h" +#include "hw/usb.h" /* Number of Downstream Ports on the root hub: */ #define OHCI_MAX_PORTS 15 @@ -90,6 +91,21 @@ typedef struct OHCIState { void (*ohci_die)(struct OHCIState *ohci); } OHCIState; +#define TYPE_SYSBUS_OHCI "sysbus-ohci" +#define SYSBUS_OHCI(obj) OBJECT_CHECK(OHCISysBusState, (obj), TYPE_SYSBUS_OHCI) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + OHCIState ohci; + char *masterbus; + uint32_t num_ports; + uint32_t firstport; + dma_addr_t dma_offset; +} OHCISysBusState; + extern const VMStateDescription vmstate_ohci_state; void usb_ohci_init(OHCIState *ohci, DeviceState *dev, uint32_t num_ports, diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h index 40d0b1d9c0..8af724548f 100644 --- a/include/hw/arm/allwinner-a10.h +++ b/include/hw/arm/allwinner-a10.h @@ -8,12 +8,16 @@ #include "hw/intc/allwinner-a10-pic.h" #include "hw/net/allwinner_emac.h" #include "hw/ide/ahci.h" +#include "hw/usb/hcd-ohci.h" +#include "hw/usb/hcd-ehci.h" #include "target/arm/cpu.h" #define AW_A10_SDRAM_BASE 0x40000000 +#define AW_A10_NUM_USB 2 + #define TYPE_AW_A10 "allwinner-a10" #define AW_A10(obj) OBJECT_CHECK(AwA10State, (obj), TYPE_AW_A10) @@ -28,6 +32,8 @@ typedef struct AwA10State { AwEmacState emac; AllwinnerAHCIState sata; MemoryRegion sram_a; + EHCISysBusState ehci[AW_A10_NUM_USB]; + OHCISysBusState ohci[AW_A10_NUM_USB]; } AwA10State; #endif diff --git a/linux-user/elfload.c b/linux-user/elfload.c index f3080a1635..b1a895f24c 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -475,8 +475,8 @@ static uint32_t get_elf_hwcap(void) GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3); GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS); GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4); - GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA); - GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT); + GET_FEATURE_ID(aa32_arm_div, ARM_HWCAP_ARM_IDIVA); + GET_FEATURE_ID(aa32_thumb_div, ARM_HWCAP_ARM_IDIVT); /* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c. * Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of * ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated diff --git a/target/arm/cpu.c b/target/arm/cpu.c index de733aceeb..2eadf4dcb8 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1009,11 +1009,10 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) if (flags & CPU_DUMP_FPU) { int numvfpregs = 0; - if (arm_feature(env, ARM_FEATURE_VFP)) { - numvfpregs += 16; - } - if (arm_feature(env, ARM_FEATURE_VFP3)) { - numvfpregs += 16; + if (cpu_isar_feature(aa32_simd_r32, cpu)) { + numvfpregs = 32; + } else if (arm_feature(env, ARM_FEATURE_VFP)) { + numvfpregs = 16; } for (i = 0; i < numvfpregs; i++) { uint64_t v = *aa32_vfp_dreg(env, i); @@ -1586,7 +1585,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Presence of EL2 itself is ARM_FEATURE_EL2, and of the * Security Extensions is ARM_FEATURE_EL3. */ - assert(!tcg_enabled() || no_aa32 || cpu_isar_feature(arm_div, cpu)); + assert(!tcg_enabled() || no_aa32 || + cpu_isar_feature(aa32_arm_div, cpu)); set_feature(env, ARM_FEATURE_LPAE); set_feature(env, ARM_FEATURE_V7); } @@ -1612,7 +1612,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) if (arm_feature(env, ARM_FEATURE_V6)) { set_feature(env, ARM_FEATURE_V5); if (!arm_feature(env, ARM_FEATURE_M)) { - assert(!tcg_enabled() || no_aa32 || cpu_isar_feature(jazelle, cpu)); + assert(!tcg_enabled() || no_aa32 || + cpu_isar_feature(aa32_jazelle, cpu)); set_feature(env, ARM_FEATURE_AUXCR); } } @@ -1716,8 +1717,9 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) cpu); #endif } else { - cpu->id_aa64dfr0 &= ~0xf00; - cpu->id_dfr0 &= ~(0xf << 24); + cpu->isar.id_aa64dfr0 = + FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); + cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); cpu->pmceid0 = 0; cpu->pmceid1 = 0; } @@ -1870,10 +1872,11 @@ static void arm926_initfn(Object *obj) */ cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); /* - * Similarly, we need to set MVFR0 fields to enable double precision - * and short vector support even though ARMv5 doesn't have this register. + * Similarly, we need to set MVFR0 fields to enable vfp and short vector + * support even though ARMv5 doesn't have this register. */ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1); cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); } @@ -1912,10 +1915,11 @@ static void arm1026_initfn(Object *obj) */ cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); /* - * Similarly, we need to set MVFR0 fields to enable double precision - * and short vector support even though ARMv5 doesn't have this register. + * Similarly, we need to set MVFR0 fields to enable vfp and short vector + * support even though ARMv5 doesn't have this register. */ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1); cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); { @@ -1955,11 +1959,11 @@ static void arm1136_r2_initfn(Object *obj) cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x1; - cpu->id_dfr0 = 0x2; + cpu->isar.id_dfr0 = 0x2; cpu->id_afr0 = 0x3; - cpu->id_mmfr0 = 0x01130003; - cpu->id_mmfr1 = 0x10030302; - cpu->id_mmfr2 = 0x01222110; + cpu->isar.id_mmfr0 = 0x01130003; + cpu->isar.id_mmfr1 = 0x10030302; + cpu->isar.id_mmfr2 = 0x01222110; cpu->isar.id_isar0 = 0x00140011; cpu->isar.id_isar1 = 0x12002111; cpu->isar.id_isar2 = 0x11231111; @@ -1987,11 +1991,11 @@ static void arm1136_initfn(Object *obj) cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x1; - cpu->id_dfr0 = 0x2; + cpu->isar.id_dfr0 = 0x2; cpu->id_afr0 = 0x3; - cpu->id_mmfr0 = 0x01130003; - cpu->id_mmfr1 = 0x10030302; - cpu->id_mmfr2 = 0x01222110; + cpu->isar.id_mmfr0 = 0x01130003; + cpu->isar.id_mmfr1 = 0x10030302; + cpu->isar.id_mmfr2 = 0x01222110; cpu->isar.id_isar0 = 0x00140011; cpu->isar.id_isar1 = 0x12002111; cpu->isar.id_isar2 = 0x11231111; @@ -2020,11 +2024,11 @@ static void arm1176_initfn(Object *obj) cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x11; - cpu->id_dfr0 = 0x33; + cpu->isar.id_dfr0 = 0x33; cpu->id_afr0 = 0; - cpu->id_mmfr0 = 0x01130003; - cpu->id_mmfr1 = 0x10030302; - cpu->id_mmfr2 = 0x01222100; + cpu->isar.id_mmfr0 = 0x01130003; + cpu->isar.id_mmfr1 = 0x10030302; + cpu->isar.id_mmfr2 = 0x01222100; cpu->isar.id_isar0 = 0x0140011; cpu->isar.id_isar1 = 0x12002111; cpu->isar.id_isar2 = 0x11231121; @@ -2050,11 +2054,11 @@ static void arm11mpcore_initfn(Object *obj) cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x1; - cpu->id_dfr0 = 0; + cpu->isar.id_dfr0 = 0; cpu->id_afr0 = 0x2; - cpu->id_mmfr0 = 0x01100103; - cpu->id_mmfr1 = 0x10020302; - cpu->id_mmfr2 = 0x01222000; + cpu->isar.id_mmfr0 = 0x01100103; + cpu->isar.id_mmfr1 = 0x10020302; + cpu->isar.id_mmfr2 = 0x01222000; cpu->isar.id_isar0 = 0x00100011; cpu->isar.id_isar1 = 0x12002111; cpu->isar.id_isar2 = 0x11221011; @@ -2082,12 +2086,12 @@ static void cortex_m3_initfn(Object *obj) cpu->pmsav7_dregion = 8; cpu->id_pfr0 = 0x00000030; cpu->id_pfr1 = 0x00000200; - cpu->id_dfr0 = 0x00100000; + cpu->isar.id_dfr0 = 0x00100000; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00000030; - cpu->id_mmfr1 = 0x00000000; - cpu->id_mmfr2 = 0x00000000; - cpu->id_mmfr3 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; cpu->isar.id_isar0 = 0x01141110; cpu->isar.id_isar1 = 0x02111000; cpu->isar.id_isar2 = 0x21112231; @@ -2113,12 +2117,12 @@ static void cortex_m4_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000000; cpu->id_pfr0 = 0x00000030; cpu->id_pfr1 = 0x00000200; - cpu->id_dfr0 = 0x00100000; + cpu->isar.id_dfr0 = 0x00100000; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00000030; - cpu->id_mmfr1 = 0x00000000; - cpu->id_mmfr2 = 0x00000000; - cpu->id_mmfr3 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; cpu->isar.id_isar0 = 0x01141110; cpu->isar.id_isar1 = 0x02111000; cpu->isar.id_isar2 = 0x21112231; @@ -2144,12 +2148,12 @@ static void cortex_m7_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000040; cpu->id_pfr0 = 0x00000030; cpu->id_pfr1 = 0x00000200; - cpu->id_dfr0 = 0x00100000; + cpu->isar.id_dfr0 = 0x00100000; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00100030; - cpu->id_mmfr1 = 0x00000000; - cpu->id_mmfr2 = 0x01000000; - cpu->id_mmfr3 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00100030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000000; cpu->isar.id_isar0 = 0x01101110; cpu->isar.id_isar1 = 0x02112000; cpu->isar.id_isar2 = 0x20232231; @@ -2177,12 +2181,12 @@ static void cortex_m33_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000040; cpu->id_pfr0 = 0x00000030; cpu->id_pfr1 = 0x00000210; - cpu->id_dfr0 = 0x00200000; + cpu->isar.id_dfr0 = 0x00200000; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00101F40; - cpu->id_mmfr1 = 0x00000000; - cpu->id_mmfr2 = 0x01000000; - cpu->id_mmfr3 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00101F40; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000000; cpu->isar.id_isar0 = 0x01101110; cpu->isar.id_isar1 = 0x02212000; cpu->isar.id_isar2 = 0x20232232; @@ -2229,12 +2233,12 @@ static void cortex_r5_initfn(Object *obj) cpu->midr = 0x411fc153; /* r1p3 */ cpu->id_pfr0 = 0x0131; cpu->id_pfr1 = 0x001; - cpu->id_dfr0 = 0x010400; + cpu->isar.id_dfr0 = 0x010400; cpu->id_afr0 = 0x0; - cpu->id_mmfr0 = 0x0210030; - cpu->id_mmfr1 = 0x00000000; - cpu->id_mmfr2 = 0x01200000; - cpu->id_mmfr3 = 0x0211; + cpu->isar.id_mmfr0 = 0x0210030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01200000; + cpu->isar.id_mmfr3 = 0x0211; cpu->isar.id_isar0 = 0x02101111; cpu->isar.id_isar1 = 0x13112111; cpu->isar.id_isar2 = 0x21232141; @@ -2284,18 +2288,18 @@ static void cortex_a8_initfn(Object *obj) cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; cpu->id_pfr1 = 0x11; - cpu->id_dfr0 = 0x400; + cpu->isar.id_dfr0 = 0x400; cpu->id_afr0 = 0; - cpu->id_mmfr0 = 0x31100003; - cpu->id_mmfr1 = 0x20000000; - cpu->id_mmfr2 = 0x01202000; - cpu->id_mmfr3 = 0x11; + cpu->isar.id_mmfr0 = 0x31100003; + cpu->isar.id_mmfr1 = 0x20000000; + cpu->isar.id_mmfr2 = 0x01202000; + cpu->isar.id_mmfr3 = 0x11; cpu->isar.id_isar0 = 0x00101111; cpu->isar.id_isar1 = 0x12112111; cpu->isar.id_isar2 = 0x21232031; cpu->isar.id_isar3 = 0x11112131; cpu->isar.id_isar4 = 0x00111142; - cpu->dbgdidr = 0x15141000; + cpu->isar.dbgdidr = 0x15141000; cpu->clidr = (1 << 27) | (2 << 24) | 3; cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ @@ -2357,18 +2361,18 @@ static void cortex_a9_initfn(Object *obj) cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; cpu->id_pfr1 = 0x11; - cpu->id_dfr0 = 0x000; + cpu->isar.id_dfr0 = 0x000; cpu->id_afr0 = 0; - cpu->id_mmfr0 = 0x00100103; - cpu->id_mmfr1 = 0x20000000; - cpu->id_mmfr2 = 0x01230000; - cpu->id_mmfr3 = 0x00002111; + cpu->isar.id_mmfr0 = 0x00100103; + cpu->isar.id_mmfr1 = 0x20000000; + cpu->isar.id_mmfr2 = 0x01230000; + cpu->isar.id_mmfr3 = 0x00002111; cpu->isar.id_isar0 = 0x00101111; cpu->isar.id_isar1 = 0x13112111; cpu->isar.id_isar2 = 0x21232041; cpu->isar.id_isar3 = 0x11112131; cpu->isar.id_isar4 = 0x00111142; - cpu->dbgdidr = 0x35141000; + cpu->isar.dbgdidr = 0x35141000; cpu->clidr = (1 << 27) | (1 << 24) | 3; cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ @@ -2422,12 +2426,12 @@ static void cortex_a7_initfn(Object *obj) cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; cpu->id_pfr1 = 0x00011011; - cpu->id_dfr0 = 0x02010555; + cpu->isar.id_dfr0 = 0x02010555; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10101105; - cpu->id_mmfr1 = 0x40000000; - cpu->id_mmfr2 = 0x01240000; - cpu->id_mmfr3 = 0x02102211; + cpu->isar.id_mmfr0 = 0x10101105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01240000; + cpu->isar.id_mmfr3 = 0x02102211; /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but * table 4-41 gives 0x02101110, which includes the arm div insns. */ @@ -2436,7 +2440,7 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.id_isar2 = 0x21232041; cpu->isar.id_isar3 = 0x11112131; cpu->isar.id_isar4 = 0x10011142; - cpu->dbgdidr = 0x3515f005; + cpu->isar.dbgdidr = 0x3515f005; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ @@ -2468,18 +2472,18 @@ static void cortex_a15_initfn(Object *obj) cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; cpu->id_pfr1 = 0x00011011; - cpu->id_dfr0 = 0x02010555; + cpu->isar.id_dfr0 = 0x02010555; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10201105; - cpu->id_mmfr1 = 0x20000000; - cpu->id_mmfr2 = 0x01240000; - cpu->id_mmfr3 = 0x02102211; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x20000000; + cpu->isar.id_mmfr2 = 0x01240000; + cpu->isar.id_mmfr3 = 0x02102211; cpu->isar.id_isar0 = 0x02101110; cpu->isar.id_isar1 = 0x13112111; cpu->isar.id_isar2 = 0x21232041; cpu->isar.id_isar3 = 0x11112131; cpu->isar.id_isar4 = 0x10011142; - cpu->dbgdidr = 0x3515f021; + cpu->isar.dbgdidr = 0x3515f021; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ @@ -2709,13 +2713,14 @@ static void arm_max_initfn(Object *obj) t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ cpu->isar.mvfr2 = t; - t = cpu->id_mmfr3; + t = cpu->isar.id_mmfr3; t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ - cpu->id_mmfr3 = t; + cpu->isar.id_mmfr3 = t; - t = cpu->id_mmfr4; + t = cpu->isar.id_mmfr4; t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ - cpu->id_mmfr4 = t; + t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ + cpu->isar.id_mmfr4 = t; } #endif } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index e943ffe8a9..65171cb30e 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -853,6 +853,11 @@ struct ARMCPU { * prefix means a constant register. * Some of these registers are split out into a substructure that * is shared with the translators to control the ISA. + * + * Note that if you add an ID register to the ARMISARegisters struct + * you need to also update the 32-bit and 64-bit versions of the + * kvm_arm_get_host_cpu_features() function to correctly populate the + * field by reading the value from the KVM vCPU. */ struct ARMISARegisters { uint32_t id_isar0; @@ -862,9 +867,16 @@ struct ARMCPU { uint32_t id_isar4; uint32_t id_isar5; uint32_t id_isar6; + uint32_t id_mmfr0; + uint32_t id_mmfr1; + uint32_t id_mmfr2; + uint32_t id_mmfr3; + uint32_t id_mmfr4; uint32_t mvfr0; uint32_t mvfr1; uint32_t mvfr2; + uint32_t id_dfr0; + uint32_t dbgdidr; uint64_t id_aa64isar0; uint64_t id_aa64isar1; uint64_t id_aa64pfr0; @@ -872,6 +884,8 @@ struct ARMCPU { uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint64_t id_aa64mmfr2; + uint64_t id_aa64dfr0; + uint64_t id_aa64dfr1; } isar; uint32_t midr; uint32_t revidr; @@ -880,20 +894,11 @@ struct ARMCPU { uint32_t reset_sctlr; uint32_t id_pfr0; uint32_t id_pfr1; - uint32_t id_dfr0; uint64_t pmceid0; uint64_t pmceid1; uint32_t id_afr0; - uint32_t id_mmfr0; - uint32_t id_mmfr1; - uint32_t id_mmfr2; - uint32_t id_mmfr3; - uint32_t id_mmfr4; - uint64_t id_aa64dfr0; - uint64_t id_aa64dfr1; uint64_t id_aa64afr0; uint64_t id_aa64afr1; - uint32_t dbgdidr; uint32_t clidr; uint64_t mp_affinity; /* MP ID without feature bits */ /* The elements of this array are the CCSIDR values for each cache, @@ -1821,6 +1826,16 @@ FIELD(ID_AA64MMFR2, BBM, 52, 4) FIELD(ID_AA64MMFR2, EVT, 56, 4) FIELD(ID_AA64MMFR2, E0PD, 60, 4) +FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) +FIELD(ID_AA64DFR0, TRACEVER, 4, 4) +FIELD(ID_AA64DFR0, PMUVER, 8, 4) +FIELD(ID_AA64DFR0, BRPS, 12, 4) +FIELD(ID_AA64DFR0, WRPS, 20, 4) +FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) +FIELD(ID_AA64DFR0, PMSVER, 32, 4) +FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) +FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) + FIELD(ID_DFR0, COPDBG, 0, 4) FIELD(ID_DFR0, COPSDBG, 4, 4) FIELD(ID_DFR0, MMAPDBG, 8, 4) @@ -1830,6 +1845,13 @@ FIELD(ID_DFR0, MPROFDBG, 20, 4) FIELD(ID_DFR0, PERFMON, 24, 4) FIELD(ID_DFR0, TRACEFILT, 28, 4) +FIELD(DBGDIDR, SE_IMP, 12, 1) +FIELD(DBGDIDR, NSUHD_IMP, 14, 1) +FIELD(DBGDIDR, VERSION, 16, 4) +FIELD(DBGDIDR, CTX_CMPS, 20, 4) +FIELD(DBGDIDR, BRPS, 24, 4) +FIELD(DBGDIDR, WRPS, 28, 4) + FIELD(MVFR0, SIMDREG, 0, 4) FIELD(MVFR0, FPSP, 4, 4) FIELD(MVFR0, FPDP, 8, 4) @@ -3325,19 +3347,35 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) extern const uint64_t pred_esz_masks[4]; /* + * Naming convention for isar_feature functions: + * Functions which test 32-bit ID registers should have _aa32_ in + * their name. Functions which test 64-bit ID registers should have + * _aa64_ in their name. These must only be used in code where we + * know for certain that the CPU has AArch32 or AArch64 respectively + * or where the correct answer for a CPU which doesn't implement that + * CPU state is "false" (eg when generating A32 or A64 code, if adding + * system registers that are specific to that CPU state, for "should + * we let this system register bit be set" tests where the 32-bit + * flavour of the register doesn't have the bit, and so on). + * Functions which simply ask "does this feature exist at all" have + * _any_ in their name, and always return the logical OR of the _aa64_ + * and the _aa32_ function. + */ + +/* * 32-bit feature tests via id registers. */ -static inline bool isar_feature_thumb_div(const ARMISARegisters *id) +static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id) { return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; } -static inline bool isar_feature_arm_div(const ARMISARegisters *id) +static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) { return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; } -static inline bool isar_feature_jazelle(const ARMISARegisters *id) +static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id) { return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; } @@ -3412,21 +3450,21 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; } -static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id) +static inline bool isar_feature_aa32_simd_r32(const ARMISARegisters *id) { /* Return true if D16-D31 are implemented */ - return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2; + return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) >= 2; } static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0; + return FIELD_EX32(id->mvfr0, MVFR0, FPSHVEC) > 0; } static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id) { /* Return true if CPU supports double precision floating point */ - return FIELD_EX64(id->mvfr0, MVFR0, FPDP) > 0; + return FIELD_EX32(id->mvfr0, MVFR0, FPDP) > 0; } /* @@ -3436,42 +3474,66 @@ static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id) */ static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 0; + return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 0; } static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 1; + return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1; } static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 1; + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1; } static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 2; + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 2; } static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 3; + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 3; } static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4; + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 4; } static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) != 0; + return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; } static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) { - return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; + return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; +} + +static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) +{ + /* 0xf means "non-standard IMPDEF PMU" */ + return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && + FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; +} + +static inline bool isar_feature_aa32_pmu_8_4(const ARMISARegisters *id) +{ + /* 0xf means "non-standard IMPDEF PMU" */ + return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 && + FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; +} + +static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0; +} + +static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0; } /* @@ -3653,6 +3715,41 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; } +static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && + FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; +} + +static inline bool isar_feature_aa64_pmu_8_4(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 && + FIELD_EX32(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; +} + +/* + * Feature tests for "does this exist in either 32-bit or 64-bit?" + */ +static inline bool isar_feature_any_fp16(const ARMISARegisters *id) +{ + return isar_feature_aa64_fp16(id) || isar_feature_aa32_fp16_arith(id); +} + +static inline bool isar_feature_any_predinv(const ARMISARegisters *id) +{ + return isar_feature_aa64_predinv(id) || isar_feature_aa32_predinv(id); +} + +static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) +{ + return isar_feature_aa64_pmu_8_1(id) || isar_feature_aa32_pmu_8_1(id); +} + +static inline bool isar_feature_any_pmu_8_4(const ARMISARegisters *id) +{ + return isar_feature_aa64_pmu_8_4(id) || isar_feature_aa32_pmu_8_4(id); +} + /* * Forward to the above feature tests given an ARMCPU pointer. */ diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index f0d98bc79d..0929401a4d 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -121,12 +121,12 @@ static void aarch64_a57_initfn(Object *obj) cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; cpu->id_pfr1 = 0x00011011; - cpu->id_dfr0 = 0x03010066; + cpu->isar.id_dfr0 = 0x03010066; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10101105; - cpu->id_mmfr1 = 0x40000000; - cpu->id_mmfr2 = 0x01260000; - cpu->id_mmfr3 = 0x02102211; + cpu->isar.id_mmfr0 = 0x10101105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; cpu->isar.id_isar0 = 0x02101110; cpu->isar.id_isar1 = 0x13112111; cpu->isar.id_isar2 = 0x21232042; @@ -135,10 +135,10 @@ static void aarch64_a57_initfn(Object *obj) cpu->isar.id_isar5 = 0x00011121; cpu->isar.id_isar6 = 0; cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64dfr0 = 0x10305106; cpu->isar.id_aa64isar0 = 0x00011120; cpu->isar.id_aa64mmfr0 = 0x00001124; - cpu->dbgdidr = 0x3516d000; + cpu->isar.dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ @@ -175,12 +175,12 @@ static void aarch64_a53_initfn(Object *obj) cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; cpu->id_pfr1 = 0x00011011; - cpu->id_dfr0 = 0x03010066; + cpu->isar.id_dfr0 = 0x03010066; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10101105; - cpu->id_mmfr1 = 0x40000000; - cpu->id_mmfr2 = 0x01260000; - cpu->id_mmfr3 = 0x02102211; + cpu->isar.id_mmfr0 = 0x10101105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; cpu->isar.id_isar0 = 0x02101110; cpu->isar.id_isar1 = 0x13112111; cpu->isar.id_isar2 = 0x21232042; @@ -189,10 +189,10 @@ static void aarch64_a53_initfn(Object *obj) cpu->isar.id_isar5 = 0x00011121; cpu->isar.id_isar6 = 0; cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64dfr0 = 0x10305106; cpu->isar.id_aa64isar0 = 0x00011120; cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ - cpu->dbgdidr = 0x3516d000; + cpu->isar.dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ @@ -228,12 +228,12 @@ static void aarch64_a72_initfn(Object *obj) cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; cpu->id_pfr1 = 0x00011011; - cpu->id_dfr0 = 0x03010066; + cpu->isar.id_dfr0 = 0x03010066; cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10201105; - cpu->id_mmfr1 = 0x40000000; - cpu->id_mmfr2 = 0x01260000; - cpu->id_mmfr3 = 0x02102211; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; cpu->isar.id_isar0 = 0x02101110; cpu->isar.id_isar1 = 0x13112111; cpu->isar.id_isar2 = 0x21232042; @@ -241,10 +241,10 @@ static void aarch64_a72_initfn(Object *obj) cpu->isar.id_isar4 = 0x00011142; cpu->isar.id_isar5 = 0x00011121; cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64dfr0 = 0x10305106; cpu->isar.id_aa64isar0 = 0x00011120; cpu->isar.id_aa64mmfr0 = 0x00001124; - cpu->dbgdidr = 0x3516d000; + cpu->isar.dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ @@ -699,9 +699,21 @@ static void aarch64_max_initfn(Object *obj) u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); cpu->isar.id_isar6 = u; - u = cpu->id_mmfr3; + u = cpu->isar.id_mmfr3; u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ - cpu->id_mmfr3 = u; + cpu->isar.id_mmfr3 = u; + + u = cpu->isar.id_mmfr4; + u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ + cpu->isar.id_mmfr4 = u; + + u = cpu->isar.id_aa64dfr0; + u = FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */ + cpu->isar.id_aa64dfr0 = u; + + u = cpu->isar.id_dfr0; + u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */ + cpu->isar.id_dfr0 = u; /* * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index 2e3e90c6a5..2ff72d47d1 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -16,8 +16,8 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn) { CPUARMState *env = &cpu->env; uint64_t bcr = env->cp15.dbgbcr[lbn]; - int brps = extract32(cpu->dbgdidr, 24, 4); - int ctx_cmps = extract32(cpu->dbgdidr, 20, 4); + int brps = arm_num_brps(cpu); + int ctx_cmps = arm_num_ctx_cmps(cpu); int bt; uint32_t contextidr; uint64_t hcr_el2; @@ -29,7 +29,7 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn) * case DBGWCR<n>_EL1.LBN must indicate that breakpoint). * We choose the former. */ - if (lbn > brps || lbn < (brps - ctx_cmps)) { + if (lbn >= brps || lbn < (brps - ctx_cmps)) { return false; } diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 9e79182ab4..2f47279155 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1574,3 +1574,5 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/helper.c b/target/arm/helper.c index 366dbcf460..79db169e04 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -25,6 +25,7 @@ #include "hw/semihosting/semihost.h" #include "sysemu/cpus.h" #include "sysemu/kvm.h" +#include "sysemu/tcg.h" #include "qemu/range.h" #include "qapi/qapi-commands-machine-target.h" #include "qapi/error.h" @@ -49,10 +50,10 @@ static void switch_mode(CPUARMState *env, int mode); static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) { - int nregs; + ARMCPU *cpu = env_archcpu(env); + int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16; /* VFP data registers are always little-endian. */ - nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16; if (reg < nregs) { stq_le_p(buf, *aa32_vfp_dreg(env, reg)); return 8; @@ -77,9 +78,9 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) { - int nregs; + ARMCPU *cpu = env_archcpu(env); + int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16; - nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16; if (reg < nregs) { *aa32_vfp_dreg(env, reg) = ldq_le_p(buf); return 8; @@ -905,8 +906,7 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* VFPv3 and upwards with NEON implement 32 double precision * registers (D0-D31). */ - if (!arm_feature(env, ARM_FEATURE_NEON) || - !arm_feature(env, ARM_FEATURE_VFP3)) { + if (!cpu_isar_feature(aa32_simd_r32, env_archcpu(env))) { /* D32DIS [30] is RAO/WI if D16-31 are not implemented. */ value |= (1 << 30); } @@ -1016,11 +1016,17 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { #define PMCRN_MASK 0xf800 #define PMCRN_SHIFT 11 #define PMCRLC 0x40 -#define PMCRDP 0x10 +#define PMCRDP 0x20 +#define PMCRX 0x10 #define PMCRD 0x8 #define PMCRC 0x4 #define PMCRP 0x2 #define PMCRE 0x1 +/* + * Mask of PMCR bits writeable by guest (not including WO bits like C, P, + * which can be written as 1 to trigger behaviour but which stay RAZ). + */ +#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE) #define PMXEVTYPER_P 0x80000000 #define PMXEVTYPER_U 0x40000000 @@ -1123,6 +1129,30 @@ static int64_t instructions_ns_per(uint64_t icount) } #endif +static bool pmu_8_1_events_supported(CPUARMState *env) +{ + /* For events which are supported in any v8.1 PMU */ + return cpu_isar_feature(any_pmu_8_1, env_archcpu(env)); +} + +static bool pmu_8_4_events_supported(CPUARMState *env) +{ + /* For events which are supported in any v8.1 PMU */ + return cpu_isar_feature(any_pmu_8_4, env_archcpu(env)); +} + +static uint64_t zero_event_get_count(CPUARMState *env) +{ + /* For events which on QEMU never fire, so their count is always zero */ + return 0; +} + +static int64_t zero_event_ns_per(uint64_t cycles) +{ + /* An event which never fires can never overflow */ + return -1; +} + static const pm_event pm_events[] = { { .number = 0x000, /* SW_INCR */ .supported = event_always_supported, @@ -1139,8 +1169,23 @@ static const pm_event pm_events[] = { .supported = event_always_supported, .get_count = cycles_get_count, .ns_per_count = cycles_ns_per, - } + }, #endif + { .number = 0x023, /* STALL_FRONTEND */ + .supported = pmu_8_1_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, + { .number = 0x024, /* STALL_BACKEND */ + .supported = pmu_8_1_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, + { .number = 0x03c, /* STALL */ + .supported = pmu_8_4_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, }; /* @@ -1149,7 +1194,7 @@ static const pm_event pm_events[] = { * should first be updated to something sparse instead of the current * supported_event_map[] array. */ -#define MAX_EVENT_ID 0x11 +#define MAX_EVENT_ID 0x3c #define UNSUPPORTED_EVENT UINT16_MAX static uint16_t supported_event_map[MAX_EVENT_ID + 1]; @@ -1536,9 +1581,8 @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, } } - /* only the DP, X, D and E bits are writable */ - env->cp15.c9_pmcr &= ~0x39; - env->cp15.c9_pmcr |= (value & 0x39); + env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK; + env->cp15.c9_pmcr |= (value & PMCR_WRITEABLE_MASK); pmu_op_finish(env); } @@ -6251,26 +6295,16 @@ static void define_debug_regs(ARMCPU *cpu) ARMCPRegInfo dbgdidr = { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL0_R, .accessfn = access_tda, - .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr, + .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr, }; /* Note that all these register fields hold "number of Xs minus 1". */ - brps = extract32(cpu->dbgdidr, 24, 4); - wrps = extract32(cpu->dbgdidr, 28, 4); - ctx_cmps = extract32(cpu->dbgdidr, 20, 4); + brps = arm_num_brps(cpu); + wrps = arm_num_wrps(cpu); + ctx_cmps = arm_num_ctx_cmps(cpu); assert(ctx_cmps <= brps); - /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties - * of the debug registers such as number of breakpoints; - * check that if they both exist then they agree. - */ - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps); - assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps); - assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps); - } - define_one_arm_cp_reg(cpu, &dbgdidr); define_arm_cp_regs(cpu, debug_cp_reginfo); @@ -6278,7 +6312,7 @@ static void define_debug_regs(ARMCPU *cpu) define_arm_cp_regs(cpu, debug_lpae_cp_reginfo); } - for (i = 0; i < brps + 1; i++) { + for (i = 0; i < brps; i++) { ARMCPRegInfo dbgregs[] = { { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH, .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4, @@ -6297,7 +6331,7 @@ static void define_debug_regs(ARMCPU *cpu) define_arm_cp_regs(cpu, dbgregs); } - for (i = 0; i < wrps + 1; i++) { + for (i = 0; i < wrps; i++) { ARMCPRegInfo dbgregs[] = { { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH, .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6, @@ -6317,6 +6351,96 @@ static void define_debug_regs(ARMCPU *cpu) } } +static void define_pmu_regs(ARMCPU *cpu) +{ + /* + * v7 performance monitor control register: same implementor + * field as main ID register, and we implement four counters in + * addition to the cycle count register. + */ + unsigned int i, pmcrn = 4; + ARMCPRegInfo pmcr = { + .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, + .type = ARM_CP_IO | ARM_CP_ALIAS, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), + .accessfn = pmreg_access, .writefn = pmcr_write, + .raw_writefn = raw_write, + }; + ARMCPRegInfo pmcr64 = { + .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), + .resetvalue = (cpu->midr & 0xff000000) | (pmcrn << PMCRN_SHIFT) | + PMCRLC, + .writefn = pmcr_write, .raw_writefn = raw_write, + }; + define_one_arm_cp_reg(cpu, &pmcr); + define_one_arm_cp_reg(cpu, &pmcr64); + for (i = 0; i < pmcrn; i++) { + char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i); + char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i); + char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); + char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); + ARMCPRegInfo pmev_regs[] = { + { .name = pmevcntr_name, .cp = 15, .crn = 14, + .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, + .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, + .accessfn = pmreg_access }, + { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_IO, + .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, + .raw_readfn = pmevcntr_rawread, + .raw_writefn = pmevcntr_rawwrite }, + { .name = pmevtyper_name, .cp = 15, .crn = 14, + .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, + .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, + .accessfn = pmreg_access }, + { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_IO, + .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, + .raw_writefn = pmevtyper_rawwrite }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, pmev_regs); + g_free(pmevcntr_name); + g_free(pmevcntr_el0_name); + g_free(pmevtyper_name); + g_free(pmevtyper_el0_name); + } + if (cpu_isar_feature(aa32_pmu_8_1, cpu)) { + ARMCPRegInfo v81_pmu_regs[] = { + { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .resetvalue = extract64(cpu->pmceid0, 32, 32) }, + { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .resetvalue = extract64(cpu->pmceid1, 32, 32) }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, v81_pmu_regs); + } + if (cpu_isar_feature(any_pmu_8_4, cpu)) { + static const ARMCPRegInfo v84_pmmir = { + .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6, + .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .resetvalue = 0 + }; + define_one_arm_cp_reg(cpu, &v84_pmmir); + } +} + /* We don't know until after realize whether there's a GICv3 * attached, and that is what registers the gicv3 sysregs. * So we have to fill in the GIC fields in ID_PFR/ID_PFR1_EL1/ID_AA64PFR0_EL1 @@ -6737,6 +6861,27 @@ static const ARMCPRegInfo ats1cp_reginfo[] = { }; #endif +/* + * ACTLR2 and HACTLR2 map to ACTLR_EL1[63:32] and + * ACTLR_EL2[63:32]. They exist only if the ID_MMFR4.AC2 field + * is non-zero, which is never for ARMv7, optionally in ARMv8 + * and mandatorily for ARMv8.2 and up. + * ACTLR2 is banked for S and NS if EL3 is AArch32. Since QEMU's + * implementation is RAZ/WI we can ignore this detail, as we + * do for ACTLR. + */ +static const ARMCPRegInfo actlr2_hactlr2_reginfo[] = { + { .name = "ACTLR2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, + .resetvalue = 0 }, + { .name = "HACTLR2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3, + .access = PL2_RW, .type = ARM_CP_CONST, + .resetvalue = 0 }, + REGINFO_SENTINEL +}; + void register_cp_regs_for_features(ARMCPU *cpu) { /* Register all the coprocessor registers based on feature bits */ @@ -6775,7 +6920,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_dfr0 }, + .resetvalue = cpu->isar.id_dfr0 }, { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -6785,22 +6930,22 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_mmfr0 }, + .resetvalue = cpu->isar.id_mmfr0 }, { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_mmfr1 }, + .resetvalue = cpu->isar.id_mmfr1 }, { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_mmfr2 }, + .resetvalue = cpu->isar.id_mmfr2 }, { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_mmfr3 }, + .resetvalue = cpu->isar.id_mmfr3 }, { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, @@ -6835,7 +6980,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_mmfr4 }, + .resetvalue = cpu->isar.id_mmfr4 }, { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, @@ -6859,67 +7004,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, pmovsset_cp_reginfo); } if (arm_feature(env, ARM_FEATURE_V7)) { - /* v7 performance monitor control register: same implementor - * field as main ID register, and we implement four counters in - * addition to the cycle count register. - */ - unsigned int i, pmcrn = 4; - ARMCPRegInfo pmcr = { - .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, - .access = PL0_RW, - .type = ARM_CP_IO | ARM_CP_ALIAS, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), - .accessfn = pmreg_access, .writefn = pmcr_write, - .raw_writefn = raw_write, - }; - ARMCPRegInfo pmcr64 = { - .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, - .access = PL0_RW, .accessfn = pmreg_access, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), - .resetvalue = (cpu->midr & 0xff000000) | (pmcrn << PMCRN_SHIFT), - .writefn = pmcr_write, .raw_writefn = raw_write, - }; - define_one_arm_cp_reg(cpu, &pmcr); - define_one_arm_cp_reg(cpu, &pmcr64); - for (i = 0; i < pmcrn; i++) { - char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i); - char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i); - char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); - char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); - ARMCPRegInfo pmev_regs[] = { - { .name = pmevcntr_name, .cp = 15, .crn = 14, - .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, - .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, - .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .accessfn = pmreg_access }, - { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, - .type = ARM_CP_IO, - .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .raw_readfn = pmevcntr_rawread, - .raw_writefn = pmevcntr_rawwrite }, - { .name = pmevtyper_name, .cp = 15, .crn = 14, - .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, - .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, - .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, - .accessfn = pmreg_access }, - { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, - .type = ARM_CP_IO, - .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, - .raw_writefn = pmevtyper_rawwrite }, - REGINFO_SENTINEL - }; - define_arm_cp_regs(cpu, pmev_regs); - g_free(pmevcntr_name); - g_free(pmevcntr_el0_name); - g_free(pmevtyper_name); - g_free(pmevtyper_el0_name); - } ARMCPRegInfo clidr = { .name = "CLIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, @@ -6930,24 +7014,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &clidr); define_arm_cp_regs(cpu, v7_cp_reginfo); define_debug_regs(cpu); + define_pmu_regs(cpu); } else { define_arm_cp_regs(cpu, not_v7_cp_reginfo); } - if (FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) >= 4 && - FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) != 0xf) { - ARMCPRegInfo v81_pmu_regs[] = { - { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .resetvalue = extract64(cpu->pmceid0, 32, 32) }, - { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .resetvalue = extract64(cpu->pmceid1, 32, 32) }, - REGINFO_SENTINEL - }; - define_arm_cp_regs(cpu, v81_pmu_regs); - } if (arm_feature(env, ARM_FEATURE_V8)) { /* AArch64 ID registers, which all have impdef reset values. * Note that within the ID register ranges the unused slots @@ -7005,12 +7075,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->id_aa64dfr0 }, + .resetvalue = cpu->isar.id_aa64dfr0 }, { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->id_aa64dfr1 }, + .resetvalue = cpu->isar.id_aa64dfr1 }, { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -7358,8 +7428,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) } else { define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); define_arm_cp_regs(cpu, vmsa_cp_reginfo); - /* TTCBR2 is introduced with ARMv8.2-A32HPD. */ - if (FIELD_EX32(cpu->id_mmfr4, ID_MMFR4, HPDS) != 0) { + /* TTCBR2 is introduced with ARMv8.2-AA32HPD. */ + if (cpu_isar_feature(aa32_hpd, cpu)) { define_one_arm_cp_reg(cpu, &ttbcr2_reginfo); } } @@ -7396,7 +7466,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_LPAE)) { define_arm_cp_regs(cpu, lpae_cp_reginfo); } - if (cpu_isar_feature(jazelle, cpu)) { + if (cpu_isar_feature(aa32_jazelle, cpu)) { define_arm_cp_regs(cpu, jazelle_regs); } /* Slightly awkwardly, the OMAP and StrongARM cores need all of @@ -7573,15 +7643,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) REGINFO_SENTINEL }; define_arm_cp_regs(cpu, auxcr_reginfo); - if (arm_feature(env, ARM_FEATURE_V8)) { - /* HACTLR2 maps to ACTLR_EL2[63:32] and is not in ARMv7 */ - ARMCPRegInfo hactlr2_reginfo = { - .name = "HACTLR2", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3, - .access = PL2_RW, .type = ARM_CP_CONST, - .resetvalue = 0 - }; - define_one_arm_cp_reg(cpu, &hactlr2_reginfo); + if (cpu_isar_feature(aa32_ac2, cpu)) { + define_arm_cp_regs(cpu, actlr2_hactlr2_reginfo); } } @@ -7721,14 +7784,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) #endif /*CONFIG_USER_ONLY*/ #endif - /* - * While all v8.0 cpus support aarch64, QEMU does have configurations - * that do not set ID_AA64ISAR1, e.g. user-only qemu-arm -cpu max, - * which will set ID_ISAR6. - */ - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) - ? cpu_isar_feature(aa64_predinv, cpu) - : cpu_isar_feature(aa32_predinv, cpu)) { + if (cpu_isar_feature(any_predinv, cpu)) { define_arm_cp_regs(cpu, predinv_reginfo); } @@ -7755,7 +7811,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) } else if (arm_feature(env, ARM_FEATURE_NEON)) { gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, 51, "arm-neon.xml", 0); - } else if (arm_feature(env, ARM_FEATURE_VFP3)) { + } else if (cpu_isar_feature(aa32_simd_r32, cpu)) { gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, 35, "arm-vfp3.xml", 0); } else if (arm_feature(env, ARM_FEATURE_VFP)) { @@ -8858,7 +8914,7 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode, env->elr_el[2] = env->regs[15]; } else { /* CPSR.PAN is normally preserved preserved unless... */ - if (cpu_isar_feature(aa64_pan, env_archcpu(env))) { + if (cpu_isar_feature(aa32_pan, env_archcpu(env))) { switch (new_el) { case 3: if (!arm_is_secure_below_el3(env)) { @@ -10234,58 +10290,82 @@ static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs) } #endif /* !CONFIG_USER_ONLY */ -ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va, - ARMMMUIdx mmu_idx) +static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) { - uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; - bool tbi, tbid, epd, hpd, using16k, using64k; - int select, tsz; + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 37, 2); + } else if (mmu_idx == ARMMMUIdx_Stage2) { + return 0; /* VTCR_EL2 */ + } else { + return extract32(tcr, 20, 1); + } +} - /* - * Bit 55 is always between the two regions, and is canonical for - * determining if address tagging is enabled. - */ - select = extract64(va, 55, 1); +static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) +{ + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 51, 2); + } else if (mmu_idx == ARMMMUIdx_Stage2) { + return 0; /* VTCR_EL2 */ + } else { + return extract32(tcr, 29, 1); + } +} + +ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, + ARMMMUIdx mmu_idx, bool data) +{ + uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; + bool epd, hpd, using16k, using64k; + int select, tsz, tbi; if (!regime_has_2_ranges(mmu_idx)) { + select = 0; tsz = extract32(tcr, 0, 6); using64k = extract32(tcr, 14, 1); using16k = extract32(tcr, 15, 1); if (mmu_idx == ARMMMUIdx_Stage2) { /* VTCR_EL2 */ - tbi = tbid = hpd = false; + hpd = false; } else { - tbi = extract32(tcr, 20, 1); hpd = extract32(tcr, 24, 1); - tbid = extract32(tcr, 29, 1); } epd = false; - } else if (!select) { - tsz = extract32(tcr, 0, 6); - epd = extract32(tcr, 7, 1); - using64k = extract32(tcr, 14, 1); - using16k = extract32(tcr, 15, 1); - tbi = extract64(tcr, 37, 1); - hpd = extract64(tcr, 41, 1); - tbid = extract64(tcr, 51, 1); } else { - int tg = extract32(tcr, 30, 2); - using16k = tg == 1; - using64k = tg == 3; - tsz = extract32(tcr, 16, 6); - epd = extract32(tcr, 23, 1); - tbi = extract64(tcr, 38, 1); - hpd = extract64(tcr, 42, 1); - tbid = extract64(tcr, 52, 1); + /* + * Bit 55 is always between the two regions, and is canonical for + * determining if address tagging is enabled. + */ + select = extract64(va, 55, 1); + if (!select) { + tsz = extract32(tcr, 0, 6); + epd = extract32(tcr, 7, 1); + using64k = extract32(tcr, 14, 1); + using16k = extract32(tcr, 15, 1); + hpd = extract64(tcr, 41, 1); + } else { + int tg = extract32(tcr, 30, 2); + using16k = tg == 1; + using64k = tg == 3; + tsz = extract32(tcr, 16, 6); + epd = extract32(tcr, 23, 1); + hpd = extract64(tcr, 42, 1); + } } tsz = MIN(tsz, 39); /* TODO: ARMv8.4-TTST */ tsz = MAX(tsz, 16); /* TODO: ARMv8.2-LVA */ + /* Present TBI as a composite with TBID. */ + tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + if (!data) { + tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); + } + tbi = (tbi >> select) & 1; + return (ARMVAParameters) { .tsz = tsz, .select = select, .tbi = tbi, - .tbid = tbid, .epd = epd, .hpd = hpd, .using16k = using16k, @@ -10293,16 +10373,6 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va, }; } -ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, - ARMMMUIdx mmu_idx, bool data) -{ - ARMVAParameters ret = aa64_va_parameters_both(env, va, mmu_idx); - - /* Present TBI as a composite with TBID. */ - ret.tbi &= (data || !ret.tbid); - return ret; -} - #ifndef CONFIG_USER_ONLY static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, ARMMMUIdx mmu_idx) @@ -10388,7 +10458,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, TCR *tcr = regime_tcr(env, mmu_idx); int ap, ns, xn, pxn; uint32_t el = regime_el(env, mmu_idx); - bool ttbr1_valid; uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); bool guarded = false; @@ -10403,14 +10472,11 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, param = aa64_va_parameters(env, address, mmu_idx, access_type != MMU_INST_FETCH); level = 0; - ttbr1_valid = regime_has_2_ranges(mmu_idx); addrsize = 64 - 8 * param.tbi; inputsize = 64 - param.tsz; } else { param = aa32_va_parameters(env, address, mmu_idx); level = 1; - /* There is no TTBR1 for EL2 */ - ttbr1_valid = (el != 2); addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32); inputsize = addrsize - param.tsz; } @@ -10427,7 +10493,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, if (inputsize < addrsize) { target_ulong top_bits = sextract64(address, inputsize, addrsize - inputsize); - if (-top_bits != param.select || (param.select && !ttbr1_valid)) { + if (-top_bits != param.select) { /* The gap between the two regions is a Translation fault */ fault_type = ARMFault_Translation; goto do_fault; @@ -12136,21 +12202,15 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, { uint32_t flags = rebuild_hflags_aprofile(env); ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx); - ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1); + uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; uint64_t sctlr; int tbii, tbid; flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1); /* Get control bits for tagged addresses. */ - if (regime_has_2_ranges(mmu_idx)) { - ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1); - tbid = (p1.tbi << 1) | p0.tbi; - tbii = tbid & ~((p1.tbid << 1) | p0.tbid); - } else { - tbid = p0.tbi; - tbii = tbid & !p0.tbid; - } + tbid = aa64_va_parameter_tbi(tcr, mmu_idx); + tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx); flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii); flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid); diff --git a/target/arm/helper.h b/target/arm/helper.h index aa3d8cd08f..fcbf504121 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -303,14 +303,8 @@ DEF_HELPER_2(neon_abd_s16, i32, i32, i32) DEF_HELPER_2(neon_abd_u32, i32, i32, i32) DEF_HELPER_2(neon_abd_s32, i32, i32, i32) -DEF_HELPER_2(neon_shl_u8, i32, i32, i32) -DEF_HELPER_2(neon_shl_s8, i32, i32, i32) DEF_HELPER_2(neon_shl_u16, i32, i32, i32) DEF_HELPER_2(neon_shl_s16, i32, i32, i32) -DEF_HELPER_2(neon_shl_u32, i32, i32, i32) -DEF_HELPER_2(neon_shl_s32, i32, i32, i32) -DEF_HELPER_2(neon_shl_u64, i64, i64, i64) -DEF_HELPER_2(neon_shl_s64, i64, i64, i64) DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) @@ -348,8 +342,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32) DEF_HELPER_2(neon_sub_u16, i32, i32, i32) DEF_HELPER_2(neon_mul_u8, i32, i32, i32) DEF_HELPER_2(neon_mul_u16, i32, i32, i32) -DEF_HELPER_2(neon_mul_p8, i32, i32, i32) -DEF_HELPER_2(neon_mull_p8, i64, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) @@ -569,9 +561,6 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(dc_zva, void, env, i64) -DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) - DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG, @@ -697,6 +686,16 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/internals.h b/target/arm/internals.h index 58c4d707c5..9f96a2359f 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -931,6 +931,48 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) } } +/** + * arm_num_brps: Return number of implemented breakpoints. + * Note that the ID register BRPS field is "number of bps - 1", + * and we return the actual number of breakpoints. + */ +static inline int arm_num_brps(ARMCPU *cpu) +{ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + } else { + return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; + } +} + +/** + * arm_num_wrps: Return number of implemented watchpoints. + * Note that the ID register WRPS field is "number of wps - 1", + * and we return the actual number of watchpoints. + */ +static inline int arm_num_wrps(ARMCPU *cpu) +{ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + } else { + return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; + } +} + +/** + * arm_num_ctx_cmps: Return number of implemented context comparators. + * Note that the ID register CTX_CMPS field is "number of cmps - 1", + * and we return the actual number of comparators. + */ +static inline int arm_num_ctx_cmps(ARMCPU *cpu) +{ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; + } else { + return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; + } +} + /* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. */ @@ -1091,7 +1133,7 @@ static inline uint32_t aarch32_cpsr_valid_mask(uint64_t features, if ((features >> ARM_FEATURE_THUMB2) & 1) { valid |= CPSR_IT; } - if (isar_feature_jazelle(id)) { + if (isar_feature_aa32_jazelle(id)) { valid |= CPSR_J; } if (isar_feature_aa32_pan(id)) { @@ -1127,15 +1169,12 @@ typedef struct ARMVAParameters { unsigned tsz : 8; unsigned select : 1; bool tbi : 1; - bool tbid : 1; bool epd : 1; bool hpd : 1; bool using16k : 1; bool using64k : 1; } ARMVAParameters; -ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va, - ARMMMUIdx mmu_idx); ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data); diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 3a8b437eef..7981ae3bc4 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -97,6 +97,9 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ahcf->isar.id_isar6 = 0; } + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, + ARM_CP15_REG32(0, 0, 1, 2)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR0); @@ -108,6 +111,28 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * Fortunately there is not yet anything in there that affects migration. */ + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, + ARM_CP15_REG32(0, 0, 1, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, + ARM_CP15_REG32(0, 0, 1, 5)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, + ARM_CP15_REG32(0, 0, 1, 6)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, + ARM_CP15_REG32(0, 0, 1, 7)); + if (read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, + ARM_CP15_REG32(0, 0, 2, 6))) { + /* + * Older kernels don't support reading ID_MMFR4 (a new in v8 + * register); assume it's zero. + */ + ahcf->isar.id_mmfr4 = 0; + } + + /* + * There is no way to read DBGDIDR, because currently 32-bit KVM + * doesn't implement debug at all. Leave it at zero. + */ + kvm_arm_destroy_scratch_host_vcpu(fdarray); if (err < 0) { diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 3bae9e4a66..0ad96c3500 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -541,6 +541,10 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) } else { err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, ARM64_SYS_REG(3, 0, 0, 4, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, + ARM64_SYS_REG(3, 0, 0, 5, 0)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, + ARM64_SYS_REG(3, 0, 0, 5, 1)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, ARM64_SYS_REG(3, 0, 0, 6, 0)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, @@ -559,6 +563,16 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * than skipping the reads and leaving 0, as we must avoid * considering the values in every case. */ + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, + ARM64_SYS_REG(3, 0, 0, 1, 2)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, + ARM64_SYS_REG(3, 0, 0, 1, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, + ARM64_SYS_REG(3, 0, 0, 1, 5)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, + ARM64_SYS_REG(3, 0, 0, 1, 6)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, + ARM64_SYS_REG(3, 0, 0, 1, 7)); err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, ARM64_SYS_REG(3, 0, 0, 2, 0)); err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, @@ -571,6 +585,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ARM64_SYS_REG(3, 0, 0, 2, 4)); err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, ARM64_SYS_REG(3, 0, 0, 2, 5)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, + ARM64_SYS_REG(3, 0, 0, 2, 6)); err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, ARM64_SYS_REG(3, 0, 0, 2, 7)); @@ -580,6 +596,36 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ARM64_SYS_REG(3, 0, 0, 3, 1)); err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, ARM64_SYS_REG(3, 0, 0, 3, 2)); + + /* + * DBGDIDR is a bit complicated because the kernel doesn't + * provide an accessor for it in 64-bit mode, which is what this + * scratch VM is in, and there's no architected "64-bit sysreg + * which reads the same as the 32-bit register" the way there is + * for other ID registers. Instead we synthesize a value from the + * AArch64 ID_AA64DFR0, the same way the kernel code in + * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. + * We only do this if the CPU supports AArch32 at EL1. + */ + if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { + int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); + int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); + int ctx_cmps = + FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); + int version = 6; /* ARMv8 debug architecture */ + bool has_el3 = + !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); + uint32_t dbgdidr = 0; + + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); + dbgdidr |= (1 << 15); /* RES1 bit */ + ahcf->isar.dbgdidr = dbgdidr; + } } sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0; diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index 4259056723..c7a8438b42 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -615,24 +615,9 @@ NEON_VOP(abd_u32, neon_u32, 1) } else { \ dest = src1 << tmp; \ }} while (0) -NEON_VOP(shl_u8, neon_u8, 4) NEON_VOP(shl_u16, neon_u16, 2) -NEON_VOP(shl_u32, neon_u32, 1) #undef NEON_FN -uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - if (shift >= 64 || shift <= -64) { - val = 0; - } else if (shift < 0) { - val >>= -shift; - } else { - val <<= shift; - } - return val; -} - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -645,27 +630,9 @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) } else { \ dest = src1 << tmp; \ }} while (0) -NEON_VOP(shl_s8, neon_s8, 4) NEON_VOP(shl_s16, neon_s16, 2) -NEON_VOP(shl_s32, neon_s32, 1) #undef NEON_FN -uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - int64_t val = valop; - if (shift >= 64) { - val = 0; - } else if (shift <= -64) { - val >>= 63; - } else if (shift < 0) { - val >>= -shift; - } else { - val <<= shift; - } - return val; -} - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -1162,60 +1129,6 @@ NEON_VOP(mul_u8, neon_u8, 4) NEON_VOP(mul_u16, neon_u16, 2) #undef NEON_FN -/* Polynomial multiplication is like integer multiplication except the - partial products are XORed, not added. */ -uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) -{ - uint32_t mask; - uint32_t result; - result = 0; - while (op1) { - mask = 0; - if (op1 & 1) - mask |= 0xff; - if (op1 & (1 << 8)) - mask |= (0xff << 8); - if (op1 & (1 << 16)) - mask |= (0xff << 16); - if (op1 & (1 << 24)) - mask |= (0xff << 24); - result ^= op2 & mask; - op1 = (op1 >> 1) & 0x7f7f7f7f; - op2 = (op2 << 1) & 0xfefefefe; - } - return result; -} - -uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) -{ - uint64_t result = 0; - uint64_t mask; - uint64_t op2ex = op2; - op2ex = (op2ex & 0xff) | - ((op2ex & 0xff00) << 8) | - ((op2ex & 0xff0000) << 16) | - ((op2ex & 0xff000000) << 24); - while (op1) { - mask = 0; - if (op1 & 1) { - mask |= 0xffff; - } - if (op1 & (1 << 8)) { - mask |= (0xffffU << 16); - } - if (op1 & (1 << 16)) { - mask |= (0xffffULL << 32); - } - if (op1 & (1 << 24)) { - mask |= (0xffffULL << 48); - } - result ^= op2ex & mask; - op1 = (op1 >> 1) & 0x7f7f7f7f; - op2ex <<= 1; - } - return result; -} - #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 NEON_VOP(tst_u8, neon_u8, 4) NEON_VOP(tst_u16, neon_u16, 2) @@ -2207,33 +2120,3 @@ void HELPER(neon_zip16)(void *vd, void *vm) rm[0] = m0; rd[0] = d0; } - -/* Helper function for 64 bit polynomial multiply case: - * perform PolynomialMult(op1, op2) and return either the top or - * bottom half of the 128 bit result. - */ -uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - for (bitnum = 0; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 << bitnum; - } - } - return res; -} -uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - /* bit 0 of op1 can't influence the high 64 bits at all */ - for (bitnum = 1; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 >> (64 - bitnum); - } - } - return res; -} diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c index 9746e32bf8..b909630317 100644 --- a/target/arm/pauth_helper.c +++ b/target/arm/pauth_helper.c @@ -320,7 +320,8 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param) { - uint64_t extfield = -param.select; + /* Note that bit 55 is used whether or not the regime has 2 ranges. */ + uint64_t extfield = sextract64(ptr, 55, 1); int bot_pac_bit = 64 - param.tsz; int top_pac_bit = 64 - 8 * param.tbi; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 7c26c3bfeb..596bf4cf73 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -6895,6 +6895,7 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_resl); write_vec_element(s, tcg_resh, rd, 1, MO_64); tcg_temp_free_i64(tcg_resh); + clear_vec_high(s, true, rd); } /* TBL/TBX @@ -6963,6 +6964,7 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_resl); write_vec_element(s, tcg_resh, rd, 1, MO_64); tcg_temp_free_i64(tcg_resh); + clear_vec_high(s, true, rd); } /* ZIP/UZP/TRN @@ -7052,6 +7054,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_resl); write_vec_element(s, tcg_resh, rd, 1, MO_64); tcg_temp_free_i64(tcg_resh); + clear_vec_high(s, true, rd); } /* @@ -7409,6 +7412,9 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, write_vec_element(s, tmp, rd, dst_index, size); tcg_temp_free_i64(tmp); + + /* INS is considered a 128-bit write for SVE. */ + clear_vec_high(s, true, rd); } @@ -7438,6 +7444,9 @@ static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) idx = extract32(imm5, 1 + size, 4 - size); write_vec_element(s, cpu_reg(s, rn), rd, idx, size); + + /* INS is considered a 128-bit write for SVE. */ + clear_vec_high(s, true, rd); } /* @@ -8735,9 +8744,9 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0x8: /* SSHL, USHL */ if (u) { - gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm); + gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); } else { - gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm); + gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); } break; case 0x9: /* SQSHL, UQSHL */ @@ -10533,10 +10542,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, tcg_passres, tcg_passres); break; - case 14: /* PMULL */ - assert(size == 0); - gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2); - break; default: g_assert_not_reached(); } @@ -10648,30 +10653,6 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, clear_vec_high(s, is_q, rd); } -static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm) -{ - /* PMULL of 64 x 64 -> 128 is an odd special case because it - * is the only three-reg-diff instruction which produces a - * 128-bit wide result from a single operation. However since - * it's possible to calculate the two halves more or less - * separately we just use two helper calls. - */ - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, is_q, MO_64); - read_vec_element(s, tcg_op2, rm, is_q, MO_64); - gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2); - write_vec_element(s, tcg_res, rd, 0, MO_64); - gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2); - write_vec_element(s, tcg_res, rd, 1, MO_64); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res); -} - /* AdvSIMD three different * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ @@ -10724,11 +10705,21 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); break; case 14: /* PMULL, PMULL2 */ - if (is_u || size == 1 || size == 2) { + if (is_u) { unallocated_encoding(s); return; } - if (size == 3) { + switch (size) { + case 0: /* PMULL.P8 */ + if (!fp_access_check(s)) { + return; + } + /* The Q field specifies lo/hi half input for this insn. */ + gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, + gen_helper_neon_pmull_h); + break; + + case 3: /* PMULL.P64 */ if (!dc_isar_feature(aa64_pmull, s)) { unallocated_encoding(s); return; @@ -10736,10 +10727,16 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - handle_pmull_64(s, is_q, rd, rn, rm); - return; + /* The Q field specifies lo/hi half input for this insn. */ + gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, + gen_helper_gvec_pmull_q); + break; + + default: + unallocated_encoding(s); + break; } - goto is_widening; + return; case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ @@ -10760,7 +10757,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - is_widening: if (!fp_access_check(s)) { return; } @@ -11132,6 +11128,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) is_q ? 16 : 8, vec_full_reg_size(s), (u ? uqsub_op : sqsub_op) + size); return; + case 0x08: /* SSHL, USHL */ + gen_gvec_op3(s, is_q, rd, rn, rm, + u ? &ushl_op[size] : &sshl_op[size]); + return; case 0x0c: /* SMAX, UMAX */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); @@ -11156,9 +11156,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) case 0x13: /* MUL, PMUL */ if (!u) { /* MUL */ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); - return; + } else { /* PMUL */ + gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); } - break; + return; case 0x12: /* MLA, MLS */ if (u) { gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]); @@ -11247,16 +11248,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x8: /* SSHL, USHL */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 }, - { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 }, - { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 }, - }; - genfn = fns[size][u]; - break; - } case 0x9: /* SQSHL, UQSHL */ { static NeonGenTwoOpEnvFn * const fns[3][2] = { @@ -11298,11 +11289,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x13: /* MUL, PMUL */ - assert(u); /* PMUL */ - assert(size == 0); - genfn = gen_helper_neon_mul_p8; - break; case 0x16: /* SQDMULH, SQRDMULH */ { static NeonGenTwoOpEnvFn * const fns[2][2] = { diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index bf90ac0e5b..ba46e2557a 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -201,7 +201,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_fp_d32, s) && + if (dp && !dc_isar_feature(aa32_simd_r32, s) && ((a->vm | a->vn | a->vd) & 0x10)) { return false; } @@ -334,7 +334,7 @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a) } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_fp_d32, s) && + if (dp && !dc_isar_feature(aa32_simd_r32, s) && ((a->vm | a->vn | a->vd) & 0x10)) { return false; } @@ -420,7 +420,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_fp_d32, s) && + if (dp && !dc_isar_feature(aa32_simd_r32, s) && ((a->vm | a->vd) & 0x10)) { return false; } @@ -484,7 +484,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) { + if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } @@ -556,7 +556,7 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) uint32_t offset; /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { return false; } @@ -615,7 +615,7 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) uint32_t offset; /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { return false; } @@ -662,7 +662,7 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a) } /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { return false; } @@ -912,7 +912,7 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) */ /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } @@ -978,7 +978,7 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) TCGv_i64 tmp; /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { return false; } @@ -1101,7 +1101,7 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd + n) > 16) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) { return false; } @@ -1309,7 +1309,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, TCGv_ptr fpst; /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vn | vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) { return false; } @@ -1458,7 +1458,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) TCGv_i64 f0, fd; /* UNDEF accesses to D16-D31 if they don't exist */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) { return false; } @@ -1822,7 +1822,8 @@ static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { return false; } @@ -1921,7 +1922,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) vd = a->vd; /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (vd & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) { return false; } @@ -2065,7 +2066,7 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { return false; } @@ -2138,7 +2139,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { return false; } @@ -2204,7 +2205,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } @@ -2264,7 +2265,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { return false; } @@ -2325,7 +2326,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { return false; } @@ -2384,7 +2385,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { return false; } @@ -2412,7 +2413,7 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) TCGv_i32 vm; /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { return false; } @@ -2440,7 +2441,7 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) TCGv_i32 vd; /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } @@ -2494,7 +2495,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) TCGv_ptr fpst; /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { return false; } @@ -2534,7 +2535,7 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } @@ -2627,7 +2628,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) } /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { return false; } @@ -2723,7 +2724,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) TCGv_ptr fpst; /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) { + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { return false; } diff --git a/target/arm/translate.c b/target/arm/translate.c index 20f89ace2f..79880adaad 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -42,7 +42,7 @@ #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) /* currently all emulated v5 cores are also v5TE, so don't bother */ #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5) -#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s) +#define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s) #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K) #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2) @@ -2612,7 +2612,7 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) #define VFP_SREG(insn, bigbit, smallbit) \ ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #define VFP_DREG(reg, insn, bigbit, smallbit) do { \ - if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \ + if (dc_isar_feature(aa32_simd_r32, s)) { \ reg = (((insn) >> (bigbit)) & 0x0f) \ | (((insn) >> ((smallbit) - 4)) & 0x10); \ } else { \ @@ -3575,13 +3575,13 @@ static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift, if (u) { switch (size) { case 1: gen_helper_neon_shl_u16(var, var, shift); break; - case 2: gen_helper_neon_shl_u32(var, var, shift); break; + case 2: gen_ushl_i32(var, var, shift); break; default: abort(); } } else { switch (size) { case 1: gen_helper_neon_shl_s16(var, var, shift); break; - case 2: gen_helper_neon_shl_s32(var, var, shift); break; + case 2: gen_sshl_i32(var, var, shift); break; default: abort(); } } @@ -4384,6 +4384,280 @@ const GVecGen3 cmtst_op[4] = { .vece = MO_64 }, }; +void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(); + TCGv_i32 rval = tcg_temp_new_i32(); + TCGv_i32 lsh = tcg_temp_new_i32(); + TCGv_i32 rsh = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_const_i32(0); + TCGv_i32 max = tcg_const_i32(32); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(lsh, shift); + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_shl_i32(lval, src, lsh); + tcg_gen_shr_i32(rval, src, rsh); + tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); + + tcg_temp_free_i32(lval); + tcg_temp_free_i32(rval); + tcg_temp_free_i32(lsh); + tcg_temp_free_i32(rsh); + tcg_temp_free_i32(zero); + tcg_temp_free_i32(max); +} + +void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(); + TCGv_i64 rval = tcg_temp_new_i64(); + TCGv_i64 lsh = tcg_temp_new_i64(); + TCGv_i64 rsh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + TCGv_i64 max = tcg_const_i64(64); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i64(lsh, shift); + tcg_gen_neg_i64(rsh, lsh); + tcg_gen_shl_i64(lval, src, lsh); + tcg_gen_shr_i64(rval, src, rsh); + tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); + + tcg_temp_free_i64(lval); + tcg_temp_free_i64(rval); + tcg_temp_free_i64(lsh); + tcg_temp_free_i64(rsh); + tcg_temp_free_i64(zero); + tcg_temp_free_i64(max); +} + +static void gen_ushl_vec(unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(dst); + TCGv_vec rval = tcg_temp_new_vec_matching(dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(dst); + TCGv_vec msk, max; + + tcg_gen_neg_vec(vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(lsh, shift); + } else { + msk = tcg_temp_new_vec_matching(dst); + tcg_gen_dupi_vec(vece, msk, 0xff); + tcg_gen_and_vec(vece, lsh, shift, msk); + tcg_gen_and_vec(vece, rsh, rsh, msk); + tcg_temp_free_vec(msk); + } + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_shlv_vec(vece, lval, src, lsh); + tcg_gen_shrv_vec(vece, rval, src, rsh); + + max = tcg_temp_new_vec_matching(dst); + tcg_gen_dupi_vec(vece, max, 8 << vece); + + /* + * The choice of LT (signed) and GEU (unsigned) are biased toward + * the instructions of the x86_64 host. For MO_8, the whole byte + * is significant so we must use an unsigned compare; otherwise we + * have already masked to a byte and so a signed compare works. + * Other tcg hosts have a full set of comparisons and do not care. + */ + if (vece == MO_8) { + tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); + tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); + tcg_gen_andc_vec(vece, lval, lval, lsh); + tcg_gen_andc_vec(vece, rval, rval, rsh); + } else { + tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); + tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); + tcg_gen_and_vec(vece, lval, lval, lsh); + tcg_gen_and_vec(vece, rval, rval, rsh); + } + tcg_gen_or_vec(vece, dst, lval, rval); + + tcg_temp_free_vec(max); + tcg_temp_free_vec(lval); + tcg_temp_free_vec(rval); + tcg_temp_free_vec(lsh); + tcg_temp_free_vec(rsh); +} + +static const TCGOpcode ushl_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 +}; + +const GVecGen3 ushl_op[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = ushl_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = ushl_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = ushl_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = ushl_list, + .vece = MO_64 }, +}; + +void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(); + TCGv_i32 rval = tcg_temp_new_i32(); + TCGv_i32 lsh = tcg_temp_new_i32(); + TCGv_i32 rsh = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_const_i32(0); + TCGv_i32 max = tcg_const_i32(31); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(lsh, shift); + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_shl_i32(lval, src, lsh); + tcg_gen_umin_i32(rsh, rsh, max); + tcg_gen_sar_i32(rval, src, rsh); + tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); + + tcg_temp_free_i32(lval); + tcg_temp_free_i32(rval); + tcg_temp_free_i32(lsh); + tcg_temp_free_i32(rsh); + tcg_temp_free_i32(zero); + tcg_temp_free_i32(max); +} + +void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(); + TCGv_i64 rval = tcg_temp_new_i64(); + TCGv_i64 lsh = tcg_temp_new_i64(); + TCGv_i64 rsh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + TCGv_i64 max = tcg_const_i64(63); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i64(lsh, shift); + tcg_gen_neg_i64(rsh, lsh); + tcg_gen_shl_i64(lval, src, lsh); + tcg_gen_umin_i64(rsh, rsh, max); + tcg_gen_sar_i64(rval, src, rsh); + tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); + + tcg_temp_free_i64(lval); + tcg_temp_free_i64(rval); + tcg_temp_free_i64(lsh); + tcg_temp_free_i64(rsh); + tcg_temp_free_i64(zero); + tcg_temp_free_i64(max); +} + +static void gen_sshl_vec(unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(dst); + TCGv_vec rval = tcg_temp_new_vec_matching(dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(dst); + TCGv_vec tmp = tcg_temp_new_vec_matching(dst); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_neg_vec(vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(lsh, shift); + } else { + tcg_gen_dupi_vec(vece, tmp, 0xff); + tcg_gen_and_vec(vece, lsh, shift, tmp); + tcg_gen_and_vec(vece, rsh, rsh, tmp); + } + + /* Bound rsh so out of bound right shift gets -1. */ + tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); + tcg_gen_umin_vec(vece, rsh, rsh, tmp); + tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); + + tcg_gen_shlv_vec(vece, lval, src, lsh); + tcg_gen_sarv_vec(vece, rval, src, rsh); + + /* Select in-bound left shift. */ + tcg_gen_andc_vec(vece, lval, lval, tmp); + + /* Select between left and right shift. */ + if (vece == MO_8) { + tcg_gen_dupi_vec(vece, tmp, 0); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); + } else { + tcg_gen_dupi_vec(vece, tmp, 0x80); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); + } + + tcg_temp_free_vec(lval); + tcg_temp_free_vec(rval); + tcg_temp_free_vec(lsh); + tcg_temp_free_vec(rsh); + tcg_temp_free_vec(tmp); +} + +static const TCGOpcode sshl_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 +}; + +const GVecGen3 sshl_op[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = sshl_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = sshl_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = sshl_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = sshl_list, + .vece = MO_64 }, +}; + static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) { @@ -4733,16 +5007,17 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VMUL: /* VMUL */ if (u) { - /* Polynomial case allows only P8 and is handled below. */ + /* Polynomial case allows only P8. */ if (size != 0) { return 1; } + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, + 0, gen_helper_gvec_pmul_b); } else { tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; } - break; + return 0; case NEON_3R_VML: /* VMLA, VMLS */ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, @@ -4787,6 +5062,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) vec_size, vec_size); } return 0; + + case NEON_3R_VSHL: + /* Note the operation is vshl vd,vm,vn */ + tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size, + u ? &ushl_op[size] : &sshl_op[size]); + return 0; } if (size == 3) { @@ -4795,13 +5076,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) neon_load_reg64(cpu_V0, rn + pass); neon_load_reg64(cpu_V1, rm + pass); switch (op) { - case NEON_3R_VSHL: - if (u) { - gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); - } else { - gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); - } - break; case NEON_3R_VQSHL: if (u) { gen_helper_neon_qshl_u64(cpu_V0, cpu_env, @@ -4836,7 +5110,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } pairwise = 0; switch (op) { - case NEON_3R_VSHL: case NEON_3R_VQSHL: case NEON_3R_VRSHL: case NEON_3R_VQRSHL: @@ -4916,9 +5189,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; - case NEON_3R_VSHL: - GEN_NEON_INTEGER_OP(shl); - break; case NEON_3R_VQSHL: GEN_NEON_INTEGER_OP_ENV(qshl); break; @@ -4937,10 +5207,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); break; - case NEON_3R_VMUL: - /* VMUL.P8; other cases already eliminated. */ - gen_helper_neon_mul_p8(tmp, tmp, tmp2); - break; case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); break; @@ -5327,9 +5593,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } } else { if (input_unsigned) { - gen_helper_neon_shl_u64(cpu_V0, in, tmp64); + gen_ushl_i64(cpu_V0, in, tmp64); } else { - gen_helper_neon_shl_s64(cpu_V0, in, tmp64); + gen_sshl_i64(cpu_V0, in, tmp64); } } tmp = tcg_temp_new_i32(); @@ -5600,27 +5866,20 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } - /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply) - * outside the loop below as it only performs a single pass. - */ - if (op == 14 && size == 2) { - TCGv_i64 tcg_rn, tcg_rm, tcg_rd; - - if (!dc_isar_feature(aa32_pmull, s)) { - return 1; + /* Handle polynomial VMULL in a single pass. */ + if (op == 14) { + if (size == 0) { + /* VMULL.P8 */ + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, + 0, gen_helper_neon_pmull_h); + } else { + /* VMULL.P64 */ + if (!dc_isar_feature(aa32_pmull, s)) { + return 1; + } + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, + 0, gen_helper_gvec_pmull_q); } - tcg_rn = tcg_temp_new_i64(); - tcg_rm = tcg_temp_new_i64(); - tcg_rd = tcg_temp_new_i64(); - neon_load_reg64(tcg_rn, rn); - neon_load_reg64(tcg_rm, rm); - gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm); - neon_store_reg64(tcg_rd, rd); - gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm); - neon_store_reg64(tcg_rd, rd + 1); - tcg_temp_free_i64(tcg_rn); - tcg_temp_free_i64(tcg_rm); - tcg_temp_free_i64(tcg_rd); return 0; } @@ -5698,11 +5957,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ gen_neon_mull(cpu_V0, tmp, tmp2, size, u); break; - case 14: /* Polynomial VMULL */ - gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); - break; default: /* 15 is RESERVED: caught earlier */ abort(); } @@ -9845,8 +10099,8 @@ static bool op_div(DisasContext *s, arg_rrr *a, bool u) TCGv_i32 t1, t2; if (s->thumb - ? !dc_isar_feature(thumb_div, s) - : !dc_isar_feature(arm_div, s)) { + ? !dc_isar_feature(aa32_thumb_div, s) + : !dc_isar_feature(aa32_arm_div, s)) { return false; } diff --git a/target/arm/translate.h b/target/arm/translate.h index 5b167c416a..d9ea0c99cc 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -278,6 +278,8 @@ uint64_t vfp_expand_imm(int size, uint8_t imm8); extern const GVecGen3 mla_op[4]; extern const GVecGen3 mls_op[4]; extern const GVecGen3 cmtst_op[4]; +extern const GVecGen3 sshl_op[4]; +extern const GVecGen3 ushl_op[4]; extern const GVecGen2i ssra_op[4]; extern const GVecGen2i usra_op[4]; extern const GVecGen2i sri_op[4]; @@ -287,6 +289,10 @@ extern const GVecGen4 sqadd_op[4]; extern const GVecGen4 uqsub_op[4]; extern const GVecGen4 sqsub_op[4]; void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); /* * Forward to the isar_feature_* tests given a DisasContext pointer. diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index dedef62403..8017bd88c4 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -1046,3 +1046,214 @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc, get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); } + +void HELPER(gvec_sshl_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int8_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz; ++i) { + int8_t mm = m[i]; + int8_t nn = n[i]; + int8_t res = 0; + if (mm >= 0) { + if (mm < 8) { + res = nn << mm; + } + } else { + res = nn >> (mm > -8 ? -mm : 7); + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_sshl_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + int8_t mm = m[i]; /* only 8 bits of shift are significant */ + int16_t nn = n[i]; + int16_t res = 0; + if (mm >= 0) { + if (mm < 16) { + res = nn << mm; + } + } else { + res = nn >> (mm > -16 ? -mm : 15); + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_ushl_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint8_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz; ++i) { + int8_t mm = m[i]; + uint8_t nn = n[i]; + uint8_t res = 0; + if (mm >= 0) { + if (mm < 8) { + res = nn << mm; + } + } else { + if (mm > -8) { + res = nn >> -mm; + } + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + int8_t mm = m[i]; /* only 8 bits of shift are significant */ + uint16_t nn = n[i]; + uint16_t res = 0; + if (mm >= 0) { + if (mm < 16) { + res = nn << mm; + } + } else { + if (mm > -16) { + res = nn >> -mm; + } + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +/* + * 8x8->8 polynomial multiply. + * + * Polynomial multiplication is like integer multiplication except the + * partial products are XORed, not added. + * + * TODO: expose this as a generic vector operation, as it is a common + * crypto building block. + */ +void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + uint64_t nn = n[i]; + uint64_t mm = m[i]; + uint64_t rr = 0; + + for (j = 0; j < 8; ++j) { + uint64_t mask = (nn & 0x0101010101010101ull) * 0xff; + rr ^= mm & mask; + mm = (mm << 1) & 0xfefefefefefefefeull; + nn >>= 1; + } + d[i] = rr; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +/* + * 64x64->128 polynomial multiply. + * Because of the lanes are not accessed in strict columns, + * this probably cannot be turned into a generic helper. + */ +void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t hi = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; i += 2) { + uint64_t nn = n[i + hi]; + uint64_t mm = m[i + hi]; + uint64_t rhi = 0; + uint64_t rlo = 0; + + /* Bit 0 can only influence the low 64-bit result. */ + if (nn & 1) { + rlo = mm; + } + + for (j = 1; j < 64; ++j) { + uint64_t mask = -((nn >> j) & 1); + rlo ^= (mm << j) & mask; + rhi ^= (mm >> (64 - j)) & mask; + } + d[i] = rlo; + d[i + 1] = rhi; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +/* + * 8x8->16 polynomial multiply. + * + * The byte inputs are expanded to (or extracted from) half-words. + * Note that neon and sve2 get the inputs from different positions. + * This allows 4 bytes to be processed in parallel with uint64_t. + */ + +static uint64_t expand_byte_to_half(uint64_t x) +{ + return (x & 0x000000ff) + | ((x & 0x0000ff00) << 8) + | ((x & 0x00ff0000) << 16) + | ((x & 0xff000000) << 24); +} + +static uint64_t pmull_h(uint64_t op1, uint64_t op2) +{ + uint64_t result = 0; + int i; + + for (i = 0; i < 8; ++i) { + uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff; + result ^= op2 & mask; + op1 >>= 1; + op2 <<= 1; + } + return result; +} + +void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + int hi = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t nn = n[hi], mm = m[hi]; + + d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + nn >>= 32; + mm >>= 32; + d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + + clear_tail(d, 16, simd_maxsz(desc)); +} + +#ifdef TARGET_AARCH64 +void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + int shift = simd_data(desc) * 8; + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull; + uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull; + + d[i] = pmull_h(nn, mm); + } +} +#endif diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 0ae7d4f34a..930d6e747f 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -185,7 +185,7 @@ uint32_t vfp_get_fpscr(CPUARMState *env) void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) { /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ - if (!cpu_isar_feature(aa64_fp16, env_archcpu(env))) { + if (!cpu_isar_feature(any_fp16, env_archcpu(env))) { val &= ~FPCR_FZ16; } |