aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-02-21 16:18:38 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-02-21 16:18:38 +0000
commit9ac5df20f51fabcba0d902025df4bd7ea987c158 (patch)
treeed11117e803f345aa1e04d050a4c40fb0cc61b85
parenta8c6af67e1e8d460e2c6e87070807e0a02c0fec2 (diff)
parent9eb4f58918a851fb46895fd9b7ce579afeac9d02 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200221-1' into staging
target-arm queue: * aspeed/scu: Implement chip ID register * hw/misc/iotkit-secctl: Fix writing to 'PPC Interrupt Clear' register * mainstone: Make providing flash images non-mandatory * z2: Make providing flash images non-mandatory * Fix failures to flush SVE high bits after AdvSIMD INS/ZIP/UZP/TRN/TBL/TBX/EXT * Minor performance improvement: spend less time recalculating hflags values * Code cleanup to isar_feature function tests * Implement ARMv8.1-PMU and ARMv8.4-PMU extensions * Bugfix: correct handling of PMCR_EL0.LC bit * Bugfix: correct definition of PMCRDP * Correctly implement ACTLR2, HACTLR2 * allwinner: Wire up USB ports * Vectorize emulation of USHL, SSHL, PMUL* * xilinx_spips: Correct the number of dummy cycles for the FAST_READ_4 cmd * sh4: Fix PCI ISA IO memory subregion # gpg: Signature made Fri 21 Feb 2020 16:17:37 GMT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20200221-1: (46 commits) target/arm: Set MVFR0.FPSP for ARMv5 cpus target/arm: Use isar_feature_aa32_simd_r32 more places target/arm: Rename isar_feature_aa32_simd_r32 sh4: Fix PCI ISA IO memory subregion xilinx_spips: Correct the number of dummy cycles for the FAST_READ_4 cmd target/arm: Convert PMULL.8 to gvec target/arm: Convert PMULL.64 to gvec target/arm: Convert PMUL.8 to gvec target/arm: Vectorize USHL and SSHL arm: allwinner: Wire up USB ports hcd-ehci: Introduce "companion-enable" sysbus property hw: usb: hcd-ohci: Move OHCISysBusState and TYPE_SYSBUS_OHCI to include file target/arm: Correctly implement ACTLR2, HACTLR2 target/arm: Use FIELD_EX32 for testing 32-bit fields target/arm: Use isar_feature function for testing AA32HPD feature target/arm: Test correct register in aa32_pan and aa32_ats1e1 checks target/arm: Correct handling of PMCR_EL0.LC bit target/arm: Correct definition of PMCRDP target/arm: Provide ARMv8.4-PMU in '-cpu max' target/arm: Implement ARMv8.4-PMU extension ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/arm/allwinner-a10.c43
-rw-r--r--hw/arm/mainstone.c11
-rw-r--r--hw/arm/z2.c6
-rw-r--r--hw/intc/armv7m_nvic.c10
-rw-r--r--hw/misc/aspeed_scu.c93
-rw-r--r--hw/misc/iotkit-secctl.c2
-rw-r--r--hw/sh4/sh_pci.c11
-rw-r--r--hw/ssi/xilinx_spips.c2
-rw-r--r--hw/usb/hcd-ehci-sysbus.c2
-rw-r--r--hw/usb/hcd-ohci.c15
-rw-r--r--hw/usb/hcd-ohci.h16
-rw-r--r--include/hw/arm/allwinner-a10.h6
-rw-r--r--linux-user/elfload.c4
-rw-r--r--target/arm/cpu.c169
-rw-r--r--target/arm/cpu.h145
-rw-r--r--target/arm/cpu64.c58
-rw-r--r--target/arm/debug_helper.c6
-rw-r--r--target/arm/helper-sve.h2
-rw-r--r--target/arm/helper.c436
-rw-r--r--target/arm/helper.h21
-rw-r--r--target/arm/internals.h47
-rw-r--r--target/arm/kvm32.c25
-rw-r--r--target/arm/kvm64.c46
-rw-r--r--target/arm/neon_helper.c117
-rw-r--r--target/arm/pauth_helper.c3
-rw-r--r--target/arm/translate-a64.c92
-rw-r--r--target/arm/translate-vfp.inc.c53
-rw-r--r--target/arm/translate.c356
-rw-r--r--target/arm/translate.h6
-rw-r--r--target/arm/vec_helper.c211
-rw-r--r--target/arm/vfp_helper.c2
31 files changed, 1361 insertions, 655 deletions
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
index 1cde165611..2ae9c15311 100644
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -24,11 +24,15 @@
#include "hw/arm/allwinner-a10.h"
#include "hw/misc/unimp.h"
#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+#include "hw/usb/hcd-ohci.h"
#define AW_A10_PIC_REG_BASE 0x01c20400
#define AW_A10_PIT_REG_BASE 0x01c20c00
#define AW_A10_UART0_REG_BASE 0x01c28000
#define AW_A10_EMAC_BASE 0x01c0b000
+#define AW_A10_EHCI_BASE 0x01c14000
+#define AW_A10_OHCI_BASE 0x01c14400
#define AW_A10_SATA_BASE 0x01c18000
static void aw_a10_init(Object *obj)
@@ -49,6 +53,17 @@ static void aw_a10_init(Object *obj)
sysbus_init_child_obj(obj, "sata", &s->sata, sizeof(s->sata),
TYPE_ALLWINNER_AHCI);
+
+ if (machine_usb(current_machine)) {
+ int i;
+
+ for (i = 0; i < AW_A10_NUM_USB; i++) {
+ sysbus_init_child_obj(obj, "ehci[*]", OBJECT(&s->ehci[i]),
+ sizeof(s->ehci[i]), TYPE_PLATFORM_EHCI);
+ sysbus_init_child_obj(obj, "ohci[*]", OBJECT(&s->ohci[i]),
+ sizeof(s->ohci[i]), TYPE_SYSBUS_OHCI);
+ }
+ }
}
static void aw_a10_realize(DeviceState *dev, Error **errp)
@@ -121,6 +136,34 @@ static void aw_a10_realize(DeviceState *dev, Error **errp)
serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2,
qdev_get_gpio_in(dev, 1),
115200, serial_hd(0), DEVICE_NATIVE_ENDIAN);
+
+ if (machine_usb(current_machine)) {
+ int i;
+
+ for (i = 0; i < AW_A10_NUM_USB; i++) {
+ char bus[16];
+
+ sprintf(bus, "usb-bus.%d", i);
+
+ object_property_set_bool(OBJECT(&s->ehci[i]), true,
+ "companion-enable", &error_fatal);
+ object_property_set_bool(OBJECT(&s->ehci[i]), true, "realized",
+ &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->ehci[i]), 0,
+ AW_A10_EHCI_BASE + i * 0x8000);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
+ qdev_get_gpio_in(dev, 39 + i));
+
+ object_property_set_str(OBJECT(&s->ohci[i]), bus, "masterbus",
+ &error_fatal);
+ object_property_set_bool(OBJECT(&s->ohci[i]), true, "realized",
+ &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->ohci[i]), 0,
+ AW_A10_OHCI_BASE + i * 0x8000);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->ohci[i]), 0,
+ qdev_get_gpio_in(dev, 64 + i));
+ }
+ }
}
static void aw_a10_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c
index b01ce3ce08..6e64dfab50 100644
--- a/hw/arm/mainstone.c
+++ b/hw/arm/mainstone.c
@@ -138,19 +138,10 @@ static void mainstone_common_init(MemoryRegion *address_space_mem,
/* There are two 32MiB flash devices on the board */
for (i = 0; i < 2; i ++) {
dinfo = drive_get(IF_PFLASH, 0, i);
- if (!dinfo) {
- if (qtest_enabled()) {
- break;
- }
- error_report("Two flash images must be given with the "
- "'pflash' parameter");
- exit(1);
- }
-
if (!pflash_cfi01_register(mainstone_flash_base[i],
i ? "mainstone.flash1" : "mainstone.flash0",
MAINSTONE_FLASH,
- blk_by_legacy_dinfo(dinfo),
+ dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
sector_len, 4, 0, 0, 0, 0, be)) {
error_report("Error registering flash memory");
exit(1);
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index 34794fe3ae..4bb237f22d 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -314,12 +314,6 @@ static void z2_init(MachineState *machine)
be = 0;
#endif
dinfo = drive_get(IF_PFLASH, 0, 0);
- if (!dinfo && !qtest_enabled()) {
- error_report("Flash image must be given with the "
- "'pflash' parameter");
- exit(1);
- }
-
if (!pflash_cfi01_register(Z2_FLASH_BASE, "z2.flash0", Z2_FLASH_SIZE,
dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
sector_len, 4, 0, 0, 0, 0, be)) {
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index f9e0eeaace..22a43e4984 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -1227,17 +1227,17 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
case 0xd44: /* PFR1. */
return cpu->id_pfr1;
case 0xd48: /* DFR0. */
- return cpu->id_dfr0;
+ return cpu->isar.id_dfr0;
case 0xd4c: /* AFR0. */
return cpu->id_afr0;
case 0xd50: /* MMFR0. */
- return cpu->id_mmfr0;
+ return cpu->isar.id_mmfr0;
case 0xd54: /* MMFR1. */
- return cpu->id_mmfr1;
+ return cpu->isar.id_mmfr1;
case 0xd58: /* MMFR2. */
- return cpu->id_mmfr2;
+ return cpu->isar.id_mmfr2;
case 0xd5c: /* MMFR3. */
- return cpu->id_mmfr3;
+ return cpu->isar.id_mmfr3;
case 0xd60: /* ISAR0. */
return cpu->isar.id_isar0;
case 0xd64: /* ISAR1. */
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index ce2f9562d4..9d7482a9df 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -77,6 +77,8 @@
#define CPU2_BASE_SEG4 TO_REG(0x110)
#define CPU2_BASE_SEG5 TO_REG(0x114)
#define CPU2_CACHE_CTRL TO_REG(0x118)
+#define CHIP_ID0 TO_REG(0x150)
+#define CHIP_ID1 TO_REG(0x154)
#define UART_HPLL_CLK TO_REG(0x160)
#define PCIE_CTRL TO_REG(0x180)
#define BMC_MMIO_CTRL TO_REG(0x184)
@@ -115,6 +117,8 @@
#define AST2600_HW_STRAP2_PROT TO_REG(0x518)
#define AST2600_RNG_CTRL TO_REG(0x524)
#define AST2600_RNG_DATA TO_REG(0x540)
+#define AST2600_CHIP_ID0 TO_REG(0x5B0)
+#define AST2600_CHIP_ID1 TO_REG(0x5B4)
#define AST2600_CLK TO_REG(0x40)
@@ -182,6 +186,8 @@ static const uint32_t ast2500_a1_resets[ASPEED_SCU_NR_REGS] = {
[CPU2_BASE_SEG1] = 0x80000000U,
[CPU2_BASE_SEG4] = 0x1E600000U,
[CPU2_BASE_SEG5] = 0xC0000000U,
+ [CHIP_ID0] = 0x1234ABCDU,
+ [CHIP_ID1] = 0x88884444U,
[UART_HPLL_CLK] = 0x00001903U,
[PCIE_CTRL] = 0x0000007BU,
[BMC_DEV_ID] = 0x00002402U
@@ -232,8 +238,47 @@ static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
return s->regs[reg];
}
-static void aspeed_scu_write(void *opaque, hwaddr offset, uint64_t data,
- unsigned size)
+static void aspeed_ast2400_scu_write(void *opaque, hwaddr offset,
+ uint64_t data, unsigned size)
+{
+ AspeedSCUState *s = ASPEED_SCU(opaque);
+ int reg = TO_REG(offset);
+
+ if (reg >= ASPEED_SCU_NR_REGS) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
+ __func__, offset);
+ return;
+ }
+
+ if (reg > PROT_KEY && reg < CPU2_BASE_SEG1 &&
+ !s->regs[PROT_KEY]) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__);
+ }
+
+ trace_aspeed_scu_write(offset, size, data);
+
+ switch (reg) {
+ case PROT_KEY:
+ s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
+ return;
+ case SILICON_REV:
+ case FREQ_CNTR_EVAL:
+ case VGA_SCRATCH1 ... VGA_SCRATCH8:
+ case RNG_DATA:
+ case FREE_CNTR4:
+ case FREE_CNTR4_EXT:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Write to read-only offset 0x%" HWADDR_PRIx "\n",
+ __func__, offset);
+ return;
+ }
+
+ s->regs[reg] = data;
+}
+
+static void aspeed_ast2500_scu_write(void *opaque, hwaddr offset,
+ uint64_t data, unsigned size)
{
AspeedSCUState *s = ASPEED_SCU(opaque);
int reg = TO_REG(offset);
@@ -257,31 +302,19 @@ static void aspeed_scu_write(void *opaque, hwaddr offset, uint64_t data,
case PROT_KEY:
s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
return;
- case CLK_SEL:
- s->regs[reg] = data;
- break;
case HW_STRAP1:
- if (ASPEED_IS_AST2500(s->regs[SILICON_REV])) {
- s->regs[HW_STRAP1] |= data;
- return;
- }
- /* Jump to assignment below */
- break;
+ s->regs[HW_STRAP1] |= data;
+ return;
case SILICON_REV:
- if (ASPEED_IS_AST2500(s->regs[SILICON_REV])) {
- s->regs[HW_STRAP1] &= ~data;
- } else {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: Write to read-only offset 0x%" HWADDR_PRIx "\n",
- __func__, offset);
- }
- /* Avoid assignment below, we've handled everything */
+ s->regs[HW_STRAP1] &= ~data;
return;
case FREQ_CNTR_EVAL:
case VGA_SCRATCH1 ... VGA_SCRATCH8:
case RNG_DATA:
case FREE_CNTR4:
case FREE_CNTR4_EXT:
+ case CHIP_ID0:
+ case CHIP_ID1:
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Write to read-only offset 0x%" HWADDR_PRIx "\n",
__func__, offset);
@@ -291,9 +324,18 @@ static void aspeed_scu_write(void *opaque, hwaddr offset, uint64_t data,
s->regs[reg] = data;
}
-static const MemoryRegionOps aspeed_scu_ops = {
+static const MemoryRegionOps aspeed_ast2400_scu_ops = {
+ .read = aspeed_scu_read,
+ .write = aspeed_ast2400_scu_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+ .valid.unaligned = false,
+};
+
+static const MemoryRegionOps aspeed_ast2500_scu_ops = {
.read = aspeed_scu_read,
- .write = aspeed_scu_write,
+ .write = aspeed_ast2500_scu_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid.min_access_size = 4,
.valid.max_access_size = 4,
@@ -469,7 +511,7 @@ static void aspeed_2400_scu_class_init(ObjectClass *klass, void *data)
asc->calc_hpll = aspeed_2400_scu_calc_hpll;
asc->apb_divider = 2;
asc->nr_regs = ASPEED_SCU_NR_REGS;
- asc->ops = &aspeed_scu_ops;
+ asc->ops = &aspeed_ast2400_scu_ops;
}
static const TypeInfo aspeed_2400_scu_info = {
@@ -489,7 +531,7 @@ static void aspeed_2500_scu_class_init(ObjectClass *klass, void *data)
asc->calc_hpll = aspeed_2500_scu_calc_hpll;
asc->apb_divider = 4;
asc->nr_regs = ASPEED_SCU_NR_REGS;
- asc->ops = &aspeed_scu_ops;
+ asc->ops = &aspeed_ast2500_scu_ops;
}
static const TypeInfo aspeed_2500_scu_info = {
@@ -586,6 +628,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
case AST2600_RNG_DATA:
case AST2600_SILICON_REV:
case AST2600_SILICON_REV2:
+ case AST2600_CHIP_ID0:
+ case AST2600_CHIP_ID1:
/* Add read only registers here */
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Write to read-only offset 0x%" HWADDR_PRIx "\n",
@@ -614,6 +658,9 @@ static const uint32_t ast2600_a0_resets[ASPEED_AST2600_SCU_NR_REGS] = {
[AST2600_CLK_STOP_CTRL2] = 0xFFF0FFF0,
[AST2600_SDRAM_HANDSHAKE] = 0x00000040, /* SoC completed DRAM init */
[AST2600_HPLL_PARAM] = 0x1000405F,
+ [AST2600_CHIP_ID0] = 0x1234ABCD,
+ [AST2600_CHIP_ID1] = 0x88884444,
+
};
static void aspeed_ast2600_scu_reset(DeviceState *dev)
diff --git a/hw/misc/iotkit-secctl.c b/hw/misc/iotkit-secctl.c
index 609869821a..9fdb82056a 100644
--- a/hw/misc/iotkit-secctl.c
+++ b/hw/misc/iotkit-secctl.c
@@ -340,7 +340,7 @@ static MemTxResult iotkit_secctl_s_write(void *opaque, hwaddr addr,
qemu_set_irq(s->sec_resp_cfg, s->secrespcfg);
break;
case A_SECPPCINTCLR:
- value &= 0x00f000f3;
+ s->secppcintstat &= ~(value & 0x00f000f3);
foreach_ppc(s, iotkit_secctl_ppc_update_irq_clear);
break;
case A_SECPPCINTEN:
diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c
index 71afd23b67..08f2fc1dde 100644
--- a/hw/sh4/sh_pci.c
+++ b/hw/sh4/sh_pci.c
@@ -67,12 +67,8 @@ static void sh_pci_reg_write (void *p, hwaddr addr, uint64_t val,
pcic->mbr = val & 0xff000001;
break;
case 0x1c8:
- if ((val & 0xfffc0000) != (pcic->iobr & 0xfffc0000)) {
- memory_region_del_subregion(get_system_memory(), &pcic->isa);
- pcic->iobr = val & 0xfffc0001;
- memory_region_add_subregion(get_system_memory(),
- pcic->iobr & 0xfffc0000, &pcic->isa);
- }
+ pcic->iobr = val & 0xfffc0001;
+ memory_region_set_alias_offset(&pcic->isa, val & 0xfffc0000);
break;
case 0x220:
pci_data_write(phb->bus, pcic->par, val, 4);
@@ -147,8 +143,7 @@ static void sh_pci_device_realize(DeviceState *dev, Error **errp)
get_system_io(), 0, 0x40000);
sysbus_init_mmio(sbd, &s->memconfig_p4);
sysbus_init_mmio(sbd, &s->memconfig_a7);
- s->iobr = 0xfe240000;
- memory_region_add_subregion(get_system_memory(), s->iobr, &s->isa);
+ memory_region_add_subregion(get_system_memory(), 0xfe240000, &s->isa);
s->dev = pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "sh_pci_host");
}
diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index 6c9ef59779..c57850a505 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -576,11 +576,11 @@ static int xilinx_spips_num_dummies(XilinxQSPIPS *qs, uint8_t command)
case FAST_READ:
case DOR:
case QOR:
+ case FAST_READ_4:
case DOR_4:
case QOR_4:
return 1;
case DIOR:
- case FAST_READ_4:
case DIOR_4:
return 2;
case QIOR:
diff --git a/hw/usb/hcd-ehci-sysbus.c b/hw/usb/hcd-ehci-sysbus.c
index 8d4738565e..b22fb258be 100644
--- a/hw/usb/hcd-ehci-sysbus.c
+++ b/hw/usb/hcd-ehci-sysbus.c
@@ -33,6 +33,8 @@ static const VMStateDescription vmstate_ehci_sysbus = {
static Property ehci_sysbus_properties[] = {
DEFINE_PROP_UINT32("maxframes", EHCISysBusState, ehci.maxframes, 128),
+ DEFINE_PROP_BOOL("companion-enable", EHCISysBusState, ehci.companion_enable,
+ false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 8a94bd004a..1e6e85e86a 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -1870,21 +1870,6 @@ void ohci_sysbus_die(struct OHCIState *ohci)
ohci_bus_stop(ohci);
}
-#define TYPE_SYSBUS_OHCI "sysbus-ohci"
-#define SYSBUS_OHCI(obj) OBJECT_CHECK(OHCISysBusState, (obj), TYPE_SYSBUS_OHCI)
-
-typedef struct {
- /*< private >*/
- SysBusDevice parent_obj;
- /*< public >*/
-
- OHCIState ohci;
- char *masterbus;
- uint32_t num_ports;
- uint32_t firstport;
- dma_addr_t dma_offset;
-} OHCISysBusState;
-
static void ohci_realize_pxa(DeviceState *dev, Error **errp)
{
OHCISysBusState *s = SYSBUS_OHCI(dev);
diff --git a/hw/usb/hcd-ohci.h b/hw/usb/hcd-ohci.h
index 16e3f1e13a..5c8819aedf 100644
--- a/hw/usb/hcd-ohci.h
+++ b/hw/usb/hcd-ohci.h
@@ -22,6 +22,7 @@
#define HCD_OHCI_H
#include "sysemu/dma.h"
+#include "hw/usb.h"
/* Number of Downstream Ports on the root hub: */
#define OHCI_MAX_PORTS 15
@@ -90,6 +91,21 @@ typedef struct OHCIState {
void (*ohci_die)(struct OHCIState *ohci);
} OHCIState;
+#define TYPE_SYSBUS_OHCI "sysbus-ohci"
+#define SYSBUS_OHCI(obj) OBJECT_CHECK(OHCISysBusState, (obj), TYPE_SYSBUS_OHCI)
+
+typedef struct {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ OHCIState ohci;
+ char *masterbus;
+ uint32_t num_ports;
+ uint32_t firstport;
+ dma_addr_t dma_offset;
+} OHCISysBusState;
+
extern const VMStateDescription vmstate_ohci_state;
void usb_ohci_init(OHCIState *ohci, DeviceState *dev, uint32_t num_ports,
diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h
index 40d0b1d9c0..8af724548f 100644
--- a/include/hw/arm/allwinner-a10.h
+++ b/include/hw/arm/allwinner-a10.h
@@ -8,12 +8,16 @@
#include "hw/intc/allwinner-a10-pic.h"
#include "hw/net/allwinner_emac.h"
#include "hw/ide/ahci.h"
+#include "hw/usb/hcd-ohci.h"
+#include "hw/usb/hcd-ehci.h"
#include "target/arm/cpu.h"
#define AW_A10_SDRAM_BASE 0x40000000
+#define AW_A10_NUM_USB 2
+
#define TYPE_AW_A10 "allwinner-a10"
#define AW_A10(obj) OBJECT_CHECK(AwA10State, (obj), TYPE_AW_A10)
@@ -28,6 +32,8 @@ typedef struct AwA10State {
AwEmacState emac;
AllwinnerAHCIState sata;
MemoryRegion sram_a;
+ EHCISysBusState ehci[AW_A10_NUM_USB];
+ OHCISysBusState ohci[AW_A10_NUM_USB];
} AwA10State;
#endif
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f3080a1635..b1a895f24c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -475,8 +475,8 @@ static uint32_t get_elf_hwcap(void)
GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3);
GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS);
GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4);
- GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA);
- GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT);
+ GET_FEATURE_ID(aa32_arm_div, ARM_HWCAP_ARM_IDIVA);
+ GET_FEATURE_ID(aa32_thumb_div, ARM_HWCAP_ARM_IDIVT);
/* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c.
* Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of
* ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index de733aceeb..2eadf4dcb8 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1009,11 +1009,10 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
if (flags & CPU_DUMP_FPU) {
int numvfpregs = 0;
- if (arm_feature(env, ARM_FEATURE_VFP)) {
- numvfpregs += 16;
- }
- if (arm_feature(env, ARM_FEATURE_VFP3)) {
- numvfpregs += 16;
+ if (cpu_isar_feature(aa32_simd_r32, cpu)) {
+ numvfpregs = 32;
+ } else if (arm_feature(env, ARM_FEATURE_VFP)) {
+ numvfpregs = 16;
}
for (i = 0; i < numvfpregs; i++) {
uint64_t v = *aa32_vfp_dreg(env, i);
@@ -1586,7 +1585,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
* Presence of EL2 itself is ARM_FEATURE_EL2, and of the
* Security Extensions is ARM_FEATURE_EL3.
*/
- assert(!tcg_enabled() || no_aa32 || cpu_isar_feature(arm_div, cpu));
+ assert(!tcg_enabled() || no_aa32 ||
+ cpu_isar_feature(aa32_arm_div, cpu));
set_feature(env, ARM_FEATURE_LPAE);
set_feature(env, ARM_FEATURE_V7);
}
@@ -1612,7 +1612,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
if (arm_feature(env, ARM_FEATURE_V6)) {
set_feature(env, ARM_FEATURE_V5);
if (!arm_feature(env, ARM_FEATURE_M)) {
- assert(!tcg_enabled() || no_aa32 || cpu_isar_feature(jazelle, cpu));
+ assert(!tcg_enabled() || no_aa32 ||
+ cpu_isar_feature(aa32_jazelle, cpu));
set_feature(env, ARM_FEATURE_AUXCR);
}
}
@@ -1716,8 +1717,9 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
cpu);
#endif
} else {
- cpu->id_aa64dfr0 &= ~0xf00;
- cpu->id_dfr0 &= ~(0xf << 24);
+ cpu->isar.id_aa64dfr0 =
+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0);
+ cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0);
cpu->pmceid0 = 0;
cpu->pmceid1 = 0;
}
@@ -1870,10 +1872,11 @@ static void arm926_initfn(Object *obj)
*/
cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
/*
- * Similarly, we need to set MVFR0 fields to enable double precision
- * and short vector support even though ARMv5 doesn't have this register.
+ * Similarly, we need to set MVFR0 fields to enable vfp and short vector
+ * support even though ARMv5 doesn't have this register.
*/
cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1);
cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1);
}
@@ -1912,10 +1915,11 @@ static void arm1026_initfn(Object *obj)
*/
cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
/*
- * Similarly, we need to set MVFR0 fields to enable double precision
- * and short vector support even though ARMv5 doesn't have this register.
+ * Similarly, we need to set MVFR0 fields to enable vfp and short vector
+ * support even though ARMv5 doesn't have this register.
*/
cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1);
cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1);
{
@@ -1955,11 +1959,11 @@ static void arm1136_r2_initfn(Object *obj)
cpu->reset_sctlr = 0x00050078;
cpu->id_pfr0 = 0x111;
cpu->id_pfr1 = 0x1;
- cpu->id_dfr0 = 0x2;
+ cpu->isar.id_dfr0 = 0x2;
cpu->id_afr0 = 0x3;
- cpu->id_mmfr0 = 0x01130003;
- cpu->id_mmfr1 = 0x10030302;
- cpu->id_mmfr2 = 0x01222110;
+ cpu->isar.id_mmfr0 = 0x01130003;
+ cpu->isar.id_mmfr1 = 0x10030302;
+ cpu->isar.id_mmfr2 = 0x01222110;
cpu->isar.id_isar0 = 0x00140011;
cpu->isar.id_isar1 = 0x12002111;
cpu->isar.id_isar2 = 0x11231111;
@@ -1987,11 +1991,11 @@ static void arm1136_initfn(Object *obj)
cpu->reset_sctlr = 0x00050078;
cpu->id_pfr0 = 0x111;
cpu->id_pfr1 = 0x1;
- cpu->id_dfr0 = 0x2;
+ cpu->isar.id_dfr0 = 0x2;
cpu->id_afr0 = 0x3;
- cpu->id_mmfr0 = 0x01130003;
- cpu->id_mmfr1 = 0x10030302;
- cpu->id_mmfr2 = 0x01222110;
+ cpu->isar.id_mmfr0 = 0x01130003;
+ cpu->isar.id_mmfr1 = 0x10030302;
+ cpu->isar.id_mmfr2 = 0x01222110;
cpu->isar.id_isar0 = 0x00140011;
cpu->isar.id_isar1 = 0x12002111;
cpu->isar.id_isar2 = 0x11231111;
@@ -2020,11 +2024,11 @@ static void arm1176_initfn(Object *obj)
cpu->reset_sctlr = 0x00050078;
cpu->id_pfr0 = 0x111;
cpu->id_pfr1 = 0x11;
- cpu->id_dfr0 = 0x33;
+ cpu->isar.id_dfr0 = 0x33;
cpu->id_afr0 = 0;
- cpu->id_mmfr0 = 0x01130003;
- cpu->id_mmfr1 = 0x10030302;
- cpu->id_mmfr2 = 0x01222100;
+ cpu->isar.id_mmfr0 = 0x01130003;
+ cpu->isar.id_mmfr1 = 0x10030302;
+ cpu->isar.id_mmfr2 = 0x01222100;
cpu->isar.id_isar0 = 0x0140011;
cpu->isar.id_isar1 = 0x12002111;
cpu->isar.id_isar2 = 0x11231121;
@@ -2050,11 +2054,11 @@ static void arm11mpcore_initfn(Object *obj)
cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */
cpu->id_pfr0 = 0x111;
cpu->id_pfr1 = 0x1;
- cpu->id_dfr0 = 0;
+ cpu->isar.id_dfr0 = 0;
cpu->id_afr0 = 0x2;
- cpu->id_mmfr0 = 0x01100103;
- cpu->id_mmfr1 = 0x10020302;
- cpu->id_mmfr2 = 0x01222000;
+ cpu->isar.id_mmfr0 = 0x01100103;
+ cpu->isar.id_mmfr1 = 0x10020302;
+ cpu->isar.id_mmfr2 = 0x01222000;
cpu->isar.id_isar0 = 0x00100011;
cpu->isar.id_isar1 = 0x12002111;
cpu->isar.id_isar2 = 0x11221011;
@@ -2082,12 +2086,12 @@ static void cortex_m3_initfn(Object *obj)
cpu->pmsav7_dregion = 8;
cpu->id_pfr0 = 0x00000030;
cpu->id_pfr1 = 0x00000200;
- cpu->id_dfr0 = 0x00100000;
+ cpu->isar.id_dfr0 = 0x00100000;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00000030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x00000000;
- cpu->id_mmfr3 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00000030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x00000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
cpu->isar.id_isar0 = 0x01141110;
cpu->isar.id_isar1 = 0x02111000;
cpu->isar.id_isar2 = 0x21112231;
@@ -2113,12 +2117,12 @@ static void cortex_m4_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000000;
cpu->id_pfr0 = 0x00000030;
cpu->id_pfr1 = 0x00000200;
- cpu->id_dfr0 = 0x00100000;
+ cpu->isar.id_dfr0 = 0x00100000;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00000030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x00000000;
- cpu->id_mmfr3 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00000030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x00000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
cpu->isar.id_isar0 = 0x01141110;
cpu->isar.id_isar1 = 0x02111000;
cpu->isar.id_isar2 = 0x21112231;
@@ -2144,12 +2148,12 @@ static void cortex_m7_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000040;
cpu->id_pfr0 = 0x00000030;
cpu->id_pfr1 = 0x00000200;
- cpu->id_dfr0 = 0x00100000;
+ cpu->isar.id_dfr0 = 0x00100000;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00100030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x01000000;
- cpu->id_mmfr3 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00100030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
cpu->isar.id_isar0 = 0x01101110;
cpu->isar.id_isar1 = 0x02112000;
cpu->isar.id_isar2 = 0x20232231;
@@ -2177,12 +2181,12 @@ static void cortex_m33_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000040;
cpu->id_pfr0 = 0x00000030;
cpu->id_pfr1 = 0x00000210;
- cpu->id_dfr0 = 0x00200000;
+ cpu->isar.id_dfr0 = 0x00200000;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00101F40;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x01000000;
- cpu->id_mmfr3 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00101F40;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
cpu->isar.id_isar0 = 0x01101110;
cpu->isar.id_isar1 = 0x02212000;
cpu->isar.id_isar2 = 0x20232232;
@@ -2229,12 +2233,12 @@ static void cortex_r5_initfn(Object *obj)
cpu->midr = 0x411fc153; /* r1p3 */
cpu->id_pfr0 = 0x0131;
cpu->id_pfr1 = 0x001;
- cpu->id_dfr0 = 0x010400;
+ cpu->isar.id_dfr0 = 0x010400;
cpu->id_afr0 = 0x0;
- cpu->id_mmfr0 = 0x0210030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x01200000;
- cpu->id_mmfr3 = 0x0211;
+ cpu->isar.id_mmfr0 = 0x0210030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01200000;
+ cpu->isar.id_mmfr3 = 0x0211;
cpu->isar.id_isar0 = 0x02101111;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232141;
@@ -2284,18 +2288,18 @@ static void cortex_a8_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50078;
cpu->id_pfr0 = 0x1031;
cpu->id_pfr1 = 0x11;
- cpu->id_dfr0 = 0x400;
+ cpu->isar.id_dfr0 = 0x400;
cpu->id_afr0 = 0;
- cpu->id_mmfr0 = 0x31100003;
- cpu->id_mmfr1 = 0x20000000;
- cpu->id_mmfr2 = 0x01202000;
- cpu->id_mmfr3 = 0x11;
+ cpu->isar.id_mmfr0 = 0x31100003;
+ cpu->isar.id_mmfr1 = 0x20000000;
+ cpu->isar.id_mmfr2 = 0x01202000;
+ cpu->isar.id_mmfr3 = 0x11;
cpu->isar.id_isar0 = 0x00101111;
cpu->isar.id_isar1 = 0x12112111;
cpu->isar.id_isar2 = 0x21232031;
cpu->isar.id_isar3 = 0x11112131;
cpu->isar.id_isar4 = 0x00111142;
- cpu->dbgdidr = 0x15141000;
+ cpu->isar.dbgdidr = 0x15141000;
cpu->clidr = (1 << 27) | (2 << 24) | 3;
cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */
@@ -2357,18 +2361,18 @@ static void cortex_a9_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50078;
cpu->id_pfr0 = 0x1031;
cpu->id_pfr1 = 0x11;
- cpu->id_dfr0 = 0x000;
+ cpu->isar.id_dfr0 = 0x000;
cpu->id_afr0 = 0;
- cpu->id_mmfr0 = 0x00100103;
- cpu->id_mmfr1 = 0x20000000;
- cpu->id_mmfr2 = 0x01230000;
- cpu->id_mmfr3 = 0x00002111;
+ cpu->isar.id_mmfr0 = 0x00100103;
+ cpu->isar.id_mmfr1 = 0x20000000;
+ cpu->isar.id_mmfr2 = 0x01230000;
+ cpu->isar.id_mmfr3 = 0x00002111;
cpu->isar.id_isar0 = 0x00101111;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232041;
cpu->isar.id_isar3 = 0x11112131;
cpu->isar.id_isar4 = 0x00111142;
- cpu->dbgdidr = 0x35141000;
+ cpu->isar.dbgdidr = 0x35141000;
cpu->clidr = (1 << 27) | (1 << 24) | 3;
cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
@@ -2422,12 +2426,12 @@ static void cortex_a7_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50078;
cpu->id_pfr0 = 0x00001131;
cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x02010555;
+ cpu->isar.id_dfr0 = 0x02010555;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10101105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01240000;
- cpu->id_mmfr3 = 0x02102211;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01240000;
+ cpu->isar.id_mmfr3 = 0x02102211;
/* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
* table 4-41 gives 0x02101110, which includes the arm div insns.
*/
@@ -2436,7 +2440,7 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.id_isar2 = 0x21232041;
cpu->isar.id_isar3 = 0x11112131;
cpu->isar.id_isar4 = 0x10011142;
- cpu->dbgdidr = 0x3515f005;
+ cpu->isar.dbgdidr = 0x3515f005;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
@@ -2468,18 +2472,18 @@ static void cortex_a15_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50078;
cpu->id_pfr0 = 0x00001131;
cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x02010555;
+ cpu->isar.id_dfr0 = 0x02010555;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10201105;
- cpu->id_mmfr1 = 0x20000000;
- cpu->id_mmfr2 = 0x01240000;
- cpu->id_mmfr3 = 0x02102211;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x20000000;
+ cpu->isar.id_mmfr2 = 0x01240000;
+ cpu->isar.id_mmfr3 = 0x02102211;
cpu->isar.id_isar0 = 0x02101110;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232041;
cpu->isar.id_isar3 = 0x11112131;
cpu->isar.id_isar4 = 0x10011142;
- cpu->dbgdidr = 0x3515f021;
+ cpu->isar.dbgdidr = 0x3515f021;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
@@ -2709,13 +2713,14 @@ static void arm_max_initfn(Object *obj)
t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */
cpu->isar.mvfr2 = t;
- t = cpu->id_mmfr3;
+ t = cpu->isar.id_mmfr3;
t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */
- cpu->id_mmfr3 = t;
+ cpu->isar.id_mmfr3 = t;
- t = cpu->id_mmfr4;
+ t = cpu->isar.id_mmfr4;
t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */
- cpu->id_mmfr4 = t;
+ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
+ cpu->isar.id_mmfr4 = t;
}
#endif
}
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e943ffe8a9..65171cb30e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -853,6 +853,11 @@ struct ARMCPU {
* prefix means a constant register.
* Some of these registers are split out into a substructure that
* is shared with the translators to control the ISA.
+ *
+ * Note that if you add an ID register to the ARMISARegisters struct
+ * you need to also update the 32-bit and 64-bit versions of the
+ * kvm_arm_get_host_cpu_features() function to correctly populate the
+ * field by reading the value from the KVM vCPU.
*/
struct ARMISARegisters {
uint32_t id_isar0;
@@ -862,9 +867,16 @@ struct ARMCPU {
uint32_t id_isar4;
uint32_t id_isar5;
uint32_t id_isar6;
+ uint32_t id_mmfr0;
+ uint32_t id_mmfr1;
+ uint32_t id_mmfr2;
+ uint32_t id_mmfr3;
+ uint32_t id_mmfr4;
uint32_t mvfr0;
uint32_t mvfr1;
uint32_t mvfr2;
+ uint32_t id_dfr0;
+ uint32_t dbgdidr;
uint64_t id_aa64isar0;
uint64_t id_aa64isar1;
uint64_t id_aa64pfr0;
@@ -872,6 +884,8 @@ struct ARMCPU {
uint64_t id_aa64mmfr0;
uint64_t id_aa64mmfr1;
uint64_t id_aa64mmfr2;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
} isar;
uint32_t midr;
uint32_t revidr;
@@ -880,20 +894,11 @@ struct ARMCPU {
uint32_t reset_sctlr;
uint32_t id_pfr0;
uint32_t id_pfr1;
- uint32_t id_dfr0;
uint64_t pmceid0;
uint64_t pmceid1;
uint32_t id_afr0;
- uint32_t id_mmfr0;
- uint32_t id_mmfr1;
- uint32_t id_mmfr2;
- uint32_t id_mmfr3;
- uint32_t id_mmfr4;
- uint64_t id_aa64dfr0;
- uint64_t id_aa64dfr1;
uint64_t id_aa64afr0;
uint64_t id_aa64afr1;
- uint32_t dbgdidr;
uint32_t clidr;
uint64_t mp_affinity; /* MP ID without feature bits */
/* The elements of this array are the CCSIDR values for each cache,
@@ -1821,6 +1826,16 @@ FIELD(ID_AA64MMFR2, BBM, 52, 4)
FIELD(ID_AA64MMFR2, EVT, 56, 4)
FIELD(ID_AA64MMFR2, E0PD, 60, 4)
+FIELD(ID_AA64DFR0, DEBUGVER, 0, 4)
+FIELD(ID_AA64DFR0, TRACEVER, 4, 4)
+FIELD(ID_AA64DFR0, PMUVER, 8, 4)
+FIELD(ID_AA64DFR0, BRPS, 12, 4)
+FIELD(ID_AA64DFR0, WRPS, 20, 4)
+FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4)
+FIELD(ID_AA64DFR0, PMSVER, 32, 4)
+FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4)
+FIELD(ID_AA64DFR0, TRACEFILT, 40, 4)
+
FIELD(ID_DFR0, COPDBG, 0, 4)
FIELD(ID_DFR0, COPSDBG, 4, 4)
FIELD(ID_DFR0, MMAPDBG, 8, 4)
@@ -1830,6 +1845,13 @@ FIELD(ID_DFR0, MPROFDBG, 20, 4)
FIELD(ID_DFR0, PERFMON, 24, 4)
FIELD(ID_DFR0, TRACEFILT, 28, 4)
+FIELD(DBGDIDR, SE_IMP, 12, 1)
+FIELD(DBGDIDR, NSUHD_IMP, 14, 1)
+FIELD(DBGDIDR, VERSION, 16, 4)
+FIELD(DBGDIDR, CTX_CMPS, 20, 4)
+FIELD(DBGDIDR, BRPS, 24, 4)
+FIELD(DBGDIDR, WRPS, 28, 4)
+
FIELD(MVFR0, SIMDREG, 0, 4)
FIELD(MVFR0, FPSP, 4, 4)
FIELD(MVFR0, FPDP, 8, 4)
@@ -3325,19 +3347,35 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
extern const uint64_t pred_esz_masks[4];
/*
+ * Naming convention for isar_feature functions:
+ * Functions which test 32-bit ID registers should have _aa32_ in
+ * their name. Functions which test 64-bit ID registers should have
+ * _aa64_ in their name. These must only be used in code where we
+ * know for certain that the CPU has AArch32 or AArch64 respectively
+ * or where the correct answer for a CPU which doesn't implement that
+ * CPU state is "false" (eg when generating A32 or A64 code, if adding
+ * system registers that are specific to that CPU state, for "should
+ * we let this system register bit be set" tests where the 32-bit
+ * flavour of the register doesn't have the bit, and so on).
+ * Functions which simply ask "does this feature exist at all" have
+ * _any_ in their name, and always return the logical OR of the _aa64_
+ * and the _aa32_ function.
+ */
+
+/*
* 32-bit feature tests via id registers.
*/
-static inline bool isar_feature_thumb_div(const ARMISARegisters *id)
+static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id)
{
return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
}
-static inline bool isar_feature_arm_div(const ARMISARegisters *id)
+static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id)
{
return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1;
}
-static inline bool isar_feature_jazelle(const ARMISARegisters *id)
+static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id)
{
return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0;
}
@@ -3412,21 +3450,21 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
}
-static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id)
+static inline bool isar_feature_aa32_simd_r32(const ARMISARegisters *id)
{
/* Return true if D16-D31 are implemented */
- return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2;
+ return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) >= 2;
}
static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0;
+ return FIELD_EX32(id->mvfr0, MVFR0, FPSHVEC) > 0;
}
static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id)
{
/* Return true if CPU supports double precision floating point */
- return FIELD_EX64(id->mvfr0, MVFR0, FPDP) > 0;
+ return FIELD_EX32(id->mvfr0, MVFR0, FPDP) > 0;
}
/*
@@ -3436,42 +3474,66 @@ static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id)
*/
static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 0;
+ return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 0;
}
static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 1;
+ return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
}
static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 1;
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1;
}
static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 2;
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 2;
}
static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 3;
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 3;
}
static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4;
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 4;
}
static inline bool isar_feature_aa32_pan(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) != 0;
+ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0;
}
static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id)
{
- return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2;
+ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2;
+}
+
+static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id)
+{
+ /* 0xf means "non-standard IMPDEF PMU" */
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 &&
+ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+}
+
+static inline bool isar_feature_aa32_pmu_8_4(const ARMISARegisters *id)
+{
+ /* 0xf means "non-standard IMPDEF PMU" */
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 &&
+ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+}
+
+static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0;
+}
+
+static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0;
}
/*
@@ -3653,6 +3715,41 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
}
+static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
+ FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+}
+
+static inline bool isar_feature_aa64_pmu_8_4(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 &&
+ FIELD_EX32(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+}
+
+/*
+ * Feature tests for "does this exist in either 32-bit or 64-bit?"
+ */
+static inline bool isar_feature_any_fp16(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_fp16(id) || isar_feature_aa32_fp16_arith(id);
+}
+
+static inline bool isar_feature_any_predinv(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_predinv(id) || isar_feature_aa32_predinv(id);
+}
+
+static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_pmu_8_1(id) || isar_feature_aa32_pmu_8_1(id);
+}
+
+static inline bool isar_feature_any_pmu_8_4(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_pmu_8_4(id) || isar_feature_aa32_pmu_8_4(id);
+}
+
/*
* Forward to the above feature tests given an ARMCPU pointer.
*/
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index f0d98bc79d..0929401a4d 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -121,12 +121,12 @@ static void aarch64_a57_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50838;
cpu->id_pfr0 = 0x00000131;
cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x03010066;
+ cpu->isar.id_dfr0 = 0x03010066;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10101105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01260000;
- cpu->id_mmfr3 = 0x02102211;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
cpu->isar.id_isar0 = 0x02101110;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232042;
@@ -135,10 +135,10 @@ static void aarch64_a57_initfn(Object *obj)
cpu->isar.id_isar5 = 0x00011121;
cpu->isar.id_isar6 = 0;
cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
cpu->isar.id_aa64isar0 = 0x00011120;
cpu->isar.id_aa64mmfr0 = 0x00001124;
- cpu->dbgdidr = 0x3516d000;
+ cpu->isar.dbgdidr = 0x3516d000;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
@@ -175,12 +175,12 @@ static void aarch64_a53_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50838;
cpu->id_pfr0 = 0x00000131;
cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x03010066;
+ cpu->isar.id_dfr0 = 0x03010066;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10101105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01260000;
- cpu->id_mmfr3 = 0x02102211;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
cpu->isar.id_isar0 = 0x02101110;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232042;
@@ -189,10 +189,10 @@ static void aarch64_a53_initfn(Object *obj)
cpu->isar.id_isar5 = 0x00011121;
cpu->isar.id_isar6 = 0;
cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
cpu->isar.id_aa64isar0 = 0x00011120;
cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */
- cpu->dbgdidr = 0x3516d000;
+ cpu->isar.dbgdidr = 0x3516d000;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
@@ -228,12 +228,12 @@ static void aarch64_a72_initfn(Object *obj)
cpu->reset_sctlr = 0x00c50838;
cpu->id_pfr0 = 0x00000131;
cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x03010066;
+ cpu->isar.id_dfr0 = 0x03010066;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10201105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01260000;
- cpu->id_mmfr3 = 0x02102211;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
cpu->isar.id_isar0 = 0x02101110;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232042;
@@ -241,10 +241,10 @@ static void aarch64_a72_initfn(Object *obj)
cpu->isar.id_isar4 = 0x00011142;
cpu->isar.id_isar5 = 0x00011121;
cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
cpu->isar.id_aa64isar0 = 0x00011120;
cpu->isar.id_aa64mmfr0 = 0x00001124;
- cpu->dbgdidr = 0x3516d000;
+ cpu->isar.dbgdidr = 0x3516d000;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
@@ -699,9 +699,21 @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1);
cpu->isar.id_isar6 = u;
- u = cpu->id_mmfr3;
+ u = cpu->isar.id_mmfr3;
u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */
- cpu->id_mmfr3 = u;
+ cpu->isar.id_mmfr3 = u;
+
+ u = cpu->isar.id_mmfr4;
+ u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
+ cpu->isar.id_mmfr4 = u;
+
+ u = cpu->isar.id_aa64dfr0;
+ u = FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
+ cpu->isar.id_aa64dfr0 = u;
+
+ u = cpu->isar.id_dfr0;
+ u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */
+ cpu->isar.id_dfr0 = u;
/*
* FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet,
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 2e3e90c6a5..2ff72d47d1 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -16,8 +16,8 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn)
{
CPUARMState *env = &cpu->env;
uint64_t bcr = env->cp15.dbgbcr[lbn];
- int brps = extract32(cpu->dbgdidr, 24, 4);
- int ctx_cmps = extract32(cpu->dbgdidr, 20, 4);
+ int brps = arm_num_brps(cpu);
+ int ctx_cmps = arm_num_ctx_cmps(cpu);
int bt;
uint32_t contextidr;
uint64_t hcr_el2;
@@ -29,7 +29,7 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn)
* case DBGWCR<n>_EL1.LBN must indicate that breakpoint).
* We choose the former.
*/
- if (lbn > brps || lbn < (brps - ctx_cmps)) {
+ if (lbn >= brps || lbn < (brps - ctx_cmps)) {
return false;
}
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 9e79182ab4..2f47279155 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1574,3 +1574,5 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 366dbcf460..79db169e04 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -25,6 +25,7 @@
#include "hw/semihosting/semihost.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
#include "qemu/range.h"
#include "qapi/qapi-commands-machine-target.h"
#include "qapi/error.h"
@@ -49,10 +50,10 @@ static void switch_mode(CPUARMState *env, int mode);
static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
{
- int nregs;
+ ARMCPU *cpu = env_archcpu(env);
+ int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16;
/* VFP data registers are always little-endian. */
- nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16;
if (reg < nregs) {
stq_le_p(buf, *aa32_vfp_dreg(env, reg));
return 8;
@@ -77,9 +78,9 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
{
- int nregs;
+ ARMCPU *cpu = env_archcpu(env);
+ int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16;
- nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16;
if (reg < nregs) {
*aa32_vfp_dreg(env, reg) = ldq_le_p(buf);
return 8;
@@ -905,8 +906,7 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* VFPv3 and upwards with NEON implement 32 double precision
* registers (D0-D31).
*/
- if (!arm_feature(env, ARM_FEATURE_NEON) ||
- !arm_feature(env, ARM_FEATURE_VFP3)) {
+ if (!cpu_isar_feature(aa32_simd_r32, env_archcpu(env))) {
/* D32DIS [30] is RAO/WI if D16-31 are not implemented. */
value |= (1 << 30);
}
@@ -1016,11 +1016,17 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
#define PMCRN_MASK 0xf800
#define PMCRN_SHIFT 11
#define PMCRLC 0x40
-#define PMCRDP 0x10
+#define PMCRDP 0x20
+#define PMCRX 0x10
#define PMCRD 0x8
#define PMCRC 0x4
#define PMCRP 0x2
#define PMCRE 0x1
+/*
+ * Mask of PMCR bits writeable by guest (not including WO bits like C, P,
+ * which can be written as 1 to trigger behaviour but which stay RAZ).
+ */
+#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE)
#define PMXEVTYPER_P 0x80000000
#define PMXEVTYPER_U 0x40000000
@@ -1123,6 +1129,30 @@ static int64_t instructions_ns_per(uint64_t icount)
}
#endif
+static bool pmu_8_1_events_supported(CPUARMState *env)
+{
+ /* For events which are supported in any v8.1 PMU */
+ return cpu_isar_feature(any_pmu_8_1, env_archcpu(env));
+}
+
+static bool pmu_8_4_events_supported(CPUARMState *env)
+{
+ /* For events which are supported in any v8.1 PMU */
+ return cpu_isar_feature(any_pmu_8_4, env_archcpu(env));
+}
+
+static uint64_t zero_event_get_count(CPUARMState *env)
+{
+ /* For events which on QEMU never fire, so their count is always zero */
+ return 0;
+}
+
+static int64_t zero_event_ns_per(uint64_t cycles)
+{
+ /* An event which never fires can never overflow */
+ return -1;
+}
+
static const pm_event pm_events[] = {
{ .number = 0x000, /* SW_INCR */
.supported = event_always_supported,
@@ -1139,8 +1169,23 @@ static const pm_event pm_events[] = {
.supported = event_always_supported,
.get_count = cycles_get_count,
.ns_per_count = cycles_ns_per,
- }
+ },
#endif
+ { .number = 0x023, /* STALL_FRONTEND */
+ .supported = pmu_8_1_events_supported,
+ .get_count = zero_event_get_count,
+ .ns_per_count = zero_event_ns_per,
+ },
+ { .number = 0x024, /* STALL_BACKEND */
+ .supported = pmu_8_1_events_supported,
+ .get_count = zero_event_get_count,
+ .ns_per_count = zero_event_ns_per,
+ },
+ { .number = 0x03c, /* STALL */
+ .supported = pmu_8_4_events_supported,
+ .get_count = zero_event_get_count,
+ .ns_per_count = zero_event_ns_per,
+ },
};
/*
@@ -1149,7 +1194,7 @@ static const pm_event pm_events[] = {
* should first be updated to something sparse instead of the current
* supported_event_map[] array.
*/
-#define MAX_EVENT_ID 0x11
+#define MAX_EVENT_ID 0x3c
#define UNSUPPORTED_EVENT UINT16_MAX
static uint16_t supported_event_map[MAX_EVENT_ID + 1];
@@ -1536,9 +1581,8 @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
}
}
- /* only the DP, X, D and E bits are writable */
- env->cp15.c9_pmcr &= ~0x39;
- env->cp15.c9_pmcr |= (value & 0x39);
+ env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK;
+ env->cp15.c9_pmcr |= (value & PMCR_WRITEABLE_MASK);
pmu_op_finish(env);
}
@@ -6251,26 +6295,16 @@ static void define_debug_regs(ARMCPU *cpu)
ARMCPRegInfo dbgdidr = {
.name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL0_R, .accessfn = access_tda,
- .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr,
+ .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr,
};
/* Note that all these register fields hold "number of Xs minus 1". */
- brps = extract32(cpu->dbgdidr, 24, 4);
- wrps = extract32(cpu->dbgdidr, 28, 4);
- ctx_cmps = extract32(cpu->dbgdidr, 20, 4);
+ brps = arm_num_brps(cpu);
+ wrps = arm_num_wrps(cpu);
+ ctx_cmps = arm_num_ctx_cmps(cpu);
assert(ctx_cmps <= brps);
- /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties
- * of the debug registers such as number of breakpoints;
- * check that if they both exist then they agree.
- */
- if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps);
- assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps);
- assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps);
- }
-
define_one_arm_cp_reg(cpu, &dbgdidr);
define_arm_cp_regs(cpu, debug_cp_reginfo);
@@ -6278,7 +6312,7 @@ static void define_debug_regs(ARMCPU *cpu)
define_arm_cp_regs(cpu, debug_lpae_cp_reginfo);
}
- for (i = 0; i < brps + 1; i++) {
+ for (i = 0; i < brps; i++) {
ARMCPRegInfo dbgregs[] = {
{ .name = "DBGBVR", .state = ARM_CP_STATE_BOTH,
.cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
@@ -6297,7 +6331,7 @@ static void define_debug_regs(ARMCPU *cpu)
define_arm_cp_regs(cpu, dbgregs);
}
- for (i = 0; i < wrps + 1; i++) {
+ for (i = 0; i < wrps; i++) {
ARMCPRegInfo dbgregs[] = {
{ .name = "DBGWVR", .state = ARM_CP_STATE_BOTH,
.cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
@@ -6317,6 +6351,96 @@ static void define_debug_regs(ARMCPU *cpu)
}
}
+static void define_pmu_regs(ARMCPU *cpu)
+{
+ /*
+ * v7 performance monitor control register: same implementor
+ * field as main ID register, and we implement four counters in
+ * addition to the cycle count register.
+ */
+ unsigned int i, pmcrn = 4;
+ ARMCPRegInfo pmcr = {
+ .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
+ .access = PL0_RW,
+ .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr),
+ .accessfn = pmreg_access, .writefn = pmcr_write,
+ .raw_writefn = raw_write,
+ };
+ ARMCPRegInfo pmcr64 = {
+ .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0,
+ .access = PL0_RW, .accessfn = pmreg_access,
+ .type = ARM_CP_IO,
+ .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
+ .resetvalue = (cpu->midr & 0xff000000) | (pmcrn << PMCRN_SHIFT) |
+ PMCRLC,
+ .writefn = pmcr_write, .raw_writefn = raw_write,
+ };
+ define_one_arm_cp_reg(cpu, &pmcr);
+ define_one_arm_cp_reg(cpu, &pmcr64);
+ for (i = 0; i < pmcrn; i++) {
+ char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i);
+ char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i);
+ char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i);
+ char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i);
+ ARMCPRegInfo pmev_regs[] = {
+ { .name = pmevcntr_name, .cp = 15, .crn = 14,
+ .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
+ .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
+ .accessfn = pmreg_access },
+ { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)),
+ .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
+ .type = ARM_CP_IO,
+ .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
+ .raw_readfn = pmevcntr_rawread,
+ .raw_writefn = pmevcntr_rawwrite },
+ { .name = pmevtyper_name, .cp = 15, .crn = 14,
+ .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
+ .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
+ .accessfn = pmreg_access },
+ { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)),
+ .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
+ .type = ARM_CP_IO,
+ .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
+ .raw_writefn = pmevtyper_rawwrite },
+ REGINFO_SENTINEL
+ };
+ define_arm_cp_regs(cpu, pmev_regs);
+ g_free(pmevcntr_name);
+ g_free(pmevcntr_el0_name);
+ g_free(pmevtyper_name);
+ g_free(pmevtyper_el0_name);
+ }
+ if (cpu_isar_feature(aa32_pmu_8_1, cpu)) {
+ ARMCPRegInfo v81_pmu_regs[] = {
+ { .name = "PMCEID2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4,
+ .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .resetvalue = extract64(cpu->pmceid0, 32, 32) },
+ { .name = "PMCEID3", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5,
+ .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .resetvalue = extract64(cpu->pmceid1, 32, 32) },
+ REGINFO_SENTINEL
+ };
+ define_arm_cp_regs(cpu, v81_pmu_regs);
+ }
+ if (cpu_isar_feature(any_pmu_8_4, cpu)) {
+ static const ARMCPRegInfo v84_pmmir = {
+ .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6,
+ .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .resetvalue = 0
+ };
+ define_one_arm_cp_reg(cpu, &v84_pmmir);
+ }
+}
+
/* We don't know until after realize whether there's a GICv3
* attached, and that is what registers the gicv3 sysregs.
* So we have to fill in the GIC fields in ID_PFR/ID_PFR1_EL1/ID_AA64PFR0_EL1
@@ -6737,6 +6861,27 @@ static const ARMCPRegInfo ats1cp_reginfo[] = {
};
#endif
+/*
+ * ACTLR2 and HACTLR2 map to ACTLR_EL1[63:32] and
+ * ACTLR_EL2[63:32]. They exist only if the ID_MMFR4.AC2 field
+ * is non-zero, which is never for ARMv7, optionally in ARMv8
+ * and mandatorily for ARMv8.2 and up.
+ * ACTLR2 is banked for S and NS if EL3 is AArch32. Since QEMU's
+ * implementation is RAZ/WI we can ignore this detail, as we
+ * do for ACTLR.
+ */
+static const ARMCPRegInfo actlr2_hactlr2_reginfo[] = {
+ { .name = "ACTLR2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 3,
+ .access = PL1_RW, .type = ARM_CP_CONST,
+ .resetvalue = 0 },
+ { .name = "HACTLR2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3,
+ .access = PL2_RW, .type = ARM_CP_CONST,
+ .resetvalue = 0 },
+ REGINFO_SENTINEL
+};
+
void register_cp_regs_for_features(ARMCPU *cpu)
{
/* Register all the coprocessor registers based on feature bits */
@@ -6775,7 +6920,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->id_dfr0 },
+ .resetvalue = cpu->isar.id_dfr0 },
{ .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -6785,22 +6930,22 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->id_mmfr0 },
+ .resetvalue = cpu->isar.id_mmfr0 },
{ .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->id_mmfr1 },
+ .resetvalue = cpu->isar.id_mmfr1 },
{ .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->id_mmfr2 },
+ .resetvalue = cpu->isar.id_mmfr2 },
{ .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->id_mmfr3 },
+ .resetvalue = cpu->isar.id_mmfr3 },
{ .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -6835,7 +6980,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->id_mmfr4 },
+ .resetvalue = cpu->isar.id_mmfr4 },
{ .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -6859,67 +7004,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
define_arm_cp_regs(cpu, pmovsset_cp_reginfo);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
- /* v7 performance monitor control register: same implementor
- * field as main ID register, and we implement four counters in
- * addition to the cycle count register.
- */
- unsigned int i, pmcrn = 4;
- ARMCPRegInfo pmcr = {
- .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
- .access = PL0_RW,
- .type = ARM_CP_IO | ARM_CP_ALIAS,
- .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr),
- .accessfn = pmreg_access, .writefn = pmcr_write,
- .raw_writefn = raw_write,
- };
- ARMCPRegInfo pmcr64 = {
- .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0,
- .access = PL0_RW, .accessfn = pmreg_access,
- .type = ARM_CP_IO,
- .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
- .resetvalue = (cpu->midr & 0xff000000) | (pmcrn << PMCRN_SHIFT),
- .writefn = pmcr_write, .raw_writefn = raw_write,
- };
- define_one_arm_cp_reg(cpu, &pmcr);
- define_one_arm_cp_reg(cpu, &pmcr64);
- for (i = 0; i < pmcrn; i++) {
- char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i);
- char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i);
- char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i);
- char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i);
- ARMCPRegInfo pmev_regs[] = {
- { .name = pmevcntr_name, .cp = 15, .crn = 14,
- .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
- .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
- .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
- .accessfn = pmreg_access },
- { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)),
- .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
- .type = ARM_CP_IO,
- .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
- .raw_readfn = pmevcntr_rawread,
- .raw_writefn = pmevcntr_rawwrite },
- { .name = pmevtyper_name, .cp = 15, .crn = 14,
- .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
- .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
- .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
- .accessfn = pmreg_access },
- { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)),
- .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
- .type = ARM_CP_IO,
- .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
- .raw_writefn = pmevtyper_rawwrite },
- REGINFO_SENTINEL
- };
- define_arm_cp_regs(cpu, pmev_regs);
- g_free(pmevcntr_name);
- g_free(pmevcntr_el0_name);
- g_free(pmevtyper_name);
- g_free(pmevtyper_el0_name);
- }
ARMCPRegInfo clidr = {
.name = "CLIDR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
@@ -6930,24 +7014,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
define_one_arm_cp_reg(cpu, &clidr);
define_arm_cp_regs(cpu, v7_cp_reginfo);
define_debug_regs(cpu);
+ define_pmu_regs(cpu);
} else {
define_arm_cp_regs(cpu, not_v7_cp_reginfo);
}
- if (FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) >= 4 &&
- FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) != 0xf) {
- ARMCPRegInfo v81_pmu_regs[] = {
- { .name = "PMCEID2", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4,
- .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
- .resetvalue = extract64(cpu->pmceid0, 32, 32) },
- { .name = "PMCEID3", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5,
- .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
- .resetvalue = extract64(cpu->pmceid1, 32, 32) },
- REGINFO_SENTINEL
- };
- define_arm_cp_regs(cpu, v81_pmu_regs);
- }
if (arm_feature(env, ARM_FEATURE_V8)) {
/* AArch64 ID registers, which all have impdef reset values.
* Note that within the ID register ranges the unused slots
@@ -7005,12 +7075,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->id_aa64dfr0 },
+ .resetvalue = cpu->isar.id_aa64dfr0 },
{ .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->id_aa64dfr1 },
+ .resetvalue = cpu->isar.id_aa64dfr1 },
{ .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -7358,8 +7428,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
} else {
define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo);
define_arm_cp_regs(cpu, vmsa_cp_reginfo);
- /* TTCBR2 is introduced with ARMv8.2-A32HPD. */
- if (FIELD_EX32(cpu->id_mmfr4, ID_MMFR4, HPDS) != 0) {
+ /* TTCBR2 is introduced with ARMv8.2-AA32HPD. */
+ if (cpu_isar_feature(aa32_hpd, cpu)) {
define_one_arm_cp_reg(cpu, &ttbcr2_reginfo);
}
}
@@ -7396,7 +7466,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (arm_feature(env, ARM_FEATURE_LPAE)) {
define_arm_cp_regs(cpu, lpae_cp_reginfo);
}
- if (cpu_isar_feature(jazelle, cpu)) {
+ if (cpu_isar_feature(aa32_jazelle, cpu)) {
define_arm_cp_regs(cpu, jazelle_regs);
}
/* Slightly awkwardly, the OMAP and StrongARM cores need all of
@@ -7573,15 +7643,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
REGINFO_SENTINEL
};
define_arm_cp_regs(cpu, auxcr_reginfo);
- if (arm_feature(env, ARM_FEATURE_V8)) {
- /* HACTLR2 maps to ACTLR_EL2[63:32] and is not in ARMv7 */
- ARMCPRegInfo hactlr2_reginfo = {
- .name = "HACTLR2", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0
- };
- define_one_arm_cp_reg(cpu, &hactlr2_reginfo);
+ if (cpu_isar_feature(aa32_ac2, cpu)) {
+ define_arm_cp_regs(cpu, actlr2_hactlr2_reginfo);
}
}
@@ -7721,14 +7784,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
#endif /*CONFIG_USER_ONLY*/
#endif
- /*
- * While all v8.0 cpus support aarch64, QEMU does have configurations
- * that do not set ID_AA64ISAR1, e.g. user-only qemu-arm -cpu max,
- * which will set ID_ISAR6.
- */
- if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)
- ? cpu_isar_feature(aa64_predinv, cpu)
- : cpu_isar_feature(aa32_predinv, cpu)) {
+ if (cpu_isar_feature(any_predinv, cpu)) {
define_arm_cp_regs(cpu, predinv_reginfo);
}
@@ -7755,7 +7811,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
} else if (arm_feature(env, ARM_FEATURE_NEON)) {
gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
51, "arm-neon.xml", 0);
- } else if (arm_feature(env, ARM_FEATURE_VFP3)) {
+ } else if (cpu_isar_feature(aa32_simd_r32, cpu)) {
gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
35, "arm-vfp3.xml", 0);
} else if (arm_feature(env, ARM_FEATURE_VFP)) {
@@ -8858,7 +8914,7 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
env->elr_el[2] = env->regs[15];
} else {
/* CPSR.PAN is normally preserved preserved unless... */
- if (cpu_isar_feature(aa64_pan, env_archcpu(env))) {
+ if (cpu_isar_feature(aa32_pan, env_archcpu(env))) {
switch (new_el) {
case 3:
if (!arm_is_secure_below_el3(env)) {
@@ -10234,58 +10290,82 @@ static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
}
#endif /* !CONFIG_USER_ONLY */
-ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
- ARMMMUIdx mmu_idx)
+static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
{
- uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
- bool tbi, tbid, epd, hpd, using16k, using64k;
- int select, tsz;
+ if (regime_has_2_ranges(mmu_idx)) {
+ return extract64(tcr, 37, 2);
+ } else if (mmu_idx == ARMMMUIdx_Stage2) {
+ return 0; /* VTCR_EL2 */
+ } else {
+ return extract32(tcr, 20, 1);
+ }
+}
- /*
- * Bit 55 is always between the two regions, and is canonical for
- * determining if address tagging is enabled.
- */
- select = extract64(va, 55, 1);
+static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
+{
+ if (regime_has_2_ranges(mmu_idx)) {
+ return extract64(tcr, 51, 2);
+ } else if (mmu_idx == ARMMMUIdx_Stage2) {
+ return 0; /* VTCR_EL2 */
+ } else {
+ return extract32(tcr, 29, 1);
+ }
+}
+
+ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
+ ARMMMUIdx mmu_idx, bool data)
+{
+ uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
+ bool epd, hpd, using16k, using64k;
+ int select, tsz, tbi;
if (!regime_has_2_ranges(mmu_idx)) {
+ select = 0;
tsz = extract32(tcr, 0, 6);
using64k = extract32(tcr, 14, 1);
using16k = extract32(tcr, 15, 1);
if (mmu_idx == ARMMMUIdx_Stage2) {
/* VTCR_EL2 */
- tbi = tbid = hpd = false;
+ hpd = false;
} else {
- tbi = extract32(tcr, 20, 1);
hpd = extract32(tcr, 24, 1);
- tbid = extract32(tcr, 29, 1);
}
epd = false;
- } else if (!select) {
- tsz = extract32(tcr, 0, 6);
- epd = extract32(tcr, 7, 1);
- using64k = extract32(tcr, 14, 1);
- using16k = extract32(tcr, 15, 1);
- tbi = extract64(tcr, 37, 1);
- hpd = extract64(tcr, 41, 1);
- tbid = extract64(tcr, 51, 1);
} else {
- int tg = extract32(tcr, 30, 2);
- using16k = tg == 1;
- using64k = tg == 3;
- tsz = extract32(tcr, 16, 6);
- epd = extract32(tcr, 23, 1);
- tbi = extract64(tcr, 38, 1);
- hpd = extract64(tcr, 42, 1);
- tbid = extract64(tcr, 52, 1);
+ /*
+ * Bit 55 is always between the two regions, and is canonical for
+ * determining if address tagging is enabled.
+ */
+ select = extract64(va, 55, 1);
+ if (!select) {
+ tsz = extract32(tcr, 0, 6);
+ epd = extract32(tcr, 7, 1);
+ using64k = extract32(tcr, 14, 1);
+ using16k = extract32(tcr, 15, 1);
+ hpd = extract64(tcr, 41, 1);
+ } else {
+ int tg = extract32(tcr, 30, 2);
+ using16k = tg == 1;
+ using64k = tg == 3;
+ tsz = extract32(tcr, 16, 6);
+ epd = extract32(tcr, 23, 1);
+ hpd = extract64(tcr, 42, 1);
+ }
}
tsz = MIN(tsz, 39); /* TODO: ARMv8.4-TTST */
tsz = MAX(tsz, 16); /* TODO: ARMv8.2-LVA */
+ /* Present TBI as a composite with TBID. */
+ tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+ if (!data) {
+ tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
+ }
+ tbi = (tbi >> select) & 1;
+
return (ARMVAParameters) {
.tsz = tsz,
.select = select,
.tbi = tbi,
- .tbid = tbid,
.epd = epd,
.hpd = hpd,
.using16k = using16k,
@@ -10293,16 +10373,6 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
};
}
-ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
- ARMMMUIdx mmu_idx, bool data)
-{
- ARMVAParameters ret = aa64_va_parameters_both(env, va, mmu_idx);
-
- /* Present TBI as a composite with TBID. */
- ret.tbi &= (data || !ret.tbid);
- return ret;
-}
-
#ifndef CONFIG_USER_ONLY
static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
ARMMMUIdx mmu_idx)
@@ -10388,7 +10458,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
TCR *tcr = regime_tcr(env, mmu_idx);
int ap, ns, xn, pxn;
uint32_t el = regime_el(env, mmu_idx);
- bool ttbr1_valid;
uint64_t descaddrmask;
bool aarch64 = arm_el_is_aa64(env, el);
bool guarded = false;
@@ -10403,14 +10472,11 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
param = aa64_va_parameters(env, address, mmu_idx,
access_type != MMU_INST_FETCH);
level = 0;
- ttbr1_valid = regime_has_2_ranges(mmu_idx);
addrsize = 64 - 8 * param.tbi;
inputsize = 64 - param.tsz;
} else {
param = aa32_va_parameters(env, address, mmu_idx);
level = 1;
- /* There is no TTBR1 for EL2 */
- ttbr1_valid = (el != 2);
addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32);
inputsize = addrsize - param.tsz;
}
@@ -10427,7 +10493,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
if (inputsize < addrsize) {
target_ulong top_bits = sextract64(address, inputsize,
addrsize - inputsize);
- if (-top_bits != param.select || (param.select && !ttbr1_valid)) {
+ if (-top_bits != param.select) {
/* The gap between the two regions is a Translation fault */
fault_type = ARMFault_Translation;
goto do_fault;
@@ -12136,21 +12202,15 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
{
uint32_t flags = rebuild_hflags_aprofile(env);
ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
- ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
+ uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
uint64_t sctlr;
int tbii, tbid;
flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
/* Get control bits for tagged addresses. */
- if (regime_has_2_ranges(mmu_idx)) {
- ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
- tbid = (p1.tbi << 1) | p0.tbi;
- tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
- } else {
- tbid = p0.tbi;
- tbii = tbid & !p0.tbid;
- }
+ tbid = aa64_va_parameter_tbi(tcr, mmu_idx);
+ tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx);
flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
diff --git a/target/arm/helper.h b/target/arm/helper.h
index aa3d8cd08f..fcbf504121 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -303,14 +303,8 @@ DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s8, i32, i32, i32)
DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_shl_s64, i64, i64, i64)
DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
DEF_HELPER_2(neon_rshl_s8, i32, i32, i32)
DEF_HELPER_2(neon_rshl_u16, i32, i32, i32)
@@ -348,8 +342,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
-DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
@@ -569,9 +561,6 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG,
@@ -697,6 +686,16 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 58c4d707c5..9f96a2359f 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -931,6 +931,48 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env)
}
}
+/**
+ * arm_num_brps: Return number of implemented breakpoints.
+ * Note that the ID register BRPS field is "number of bps - 1",
+ * and we return the actual number of breakpoints.
+ */
+static inline int arm_num_brps(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1;
+ } else {
+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1;
+ }
+}
+
+/**
+ * arm_num_wrps: Return number of implemented watchpoints.
+ * Note that the ID register WRPS field is "number of wps - 1",
+ * and we return the actual number of watchpoints.
+ */
+static inline int arm_num_wrps(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1;
+ } else {
+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1;
+ }
+}
+
+/**
+ * arm_num_ctx_cmps: Return number of implemented context comparators.
+ * Note that the ID register CTX_CMPS field is "number of cmps - 1",
+ * and we return the actual number of comparators.
+ */
+static inline int arm_num_ctx_cmps(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1;
+ } else {
+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1;
+ }
+}
+
/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3.
* Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits.
*/
@@ -1091,7 +1133,7 @@ static inline uint32_t aarch32_cpsr_valid_mask(uint64_t features,
if ((features >> ARM_FEATURE_THUMB2) & 1) {
valid |= CPSR_IT;
}
- if (isar_feature_jazelle(id)) {
+ if (isar_feature_aa32_jazelle(id)) {
valid |= CPSR_J;
}
if (isar_feature_aa32_pan(id)) {
@@ -1127,15 +1169,12 @@ typedef struct ARMVAParameters {
unsigned tsz : 8;
unsigned select : 1;
bool tbi : 1;
- bool tbid : 1;
bool epd : 1;
bool hpd : 1;
bool using16k : 1;
bool using64k : 1;
} ARMVAParameters;
-ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
- ARMMMUIdx mmu_idx);
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data);
diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c
index 3a8b437eef..7981ae3bc4 100644
--- a/target/arm/kvm32.c
+++ b/target/arm/kvm32.c
@@ -97,6 +97,9 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
ahcf->isar.id_isar6 = 0;
}
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
+ ARM_CP15_REG32(0, 0, 1, 2));
+
err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
KVM_REG_ARM | KVM_REG_SIZE_U32 |
KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR0);
@@ -108,6 +111,28 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
* Fortunately there is not yet anything in there that affects migration.
*/
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
+ ARM_CP15_REG32(0, 0, 1, 4));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
+ ARM_CP15_REG32(0, 0, 1, 5));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
+ ARM_CP15_REG32(0, 0, 1, 6));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
+ ARM_CP15_REG32(0, 0, 1, 7));
+ if (read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
+ ARM_CP15_REG32(0, 0, 2, 6))) {
+ /*
+ * Older kernels don't support reading ID_MMFR4 (a new in v8
+ * register); assume it's zero.
+ */
+ ahcf->isar.id_mmfr4 = 0;
+ }
+
+ /*
+ * There is no way to read DBGDIDR, because currently 32-bit KVM
+ * doesn't implement debug at all. Leave it at zero.
+ */
+
kvm_arm_destroy_scratch_host_vcpu(fdarray);
if (err < 0) {
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 3bae9e4a66..0ad96c3500 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -541,6 +541,10 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
} else {
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
ARM64_SYS_REG(3, 0, 0, 4, 1));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
+ ARM64_SYS_REG(3, 0, 0, 5, 0));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
+ ARM64_SYS_REG(3, 0, 0, 5, 1));
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
ARM64_SYS_REG(3, 0, 0, 6, 0));
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
@@ -559,6 +563,16 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
* than skipping the reads and leaving 0, as we must avoid
* considering the values in every case.
*/
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
+ ARM64_SYS_REG(3, 0, 0, 1, 2));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
+ ARM64_SYS_REG(3, 0, 0, 1, 4));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
+ ARM64_SYS_REG(3, 0, 0, 1, 5));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
+ ARM64_SYS_REG(3, 0, 0, 1, 6));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
+ ARM64_SYS_REG(3, 0, 0, 1, 7));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
ARM64_SYS_REG(3, 0, 0, 2, 0));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
@@ -571,6 +585,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
ARM64_SYS_REG(3, 0, 0, 2, 4));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
ARM64_SYS_REG(3, 0, 0, 2, 5));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
+ ARM64_SYS_REG(3, 0, 0, 2, 6));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
ARM64_SYS_REG(3, 0, 0, 2, 7));
@@ -580,6 +596,36 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
ARM64_SYS_REG(3, 0, 0, 3, 1));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
ARM64_SYS_REG(3, 0, 0, 3, 2));
+
+ /*
+ * DBGDIDR is a bit complicated because the kernel doesn't
+ * provide an accessor for it in 64-bit mode, which is what this
+ * scratch VM is in, and there's no architected "64-bit sysreg
+ * which reads the same as the 32-bit register" the way there is
+ * for other ID registers. Instead we synthesize a value from the
+ * AArch64 ID_AA64DFR0, the same way the kernel code in
+ * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
+ * We only do this if the CPU supports AArch32 at EL1.
+ */
+ if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
+ int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
+ int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
+ int ctx_cmps =
+ FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
+ int version = 6; /* ARMv8 debug architecture */
+ bool has_el3 =
+ !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
+ uint32_t dbgdidr = 0;
+
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3);
+ dbgdidr |= (1 << 15); /* RES1 bit */
+ ahcf->isar.dbgdidr = dbgdidr;
+ }
}
sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 4259056723..c7a8438b42 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -615,24 +615,9 @@ NEON_VOP(abd_u32, neon_u32, 1)
} else { \
dest = src1 << tmp; \
}} while (0)
-NEON_VOP(shl_u8, neon_u8, 4)
NEON_VOP(shl_u16, neon_u16, 2)
-NEON_VOP(shl_u32, neon_u32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- if (shift >= 64 || shift <= -64) {
- val = 0;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- val <<= shift;
- }
- return val;
-}
-
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
@@ -645,27 +630,9 @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
} else { \
dest = src1 << tmp; \
}} while (0)
-NEON_VOP(shl_s8, neon_s8, 4)
NEON_VOP(shl_s16, neon_s16, 2)
-NEON_VOP(shl_s32, neon_s32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- int64_t val = valop;
- if (shift >= 64) {
- val = 0;
- } else if (shift <= -64) {
- val >>= 63;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- val <<= shift;
- }
- return val;
-}
-
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
@@ -1162,60 +1129,6 @@ NEON_VOP(mul_u8, neon_u8, 4)
NEON_VOP(mul_u16, neon_u16, 2)
#undef NEON_FN
-/* Polynomial multiplication is like integer multiplication except the
- partial products are XORed, not added. */
-uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
-{
- uint32_t mask;
- uint32_t result;
- result = 0;
- while (op1) {
- mask = 0;
- if (op1 & 1)
- mask |= 0xff;
- if (op1 & (1 << 8))
- mask |= (0xff << 8);
- if (op1 & (1 << 16))
- mask |= (0xff << 16);
- if (op1 & (1 << 24))
- mask |= (0xff << 24);
- result ^= op2 & mask;
- op1 = (op1 >> 1) & 0x7f7f7f7f;
- op2 = (op2 << 1) & 0xfefefefe;
- }
- return result;
-}
-
-uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
-{
- uint64_t result = 0;
- uint64_t mask;
- uint64_t op2ex = op2;
- op2ex = (op2ex & 0xff) |
- ((op2ex & 0xff00) << 8) |
- ((op2ex & 0xff0000) << 16) |
- ((op2ex & 0xff000000) << 24);
- while (op1) {
- mask = 0;
- if (op1 & 1) {
- mask |= 0xffff;
- }
- if (op1 & (1 << 8)) {
- mask |= (0xffffU << 16);
- }
- if (op1 & (1 << 16)) {
- mask |= (0xffffULL << 32);
- }
- if (op1 & (1 << 24)) {
- mask |= (0xffffULL << 48);
- }
- result ^= op2ex & mask;
- op1 = (op1 >> 1) & 0x7f7f7f7f;
- op2ex <<= 1;
- }
- return result;
-}
-
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
NEON_VOP(tst_u8, neon_u8, 4)
NEON_VOP(tst_u16, neon_u16, 2)
@@ -2207,33 +2120,3 @@ void HELPER(neon_zip16)(void *vd, void *vm)
rm[0] = m0;
rd[0] = d0;
}
-
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- for (bitnum = 0; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 << bitnum;
- }
- }
- return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- /* bit 0 of op1 can't influence the high 64 bits at all */
- for (bitnum = 1; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 >> (64 - bitnum);
- }
- }
- return res;
-}
diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c
index 9746e32bf8..b909630317 100644
--- a/target/arm/pauth_helper.c
+++ b/target/arm/pauth_helper.c
@@ -320,7 +320,8 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param)
{
- uint64_t extfield = -param.select;
+ /* Note that bit 55 is used whether or not the regime has 2 ranges. */
+ uint64_t extfield = sextract64(ptr, 55, 1);
int bot_pac_bit = 64 - param.tsz;
int top_pac_bit = 64 - 8 * param.tbi;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 7c26c3bfeb..596bf4cf73 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6895,6 +6895,7 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_resl);
write_vec_element(s, tcg_resh, rd, 1, MO_64);
tcg_temp_free_i64(tcg_resh);
+ clear_vec_high(s, true, rd);
}
/* TBL/TBX
@@ -6963,6 +6964,7 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_resl);
write_vec_element(s, tcg_resh, rd, 1, MO_64);
tcg_temp_free_i64(tcg_resh);
+ clear_vec_high(s, true, rd);
}
/* ZIP/UZP/TRN
@@ -7052,6 +7054,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_resl);
write_vec_element(s, tcg_resh, rd, 1, MO_64);
tcg_temp_free_i64(tcg_resh);
+ clear_vec_high(s, true, rd);
}
/*
@@ -7409,6 +7412,9 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn,
write_vec_element(s, tmp, rd, dst_index, size);
tcg_temp_free_i64(tmp);
+
+ /* INS is considered a 128-bit write for SVE. */
+ clear_vec_high(s, true, rd);
}
@@ -7438,6 +7444,9 @@ static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
idx = extract32(imm5, 1 + size, 4 - size);
write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
+
+ /* INS is considered a 128-bit write for SVE. */
+ clear_vec_high(s, true, rd);
}
/*
@@ -8735,9 +8744,9 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
break;
case 0x8: /* SSHL, USHL */
if (u) {
- gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
+ gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
} else {
- gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
+ gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
}
break;
case 0x9: /* SQSHL, UQSHL */
@@ -10533,10 +10542,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
tcg_passres, tcg_passres);
break;
- case 14: /* PMULL */
- assert(size == 0);
- gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
- break;
default:
g_assert_not_reached();
}
@@ -10648,30 +10653,6 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
clear_vec_high(s, is_q, rd);
}
-static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
-{
- /* PMULL of 64 x 64 -> 128 is an odd special case because it
- * is the only three-reg-diff instruction which produces a
- * 128-bit wide result from a single operation. However since
- * it's possible to calculate the two halves more or less
- * separately we just use two helper calls.
- */
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, is_q, MO_64);
- read_vec_element(s, tcg_op2, rm, is_q, MO_64);
- gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
- write_vec_element(s, tcg_res, rd, 0, MO_64);
- gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
- write_vec_element(s, tcg_res, rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res);
-}
-
/* AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -10724,11 +10705,21 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
case 14: /* PMULL, PMULL2 */
- if (is_u || size == 1 || size == 2) {
+ if (is_u) {
unallocated_encoding(s);
return;
}
- if (size == 3) {
+ switch (size) {
+ case 0: /* PMULL.P8 */
+ if (!fp_access_check(s)) {
+ return;
+ }
+ /* The Q field specifies lo/hi half input for this insn. */
+ gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_neon_pmull_h);
+ break;
+
+ case 3: /* PMULL.P64 */
if (!dc_isar_feature(aa64_pmull, s)) {
unallocated_encoding(s);
return;
@@ -10736,10 +10727,16 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
- handle_pmull_64(s, is_q, rd, rn, rm);
- return;
+ /* The Q field specifies lo/hi half input for this insn. */
+ gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_gvec_pmull_q);
+ break;
+
+ default:
+ unallocated_encoding(s);
+ break;
}
- goto is_widening;
+ return;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
@@ -10760,7 +10757,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- is_widening:
if (!fp_access_check(s)) {
return;
}
@@ -11132,6 +11128,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
is_q ? 16 : 8, vec_full_reg_size(s),
(u ? uqsub_op : sqsub_op) + size);
return;
+ case 0x08: /* SSHL, USHL */
+ gen_gvec_op3(s, is_q, rd, rn, rm,
+ u ? &ushl_op[size] : &sshl_op[size]);
+ return;
case 0x0c: /* SMAX, UMAX */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
@@ -11156,9 +11156,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
case 0x13: /* MUL, PMUL */
if (!u) { /* MUL */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
- return;
+ } else { /* PMUL */
+ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
}
- break;
+ return;
case 0x12: /* MLA, MLS */
if (u) {
gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
@@ -11247,16 +11248,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x8: /* SSHL, USHL */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
- { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
- { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
- };
- genfn = fns[size][u];
- break;
- }
case 0x9: /* SQSHL, UQSHL */
{
static NeonGenTwoOpEnvFn * const fns[3][2] = {
@@ -11298,11 +11289,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x13: /* MUL, PMUL */
- assert(u); /* PMUL */
- assert(size == 0);
- genfn = gen_helper_neon_mul_p8;
- break;
case 0x16: /* SQDMULH, SQRDMULH */
{
static NeonGenTwoOpEnvFn * const fns[2][2] = {
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index bf90ac0e5b..ba46e2557a 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -201,7 +201,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
+ if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vn | a->vd) & 0x10)) {
return false;
}
@@ -334,7 +334,7 @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
+ if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vn | a->vd) & 0x10)) {
return false;
}
@@ -420,7 +420,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
+ if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vd) & 0x10)) {
return false;
}
@@ -484,7 +484,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
@@ -556,7 +556,7 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
uint32_t offset;
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
return false;
}
@@ -615,7 +615,7 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
uint32_t offset;
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
return false;
}
@@ -662,7 +662,7 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
return false;
}
@@ -912,7 +912,7 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
*/
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
@@ -978,7 +978,7 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
TCGv_i64 tmp;
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
@@ -1101,7 +1101,7 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd + n) > 16) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
return false;
}
@@ -1309,7 +1309,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
TCGv_ptr fpst;
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vn | vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
return false;
}
@@ -1458,7 +1458,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
TCGv_i64 f0, fd;
/* UNDEF accesses to D16-D31 if they don't exist */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
return false;
}
@@ -1822,7 +1822,8 @@ static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
@@ -1921,7 +1922,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
vd = a->vd;
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (vd & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
return false;
}
@@ -2065,7 +2066,7 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
return false;
}
@@ -2138,7 +2139,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
@@ -2204,7 +2205,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
@@ -2264,7 +2265,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
return false;
}
@@ -2325,7 +2326,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
return false;
}
@@ -2384,7 +2385,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
return false;
}
@@ -2412,7 +2413,7 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
TCGv_i32 vm;
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
@@ -2440,7 +2441,7 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
TCGv_i32 vd;
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
@@ -2494,7 +2495,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
TCGv_ptr fpst;
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
@@ -2534,7 +2535,7 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
@@ -2627,7 +2628,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
}
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
@@ -2723,7 +2724,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
TCGv_ptr fpst;
/* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
return false;
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 20f89ace2f..79880adaad 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -42,7 +42,7 @@
#define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
-#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s)
+#define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
@@ -2612,7 +2612,7 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
#define VFP_SREG(insn, bigbit, smallbit) \
((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
- if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
+ if (dc_isar_feature(aa32_simd_r32, s)) { \
reg = (((insn) >> (bigbit)) & 0x0f) \
| (((insn) >> ((smallbit) - 4)) & 0x10); \
} else { \
@@ -3575,13 +3575,13 @@ static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
if (u) {
switch (size) {
case 1: gen_helper_neon_shl_u16(var, var, shift); break;
- case 2: gen_helper_neon_shl_u32(var, var, shift); break;
+ case 2: gen_ushl_i32(var, var, shift); break;
default: abort();
}
} else {
switch (size) {
case 1: gen_helper_neon_shl_s16(var, var, shift); break;
- case 2: gen_helper_neon_shl_s32(var, var, shift); break;
+ case 2: gen_sshl_i32(var, var, shift); break;
default: abort();
}
}
@@ -4384,6 +4384,280 @@ const GVecGen3 cmtst_op[4] = {
.vece = MO_64 },
};
+void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i32 rval = tcg_temp_new_i32();
+ TCGv_i32 lsh = tcg_temp_new_i32();
+ TCGv_i32 rsh = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_const_i32(0);
+ TCGv_i32 max = tcg_const_i32(32);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i32(lsh, shift);
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_shl_i32(lval, src, lsh);
+ tcg_gen_shr_i32(rval, src, rsh);
+ tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
+ tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
+
+ tcg_temp_free_i32(lval);
+ tcg_temp_free_i32(rval);
+ tcg_temp_free_i32(lsh);
+ tcg_temp_free_i32(rsh);
+ tcg_temp_free_i32(zero);
+ tcg_temp_free_i32(max);
+}
+
+void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+ TCGv_i64 lval = tcg_temp_new_i64();
+ TCGv_i64 rval = tcg_temp_new_i64();
+ TCGv_i64 lsh = tcg_temp_new_i64();
+ TCGv_i64 rsh = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_const_i64(0);
+ TCGv_i64 max = tcg_const_i64(64);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i64(lsh, shift);
+ tcg_gen_neg_i64(rsh, lsh);
+ tcg_gen_shl_i64(lval, src, lsh);
+ tcg_gen_shr_i64(rval, src, rsh);
+ tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
+ tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
+
+ tcg_temp_free_i64(lval);
+ tcg_temp_free_i64(rval);
+ tcg_temp_free_i64(lsh);
+ tcg_temp_free_i64(rsh);
+ tcg_temp_free_i64(zero);
+ tcg_temp_free_i64(max);
+}
+
+static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
+ TCGv_vec src, TCGv_vec shift)
+{
+ TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec msk, max;
+
+ tcg_gen_neg_vec(vece, rsh, shift);
+ if (vece == MO_8) {
+ tcg_gen_mov_vec(lsh, shift);
+ } else {
+ msk = tcg_temp_new_vec_matching(dst);
+ tcg_gen_dupi_vec(vece, msk, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
+ tcg_temp_free_vec(msk);
+ }
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_shlv_vec(vece, lval, src, lsh);
+ tcg_gen_shrv_vec(vece, rval, src, rsh);
+
+ max = tcg_temp_new_vec_matching(dst);
+ tcg_gen_dupi_vec(vece, max, 8 << vece);
+
+ /*
+ * The choice of LT (signed) and GEU (unsigned) are biased toward
+ * the instructions of the x86_64 host. For MO_8, the whole byte
+ * is significant so we must use an unsigned compare; otherwise we
+ * have already masked to a byte and so a signed compare works.
+ * Other tcg hosts have a full set of comparisons and do not care.
+ */
+ if (vece == MO_8) {
+ tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
+ tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
+ tcg_gen_andc_vec(vece, lval, lval, lsh);
+ tcg_gen_andc_vec(vece, rval, rval, rsh);
+ } else {
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
+ tcg_gen_and_vec(vece, lval, lval, lsh);
+ tcg_gen_and_vec(vece, rval, rval, rsh);
+ }
+ tcg_gen_or_vec(vece, dst, lval, rval);
+
+ tcg_temp_free_vec(max);
+ tcg_temp_free_vec(lval);
+ tcg_temp_free_vec(rval);
+ tcg_temp_free_vec(lsh);
+ tcg_temp_free_vec(rsh);
+}
+
+static const TCGOpcode ushl_list[] = {
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+};
+
+const GVecGen3 ushl_op[4] = {
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_b,
+ .opt_opc = ushl_list,
+ .vece = MO_8 },
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_h,
+ .opt_opc = ushl_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ushl_i32,
+ .fniv = gen_ushl_vec,
+ .opt_opc = ushl_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ushl_i64,
+ .fniv = gen_ushl_vec,
+ .opt_opc = ushl_list,
+ .vece = MO_64 },
+};
+
+void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i32 rval = tcg_temp_new_i32();
+ TCGv_i32 lsh = tcg_temp_new_i32();
+ TCGv_i32 rsh = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_const_i32(0);
+ TCGv_i32 max = tcg_const_i32(31);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i32(lsh, shift);
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_shl_i32(lval, src, lsh);
+ tcg_gen_umin_i32(rsh, rsh, max);
+ tcg_gen_sar_i32(rval, src, rsh);
+ tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
+ tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
+
+ tcg_temp_free_i32(lval);
+ tcg_temp_free_i32(rval);
+ tcg_temp_free_i32(lsh);
+ tcg_temp_free_i32(rsh);
+ tcg_temp_free_i32(zero);
+ tcg_temp_free_i32(max);
+}
+
+void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+ TCGv_i64 lval = tcg_temp_new_i64();
+ TCGv_i64 rval = tcg_temp_new_i64();
+ TCGv_i64 lsh = tcg_temp_new_i64();
+ TCGv_i64 rsh = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_const_i64(0);
+ TCGv_i64 max = tcg_const_i64(63);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i64(lsh, shift);
+ tcg_gen_neg_i64(rsh, lsh);
+ tcg_gen_shl_i64(lval, src, lsh);
+ tcg_gen_umin_i64(rsh, rsh, max);
+ tcg_gen_sar_i64(rval, src, rsh);
+ tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
+ tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
+
+ tcg_temp_free_i64(lval);
+ tcg_temp_free_i64(rval);
+ tcg_temp_free_i64(lsh);
+ tcg_temp_free_i64(rsh);
+ tcg_temp_free_i64(zero);
+ tcg_temp_free_i64(max);
+}
+
+static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
+ TCGv_vec src, TCGv_vec shift)
+{
+ TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_neg_vec(vece, rsh, shift);
+ if (vece == MO_8) {
+ tcg_gen_mov_vec(lsh, shift);
+ } else {
+ tcg_gen_dupi_vec(vece, tmp, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, tmp);
+ tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ }
+
+ /* Bound rsh so out of bound right shift gets -1. */
+ tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, tmp);
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+
+ tcg_gen_shlv_vec(vece, lval, src, lsh);
+ tcg_gen_sarv_vec(vece, rval, src, rsh);
+
+ /* Select in-bound left shift. */
+ tcg_gen_andc_vec(vece, lval, lval, tmp);
+
+ /* Select between left and right shift. */
+ if (vece == MO_8) {
+ tcg_gen_dupi_vec(vece, tmp, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ } else {
+ tcg_gen_dupi_vec(vece, tmp, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ }
+
+ tcg_temp_free_vec(lval);
+ tcg_temp_free_vec(rval);
+ tcg_temp_free_vec(lsh);
+ tcg_temp_free_vec(rsh);
+ tcg_temp_free_vec(tmp);
+}
+
+static const TCGOpcode sshl_list[] = {
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+};
+
+const GVecGen3 sshl_op[4] = {
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_b,
+ .opt_opc = sshl_list,
+ .vece = MO_8 },
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_h,
+ .opt_opc = sshl_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sshl_i32,
+ .fniv = gen_sshl_vec,
+ .opt_opc = sshl_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sshl_i64,
+ .fniv = gen_sshl_vec,
+ .opt_opc = sshl_list,
+ .vece = MO_64 },
+};
+
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
{
@@ -4733,16 +5007,17 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_3R_VMUL: /* VMUL */
if (u) {
- /* Polynomial case allows only P8 and is handled below. */
+ /* Polynomial case allows only P8. */
if (size != 0) {
return 1;
}
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+ 0, gen_helper_gvec_pmul_b);
} else {
tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
- return 0;
}
- break;
+ return 0;
case NEON_3R_VML: /* VMLA, VMLS */
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
@@ -4787,6 +5062,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
vec_size, vec_size);
}
return 0;
+
+ case NEON_3R_VSHL:
+ /* Note the operation is vshl vd,vm,vn */
+ tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
+ u ? &ushl_op[size] : &sshl_op[size]);
+ return 0;
}
if (size == 3) {
@@ -4795,13 +5076,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
neon_load_reg64(cpu_V0, rn + pass);
neon_load_reg64(cpu_V1, rm + pass);
switch (op) {
- case NEON_3R_VSHL:
- if (u) {
- gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
- }
- break;
case NEON_3R_VQSHL:
if (u) {
gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
@@ -4836,7 +5110,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
pairwise = 0;
switch (op) {
- case NEON_3R_VSHL:
case NEON_3R_VQSHL:
case NEON_3R_VRSHL:
case NEON_3R_VQRSHL:
@@ -4916,9 +5189,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_3R_VHSUB:
GEN_NEON_INTEGER_OP(hsub);
break;
- case NEON_3R_VSHL:
- GEN_NEON_INTEGER_OP(shl);
- break;
case NEON_3R_VQSHL:
GEN_NEON_INTEGER_OP_ENV(qshl);
break;
@@ -4937,10 +5207,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp2 = neon_load_reg(rd, pass);
gen_neon_add(size, tmp, tmp2);
break;
- case NEON_3R_VMUL:
- /* VMUL.P8; other cases already eliminated. */
- gen_helper_neon_mul_p8(tmp, tmp, tmp2);
- break;
case NEON_3R_VPMAX:
GEN_NEON_INTEGER_OP(pmax);
break;
@@ -5327,9 +5593,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
} else {
if (input_unsigned) {
- gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
+ gen_ushl_i64(cpu_V0, in, tmp64);
} else {
- gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
+ gen_sshl_i64(cpu_V0, in, tmp64);
}
}
tmp = tcg_temp_new_i32();
@@ -5600,27 +5866,20 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
}
- /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
- * outside the loop below as it only performs a single pass.
- */
- if (op == 14 && size == 2) {
- TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
-
- if (!dc_isar_feature(aa32_pmull, s)) {
- return 1;
+ /* Handle polynomial VMULL in a single pass. */
+ if (op == 14) {
+ if (size == 0) {
+ /* VMULL.P8 */
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
+ 0, gen_helper_neon_pmull_h);
+ } else {
+ /* VMULL.P64 */
+ if (!dc_isar_feature(aa32_pmull, s)) {
+ return 1;
+ }
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
+ 0, gen_helper_gvec_pmull_q);
}
- tcg_rn = tcg_temp_new_i64();
- tcg_rm = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- neon_load_reg64(tcg_rn, rn);
- neon_load_reg64(tcg_rm, rm);
- gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
- neon_store_reg64(tcg_rd, rd);
- gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
- neon_store_reg64(tcg_rd, rd + 1);
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rm);
- tcg_temp_free_i64(tcg_rd);
return 0;
}
@@ -5698,11 +5957,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
/* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
break;
- case 14: /* Polynomial VMULL */
- gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
default: /* 15 is RESERVED: caught earlier */
abort();
}
@@ -9845,8 +10099,8 @@ static bool op_div(DisasContext *s, arg_rrr *a, bool u)
TCGv_i32 t1, t2;
if (s->thumb
- ? !dc_isar_feature(thumb_div, s)
- : !dc_isar_feature(arm_div, s)) {
+ ? !dc_isar_feature(aa32_thumb_div, s)
+ : !dc_isar_feature(aa32_arm_div, s)) {
return false;
}
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 5b167c416a..d9ea0c99cc 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -278,6 +278,8 @@ uint64_t vfp_expand_imm(int size, uint8_t imm8);
extern const GVecGen3 mla_op[4];
extern const GVecGen3 mls_op[4];
extern const GVecGen3 cmtst_op[4];
+extern const GVecGen3 sshl_op[4];
+extern const GVecGen3 ushl_op[4];
extern const GVecGen2i ssra_op[4];
extern const GVecGen2i usra_op[4];
extern const GVecGen2i sri_op[4];
@@ -287,6 +289,10 @@ extern const GVecGen4 sqadd_op[4];
extern const GVecGen4 uqsub_op[4];
extern const GVecGen4 sqsub_op[4];
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index dedef62403..8017bd88c4 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1046,3 +1046,214 @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc,
get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
}
+
+void HELPER(gvec_sshl_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ int8_t mm = m[i];
+ int8_t nn = n[i];
+ int8_t res = 0;
+ if (mm >= 0) {
+ if (mm < 8) {
+ res = nn << mm;
+ }
+ } else {
+ res = nn >> (mm > -8 ? -mm : 7);
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sshl_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ int8_t mm = m[i]; /* only 8 bits of shift are significant */
+ int16_t nn = n[i];
+ int16_t res = 0;
+ if (mm >= 0) {
+ if (mm < 16) {
+ res = nn << mm;
+ }
+ } else {
+ res = nn >> (mm > -16 ? -mm : 15);
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ushl_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ int8_t mm = m[i];
+ uint8_t nn = n[i];
+ uint8_t res = 0;
+ if (mm >= 0) {
+ if (mm < 8) {
+ res = nn << mm;
+ }
+ } else {
+ if (mm > -8) {
+ res = nn >> -mm;
+ }
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ int8_t mm = m[i]; /* only 8 bits of shift are significant */
+ uint16_t nn = n[i];
+ uint16_t res = 0;
+ if (mm >= 0) {
+ if (mm < 16) {
+ res = nn << mm;
+ }
+ } else {
+ if (mm > -16) {
+ res = nn >> -mm;
+ }
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * 8x8->8 polynomial multiply.
+ *
+ * Polynomial multiplication is like integer multiplication except the
+ * partial products are XORed, not added.
+ *
+ * TODO: expose this as a generic vector operation, as it is a common
+ * crypto building block.
+ */
+void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ uint64_t nn = n[i];
+ uint64_t mm = m[i];
+ uint64_t rr = 0;
+
+ for (j = 0; j < 8; ++j) {
+ uint64_t mask = (nn & 0x0101010101010101ull) * 0xff;
+ rr ^= mm & mask;
+ mm = (mm << 1) & 0xfefefefefefefefeull;
+ nn >>= 1;
+ }
+ d[i] = rr;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * 64x64->128 polynomial multiply.
+ * Because of the lanes are not accessed in strict columns,
+ * this probably cannot be turned into a generic helper.
+ */
+void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ intptr_t hi = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; i += 2) {
+ uint64_t nn = n[i + hi];
+ uint64_t mm = m[i + hi];
+ uint64_t rhi = 0;
+ uint64_t rlo = 0;
+
+ /* Bit 0 can only influence the low 64-bit result. */
+ if (nn & 1) {
+ rlo = mm;
+ }
+
+ for (j = 1; j < 64; ++j) {
+ uint64_t mask = -((nn >> j) & 1);
+ rlo ^= (mm << j) & mask;
+ rhi ^= (mm >> (64 - j)) & mask;
+ }
+ d[i] = rlo;
+ d[i + 1] = rhi;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * 8x8->16 polynomial multiply.
+ *
+ * The byte inputs are expanded to (or extracted from) half-words.
+ * Note that neon and sve2 get the inputs from different positions.
+ * This allows 4 bytes to be processed in parallel with uint64_t.
+ */
+
+static uint64_t expand_byte_to_half(uint64_t x)
+{
+ return (x & 0x000000ff)
+ | ((x & 0x0000ff00) << 8)
+ | ((x & 0x00ff0000) << 16)
+ | ((x & 0xff000000) << 24);
+}
+
+static uint64_t pmull_h(uint64_t op1, uint64_t op2)
+{
+ uint64_t result = 0;
+ int i;
+
+ for (i = 0; i < 8; ++i) {
+ uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff;
+ result ^= op2 & mask;
+ op1 >>= 1;
+ op2 <<= 1;
+ }
+ return result;
+}
+
+void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ int hi = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t nn = n[hi], mm = m[hi];
+
+ d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+ nn >>= 32;
+ mm >>= 32;
+ d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+
+ clear_tail(d, 16, simd_maxsz(desc));
+}
+
+#ifdef TARGET_AARCH64
+void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ int shift = simd_data(desc) * 8;
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull;
+ uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull;
+
+ d[i] = pmull_h(nn, mm);
+ }
+}
+#endif
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 0ae7d4f34a..930d6e747f 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -185,7 +185,7 @@ uint32_t vfp_get_fpscr(CPUARMState *env)
void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
{
/* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
- if (!cpu_isar_feature(aa64_fp16, env_archcpu(env))) {
+ if (!cpu_isar_feature(any_fp16, env_archcpu(env))) {
val &= ~FPCR_FZ16;
}