author     Peter Maydell <peter.maydell@linaro.org>   2021-07-03 22:34:37 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2021-07-03 22:34:37 +0100
commit     73c8bf4ccff8951d228b8a0d49968c56e32da4de (patch)
tree       61ef201108751227a8ec3b4b1d16b2a4420fa9e9
parent     9c2647f75004c4f7d64c9c0ec55f8c6f0739a8b1 (diff)
parent     04ea4d3cfd0a21b248ece8eb7a9436a3d9898dd8 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210702' into staging
target-arm queue:
* more MVE instructions
* hw/gpio/gpio_pwr: use shutdown function for reboot
* target/arm: Check NaN mode before silencing NaN
* tests: Boot and halt a Linux guest on the Raspberry Pi 2 machine
* hw/arm: Add basic power management to raspi.
* docs/system/arm: Add quanta-gbs-bmc, quanta-q7l1-bmc
# gpg: Signature made Fri 02 Jul 2021 13:59:19 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20210702: (24 commits)
target/arm: Implement MVE shifts by register
target/arm: Implement MVE shifts by immediate
target/arm: Implement MVE long shifts by register
target/arm: Implement MVE long shifts by immediate
target/arm: Implement MVE VADDLV
target/arm: Implement MVE VSHLC
target/arm: Implement MVE saturating narrowing shifts
target/arm: Implement MVE VSHRN, VRSHRN
target/arm: Implement MVE VSRI, VSLI
target/arm: Implement MVE VSHLL
target/arm: Implement MVE vector shift right by immediate insns
target/arm: Implement MVE vector shift left by immediate insns
target/arm: Implement MVE logical immediate insns
target/arm: Use dup_const() instead of bitfield_replicate()
target/arm: Use asimd_imm_const for A64 decode
target/arm: Make asimd_imm_const() public
target/arm: Fix bugs in MVE VRMLALDAVH, VRMLSLDAVH
target/arm: Fix MVE widening/narrowing VLDR/VSTR offset calculation
hw/gpio/gpio_pwr: use shutdown function for reboot
target/arm: Check NaN mode before silencing NaN
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
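
For context on the raspi power-management change: with hw/misc/bcm2835_powermgt.c
below, a guest can actually reboot or power off the machine instead of hitting an
unimplemented-device stub. A minimal bare-metal sketch (not part of this series) of
how a guest drives the block follows; the 0x5a password, the register offsets, and
the partition-63 halt convention are taken from the new model in this diff, while
the 0x3f100000 base address is an assumption for the BCM2836-based raspi2 board
(peripherals at 0x3f000000 plus PM_OFFSET 0x100000).

    /* Illustrative sketch only: reboot/power off a Raspberry Pi 2 guest
     * through the PM block modelled below. PM_BASE is an assumption for
     * raspi2; other board revisions map the peripherals elsewhere. */
    #include <stdint.h>

    #define PM_BASE          0x3f100000u
    #define PM_RSTC          (PM_BASE + 0x1c)  /* R_RSTC */
    #define PM_RSTS          (PM_BASE + 0x20)  /* R_RSTS */
    #define PM_PASSWORD      0x5a000000u       /* writes without it are ignored */
    #define PM_RSTC_RESET    0x20u             /* V_RSTC_RESET */
    #define PM_RSTS_POWEROFF 0x555u            /* "partition 63" means halt */

    static inline void mmio_write32(uintptr_t addr, uint32_t val)
    {
        *(volatile uint32_t *)addr = val;
    }

    void pi2_reboot(void)
    {
        /* Any RSTC write with the reset bit set requests a system reset */
        mmio_write32(PM_RSTC, PM_PASSWORD | PM_RSTC_RESET);
    }

    void pi2_poweroff(void)
    {
        /* With RSTS pointing at partition 63, the same reset request is
         * treated as a shutdown (qemu_system_shutdown_request) instead */
        mmio_write32(PM_RSTS, PM_PASSWORD | PM_RSTS_POWEROFF);
        mmio_write32(PM_RSTC, PM_PASSWORD | PM_RSTC_RESET);
    }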
-rw-r--r--  docs/system/arm/aspeed.rst               |   1
-rw-r--r--  docs/system/arm/nuvoton.rst              |   5
-rw-r--r--  hw/arm/bcm2835_peripherals.c             |  13
-rw-r--r--  hw/gpio/gpio_pwr.c                       |   2
-rw-r--r--  hw/misc/bcm2835_powermgt.c               | 160
-rw-r--r--  hw/misc/meson.build                      |   1
-rw-r--r--  include/hw/arm/bcm2835_peripherals.h     |   3
-rw-r--r--  include/hw/misc/bcm2835_powermgt.h       |  29
-rw-r--r--  target/arm/helper-a64.c                  |  12
-rw-r--r--  target/arm/helper-mve.h                  | 108
-rw-r--r--  target/arm/mve.decode                    | 177
-rw-r--r--  target/arm/mve_helper.c                  | 524
-rw-r--r--  target/arm/t32.decode                    |  71
-rw-r--r--  target/arm/translate-a64.c               |  86
-rw-r--r--  target/arm/translate-mve.c               | 261
-rw-r--r--  target/arm/translate-neon.c              |  81
-rw-r--r--  target/arm/translate.c                   | 327
-rw-r--r--  target/arm/translate.h                   |  41
-rw-r--r--  target/arm/vfp_helper.c                  |  24
-rw-r--r--  tests/acceptance/boot_linux_console.py   |  43
20 files changed, 1760 insertions(+), 209 deletions(-)
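
Two of the commits below ("Make asimd_imm_const() public", "Use asimd_imm_const
for A64 decode") replace the open-coded AdvSIMDExpandImm switch in translate-a64.c
with the shared asimd_imm_const() helper, which the new MVE Vimm_1r decode also
uses. As a reading aid, here is a standalone sketch (not QEMU code, and simplified
to the VORR/VBIC immediate forms only, i.e. cmode odd and below 12) of what that
expansion computes; for op=1, asimd_imm_const() additionally inverts the result,
which is why trans_Vimm_1r can turn VBIC into a plain AND.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of AdvSIMDExpandImm for the VORR/VBIC immediate forms
     * (cmode in {1,3,5,7}: 32-bit lanes; cmode in {9,11}: 16-bit lanes). */
    static uint64_t vorr_vbic_imm(uint8_t imm8, unsigned cmode)
    {
        uint64_t imm;

        if (cmode < 8) {
            /* byte placed at lane byte 0..3, replicated per 32 bits */
            imm = (uint64_t)imm8 << (((cmode >> 1) & 3) * 8);
            imm |= imm << 32;
        } else {
            /* cmode 9 or 11: byte at position 0 or 1 of each 16-bit lane */
            imm = (uint64_t)imm8 << (((cmode >> 1) & 1) * 8);
            imm |= imm << 16;
            imm |= imm << 32;
        }
        return imm;
    }

    int main(void)
    {
        /* imm8=0xab, cmode=3 -> 0x0000ab000000ab00 */
        printf("0x%016" PRIx64 "\n", vorr_vbic_imm(0xab, 3));
        return 0;
    }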
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index 57ee2bd94f..cec87e3743 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -13,6 +13,7 @@ etc.
 AST2400 SoC based machines :
 
 - ``palmetto-bmc``         OpenPOWER Palmetto POWER8 BMC
+- ``quanta-q71l-bmc``      OpenBMC Quanta BMC
 
 AST2500 SoC based machines :
 
diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index ca011bd479..3cd2b2b18d 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -1,5 +1,5 @@
-Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``)
-=====================================================
+Nuvoton iBMC boards (``*-bmc``, ``npcm750-evb``, ``quanta-gsj``)
+================================================================
 
 The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are
 designed to be used as Baseboard Management Controllers (BMCs) in various
@@ -18,6 +18,7 @@ segment. The following machines are based on this chip :
 The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and
 Hyperscale applications. The following machines are based on this chip :
 
+- ``quanta-gbs-bmc``    Quanta GBS server BMC
 - ``quanta-gsj``        Quanta GSJ server BMC
 
 There are also two more SoCs, NPCM710 and NPCM705, which are single-core
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index dcff13433e..48538c9360 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -126,6 +126,10 @@ static void bcm2835_peripherals_init(Object *obj)
 
     object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr));
+
+    /* Power Management */
+    object_initialize_child(obj, "powermgt", &s->powermgt,
+                            TYPE_BCM2835_POWERMGT);
 }
 
 static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -364,9 +368,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
                        qdev_get_gpio_in_named(DEVICE(&s->ic),
                                               BCM2835_IC_GPU_IRQ,
                                               INTERRUPT_USB));
 
+    /* Power Management */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->powermgt), errp)) {
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, PM_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->powermgt), 0));
+
     create_unimp(s, &s->txp, "bcm2835-txp", TXP_OFFSET, 0x1000);
     create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40);
-    create_unimp(s, &s->powermgt, "bcm2835-powermgt", PM_OFFSET, 0x114);
     create_unimp(s, &s->i2s, "bcm2835-i2s", I2S_OFFSET, 0x100);
     create_unimp(s, &s->smi, "bcm2835-smi", SMI_OFFSET, 0x100);
     create_unimp(s, &s->spi[0], "bcm2835-spi0", SPI0_OFFSET, 0x20);
diff --git a/hw/gpio/gpio_pwr.c b/hw/gpio/gpio_pwr.c
index 7714fa0dc4..dbaf1c70c8 100644
--- a/hw/gpio/gpio_pwr.c
+++ b/hw/gpio/gpio_pwr.c
@@ -43,7 +43,7 @@ static void gpio_pwr_reset(void *opaque, int n, int level)
 static void gpio_pwr_shutdown(void *opaque, int n, int level)
 {
     if (level) {
-        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     }
 }
 
diff --git a/hw/misc/bcm2835_powermgt.c b/hw/misc/bcm2835_powermgt.c
new file mode 100644
index 0000000000..25fa804cbd
--- /dev/null
+++ b/hw/misc/bcm2835_powermgt.c
@@ -0,0 +1,160 @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/misc/bcm2835_powermgt.h"
+#include "migration/vmstate.h"
+#include "sysemu/runstate.h"
+
+#define PASSWORD 0x5a000000
+#define PASSWORD_MASK 0xff000000
+
+#define R_RSTC 0x1c
+#define V_RSTC_RESET 0x20
+#define R_RSTS 0x20
+#define V_RSTS_POWEROFF 0x555 /* Linux uses partition 63 to indicate halt. */
+#define R_WDOG 0x24
+
+static uint64_t bcm2835_powermgt_read(void *opaque, hwaddr offset,
+                                      unsigned size)
+{
+    BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque;
+    uint32_t res = 0;
+
+    switch (offset) {
+    case R_RSTC:
+        res = s->rstc;
+        break;
+    case R_RSTS:
+        res = s->rsts;
+        break;
+    case R_WDOG:
+        res = s->wdog;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_read: Unknown offset 0x%08"HWADDR_PRIx
+                      "\n", offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_powermgt_write(void *opaque, hwaddr offset,
+                                   uint64_t value, unsigned size)
+{
+    BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque;
+
+    if ((value & PASSWORD_MASK) != PASSWORD) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_powermgt_write: Bad password 0x%"PRIx64
+                      " at offset 0x%08"HWADDR_PRIx"\n",
+                      value, offset);
+        return;
+    }
+
+    value = value & ~PASSWORD_MASK;
+
+    switch (offset) {
+    case R_RSTC:
+        s->rstc = value;
+        if (value & V_RSTC_RESET) {
+            if ((s->rsts & 0xfff) == V_RSTS_POWEROFF) {
+                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+            } else {
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+            }
+        }
+        break;
+    case R_RSTS:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: RSTS\n");
+        s->rsts = value;
+        break;
+    case R_WDOG:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: WDOG\n");
+        s->wdog = value;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "bcm2835_powermgt_write: Unknown offset 0x%08"HWADDR_PRIx
+                      "\n", offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_powermgt_ops = {
+    .read = bcm2835_powermgt_read,
+    .write = bcm2835_powermgt_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+};
+
+static const VMStateDescription vmstate_bcm2835_powermgt = {
+    .name = TYPE_BCM2835_POWERMGT,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(rstc, BCM2835PowerMgtState),
+        VMSTATE_UINT32(rsts, BCM2835PowerMgtState),
+        VMSTATE_UINT32(wdog, BCM2835PowerMgtState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_powermgt_init(Object *obj)
+{
+    BCM2835PowerMgtState *s = BCM2835_POWERMGT(obj);
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_powermgt_ops, s,
+                          TYPE_BCM2835_POWERMGT, 0x200);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void bcm2835_powermgt_reset(DeviceState *dev)
+{
+    BCM2835PowerMgtState *s = BCM2835_POWERMGT(dev);
+
+    /* https://elinux.org/BCM2835_registers#PM */
+    s->rstc = 0x00000102;
+    s->rsts = 0x00001000;
+    s->wdog = 0x00000000;
+}
+
+static void bcm2835_powermgt_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_powermgt_reset;
+    dc->vmsd = &vmstate_bcm2835_powermgt;
+}
+
+static TypeInfo bcm2835_powermgt_info = {
+    .name = TYPE_BCM2835_POWERMGT,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835PowerMgtState),
+    .class_init = bcm2835_powermgt_class_init,
+    .instance_init = bcm2835_powermgt_init,
+};
+
+static void bcm2835_powermgt_register_types(void)
+{
+    type_register_static(&bcm2835_powermgt_info);
+}
+
+type_init(bcm2835_powermgt_register_types)
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 66e1648533..f89b5c1643 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -82,6 +82,7 @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files(
   'bcm2835_rng.c',
   'bcm2835_thermal.c',
   'bcm2835_cprman.c',
+  'bcm2835_powermgt.c',
 ))
 softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c'))
 softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c', 'zynq-xadc.c'))
diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
index 479e2346e8..d864879421 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -24,6 +24,7 @@
 #include "hw/misc/bcm2835_mphi.h"
 #include "hw/misc/bcm2835_thermal.h"
 #include "hw/misc/bcm2835_cprman.h"
+#include "hw/misc/bcm2835_powermgt.h"
 #include "hw/sd/sdhci.h"
 #include "hw/sd/bcm2835_sdhost.h"
 #include "hw/gpio/bcm2835_gpio.h"
@@ -48,7 +49,7 @@ struct BCM2835PeripheralState {
     BCM2835MphiState mphi;
     UnimplementedDeviceState txp;
     UnimplementedDeviceState armtmr;
-    UnimplementedDeviceState powermgt;
+    BCM2835PowerMgtState powermgt;
     BCM2835CprmanState cprman;
     PL011State uart0;
     BCM2835AuxState aux;
diff --git a/include/hw/misc/bcm2835_powermgt.h b/include/hw/misc/bcm2835_powermgt.h
new file mode 100644
index 0000000000..303b9a6f68
--- /dev/null
+++ b/include/hw/misc/bcm2835_powermgt.h
@@ -0,0 +1,29 @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_POWERMGT_H
+#define BCM2835_POWERMGT_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_POWERMGT "bcm2835-powermgt"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PowerMgtState, BCM2835_POWERMGT)
+
+struct BCM2835PowerMgtState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t rstc;
+    uint32_t rsts;
+    uint32_t wdog;
+};
+
+#endif
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 9cc3b066e2..ac5c4452d5 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -365,7 +365,9 @@ uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
         float16 nan = a;
         if (float16_is_signaling_nan(a, fpst)) {
             float_raise(float_flag_invalid, fpst);
-            nan = float16_silence_nan(a, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float16_silence_nan(a, fpst);
+            }
         }
         if (fpst->default_nan_mode) {
             nan = float16_default_nan(fpst);
@@ -396,7 +398,9 @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
         float32 nan = a;
         if (float32_is_signaling_nan(a, fpst)) {
             float_raise(float_flag_invalid, fpst);
-            nan = float32_silence_nan(a, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float32_silence_nan(a, fpst);
+            }
         }
         if (fpst->default_nan_mode) {
             nan = float32_default_nan(fpst);
@@ -427,7 +431,9 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
         float64 nan = a;
         if (float64_is_signaling_nan(a, fpst)) {
             float_raise(float_flag_invalid, fpst);
-            nan = float64_silence_nan(a, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float64_silence_nan(a, fpst);
+            }
         }
         if (fpst->default_nan_mode) {
             nan = float64_default_nan(fpst);
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 4bbb9b3ae2..56e40844ad 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -355,3 +355,111 @@ DEF_HELPER_FLAGS_3(mve_vaddvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
 DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vaddlv_s, TCG_CALL_NO_WG, i64, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vaddlv_u, TCG_CALL_NO_WG, i64, env, ptr, i64)
+
+DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshlui_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshlui_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshlui_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshllbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vsrib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsrih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsriw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vslib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vslih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsliw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_sshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_ushll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+
+DEF_HELPER_FLAGS_3(mve_uqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_sqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshr, TCG_CALL_NO_RWG, i32, env, i32, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index d9ece7be5d..595d97568e 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -26,10 +26,15 @@
 # VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit
 %size_28 28:1 !function=plus_1
 
+# 1imm format immediate
+%imm_28_16_0 28:1 16:3 0:4
+
 &vldr_vstr rn qd imm p a w size l u
 &1op qd qm size
 &2op qd qm qn size
 &2scalar qd qn rm size
+&1imm qd imm cmode op
+&2shift qd qm shift
size @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0 # Note that both Rn and Qd are 3 bits only (no D bit) @@ -41,6 +46,7 @@ @2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0 @2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \ size=%size_28 +@1imm .... .... .... .... .... cmode:4 .. op:1 . .... &1imm qd=%qd imm=%imm_28_16_0 # The _rev suffix indicates that Vn and Vm are reversed. This is # the case for shifts. In the Arm ARM these insns are documented @@ -54,6 +60,30 @@ @2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn @2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn +@2_shl_b .... .... .. 001 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0 +@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1 +@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2 + +@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0 +@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1 +# VSHLL encoding T2 where shift == esize +@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \ + qd=%qd qm=%qm size=0 shift=8 +@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \ + qd=%qd qm=%qm size=1 shift=16 + +# Right shifts are encoded as N - shift, where N is the element size in bits. +%rshift_i5 16:5 !function=rsub_32 +%rshift_i4 16:4 !function=rsub_16 +%rshift_i3 16:3 !function=rsub_8 + +@2_shr_b .... .... .. 001 ... .... .... .... .... &2shift qd=%qd qm=%qm \ + size=0 shift=%rshift_i3 +@2_shr_h .... .... .. 01 .... .... .... .... .... &2shift qd=%qd qm=%qm \ + size=1 shift=%rshift_i4 +@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \ + size=2 shift=%rshift_i5 + # Vector loads and stores # Widening loads and narrowing stores: @@ -100,11 +130,35 @@ VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op -VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op -VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +# The VSHLL T2 encoding is not a @2op pattern, but is here because it +# overlaps what would be size=0b11 VMULH/VRMULH +{ + VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h -VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op -VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op + VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op +} + +{ + VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op +} VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 
0 @2op @@ -253,8 +307,121 @@ VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar # Vector add across vector -VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo +{ + VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo + VADDLV 111 u:1 1110 1 ... 1001 ... 0 1111 00 a:1 0 qm:3 0 \ + rdahi=%rdahi rdalo=%rdalo +} # Predicate operations %mask_22_13 22:1 13:3 VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13 + +# Logical immediate operations (1 reg and modified-immediate) + +# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but +# not in a way we can conveniently represent in decodetree without +# a lot of repetition: +# VORR: op=0, (cmode & 1) && cmode < 12 +# VBIC: op=1, (cmode & 1) && cmode < 12 +# VMOV: everything else +# So we have a single decode line and check the cmode/op in the +# trans function. +Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm + +# Shifts by immediate + +VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b +VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h +VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w + +VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b +VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h +VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w + +VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b +VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h +VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w + +VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b +VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h +VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w + +VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b +VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h +VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w + +VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b +VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h +VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w + +VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b +VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h +VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w + +VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b +VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h +VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w + +# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file +VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h + +# Shift-and-insert +VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_b +VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_h +VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_w + +VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 
0 @2_shl_b +VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h +VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w + +# Narrowing shifts (which only support b and h sizes) +VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b +VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h +VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b +VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h + +VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b +VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h +VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b +VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h + +VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h + +VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b +VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h +VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b +VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h + +VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h + +VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b +VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h +VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b +VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h + +VSHLC 111 0 1110 1 . 1 imm:5 ... 0 1111 1100 rdm:4 qd=%qd diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index 05552ce7ee..db5d622085 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "qemu/int128.h" #include "cpu.h" #include "internals.h" #include "vec_internal.h" @@ -324,6 +323,30 @@ DO_1OP(vnegw, 4, int32_t, DO_NEG) DO_1OP(vfnegh, 8, uint64_t, DO_FNEGH) DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS) +/* + * 1 operand immediates: Vda is destination and possibly also one source. + * All these insns work at 64-bit widths. 
+ */ +#define DO_1OP_IMM(OP, FN) \ + void HELPER(mve_##OP)(CPUARMState *env, void *vda, uint64_t imm) \ + { \ + uint64_t *da = vda; \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + for (e = 0; e < 16 / 8; e++, mask >>= 8) { \ + mergemask(&da[H8(e)], FN(da[H8(e)], imm), mask); \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_MOVI(N, I) (I) +#define DO_ANDI(N, I) ((N) & (I)) +#define DO_ORRI(N, I) ((N) | (I)) + +DO_1OP_IMM(vmovi, DO_MOVI) +DO_1OP_IMM(vandi, DO_ANDI) +DO_1OP_IMM(vorri, DO_ORRI) + #define DO_2OP(OP, ESIZE, TYPE, FN) \ void HELPER(glue(mve_, OP))(CPUARMState *env, \ void *vd, void *vn, void *vm) \ @@ -710,6 +733,8 @@ DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W) WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, true, satp) #define DO_UQRSHL_OP(N, M, satp) \ WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, true, satp) +#define DO_SUQSHL_OP(N, M, satp) \ + WRAP_QRSHL_HELPER(do_suqrshl_bhs, N, M, false, satp) DO_2OP_SAT_S(vqshls, DO_SQSHL_OP) DO_2OP_SAT_U(vqshlu, DO_UQSHL_OP) @@ -1100,40 +1125,45 @@ DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=) DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=) /* - * Rounding multiply add long dual accumulate high: we must keep - * a 72-bit internal accumulator value and return the top 64 bits. + * Rounding multiply add long dual accumulate high. In the pseudocode + * this is implemented with a 72-bit internal accumulator value of which + * the top 64 bits are returned. We optimize this to avoid having to + * use 128-bit arithmetic -- we can do this because the 74-bit accumulator + * is squashed back into 64-bits after each beat. */ -#define DO_LDAVH(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC, TO128) \ +#define DO_LDAVH(OP, TYPE, LTYPE, XCHG, SUB) \ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \ void *vm, uint64_t a) \ { \ uint16_t mask = mve_element_mask(env); \ unsigned e; \ TYPE *n = vn, *m = vm; \ - Int128 acc = int128_lshift(TO128(a), 8); \ - for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ + for (e = 0; e < 16 / 4; e++, mask >>= 4) { \ if (mask & 1) { \ + LTYPE mul; \ if (e & 1) { \ - acc = ODDACC(acc, TO128(n[H##ESIZE(e - 1 * XCHG)] * \ - m[H##ESIZE(e)])); \ + mul = (LTYPE)n[H4(e - 1 * XCHG)] * m[H4(e)]; \ + if (SUB) { \ + mul = -mul; \ + } \ } else { \ - acc = EVENACC(acc, TO128(n[H##ESIZE(e + 1 * XCHG)] * \ - m[H##ESIZE(e)])); \ + mul = (LTYPE)n[H4(e + 1 * XCHG)] * m[H4(e)]; \ } \ - acc = int128_add(acc, int128_make64(1 << 7)); \ + mul = (mul >> 8) + ((mul >> 7) & 1); \ + a += mul; \ } \ } \ mve_advance_vpt(env); \ - return int128_getlo(int128_rshift(acc, 8)); \ + return a; \ } -DO_LDAVH(vrmlaldavhsw, 4, int32_t, false, int128_add, int128_add, int128_makes64) -DO_LDAVH(vrmlaldavhxsw, 4, int32_t, true, int128_add, int128_add, int128_makes64) +DO_LDAVH(vrmlaldavhsw, int32_t, int64_t, false, false) +DO_LDAVH(vrmlaldavhxsw, int32_t, int64_t, true, false) -DO_LDAVH(vrmlaldavhuw, 4, uint32_t, false, int128_add, int128_add, int128_make64) +DO_LDAVH(vrmlaldavhuw, uint32_t, uint64_t, false, false) -DO_LDAVH(vrmlsldavhsw, 4, int32_t, false, int128_add, int128_sub, int128_makes64) -DO_LDAVH(vrmlsldavhxsw, 4, int32_t, true, int128_add, int128_sub, int128_makes64) +DO_LDAVH(vrmlsldavhsw, int32_t, int64_t, false, true) +DO_LDAVH(vrmlsldavhxsw, int32_t, int64_t, true, true) /* Vector add across vector */ #define DO_VADDV(OP, ESIZE, TYPE) \ @@ -1158,3 +1188,463 @@ DO_VADDV(vaddvsw, 4, uint32_t) DO_VADDV(vaddvub, 1, uint8_t) DO_VADDV(vaddvuh, 2, uint16_t) DO_VADDV(vaddvuw, 4, uint32_t) + +#define DO_VADDLV(OP, TYPE, LTYPE) \ + uint64_t 
HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \ + uint64_t ra) \ + { \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + TYPE *m = vm; \ + for (e = 0; e < 16 / 4; e++, mask >>= 4) { \ + if (mask & 1) { \ + ra += (LTYPE)m[H4(e)]; \ + } \ + } \ + mve_advance_vpt(env); \ + return ra; \ + } \ + +DO_VADDLV(vaddlv_s, int32_t, int64_t) +DO_VADDLV(vaddlv_u, uint32_t, uint64_t) + +/* Shifts by immediate */ +#define DO_2SHIFT(OP, ESIZE, TYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + TYPE *d = vd, *m = vm; \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ + mergemask(&d[H##ESIZE(e)], \ + FN(m[H##ESIZE(e)], shift), mask); \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_2SHIFT_SAT(OP, ESIZE, TYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + TYPE *d = vd, *m = vm; \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + bool qc = false; \ + for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ + bool sat = false; \ + mergemask(&d[H##ESIZE(e)], \ + FN(m[H##ESIZE(e)], shift, &sat), mask); \ + qc |= sat & mask & 1; \ + } \ + if (qc) { \ + env->vfp.qc[0] = qc; \ + } \ + mve_advance_vpt(env); \ + } + +/* provide unsigned 2-op shift helpers for all sizes */ +#define DO_2SHIFT_U(OP, FN) \ + DO_2SHIFT(OP##b, 1, uint8_t, FN) \ + DO_2SHIFT(OP##h, 2, uint16_t, FN) \ + DO_2SHIFT(OP##w, 4, uint32_t, FN) +#define DO_2SHIFT_S(OP, FN) \ + DO_2SHIFT(OP##b, 1, int8_t, FN) \ + DO_2SHIFT(OP##h, 2, int16_t, FN) \ + DO_2SHIFT(OP##w, 4, int32_t, FN) + +#define DO_2SHIFT_SAT_U(OP, FN) \ + DO_2SHIFT_SAT(OP##b, 1, uint8_t, FN) \ + DO_2SHIFT_SAT(OP##h, 2, uint16_t, FN) \ + DO_2SHIFT_SAT(OP##w, 4, uint32_t, FN) +#define DO_2SHIFT_SAT_S(OP, FN) \ + DO_2SHIFT_SAT(OP##b, 1, int8_t, FN) \ + DO_2SHIFT_SAT(OP##h, 2, int16_t, FN) \ + DO_2SHIFT_SAT(OP##w, 4, int32_t, FN) + +DO_2SHIFT_U(vshli_u, DO_VSHLU) +DO_2SHIFT_S(vshli_s, DO_VSHLS) +DO_2SHIFT_SAT_U(vqshli_u, DO_UQSHL_OP) +DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP) +DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP) +DO_2SHIFT_U(vrshli_u, DO_VRSHLU) +DO_2SHIFT_S(vrshli_s, DO_VRSHLS) + +/* Shift-and-insert; we always work with 64 bits at a time */ +#define DO_2SHIFT_INSERT(OP, ESIZE, SHIFTFN, MASKFN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + uint64_t *d = vd, *m = vm; \ + uint16_t mask; \ + uint64_t shiftmask; \ + unsigned e; \ + if (shift == 0 || shift == ESIZE * 8) { \ + /* \ + * Only VSLI can shift by 0; only VSRI can shift by <dt>. \ + * The generic logic would give the right answer for 0 but \ + * fails for <dt>. 
\ + */ \ + goto done; \ + } \ + assert(shift < ESIZE * 8); \ + mask = mve_element_mask(env); \ + /* ESIZE / 2 gives the MO_* value if ESIZE is in [1,2,4] */ \ + shiftmask = dup_const(ESIZE / 2, MASKFN(ESIZE * 8, shift)); \ + for (e = 0; e < 16 / 8; e++, mask >>= 8) { \ + uint64_t r = (SHIFTFN(m[H8(e)], shift) & shiftmask) | \ + (d[H8(e)] & ~shiftmask); \ + mergemask(&d[H8(e)], r, mask); \ + } \ +done: \ + mve_advance_vpt(env); \ + } + +#define DO_SHL(N, SHIFT) ((N) << (SHIFT)) +#define DO_SHR(N, SHIFT) ((N) >> (SHIFT)) +#define SHL_MASK(EBITS, SHIFT) MAKE_64BIT_MASK((SHIFT), (EBITS) - (SHIFT)) +#define SHR_MASK(EBITS, SHIFT) MAKE_64BIT_MASK(0, (EBITS) - (SHIFT)) + +DO_2SHIFT_INSERT(vsrib, 1, DO_SHR, SHR_MASK) +DO_2SHIFT_INSERT(vsrih, 2, DO_SHR, SHR_MASK) +DO_2SHIFT_INSERT(vsriw, 4, DO_SHR, SHR_MASK) +DO_2SHIFT_INSERT(vslib, 1, DO_SHL, SHL_MASK) +DO_2SHIFT_INSERT(vslih, 2, DO_SHL, SHL_MASK) +DO_2SHIFT_INSERT(vsliw, 4, DO_SHL, SHL_MASK) + +/* + * Long shifts taking half-sized inputs from top or bottom of the input + * vector and producing a double-width result. ESIZE, TYPE are for + * the input, and LESIZE, LTYPE for the output. + * Unlike the normal shift helpers, we do not handle negative shift counts, + * because the long shift is strictly left-only. + */ +#define DO_VSHLL(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + LTYPE *d = vd; \ + TYPE *m = vm; \ + uint16_t mask = mve_element_mask(env); \ + unsigned le; \ + assert(shift <= 16); \ + for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \ + LTYPE r = (LTYPE)m[H##ESIZE(le * 2 + TOP)] << shift; \ + mergemask(&d[H##LESIZE(le)], r, mask); \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_VSHLL_ALL(OP, TOP) \ + DO_VSHLL(OP##sb, TOP, 1, int8_t, 2, int16_t) \ + DO_VSHLL(OP##ub, TOP, 1, uint8_t, 2, uint16_t) \ + DO_VSHLL(OP##sh, TOP, 2, int16_t, 4, int32_t) \ + DO_VSHLL(OP##uh, TOP, 2, uint16_t, 4, uint32_t) \ + +DO_VSHLL_ALL(vshllb, false) +DO_VSHLL_ALL(vshllt, true) + +/* + * Narrowing right shifts, taking a double sized input, shifting it + * and putting the result in either the top or bottom half of the output. + * ESIZE, TYPE are the output, and LESIZE, LTYPE the input. + */ +#define DO_VSHRN(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + LTYPE *m = vm; \ + TYPE *d = vd; \ + uint16_t mask = mve_element_mask(env); \ + unsigned le; \ + for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \ + TYPE r = FN(m[H##LESIZE(le)], shift); \ + mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_VSHRN_ALL(OP, FN) \ + DO_VSHRN(OP##bb, false, 1, uint8_t, 2, uint16_t, FN) \ + DO_VSHRN(OP##bh, false, 2, uint16_t, 4, uint32_t, FN) \ + DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \ + DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN) + +static inline uint64_t do_urshr(uint64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else if (sh == 64) { + return x >> 63; + } else { + return 0; + } +} + +static inline int64_t do_srshr(int64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else { + /* Rounding the sign bit always produces 0. 
*/ + return 0; + } +} + +DO_VSHRN_ALL(vshrn, DO_SHR) +DO_VSHRN_ALL(vrshrn, do_urshr) + +static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max, + bool *satp) +{ + if (val > max) { + *satp = true; + return max; + } else if (val < min) { + *satp = true; + return min; + } else { + return val; + } +} + +/* Saturating narrowing right shifts */ +#define DO_VSHRN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + LTYPE *m = vm; \ + TYPE *d = vd; \ + uint16_t mask = mve_element_mask(env); \ + bool qc = false; \ + unsigned le; \ + for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \ + bool sat = false; \ + TYPE r = FN(m[H##LESIZE(le)], shift, &sat); \ + mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \ + qc |= sat && (mask & 1 << (TOP * ESIZE)); \ + } \ + if (qc) { \ + env->vfp.qc[0] = qc; \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_VSHRN_SAT_UB(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \ + DO_VSHRN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN) + +#define DO_VSHRN_SAT_UH(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \ + DO_VSHRN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN) + +#define DO_VSHRN_SAT_SB(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \ + DO_VSHRN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN) + +#define DO_VSHRN_SAT_SH(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \ + DO_VSHRN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN) + +#define DO_SHRN_SB(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), INT8_MIN, INT8_MAX, SATP) +#define DO_SHRN_UB(N, M, SATP) \ + do_sat_bhs((uint64_t)(N) >> (M), 0, UINT8_MAX, SATP) +#define DO_SHRUN_B(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), 0, UINT8_MAX, SATP) + +#define DO_SHRN_SH(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), INT16_MIN, INT16_MAX, SATP) +#define DO_SHRN_UH(N, M, SATP) \ + do_sat_bhs((uint64_t)(N) >> (M), 0, UINT16_MAX, SATP) +#define DO_SHRUN_H(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), 0, UINT16_MAX, SATP) + +#define DO_RSHRN_SB(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), INT8_MIN, INT8_MAX, SATP) +#define DO_RSHRN_UB(N, M, SATP) \ + do_sat_bhs(do_urshr(N, M), 0, UINT8_MAX, SATP) +#define DO_RSHRUN_B(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), 0, UINT8_MAX, SATP) + +#define DO_RSHRN_SH(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), INT16_MIN, INT16_MAX, SATP) +#define DO_RSHRN_UH(N, M, SATP) \ + do_sat_bhs(do_urshr(N, M), 0, UINT16_MAX, SATP) +#define DO_RSHRUN_H(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), 0, UINT16_MAX, SATP) + +DO_VSHRN_SAT_SB(vqshrnb_sb, vqshrnt_sb, DO_SHRN_SB) +DO_VSHRN_SAT_SH(vqshrnb_sh, vqshrnt_sh, DO_SHRN_SH) +DO_VSHRN_SAT_UB(vqshrnb_ub, vqshrnt_ub, DO_SHRN_UB) +DO_VSHRN_SAT_UH(vqshrnb_uh, vqshrnt_uh, DO_SHRN_UH) +DO_VSHRN_SAT_SB(vqshrunbb, vqshruntb, DO_SHRUN_B) +DO_VSHRN_SAT_SH(vqshrunbh, vqshrunth, DO_SHRUN_H) + +DO_VSHRN_SAT_SB(vqrshrnb_sb, vqrshrnt_sb, DO_RSHRN_SB) +DO_VSHRN_SAT_SH(vqrshrnb_sh, vqrshrnt_sh, DO_RSHRN_SH) +DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB) +DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH) +DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B) +DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H) + +uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm, + uint32_t shift) +{ + uint32_t *d = vd; + uint16_t mask = mve_element_mask(env); + unsigned e; + uint32_t r; + + /* + * For each 32-bit element, we shift it left, bringing in the + * low 
'shift' bits of rdm at the bottom. Bits shifted out at + * the top become the new rdm, if the predicate mask permits. + * The final rdm value is returned to update the register. + * shift == 0 here means "shift by 32 bits". + */ + if (shift == 0) { + for (e = 0; e < 16 / 4; e++, mask >>= 4) { + r = rdm; + if (mask & 1) { + rdm = d[H4(e)]; + } + mergemask(&d[H4(e)], r, mask); + } + } else { + uint32_t shiftmask = MAKE_64BIT_MASK(0, shift); + + for (e = 0; e < 16 / 4; e++, mask >>= 4) { + r = (d[H4(e)] << shift) | (rdm & shiftmask); + if (mask & 1) { + rdm = d[H4(e)] >> (32 - shift); + } + mergemask(&d[H4(e)], r, mask); + } + } + mve_advance_vpt(env); + return rdm; +} + +uint64_t HELPER(mve_sshrl)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_sqrshl_d(n, -(int8_t)shift, false, NULL); +} + +uint64_t HELPER(mve_ushll)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_uqrshl_d(n, (int8_t)shift, false, NULL); +} + +uint64_t HELPER(mve_sqshll)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_sqrshl_d(n, (int8_t)shift, false, &env->QF); +} + +uint64_t HELPER(mve_uqshll)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_uqrshl_d(n, (int8_t)shift, false, &env->QF); +} + +uint64_t HELPER(mve_sqrshrl)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF); +} + +uint64_t HELPER(mve_uqrshll)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_uqrshl_d(n, (int8_t)shift, true, &env->QF); +} + +/* Operate on 64-bit values, but saturate at 48 bits */ +static inline int64_t do_sqrshl48_d(int64_t src, int64_t shift, + bool round, uint32_t *sat) +{ + if (shift <= -48) { + /* Rounding the sign bit always produces 0. */ + if (round) { + return 0; + } + return src >> 63; + } else if (shift < 0) { + if (round) { + src >>= -shift - 1; + return (src >> 1) + (src & 1); + } + return src >> -shift; + } else if (shift < 48) { + int64_t val = src << shift; + int64_t extval = sextract64(val, 0, 48); + if (!sat || val == extval) { + return extval; + } + } else if (!sat || src == 0) { + return 0; + } + + *sat = 1; + return (1ULL << 47) - (src >= 0); +} + +/* Operate on 64-bit values, but saturate at 48 bits */ +static inline uint64_t do_uqrshl48_d(uint64_t src, int64_t shift, + bool round, uint32_t *sat) +{ + uint64_t val, extval; + + if (shift <= -(48 + round)) { + return 0; + } else if (shift < 0) { + if (round) { + val = src >> (-shift - 1); + val = (val >> 1) + (val & 1); + } else { + val = src >> -shift; + } + extval = extract64(val, 0, 48); + if (!sat || val == extval) { + return extval; + } + } else if (shift < 48) { + uint64_t val = src << shift; + uint64_t extval = extract64(val, 0, 48); + if (!sat || val == extval) { + return extval; + } + } else if (!sat || src == 0) { + return 0; + } + + *sat = 1; + return MAKE_64BIT_MASK(0, 48); +} + +uint64_t HELPER(mve_sqrshrl48)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF); +} + +uint64_t HELPER(mve_uqrshll48)(CPUARMState *env, uint64_t n, uint32_t shift) +{ + return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF); +} + +uint32_t HELPER(mve_uqshl)(CPUARMState *env, uint32_t n, uint32_t shift) +{ + return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF); +} + +uint32_t HELPER(mve_sqshl)(CPUARMState *env, uint32_t n, uint32_t shift) +{ + return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF); +} + +uint32_t HELPER(mve_uqrshl)(CPUARMState *env, uint32_t n, uint32_t shift) +{ + return do_uqrshl_bhs(n, 
(int8_t)shift, 32, true, &env->QF); +} + +uint32_t HELPER(mve_sqrshr)(CPUARMState *env, uint32_t n, uint32_t shift) +{ + return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF); +} diff --git a/target/arm/t32.decode b/target/arm/t32.decode index 0f9326c724..2d47f31f14 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -48,6 +48,16 @@ &mcr !extern cp opc1 crn crm opc2 rt &mcrr !extern cp opc1 crm rt rt2 +&mve_shl_ri rdalo rdahi shim +&mve_shl_rr rdalo rdahi rm +&mve_sh_ri rda shim +&mve_sh_rr rda rm + +# rdahi: bits [3:1] from insn, bit 0 is 1 +# rdalo: bits [3:1] from insn, bit 0 is 0 +%rdahi_9 9:3 !function=times_2_plus_1 +%rdalo_17 17:3 !function=times_2 + # Data-processing (register) %imm5_12_6 12:3 6:2 @@ -59,14 +69,72 @@ @S_xrr_shi ....... .... . rn:4 .... .... .. shty:2 rm:4 \ &s_rrr_shi shim=%imm5_12_6 s=1 rd=0 +@mve_shl_ri ....... .... . ... . . ... ... . .. .. .... \ + &mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9 +@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \ + &mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9 +@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \ + &mve_sh_ri shim=%imm5_12_6 +@mve_sh_rr ....... .... . rda:4 rm:4 .... .... .... &mve_sh_rr + { TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi AND_rrri 1110101 0000 . .... 0 ... .... .... .... @s_rrr_shi } BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi { + # The v8.1M MVE shift insns overlap in encoding with MOVS/ORRS + # and are distinguished by having Rm==13 or 15. Those are UNPREDICTABLE + # cases for MOVS/ORRS. We decode the MVE cases first, ensuring that + # they explicitly call unallocated_encoding() for cases that must UNDEF + # (eg "using a new shift insn on a v8.1M CPU without MVE"), and letting + # the rest fall through (where ORR_rrri and MOV_rxri will end up + # handling them as r13 and r15 accesses with the same semantics as A32). + [ + { + UQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 00 1111 @mve_sh_ri + LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri + UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri + } + + { + URSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 01 1111 @mve_sh_ri + LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri + URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri + } + + { + SRSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 10 1111 @mve_sh_ri + ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri + SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri + } + + { + SQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 11 1111 @mve_sh_ri + SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri + } + + { + UQRSHL_rr 1110101 0010 1 .... .... 1111 0000 1101 @mve_sh_rr + LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr + UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr + } + + { + SQRSHR_rr 1110101 0010 1 .... .... 1111 0010 1101 @mve_sh_rr + ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr + SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr + } + + UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr + SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr + ] + MOV_rxri 1110101 0010 . 1111 0 ... .... .... .... @s_rxr_shi ORR_rrri 1110101 0010 . .... 0 ... .... .... .... @s_rrr_shi + + # v8.1M CSEL and friends + CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4 } { MVN_rxri 1110101 0011 . 1111 0 ... .... .... .... @s_rxr_shi @@ -90,9 +158,6 @@ SBC_rrri 1110101 1011 . 
.... 0 ... .... .... .... @s_rrr_shi } RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi -# v8.1M CSEL and friends -CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4 - # Data-processing (register-shifted register) MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \ diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 1a40e49db7..e81cc20d04 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -8190,8 +8190,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) { int rd = extract32(insn, 0, 5); int cmode = extract32(insn, 12, 4); - int cmode_3_1 = extract32(cmode, 1, 3); - int cmode_0 = extract32(cmode, 0, 1); int o2 = extract32(insn, 11, 1); uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); bool is_neg = extract32(insn, 29, 1); @@ -8210,83 +8208,13 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) return; } - /* See AdvSIMDExpandImm() in ARM ARM */ - switch (cmode_3_1) { - case 0: /* Replicate(Zeros(24):imm8, 2) */ - case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */ - case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */ - case 3: /* Replicate(imm8:Zeros(24), 2) */ - { - int shift = cmode_3_1 * 8; - imm = bitfield_replicate(abcdefgh << shift, 32); - break; - } - case 4: /* Replicate(Zeros(8):imm8, 4) */ - case 5: /* Replicate(imm8:Zeros(8), 4) */ - { - int shift = (cmode_3_1 & 0x1) * 8; - imm = bitfield_replicate(abcdefgh << shift, 16); - break; - } - case 6: - if (cmode_0) { - /* Replicate(Zeros(8):imm8:Ones(16), 2) */ - imm = (abcdefgh << 16) | 0xffff; - } else { - /* Replicate(Zeros(16):imm8:Ones(8), 2) */ - imm = (abcdefgh << 8) | 0xff; - } - imm = bitfield_replicate(imm, 32); - break; - case 7: - if (!cmode_0 && !is_neg) { - imm = bitfield_replicate(abcdefgh, 8); - } else if (!cmode_0 && is_neg) { - int i; - imm = 0; - for (i = 0; i < 8; i++) { - if ((abcdefgh) & (1 << i)) { - imm |= 0xffULL << (i * 8); - } - } - } else if (cmode_0) { - if (is_neg) { - imm = (abcdefgh & 0x3f) << 48; - if (abcdefgh & 0x80) { - imm |= 0x8000000000000000ULL; - } - if (abcdefgh & 0x40) { - imm |= 0x3fc0000000000000ULL; - } else { - imm |= 0x4000000000000000ULL; - } - } else { - if (o2) { - /* FMOV (vector, immediate) - half-precision */ - imm = vfp_expand_imm(MO_16, abcdefgh); - /* now duplicate across the lanes */ - imm = bitfield_replicate(imm, 16); - } else { - imm = (abcdefgh & 0x3f) << 19; - if (abcdefgh & 0x80) { - imm |= 0x80000000; - } - if (abcdefgh & 0x40) { - imm |= 0x3e000000; - } else { - imm |= 0x40000000; - } - imm |= (imm << 32); - } - } - } - break; - default: - g_assert_not_reached(); - } - - if (cmode_3_1 != 7 && is_neg) { - imm = ~imm; + if (cmode == 15 && o2 && !is_neg) { + /* FMOV (vector, immediate) - half-precision */ + imm = vfp_expand_imm(MO_16, abcdefgh); + /* now duplicate across the lanes */ + imm = dup_const(MO_16, imm); + } else { + imm = asimd_imm_const(abcdefgh, cmode, is_neg); } if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index 67462bdf27..a2a45036a0 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -32,8 +32,10 @@ typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr); typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void 
MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64); typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64); /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */ static inline long mve_qreg_offset(unsigned reg) @@ -120,7 +122,8 @@ static bool mve_skip_first_beat(DisasContext *s) } } -static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn) +static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn, + unsigned msize) { TCGv_i32 addr; uint32_t offset; @@ -141,7 +144,7 @@ static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn) return true; } - offset = a->imm << a->size; + offset = a->imm << msize; if (!a->a) { offset = -offset; } @@ -178,22 +181,22 @@ static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a) { gen_helper_mve_vstrw, gen_helper_mve_vldrw }, { NULL, NULL } }; - return do_ldst(s, a, ldstfns[a->size][a->l]); + return do_ldst(s, a, ldstfns[a->size][a->l], a->size); } -#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST) \ +#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \ static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \ { \ static MVEGenLdStFn * const ldstfns[2][2] = { \ { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \ { NULL, gen_helper_mve_##ULD }, \ }; \ - return do_ldst(s, a, ldstfns[a->u][a->l]); \ + return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \ } -DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h) -DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w) -DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w) +DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8) +DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8) +DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16) static bool trans_VDUP(DisasContext *s, arg_VDUP *a) { @@ -786,3 +789,245 @@ static bool trans_VADDV(DisasContext *s, arg_VADDV *a) mve_update_eci(s); return true; } + +static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a) +{ + /* + * Vector Add Long Across Vector: accumulate the 32-bit + * elements of the vector into a 64-bit result stored in + * a pair of general-purpose registers. + * No need to check Qm's bank: it is only 3 bits in decode. + */ + TCGv_ptr qm; + TCGv_i64 rda; + TCGv_i32 rdalo, rdahi; + + if (!dc_isar_feature(aa32_mve, s)) { + return false; + } + /* + * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related + * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. + */ + if (a->rdahi == 13 || a->rdahi == 15) { + return false; + } + if (!mve_eci_check(s) || !vfp_access_check(s)) { + return true; + } + + /* + * This insn is subject to beat-wise execution. Partial execution + * of an A=0 (no-accumulate) insn which does not execute the first + * beat must start with the current value of RdaHi:RdaLo, not zero. 
+ */ + if (a->a || mve_skip_first_beat(s)) { + /* Accumulate input from RdaHi:RdaLo */ + rda = tcg_temp_new_i64(); + rdalo = load_reg(s, a->rdalo); + rdahi = load_reg(s, a->rdahi); + tcg_gen_concat_i32_i64(rda, rdalo, rdahi); + tcg_temp_free_i32(rdalo); + tcg_temp_free_i32(rdahi); + } else { + /* Accumulate starting at zero */ + rda = tcg_const_i64(0); + } + + qm = mve_qreg_ptr(a->qm); + if (a->u) { + gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda); + } else { + gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda); + } + tcg_temp_free_ptr(qm); + + rdalo = tcg_temp_new_i32(); + rdahi = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(rdalo, rda); + tcg_gen_extrh_i64_i32(rdahi, rda); + store_reg(s, a->rdalo, rdalo); + store_reg(s, a->rdahi, rdahi); + tcg_temp_free_i64(rda); + mve_update_eci(s); + return true; +} + +static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn) +{ + TCGv_ptr qd; + uint64_t imm; + + if (!dc_isar_feature(aa32_mve, s) || + !mve_check_qreg_bank(s, a->qd) || + !fn) { + return false; + } + if (!mve_eci_check(s) || !vfp_access_check(s)) { + return true; + } + + imm = asimd_imm_const(a->imm, a->cmode, a->op); + + qd = mve_qreg_ptr(a->qd); + fn(cpu_env, qd, tcg_constant_i64(imm)); + tcg_temp_free_ptr(qd); + mve_update_eci(s); + return true; +} + +static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a) +{ + /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ + MVEGenOneOpImmFn *fn; + + if ((a->cmode & 1) && a->cmode < 12) { + if (a->op) { + /* + * For op=1, the immediate will be inverted by asimd_imm_const(), + * so the VBIC becomes a logical AND operation. + */ + fn = gen_helper_mve_vandi; + } else { + fn = gen_helper_mve_vorri; + } + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1) { + return false; + } + /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */ + fn = gen_helper_mve_vmovi; + } + return do_1imm(s, a, fn); +} + +static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, + bool negateshift) +{ + TCGv_ptr qd, qm; + int shift = a->shift; + + if (!dc_isar_feature(aa32_mve, s) || + !mve_check_qreg_bank(s, a->qd | a->qm) || + !fn) { + return false; + } + if (!mve_eci_check(s) || !vfp_access_check(s)) { + return true; + } + + /* + * When we handle a right shift insn using a left-shift helper + * which permits a negative shift count to indicate a right-shift, + * we must negate the shift count. 
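+     * (Editorial example, not in the original patch: VSHRI_S with
+     * shift == 2 is emitted as a call to the vshli_s helper with
+     * shift == -2; see the DO_2SHIFT(..., true) uses below.)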
+ */ + if (negateshift) { + shift = -shift; + } + + qd = mve_qreg_ptr(a->qd); + qm = mve_qreg_ptr(a->qm); + fn(cpu_env, qd, qm, tcg_constant_i32(shift)); + tcg_temp_free_ptr(qd); + tcg_temp_free_ptr(qm); + mve_update_eci(s); + return true; +} + +#define DO_2SHIFT(INSN, FN, NEGATESHIFT) \ + static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ + { \ + static MVEGenTwoOpShiftFn * const fns[] = { \ + gen_helper_mve_##FN##b, \ + gen_helper_mve_##FN##h, \ + gen_helper_mve_##FN##w, \ + NULL, \ + }; \ + return do_2shift(s, a, fns[a->size], NEGATESHIFT); \ + } + +DO_2SHIFT(VSHLI, vshli_u, false) +DO_2SHIFT(VQSHLI_S, vqshli_s, false) +DO_2SHIFT(VQSHLI_U, vqshli_u, false) +DO_2SHIFT(VQSHLUI, vqshlui_s, false) +/* These right shifts use a left-shift helper with negated shift count */ +DO_2SHIFT(VSHRI_S, vshli_s, true) +DO_2SHIFT(VSHRI_U, vshli_u, true) +DO_2SHIFT(VRSHRI_S, vrshli_s, true) +DO_2SHIFT(VRSHRI_U, vrshli_u, true) + +DO_2SHIFT(VSRI, vsri, false) +DO_2SHIFT(VSLI, vsli, false) + +#define DO_VSHLL(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ + { \ + static MVEGenTwoOpShiftFn * const fns[] = { \ + gen_helper_mve_##FN##b, \ + gen_helper_mve_##FN##h, \ + }; \ + return do_2shift(s, a, fns[a->size], false); \ + } + +DO_VSHLL(VSHLL_BS, vshllbs) +DO_VSHLL(VSHLL_BU, vshllbu) +DO_VSHLL(VSHLL_TS, vshllts) +DO_VSHLL(VSHLL_TU, vshlltu) + +#define DO_2SHIFT_N(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ + { \ + static MVEGenTwoOpShiftFn * const fns[] = { \ + gen_helper_mve_##FN##b, \ + gen_helper_mve_##FN##h, \ + }; \ + return do_2shift(s, a, fns[a->size], false); \ + } + +DO_2SHIFT_N(VSHRNB, vshrnb) +DO_2SHIFT_N(VSHRNT, vshrnt) +DO_2SHIFT_N(VRSHRNB, vrshrnb) +DO_2SHIFT_N(VRSHRNT, vrshrnt) +DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s) +DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s) +DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u) +DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u) +DO_2SHIFT_N(VQSHRUNB, vqshrunb) +DO_2SHIFT_N(VQSHRUNT, vqshrunt) +DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s) +DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s) +DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u) +DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u) +DO_2SHIFT_N(VQRSHRUNB, vqrshrunb) +DO_2SHIFT_N(VQRSHRUNT, vqrshrunt) + +static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a) +{ + /* + * Whole Vector Left Shift with Carry. The carry is taken + * from a general purpose register and written back there. + * An imm of 0 means "shift by 32". 
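+     * (Editorial note, not in the original patch: the helper shifts the
+     * whole 128-bit Qd left by imm bits, shifting in the low imm bits of
+     * Rdm and returning the bits shifted out of the top in Rdm.)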
+ */ + TCGv_ptr qd; + TCGv_i32 rdm; + + if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) { + return false; + } + if (a->rdm == 13 || a->rdm == 15) { + /* CONSTRAINED UNPREDICTABLE: we UNDEF */ + return false; + } + if (!mve_eci_check(s) || !vfp_access_check(s)) { + return true; + } + + qd = mve_qreg_ptr(a->qd); + rdm = load_reg(s, a->rdm); + gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm)); + store_reg(s, a->rdm, rdm); + tcg_temp_free_ptr(qd); + mve_update_eci(s); + return true; +} diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 633fef3bf7..a45616cb63 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -33,24 +33,6 @@ static inline int plus1(DisasContext *s, int x) return x + 1; } -static inline int rsub_64(DisasContext *s, int x) -{ - return 64 - x; -} - -static inline int rsub_32(DisasContext *s, int x) -{ - return 32 - x; -} -static inline int rsub_16(DisasContext *s, int x) -{ - return 16 - x; -} -static inline int rsub_8(DisasContext *s, int x) -{ - return 8 - x; -} - static inline int neon_3same_fp_size(DisasContext *s, int x) { /* Convert 0==fp32, 1==fp16 into a MO_* value */ @@ -1781,69 +1763,6 @@ DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh) DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs) DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu) -static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) -{ - /* - * Expand the encoded constant. - * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. - * We choose to not special-case this and will behave as if a - * valid constant encoding of 0 had been given. - * cmode = 15 op = 1 must UNDEF; we assume decode has handled that. - */ - switch (cmode) { - case 0: case 1: - /* no-op */ - break; - case 2: case 3: - imm <<= 8; - break; - case 4: case 5: - imm <<= 16; - break; - case 6: case 7: - imm <<= 24; - break; - case 8: case 9: - imm |= imm << 16; - break; - case 10: case 11: - imm = (imm << 8) | (imm << 24); - break; - case 12: - imm = (imm << 8) | 0xff; - break; - case 13: - imm = (imm << 16) | 0xffff; - break; - case 14: - if (op) { - /* - * This is the only case where the top and bottom 32 bits - * of the encoded constant differ. - */ - uint64_t imm64 = 0; - int n; - - for (n = 0; n < 8; n++) { - if (imm & (1 << n)) { - imm64 |= (0xffULL << (n * 8)); - } - } - return imm64; - } - imm |= (imm << 8) | (imm << 16) | (imm << 24); - break; - case 15: - imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) - | ((imm & 0x40) ? 
(0x1f << 25) : (1 << 30)); - break; - } - if (op) { - imm = ~imm; - } - return dup_const(MO_32, imm); -} - static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, GVecGen2iFn *fn) { diff --git a/target/arm/translate.c b/target/arm/translate.c index a0c6cfa902..28e478927d 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -90,6 +90,76 @@ void arm_translate_init(void) a64_translate_init(); } +uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) +{ + /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */ + switch (cmode) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + if (op) { + /* + * This and cmode == 15 op == 1 are the only cases where + * the top and bottom 32 bits of the encoded constant differ. + */ + uint64_t imm64 = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << n)) { + imm64 |= (0xffULL << (n * 8)); + } + } + return imm64; + } + imm |= (imm << 8) | (imm << 16) | (imm << 24); + break; + case 15: + if (op) { + /* Reserved encoding for AArch32; valid for AArch64 */ + uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48; + if (imm & 0x80) { + imm64 |= 0x8000000000000000ULL; + } + if (imm & 0x40) { + imm64 |= 0x3fc0000000000000ULL; + } else { + imm64 |= 0x4000000000000000ULL; + } + return imm64; + } + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (op) { + imm = ~imm; + } + return dup_const(MO_32, imm); +} + /* Generate a label used for skipping this instruction */ void arm_gen_condlabel(DisasContext *s) { @@ -3148,8 +3218,14 @@ static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) { - TCGv_i32 t = tcg_temp_new_i32(); + TCGv_i32 t; + /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ + if (sh == 32) { + tcg_gen_movi_i32(d, 0); + return; + } + t = tcg_temp_new_i32(); tcg_gen_extract_i32(t, a, sh - 1, 1); tcg_gen_sari_i32(d, a, sh); tcg_gen_add_i32(d, d, t); @@ -3349,8 +3425,14 @@ static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) { - TCGv_i32 t = tcg_temp_new_i32(); + TCGv_i32 t; + /* Handle shift by the input size for the benefit of trans_URSHR_ri */ + if (sh == 32) { + tcg_gen_extract_i32(d, a, sh - 1, 1); + return; + } + t = tcg_temp_new_i32(); tcg_gen_extract_i32(t, a, sh - 1, 1); tcg_gen_shri_i32(d, a, sh); tcg_gen_add_i32(d, d, t); @@ -5633,6 +5715,247 @@ static bool trans_MOVT(DisasContext *s, arg_MOVW *a) } /* + * v8.1M MVE wide-shifts + */ +static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a, + WideShiftImmFn *fn) +{ + TCGv_i64 rda; + TCGv_i32 rdalo, rdahi; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + /* Decode falls through to ORR/MOV UNPREDICTABLE handling */ + return false; + } + if (a->rdahi == 15) { + /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */ + return false; + } + if (!dc_isar_feature(aa32_mve, s) || + !arm_dc_feature(s, ARM_FEATURE_M_MAIN) || + a->rdahi == 13) { + /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */ + unallocated_encoding(s); + return true; + } + + if (a->shim == 0) { + a->shim = 32; + } + + rda = 
tcg_temp_new_i64(); + rdalo = load_reg(s, a->rdalo); + rdahi = load_reg(s, a->rdahi); + tcg_gen_concat_i32_i64(rda, rdalo, rdahi); + + fn(rda, rda, a->shim); + + tcg_gen_extrl_i64_i32(rdalo, rda); + tcg_gen_extrh_i64_i32(rdahi, rda); + store_reg(s, a->rdalo, rdalo); + store_reg(s, a->rdahi, rdahi); + tcg_temp_free_i64(rda); + + return true; +} + +static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, tcg_gen_sari_i64); +} + +static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, tcg_gen_shli_i64); +} + +static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, tcg_gen_shri_i64); +} + +static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift) +{ + gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift)); +} + +static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, gen_mve_sqshll); +} + +static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift) +{ + gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift)); +} + +static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, gen_mve_uqshll); +} + +static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, gen_srshr64_i64); +} + +static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a) +{ + return do_mve_shl_ri(s, a, gen_urshr64_i64); +} + +static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn) +{ + TCGv_i64 rda; + TCGv_i32 rdalo, rdahi; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + /* Decode falls through to ORR/MOV UNPREDICTABLE handling */ + return false; + } + if (a->rdahi == 15) { + /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */ + return false; + } + if (!dc_isar_feature(aa32_mve, s) || + !arm_dc_feature(s, ARM_FEATURE_M_MAIN) || + a->rdahi == 13 || a->rm == 13 || a->rm == 15 || + a->rm == a->rdahi || a->rm == a->rdalo) { + /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */ + unallocated_encoding(s); + return true; + } + + rda = tcg_temp_new_i64(); + rdalo = load_reg(s, a->rdalo); + rdahi = load_reg(s, a->rdahi); + tcg_gen_concat_i32_i64(rda, rdalo, rdahi); + + /* The helper takes care of the sign-extension of the low 8 bits of Rm */ + fn(rda, cpu_env, rda, cpu_R[a->rm]); + + tcg_gen_extrl_i64_i32(rdalo, rda); + tcg_gen_extrh_i64_i32(rdahi, rda); + store_reg(s, a->rdalo, rdalo); + store_reg(s, a->rdahi, rdahi); + tcg_temp_free_i64(rda); + + return true; +} + +static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a) +{ + return do_mve_shl_rr(s, a, gen_helper_mve_ushll); +} + +static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a) +{ + return do_mve_shl_rr(s, a, gen_helper_mve_sshrl); +} + +static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a) +{ + return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll); +} + +static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a) +{ + return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl); +} + +static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a) +{ + return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48); +} + +static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a) +{ + return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48); +} + +static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + /* Decode falls through to ORR/MOV UNPREDICTABLE handling */ + return false; + } + 
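+    /*
+     * Editorial note, not in the original patch: past the v8.1M check
+     * above the insn cannot decode as anything else, so the failures
+     * below UNDEF via unallocated_encoding() instead of returning false.
+     */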
if (!dc_isar_feature(aa32_mve, s) || + !arm_dc_feature(s, ARM_FEATURE_M_MAIN) || + a->rda == 13 || a->rda == 15) { + /* These rda cases are UNPREDICTABLE; we choose to UNDEF */ + unallocated_encoding(s); + return true; + } + + if (a->shim == 0) { + a->shim = 32; + } + fn(cpu_R[a->rda], cpu_R[a->rda], a->shim); + + return true; +} + +static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a) +{ + return do_mve_sh_ri(s, a, gen_urshr32_i32); +} + +static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a) +{ + return do_mve_sh_ri(s, a, gen_srshr32_i32); +} + +static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift) +{ + gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift)); +} + +static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a) +{ + return do_mve_sh_ri(s, a, gen_mve_sqshl); +} + +static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift) +{ + gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift)); +} + +static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a) +{ + return do_mve_sh_ri(s, a, gen_mve_uqshl); +} + +static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + /* Decode falls through to ORR/MOV UNPREDICTABLE handling */ + return false; + } + if (!dc_isar_feature(aa32_mve, s) || + !arm_dc_feature(s, ARM_FEATURE_M_MAIN) || + a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 || + a->rm == a->rda) { + /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */ + unallocated_encoding(s); + return true; + } + + /* The helper takes care of the sign-extension of the low 8 bits of Rm */ + fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]); + return true; +} + +static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a) +{ + return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr); +} + +static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a) +{ + return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl); +} + +/* * Multiply and multiply accumulate */ diff --git a/target/arm/translate.h b/target/arm/translate.h index 99c917c571..241596c5bd 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -161,6 +161,26 @@ static inline int times_2_plus_1(DisasContext *s, int x) return x * 2 + 1; } +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} + +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} + +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + static inline int arm_dc_feature(DisasContext *dc, int feature) { return (dc->features & (1ULL << feature)) != 0; @@ -444,6 +464,10 @@ typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); +typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift); +typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32); +typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift); +typedef void ShiftFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); /** * arm_tbflags_from_tb: @@ -532,4 +556,21 @@ static inline MemOp finalize_memop(DisasContext *s, MemOp opc) return opc | s->be_data; } +/** + * asimd_imm_const: Expand an encoded SIMD constant value + * + * Expand a SIMD constant value. 
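+ * (Editorial worked example, not in the original patch: cmode == 13 with
+ * op == 0 and imm == 0xab expands to (0xab << 16) | 0xffff == 0x00abffff,
+ * which dup_const(MO_32, ...) then replicates to 0x00abffff00abffff.)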
This is essentially the pseudocode
+ * AdvSIMDExpandImm, except that we also perform the boolean NOT needed for
+ * VMVN and VBIC (when cmode < 14 && op == 1).
+ *
+ * The combination cmode == 15 op == 1 is a reserved encoding for AArch32;
+ * callers must catch this; we return the 64-bit constant value defined
+ * for AArch64.
+ *
+ * cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 was UNPREDICTABLE in v7A but
+ * is either not unpredictable or merely CONSTRAINED UNPREDICTABLE in v8A;
+ * we produce an immediate constant value of 0 in these cases.
+ */
+uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 8a71660059..24e3d820a5 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -671,7 +671,9 @@ uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
         float16 nan = f16;
         if (float16_is_signaling_nan(f16, fpst)) {
             float_raise(float_flag_invalid, fpst);
-            nan = float16_silence_nan(f16, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float16_silence_nan(f16, fpst);
+            }
         }
         if (fpst->default_nan_mode) {
             nan = float16_default_nan(fpst);
@@ -719,7 +721,9 @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp)
         float32 nan = f32;
         if (float32_is_signaling_nan(f32, fpst)) {
             float_raise(float_flag_invalid, fpst);
-            nan = float32_silence_nan(f32, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float32_silence_nan(f32, fpst);
+            }
         }
         if (fpst->default_nan_mode) {
             nan = float32_default_nan(fpst);
@@ -767,7 +771,9 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
         float64 nan = f64;
         if (float64_is_signaling_nan(f64, fpst)) {
             float_raise(float_flag_invalid, fpst);
-            nan = float64_silence_nan(f64, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float64_silence_nan(f64, fpst);
+            }
         }
         if (fpst->default_nan_mode) {
             nan = float64_default_nan(fpst);
@@ -866,7 +872,9 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
         float16 nan = f16;
         if (float16_is_signaling_nan(f16, s)) {
             float_raise(float_flag_invalid, s);
-            nan = float16_silence_nan(f16, s);
+            if (!s->default_nan_mode) {
+                nan = float16_silence_nan(f16, fpstp);
+            }
         }
         if (s->default_nan_mode) {
             nan = float16_default_nan(s);
@@ -910,7 +918,9 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
         float32 nan = f32;
         if (float32_is_signaling_nan(f32, s)) {
             float_raise(float_flag_invalid, s);
-            nan = float32_silence_nan(f32, s);
+            if (!s->default_nan_mode) {
+                nan = float32_silence_nan(f32, fpstp);
+            }
         }
         if (s->default_nan_mode) {
             nan = float32_default_nan(s);
@@ -953,7 +963,9 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
         float64 nan = f64;
         if (float64_is_signaling_nan(f64, s)) {
             float_raise(float_flag_invalid, s);
-            nan = float64_silence_nan(f64, s);
+            if (!s->default_nan_mode) {
+                nan = float64_silence_nan(f64, fpstp);
+            }
         }
         if (s->default_nan_mode) {
             nan = float64_default_nan(s);
diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py
index cded547d1d..3ae11a7a8f 100644
--- a/tests/acceptance/boot_linux_console.py
+++ b/tests/acceptance/boot_linux_console.py
@@ -16,6 +16,7 @@ import shutil
 from avocado import skip
 from avocado import skipUnless
 from avocado_qemu import Test
+from avocado_qemu import exec_command
 from avocado_qemu import exec_command_and_wait_for_pattern
 from avocado_qemu import interrupt_interactive_console_until_pattern
 from avocado_qemu import wait_for_console_pattern
@@ -477,6 +478,48 @@
         """
         self.do_test_arm_raspi2(0)
 
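+    # Editorial note, not part of the original patch: beyond checking that
+    # the kernel boots, this test exercises the guest power-off path by
+    # issuing 'halt' and then waiting for QEMU itself to exit cleanly.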
+    def test_arm_raspi2_initrd(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:raspi2
+        """
+        deb_url = ('http://archive.raspberrypi.org/debian/'
+                   'pool/main/r/raspberrypi-firmware/'
+                   'raspberrypi-kernel_1.20190215-1_armhf.deb')
+        deb_hash = 'cd284220b32128c5084037553db3c482426f3972'
+        deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
+        kernel_path = self.extract_from_deb(deb_path, '/boot/kernel7.img')
+        dtb_path = self.extract_from_deb(deb_path, '/boot/bcm2709-rpi-2-b.dtb')
+
+        initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
+                      '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
+                      'arm/rootfs-armv7a.cpio.gz')
+        initrd_hash = '604b2e45cdf35045846b8bbfbf2129b1891bdc9c'
+        initrd_path_gz = self.fetch_asset(initrd_url, asset_hash=initrd_hash)
+        initrd_path = os.path.join(self.workdir, 'rootfs.cpio')
+        archive.gzip_uncompress(initrd_path_gz, initrd_path)
+
+        self.vm.set_console()
+        kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
+                               'earlycon=pl011,0x3f201000 console=ttyAMA0 '
+                               'panic=-1 noreboot ' +
+                               'dwc_otg.fiq_fsm_enable=0')
+        self.vm.add_args('-kernel', kernel_path,
+                         '-dtb', dtb_path,
+                         '-initrd', initrd_path,
+                         '-append', kernel_command_line,
+                         '-no-reboot')
+        self.vm.launch()
+        self.wait_for_console_pattern('Boot successful.')
+
+        exec_command_and_wait_for_pattern(self, 'cat /proc/cpuinfo',
+                                          'BCM2835')
+        exec_command_and_wait_for_pattern(self, 'cat /proc/iomem',
+                                          '/soc/cprman@7e101000')
+        exec_command(self, 'halt')
+        # Wait for VM to shut down gracefully
+        self.vm.wait()
+
     def test_arm_exynos4210_initrd(self):
         """
         :avocado: tags=arch:arm