author     Richard Henderson <richard.henderson@linaro.org>    2024-05-28 10:06:53 -0700
committer  Richard Henderson <richard.henderson@linaro.org>    2024-05-28 10:06:53 -0700
commit     f8e5c833f9180a49786604e426dd3d87a22652ea (patch)
tree       cdd1fdb238c97aa93a19f5b2bc40e54951ef8470
parent     ad10b4badc1dd5b28305f9b9f1168cf0aa3ae946 (diff)
parent     f240df3c31b40e4cf1af1f156a88efc1a1df406c (diff)
Merge tag 'pull-target-arm-20240528' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
 * xlnx_dpdma: fix descriptor endianness bug
 * hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers
 * hw/arm/npcm7xx: remove setting of mp-affinity
 * hw/char: Correct STM32L4x5 usart register CR2 field ADD_0 size
 * hw/intc/arm_gic: Fix handling of NS view of GICC_APR<n>
 * hw/input/tsc2005: Fix -Wchar-subscripts warning in tsc2005_txrx()
 * hw: arm: Remove use of tabs in some source files
 * docs/system: Remove ADC from raspi documentation
 * target/arm: Start of the conversion of A64 SIMD to decodetree

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmZV5HsZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3j+CD/9V5kC3DJtovMiolr1z8YYI
# eRj0I/pKacgIzz9kVwzo+UVVgzXAi80VFO7xbe+aucCKs0c2s3wrUnUWkAaGHUYR
# DKhRIp017HKW8esgDVQsItn2030PLQLlhxpLvhSfN7NR2jHiJdE914Kb3h6XIEVE
# CqMRaYt9Vrh5o0e51VSzzccFK+kyYV1MDvNyx1/8F4KJpsMFeK0iy9WYrXx2UxlT
# dlrJZdrShIkOWiQB+bi6zQzjMveNmDicjBCgnC7TO2ayOl0AD24sNg/Z49w+4Hjb
# azUDYR45uuyQD5HJLyBsk5BcYhfyZttn2U5uTvNQ6SEfMuKUFEfdoSebTHngEb6t
# ObOdJW6+GmyaIaaJS99ea8u8jbe1r5zhQGJEBeEWOyGYTKUJ6Q0J+g6dZUdgniOp
# bvORX4qnIlMLMGGYT34410Wf0lsE88BHspcVX0WLGFLMZcEYsHhdgG6/f0p8D3nD
# m3R5+/BxUHK7A6OVe/1YU6jTqnfPBY6CGKSqEvXbJGlPp7LAjIxuUHBRxRnXU+Ad
# ohBwOIEEDNhGnEiiHFFK+wrc8BncXY4eSiJBCLlRaf1AcxCT6ibWXuUlpnWeAwNk
# E3Kmvq9BCdQZpIj7EsyvngTc5PsQrqK0FNIVuSVi38QQnqS/0oykvsPzgSlD6blP
# zcFIgG7aUiPOkdTxcPTYnA==
# =TjtM
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 28 May 2024 07:04:43 AM PDT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]

* tag 'pull-target-arm-20240528' of https://git.linaro.org/people/pmaydell/qemu-arm: (42 commits)
  target/arm: Convert disas_simd_3same_logic to decodetree
  target/arm: Convert FMLAL, FMLSL to decodetree
  target/arm: Use gvec for neon pmax, pmin
  target/arm: Convert SMAXP, SMINP, UMAXP, UMINP to decodetree
  target/arm: Use gvec for neon padd
  target/arm: Convert ADDP to decodetree
  target/arm: Use gvec for neon faddp, fmaxp, fminp
  target/arm: Convert FMAXP, FMINP, FMAXNMP, FMINNMP to decodetree
  target/arm: Convert FADDP to decodetree
  target/arm: Convert FRECPS, FRSQRTS to decodetree
  target/arm: Convert FABD to decodetree
  target/arm: Convert FCMEQ, FCMGE, FCMGT, FACGE, FACGT to decodetree
  target/arm: Convert FMLA, FMLS to decodetree
  target/arm: Convert FNMUL to decodetree
  target/arm: Expand vfp neg and abs inline
  target/arm: Introduce vfp_load_reg16
  target/arm: Convert FMAX, FMIN, FMAXNM, FMINNM to decodetree
  target/arm: Convert FADD, FSUB, FDIV, FMUL to decodetree
  target/arm: Convert FMULX to decodetree
  target/arm: Convert Advanced SIMD copy to decodetree
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--  docs/system/arm/raspi.rst        |    1
-rw-r--r--  hw/arm/boot.c                    |    8
-rw-r--r--  hw/arm/npcm7xx.c                 |    3
-rw-r--r--  hw/char/omap_uart.c              |   49
-rw-r--r--  hw/char/stm32l4x5_usart.c        |    2
-rw-r--r--  hw/dma/xlnx_dpdma.c              |   68
-rw-r--r--  hw/gpio/zaurus.c                 |   61
-rw-r--r--  hw/input/tsc2005.c               |  135
-rw-r--r--  hw/intc/arm_gic.c                |    4
-rw-r--r--  target/arm/helper.h              |   68
-rw-r--r--  target/arm/hvf/hvf.c             |  160
-rw-r--r--  target/arm/tcg/a64.decode        |  317
-rw-r--r--  target/arm/tcg/gengvec.c         | 1672
-rw-r--r--  target/arm/tcg/gengvec64.c       |  190
-rw-r--r--  target/arm/tcg/helper-a64.h      |   12
-rw-r--r--  target/arm/tcg/meson.build       |    2
-rw-r--r--  target/arm/tcg/neon_helper.c     |    5
-rw-r--r--  target/arm/tcg/t32.decode        |   25
-rw-r--r--  target/arm/tcg/translate-a64.c   | 3131
-rw-r--r--  target/arm/tcg/translate-a64.h   |    4
-rw-r--r--  target/arm/tcg/translate-neon.c  |  136
-rw-r--r--  target/arm/tcg/translate-sve.c   |  145
-rw-r--r--  target/arm/tcg/translate-vfp.c   |   93
-rw-r--r--  target/arm/tcg/translate.c       | 1592
-rw-r--r--  target/arm/tcg/translate.h       |   51
-rw-r--r--  target/arm/tcg/vec_helper.c      |  227
-rw-r--r--  target/arm/vfp_helper.c          |   30
27 files changed, 3877 insertions, 4314 deletions
diff --git a/docs/system/arm/raspi.rst b/docs/system/arm/raspi.rst
index fbec1da6a1..44eec3f1c3 100644
--- a/docs/system/arm/raspi.rst
+++ b/docs/system/arm/raspi.rst
@@ -40,7 +40,6 @@ Implemented devices
Missing devices
---------------
- * Analog to Digital Converter (ADC)
* Pulse Width Modulation (PWM)
* PCIE Root Port (raspi4b)
* GENET Ethernet Controller (raspi4b)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 84ea6a807a..d480a7da02 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -347,13 +347,13 @@ static void set_kernel_args_old(const struct arm_boot_info *info,
WRITE_WORD(p, info->ram_size / 4096);
/* ramdisk_size */
WRITE_WORD(p, 0);
-#define FLAG_READONLY 1
-#define FLAG_RDLOAD 4
-#define FLAG_RDPROMPT 8
+#define FLAG_READONLY 1
+#define FLAG_RDLOAD 4
+#define FLAG_RDPROMPT 8
/* flags */
WRITE_WORD(p, FLAG_READONLY | FLAG_RDLOAD | FLAG_RDPROMPT);
/* rootdev */
- WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */
+ WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */
/* video_num_cols */
WRITE_WORD(p, 0);
/* video_num_rows */
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index 9f2d96c733..cb7791301b 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -487,9 +487,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
/* CPUs */
for (i = 0; i < nc->num_cpus; i++) {
- object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity",
- arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS),
- &error_abort);
object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar",
NPCM7XX_GIC_CPU_IF_ADDR, &error_abort);
object_property_set_bool(OBJECT(&s->cpu[i]), "reset-hivecs", true,
diff --git a/hw/char/omap_uart.c b/hw/char/omap_uart.c
index 6848bddb4e..c2ef4c137e 100644
--- a/hw/char/omap_uart.c
+++ b/hw/char/omap_uart.c
@@ -61,7 +61,7 @@ struct omap_uart_s *omap_uart_init(hwaddr base,
s->fclk = fclk;
s->irq = irq;
s->serial = serial_mm_init(get_system_memory(), base, 2, irq,
- omap_clk_getrate(fclk)/16,
+ omap_clk_getrate(fclk) / 16,
chr ?: qemu_chr_new(label, "null", NULL),
DEVICE_NATIVE_ENDIAN);
return s;
@@ -76,27 +76,27 @@ static uint64_t omap_uart_read(void *opaque, hwaddr addr, unsigned size)
}
switch (addr) {
- case 0x20: /* MDR1 */
+ case 0x20: /* MDR1 */
return s->mdr[0];
- case 0x24: /* MDR2 */
+ case 0x24: /* MDR2 */
return s->mdr[1];
- case 0x40: /* SCR */
+ case 0x40: /* SCR */
return s->scr;
- case 0x44: /* SSR */
+ case 0x44: /* SSR */
return 0x0;
- case 0x48: /* EBLR (OMAP2) */
+ case 0x48: /* EBLR (OMAP2) */
return s->eblr;
- case 0x4C: /* OSC_12M_SEL (OMAP1) */
+ case 0x4C: /* OSC_12M_SEL (OMAP1) */
return s->clksel;
- case 0x50: /* MVR */
+ case 0x50: /* MVR */
return 0x30;
- case 0x54: /* SYSC (OMAP2) */
+ case 0x54: /* SYSC (OMAP2) */
return s->syscontrol;
- case 0x58: /* SYSS (OMAP2) */
+ case 0x58: /* SYSS (OMAP2) */
return 1;
- case 0x5c: /* WER (OMAP2) */
+ case 0x5c: /* WER (OMAP2) */
return s->wkup;
- case 0x60: /* CFPS (OMAP2) */
+ case 0x60: /* CFPS (OMAP2) */
return s->cfps;
}
@@ -115,35 +115,36 @@ static void omap_uart_write(void *opaque, hwaddr addr,
}
switch (addr) {
- case 0x20: /* MDR1 */
+ case 0x20: /* MDR1 */
s->mdr[0] = value & 0x7f;
break;
- case 0x24: /* MDR2 */
+ case 0x24: /* MDR2 */
s->mdr[1] = value & 0xff;
break;
- case 0x40: /* SCR */
+ case 0x40: /* SCR */
s->scr = value & 0xff;
break;
- case 0x48: /* EBLR (OMAP2) */
+ case 0x48: /* EBLR (OMAP2) */
s->eblr = value & 0xff;
break;
- case 0x4C: /* OSC_12M_SEL (OMAP1) */
+ case 0x4C: /* OSC_12M_SEL (OMAP1) */
s->clksel = value & 1;
break;
- case 0x44: /* SSR */
- case 0x50: /* MVR */
- case 0x58: /* SYSS (OMAP2) */
+ case 0x44: /* SSR */
+ case 0x50: /* MVR */
+ case 0x58: /* SYSS (OMAP2) */
OMAP_RO_REG(addr);
break;
- case 0x54: /* SYSC (OMAP2) */
+ case 0x54: /* SYSC (OMAP2) */
s->syscontrol = value & 0x1d;
- if (value & 2)
+ if (value & 2) {
omap_uart_reset(s);
+ }
break;
- case 0x5c: /* WER (OMAP2) */
+ case 0x5c: /* WER (OMAP2) */
s->wkup = value & 0x7f;
break;
- case 0x60: /* CFPS (OMAP2) */
+ case 0x60: /* CFPS (OMAP2) */
s->cfps = value & 0xff;
break;
default:
diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
index 02f666308c..fc5dcac0c4 100644
--- a/hw/char/stm32l4x5_usart.c
+++ b/hw/char/stm32l4x5_usart.c
@@ -56,7 +56,7 @@ REG32(CR1, 0x00)
FIELD(CR1, UE, 0, 1) /* USART enable */
REG32(CR2, 0x04)
FIELD(CR2, ADD_1, 28, 4) /* ADD[7:4] */
- FIELD(CR2, ADD_0, 24, 1) /* ADD[3:0] */
+ FIELD(CR2, ADD_0, 24, 4) /* ADD[3:0] */
FIELD(CR2, RTOEN, 23, 1) /* Receiver timeout enable */
FIELD(CR2, ABRMOD, 21, 2) /* Auto baud rate mode */
FIELD(CR2, ABREN, 20, 1) /* Auto baud rate enable */
diff --git a/hw/dma/xlnx_dpdma.c b/hw/dma/xlnx_dpdma.c
index 530717d188..dde4aeca40 100644
--- a/hw/dma/xlnx_dpdma.c
+++ b/hw/dma/xlnx_dpdma.c
@@ -614,6 +614,65 @@ static void xlnx_dpdma_register_types(void)
type_register_static(&xlnx_dpdma_info);
}
+static MemTxResult xlnx_dpdma_read_descriptor(XlnxDPDMAState *s,
+ uint64_t desc_addr,
+ DPDMADescriptor *desc)
+{
+ MemTxResult res = dma_memory_read(&address_space_memory, desc_addr,
+ &desc, sizeof(DPDMADescriptor),
+ MEMTXATTRS_UNSPECIFIED);
+ if (res) {
+ return res;
+ }
+
+ /* Convert from LE into host endianness. */
+ desc->control = le32_to_cpu(desc->control);
+ desc->descriptor_id = le32_to_cpu(desc->descriptor_id);
+ desc->xfer_size = le32_to_cpu(desc->xfer_size);
+ desc->line_size_stride = le32_to_cpu(desc->line_size_stride);
+ desc->timestamp_lsb = le32_to_cpu(desc->timestamp_lsb);
+ desc->timestamp_msb = le32_to_cpu(desc->timestamp_msb);
+ desc->address_extension = le32_to_cpu(desc->address_extension);
+ desc->next_descriptor = le32_to_cpu(desc->next_descriptor);
+ desc->source_address = le32_to_cpu(desc->source_address);
+ desc->address_extension_23 = le32_to_cpu(desc->address_extension_23);
+ desc->address_extension_45 = le32_to_cpu(desc->address_extension_45);
+ desc->source_address2 = le32_to_cpu(desc->source_address2);
+ desc->source_address3 = le32_to_cpu(desc->source_address3);
+ desc->source_address4 = le32_to_cpu(desc->source_address4);
+ desc->source_address5 = le32_to_cpu(desc->source_address5);
+ desc->crc = le32_to_cpu(desc->crc);
+
+ return res;
+}
+
+static MemTxResult xlnx_dpdma_write_descriptor(uint64_t desc_addr,
+ DPDMADescriptor *desc)
+{
+ DPDMADescriptor tmp_desc = *desc;
+
+ /* Convert from host endianness into LE. */
+ tmp_desc.control = cpu_to_le32(tmp_desc.control);
+ tmp_desc.descriptor_id = cpu_to_le32(tmp_desc.descriptor_id);
+ tmp_desc.xfer_size = cpu_to_le32(tmp_desc.xfer_size);
+ tmp_desc.line_size_stride = cpu_to_le32(tmp_desc.line_size_stride);
+ tmp_desc.timestamp_lsb = cpu_to_le32(tmp_desc.timestamp_lsb);
+ tmp_desc.timestamp_msb = cpu_to_le32(tmp_desc.timestamp_msb);
+ tmp_desc.address_extension = cpu_to_le32(tmp_desc.address_extension);
+ tmp_desc.next_descriptor = cpu_to_le32(tmp_desc.next_descriptor);
+ tmp_desc.source_address = cpu_to_le32(tmp_desc.source_address);
+ tmp_desc.address_extension_23 = cpu_to_le32(tmp_desc.address_extension_23);
+ tmp_desc.address_extension_45 = cpu_to_le32(tmp_desc.address_extension_45);
+ tmp_desc.source_address2 = cpu_to_le32(tmp_desc.source_address2);
+ tmp_desc.source_address3 = cpu_to_le32(tmp_desc.source_address3);
+ tmp_desc.source_address4 = cpu_to_le32(tmp_desc.source_address4);
+ tmp_desc.source_address5 = cpu_to_le32(tmp_desc.source_address5);
+ tmp_desc.crc = cpu_to_le32(tmp_desc.crc);
+
+ return dma_memory_write(&address_space_memory, desc_addr, &tmp_desc,
+ sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED);
+}
+
size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
bool one_desc)
{
@@ -651,8 +710,7 @@ size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
desc_addr = xlnx_dpdma_descriptor_next_address(s, channel);
}
- if (dma_memory_read(&address_space_memory, desc_addr, &desc,
- sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED)) {
+ if (xlnx_dpdma_read_descriptor(s, desc_addr, &desc)) {
s->registers[DPDMA_EISR] |= ((1 << 1) << channel);
xlnx_dpdma_update_irq(s);
s->operation_finished[channel] = true;
@@ -755,8 +813,10 @@ size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
/* The descriptor need to be updated when it's completed. */
DPRINTF("update the descriptor with the done flag set.\n");
xlnx_dpdma_desc_set_done(&desc);
- dma_memory_write(&address_space_memory, desc_addr, &desc,
- sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED);
+ if (xlnx_dpdma_write_descriptor(desc_addr, &desc)) {
+ DPRINTF("Can't write the descriptor.\n");
+ /* TODO: check hardware behaviour for memory write failure */
+ }
}
if (xlnx_dpdma_desc_completion_interrupt(&desc)) {
diff --git a/hw/gpio/zaurus.c b/hw/gpio/zaurus.c
index 5884804c58..7342440b95 100644
--- a/hw/gpio/zaurus.c
+++ b/hw/gpio/zaurus.c
@@ -49,19 +49,20 @@ struct ScoopInfo {
uint16_t isr;
};
-#define SCOOP_MCR 0x00
-#define SCOOP_CDR 0x04
-#define SCOOP_CSR 0x08
-#define SCOOP_CPR 0x0c
-#define SCOOP_CCR 0x10
-#define SCOOP_IRR_IRM 0x14
-#define SCOOP_IMR 0x18
-#define SCOOP_ISR 0x1c
-#define SCOOP_GPCR 0x20
-#define SCOOP_GPWR 0x24
-#define SCOOP_GPRR 0x28
-
-static inline void scoop_gpio_handler_update(ScoopInfo *s) {
+#define SCOOP_MCR 0x00
+#define SCOOP_CDR 0x04
+#define SCOOP_CSR 0x08
+#define SCOOP_CPR 0x0c
+#define SCOOP_CCR 0x10
+#define SCOOP_IRR_IRM 0x14
+#define SCOOP_IMR 0x18
+#define SCOOP_ISR 0x1c
+#define SCOOP_GPCR 0x20
+#define SCOOP_GPWR 0x24
+#define SCOOP_GPRR 0x28
+
+static inline void scoop_gpio_handler_update(ScoopInfo *s)
+{
uint32_t level, diff;
int bit;
level = s->gpio_level & s->gpio_dir;
@@ -125,8 +126,9 @@ static void scoop_write(void *opaque, hwaddr addr,
break;
case SCOOP_CPR:
s->power = value;
- if (value & 0x80)
+ if (value & 0x80) {
s->power |= 0x8040;
+ }
break;
case SCOOP_CCR:
s->ccr = value;
@@ -145,7 +147,7 @@ static void scoop_write(void *opaque, hwaddr addr,
scoop_gpio_handler_update(s);
break;
case SCOOP_GPWR:
- case SCOOP_GPRR: /* GPRR is probably R/O in real HW */
+ case SCOOP_GPRR: /* GPRR is probably R/O in real HW */
s->gpio_level = value & s->gpio_dir;
scoop_gpio_handler_update(s);
break;
@@ -166,10 +168,11 @@ static void scoop_gpio_set(void *opaque, int line, int level)
{
ScoopInfo *s = (ScoopInfo *) opaque;
- if (level)
+ if (level) {
s->gpio_level |= (1 << line);
- else
+ } else {
s->gpio_level &= ~(1 << line);
+ }
}
static void scoop_init(Object *obj)
@@ -203,7 +206,7 @@ static int scoop_post_load(void *opaque, int version_id)
return 0;
}
-static bool is_version_0 (void *opaque, int version_id)
+static bool is_version_0(void *opaque, int version_id)
{
return version_id == 0;
}
@@ -265,7 +268,7 @@ type_init(scoop_register_types)
/* Write the bootloader parameters memory area. */
-#define MAGIC_CHG(a, b, c, d) ((d << 24) | (c << 16) | (b << 8) | a)
+#define MAGIC_CHG(a, b, c, d) ((d << 24) | (c << 16) | (b << 8) | a)
static struct QEMU_PACKED sl_param_info {
uint32_t comadj_keyword;
@@ -286,16 +289,16 @@ static struct QEMU_PACKED sl_param_info {
uint32_t phad_keyword;
int32_t phadadj;
} zaurus_bootparam = {
- .comadj_keyword = MAGIC_CHG('C', 'M', 'A', 'D'),
- .comadj = 125,
- .uuid_keyword = MAGIC_CHG('U', 'U', 'I', 'D'),
- .uuid = { -1 },
- .touch_keyword = MAGIC_CHG('T', 'U', 'C', 'H'),
- .touch_xp = -1,
- .adadj_keyword = MAGIC_CHG('B', 'V', 'A', 'D'),
- .adadj = -1,
- .phad_keyword = MAGIC_CHG('P', 'H', 'A', 'D'),
- .phadadj = 0x01,
+ .comadj_keyword = MAGIC_CHG('C', 'M', 'A', 'D'),
+ .comadj = 125,
+ .uuid_keyword = MAGIC_CHG('U', 'U', 'I', 'D'),
+ .uuid = { -1 },
+ .touch_keyword = MAGIC_CHG('T', 'U', 'C', 'H'),
+ .touch_xp = -1,
+ .adadj_keyword = MAGIC_CHG('B', 'V', 'A', 'D'),
+ .adadj = -1,
+ .phad_keyword = MAGIC_CHG('P', 'H', 'A', 'D'),
+ .phadadj = 0x01,
};
void sl_bootparam_write(hwaddr ptr)
diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c
index 941f163d36..54a15d2441 100644
--- a/hw/input/tsc2005.c
+++ b/hw/input/tsc2005.c
@@ -28,10 +28,10 @@
#include "migration/vmstate.h"
#include "trace.h"
-#define TSC_CUT_RESOLUTION(value, p) ((value) >> (16 - (p ? 12 : 10)))
+#define TSC_CUT_RESOLUTION(value, p) ((value) >> (16 - (p ? 12 : 10)))
typedef struct {
- qemu_irq pint; /* Combination of the nPENIRQ and DAV signals */
+ qemu_irq pint; /* Combination of the nPENIRQ and DAV signals */
QEMUTimer *timer;
uint16_t model;
@@ -63,7 +63,7 @@ typedef struct {
} TSC2005State;
enum {
- TSC_MODE_XYZ_SCAN = 0x0,
+ TSC_MODE_XYZ_SCAN = 0x0,
TSC_MODE_XY_SCAN,
TSC_MODE_X,
TSC_MODE_Y,
@@ -82,100 +82,100 @@ enum {
};
static const uint16_t mode_regs[16] = {
- 0xf000, /* X, Y, Z scan */
- 0xc000, /* X, Y scan */
- 0x8000, /* X */
- 0x4000, /* Y */
- 0x3000, /* Z */
- 0x0800, /* AUX */
- 0x0400, /* TEMP1 */
- 0x0200, /* TEMP2 */
- 0x0800, /* AUX scan */
- 0x0040, /* X test */
- 0x0020, /* Y test */
- 0x0080, /* Short-circuit test */
- 0x0000, /* Reserved */
- 0x0000, /* X+, X- drivers */
- 0x0000, /* Y+, Y- drivers */
- 0x0000, /* Y+, X- drivers */
+ 0xf000, /* X, Y, Z scan */
+ 0xc000, /* X, Y scan */
+ 0x8000, /* X */
+ 0x4000, /* Y */
+ 0x3000, /* Z */
+ 0x0800, /* AUX */
+ 0x0400, /* TEMP1 */
+ 0x0200, /* TEMP2 */
+ 0x0800, /* AUX scan */
+ 0x0040, /* X test */
+ 0x0020, /* Y test */
+ 0x0080, /* Short-circuit test */
+ 0x0000, /* Reserved */
+ 0x0000, /* X+, X- drivers */
+ 0x0000, /* Y+, Y- drivers */
+ 0x0000, /* Y+, X- drivers */
};
-#define X_TRANSFORM(s) \
+#define X_TRANSFORM(s) \
((s->y * s->tr[0] - s->x * s->tr[1]) / s->tr[2] + s->tr[3])
-#define Y_TRANSFORM(s) \
+#define Y_TRANSFORM(s) \
((s->y * s->tr[4] - s->x * s->tr[5]) / s->tr[6] + s->tr[7])
-#define Z1_TRANSFORM(s) \
+#define Z1_TRANSFORM(s) \
((400 - ((s)->x >> 7) + ((s)->pressure << 10)) << 4)
-#define Z2_TRANSFORM(s) \
+#define Z2_TRANSFORM(s) \
((4000 + ((s)->y >> 7) - ((s)->pressure << 10)) << 4)
-#define AUX_VAL (700 << 4) /* +/- 3 at 12-bit */
-#define TEMP1_VAL (1264 << 4) /* +/- 5 at 12-bit */
-#define TEMP2_VAL (1531 << 4) /* +/- 5 at 12-bit */
+#define AUX_VAL (700 << 4) /* +/- 3 at 12-bit */
+#define TEMP1_VAL (1264 << 4) /* +/- 5 at 12-bit */
+#define TEMP2_VAL (1531 << 4) /* +/- 5 at 12-bit */
static uint16_t tsc2005_read(TSC2005State *s, int reg)
{
uint16_t ret;
switch (reg) {
- case 0x0: /* X */
+ case 0x0: /* X */
s->dav &= ~mode_regs[TSC_MODE_X];
return TSC_CUT_RESOLUTION(X_TRANSFORM(s), s->precision) +
(s->noise & 3);
- case 0x1: /* Y */
+ case 0x1: /* Y */
s->dav &= ~mode_regs[TSC_MODE_Y];
- s->noise ++;
+ s->noise++;
return TSC_CUT_RESOLUTION(Y_TRANSFORM(s), s->precision) ^
(s->noise & 3);
- case 0x2: /* Z1 */
+ case 0x2: /* Z1 */
s->dav &= 0xdfff;
return TSC_CUT_RESOLUTION(Z1_TRANSFORM(s), s->precision) -
(s->noise & 3);
- case 0x3: /* Z2 */
+ case 0x3: /* Z2 */
s->dav &= 0xefff;
return TSC_CUT_RESOLUTION(Z2_TRANSFORM(s), s->precision) |
(s->noise & 3);
- case 0x4: /* AUX */
+ case 0x4: /* AUX */
s->dav &= ~mode_regs[TSC_MODE_AUX];
return TSC_CUT_RESOLUTION(AUX_VAL, s->precision);
- case 0x5: /* TEMP1 */
+ case 0x5: /* TEMP1 */
s->dav &= ~mode_regs[TSC_MODE_TEMP1];
return TSC_CUT_RESOLUTION(TEMP1_VAL, s->precision) -
(s->noise & 5);
- case 0x6: /* TEMP2 */
+ case 0x6: /* TEMP2 */
s->dav &= 0xdfff;
s->dav &= ~mode_regs[TSC_MODE_TEMP2];
return TSC_CUT_RESOLUTION(TEMP2_VAL, s->precision) ^
(s->noise & 3);
- case 0x7: /* Status */
+ case 0x7: /* Status */
ret = s->dav | (s->reset << 7) | (s->pdst << 2) | 0x0;
s->dav &= ~(mode_regs[TSC_MODE_X_TEST] | mode_regs[TSC_MODE_Y_TEST] |
mode_regs[TSC_MODE_TS_TEST]);
s->reset = true;
return ret;
- case 0x8: /* AUX high threshold */
+ case 0x8: /* AUX high threshold */
return s->aux_thr[1];
- case 0x9: /* AUX low threshold */
+ case 0x9: /* AUX low threshold */
return s->aux_thr[0];
- case 0xa: /* TEMP high threshold */
+ case 0xa: /* TEMP high threshold */
return s->temp_thr[1];
- case 0xb: /* TEMP low threshold */
+ case 0xb: /* TEMP low threshold */
return s->temp_thr[0];
- case 0xc: /* CFR0 */
+ case 0xc: /* CFR0 */
return (s->pressure << 15) | ((!s->busy) << 14) |
- (s->nextprecision << 13) | s->timing[0];
- case 0xd: /* CFR1 */
+ (s->nextprecision << 13) | s->timing[0];
+ case 0xd: /* CFR1 */
return s->timing[1];
- case 0xe: /* CFR2 */
+ case 0xe: /* CFR2 */
return (s->pin_func << 14) | s->filter;
- case 0xf: /* Function select status */
+ case 0xf: /* Function select status */
return s->function >= 0 ? 1 << s->function : 0;
}
@@ -200,13 +200,14 @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data)
s->temp_thr[0] = data;
break;
- case 0xc: /* CFR0 */
+ case 0xc: /* CFR0 */
s->host_mode = (data >> 15) != 0;
if (s->enabled != !(data & 0x4000)) {
s->enabled = !(data & 0x4000);
trace_tsc2005_sense(s->enabled ? "enabled" : "disabled");
- if (s->busy && !s->enabled)
+ if (s->busy && !s->enabled) {
timer_del(s->timer);
+ }
s->busy = s->busy && s->enabled;
}
s->nextprecision = (data >> 13) & 1;
@@ -216,10 +217,10 @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data)
"tsc2005_write: illegal conversion clock setting\n");
}
break;
- case 0xd: /* CFR1 */
+ case 0xd: /* CFR1 */
s->timing[1] = data & 0xf07;
break;
- case 0xe: /* CFR2 */
+ case 0xe: /* CFR2 */
s->pin_func = (data >> 14) & 3;
s->filter = data & 0x3fff;
break;
@@ -258,10 +259,12 @@ static void tsc2005_pin_update(TSC2005State *s)
switch (s->nextfunction) {
case TSC_MODE_XYZ_SCAN:
case TSC_MODE_XY_SCAN:
- if (!s->host_mode && s->dav)
+ if (!s->host_mode && s->dav) {
s->enabled = false;
- if (!s->pressure)
+ }
+ if (!s->pressure) {
return;
+ }
/* Fall through */
case TSC_MODE_AUX_SCAN:
break;
@@ -269,8 +272,9 @@ static void tsc2005_pin_update(TSC2005State *s)
case TSC_MODE_X:
case TSC_MODE_Y:
case TSC_MODE_Z:
- if (!s->pressure)
+ if (!s->pressure) {
return;
+ }
/* Fall through */
case TSC_MODE_AUX:
case TSC_MODE_TEMP1:
@@ -278,8 +282,9 @@ static void tsc2005_pin_update(TSC2005State *s)
case TSC_MODE_X_TEST:
case TSC_MODE_Y_TEST:
case TSC_MODE_TS_TEST:
- if (s->dav)
+ if (s->dav) {
s->enabled = false;
+ }
break;
case TSC_MODE_RESERVED:
@@ -290,13 +295,14 @@ static void tsc2005_pin_update(TSC2005State *s)
return;
}
- if (!s->enabled || s->busy)
+ if (!s->enabled || s->busy) {
return;
+ }
s->busy = true;
s->precision = s->nextprecision;
s->function = s->nextfunction;
- s->pdst = !s->pnd0; /* Synchronised on internal clock */
+ s->pdst = !s->pnd0; /* Synchronised on internal clock */
expires = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
(NANOSECONDS_PER_SECOND >> 7);
timer_mod(s->timer, expires);
@@ -331,7 +337,7 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value)
TSC2005State *s = opaque;
uint32_t ret = 0;
- switch (s->state ++) {
+ switch (s->state++) {
case 0:
if (value & 0x80) {
/* Command */
@@ -343,8 +349,9 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value)
if (s->enabled != !(value & 1)) {
s->enabled = !(value & 1);
trace_tsc2005_sense(s->enabled ? "enabled" : "disabled");
- if (s->busy && !s->enabled)
+ if (s->busy && !s->enabled) {
timer_del(s->timer);
+ }
s->busy = s->busy && s->enabled;
}
tsc2005_pin_update(s);
@@ -368,10 +375,11 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value)
break;
case 1:
- if (s->command)
+ if (s->command) {
ret = (s->data >> 8) & 0xff;
- else
+ } else {
s->data |= value << 8;
+ }
break;
case 2:
@@ -406,14 +414,18 @@ uint32_t tsc2005_txrx(void *opaque, uint32_t value, int len)
static void tsc2005_timer_tick(void *opaque)
{
TSC2005State *s = opaque;
+ unsigned int function = s->function;
+
+ assert(function < ARRAY_SIZE(mode_regs));
/* Timer ticked -- a set of conversions has been finished. */
- if (!s->busy)
+ if (!s->busy) {
return;
+ }
s->busy = false;
- s->dav |= mode_regs[s->function];
+ s->dav |= mode_regs[function];
s->function = -1;
tsc2005_pin_update(s);
}
@@ -435,8 +447,9 @@ static void tsc2005_touchscreen_event(void *opaque,
* signaling TS events immediately, but for now we simulate
* the first conversion delay for sake of correctness.
*/
- if (p != s->pressure)
+ if (p != s->pressure) {
tsc2005_pin_update(s);
+ }
}
static int tsc2005_post_load(void *opaque, int version_id)
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 074cf50af2..e4b8437f8b 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -1658,7 +1658,7 @@ static MemTxResult gic_cpu_read(GICState *s, int cpu, int offset,
*data = s->h_apr[gic_get_vcpu_real_id(cpu)];
} else if (gic_cpu_ns_access(s, cpu, attrs)) {
/* NS view of GICC_APR<n> is the top half of GIC_NSAPR<n> */
- *data = gic_apr_ns_view(s, regno, cpu);
+ *data = gic_apr_ns_view(s, cpu, regno);
} else {
*data = s->apr[regno][cpu];
}
@@ -1746,7 +1746,7 @@ static MemTxResult gic_cpu_write(GICState *s, int cpu, int offset,
s->h_apr[gic_get_vcpu_real_id(cpu)] = value;
} else if (gic_cpu_ns_access(s, cpu, attrs)) {
/* NS view of GICC_APR<n> is the top half of GIC_NSAPR<n> */
- gic_apr_write_ns_view(s, regno, cpu, value);
+ gic_apr_write_ns_view(s, cpu, regno, value);
} else {
s->apr[regno][cpu] = value;
}
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 2b02733305..f830531dd3 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -132,12 +132,6 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
-DEF_HELPER_1(vfp_negh, f16, f16)
-DEF_HELPER_1(vfp_negs, f32, f32)
-DEF_HELPER_1(vfp_negd, f64, f64)
-DEF_HELPER_1(vfp_absh, f16, f16)
-DEF_HELPER_1(vfp_abss, f32, f32)
-DEF_HELPER_1(vfp_absd, f64, f64)
DEF_HELPER_2(vfp_sqrth, f16, f16, env)
DEF_HELPER_2(vfp_sqrts, f32, f32, env)
DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
@@ -360,8 +354,6 @@ DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
DEF_HELPER_2(neon_add_u8, i32, i32, i32)
DEF_HELPER_2(neon_add_u16, i32, i32, i32)
-DEF_HELPER_2(neon_padd_u8, i32, i32, i32)
-DEF_HELPER_2(neon_padd_u16, i32, i32, i32)
DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
@@ -656,13 +648,6 @@ DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -730,33 +715,43 @@ DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -772,9 +767,11 @@ DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
@@ -1042,6 +1039,47 @@ DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "tcg/helper-a64.h"
#include "tcg/helper-sve.h"
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 08d0757438..45e2218be5 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -396,85 +396,85 @@ struct hvf_sreg_match {
};
static struct hvf_sreg_match hvf_sreg_match[] = {
- { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) },
-
- { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) },
- { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) },
- { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) },
- { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) },
+ { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) },
+ { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) },
+ { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) },
+ { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) },
#ifdef SYNC_NO_RAW_REGS
/*
@@ -486,7 +486,7 @@ static struct hvf_sreg_match hvf_sreg_match[] = {
{ HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) },
{ HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) },
#endif
- { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) },
+ { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) },
{ HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) },
{ HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) },
{ HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) },
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 0e7656fd15..f48adef5bb 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -19,11 +19,53 @@
# This file is processed by scripts/decodetree.py
#
-&r rn
-&ri rd imm
-&rri_sf rd rn imm sf
-&i imm
-
+%rd 0:5
+%esz_sd 22:1 !function=plus_2
+%esz_hsd 22:2 !function=xor_2
+%hl 11:1 21:1
+%hlm 11:1 20:2
+
+&r rn
+&ri rd imm
+&rri_sf rd rn imm sf
+&i imm
+&rr_e rd rn esz
+&rrr_e rd rn rm esz
+&rrx_e rd rn rm idx esz
+&qrr_e q rd rn esz
+&qrrr_e q rd rn rm esz
+&qrrx_e q rd rn rm idx esz
+&qrrrr_e q rd rn rm ra esz
+
+@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1
+@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3
+@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd
+
+@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
+@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
+@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd
+
+@rrx_h ........ .. .. rm:4 .... . . rn:5 rd:5 &rrx_e esz=1 idx=%hlm
+@rrx_s ........ .. . rm:5 .... . . rn:5 rd:5 &rrx_e esz=2 idx=%hl
+@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
+
+@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
+@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
+@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
+@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
+@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
+
+@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
+@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
+@qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd
+@qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e
+
+@qrrx_h . q:1 .. .... .. .. rm:4 .... . . rn:5 rd:5 \
+ &qrrx_e esz=1 idx=%hlm
+@qrrx_s . q:1 .. .... .. . rm:5 .... . . rn:5 rd:5 \
+ &qrrx_e esz=2 idx=%hl
+@qrrx_d . q:1 .. .... .. . rm:5 .... idx:1 . rn:5 rd:5 \
+ &qrrx_e esz=3
### Data Processing - Immediate
@@ -590,3 +632,268 @@ CPYFE 00 011 0 01100 ..... .... 01 ..... ..... @cpy
CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy
CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy
CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy
+
+### Cryptographic AES
+
+AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0
+AESD 01001110 00 10100 00101 10 ..... ..... @r2r_q1e0
+AESMC 01001110 00 10100 00110 10 ..... ..... @rr_q1e0
+AESIMC 01001110 00 10100 00111 10 ..... ..... @rr_q1e0
+
+### Cryptographic three-register SHA
+
+SHA1C 0101 1110 000 ..... 000000 ..... ..... @rrr_q1e0
+SHA1P 0101 1110 000 ..... 000100 ..... ..... @rrr_q1e0
+SHA1M 0101 1110 000 ..... 001000 ..... ..... @rrr_q1e0
+SHA1SU0 0101 1110 000 ..... 001100 ..... ..... @rrr_q1e0
+SHA256H 0101 1110 000 ..... 010000 ..... ..... @rrr_q1e0
+SHA256H2 0101 1110 000 ..... 010100 ..... ..... @rrr_q1e0
+SHA256SU1 0101 1110 000 ..... 011000 ..... ..... @rrr_q1e0
+
+### Cryptographic two-register SHA
+
+SHA1H 0101 1110 0010 1000 0000 10 ..... ..... @rr_q1e0
+SHA1SU1 0101 1110 0010 1000 0001 10 ..... ..... @rr_q1e0
+SHA256SU0 0101 1110 0010 1000 0010 10 ..... ..... @rr_q1e0
+
+### Cryptographic three-register SHA512
+
+SHA512H 1100 1110 011 ..... 100000 ..... ..... @rrr_q1e0
+SHA512H2 1100 1110 011 ..... 100001 ..... ..... @rrr_q1e0
+SHA512SU1 1100 1110 011 ..... 100010 ..... ..... @rrr_q1e0
+RAX1 1100 1110 011 ..... 100011 ..... ..... @rrr_q1e3
+SM3PARTW1 1100 1110 011 ..... 110000 ..... ..... @rrr_q1e0
+SM3PARTW2 1100 1110 011 ..... 110001 ..... ..... @rrr_q1e0
+SM4EKEY 1100 1110 011 ..... 110010 ..... ..... @rrr_q1e0
+
+### Cryptographic two-register SHA512
+
+SHA512SU0 1100 1110 110 00000 100000 ..... ..... @rr_q1e0
+SM4E 1100 1110 110 00000 100001 ..... ..... @r2r_q1e0
+
+### Cryptographic four-register
+
+EOR3 1100 1110 000 ..... 0 ..... ..... ..... @rrrr_q1e3
+BCAX 1100 1110 001 ..... 0 ..... ..... ..... @rrrr_q1e3
+SM3SS1 1100 1110 010 ..... 0 ..... ..... ..... @rrrr_q1e3
+
+### Cryptographic three-register, imm2
+
+&crypto3i rd rn rm imm
+@crypto3i ........ ... rm:5 .. imm:2 .. rn:5 rd:5 &crypto3i
+
+SM3TT1A 11001110 010 ..... 10 .. 00 ..... ..... @crypto3i
+SM3TT1B 11001110 010 ..... 10 .. 01 ..... ..... @crypto3i
+SM3TT2A 11001110 010 ..... 10 .. 10 ..... ..... @crypto3i
+SM3TT2B 11001110 010 ..... 10 .. 11 ..... ..... @crypto3i
+
+### Cryptographic XAR
+
+XAR 1100 1110 100 rm:5 imm:6 rn:5 rd:5
+
+### Advanced SIMD scalar copy
+
+DUP_element_s 0101 1110 000 imm:5 0 0000 1 rn:5 rd:5
+
+### Advanced SIMD copy
+
+DUP_element_v 0 q:1 00 1110 000 imm:5 0 0000 1 rn:5 rd:5
+DUP_general 0 q:1 00 1110 000 imm:5 0 0001 1 rn:5 rd:5
+INS_general 0 1 00 1110 000 imm:5 0 0011 1 rn:5 rd:5
+SMOV 0 q:1 00 1110 000 imm:5 0 0101 1 rn:5 rd:5
+UMOV 0 q:1 00 1110 000 imm:5 0 0111 1 rn:5 rd:5
+INS_element 0 1 10 1110 000 di:5 0 si:4 1 rn:5 rd:5
+
+### Advanced SIMD scalar three same
+
+FADD_s 0001 1110 ..1 ..... 0010 10 ..... ..... @rrr_hsd
+FSUB_s 0001 1110 ..1 ..... 0011 10 ..... ..... @rrr_hsd
+FDIV_s 0001 1110 ..1 ..... 0001 10 ..... ..... @rrr_hsd
+FMUL_s 0001 1110 ..1 ..... 0000 10 ..... ..... @rrr_hsd
+FNMUL_s 0001 1110 ..1 ..... 1000 10 ..... ..... @rrr_hsd
+
+FMAX_s 0001 1110 ..1 ..... 0100 10 ..... ..... @rrr_hsd
+FMIN_s 0001 1110 ..1 ..... 0101 10 ..... ..... @rrr_hsd
+FMAXNM_s 0001 1110 ..1 ..... 0110 10 ..... ..... @rrr_hsd
+FMINNM_s 0001 1110 ..1 ..... 0111 10 ..... ..... @rrr_hsd
+
+FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h
+FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd
+
+FCMEQ_s 0101 1110 010 ..... 00100 1 ..... ..... @rrr_h
+FCMEQ_s 0101 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd
+
+FCMGE_s 0111 1110 010 ..... 00100 1 ..... ..... @rrr_h
+FCMGE_s 0111 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd
+
+FCMGT_s 0111 1110 110 ..... 00100 1 ..... ..... @rrr_h
+FCMGT_s 0111 1110 1.1 ..... 11100 1 ..... ..... @rrr_sd
+
+FACGE_s 0111 1110 010 ..... 00101 1 ..... ..... @rrr_h
+FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd
+
+FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h
+FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
+
+FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
+FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
+
+FRECPS_s 0101 1110 010 ..... 00111 1 ..... ..... @rrr_h
+FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
+
+FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
+FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
+
+### Advanced SIMD scalar pairwise
+
+FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h
+FADDP_s 0111 1110 0.11 0000 1101 10 ..... ..... @rr_sd
+
+FMAXP_s 0101 1110 0011 0000 1111 10 ..... ..... @rr_h
+FMAXP_s 0111 1110 0.11 0000 1111 10 ..... ..... @rr_sd
+
+FMINP_s 0101 1110 1011 0000 1111 10 ..... ..... @rr_h
+FMINP_s 0111 1110 1.11 0000 1111 10 ..... ..... @rr_sd
+
+FMAXNMP_s 0101 1110 0011 0000 1100 10 ..... ..... @rr_h
+FMAXNMP_s 0111 1110 0.11 0000 1100 10 ..... ..... @rr_sd
+
+FMINNMP_s 0101 1110 1011 0000 1100 10 ..... ..... @rr_h
+FMINNMP_s 0111 1110 1.11 0000 1100 10 ..... ..... @rr_sd
+
+ADDP_s 0101 1110 1111 0001 1011 10 ..... ..... @rr_d
+
+### Advanced SIMD three same
+
+FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
+FADD_v 0.00 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd
+
+FSUB_v 0.00 1110 110 ..... 00010 1 ..... ..... @qrrr_h
+FSUB_v 0.00 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
+
+FDIV_v 0.10 1110 010 ..... 00111 1 ..... ..... @qrrr_h
+FDIV_v 0.10 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
+
+FMUL_v 0.10 1110 010 ..... 00011 1 ..... ..... @qrrr_h
+FMUL_v 0.10 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd
+
+FMAX_v 0.00 1110 010 ..... 00110 1 ..... ..... @qrrr_h
+FMAX_v 0.00 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMIN_v 0.00 1110 110 ..... 00110 1 ..... ..... @qrrr_h
+FMIN_v 0.00 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMAXNM_v 0.00 1110 010 ..... 00000 1 ..... ..... @qrrr_h
+FMAXNM_v 0.00 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd
+
+FMINNM_v 0.00 1110 110 ..... 00000 1 ..... ..... @qrrr_h
+FMINNM_v 0.00 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
+
+FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h
+FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd
+
+FMLA_v 0.00 1110 010 ..... 00001 1 ..... ..... @qrrr_h
+FMLA_v 0.00 1110 0.1 ..... 11001 1 ..... ..... @qrrr_sd
+
+FMLS_v 0.00 1110 110 ..... 00001 1 ..... ..... @qrrr_h
+FMLS_v 0.00 1110 1.1 ..... 11001 1 ..... ..... @qrrr_sd
+
+FMLAL_v 0.00 1110 001 ..... 11101 1 ..... ..... @qrrr_h
+FMLSL_v 0.00 1110 101 ..... 11101 1 ..... ..... @qrrr_h
+FMLAL2_v 0.10 1110 001 ..... 11001 1 ..... ..... @qrrr_h
+FMLSL2_v 0.10 1110 101 ..... 11001 1 ..... ..... @qrrr_h
+
+FCMEQ_v 0.00 1110 010 ..... 00100 1 ..... ..... @qrrr_h
+FCMEQ_v 0.00 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd
+
+FCMGE_v 0.10 1110 010 ..... 00100 1 ..... ..... @qrrr_h
+FCMGE_v 0.10 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd
+
+FCMGT_v 0.10 1110 110 ..... 00100 1 ..... ..... @qrrr_h
+FCMGT_v 0.10 1110 1.1 ..... 11100 1 ..... ..... @qrrr_sd
+
+FACGE_v 0.10 1110 010 ..... 00101 1 ..... ..... @qrrr_h
+FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd
+
+FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h
+FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
+
+FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
+FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
+
+FRECPS_v 0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h
+FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
+
+FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
+FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
+
+FADDP_v 0.10 1110 010 ..... 00010 1 ..... ..... @qrrr_h
+FADDP_v 0.10 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd
+
+FMAXP_v 0.10 1110 010 ..... 00110 1 ..... ..... @qrrr_h
+FMAXP_v 0.10 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMINP_v 0.10 1110 110 ..... 00110 1 ..... ..... @qrrr_h
+FMINP_v 0.10 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd
+
+FMAXNMP_v 0.10 1110 010 ..... 00000 1 ..... ..... @qrrr_h
+FMAXNMP_v 0.10 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd
+
+FMINNMP_v 0.10 1110 110 ..... 00000 1 ..... ..... @qrrr_h
+FMINNMP_v 0.10 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd
+
+ADDP_v 0.00 1110 ..1 ..... 10111 1 ..... ..... @qrrr_e
+SMAXP_v 0.00 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e
+SMINP_v 0.00 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e
+UMAXP_v 0.10 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e
+UMINP_v 0.10 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e
+
+AND_v 0.00 1110 001 ..... 00011 1 ..... ..... @qrrr_b
+BIC_v 0.00 1110 011 ..... 00011 1 ..... ..... @qrrr_b
+ORR_v 0.00 1110 101 ..... 00011 1 ..... ..... @qrrr_b
+ORN_v 0.00 1110 111 ..... 00011 1 ..... ..... @qrrr_b
+EOR_v 0.10 1110 001 ..... 00011 1 ..... ..... @qrrr_b
+BSL_v 0.10 1110 011 ..... 00011 1 ..... ..... @qrrr_b
+BIT_v 0.10 1110 101 ..... 00011 1 ..... ..... @qrrr_b
+BIF_v 0.10 1110 111 ..... 00011 1 ..... ..... @qrrr_b
+
+### Advanced SIMD scalar x indexed element
+
+FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
+FMUL_si 0101 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
+FMUL_si 0101 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
+
+FMLA_si 0101 1111 00 .. .... 0001 . 0 ..... ..... @rrx_h
+FMLA_si 0101 1111 10 .. .... 0001 . 0 ..... ..... @rrx_s
+FMLA_si 0101 1111 11 0. .... 0001 . 0 ..... ..... @rrx_d
+
+FMLS_si 0101 1111 00 .. .... 0101 . 0 ..... ..... @rrx_h
+FMLS_si 0101 1111 10 .. .... 0101 . 0 ..... ..... @rrx_s
+FMLS_si 0101 1111 11 0. .... 0101 . 0 ..... ..... @rrx_d
+
+FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
+FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
+FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
+
+### Advanced SIMD vector x indexed element
+
+FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
+FMUL_vi 0.00 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
+FMUL_vi 0.00 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
+
+FMLA_vi 0.00 1111 00 .. .... 0001 . 0 ..... ..... @qrrx_h
+FMLA_vi 0.00 1111 10 . ..... 0001 . 0 ..... ..... @qrrx_s
+FMLA_vi 0.00 1111 11 0 ..... 0001 . 0 ..... ..... @qrrx_d
+
+FMLS_vi 0.00 1111 00 .. .... 0101 . 0 ..... ..... @qrrx_h
+FMLS_vi 0.00 1111 10 . ..... 0101 . 0 ..... ..... @qrrx_s
+FMLS_vi 0.00 1111 11 0 ..... 0101 . 0 ..... ..... @qrrx_d
+
+FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
+FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s
+FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d
+
+FMLAL_vi 0.00 1111 10 .. .... 0000 . 0 ..... ..... @qrrx_h
+FMLSL_vi 0.00 1111 10 .. .... 0100 . 0 ..... ..... @qrrx_h
+FMLAL2_vi 0.10 1111 10 .. .... 1000 . 0 ..... ..... @qrrx_h
+FMLSL2_vi 0.10 1111 10 .. .... 1100 . 0 ..... ..... @qrrx_h
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
new file mode 100644
index 0000000000..22c9d17dce
--- /dev/null
+++ b/target/arm/tcg/gengvec.c
@@ -0,0 +1,1672 @@
+/*
+ * ARM generic vector expansion
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+
+
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz,
+ gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
+ opr_sz, max_sz, 0, fn);
+}
+
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
+#define GEN_CMP0(NAME, COND) \
+ void NAME(unsigned vece, uint32_t d, uint32_t m, \
+ uint32_t opr_sz, uint32_t max_sz) \
+ { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
+
+GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
+GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
+GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
+GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
+GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
+
+#undef GEN_CMP0
+
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_sari_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_sari_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_sari_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits.
+ */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
+
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_shri_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64, },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in all zeros as input to accumulate: nop.
+ */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+/*
+ * Shift one less than the requested amount, and the low bit is
+ * the rounding bit. For the 8 and 16-bit operations, because we
+ * mask the low bit, we can perform a normal integer shift instead
+ * of a vector shift.
+ */
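+/*
+ * For example, an 8-bit srshr of 22 (0b00010110) by sh == 3 computes a
+ * rounding bit of (22 >> 2) & 1 == 1 and a truncated shift of 22 >> 3 == 2,
+ * giving 3 == round(22 / 8).
+ */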
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_sar8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_sar16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t;
+
+ /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
+ if (sh == 32) {
+ tcg_gen_movi_i32(d, 0);
+ return;
+ }
+ t = tcg_temp_new_i32();
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_sari_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+}
+
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_sari_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_sari_vec(vece, d, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srshr8_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_srshr16_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_srshr32_i32,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_srshr64_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero.
+ */
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr8_i64(t, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr16_i64(t, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ gen_srshr32_i32(t, a, sh);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr64_i64(t, a, sh);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_srshr_vec(vece, t, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srsra8_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_srsra16_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_srsra32_i32,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_srsra64_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero. With accumulation, this leaves D unchanged.
+ */
+ if (shift == (8 << vece)) {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_shr8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_shr16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t;
+
+ /* Handle shift by the input size for the benefit of trans_URSHR_ri */
+ if (sh == 32) {
+ tcg_gen_extract_i32(d, a, sh - 1, 1);
+ return;
+ }
+ t = tcg_temp_new_i32();
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_shri_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+}
+
+void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_shri_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_shri_vec(vece, d, a, shift);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_urshr8_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_urshr16_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_urshr32_i32,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_urshr64_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in zero. With rounding, this produces a
+ * copy of the most significant bit.
+ */
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 8) {
+ tcg_gen_vec_shr8i_i64(t, a, 7);
+ } else {
+ gen_urshr8_i64(t, a, sh);
+ }
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 16) {
+ tcg_gen_vec_shr16i_i64(t, a, 15);
+ } else {
+ gen_urshr16_i64(t, a, sh);
+ }
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ if (sh == 32) {
+ tcg_gen_shri_i32(t, a, 31);
+ } else {
+ gen_urshr32_i32(t, a, sh);
+ }
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 64) {
+ tcg_gen_shri_i64(t, a, 63);
+ } else {
+ gen_urshr64_i64(t, a, sh);
+ }
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ if (sh == (8 << vece)) {
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ } else {
+ gen_urshr_vec(vece, t, a, sh);
+ }
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ursra8_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_ursra16_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_ursra32_i32,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_ursra64_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
+
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+}
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /* Shift of esize leaves destination unchanged. */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+}
+
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift < (8 << vece));
+
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, d, d, a);
+}
+
+/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
+ * these tables are shared with AArch64 which does support them.
+ */
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+/* CMTST: test is "(X & Y) != 0". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_and_i32(d, a, b);
+ tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
+}
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_and_i64(d, a, b);
+ tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_and_vec(vece, d, a, b);
+ tcg_gen_dupi_vec(vece, a, 0);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i32 rval = tcg_temp_new_i32();
+ TCGv_i32 lsh = tcg_temp_new_i32();
+ TCGv_i32 rsh = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_constant_i32(0);
+ TCGv_i32 max = tcg_constant_i32(32);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i32(lsh, shift);
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_shl_i32(lval, src, lsh);
+ tcg_gen_shr_i32(rval, src, rsh);
+ tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
+ tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
+}
+
+void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+ TCGv_i64 lval = tcg_temp_new_i64();
+ TCGv_i64 rval = tcg_temp_new_i64();
+ TCGv_i64 lsh = tcg_temp_new_i64();
+ TCGv_i64 rsh = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_constant_i64(0);
+ TCGv_i64 max = tcg_constant_i64(64);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i64(lsh, shift);
+ tcg_gen_neg_i64(rsh, lsh);
+ tcg_gen_shl_i64(lval, src, lsh);
+ tcg_gen_shr_i64(rval, src, rsh);
+ tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
+ tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
+}
+
+static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
+ TCGv_vec src, TCGv_vec shift)
+{
+ TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec msk, max;
+
+ tcg_gen_neg_vec(vece, rsh, shift);
+ if (vece == MO_8) {
+ tcg_gen_mov_vec(lsh, shift);
+ } else {
+ msk = tcg_temp_new_vec_matching(dst);
+ tcg_gen_dupi_vec(vece, msk, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
+ }
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_shlv_vec(vece, lval, src, lsh);
+ tcg_gen_shrv_vec(vece, rval, src, rsh);
+
+ max = tcg_temp_new_vec_matching(dst);
+ tcg_gen_dupi_vec(vece, max, 8 << vece);
+
+ /*
+ * The choice of LT (signed) and GEU (unsigned) is biased toward
+ * the instructions of the x86_64 host. For MO_8, the whole byte
+ * is significant so we must use an unsigned compare; otherwise we
+ * have already masked to a byte and so a signed compare works.
+ * Other tcg hosts have a full set of comparisons and do not care.
+ */
+ if (vece == MO_8) {
+ tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
+ tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
+ tcg_gen_andc_vec(vece, lval, lval, lsh);
+ tcg_gen_andc_vec(vece, rval, rval, rsh);
+ } else {
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
+ tcg_gen_and_vec(vece, lval, lval, lsh);
+ tcg_gen_and_vec(vece, rval, rval, rsh);
+ }
+ tcg_gen_or_vec(vece, dst, lval, rval);
+}
+
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ushl_i32,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ushl_i64,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i32 rval = tcg_temp_new_i32();
+ TCGv_i32 lsh = tcg_temp_new_i32();
+ TCGv_i32 rsh = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_constant_i32(0);
+ TCGv_i32 max = tcg_constant_i32(31);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i32(lsh, shift);
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_shl_i32(lval, src, lsh);
+ tcg_gen_umin_i32(rsh, rsh, max);
+ tcg_gen_sar_i32(rval, src, rsh);
+ tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
+ tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
+}
+
+void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+ TCGv_i64 lval = tcg_temp_new_i64();
+ TCGv_i64 rval = tcg_temp_new_i64();
+ TCGv_i64 lsh = tcg_temp_new_i64();
+ TCGv_i64 rsh = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_constant_i64(0);
+ TCGv_i64 max = tcg_constant_i64(63);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i64(lsh, shift);
+ tcg_gen_neg_i64(rsh, lsh);
+ tcg_gen_shl_i64(lval, src, lsh);
+ tcg_gen_umin_i64(rsh, rsh, max);
+ tcg_gen_sar_i64(rval, src, rsh);
+ tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
+ tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
+}
+
+static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
+ TCGv_vec src, TCGv_vec shift)
+{
+ TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_neg_vec(vece, rsh, shift);
+ if (vece == MO_8) {
+ tcg_gen_mov_vec(lsh, shift);
+ } else {
+ tcg_gen_dupi_vec(vece, tmp, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, tmp);
+ tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ }
+
+ /* Bound rsh so out of bound right shift gets -1. */
+ tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, tmp);
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+
+ tcg_gen_shlv_vec(vece, lval, src, lsh);
+ tcg_gen_sarv_vec(vece, rval, src, rsh);
+
+ /* Select in-bound left shift. */
+ tcg_gen_andc_vec(vece, lval, lval, tmp);
+
+ /* Select between left and right shift. */
+ if (vece == MO_8) {
+ tcg_gen_dupi_vec(vece, tmp, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ } else {
+ tcg_gen_dupi_vec(vece, tmp, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ }
+}
+
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sshl_i32,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sshl_i64,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
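+/*
+ * Compute both the wrapping and the saturating add; lanes where the two
+ * differ have saturated, and that comparison mask is ORed into the QC flag.
+ */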
+static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_usadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_ssadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_ussub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_sssub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
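+/* Signed absolute difference: d = (a < b) ? b - a : a - b. */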
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+}
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+}
+
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_smin_vec(vece, t, a, b);
+ tcg_gen_smax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+}
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sabd_i32,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sabd_i64,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+}
+
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_umin_vec(vece, t, a, b);
+ tcg_gen_umax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+}
+
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_uabd_i32,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_uabd_i64,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_sabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_sabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_saba_i32,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_saba_i64,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_uabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_uabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_uaba_i32,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_uaba_i64,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
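+/* The pairwise operations have no inline expansion; always use a helper. */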
+void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_addp_b,
+ gen_helper_gvec_addp_h,
+ gen_helper_gvec_addp_s,
+ gen_helper_gvec_addp_d,
+ };
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_smaxp_b,
+ gen_helper_gvec_smaxp_h,
+ gen_helper_gvec_smaxp_s,
+ };
+ tcg_debug_assert(vece <= MO_32);
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_sminp_b,
+ gen_helper_gvec_sminp_h,
+ gen_helper_gvec_sminp_s,
+ };
+ tcg_debug_assert(vece <= MO_32);
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_umaxp_b,
+ gen_helper_gvec_umaxp_h,
+ gen_helper_gvec_umaxp_s,
+ };
+ tcg_debug_assert(vece <= MO_32);
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_uminp_b,
+ gen_helper_gvec_uminp_h,
+ gen_helper_gvec_uminp_s,
+ };
+ tcg_debug_assert(vece <= MO_32);
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
+}
diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c
new file mode 100644
index 0000000000..093b498b13
--- /dev/null
+++ b/target/arm/tcg/gengvec64.c
@@ -0,0 +1,190 @@
+/*
+ * AArch64 generic vector expansion
+ *
+ * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a64.h"
+
+
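+/* RAX1 (FEAT_SHA3): d = n ^ rol64(m, 1). */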
+static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_rotli_i64(d, m, 1);
+ tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
+{
+ tcg_gen_rotli_vec(vece, d, m, 1);
+ tcg_gen_xor_vec(vece, d, d, n);
+}
+
+void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen3 op = {
+ .fni8 = gen_rax1_i64,
+ .fniv = gen_rax1_vec,
+ .opt_opc = vecop_list,
+ .fno = gen_helper_crypto_rax1,
+ .vece = MO_64,
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
+}
+
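+/*
+ * XAR: d = ror(n ^ m, sh) per element; for 8- and 16-bit elements the
+ * rotate is open-coded with a pair of shifts and masks.
+ */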
+static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ uint64_t mask = dup_const(MO_8, 0xff >> sh);
+
+ tcg_gen_xor_i64(t, n, m);
+ tcg_gen_shri_i64(d, t, sh);
+ tcg_gen_shli_i64(t, t, 8 - sh);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(t, t, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ uint64_t mask = dup_const(MO_16, 0xffff >> sh);
+
+ tcg_gen_xor_i64(t, n, m);
+ tcg_gen_shri_i64(d, t, sh);
+ tcg_gen_shli_i64(t, t, 16 - sh);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(t, t, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
+{
+ tcg_gen_xor_i32(d, n, m);
+ tcg_gen_rotri_i32(d, d, sh);
+}
+
+static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+ tcg_gen_xor_i64(d, n, m);
+ tcg_gen_rotri_i64(d, d, sh);
+}
+
+static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, int64_t sh)
+{
+ tcg_gen_xor_vec(vece, d, n, m);
+ tcg_gen_rotri_vec(vece, d, d, sh);
+}
+
+void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, int64_t shift,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen3i ops[4] = {
+ { .fni8 = gen_xar8_i64,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_sve2_xar_b,
+ .opt_opc = vecop,
+ .vece = MO_8 },
+ { .fni8 = gen_xar16_i64,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_sve2_xar_h,
+ .opt_opc = vecop,
+ .vece = MO_16 },
+ { .fni4 = gen_xar_i32,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_sve2_xar_s,
+ .opt_opc = vecop,
+ .vece = MO_32 },
+ { .fni8 = gen_xar_i64,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_gvec_xar_d,
+ .opt_opc = vecop,
+ .vece = MO_64 }
+ };
+ int esize = 8 << vece;
+
+ /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift <= esize);
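+ /* Rotating by esize is the identity, so it becomes the xor case below. */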
+ shift &= esize - 1;
+
+ if (shift == 0) {
+ /* xar with no rotate devolves to xor. */
+ tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
+ shift, &ops[vece]);
+ }
+}
+
+static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_xor_i64(d, n, m);
+ tcg_gen_xor_i64(d, d, k);
+}
+
+static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_xor_vec(vece, d, n, m);
+ tcg_gen_xor_vec(vece, d, d, k);
+}
+
+void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_eor3_i64,
+ .fniv = gen_eor3_vec,
+ .fno = gen_helper_sve2_eor3,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_andc_i64(d, m, k);
+ tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_andc_vec(vece, d, m, k);
+ tcg_gen_xor_vec(vece, d, d, n);
+}
+
+void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bcax_i64,
+ .fniv = gen_bcax_vec,
+ .fno = gen_helper_sve2_bcax,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index 0518165399..371388f61b 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -132,3 +132,15 @@ DEF_HELPER_4(cpye, void, env, i32, i32, i32)
DEF_HELPER_4(cpyfp, void, env, i32, i32, i32)
DEF_HELPER_4(cpyfm, void, env, i32, i32, i32)
DEF_HELPER_4(cpyfe, void, env, i32, i32, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 3b1a9f0fc5..508932a249 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -24,6 +24,7 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: gen_a64)
arm_ss.add(files(
'cpu32.c',
+ 'gengvec.c',
'translate.c',
'translate-m-nocp.c',
'translate-mve.c',
@@ -42,6 +43,7 @@ arm_ss.add(files(
arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
'cpu64.c',
+ 'gengvec64.c',
'translate-a64.c',
'translate-sve.c',
'translate-sme.c',
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index bc6c4a54e9..a0b51c8809 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -745,11 +745,6 @@ uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
return (a + b) ^ mask;
}
-#define NEON_FN(dest, src1, src2) dest = src1 + src2
-NEON_POP(padd_u8, neon_u8, 4)
-NEON_POP(padd_u16, neon_u16, 2)
-#undef NEON_FN
-
#define NEON_FN(dest, src1, src2) dest = src1 - src2
NEON_VOP(sub_u8, neon_u8, 4)
NEON_VOP(sub_u16, neon_u16, 2)
diff --git a/target/arm/tcg/t32.decode b/target/arm/tcg/t32.decode
index f21ad0167a..d327178829 100644
--- a/target/arm/tcg/t32.decode
+++ b/target/arm/tcg/t32.decode
@@ -458,41 +458,41 @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos
# Note that Load, unsigned (literal) overlaps all other load encodings.
{
{
- NOP 1111 1000 -001 1111 1111 ------------ # PLD
+ PLD 1111 1000 -001 1111 1111 ------------ # (literal)
LDRB_ri 1111 1000 .001 1111 .... ............ @ldst_ri_lit
}
{
- NOP 1111 1000 1001 ---- 1111 ------------ # PLD
+ PLD 1111 1000 1001 ---- 1111 ------------ # (immediate T1)
LDRB_ri 1111 1000 1001 .... .... ............ @ldst_ri_pos
}
LDRB_ri 1111 1000 0001 .... .... 1..1 ........ @ldst_ri_idx
{
- NOP 1111 1000 0001 ---- 1111 1100 -------- # PLD
+ PLD 1111 1000 0001 ---- 1111 1100 -------- # (immediate T2)
LDRB_ri 1111 1000 0001 .... .... 1100 ........ @ldst_ri_neg
}
LDRBT_ri 1111 1000 0001 .... .... 1110 ........ @ldst_ri_unp
{
- NOP 1111 1000 0001 ---- 1111 000000 -- ---- # PLD
+ PLD 1111 1000 0001 ---- 1111 000000 -- ---- # (register)
LDRB_rr 1111 1000 0001 .... .... 000000 .. .... @ldst_rr
}
}
{
{
- NOP 1111 1000 -011 1111 1111 ------------ # PLD
+ PLD 1111 1000 -011 1111 1111 ------------ # (literal)
LDRH_ri 1111 1000 .011 1111 .... ............ @ldst_ri_lit
}
{
- NOP 1111 1000 1011 ---- 1111 ------------ # PLDW
+ PLDW 1111 1000 1011 ---- 1111 ------------ # (immediate T1)
LDRH_ri 1111 1000 1011 .... .... ............ @ldst_ri_pos
}
LDRH_ri 1111 1000 0011 .... .... 1..1 ........ @ldst_ri_idx
{
- NOP 1111 1000 0011 ---- 1111 1100 -------- # PLDW
+ PLDW 1111 1000 0011 ---- 1111 1100 -------- # (immediate T2)
LDRH_ri 1111 1000 0011 .... .... 1100 ........ @ldst_ri_neg
}
LDRHT_ri 1111 1000 0011 .... .... 1110 ........ @ldst_ri_unp
{
- NOP 1111 1000 0011 ---- 1111 000000 -- ---- # PLDW
+ PLDW 1111 1000 0011 ---- 1111 000000 -- ---- # (register)
LDRH_rr 1111 1000 0011 .... .... 000000 .. .... @ldst_rr
}
}
@@ -504,24 +504,23 @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos
LDRT_ri 1111 1000 0101 .... .... 1110 ........ @ldst_ri_unp
LDR_rr 1111 1000 0101 .... .... 000000 .. .... @ldst_rr
}
-# NOPs here are PLI.
{
{
- NOP 1111 1001 -001 1111 1111 ------------
+ PLI 1111 1001 -001 1111 1111 ------------ # (literal T3)
LDRSB_ri 1111 1001 .001 1111 .... ............ @ldst_ri_lit
}
{
- NOP 1111 1001 1001 ---- 1111 ------------
+ PLI 1111 1001 1001 ---- 1111 ------------ # (immediate T1)
LDRSB_ri 1111 1001 1001 .... .... ............ @ldst_ri_pos
}
LDRSB_ri 1111 1001 0001 .... .... 1..1 ........ @ldst_ri_idx
{
- NOP 1111 1001 0001 ---- 1111 1100 --------
+ PLI 1111 1001 0001 ---- 1111 1100 -------- # (immediate T2)
LDRSB_ri 1111 1001 0001 .... .... 1100 ........ @ldst_ri_neg
}
LDRSBT_ri 1111 1001 0001 .... .... 1110 ........ @ldst_ri_unp
{
- NOP 1111 1001 0001 ---- 1111 000000 -- ----
+ PLI 1111 1001 0001 ---- 1111 000000 -- ---- # (register)
LDRSB_rr 1111 1001 0001 .... .... 000000 .. .... @ldst_rr
}
}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 4126aaa27e..9167e4d0bd 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1314,6 +1314,67 @@ bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
}

+/*
+ * Expanders for AdvSIMD translation functions.
+ */
+
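+/*
+ * Reject reserved element-size encodings, then expand the operation
+ * only if fp_access_check succeeds.
+ */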
+static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
+ gen_helper_gvec_2 *fn)
+{
+ if (!a->q && a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
+ }
+ return true;
+}
+
+static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
+ gen_helper_gvec_3 *fn)
+{
+ if (!a->q && a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
+ }
+ return true;
+}
+
+static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
+{
+ if (!a->q && a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
+ }
+ return true;
+}
+
+static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
+{
+ if (a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
+ }
+ return true;
+}
+
+static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
+{
+ if (!a->q && a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
+ }
+ return true;
+}
+
/*
* This utility function is for doing register extension with an
* optional shift. You will likely want to pass a temporary for the
* destination register. See DecodeRegExtend() in the ARM ARM.
@@ -4560,6 +4621,955 @@ static bool trans_EXTR(DisasContext *s, arg_extract *a)
return true;
}
+/*
+ * Cryptographic AES, SHA, SHA512
+ */
+
+TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
+TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
+TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
+TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
+
+TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
+TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
+TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
+TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
+
+TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
+TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
+TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
+
+TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
+TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
+TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
+
+TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
+TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
+TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
+TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
+TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
+TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
+TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
+
+TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
+TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
+
+TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
+TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
+
+static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
+{
+ if (!dc_isar_feature(aa64_sm3, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op3 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ unsigned vsz, dofs;
+
+ read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
+ read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
+ read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
+
+ tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
+ tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
+ tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
+ tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
+
+ /* Clear the whole register first, then store bits [127:96]. */
+ vsz = vec_full_reg_size(s);
+ dofs = vec_full_reg_offset(s, a->rd);
+ tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
+ write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
+ }
+ return true;
+}
+
+static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
+ }
+ return true;
+}
+TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
+TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
+TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
+TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
+
+static bool trans_XAR(DisasContext *s, arg_XAR *a)
+{
+ if (!dc_isar_feature(aa64_sha3, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm), a->imm, 16,
+ vec_full_reg_size(s));
+ }
+ return true;
+}
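For reference, XAR combines an exclusive-OR with a rotate right of each 64-bit lane, and the fixed 16-byte operation size reflects that XAR only exists in a 128-bit form. A reference-model sketch of the per-element semantics, not taken from the patch:

    /* Reference model (sketch, not the TCG implementation): */
    static inline uint64_t xar64(uint64_t n, uint64_t m, unsigned rot)
    {
        uint64_t x = n ^ m;
        rot &= 63;
        return rot ? (x >> rot) | (x << (64 - rot)) : x;
    }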
+
+/*
+ * Advanced SIMD copy
+ */
+
+static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
+{
+ unsigned esz = ctz32(imm);
+ if (esz <= MO_64) {
+ *pesz = esz;
+ *pidx = imm >> (esz + 1);
+ return true;
+ }
+ return false;
+}
+
+static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
+{
+ MemOp esz;
+ unsigned idx;
+
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ /*
+ * This instruction just extracts the specified element and
+ * zero-extends it into the bottom of the destination register.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ read_vec_element(s, tmp, a->rn, idx, esz);
+ write_fp_dreg(s, a->rd, tmp);
+ }
+ return true;
+}
+
+static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
+{
+ MemOp esz;
+ unsigned idx;
+
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
+ return false;
+ }
+ if (esz == MO_64 && !a->q) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
+ vec_reg_offset(s, a->rn, idx, esz),
+ a->q ? 16 : 8, vec_full_reg_size(s));
+ }
+ return true;
+}
+
+static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
+{
+ MemOp esz;
+ unsigned idx;
+
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
+ return false;
+ }
+ if (esz == MO_64 && !a->q) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ cpu_reg(s, a->rn));
+ }
+ return true;
+}
+
+static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
+{
+ MemOp esz;
+ unsigned idx;
+
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
+ return false;
+ }
+ if (is_signed) {
+ if (esz == MO_64 || (esz == MO_32 && !a->q)) {
+ return false;
+ }
+ } else {
+ if (esz == MO_64 ? !a->q : a->q) {
+ return false;
+ }
+ }
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
+ if (is_signed && !a->q) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ }
+ }
+ return true;
+}
+
+TRANS(SMOV, do_smov_umov, a, MO_SIGN)
+TRANS(UMOV, do_smov_umov, a, 0)
+
+static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
+{
+ MemOp esz;
+ unsigned idx;
+
+ if (!decode_esz_idx(a->imm, &esz, &idx)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
+ clear_vec_high(s, true, a->rd);
+ }
+ return true;
+}
+
+static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
+{
+ MemOp esz;
+ unsigned didx, sidx;
+
+ if (!decode_esz_idx(a->di, &esz, &didx)) {
+ return false;
+ }
+ sidx = a->si >> esz;
+ if (fp_access_check(s)) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ read_vec_element(s, tmp, a->rn, sidx, esz);
+ write_vec_element(s, tmp, a->rd, didx, esz);
+
+ /* INS is considered a 128-bit write for SVE. */
+ clear_vec_high(s, true, a->rd);
+ }
+ return true;
+}
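decode_esz_idx() recovers both the element size and the index from a single immediate: the position of the lowest set bit gives the size (ARM ARM LowestSetBit()), and the bits above it give the index. A worked example, added purely for illustration:

    /* Example: imm = 0b10110.  ctz32(0b10110) == 1, so esz == MO_16
     * (halfword elements) and idx == imm >> (1 + 1) == 5.
     * For imm == 0 there is no set bit, ctz32() returns 32 > MO_64,
     * and decode_esz_idx() fails, leaving the encoding unallocated.
     */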
+
+/*
+ * Advanced SIMD three same
+ */
+
+typedef struct FPScalar {
+ void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+ void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+ void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+} FPScalar;
+
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = read_fp_dreg(s, a->rn);
+ TCGv_i64 t1 = read_fp_dreg(s, a->rm);
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_sreg(s, a->rn);
+ TCGv_i32 t1 = read_fp_sreg(s, a->rm);
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_hreg(s, a->rn);
+ TCGv_i32 t1 = read_fp_hreg(s, a->rm);
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
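do_fp3_scalar() dispatches on element size through an FPScalar triple of half/single/double generators, with the FP16 case additionally gated on FEAT_FP16. A sketch of how a further scalar op would plug into the same dispatcher; the helper names below are placeholders, not helpers known to exist:

    static const FPScalar f_scalar_ffoo = {
        gen_helper_foo_h,   /* half precision (placeholder name)   */
        gen_helper_foo_s,   /* single precision (placeholder name) */
        gen_helper_foo_d,   /* double precision (placeholder name) */
    };
    TRANS(FFOO_s, do_fp3_scalar, a, &f_scalar_ffoo)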
+
+static const FPScalar f_scalar_fadd = {
+ gen_helper_vfp_addh,
+ gen_helper_vfp_adds,
+ gen_helper_vfp_addd,
+};
+TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
+
+static const FPScalar f_scalar_fsub = {
+ gen_helper_vfp_subh,
+ gen_helper_vfp_subs,
+ gen_helper_vfp_subd,
+};
+TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
+
+static const FPScalar f_scalar_fdiv = {
+ gen_helper_vfp_divh,
+ gen_helper_vfp_divs,
+ gen_helper_vfp_divd,
+};
+TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
+
+static const FPScalar f_scalar_fmul = {
+ gen_helper_vfp_mulh,
+ gen_helper_vfp_muls,
+ gen_helper_vfp_muld,
+};
+TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
+
+static const FPScalar f_scalar_fmax = {
+ gen_helper_advsimd_maxh,
+ gen_helper_vfp_maxs,
+ gen_helper_vfp_maxd,
+};
+TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
+
+static const FPScalar f_scalar_fmin = {
+ gen_helper_advsimd_minh,
+ gen_helper_vfp_mins,
+ gen_helper_vfp_mind,
+};
+TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
+
+static const FPScalar f_scalar_fmaxnm = {
+ gen_helper_advsimd_maxnumh,
+ gen_helper_vfp_maxnums,
+ gen_helper_vfp_maxnumd,
+};
+TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
+
+static const FPScalar f_scalar_fminnm = {
+ gen_helper_advsimd_minnumh,
+ gen_helper_vfp_minnums,
+ gen_helper_vfp_minnumd,
+};
+TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
+
+static const FPScalar f_scalar_fmulx = {
+ gen_helper_advsimd_mulxh,
+ gen_helper_vfp_mulxs,
+ gen_helper_vfp_mulxd,
+};
+TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
+
+static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_mulh(d, n, m, s);
+ gen_vfp_negh(d, d);
+}
+
+static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_muls(d, n, m, s);
+ gen_vfp_negs(d, d);
+}
+
+static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_muld(d, n, m, s);
+ gen_vfp_negd(d, d);
+}
+
+static const FPScalar f_scalar_fnmul = {
+ gen_fnmul_h,
+ gen_fnmul_s,
+ gen_fnmul_d,
+};
+TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
+
+static const FPScalar f_scalar_fcmeq = {
+ gen_helper_advsimd_ceq_f16,
+ gen_helper_neon_ceq_f32,
+ gen_helper_neon_ceq_f64,
+};
+TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
+
+static const FPScalar f_scalar_fcmge = {
+ gen_helper_advsimd_cge_f16,
+ gen_helper_neon_cge_f32,
+ gen_helper_neon_cge_f64,
+};
+TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
+
+static const FPScalar f_scalar_fcmgt = {
+ gen_helper_advsimd_cgt_f16,
+ gen_helper_neon_cgt_f32,
+ gen_helper_neon_cgt_f64,
+};
+TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
+
+static const FPScalar f_scalar_facge = {
+ gen_helper_advsimd_acge_f16,
+ gen_helper_neon_acge_f32,
+ gen_helper_neon_acge_f64,
+};
+TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
+
+static const FPScalar f_scalar_facgt = {
+ gen_helper_advsimd_acgt_f16,
+ gen_helper_neon_acgt_f32,
+ gen_helper_neon_acgt_f64,
+};
+TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
+
+static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subh(d, n, m, s);
+ gen_vfp_absh(d, d);
+}
+
+static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subs(d, n, m, s);
+ gen_vfp_abss(d, d);
+}
+
+static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subd(d, n, m, s);
+ gen_vfp_absd(d, d);
+}
+
+static const FPScalar f_scalar_fabd = {
+ gen_fabd_h,
+ gen_fabd_s,
+ gen_fabd_d,
+};
+TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
+
+static const FPScalar f_scalar_frecps = {
+ gen_helper_recpsf_f16,
+ gen_helper_recpsf_f32,
+ gen_helper_recpsf_f64,
+};
+TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
+
+static const FPScalar f_scalar_frsqrts = {
+ gen_helper_rsqrtsf_f16,
+ gen_helper_rsqrtsf_f32,
+ gen_helper_rsqrtsf_f64,
+};
+TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
+
+static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
+ gen_helper_gvec_3_ptr * const fns[3])
+{
+ MemOp esz = a->esz;
+
+ switch (esz) {
+ case MO_64:
+ if (!a->q) {
+ return false;
+ }
+ break;
+ case MO_32:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
+ esz == MO_16, 0, fns[esz - 1]);
+ }
+ return true;
+}
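The table index fns[esz - 1] maps MO_16, MO_32 and MO_64 onto slots 0, 1 and 2 of the helper array, and the (esz == MO_16) flag is presumably what selects the FP16 float_status inside gen_gvec_op3_fpst(). Restated as a comment for reference:

    /* fns[MO_16 - 1] == fns[0]  -> _h helper
     * fns[MO_32 - 1] == fns[1]  -> _s helper
     * fns[MO_64 - 1] == fns[2]  -> _d helper
     */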
+
+static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
+ gen_helper_gvec_fadd_h,
+ gen_helper_gvec_fadd_s,
+ gen_helper_gvec_fadd_d,
+};
+TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd)
+
+static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
+ gen_helper_gvec_fsub_h,
+ gen_helper_gvec_fsub_s,
+ gen_helper_gvec_fsub_d,
+};
+TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub)
+
+static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
+ gen_helper_gvec_fdiv_h,
+ gen_helper_gvec_fdiv_s,
+ gen_helper_gvec_fdiv_d,
+};
+TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
+ gen_helper_gvec_fmul_h,
+ gen_helper_gvec_fmul_s,
+ gen_helper_gvec_fmul_d,
+};
+TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
+ gen_helper_gvec_fmax_h,
+ gen_helper_gvec_fmax_s,
+ gen_helper_gvec_fmax_d,
+};
+TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
+ gen_helper_gvec_fmin_h,
+ gen_helper_gvec_fmin_s,
+ gen_helper_gvec_fmin_d,
+};
+TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
+ gen_helper_gvec_fmaxnum_h,
+ gen_helper_gvec_fmaxnum_s,
+ gen_helper_gvec_fmaxnum_d,
+};
+TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm)
+
+static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
+ gen_helper_gvec_fminnum_h,
+ gen_helper_gvec_fminnum_s,
+ gen_helper_gvec_fminnum_d,
+};
+TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
+ gen_helper_gvec_fmulx_h,
+ gen_helper_gvec_fmulx_s,
+ gen_helper_gvec_fmulx_d,
+};
+TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
+ gen_helper_gvec_vfma_h,
+ gen_helper_gvec_vfma_s,
+ gen_helper_gvec_vfma_d,
+};
+TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
+ gen_helper_gvec_vfms_h,
+ gen_helper_gvec_vfms_s,
+ gen_helper_gvec_vfms_d,
+};
+TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls)
+
+static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
+ gen_helper_gvec_fceq_h,
+ gen_helper_gvec_fceq_s,
+ gen_helper_gvec_fceq_d,
+};
+TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq)
+
+static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
+ gen_helper_gvec_fcge_h,
+ gen_helper_gvec_fcge_s,
+ gen_helper_gvec_fcge_d,
+};
+TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge)
+
+static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
+ gen_helper_gvec_fcgt_h,
+ gen_helper_gvec_fcgt_s,
+ gen_helper_gvec_fcgt_d,
+};
+TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt)
+
+static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
+ gen_helper_gvec_facge_h,
+ gen_helper_gvec_facge_s,
+ gen_helper_gvec_facge_d,
+};
+TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge)
+
+static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
+ gen_helper_gvec_facgt_h,
+ gen_helper_gvec_facgt_s,
+ gen_helper_gvec_facgt_d,
+};
+TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt)
+
+static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
+ gen_helper_gvec_fabd_h,
+ gen_helper_gvec_fabd_s,
+ gen_helper_gvec_fabd_d,
+};
+TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
+
+static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
+ gen_helper_gvec_recps_h,
+ gen_helper_gvec_recps_s,
+ gen_helper_gvec_recps_d,
+};
+TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps)
+
+static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
+ gen_helper_gvec_rsqrts_h,
+ gen_helper_gvec_rsqrts_s,
+ gen_helper_gvec_rsqrts_d,
+};
+TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
+
+static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
+ gen_helper_gvec_faddp_h,
+ gen_helper_gvec_faddp_s,
+ gen_helper_gvec_faddp_d,
+};
+TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
+ gen_helper_gvec_fmaxp_h,
+ gen_helper_gvec_fmaxp_s,
+ gen_helper_gvec_fmaxp_d,
+};
+TRANS(FMAXP_v, do_fp3_vector, a, f_vector_fmaxp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
+ gen_helper_gvec_fminp_h,
+ gen_helper_gvec_fminp_s,
+ gen_helper_gvec_fminp_d,
+};
+TRANS(FMINP_v, do_fp3_vector, a, f_vector_fminp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
+ gen_helper_gvec_fmaxnump_h,
+ gen_helper_gvec_fmaxnump_s,
+ gen_helper_gvec_fmaxnump_d,
+};
+TRANS(FMAXNMP_v, do_fp3_vector, a, f_vector_fmaxnmp)
+
+static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
+ gen_helper_gvec_fminnump_h,
+ gen_helper_gvec_fminnump_s,
+ gen_helper_gvec_fminnump_d,
+};
+TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp)
+
+static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
+{
+ if (fp_access_check(s)) {
+ int data = (is_2 << 1) | is_s;
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm), tcg_env,
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ data, gen_helper_gvec_fmlal_a64);
+ }
+ return true;
+}
+
+TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
+TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
+TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
+TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
+
+TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
+TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
+TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
+TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
+TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
+
+TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
+TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
+TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
+TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
+TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
+
+static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
+ }
+ return true;
+}
+
+TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
+TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
+TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
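BSL, BIT and BIF all reuse tcg_gen_gvec_bitsel with the operands permuted. Taking bitsel(sel, x, y) as (x & sel) | (y & ~sel) per element, the three operand orders above work out to:

    /* BSL: rd = (rn & rd) | (rm & ~rd)   -- rd is the selector
     * BIT: rd = (rn & rm) | (rd & ~rm)   -- insert rn where rm is set
     * BIF: rd = (rd & rm) | (rn & ~rm)   -- insert rn where rm is clear
     */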
+
+/*
+ * Advanced SIMD scalar/vector x indexed element
+ */
+
+static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = read_fp_dreg(s, a->rn);
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ read_vec_element(s, t1, a->rm, a->idx, MO_64);
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_sreg(s, a->rn);
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_hreg(s, a->rn);
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
+TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
+
+static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = read_fp_dreg(s, a->rd);
+ TCGv_i64 t1 = read_fp_dreg(s, a->rn);
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ read_vec_element(s, t2, a->rm, a->idx, MO_64);
+ if (neg) {
+ gen_vfp_negd(t1, t1);
+ }
+ gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_sreg(s, a->rd);
+ TCGv_i32 t1 = read_fp_sreg(s, a->rn);
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
+ if (neg) {
+ gen_vfp_negs(t1, t1);
+ }
+ gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_hreg(s, a->rd);
+ TCGv_i32 t1 = read_fp_hreg(s, a->rn);
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
+ if (neg) {
+ gen_vfp_negh(t1, t1);
+ }
+ gen_helper_advsimd_muladdh(t0, t1, t2, t0,
+ fpstatus_ptr(FPST_FPCR_F16));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
+TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
+
+static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
+ gen_helper_gvec_3_ptr * const fns[3])
+{
+ MemOp esz = a->esz;
+
+ switch (esz) {
+ case MO_64:
+ if (!a->q) {
+ return false;
+ }
+ break;
+ case MO_32:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
+ esz == MO_16, a->idx, fns[esz - 1]);
+ }
+ return true;
+}
+
+static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
+ gen_helper_gvec_fmul_idx_h,
+ gen_helper_gvec_fmul_idx_s,
+ gen_helper_gvec_fmul_idx_d,
+};
+TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
+
+static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
+ gen_helper_gvec_fmulx_idx_h,
+ gen_helper_gvec_fmulx_idx_s,
+ gen_helper_gvec_fmulx_idx_d,
+};
+TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
+
+static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
+{
+ static gen_helper_gvec_4_ptr * const fns[3] = {
+ gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d,
+ };
+ MemOp esz = a->esz;
+
+ switch (esz) {
+ case MO_64:
+ if (!a->q) {
+ return false;
+ }
+ break;
+ case MO_32:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ esz == MO_16, (a->idx << 1) | neg,
+ fns[esz - 1]);
+ }
+ return true;
+}
+
+TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
+TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
+
+static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
+{
+ if (fp_access_check(s)) {
+ int data = (a->idx << 2) | (is_2 << 1) | is_s;
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm), tcg_env,
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ data, gen_helper_gvec_fmlal_idx_a64);
+ }
+ return true;
+}
+
+TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
+TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
+TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
+TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
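The data word packs the by-element decode information for the helper; from its construction above, the low bit distinguishes the FMLSL forms, bit 1 distinguishes the "2" (upper-half) forms, and the element index sits in the bits above. As a comment:

    /* data layout used above:
     *   bit 0   : set for FMLSL/FMLSL2 (is_s)
     *   bit 1   : set for the "2" forms (is_2)
     *   bits 2+ : element index (by-element variants only)
     */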
+
+/*
+ * Advanced SIMD scalar pairwise
+ */
+
+static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ read_vec_element(s, t0, a->rn, 0, MO_64);
+ read_vec_element(s, t1, a->rn, 1, MO_64);
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t0, a->rn, 0, MO_32);
+ read_vec_element_i32(s, t1, a->rn, 1, MO_32);
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t0, a->rn, 0, MO_16);
+ read_vec_element_i32(s, t1, a->rn, 1, MO_16);
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
+TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
+TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
+TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
+TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
+
+static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ read_vec_element(s, t0, a->rn, 0, MO_64);
+ read_vec_element(s, t1, a->rn, 1, MO_64);
+ tcg_gen_add_i64(t0, t0, t1);
+ write_fp_dreg(s, a->rd, t0);
+ }
+ return true;
+}
+
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -6018,10 +7028,10 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
tcg_gen_mov_i32(tcg_res, tcg_op);
break;
case 0x1: /* FABS */
- tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
+ gen_vfp_absh(tcg_res, tcg_op);
break;
case 0x2: /* FNEG */
- tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ gen_vfp_negh(tcg_res, tcg_op);
break;
case 0x3: /* FSQRT */
fpst = fpstatus_ptr(FPST_FPCR_F16);
@@ -6072,10 +7082,10 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
tcg_gen_mov_i32(tcg_res, tcg_op);
goto done;
case 0x1: /* FABS */
- gen_helper_vfp_abss(tcg_res, tcg_op);
+ gen_vfp_abss(tcg_res, tcg_op);
goto done;
case 0x2: /* FNEG */
- gen_helper_vfp_negs(tcg_res, tcg_op);
+ gen_vfp_negs(tcg_res, tcg_op);
goto done;
case 0x3: /* FSQRT */
gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
@@ -6147,10 +7157,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
switch (opcode) {
case 0x1: /* FABS */
- gen_helper_vfp_absd(tcg_res, tcg_op);
+ gen_vfp_absd(tcg_res, tcg_op);
goto done;
case 0x2: /* FNEG */
- gen_helper_vfp_negd(tcg_res, tcg_op);
+ gen_vfp_negd(tcg_res, tcg_op);
goto done;
case 0x3: /* FSQRT */
gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
@@ -6359,200 +7369,6 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
}
}
-/* Floating-point data-processing (2 source) - single precision */
-static void handle_fp_2src_single(DisasContext *s, int opcode,
- int rd, int rn, int rm)
-{
- TCGv_i32 tcg_op1;
- TCGv_i32 tcg_op2;
- TCGv_i32 tcg_res;
- TCGv_ptr fpst;
-
- tcg_res = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR);
- tcg_op1 = read_fp_sreg(s, rn);
- tcg_op2 = read_fp_sreg(s, rm);
-
- switch (opcode) {
- case 0x0: /* FMUL */
- gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FDIV */
- gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FSUB */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x4: /* FMAX */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5: /* FMIN */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAXNM */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FMINNM */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x8: /* FNMUL */
- gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_negs(tcg_res, tcg_res);
- break;
- }
-
- write_fp_sreg(s, rd, tcg_res);
-}
-
-/* Floating-point data-processing (2 source) - double precision */
-static void handle_fp_2src_double(DisasContext *s, int opcode,
- int rd, int rn, int rm)
-{
- TCGv_i64 tcg_op1;
- TCGv_i64 tcg_op2;
- TCGv_i64 tcg_res;
- TCGv_ptr fpst;
-
- tcg_res = tcg_temp_new_i64();
- fpst = fpstatus_ptr(FPST_FPCR);
- tcg_op1 = read_fp_dreg(s, rn);
- tcg_op2 = read_fp_dreg(s, rm);
-
- switch (opcode) {
- case 0x0: /* FMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FDIV */
- gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FSUB */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x4: /* FMAX */
- gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5: /* FMIN */
- gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAXNM */
- gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FMINNM */
- gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x8: /* FNMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_negd(tcg_res, tcg_res);
- break;
- }
-
- write_fp_dreg(s, rd, tcg_res);
-}
-
-/* Floating-point data-processing (2 source) - half precision */
-static void handle_fp_2src_half(DisasContext *s, int opcode,
- int rd, int rn, int rm)
-{
- TCGv_i32 tcg_op1;
- TCGv_i32 tcg_op2;
- TCGv_i32 tcg_res;
- TCGv_ptr fpst;
-
- tcg_res = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR_F16);
- tcg_op1 = read_fp_hreg(s, rn);
- tcg_op2 = read_fp_hreg(s, rm);
-
- switch (opcode) {
- case 0x0: /* FMUL */
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FDIV */
- gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FSUB */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x4: /* FMAX */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5: /* FMIN */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAXNM */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FMINNM */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x8: /* FNMUL */
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_sreg(s, rd, tcg_res);
-}
-
-/* Floating point data-processing (2 source)
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
- */
-static void disas_fp_2src(DisasContext *s, uint32_t insn)
-{
- int mos = extract32(insn, 29, 3);
- int type = extract32(insn, 22, 2);
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rm = extract32(insn, 16, 5);
- int opcode = extract32(insn, 12, 4);
-
- if (opcode > 8 || mos) {
- unallocated_encoding(s);
- return;
- }
-
- switch (type) {
- case 0:
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_2src_single(s, opcode, rd, rn, rm);
- break;
- case 1:
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_2src_double(s, opcode, rd, rn, rm);
- break;
- case 3:
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_2src_half(s, opcode, rd, rn, rm);
- break;
- default:
- unallocated_encoding(s);
- }
-}
-
/* Floating-point data-processing (3 source) - single precision */
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
int rd, int rn, int rm, int ra)
@@ -6573,11 +7389,11 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
* flipped if it is a negated-input.
*/
if (o1 == true) {
- gen_helper_vfp_negs(tcg_op3, tcg_op3);
+ gen_vfp_negs(tcg_op3, tcg_op3);
}
if (o0 != o1) {
- gen_helper_vfp_negs(tcg_op1, tcg_op1);
+ gen_vfp_negs(tcg_op1, tcg_op1);
}
gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
@@ -6605,11 +7421,11 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
* flipped if it is a negated-input.
*/
if (o1 == true) {
- gen_helper_vfp_negd(tcg_op3, tcg_op3);
+ gen_vfp_negd(tcg_op3, tcg_op3);
}
if (o0 != o1) {
- gen_helper_vfp_negd(tcg_op1, tcg_op1);
+ gen_vfp_negd(tcg_op1, tcg_op1);
}
gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
@@ -7156,7 +7972,7 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
break;
case 2:
/* Floating point data-processing (2 source) */
- disas_fp_2src(s, insn);
+ unallocated_encoding(s); /* in decodetree */
break;
case 3:
/* Floating point conditional select */
@@ -7618,268 +8434,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
write_fp_dreg(s, rd, tcg_res);
}
-/* DUP (Element, Vector)
- *
- * 31 30 29 21 20 16 15 10 9 5 4 0
- * +---+---+-------------------+--------+-------------+------+------+
- * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
- * +---+---+-------------------+--------+-------------+------+------+
- *
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
- */
-static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
- int imm5)
-{
- int size = ctz32(imm5);
- int index;
-
- if (size > 3 || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- index = imm5 >> (size + 1);
- tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
- vec_reg_offset(s, rn, index, size),
- is_q ? 16 : 8, vec_full_reg_size(s));
-}
-
-/* DUP (element, scalar)
- * 31 21 20 16 15 10 9 5 4 0
- * +-----------------------+--------+-------------+------+------+
- * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
- * +-----------------------+--------+-------------+------+------+
- */
-static void handle_simd_dupes(DisasContext *s, int rd, int rn,
- int imm5)
-{
- int size = ctz32(imm5);
- int index;
- TCGv_i64 tmp;
-
- if (size > 3) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- index = imm5 >> (size + 1);
-
- /* This instruction just extracts the specified element and
- * zero-extends it into the bottom of the destination register.
- */
- tmp = tcg_temp_new_i64();
- read_vec_element(s, tmp, rn, index, size);
- write_fp_dreg(s, rd, tmp);
-}
-
-/* DUP (General)
- *
- * 31 30 29 21 20 16 15 10 9 5 4 0
- * +---+---+-------------------+--------+-------------+------+------+
- * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
- * +---+---+-------------------+--------+-------------+------+------+
- *
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
- */
-static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
- int imm5)
-{
- int size = ctz32(imm5);
- uint32_t dofs, oprsz, maxsz;
-
- if (size > 3 || ((size == 3) && !is_q)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- dofs = vec_full_reg_offset(s, rd);
- oprsz = is_q ? 16 : 8;
- maxsz = vec_full_reg_size(s);
-
- tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
-}
-
-/* INS (Element)
- *
- * 31 21 20 16 15 14 11 10 9 5 4 0
- * +-----------------------+--------+------------+---+------+------+
- * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
- * +-----------------------+--------+------------+---+------+------+
- *
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
- * index: encoded in imm5<4:size+1>
- */
-static void handle_simd_inse(DisasContext *s, int rd, int rn,
- int imm4, int imm5)
-{
- int size = ctz32(imm5);
- int src_index, dst_index;
- TCGv_i64 tmp;
-
- if (size > 3) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- dst_index = extract32(imm5, 1+size, 5);
- src_index = extract32(imm4, size, 4);
-
- tmp = tcg_temp_new_i64();
-
- read_vec_element(s, tmp, rn, src_index, size);
- write_vec_element(s, tmp, rd, dst_index, size);
-
- /* INS is considered a 128-bit write for SVE. */
- clear_vec_high(s, true, rd);
-}
-
-
-/* INS (General)
- *
- * 31 21 20 16 15 10 9 5 4 0
- * +-----------------------+--------+-------------+------+------+
- * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
- * +-----------------------+--------+-------------+------+------+
- *
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
- * index: encoded in imm5<4:size+1>
- */
-static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
-{
- int size = ctz32(imm5);
- int idx;
-
- if (size > 3) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- idx = extract32(imm5, 1 + size, 4 - size);
- write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
-
- /* INS is considered a 128-bit write for SVE. */
- clear_vec_high(s, true, rd);
-}
-
-/*
- * UMOV (General)
- * SMOV (General)
- *
- * 31 30 29 21 20 16 15 12 10 9 5 4 0
- * +---+---+-------------------+--------+-------------+------+------+
- * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
- * +---+---+-------------------+--------+-------------+------+------+
- *
- * U: unsigned when set
- * size: encoded in imm5 (see ARM ARM LowestSetBit())
- */
-static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
- int rn, int rd, int imm5)
-{
- int size = ctz32(imm5);
- int element;
- TCGv_i64 tcg_rd;
-
- /* Check for UnallocatedEncodings */
- if (is_signed) {
- if (size > 2 || (size == 2 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- } else {
- if (size > 3
- || (size < 3 && is_q)
- || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- element = extract32(imm5, 1+size, 4);
-
- tcg_rd = cpu_reg(s, rd);
- read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
- if (is_signed && !is_q) {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
- }
-}
-
-/* AdvSIMD copy
- * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
- * +---+---+----+-----------------+------+---+------+---+------+------+
- * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
- * +---+---+----+-----------------+------+---+------+---+------+------+
- */
-static void disas_simd_copy(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int imm4 = extract32(insn, 11, 4);
- int op = extract32(insn, 29, 1);
- int is_q = extract32(insn, 30, 1);
- int imm5 = extract32(insn, 16, 5);
-
- if (op) {
- if (is_q) {
- /* INS (element) */
- handle_simd_inse(s, rd, rn, imm4, imm5);
- } else {
- unallocated_encoding(s);
- }
- } else {
- switch (imm4) {
- case 0:
- /* DUP (element - vector) */
- handle_simd_dupe(s, is_q, rd, rn, imm5);
- break;
- case 1:
- /* DUP (general) */
- handle_simd_dupg(s, is_q, rd, rn, imm5);
- break;
- case 3:
- if (is_q) {
- /* INS (general) */
- handle_simd_insg(s, rd, rn, imm5);
- } else {
- unallocated_encoding(s);
- }
- break;
- case 5:
- case 7:
- /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
- handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
- }
-}
-
/* AdvSIMD modified immediate
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
@@ -7904,27 +8458,31 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
bool is_q = extract32(insn, 30, 1);
uint64_t imm = 0;
- if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
- /* Check for FMOV (vector, immediate) - half-precision */
- if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
+ if (o2) {
+ if (cmode != 0xf || is_neg) {
unallocated_encoding(s);
return;
}
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (cmode == 15 && o2 && !is_neg) {
/* FMOV (vector, immediate) - half-precision */
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ unallocated_encoding(s);
+ return;
+ }
imm = vfp_expand_imm(MO_16, abcdefgh);
/* now duplicate across the lanes */
imm = dup_const(MO_16, imm);
} else {
+ if (cmode == 0xf && is_neg && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
imm = asimd_imm_const(abcdefgh, cmode, is_neg);
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
/* MOVI or MVNI, with MVNI negation handled above. */
tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
@@ -7939,176 +8497,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
}
}
-/* AdvSIMD scalar copy
- * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
- * +-----+----+-----------------+------+---+------+---+------+------+
- * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
- * +-----+----+-----------------+------+---+------+---+------+------+
- */
-static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int imm4 = extract32(insn, 11, 4);
- int imm5 = extract32(insn, 16, 5);
- int op = extract32(insn, 29, 1);
-
- if (op != 0 || imm4 != 0) {
- unallocated_encoding(s);
- return;
- }
-
- /* DUP (element, scalar) */
- handle_simd_dupes(s, rd, rn, imm5);
-}
-
-/* AdvSIMD scalar pairwise
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
-{
- int u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- TCGv_ptr fpst;
-
- /* For some ops (the FP ones), size[1] is part of the encoding.
- * For ADDP strictly it is not but size[1] is always 1 for valid
- * encodings.
- */
- opcode |= (extract32(size, 1, 1) << 5);
-
- switch (opcode) {
- case 0x3b: /* ADDP */
- if (u || size != 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
-
- fpst = NULL;
- break;
- case 0xc: /* FMAXNMP */
- case 0xd: /* FADDP */
- case 0xf: /* FMAXP */
- case 0x2c: /* FMINNMP */
- case 0x2f: /* FMINP */
- /* FP op, size[0] is 32 or 64 bit*/
- if (!u) {
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- } else {
- size = MO_16;
- }
- } else {
- size = extract32(size, 0, 1) ? MO_64 : MO_32;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (size == MO_64) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, 0, MO_64);
- read_vec_element(s, tcg_op2, rn, 1, MO_64);
-
- switch (opcode) {
- case 0x3b: /* ADDP */
- tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
- break;
- case 0xc: /* FMAXNMP */
- gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xd: /* FADDP */
- gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_dreg(s, rd, tcg_res);
- } else {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op1, rn, 0, size);
- read_vec_element_i32(s, tcg_op2, rn, 1, size);
-
- if (size == MO_16) {
- switch (opcode) {
- case 0xc: /* FMAXNMP */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xd: /* FADDP */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- } else {
- switch (opcode) {
- case 0xc: /* FMAXNMP */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xd: /* FADDP */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- write_fp_sreg(s, rd, tcg_res);
- }
-}
-
/*
* Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
*
@@ -8707,6 +9095,9 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
read_vec_element_i32(s, tcg_op, rn, pass, size);
fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
if (is_scalar) {
+ if (size == MO_16 && !is_u) {
+ tcg_gen_ext16u_i32(tcg_op, tcg_op);
+ }
write_fp_sreg(s, rd, tcg_op);
} else {
write_vec_element_i32(s, tcg_op, rd, pass, size);
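The added zero-extension covers the scalar signed fp16 conversions: the conversion helper can leave a sign-extended 16-bit result in the 32-bit temporary, and since write_fp_sreg() stores all 32 bits, bits [31:16] have to be cleared first. This reading is inferred from the surrounding code rather than stated by the patch; a concrete illustration:

    /* e.g. a signed fp16 convert producing -1 leaves 0xffffffff in
     * tcg_op; tcg_gen_ext16u_i32() turns it into 0x0000ffff, which is
     * the correct scalar H-register image.
     */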
@@ -8972,183 +9363,6 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
}
}
-/* Handle the 3-same-operands float operations; shared by the scalar
- * and vector encodings. The caller must filter out any encodings
- * not allocated for the encoding it is dealing with.
- */
-static void handle_3same_float(DisasContext *s, int size, int elements,
- int fpopcode, int rd, int rn, int rm)
-{
- int pass;
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
-
- for (pass = 0; pass < elements; pass++) {
- if (size) {
- /* Double */
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- switch (fpopcode) {
- case 0x39: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negd(tcg_op1, tcg_op1);
- /* fall through */
- case 0x19: /* FMLA */
- read_vec_element(s, tcg_res, rd, pass, MO_64);
- gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
- tcg_res, fpst);
- break;
- case 0x18: /* FMAXNM */
- gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1a: /* FADD */
- gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1b: /* FMULX */
- gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1c: /* FCMEQ */
- gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1e: /* FMAX */
- gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1f: /* FRECPS */
- gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x38: /* FMINNM */
- gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3a: /* FSUB */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3e: /* FMIN */
- gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3f: /* FRSQRTS */
- gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5b: /* FMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5c: /* FCMGE */
- gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5d: /* FACGE */
- gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5f: /* FDIV */
- gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_absd(tcg_res, tcg_res);
- break;
- case 0x7c: /* FCMGT */
- gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7d: /* FACGT */
- gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_vec_element(s, tcg_res, rd, pass, MO_64);
- } else {
- /* Single */
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
- read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
-
- switch (fpopcode) {
- case 0x39: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negs(tcg_op1, tcg_op1);
- /* fall through */
- case 0x19: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
- tcg_res, fpst);
- break;
- case 0x1a: /* FADD */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1b: /* FMULX */
- gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1c: /* FCMEQ */
- gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1e: /* FMAX */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1f: /* FRECPS */
- gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x18: /* FMAXNM */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x38: /* FMINNM */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3a: /* FSUB */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3e: /* FMIN */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3f: /* FRSQRTS */
- gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5b: /* FMUL */
- gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5c: /* FCMGE */
- gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5d: /* FACGE */
- gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x5f: /* FDIV */
- gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_helper_vfp_abss(tcg_res, tcg_res);
- break;
- case 0x7c: /* FCMGT */
- gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7d: /* FACGT */
- gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- if (elements == 1) {
- /* scalar single so clear high part */
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
- write_vec_element(s, tcg_tmp, rd, pass, MO_64);
- } else {
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- }
- }
- }
-
- clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
-}
-
/* AdvSIMD scalar three same
* 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
* +-----+---+-----------+------+---+------+--------+---+------+------+
@@ -9165,33 +9379,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
bool u = extract32(insn, 29, 1);
TCGv_i64 tcg_rd;
- if (opcode >= 0x18) {
- /* Floating point: U, size[1] and opcode indicate operation */
- int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
- switch (fpopcode) {
- case 0x1b: /* FMULX */
- case 0x1f: /* FRECPS */
- case 0x3f: /* FRSQRTS */
- case 0x5d: /* FACGE */
- case 0x7d: /* FACGT */
- case 0x1c: /* FCMEQ */
- case 0x5c: /* FCMGE */
- case 0x7c: /* FCMGT */
- case 0x7a: /* FABD */
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
- return;
- }
-
switch (opcode) {
case 0x1: /* SQADD, UQADD */
case 0x5: /* SQSUB, UQSUB */
@@ -9308,95 +9495,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
write_fp_dreg(s, rd, tcg_rd);
}
-/* AdvSIMD scalar three same FP16
- * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
- * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
- * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
- * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
- * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
- * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
- */
-static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
- uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 3);
- int rm = extract32(insn, 16, 5);
- bool u = extract32(insn, 29, 1);
- bool a = extract32(insn, 23, 1);
- int fpopcode = opcode | (a << 3) | (u << 4);
- TCGv_ptr fpst;
- TCGv_i32 tcg_op1;
- TCGv_i32 tcg_op2;
- TCGv_i32 tcg_res;
-
- switch (fpopcode) {
- case 0x03: /* FMULX */
- case 0x04: /* FCMEQ (reg) */
- case 0x07: /* FRECPS */
- case 0x0f: /* FRSQRTS */
- case 0x14: /* FCMGE (reg) */
- case 0x15: /* FACGE */
- case 0x1a: /* FABD */
- case 0x1c: /* FCMGT (reg) */
- case 0x1d: /* FACGT */
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- fpst = fpstatus_ptr(FPST_FPCR_F16);
-
- tcg_op1 = read_fp_hreg(s, rn);
- tcg_op2 = read_fp_hreg(s, rm);
- tcg_res = tcg_temp_new_i32();
-
- switch (fpopcode) {
- case 0x03: /* FMULX */
- gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x04: /* FCMEQ (reg) */
- gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x07: /* FRECPS */
- gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x0f: /* FRSQRTS */
- gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x14: /* FCMGE (reg) */
- gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x15: /* FACGE */
- gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
- case 0x1c: /* FCMGT (reg) */
- gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1d: /* FACGT */
- gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_sreg(s, rd, tcg_res);
-}
-
/* AdvSIMD scalar three same extra
* 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
* +-----+---+-----------+------+---+------+---+--------+---+----+----+
@@ -9528,10 +9626,10 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
}
break;
case 0x2f: /* FABS */
- gen_helper_vfp_absd(tcg_rd, tcg_rn);
+ gen_vfp_absd(tcg_rd, tcg_rn);
break;
case 0x6f: /* FNEG */
- gen_helper_vfp_negd(tcg_rd, tcg_rn);
+ gen_vfp_negd(tcg_rd, tcg_rn);
break;
case 0x7f: /* FSQRT */
gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
@@ -10821,284 +10919,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
}
}
-/* Logic op (opcode == 3) subgroup of C3.6.16. */
-static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- bool is_u = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
-
- if (!fp_access_check(s)) {
- return;
- }
-
- switch (size + 4 * is_u) {
- case 0: /* AND */
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
- return;
- case 1: /* BIC */
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
- return;
- case 2: /* ORR */
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
- return;
- case 3: /* ORN */
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
- return;
- case 4: /* EOR */
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
- return;
-
- case 5: /* BSL bitwise select */
- gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
- return;
- case 6: /* BIT, bitwise insert if true */
- gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
- return;
- case 7: /* BIF, bitwise insert if false */
- gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
- return;
-
- default:
- g_assert_not_reached();
- }
-}
-
-/* Pairwise op subgroup of C3.6.16.
- *
- * This is called directly or via the handle_3same_float for float pairwise
- * operations where the opcode and size are calculated differently.
- */
-static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
- int size, int rn, int rm, int rd)
-{
- TCGv_ptr fpst;
- int pass;
-
- /* Floating point operations need fpst */
- if (opcode >= 0x58) {
- fpst = fpstatus_ptr(FPST_FPCR);
- } else {
- fpst = NULL;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- /* These operations work on the concatenated rm:rn, with each pair of
- * adjacent elements being operated on to produce an element in the result.
- */
- if (size == 3) {
- TCGv_i64 tcg_res[2];
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- int passreg = (pass == 0) ? rn : rm;
-
- read_vec_element(s, tcg_op1, passreg, 0, MO_64);
- read_vec_element(s, tcg_op2, passreg, 1, MO_64);
- tcg_res[pass] = tcg_temp_new_i64();
-
- switch (opcode) {
- case 0x17: /* ADDP */
- tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
- break;
- case 0x58: /* FMAXNMP */
- gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x5a: /* FADDP */
- gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x5e: /* FMAXP */
- gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x78: /* FMINNMP */
- gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x7e: /* FMINP */
- gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
- } else {
- int maxpass = is_q ? 4 : 2;
- TCGv_i32 tcg_res[4];
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- NeonGenTwoOpFn *genfn = NULL;
- int passreg = pass < (maxpass / 2) ? rn : rm;
- int passelt = (is_q && (pass & 1)) ? 2 : 0;
-
- read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
- read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
- tcg_res[pass] = tcg_temp_new_i32();
-
- switch (opcode) {
- case 0x17: /* ADDP */
- {
- static NeonGenTwoOpFn * const fns[3] = {
- gen_helper_neon_padd_u8,
- gen_helper_neon_padd_u16,
- tcg_gen_add_i32,
- };
- genfn = fns[size];
- break;
- }
- case 0x14: /* SMAXP, UMAXP */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
- { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
- { tcg_gen_smax_i32, tcg_gen_umax_i32 },
- };
- genfn = fns[size][u];
- break;
- }
- case 0x15: /* SMINP, UMINP */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
- { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
- { tcg_gen_smin_i32, tcg_gen_umin_i32 },
- };
- genfn = fns[size][u];
- break;
- }
- /* The FP operations are all on single floats (32 bit) */
- case 0x58: /* FMAXNMP */
- gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x5a: /* FADDP */
- gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x5e: /* FMAXP */
- gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x78: /* FMINNMP */
- gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x7e: /* FMINP */
- gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- /* FP ops called directly, otherwise call now */
- if (genfn) {
- genfn(tcg_res[pass], tcg_op1, tcg_op2);
- }
- }
-
- for (pass = 0; pass < maxpass; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
- }
- clear_vec_high(s, is_q, rd);
- }
-}
-
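For reference, the pairwise addressing used above can be modelled per 32-bit lane as follows (illustrative sketch with hypothetical names; like the real code it buffers the results before writeback because Rd may overlap Rn or Rm):

#include <stdint.h>

/* Each result element combines two adjacent elements of the concatenated
 * Rm:Rn operand: the low half of Rd comes from pairs of Rn, the high half
 * from pairs of Rm. */
static void pairwise_u32(uint32_t *d, const uint32_t *n, const uint32_t *m,
                         int elements, uint32_t (*op)(uint32_t, uint32_t))
{
    uint32_t tmp[4];                      /* elements is 2 or 4 here */

    for (int i = 0; i < elements / 2; i++) {
        tmp[i] = op(n[2 * i], n[2 * i + 1]);
        tmp[i + elements / 2] = op(m[2 * i], m[2 * i + 1]);
    }
    for (int i = 0; i < elements; i++) {
        d[i] = tmp[i];
    }
}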
-/* Floating point op subgroup of C3.6.16. */
-static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
-{
- /* For floating point ops, the U, size[1] and opcode bits
- * together indicate the operation. size[0] indicates single
- * or double.
- */
- int fpopcode = extract32(insn, 11, 5)
- | (extract32(insn, 23, 1) << 5)
- | (extract32(insn, 29, 1) << 6);
- int is_q = extract32(insn, 30, 1);
- int size = extract32(insn, 22, 1);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- int datasize = is_q ? 128 : 64;
- int esize = 32 << size;
- int elements = datasize / esize;
-
- if (size == 1 && !is_q) {
- unallocated_encoding(s);
- return;
- }
-
- switch (fpopcode) {
- case 0x58: /* FMAXNMP */
- case 0x5a: /* FADDP */
- case 0x5e: /* FMAXP */
- case 0x78: /* FMINNMP */
- case 0x7e: /* FMINP */
- if (size && !is_q) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
- rn, rm, rd);
- return;
- case 0x1b: /* FMULX */
- case 0x1f: /* FRECPS */
- case 0x3f: /* FRSQRTS */
- case 0x5d: /* FACGE */
- case 0x7d: /* FACGT */
- case 0x19: /* FMLA */
- case 0x39: /* FMLS */
- case 0x18: /* FMAXNM */
- case 0x1a: /* FADD */
- case 0x1c: /* FCMEQ */
- case 0x1e: /* FMAX */
- case 0x38: /* FMINNM */
- case 0x3a: /* FSUB */
- case 0x3e: /* FMIN */
- case 0x5b: /* FMUL */
- case 0x5c: /* FCMGE */
- case 0x5f: /* FDIV */
- case 0x7a: /* FABD */
- case 0x7c: /* FCMGT */
- if (!fp_access_check(s)) {
- return;
- }
- handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
- return;
-
- case 0x1d: /* FMLAL */
- case 0x3d: /* FMLSL */
- case 0x59: /* FMLAL2 */
- case 0x79: /* FMLSL2 */
- if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
- unallocated_encoding(s);
- return;
- }
- if (fp_access_check(s)) {
- int is_s = extract32(insn, 23, 1);
- int is_2 = extract32(insn, 29, 1);
- int data = (is_2 << 1) | is_s;
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), tcg_env,
- is_q ? 16 : 8, vec_full_reg_size(s),
- data, gen_helper_gvec_fmlal_a64);
- }
- return;
-
- default:
- unallocated_encoding(s);
- return;
- }
-}
-
/* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
@@ -11364,244 +11184,17 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 11, 5);
switch (opcode) {
- case 0x3: /* logic ops */
- disas_simd_3same_logic(s, insn);
- break;
- case 0x17: /* ADDP */
- case 0x14: /* SMAXP, UMAXP */
- case 0x15: /* SMINP, UMINP */
- {
- /* Pairwise operations */
- int is_q = extract32(insn, 30, 1);
- int u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- if (opcode == 0x17) {
- if (u || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- } else {
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- }
- handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
- break;
- }
- case 0x18 ... 0x31:
- /* floating point ops, sz[1] and U are part of opcode */
- disas_simd_3same_float(s, insn);
- break;
default:
disas_simd_3same_int(s, insn);
break;
- }
-}
-
-/*
- * Advanced SIMD three same (ARMv8.2 FP16 variants)
- *
- * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
- * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
- * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
- *
- * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
- * (register), FACGE, FABD, FCMGT (register) and FACGT.
- *
- */
-static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
-{
- int opcode = extract32(insn, 11, 3);
- int u = extract32(insn, 29, 1);
- int a = extract32(insn, 23, 1);
- int is_q = extract32(insn, 30, 1);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- /*
- * For these floating point ops, the U, a and opcode bits
- * together indicate the operation.
- */
- int fpopcode = opcode | (a << 3) | (u << 4);
- int datasize = is_q ? 128 : 64;
- int elements = datasize / 16;
- bool pairwise;
- TCGv_ptr fpst;
- int pass;
-
- switch (fpopcode) {
- case 0x0: /* FMAXNM */
- case 0x1: /* FMLA */
- case 0x2: /* FADD */
- case 0x3: /* FMULX */
- case 0x4: /* FCMEQ */
- case 0x6: /* FMAX */
- case 0x7: /* FRECPS */
- case 0x8: /* FMINNM */
- case 0x9: /* FMLS */
- case 0xa: /* FSUB */
- case 0xe: /* FMIN */
- case 0xf: /* FRSQRTS */
- case 0x13: /* FMUL */
- case 0x14: /* FCMGE */
- case 0x15: /* FACGE */
- case 0x17: /* FDIV */
- case 0x1a: /* FABD */
- case 0x1c: /* FCMGT */
- case 0x1d: /* FACGT */
- pairwise = false;
- break;
- case 0x10: /* FMAXNMP */
- case 0x12: /* FADDP */
- case 0x16: /* FMAXP */
- case 0x18: /* FMINNMP */
- case 0x1e: /* FMINP */
- pairwise = true;
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!dc_isar_feature(aa64_fp16, s)) {
+ case 0x3: /* logic ops */
+ case 0x14: /* SMAXP, UMAXP */
+ case 0x15: /* SMINP, UMINP */
+ case 0x17: /* ADDP */
+ case 0x18 ... 0x31: /* floating point ops */
unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- fpst = fpstatus_ptr(FPST_FPCR_F16);
-
- if (pairwise) {
- int maxpass = is_q ? 8 : 4;
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res[8];
-
- for (pass = 0; pass < maxpass; pass++) {
- int passreg = pass < (maxpass / 2) ? rn : rm;
- int passelt = (pass << 1) & (maxpass - 1);
-
- read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
- read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
- tcg_res[pass] = tcg_temp_new_i32();
-
- switch (fpopcode) {
- case 0x10: /* FMAXNMP */
- gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
- fpst);
- break;
- case 0x12: /* FADDP */
- gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x16: /* FMAXP */
- gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- case 0x18: /* FMINNMP */
- gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
- fpst);
- break;
- case 0x1e: /* FMINP */
- gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- for (pass = 0; pass < maxpass; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
- }
- } else {
- for (pass = 0; pass < elements; pass++) {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
- read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
-
- switch (fpopcode) {
- case 0x0: /* FMAXNM */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
- fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FMULX */
- gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x4: /* FCMEQ */
- gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAX */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FRECPS */
- gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x8: /* FMINNM */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x9: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
- fpst);
- break;
- case 0xa: /* FSUB */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xe: /* FMIN */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FRSQRTS */
- gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x13: /* FMUL */
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x14: /* FCMGE */
- gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x15: /* FACGE */
- gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x17: /* FDIV */
- gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
- case 0x1c: /* FCMGT */
- gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1d: /* FACGT */
- gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- }
+ break;
}
-
- clear_vec_high(s, is_q, rd);
}
/* AdvSIMD three same extra
@@ -12372,10 +11965,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0x2f: /* FABS */
- gen_helper_vfp_abss(tcg_res, tcg_op);
+ gen_vfp_abss(tcg_res, tcg_op);
break;
case 0x6f: /* FNEG */
- gen_helper_vfp_negs(tcg_res, tcg_op);
+ gen_vfp_negs(tcg_res, tcg_op);
break;
case 0x7f: /* FSQRT */
gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
@@ -12810,12 +12403,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 0x0c: /* SQDMULH */
case 0x0d: /* SQRDMULH */
break;
- case 0x01: /* FMLA */
- case 0x05: /* FMLS */
- case 0x09: /* FMUL */
- case 0x19: /* FMULX */
- is_fp = 1;
- break;
case 0x1d: /* SQRDMLAH */
case 0x1f: /* SQRDMLSH */
if (!dc_isar_feature(aa64_rdm, s)) {
@@ -12870,38 +12457,23 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
is_fp = 2;
break;
+ default:
case 0x00: /* FMLAL */
+ case 0x01: /* FMLA */
case 0x04: /* FMLSL */
+ case 0x05: /* FMLS */
+ case 0x09: /* FMUL */
case 0x18: /* FMLAL2 */
+ case 0x19: /* FMULX */
case 0x1c: /* FMLSL2 */
- if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
- unallocated_encoding(s);
- return;
- }
- size = MO_16;
- /* is_fp, but we pass tcg_env not fp_status. */
- break;
- default:
unallocated_encoding(s);
return;
}
switch (is_fp) {
case 1: /* normal fp */
- /* convert insn encoded size to MemOp size */
- switch (size) {
- case 0: /* half-precision */
- size = MO_16;
- is_fp16 = true;
- break;
- case MO_32: /* single precision */
- case MO_64: /* double precision */
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
+ unallocated_encoding(s); /* in decodetree */
+ return;
case 2: /* complex fp */
/* Each indexable element is a complex pair. */
@@ -13012,22 +12584,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
return;
- case 0x00: /* FMLAL */
- case 0x04: /* FMLSL */
- case 0x18: /* FMLAL2 */
- case 0x1c: /* FMLSL2 */
- {
- int is_s = extract32(opcode, 2, 1);
- int is_2 = u;
- int data = (index << 2) | (is_2 << 1) | is_s;
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), tcg_env,
- is_q ? 16 : 8, vec_full_reg_size(s),
- data, gen_helper_gvec_fmlal_idx_a64);
- }
- return;
-
case 0x08: /* MUL */
if (!is_long && !is_scalar) {
static gen_helper_gvec_3 * const fns[3] = {
@@ -13080,42 +12636,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
if (size == 3) {
- TCGv_i64 tcg_idx = tcg_temp_new_i64();
- int pass;
-
- assert(is_fp && is_q && !is_long);
-
- read_vec_element(s, tcg_idx, rm, index, MO_64);
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op, rn, pass, MO_64);
-
- switch (16 * u + opcode) {
- case 0x05: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negd(tcg_op, tcg_op);
- /* fall through */
- case 0x01: /* FMLA */
- read_vec_element(s, tcg_res, rd, pass, MO_64);
- gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
- break;
- case 0x09: /* FMUL */
- gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
- break;
- case 0x19: /* FMULX */
- gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_vec_element(s, tcg_res, rd, pass, MO_64);
- }
-
- clear_vec_high(s, !is_scalar, rd);
+ g_assert_not_reached();
} else if (!is_long) {
/* 32 bit floating point, or 16 or 32 bit integer.
* For the 16 bit scalar case we use the usual Neon helpers and
@@ -13171,74 +12692,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
genfn(tcg_res, tcg_op, tcg_res);
break;
}
- case 0x05: /* FMLS */
- case 0x01: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass,
- is_scalar ? size : MO_32);
- switch (size) {
- case 1:
- if (opcode == 0x5) {
- /* As usual for ARM, separate negation for fused
- * multiply-add */
- tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
- }
- if (is_scalar) {
- gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
- tcg_res, fpst);
- } else {
- gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
- tcg_res, fpst);
- }
- break;
- case 2:
- if (opcode == 0x5) {
- /* As usual for ARM, separate negation for
- * fused multiply-add */
- tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
- }
- gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
- tcg_res, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- break;
- case 0x09: /* FMUL */
- switch (size) {
- case 1:
- if (is_scalar) {
- gen_helper_advsimd_mulh(tcg_res, tcg_op,
- tcg_idx, fpst);
- } else {
- gen_helper_advsimd_mul2h(tcg_res, tcg_op,
- tcg_idx, fpst);
- }
- break;
- case 2:
- gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- break;
- case 0x19: /* FMULX */
- switch (size) {
- case 1:
- if (is_scalar) {
- gen_helper_advsimd_mulxh(tcg_res, tcg_op,
- tcg_idx, fpst);
- } else {
- gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
- tcg_idx, fpst);
- }
- break;
- case 2:
- gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- break;
case 0x0c: /* SQDMULH */
if (size == 1) {
gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
@@ -13280,6 +12733,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
break;
default:
+ case 0x01: /* FMLA */
+ case 0x05: /* FMLS */
+ case 0x09: /* FMUL */
+ case 0x19: /* FMULX */
g_assert_not_reached();
}
@@ -13453,461 +12910,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
}
-/* Crypto AES
- * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +-----------------+------+-----------+--------+-----+------+------+
- * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
- * +-----------------+------+-----------+--------+-----+------+------+
- */
-static void disas_crypto_aes(DisasContext *s, uint32_t insn)
-{
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- gen_helper_gvec_2 *genfn2 = NULL;
- gen_helper_gvec_3 *genfn3 = NULL;
-
- if (!dc_isar_feature(aa64_aes, s) || size != 0) {
- unallocated_encoding(s);
- return;
- }
-
- switch (opcode) {
- case 0x4: /* AESE */
- genfn3 = gen_helper_crypto_aese;
- break;
- case 0x6: /* AESMC */
- genfn2 = gen_helper_crypto_aesmc;
- break;
- case 0x5: /* AESD */
- genfn3 = gen_helper_crypto_aesd;
- break;
- case 0x7: /* AESIMC */
- genfn2 = gen_helper_crypto_aesimc;
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
- if (genfn2) {
- gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
- } else {
- gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
- }
-}
-
-/* Crypto three-reg SHA
- * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
- * +-----------------+------+---+------+---+--------+-----+------+------+
- * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
- * +-----------------+------+---+------+---+--------+-----+------+------+
- */
-static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
-{
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 3);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- gen_helper_gvec_3 *genfn;
- bool feature;
-
- if (size != 0) {
- unallocated_encoding(s);
- return;
- }
-
- switch (opcode) {
- case 0: /* SHA1C */
- genfn = gen_helper_crypto_sha1c;
- feature = dc_isar_feature(aa64_sha1, s);
- break;
- case 1: /* SHA1P */
- genfn = gen_helper_crypto_sha1p;
- feature = dc_isar_feature(aa64_sha1, s);
- break;
- case 2: /* SHA1M */
- genfn = gen_helper_crypto_sha1m;
- feature = dc_isar_feature(aa64_sha1, s);
- break;
- case 3: /* SHA1SU0 */
- genfn = gen_helper_crypto_sha1su0;
- feature = dc_isar_feature(aa64_sha1, s);
- break;
- case 4: /* SHA256H */
- genfn = gen_helper_crypto_sha256h;
- feature = dc_isar_feature(aa64_sha256, s);
- break;
- case 5: /* SHA256H2 */
- genfn = gen_helper_crypto_sha256h2;
- feature = dc_isar_feature(aa64_sha256, s);
- break;
- case 6: /* SHA256SU1 */
- genfn = gen_helper_crypto_sha256su1;
- feature = dc_isar_feature(aa64_sha256, s);
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
- gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
-}
-
-/* Crypto two-reg SHA
- * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +-----------------+------+-----------+--------+-----+------+------+
- * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
- * +-----------------+------+-----------+--------+-----+------+------+
- */
-static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
-{
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- gen_helper_gvec_2 *genfn;
- bool feature;
-
- if (size != 0) {
- unallocated_encoding(s);
- return;
- }
-
- switch (opcode) {
- case 0: /* SHA1H */
- feature = dc_isar_feature(aa64_sha1, s);
- genfn = gen_helper_crypto_sha1h;
- break;
- case 1: /* SHA1SU1 */
- feature = dc_isar_feature(aa64_sha1, s);
- genfn = gen_helper_crypto_sha1su1;
- break;
- case 2: /* SHA256SU0 */
- feature = dc_isar_feature(aa64_sha256, s);
- genfn = gen_helper_crypto_sha256su0;
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
- gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
-}
-
-static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
-{
- tcg_gen_rotli_i64(d, m, 1);
- tcg_gen_xor_i64(d, d, n);
-}
-
-static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
-{
- tcg_gen_rotli_vec(vece, d, m, 1);
- tcg_gen_xor_vec(vece, d, d, n);
-}
-
-void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
- static const GVecGen3 op = {
- .fni8 = gen_rax1_i64,
- .fniv = gen_rax1_vec,
- .opt_opc = vecop_list,
- .fno = gen_helper_crypto_rax1,
- .vece = MO_64,
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
-}
-
-/* Crypto three-reg SHA512
- * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
- * +-----------------------+------+---+---+-----+--------+------+------+
- * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
- * +-----------------------+------+---+---+-----+--------+------+------+
- */
-static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
-{
- int opcode = extract32(insn, 10, 2);
- int o = extract32(insn, 14, 1);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- bool feature;
- gen_helper_gvec_3 *oolfn = NULL;
- GVecGen3Fn *gvecfn = NULL;
-
- if (o == 0) {
- switch (opcode) {
- case 0: /* SHA512H */
- feature = dc_isar_feature(aa64_sha512, s);
- oolfn = gen_helper_crypto_sha512h;
- break;
- case 1: /* SHA512H2 */
- feature = dc_isar_feature(aa64_sha512, s);
- oolfn = gen_helper_crypto_sha512h2;
- break;
- case 2: /* SHA512SU1 */
- feature = dc_isar_feature(aa64_sha512, s);
- oolfn = gen_helper_crypto_sha512su1;
- break;
- case 3: /* RAX1 */
- feature = dc_isar_feature(aa64_sha3, s);
- gvecfn = gen_gvec_rax1;
- break;
- default:
- g_assert_not_reached();
- }
- } else {
- switch (opcode) {
- case 0: /* SM3PARTW1 */
- feature = dc_isar_feature(aa64_sm3, s);
- oolfn = gen_helper_crypto_sm3partw1;
- break;
- case 1: /* SM3PARTW2 */
- feature = dc_isar_feature(aa64_sm3, s);
- oolfn = gen_helper_crypto_sm3partw2;
- break;
- case 2: /* SM4EKEY */
- feature = dc_isar_feature(aa64_sm4, s);
- oolfn = gen_helper_crypto_sm4ekey;
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- }
-
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (oolfn) {
- gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
- } else {
- gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
- }
-}
-
-/* Crypto two-reg SHA512
- * 31 12 11 10 9 5 4 0
- * +-----------------------------------------+--------+------+------+
- * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
- * +-----------------------------------------+--------+------+------+
- */
-static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
-{
- int opcode = extract32(insn, 10, 2);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- bool feature;
-
- switch (opcode) {
- case 0: /* SHA512SU0 */
- feature = dc_isar_feature(aa64_sha512, s);
- break;
- case 1: /* SM4E */
- feature = dc_isar_feature(aa64_sm4, s);
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- switch (opcode) {
- case 0: /* SHA512SU0 */
- gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
- break;
- case 1: /* SM4E */
- gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
- break;
- default:
- g_assert_not_reached();
- }
-}
-
-/* Crypto four-register
- * 31 23 22 21 20 16 15 14 10 9 5 4 0
- * +-------------------+-----+------+---+------+------+------+
- * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
- * +-------------------+-----+------+---+------+------+------+
- */
-static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
-{
- int op0 = extract32(insn, 21, 2);
- int rm = extract32(insn, 16, 5);
- int ra = extract32(insn, 10, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- bool feature;
-
- switch (op0) {
- case 0: /* EOR3 */
- case 1: /* BCAX */
- feature = dc_isar_feature(aa64_sha3, s);
- break;
- case 2: /* SM3SS1 */
- feature = dc_isar_feature(aa64_sm3, s);
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (op0 < 2) {
- TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
- int pass;
-
- tcg_op1 = tcg_temp_new_i64();
- tcg_op2 = tcg_temp_new_i64();
- tcg_op3 = tcg_temp_new_i64();
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- for (pass = 0; pass < 2; pass++) {
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
- read_vec_element(s, tcg_op3, ra, pass, MO_64);
-
- if (op0 == 0) {
- /* EOR3 */
- tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
- } else {
- /* BCAX */
- tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
- }
- tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- }
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
- } else {
- TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
-
- tcg_op1 = tcg_temp_new_i32();
- tcg_op2 = tcg_temp_new_i32();
- tcg_op3 = tcg_temp_new_i32();
- tcg_res = tcg_temp_new_i32();
- tcg_zero = tcg_constant_i32(0);
-
- read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
- read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
- read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
-
- tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
- tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
- tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
- tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
-
- write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
- write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
- write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
- write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
- }
-}
-
-/* Crypto XAR
- * 31 21 20 16 15 10 9 5 4 0
- * +-----------------------+------+--------+------+------+
- * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd |
- * +-----------------------+------+--------+------+------+
- */
-static void disas_crypto_xar(DisasContext *s, uint32_t insn)
-{
- int rm = extract32(insn, 16, 5);
- int imm6 = extract32(insn, 10, 6);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- if (!dc_isar_feature(aa64_sha3, s)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), imm6, 16,
- vec_full_reg_size(s));
-}
-
-/* Crypto three-reg imm2
- * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
- * +-----------------------+------+-----+------+--------+------+------+
- * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd |
- * +-----------------------+------+-----+------+--------+------+------+
- */
-static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
- gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
- };
- int opcode = extract32(insn, 10, 2);
- int imm2 = extract32(insn, 12, 2);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- if (!dc_isar_feature(aa64_sm3, s)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
-}
-
/* C3.6 Data processing - SIMD, inc Crypto
*
* As the decode gets a little complex we are using a table based
@@ -13920,7 +12922,6 @@ static const AArch64DecodeTable data_proc_simd[] = {
{ 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
- { 0x0e000400, 0x9fe08400, disas_simd_copy },
{ 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
@@ -13932,21 +12933,9 @@ static const AArch64DecodeTable data_proc_simd[] = {
{ 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
{ 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
- { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
- { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
{ 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
- { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
- { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
- { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
- { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
- { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
- { 0xce000000, 0xff808000, disas_crypto_four_reg },
- { 0xce800000, 0xffe00000, disas_crypto_xar },
- { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
- { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
- { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
{ 0x00000000, 0x00000000, NULL }
};
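For context: the entries removed from data_proc_simd[] above are (pattern, mask, handler) triples, scanned linearly until the first entry whose masked bits match the instruction. A sketch of that lookup, assuming the field names mirror the AArch64DecodeTable type named in the hunk header (DisasContext comes from the surrounding file):

typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

static AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                        uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;   /* stopped at the all-zero sentinel entry */
}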
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index 7b811b8ac5..91750f0ca9 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -193,6 +193,10 @@ void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, int64_t shift,
uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz);
+void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz);
void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 144f18ba22..18b048611b 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -830,6 +830,11 @@ DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
+DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp)
+DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp)
+DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp)
+DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp)
+DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp)
#define DO_3SAME_CMP(INSN, COND) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
@@ -1002,82 +1007,6 @@ DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
-static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
-{
- /* Operations handled pairwise 32 bits at a time */
- TCGv_i32 tmp, tmp2, tmp3;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vn | a->vm) & 0x10)) {
- return false;
- }
-
- if (a->size == 3) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- assert(a->q == 0); /* enforced by decode patterns */
-
- /*
- * Note that we have to be careful not to clobber the source operands
- * in the "vm == vd" case by storing the result of the first pass too
- * early. Since Q is 0 there are always just two passes, so instead
- * of a complicated loop over each pass we just unroll.
- */
- tmp = tcg_temp_new_i32();
- tmp2 = tcg_temp_new_i32();
- tmp3 = tcg_temp_new_i32();
-
- read_neon_element32(tmp, a->vn, 0, MO_32);
- read_neon_element32(tmp2, a->vn, 1, MO_32);
- fn(tmp, tmp, tmp2);
-
- read_neon_element32(tmp3, a->vm, 0, MO_32);
- read_neon_element32(tmp2, a->vm, 1, MO_32);
- fn(tmp3, tmp3, tmp2);
-
- write_neon_element32(tmp, a->vd, 0, MO_32);
- write_neon_element32(tmp3, a->vd, 1, MO_32);
-
- return true;
-}
-
-#define DO_3SAME_PAIR(INSN, func) \
- static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
- { \
- static NeonGenTwoOpFn * const fns[] = { \
- gen_helper_neon_##func##8, \
- gen_helper_neon_##func##16, \
- gen_helper_neon_##func##32, \
- }; \
- if (a->size > 2) { \
- return false; \
- } \
- return do_3same_pair(s, a, fns[a->size]); \
- }
-
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
-#define gen_helper_neon_padd_u32 tcg_gen_add_i32
-
-DO_3SAME_PAIR(VPMAX_S, pmax_s)
-DO_3SAME_PAIR(VPMIN_S, pmin_s)
-DO_3SAME_PAIR(VPMAX_U, pmax_u)
-DO_3SAME_PAIR(VPMIN_U, pmin_u)
-DO_3SAME_PAIR(VPADD, padd_u)
-
#define DO_3SAME_VQDMULH(INSN, FUNC) \
WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
@@ -1144,6 +1073,9 @@ DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
+DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h)
+DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h)
+DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h)
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
@@ -1180,58 +1112,6 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
return do_3same(s, a, gen_VMINNM_fp32_3s);
}
-static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
- gen_helper_gvec_3_ptr *fn)
-{
- /* FP pairwise operations */
- TCGv_ptr fpstatus;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vn | a->vm) & 0x10)) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- assert(a->q == 0); /* enforced by decode patterns */
-
-
- fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
- vfp_reg_offset(1, a->vn),
- vfp_reg_offset(1, a->vm),
- fpstatus, 8, 8, 0, fn);
-
- return true;
-}
-
-/*
- * For all the functions using this macro, size == 1 means fp16,
- * which is an architecture extension we don't implement yet.
- */
-#define DO_3S_FP_PAIR(INSN,FUNC) \
- static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
- { \
- if (a->size == MO_16) { \
- if (!dc_isar_feature(aa32_fp16_arith, s)) { \
- return false; \
- } \
- return do_3same_fp_pair(s, a, FUNC##h); \
- } \
- return do_3same_fp_pair(s, a, FUNC##s); \
- }
-
-DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
-DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
-DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
-
static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
/* Handle a 2-reg-shift insn which can be vectorized. */
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index ada05aa530..798ab2bfb1 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -527,94 +527,6 @@ TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
-static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
- uint64_t mask = dup_const(MO_8, 0xff >> sh);
-
- tcg_gen_xor_i64(t, n, m);
- tcg_gen_shri_i64(d, t, sh);
- tcg_gen_shli_i64(t, t, 8 - sh);
- tcg_gen_andi_i64(d, d, mask);
- tcg_gen_andi_i64(t, t, ~mask);
- tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
- uint64_t mask = dup_const(MO_16, 0xffff >> sh);
-
- tcg_gen_xor_i64(t, n, m);
- tcg_gen_shri_i64(d, t, sh);
- tcg_gen_shli_i64(t, t, 16 - sh);
- tcg_gen_andi_i64(d, d, mask);
- tcg_gen_andi_i64(t, t, ~mask);
- tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
-{
- tcg_gen_xor_i32(d, n, m);
- tcg_gen_rotri_i32(d, d, sh);
-}
-
-static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
-{
- tcg_gen_xor_i64(d, n, m);
- tcg_gen_rotri_i64(d, d, sh);
-}
-
-static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
- TCGv_vec m, int64_t sh)
-{
- tcg_gen_xor_vec(vece, d, n, m);
- tcg_gen_rotri_vec(vece, d, d, sh);
-}
-
-void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, int64_t shift,
- uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
- static const GVecGen3i ops[4] = {
- { .fni8 = gen_xar8_i64,
- .fniv = gen_xar_vec,
- .fno = gen_helper_sve2_xar_b,
- .opt_opc = vecop,
- .vece = MO_8 },
- { .fni8 = gen_xar16_i64,
- .fniv = gen_xar_vec,
- .fno = gen_helper_sve2_xar_h,
- .opt_opc = vecop,
- .vece = MO_16 },
- { .fni4 = gen_xar_i32,
- .fniv = gen_xar_vec,
- .fno = gen_helper_sve2_xar_s,
- .opt_opc = vecop,
- .vece = MO_32 },
- { .fni8 = gen_xar_i64,
- .fniv = gen_xar_vec,
- .fno = gen_helper_gvec_xar_d,
- .opt_opc = vecop,
- .vece = MO_64 }
- };
- int esize = 8 << vece;
-
- /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
- tcg_debug_assert(shift >= 0);
- tcg_debug_assert(shift <= esize);
- shift &= esize - 1;
-
- if (shift == 0) {
- /* xar with no rotate devolves to xor. */
- tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
- } else {
- tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
- shift, &ops[vece]);
- }
-}
-
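The removed expander lowers XAR to an exclusive or followed by a rotate right; per 64-bit element that is (scalar sketch, illustrative helper name):

#include <stdint.h>

static inline uint64_t xar64(uint64_t n, uint64_t m, unsigned sh)
{
    uint64_t t = n ^ m;

    /* Rotate right by sh; after masking, sh == 0 degenerates to a plain
     * XOR, matching the gvec special case above. */
    return sh ? (t >> sh) | (t << (64 - sh)) : t;
}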
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
@@ -629,61 +541,8 @@ static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
return true;
}
-static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
-{
- tcg_gen_xor_i64(d, n, m);
- tcg_gen_xor_i64(d, d, k);
-}
-
-static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
- TCGv_vec m, TCGv_vec k)
-{
- tcg_gen_xor_vec(vece, d, n, m);
- tcg_gen_xor_vec(vece, d, d, k);
-}
-
-static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
- uint32_t a, uint32_t oprsz, uint32_t maxsz)
-{
- static const GVecGen4 op = {
- .fni8 = gen_eor3_i64,
- .fniv = gen_eor3_vec,
- .fno = gen_helper_sve2_eor3,
- .vece = MO_64,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
-}
-
-TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
-
-static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
-{
- tcg_gen_andc_i64(d, m, k);
- tcg_gen_xor_i64(d, d, n);
-}
-
-static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
- TCGv_vec m, TCGv_vec k)
-{
- tcg_gen_andc_vec(vece, d, m, k);
- tcg_gen_xor_vec(vece, d, d, n);
-}
-
-static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
- uint32_t a, uint32_t oprsz, uint32_t maxsz)
-{
- static const GVecGen4 op = {
- .fni8 = gen_bcax_i64,
- .fniv = gen_bcax_vec,
- .fno = gen_helper_sve2_bcax,
- .vece = MO_64,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
-}
-
-TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
+TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
+TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)
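The per-element semantics of the two ops now routed to the shared expanders are simple enough to state as a scalar sketch (illustrative names, not part of the patch):

#include <stdint.h>

/* EOR3: three-way exclusive or. */
static inline uint64_t eor3_64(uint64_t n, uint64_t m, uint64_t k)
{
    return n ^ m ^ k;
}

/* BCAX: bit clear and exclusive or. */
static inline uint64_t bcax_64(uint64_t n, uint64_t m, uint64_t k)
{
    return n ^ (m & ~k);
}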
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
uint32_t a, uint32_t oprsz, uint32_t maxsz)
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index b9af03b7c3..39ec971ff7 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -48,6 +48,12 @@ static inline void vfp_store_reg32(TCGv_i32 var, int reg)
tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg));
}
+static inline void vfp_load_reg16(TCGv_i32 var, int reg)
+{
+ tcg_gen_ld16u_i32(var, tcg_env,
+ vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2);
+}
+
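The new loader reads only the low 16 bits of the 32-bit Sreg slot; the "+ HOST_BIG_ENDIAN * 2" term is the byte offset of those low bits within the slot on the host. A host-side sketch of the same idea (illustrative only; load_low_half16 is hypothetical, HOST_BIG_ENDIAN is the 0/1 macro used in the patch):

#include <stdint.h>
#include <string.h>

/* A half-precision value occupies bits [15:0] of its 32-bit register slot;
 * on a big-endian host those two bytes start 2 bytes into the slot. */
static inline uint16_t load_low_half16(const uint32_t *slot)
{
    uint16_t v;

    memcpy(&v, (const uint8_t *)slot + HOST_BIG_ENDIAN * 2, sizeof(v));
    return v;
}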
/*
* The imm8 encodes the sign bit, enough bits to represent an exponent in
* the range 01....1xx to 10....0xx, and the most significant 4 bits of
@@ -902,8 +908,7 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
if (a->l) {
/* VFP to general purpose register */
tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vn);
- tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ vfp_load_reg16(tmp, a->vn);
store_reg(s, a->rt, tmp);
} else {
/* general purpose register to VFP */
@@ -1453,11 +1458,11 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR_F16);
- vfp_load_reg32(f0, vn);
- vfp_load_reg32(f1, vm);
+ vfp_load_reg16(f0, vn);
+ vfp_load_reg16(f1, vm);
if (reads_vd) {
- vfp_load_reg32(fd, vd);
+ vfp_load_reg16(fd, vd);
}
fn(fd, f0, f1, fpst);
vfp_store_reg32(fd, vd);
@@ -1633,7 +1638,7 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
}
f0 = tcg_temp_new_i32();
- vfp_load_reg32(f0, vm);
+ vfp_load_reg16(f0, vm);
fn(f0, f0);
vfp_store_reg32(f0, vd);
@@ -1763,7 +1768,7 @@ static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
- gen_helper_vfp_negh(tmp, tmp);
+ gen_vfp_negh(tmp, tmp);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
}
@@ -1781,7 +1786,7 @@ static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_muls(tmp, vn, vm, fpst);
- gen_helper_vfp_negs(tmp, tmp);
+ gen_vfp_negs(tmp, tmp);
gen_helper_vfp_adds(vd, vd, tmp, fpst);
}
@@ -1799,7 +1804,7 @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
TCGv_i64 tmp = tcg_temp_new_i64();
gen_helper_vfp_muld(tmp, vn, vm, fpst);
- gen_helper_vfp_negd(tmp, tmp);
+ gen_vfp_negd(tmp, tmp);
gen_helper_vfp_addd(vd, vd, tmp, fpst);
}
@@ -1819,7 +1824,7 @@ static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(vd, vd);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
}
@@ -1839,7 +1844,7 @@ static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_muls(tmp, vn, vm, fpst);
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(vd, vd);
gen_helper_vfp_adds(vd, vd, tmp, fpst);
}
@@ -1859,7 +1864,7 @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
TCGv_i64 tmp = tcg_temp_new_i64();
gen_helper_vfp_muld(tmp, vn, vm, fpst);
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(vd, vd);
gen_helper_vfp_addd(vd, vd, tmp, fpst);
}
@@ -1874,8 +1879,8 @@ static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_mulh(tmp, vn, vm, fpst);
- gen_helper_vfp_negh(tmp, tmp);
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(tmp, tmp);
+ gen_vfp_negh(vd, vd);
gen_helper_vfp_addh(vd, vd, tmp, fpst);
}
@@ -1890,8 +1895,8 @@ static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_vfp_muls(tmp, vn, vm, fpst);
- gen_helper_vfp_negs(tmp, tmp);
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(tmp, tmp);
+ gen_vfp_negs(vd, vd);
gen_helper_vfp_adds(vd, vd, tmp, fpst);
}
@@ -1906,8 +1911,8 @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
TCGv_i64 tmp = tcg_temp_new_i64();
gen_helper_vfp_muld(tmp, vn, vm, fpst);
- gen_helper_vfp_negd(tmp, tmp);
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(tmp, tmp);
+ gen_vfp_negd(vd, vd);
gen_helper_vfp_addd(vd, vd, tmp, fpst);
}
@@ -1935,7 +1940,7 @@ static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_mulh(vd, vn, vm, fpst);
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(vd, vd);
}
static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
@@ -1947,7 +1952,7 @@ static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_muls(vd, vn, vm, fpst);
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(vd, vd);
}
static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
@@ -1959,7 +1964,7 @@ static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
gen_helper_vfp_muld(vd, vn, vm, fpst);
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(vd, vd);
}
static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
@@ -2106,16 +2111,16 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();
- vfp_load_reg32(vn, a->vn);
- vfp_load_reg32(vm, a->vm);
+ vfp_load_reg16(vn, a->vn);
+ vfp_load_reg16(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
- gen_helper_vfp_negh(vn, vn);
+ gen_vfp_negh(vn, vn);
}
- vfp_load_reg32(vd, a->vd);
+ vfp_load_reg16(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
- gen_helper_vfp_negh(vd, vd);
+ gen_vfp_negh(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
@@ -2169,12 +2174,12 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vfp_load_reg32(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
- gen_helper_vfp_negs(vn, vn);
+ gen_vfp_negs(vn, vn);
}
vfp_load_reg32(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
- gen_helper_vfp_negs(vd, vd);
+ gen_vfp_negs(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
@@ -2234,12 +2239,12 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
vfp_load_reg64(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
- gen_helper_vfp_negd(vn, vn);
+ gen_vfp_negd(vn, vn);
}
vfp_load_reg64(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
- gen_helper_vfp_negd(vd, vd);
+ gen_vfp_negd(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
@@ -2409,13 +2414,13 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
-DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
-DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
-DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)
+DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith)
+DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2)
+DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2)
-DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
-DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
-DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
+DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith)
+DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2)
+DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
@@ -2456,11 +2461,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i32();
- vfp_load_reg32(vd, a->vd);
+ vfp_load_reg16(vd, a->vd);
if (a->z) {
tcg_gen_movi_i32(vm, 0);
} else {
- vfp_load_reg32(vm, a->vm);
+ vfp_load_reg16(vm, a->vm);
}
if (a->e) {
@@ -2700,7 +2705,7 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
}
tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vm);
+ vfp_load_reg16(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
@@ -2773,7 +2778,7 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
}
tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vm);
+ vfp_load_reg16(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rinth(tmp, tmp, fpst);
@@ -2853,7 +2858,7 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
}
tmp = tcg_temp_new_i32();
- vfp_load_reg32(tmp, a->vm);
+ vfp_load_reg16(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth_exact(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
@@ -3270,7 +3275,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
fpst = fpstatus_ptr(FPST_FPCR_F16);
vm = tcg_temp_new_i32();
- vfp_load_reg32(vm, a->vm);
+ vfp_load_reg16(vm, a->vm);
if (a->s) {
if (a->rz) {
@@ -3383,8 +3388,8 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a)
/* Insert low half of Vm into high half of Vd */
rm = tcg_temp_new_i32();
rd = tcg_temp_new_i32();
- vfp_load_reg32(rm, a->vm);
- vfp_load_reg32(rd, a->vd);
+ vfp_load_reg16(rm, a->vm);
+ vfp_load_reg16(rd, a->vd);
tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
vfp_store_reg32(rd, a->vd);
return true;
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index d605e10f11..c5bc691d92 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -2912,1594 +2912,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
gen_rfe(s, pc, load_cpu_field(spsr));
}
-static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
- uint32_t opr_sz, uint32_t max_sz,
- gen_helper_gvec_3_ptr *fn)
-{
- TCGv_ptr qc_ptr = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
- opr_sz, max_sz, 0, fn);
-}
-
-void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static gen_helper_gvec_3_ptr * const fns[2] = {
- gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
- };
- tcg_debug_assert(vece >= 1 && vece <= 2);
- gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
-}
-
-void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static gen_helper_gvec_3_ptr * const fns[2] = {
- gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
- };
- tcg_debug_assert(vece >= 1 && vece <= 2);
- gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
-}
-
-#define GEN_CMP0(NAME, COND) \
- void NAME(unsigned vece, uint32_t d, uint32_t m, \
- uint32_t opr_sz, uint32_t max_sz) \
- { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
-
-GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
-GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
-GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
-GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
-GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
-
-#undef GEN_CMP0
-
-static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_sar8i_i64(a, a, shift);
- tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_sar16i_i64(a, a, shift);
- tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_sari_i32(a, a, shift);
- tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_sari_i64(a, a, shift);
- tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- tcg_gen_sari_vec(vece, a, a, sh);
- tcg_gen_add_vec(vece, d, d, a);
-}
-
-void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen2i ops[4] = {
- { .fni8 = gen_ssra8_i64,
- .fniv = gen_ssra_vec,
- .fno = gen_helper_gvec_ssra_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni8 = gen_ssra16_i64,
- .fniv = gen_ssra_vec,
- .fno = gen_helper_gvec_ssra_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_ssra32_i32,
- .fniv = gen_ssra_vec,
- .fno = gen_helper_gvec_ssra_s,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_ssra64_i64,
- .fniv = gen_ssra_vec,
- .fno = gen_helper_gvec_ssra_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize]. */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- /*
- * Shifts larger than the element size are architecturally valid.
- * Signed results in all sign bits.
- */
- shift = MIN(shift, (8 << vece) - 1);
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-}
-
-static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_shr8i_i64(a, a, shift);
- tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_shr16i_i64(a, a, shift);
- tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_shri_i32(a, a, shift);
- tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_shri_i64(a, a, shift);
- tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- tcg_gen_shri_vec(vece, a, a, sh);
- tcg_gen_add_vec(vece, d, d, a);
-}
-
-void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen2i ops[4] = {
- { .fni8 = gen_usra8_i64,
- .fniv = gen_usra_vec,
- .fno = gen_helper_gvec_usra_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8, },
- { .fni8 = gen_usra16_i64,
- .fniv = gen_usra_vec,
- .fno = gen_helper_gvec_usra_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16, },
- { .fni4 = gen_usra32_i32,
- .fniv = gen_usra_vec,
- .fno = gen_helper_gvec_usra_s,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32, },
- { .fni8 = gen_usra64_i64,
- .fniv = gen_usra_vec,
- .fno = gen_helper_gvec_usra_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64, },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize]. */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- /*
- * Shifts larger than the element size are architecturally valid.
- * Unsigned results in all zeros as input to accumulate: nop.
- */
- if (shift < (8 << vece)) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
- } else {
- /* Nop, but we do need to clear the tail. */
- tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
- }
-}
-
-/*
- * Shift one less than the requested amount, and the low bit is
- * the rounding bit. For the 8 and 16-bit operations, because we
- * mask the low bit, we can perform a normal integer shift instead
- * of a vector shift.
- */
-static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, sh - 1);
- tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
- tcg_gen_vec_sar8i_i64(d, a, sh);
- tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, sh - 1);
- tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
- tcg_gen_vec_sar16i_i64(d, a, sh);
- tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
- TCGv_i32 t;
-
- /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
- if (sh == 32) {
- tcg_gen_movi_i32(d, 0);
- return;
- }
- t = tcg_temp_new_i32();
- tcg_gen_extract_i32(t, a, sh - 1, 1);
- tcg_gen_sari_i32(d, a, sh);
- tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_extract_i64(t, a, sh - 1, 1);
- tcg_gen_sari_i64(d, a, sh);
- tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
-
- tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
- tcg_gen_and_vec(vece, t, t, ones);
- tcg_gen_sari_vec(vece, d, a, sh);
- tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen2i ops[4] = {
- { .fni8 = gen_srshr8_i64,
- .fniv = gen_srshr_vec,
- .fno = gen_helper_gvec_srshr_b,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni8 = gen_srshr16_i64,
- .fniv = gen_srshr_vec,
- .fno = gen_helper_gvec_srshr_h,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_srshr32_i32,
- .fniv = gen_srshr_vec,
- .fno = gen_helper_gvec_srshr_s,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_srshr64_i64,
- .fniv = gen_srshr_vec,
- .fno = gen_helper_gvec_srshr_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize] */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- if (shift == (8 << vece)) {
- /*
- * Shifts larger than the element size are architecturally valid.
- * Signed results in all sign bits. With rounding, this produces
- * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
- * I.e. always zero.
- */
- tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
- } else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
- }
-}
-
-static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- gen_srshr8_i64(t, a, sh);
- tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- gen_srshr16_i64(t, a, sh);
- tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
- TCGv_i32 t = tcg_temp_new_i32();
-
- gen_srshr32_i32(t, a, sh);
- tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- gen_srshr64_i64(t, a, sh);
- tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
-
- gen_srshr_vec(vece, t, a, sh);
- tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen2i ops[4] = {
- { .fni8 = gen_srsra8_i64,
- .fniv = gen_srsra_vec,
- .fno = gen_helper_gvec_srsra_b,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_8 },
- { .fni8 = gen_srsra16_i64,
- .fniv = gen_srsra_vec,
- .fno = gen_helper_gvec_srsra_h,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_srsra32_i32,
- .fniv = gen_srsra_vec,
- .fno = gen_helper_gvec_srsra_s,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_srsra64_i64,
- .fniv = gen_srsra_vec,
- .fno = gen_helper_gvec_srsra_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize] */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- /*
- * Shifts larger than the element size are architecturally valid.
- * Signed results in all sign bits. With rounding, this produces
- * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
- * I.e. always zero. With accumulation, this leaves D unchanged.
- */
- if (shift == (8 << vece)) {
- /* Nop, but we do need to clear the tail. */
- tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
- } else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
- }
-}
-
-static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, sh - 1);
- tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
- tcg_gen_vec_shr8i_i64(d, a, sh);
- tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, sh - 1);
- tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
- tcg_gen_vec_shr16i_i64(d, a, sh);
- tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
- TCGv_i32 t;
-
- /* Handle shift by the input size for the benefit of trans_URSHR_ri */
- if (sh == 32) {
- tcg_gen_extract_i32(d, a, sh - 1, 1);
- return;
- }
- t = tcg_temp_new_i32();
- tcg_gen_extract_i32(t, a, sh - 1, 1);
- tcg_gen_shri_i32(d, a, sh);
- tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_extract_i64(t, a, sh - 1, 1);
- tcg_gen_shri_i64(d, a, sh);
- tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
-
- tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
- tcg_gen_and_vec(vece, t, t, ones);
- tcg_gen_shri_vec(vece, d, a, shift);
- tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen2i ops[4] = {
- { .fni8 = gen_urshr8_i64,
- .fniv = gen_urshr_vec,
- .fno = gen_helper_gvec_urshr_b,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni8 = gen_urshr16_i64,
- .fniv = gen_urshr_vec,
- .fno = gen_helper_gvec_urshr_h,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_urshr32_i32,
- .fniv = gen_urshr_vec,
- .fno = gen_helper_gvec_urshr_s,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_urshr64_i64,
- .fniv = gen_urshr_vec,
- .fno = gen_helper_gvec_urshr_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize] */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- if (shift == (8 << vece)) {
- /*
- * Shifts larger than the element size are architecturally valid.
- * Unsigned results in zero. With rounding, this produces a
- * copy of the most significant bit.
- */
- tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
- } else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
- }
-}
-
-static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- if (sh == 8) {
- tcg_gen_vec_shr8i_i64(t, a, 7);
- } else {
- gen_urshr8_i64(t, a, sh);
- }
- tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- if (sh == 16) {
- tcg_gen_vec_shr16i_i64(t, a, 15);
- } else {
- gen_urshr16_i64(t, a, sh);
- }
- tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
- TCGv_i32 t = tcg_temp_new_i32();
-
- if (sh == 32) {
- tcg_gen_shri_i32(t, a, 31);
- } else {
- gen_urshr32_i32(t, a, sh);
- }
- tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- if (sh == 64) {
- tcg_gen_shri_i64(t, a, 63);
- } else {
- gen_urshr64_i64(t, a, sh);
- }
- tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
-
- if (sh == (8 << vece)) {
- tcg_gen_shri_vec(vece, t, a, sh - 1);
- } else {
- gen_urshr_vec(vece, t, a, sh);
- }
- tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen2i ops[4] = {
- { .fni8 = gen_ursra8_i64,
- .fniv = gen_ursra_vec,
- .fno = gen_helper_gvec_ursra_b,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_8 },
- { .fni8 = gen_ursra16_i64,
- .fniv = gen_ursra_vec,
- .fno = gen_helper_gvec_ursra_h,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_ursra32_i32,
- .fniv = gen_ursra_vec,
- .fno = gen_helper_gvec_ursra_s,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_ursra64_i64,
- .fniv = gen_ursra_vec,
- .fno = gen_helper_gvec_ursra_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize] */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-}
-
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_8, 0xff >> shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_16, 0xffff >> shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_shri_i32(a, a, shift);
- tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
-}
-
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_shri_i64(a, a, shift);
- tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
-}
-
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
- tcg_gen_shri_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-}
-
-void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
- const GVecGen2i ops[4] = {
- { .fni8 = gen_shr8_ins_i64,
- .fniv = gen_shr_ins_vec,
- .fno = gen_helper_gvec_sri_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni8 = gen_shr16_ins_i64,
- .fniv = gen_shr_ins_vec,
- .fno = gen_helper_gvec_sri_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_shr32_ins_i32,
- .fniv = gen_shr_ins_vec,
- .fno = gen_helper_gvec_sri_s,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_shr64_ins_i64,
- .fniv = gen_shr_ins_vec,
- .fno = gen_helper_gvec_sri_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [1..esize]. */
- tcg_debug_assert(shift > 0);
- tcg_debug_assert(shift <= (8 << vece));
-
- /* Shift of esize leaves destination unchanged. */
- if (shift < (8 << vece)) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
- } else {
- /* Nop, but we do need to clear the tail. */
- tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
- }
-}
-
-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_8, 0xff << shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shli_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_16, 0xffff << shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shli_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
-
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
-}
-
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-}
-
-void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
- const GVecGen2i ops[4] = {
- { .fni8 = gen_shl8_ins_i64,
- .fniv = gen_shl_ins_vec,
- .fno = gen_helper_gvec_sli_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni8 = gen_shl16_ins_i64,
- .fniv = gen_shl_ins_vec,
- .fno = gen_helper_gvec_sli_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_shl32_ins_i32,
- .fniv = gen_shl_ins_vec,
- .fno = gen_helper_gvec_sli_s,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_shl64_ins_i64,
- .fniv = gen_shl_ins_vec,
- .fno = gen_helper_gvec_sli_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
-
- /* tszimm encoding produces immediates in the range [0..esize-1]. */
- tcg_debug_assert(shift >= 0);
- tcg_debug_assert(shift < (8 << vece));
-
- if (shift == 0) {
- tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
- } else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
- }
-}
-
-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u8(a, a, b);
- gen_helper_neon_add_u8(d, d, a);
-}
-
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u8(a, a, b);
- gen_helper_neon_sub_u8(d, d, a);
-}
-
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u16(a, a, b);
- gen_helper_neon_add_u16(d, d, a);
-}
-
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u16(a, a, b);
- gen_helper_neon_sub_u16(d, d, a);
-}
-
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_mul_i32(a, a, b);
- tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_mul_i32(a, a, b);
- tcg_gen_sub_i32(d, d, a);
-}
-
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- tcg_gen_mul_i64(a, a, b);
- tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- tcg_gen_mul_i64(a, a, b);
- tcg_gen_sub_i64(d, d, a);
-}
-
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_mul_vec(vece, a, a, b);
- tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_mul_vec(vece, a, a, b);
- tcg_gen_sub_vec(vece, d, d, a);
-}
-
-/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
- * these tables are shared with AArch64 which does support them.
- */
-void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fni4 = gen_mla8_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni4 = gen_mla16_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_mla32_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_mla64_i64,
- .fniv = gen_mla_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fni4 = gen_mls8_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni4 = gen_mls16_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_mls32_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_mls64_i64,
- .fniv = gen_mls_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_and_i32(d, a, b);
- tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
-}
-
-void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- tcg_gen_and_i64(d, a, b);
- tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
-}
-
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_and_vec(vece, d, a, b);
- tcg_gen_dupi_vec(vece, a, 0);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
-
-void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
- static const GVecGen3 ops[4] = {
- { .fni4 = gen_helper_neon_tst_u8,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fni4 = gen_helper_neon_tst_u16,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_cmtst_i32,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_cmtst_i64,
- .fniv = gen_cmtst_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
-{
- TCGv_i32 lval = tcg_temp_new_i32();
- TCGv_i32 rval = tcg_temp_new_i32();
- TCGv_i32 lsh = tcg_temp_new_i32();
- TCGv_i32 rsh = tcg_temp_new_i32();
- TCGv_i32 zero = tcg_constant_i32(0);
- TCGv_i32 max = tcg_constant_i32(32);
-
- /*
- * Rely on the TCG guarantee that out of range shifts produce
- * unspecified results, not undefined behaviour (i.e. no trap).
- * Discard out-of-range results after the fact.
- */
- tcg_gen_ext8s_i32(lsh, shift);
- tcg_gen_neg_i32(rsh, lsh);
- tcg_gen_shl_i32(lval, src, lsh);
- tcg_gen_shr_i32(rval, src, rsh);
- tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
- tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
-}
-
-void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
-{
- TCGv_i64 lval = tcg_temp_new_i64();
- TCGv_i64 rval = tcg_temp_new_i64();
- TCGv_i64 lsh = tcg_temp_new_i64();
- TCGv_i64 rsh = tcg_temp_new_i64();
- TCGv_i64 zero = tcg_constant_i64(0);
- TCGv_i64 max = tcg_constant_i64(64);
-
- /*
- * Rely on the TCG guarantee that out of range shifts produce
- * unspecified results, not undefined behaviour (i.e. no trap).
- * Discard out-of-range results after the fact.
- */
- tcg_gen_ext8s_i64(lsh, shift);
- tcg_gen_neg_i64(rsh, lsh);
- tcg_gen_shl_i64(lval, src, lsh);
- tcg_gen_shr_i64(rval, src, rsh);
- tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
- tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
-}
-
-static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
- TCGv_vec src, TCGv_vec shift)
-{
- TCGv_vec lval = tcg_temp_new_vec_matching(dst);
- TCGv_vec rval = tcg_temp_new_vec_matching(dst);
- TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
-
- tcg_gen_neg_vec(vece, rsh, shift);
- if (vece == MO_8) {
- tcg_gen_mov_vec(lsh, shift);
- } else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, msk);
- tcg_gen_and_vec(vece, rsh, rsh, msk);
- }
-
- /*
- * Rely on the TCG guarantee that out of range shifts produce
- * unspecified results, not undefined behaviour (i.e. no trap).
- * Discard out-of-range results after the fact.
- */
- tcg_gen_shlv_vec(vece, lval, src, lsh);
- tcg_gen_shrv_vec(vece, rval, src, rsh);
-
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
- /*
- * The choice of LT (signed) and GEU (unsigned) are biased toward
- * the instructions of the x86_64 host. For MO_8, the whole byte
- * is significant so we must use an unsigned compare; otherwise we
- * have already masked to a byte and so a signed compare works.
- * Other tcg hosts have a full set of comparisons and do not care.
- */
- if (vece == MO_8) {
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
- tcg_gen_andc_vec(vece, lval, lval, lsh);
- tcg_gen_andc_vec(vece, rval, rval, rsh);
- } else {
- tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
- tcg_gen_and_vec(vece, lval, lval, lsh);
- tcg_gen_and_vec(vece, rval, rval, rsh);
- }
- tcg_gen_or_vec(vece, dst, lval, rval);
-}
-
-void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_b,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_h,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_ushl_i32,
- .fniv = gen_ushl_vec,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_ushl_i64,
- .fniv = gen_ushl_vec,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
-{
- TCGv_i32 lval = tcg_temp_new_i32();
- TCGv_i32 rval = tcg_temp_new_i32();
- TCGv_i32 lsh = tcg_temp_new_i32();
- TCGv_i32 rsh = tcg_temp_new_i32();
- TCGv_i32 zero = tcg_constant_i32(0);
- TCGv_i32 max = tcg_constant_i32(31);
-
- /*
- * Rely on the TCG guarantee that out of range shifts produce
- * unspecified results, not undefined behaviour (i.e. no trap).
- * Discard out-of-range results after the fact.
- */
- tcg_gen_ext8s_i32(lsh, shift);
- tcg_gen_neg_i32(rsh, lsh);
- tcg_gen_shl_i32(lval, src, lsh);
- tcg_gen_umin_i32(rsh, rsh, max);
- tcg_gen_sar_i32(rval, src, rsh);
- tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
- tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
-}
-
-void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
-{
- TCGv_i64 lval = tcg_temp_new_i64();
- TCGv_i64 rval = tcg_temp_new_i64();
- TCGv_i64 lsh = tcg_temp_new_i64();
- TCGv_i64 rsh = tcg_temp_new_i64();
- TCGv_i64 zero = tcg_constant_i64(0);
- TCGv_i64 max = tcg_constant_i64(63);
-
- /*
- * Rely on the TCG guarantee that out of range shifts produce
- * unspecified results, not undefined behaviour (i.e. no trap).
- * Discard out-of-range results after the fact.
- */
- tcg_gen_ext8s_i64(lsh, shift);
- tcg_gen_neg_i64(rsh, lsh);
- tcg_gen_shl_i64(lval, src, lsh);
- tcg_gen_umin_i64(rsh, rsh, max);
- tcg_gen_sar_i64(rval, src, rsh);
- tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
- tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
-}
-
-static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
- TCGv_vec src, TCGv_vec shift)
-{
- TCGv_vec lval = tcg_temp_new_vec_matching(dst);
- TCGv_vec rval = tcg_temp_new_vec_matching(dst);
- TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
-
- /*
- * Rely on the TCG guarantee that out of range shifts produce
- * unspecified results, not undefined behaviour (i.e. no trap).
- * Discard out-of-range results after the fact.
- */
- tcg_gen_neg_vec(vece, rsh, shift);
- if (vece == MO_8) {
- tcg_gen_mov_vec(lsh, shift);
- } else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
- }
-
- /* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
-
- tcg_gen_shlv_vec(vece, lval, src, lsh);
- tcg_gen_sarv_vec(vece, rval, src, rsh);
-
- /* Select in-bound left shift. */
- tcg_gen_andc_vec(vece, lval, lval, tmp);
-
- /* Select between left and right shift. */
- if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
- } else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
- }
-}
-
-void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_b,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_h,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_sshl_i32,
- .fniv = gen_sshl_vec,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_sshl_i64,
- .fniv = gen_sshl_vec,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
- TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec x = tcg_temp_new_vec_matching(t);
- tcg_gen_add_vec(vece, x, a, b);
- tcg_gen_usadd_vec(vece, t, a, b);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
- tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen4 ops[4] = {
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_b,
- .write_aofs = true,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_h,
- .write_aofs = true,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_s,
- .write_aofs = true,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_d,
- .write_aofs = true,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
- TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec x = tcg_temp_new_vec_matching(t);
- tcg_gen_add_vec(vece, x, a, b);
- tcg_gen_ssadd_vec(vece, t, a, b);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
- tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen4 ops[4] = {
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_b,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_h,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_s,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_d,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_64 },
- };
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
- TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec x = tcg_temp_new_vec_matching(t);
- tcg_gen_sub_vec(vece, x, a, b);
- tcg_gen_ussub_vec(vece, t, a, b);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
- tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen4 ops[4] = {
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_b,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_h,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_s,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_d,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_64 },
- };
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
- TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec x = tcg_temp_new_vec_matching(t);
- tcg_gen_sub_vec(vece, x, a, b);
- tcg_gen_sssub_vec(vece, t, a, b);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
- tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen4 ops[4] = {
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_b,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_h,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_s,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_d,
- .opt_opc = vecop_list,
- .write_aofs = true,
- .vece = MO_64 },
- };
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t = tcg_temp_new_i32();
-
- tcg_gen_sub_i32(t, a, b);
- tcg_gen_sub_i32(d, b, a);
- tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
-}
-
-static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_sub_i64(t, a, b);
- tcg_gen_sub_i64(d, b, a);
- tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
-}
-
-static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
-
- tcg_gen_smin_vec(vece, t, a, b);
- tcg_gen_smax_vec(vece, d, a, b);
- tcg_gen_sub_vec(vece, d, d, t);
-}
-
-void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fniv = gen_sabd_vec,
- .fno = gen_helper_gvec_sabd_b,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fniv = gen_sabd_vec,
- .fno = gen_helper_gvec_sabd_h,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_sabd_i32,
- .fniv = gen_sabd_vec,
- .fno = gen_helper_gvec_sabd_s,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_sabd_i64,
- .fniv = gen_sabd_vec,
- .fno = gen_helper_gvec_sabd_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t = tcg_temp_new_i32();
-
- tcg_gen_sub_i32(t, a, b);
- tcg_gen_sub_i32(d, b, a);
- tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
-}
-
-static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_sub_i64(t, a, b);
- tcg_gen_sub_i64(d, b, a);
- tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
-}
-
-static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
-
- tcg_gen_umin_vec(vece, t, a, b);
- tcg_gen_umax_vec(vece, d, a, b);
- tcg_gen_sub_vec(vece, d, d, t);
-}
-
-void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fniv = gen_uabd_vec,
- .fno = gen_helper_gvec_uabd_b,
- .opt_opc = vecop_list,
- .vece = MO_8 },
- { .fniv = gen_uabd_vec,
- .fno = gen_helper_gvec_uabd_h,
- .opt_opc = vecop_list,
- .vece = MO_16 },
- { .fni4 = gen_uabd_i32,
- .fniv = gen_uabd_vec,
- .fno = gen_helper_gvec_uabd_s,
- .opt_opc = vecop_list,
- .vece = MO_32 },
- { .fni8 = gen_uabd_i64,
- .fniv = gen_uabd_vec,
- .fno = gen_helper_gvec_uabd_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t = tcg_temp_new_i32();
- gen_sabd_i32(t, a, b);
- tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t = tcg_temp_new_i64();
- gen_sabd_i64(t, a, b);
- tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- gen_sabd_vec(vece, t, a, b);
- tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sub_vec, INDEX_op_add_vec,
- INDEX_op_smin_vec, INDEX_op_smax_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fniv = gen_saba_vec,
- .fno = gen_helper_gvec_saba_b,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_8 },
- { .fniv = gen_saba_vec,
- .fno = gen_helper_gvec_saba_h,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_saba_i32,
- .fniv = gen_saba_vec,
- .fno = gen_helper_gvec_saba_s,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_saba_i64,
- .fniv = gen_saba_vec,
- .fno = gen_helper_gvec_saba_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t = tcg_temp_new_i32();
- gen_uabd_i32(t, a, b);
- tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t = tcg_temp_new_i64();
- gen_uabd_i64(t, a, b);
- tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- gen_uabd_vec(vece, t, a, b);
- tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sub_vec, INDEX_op_add_vec,
- INDEX_op_umin_vec, INDEX_op_umax_vec, 0
- };
- static const GVecGen3 ops[4] = {
- { .fniv = gen_uaba_vec,
- .fno = gen_helper_gvec_uaba_b,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_8 },
- { .fniv = gen_uaba_vec,
- .fno = gen_helper_gvec_uaba_h,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_uaba_i32,
- .fniv = gen_uaba_vec,
- .fno = gen_helper_gvec_uaba_s,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_uaba_i64,
- .fniv = gen_uaba_vec,
- .fno = gen_helper_gvec_uaba_d,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list,
- .load_dest = true,
- .vece = MO_64 },
- };
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
{
static const uint16_t mask[3] = {
@@ -8765,12 +7177,12 @@ static bool trans_PLD(DisasContext *s, arg_PLD *a)
return ENABLE_ARCH_5TE;
}
-static bool trans_PLDW(DisasContext *s, arg_PLD *a)
+static bool trans_PLDW(DisasContext *s, arg_PLDW *a)
{
return arm_dc_feature(s, ARM_FEATURE_V7MP);
}
-static bool trans_PLI(DisasContext *s, arg_PLD *a)
+static bool trans_PLI(DisasContext *s, arg_PLI *a)
{
return ENABLE_ARCH_7;
}
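
The rounding shifts removed above (gen_srshr*, gen_urshr*, and their accumulating srsra/ursra variants) all use the trick described in the comment before gen_srshr8_i64: shift by one less than the requested amount, keep only the bit that would be shifted out last, and add it back as the rounding increment. Below is a minimal scalar sketch of that arithmetic; it is illustrative only, not QEMU code, the function names are hypothetical, and it assumes the usual arithmetic behaviour of >> on signed values.

#include <stdint.h>
#include <stdio.h>

/* Rounding signed right shift, 1 <= sh <= 32 (cf. gen_srshr32_i32). */
static int32_t srshr32(int32_t x, int sh)
{
    if (sh == 32) {
        /* All sign bits plus the rounding bit: always zero. */
        return 0;
    }
    int32_t round = (x >> (sh - 1)) & 1;   /* the bit shifted out last */
    return (x >> sh) + round;
}

/* Rounding unsigned right shift, 1 <= sh <= 32 (cf. gen_urshr32_i32). */
static uint32_t urshr32(uint32_t x, int sh)
{
    if (sh == 32) {
        return x >> 31;                    /* only the rounding bit survives */
    }
    uint32_t round = (x >> (sh - 1)) & 1;
    return (x >> sh) + round;
}

int main(void)
{
    printf("%d\n", srshr32(-7, 1));   /* (-7 + 1) >> 1 == -3 */
    printf("%u\n", urshr32(7, 2));    /* (7 + 2) >> 2 == 2 */
    return 0;
}
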
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index dc66ff2190..3abdbedfe5 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -252,6 +252,11 @@ static inline int shl_12(DisasContext *s, int x)
return x << 12;
}
+static inline int xor_2(DisasContext *s, int x)
+{
+ return x ^ 2;
+}
+
static inline int neon_3same_fp_size(DisasContext *s, int x)
{
/* Convert 0==fp32, 1==fp16 into a MO_* value */
@@ -401,6 +406,36 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
*/
uint64_t vfp_expand_imm(int size, uint8_t imm8);
+static inline void gen_vfp_absh(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_andi_i32(d, s, INT16_MAX);
+}
+
+static inline void gen_vfp_abss(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_andi_i32(d, s, INT32_MAX);
+}
+
+static inline void gen_vfp_absd(TCGv_i64 d, TCGv_i64 s)
+{
+ tcg_gen_andi_i64(d, s, INT64_MAX);
+}
+
+static inline void gen_vfp_negh(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_xori_i32(d, s, 1u << 15);
+}
+
+static inline void gen_vfp_negs(TCGv_i32 d, TCGv_i32 s)
+{
+ tcg_gen_xori_i32(d, s, 1u << 31);
+}
+
+static inline void gen_vfp_negd(TCGv_i64 d, TCGv_i64 s)
+{
+ tcg_gen_xori_i64(d, s, 1ull << 63);
+}
+
/* Vector operations shared between ARM and AArch64. */
void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
@@ -445,6 +480,11 @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh);
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh);
+void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh);
+void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh);
+
void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
@@ -474,6 +514,17 @@ void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/
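
The gen_vfp_absh/abss/absd and gen_vfp_negh/negs/negd inlines added to translate.h above implement floating-point absolute value and negation as pure integer operations on the raw encoding: AND away the sign bit for abs, XOR it for neg. That identity is why the out-of-line VFP_HELPER(neg) and VFP_HELPER(abs) functions can be dropped from vfp_helper.c at the end of this diff. A host-side sketch of the same identity for float32 (illustrative only, not QEMU code):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static float fneg_bits(float f)
{
    uint32_t u;

    memcpy(&u, &f, sizeof(u));
    u ^= UINT32_C(1) << 31;        /* flip the sign bit: negation */
    memcpy(&f, &u, sizeof(f));
    return f;
}

static float fabs_bits(float f)
{
    uint32_t u;

    memcpy(&u, &f, sizeof(u));
    u &= INT32_MAX;                /* clear the sign bit: absolute value */
    memcpy(&f, &u, sizeof(f));
    return f;
}

int main(void)
{
    printf("%g %g\n", fneg_bits(1.5f), fabs_bits(-2.25f));   /* -1.5 2.25 */
    return 0;
}
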
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 1f93510b85..56fea14edb 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -971,6 +971,11 @@ static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat)
return -float32_eq_quiet(op1, op2, stat);
}
+static uint64_t float64_ceq(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_eq_quiet(op1, op2, stat);
+}
+
static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat)
{
return -float16_le(op2, op1, stat);
@@ -981,6 +986,11 @@ static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat)
return -float32_le(op2, op1, stat);
}
+static uint64_t float64_cge(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_le(op2, op1, stat);
+}
+
static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat)
{
return -float16_lt(op2, op1, stat);
@@ -991,6 +1001,11 @@ static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat)
return -float32_lt(op2, op1, stat);
}
+static uint64_t float64_cgt(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_lt(op2, op1, stat);
+}
+
static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat)
{
return -float16_le(float16_abs(op2), float16_abs(op1), stat);
@@ -1001,6 +1016,11 @@ static uint32_t float32_acge(float32 op1, float32 op2, float_status *stat)
return -float32_le(float32_abs(op2), float32_abs(op1), stat);
}
+static uint64_t float64_acge(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_le(float64_abs(op2), float64_abs(op1), stat);
+}
+
static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat)
{
return -float16_lt(float16_abs(op2), float16_abs(op1), stat);
@@ -1011,6 +1031,11 @@ static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat)
return -float32_lt(float32_abs(op2), float32_abs(op1), stat);
}
+static uint64_t float64_acgt(float64 op1, float64 op2, float_status *stat)
+{
+ return -float64_lt(float64_abs(op2), float64_abs(op1), stat);
+}
+
static int16_t vfp_tosszh(float16 x, void *fpstp)
{
float_status *fpst = fpstp;
@@ -1129,6 +1154,11 @@ static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
return float32_abs(float32_sub(op1, op2, stat));
}
+static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
+{
+ return float64_abs(float64_sub(op1, op2, stat));
+}
+
/*
* Reciprocal step. These are the AArch32 version which uses a
* non-fused multiply-and-subtract.
@@ -1213,33 +1243,43 @@ DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
DO_3OP(gvec_fabd_h, float16_abd, float16)
DO_3OP(gvec_fabd_s, float32_abd, float32)
+DO_3OP(gvec_fabd_d, float64_abd, float64)
DO_3OP(gvec_fceq_h, float16_ceq, float16)
DO_3OP(gvec_fceq_s, float32_ceq, float32)
+DO_3OP(gvec_fceq_d, float64_ceq, float64)
DO_3OP(gvec_fcge_h, float16_cge, float16)
DO_3OP(gvec_fcge_s, float32_cge, float32)
+DO_3OP(gvec_fcge_d, float64_cge, float64)
DO_3OP(gvec_fcgt_h, float16_cgt, float16)
DO_3OP(gvec_fcgt_s, float32_cgt, float32)
+DO_3OP(gvec_fcgt_d, float64_cgt, float64)
DO_3OP(gvec_facge_h, float16_acge, float16)
DO_3OP(gvec_facge_s, float32_acge, float32)
+DO_3OP(gvec_facge_d, float64_acge, float64)
DO_3OP(gvec_facgt_h, float16_acgt, float16)
DO_3OP(gvec_facgt_s, float32_acgt, float32)
+DO_3OP(gvec_facgt_d, float64_acgt, float64)
DO_3OP(gvec_fmax_h, float16_max, float16)
DO_3OP(gvec_fmax_s, float32_max, float32)
+DO_3OP(gvec_fmax_d, float64_max, float64)
DO_3OP(gvec_fmin_h, float16_min, float16)
DO_3OP(gvec_fmin_s, float32_min, float32)
+DO_3OP(gvec_fmin_d, float64_min, float64)
DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16)
DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32)
+DO_3OP(gvec_fmaxnum_d, float64_maxnum, float64)
DO_3OP(gvec_fminnum_h, float16_minnum, float16)
DO_3OP(gvec_fminnum_s, float32_minnum, float32)
+DO_3OP(gvec_fminnum_d, float64_minnum, float64)
DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
@@ -1248,6 +1288,13 @@ DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
#ifdef TARGET_AARCH64
+DO_3OP(gvec_fdiv_h, float16_div, float16)
+DO_3OP(gvec_fdiv_s, float32_div, float32)
+DO_3OP(gvec_fdiv_d, float64_div, float64)
+
+DO_3OP(gvec_fmulx_h, helper_advsimd_mulxh, float16)
+DO_3OP(gvec_fmulx_s, helper_vfp_mulxs, float32)
+DO_3OP(gvec_fmulx_d, helper_vfp_mulxd, float64)
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
@@ -1298,6 +1345,12 @@ static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
return float32_muladd(op1, op2, dest, 0, stat);
}
+static float64 float64_muladd_f(float64 dest, float64 op1, float64 op2,
+ float_status *stat)
+{
+ return float64_muladd(op1, op2, dest, 0, stat);
+}
+
static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
float_status *stat)
{
@@ -1310,6 +1363,12 @@ static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
return float32_muladd(float32_chs(op1), op2, dest, 0, stat);
}
+static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
+ float_status *stat)
+{
+ return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
+}
+
#define DO_MULADD(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
@@ -1329,9 +1388,11 @@ DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
+DO_MULADD(gvec_vfma_d, float64_muladd_f, float64)
DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
+DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
* For AdvSIMD, there is of course only one such vector segment.
@@ -1385,7 +1446,7 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8)
#undef DO_MLA_IDX
-#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \
+#define DO_FMUL_IDX(NAME, ADD, MUL, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc); \
@@ -1395,33 +1456,37 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
TYPE mm = m[H(i + idx)]; \
for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_##ADD(d[i + j], \
- TYPE##_mul(n[i + j], mm, stat), stat); \
+ d[i + j] = ADD(d[i + j], MUL(n[i + j], mm, stat), stat); \
} \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-#define float16_nop(N, M, S) (M)
-#define float32_nop(N, M, S) (M)
-#define float64_nop(N, M, S) (M)
+#define nop(N, M, S) (M)
+
+DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8)
+
+#ifdef TARGET_AARCH64
+
+DO_FMUL_IDX(gvec_fmulx_idx_h, nop, helper_advsimd_mulxh, float16, H2)
+DO_FMUL_IDX(gvec_fmulx_idx_s, nop, helper_vfp_mulxs, float32, H4)
+DO_FMUL_IDX(gvec_fmulx_idx_d, nop, helper_vfp_mulxd, float64, H8)
+
+#endif
-DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
-DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
-DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8)
+#undef nop
/*
* Non-fused multiply-accumulate operations, for Neon. NB that unlike
* the fused ops below they assume accumulate both from and into Vd.
*/
-DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2)
-DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4)
-DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2)
-DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4)
-
-#undef float16_nop
-#undef float32_nop
-#undef float64_nop
+DO_FMUL_IDX(gvec_fmla_nf_idx_h, float16_add, float16_mul, float16, H2)
+DO_FMUL_IDX(gvec_fmla_nf_idx_s, float32_add, float32_mul, float32, H4)
+DO_FMUL_IDX(gvec_fmls_nf_idx_h, float16_sub, float16_mul, float16, H2)
+DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
+
#undef DO_FMUL_IDX
#define DO_FMLA_IDX(NAME, TYPE, H) \
@@ -2127,50 +2192,90 @@ DO_ABA(gvec_uaba_d, uint64_t)
#undef DO_ABA
-#define DO_NEON_PAIRWISE(NAME, OP) \
- void HELPER(NAME##s)(void *vd, void *vn, void *vm, \
- void *stat, uint32_t oprsz) \
- { \
- float_status *fpst = stat; \
- float32 *d = vd; \
- float32 *n = vn; \
- float32 *m = vm; \
- float32 r0, r1; \
- \
- /* Read all inputs before writing outputs in case vm == vd */ \
- r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \
- r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \
- \
- d[H4(0)] = r0; \
- d[H4(1)] = r1; \
- } \
- \
- void HELPER(NAME##h)(void *vd, void *vn, void *vm, \
- void *stat, uint32_t oprsz) \
- { \
- float_status *fpst = stat; \
- float16 *d = vd; \
- float16 *n = vn; \
- float16 *m = vm; \
- float16 r0, r1, r2, r3; \
- \
- /* Read all inputs before writing outputs in case vm == vd */ \
- r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \
- r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \
- r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
- r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
- \
- d[H2(0)] = r0; \
- d[H2(1)] = r1; \
- d[H2(2)] = r2; \
- d[H2(3)] = r3; \
- }
-
-DO_NEON_PAIRWISE(neon_padd, add)
-DO_NEON_PAIRWISE(neon_pmax, max)
-DO_NEON_PAIRWISE(neon_pmin, min)
-
-#undef DO_NEON_PAIRWISE
+#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ ARMVectorReg scratch; \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t half = oprsz / sizeof(TYPE) / 2; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ if (unlikely(d == m)) { \
+ m = memcpy(&scratch, m, oprsz); \
+ } \
+ for (intptr_t i = 0; i < half; ++i) { \
+ d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)], stat); \
+ } \
+ for (intptr_t i = 0; i < half; ++i) { \
+ d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_3OP_PAIR(gvec_faddp_h, float16_add, float16, H2)
+DO_3OP_PAIR(gvec_faddp_s, float32_add, float32, H4)
+DO_3OP_PAIR(gvec_faddp_d, float64_add, float64, )
+
+DO_3OP_PAIR(gvec_fmaxp_h, float16_max, float16, H2)
+DO_3OP_PAIR(gvec_fmaxp_s, float32_max, float32, H4)
+DO_3OP_PAIR(gvec_fmaxp_d, float64_max, float64, )
+
+DO_3OP_PAIR(gvec_fminp_h, float16_min, float16, H2)
+DO_3OP_PAIR(gvec_fminp_s, float32_min, float32, H4)
+DO_3OP_PAIR(gvec_fminp_d, float64_min, float64, )
+
+DO_3OP_PAIR(gvec_fmaxnump_h, float16_maxnum, float16, H2)
+DO_3OP_PAIR(gvec_fmaxnump_s, float32_maxnum, float32, H4)
+DO_3OP_PAIR(gvec_fmaxnump_d, float64_maxnum, float64, )
+
+DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
+DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
+DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
+
+#undef DO_3OP_PAIR
+
+#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ ARMVectorReg scratch; \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t half = oprsz / sizeof(TYPE) / 2; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ if (unlikely(d == m)) { \
+ m = memcpy(&scratch, m, oprsz); \
+ } \
+ for (intptr_t i = 0; i < half; ++i) { \
+ d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)]); \
+ } \
+ for (intptr_t i = 0; i < half; ++i) { \
+ d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)]); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+#define ADD(A, B) (A + B)
+DO_3OP_PAIR(gvec_addp_b, ADD, uint8_t, H1)
+DO_3OP_PAIR(gvec_addp_h, ADD, uint16_t, H2)
+DO_3OP_PAIR(gvec_addp_s, ADD, uint32_t, H4)
+DO_3OP_PAIR(gvec_addp_d, ADD, uint64_t, )
+#undef ADD
+
+DO_3OP_PAIR(gvec_smaxp_b, MAX, int8_t, H1)
+DO_3OP_PAIR(gvec_smaxp_h, MAX, int16_t, H2)
+DO_3OP_PAIR(gvec_smaxp_s, MAX, int32_t, H4)
+
+DO_3OP_PAIR(gvec_umaxp_b, MAX, uint8_t, H1)
+DO_3OP_PAIR(gvec_umaxp_h, MAX, uint16_t, H2)
+DO_3OP_PAIR(gvec_umaxp_s, MAX, uint32_t, H4)
+
+DO_3OP_PAIR(gvec_sminp_b, MIN, int8_t, H1)
+DO_3OP_PAIR(gvec_sminp_h, MIN, int16_t, H2)
+DO_3OP_PAIR(gvec_sminp_s, MIN, int32_t, H4)
+
+DO_3OP_PAIR(gvec_uminp_b, MIN, uint8_t, H1)
+DO_3OP_PAIR(gvec_uminp_h, MIN, uint16_t, H2)
+DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
+
+#undef DO_3OP_PAIR
#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
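
The DO_3OP_PAIR macro introduced in vec_helper.c above fixes the layout for all of the pairwise helpers that replace DO_NEON_PAIRWISE: the low half of the destination holds pairwise results computed from the first operand, the high half results from the second, with a scratch copy guarding the d == m aliasing case (d == n needs no guard because each pair is read before any later element is written). A minimal scalar sketch of that layout for pairwise add, illustrative only and not QEMU code:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define ELEMS 4

static void addp_u32(uint32_t *d, const uint32_t *n, const uint32_t *m)
{
    uint32_t scratch[ELEMS];

    if (d == m) {                       /* same aliasing guard as the macro */
        memcpy(scratch, m, sizeof(scratch));
        m = scratch;
    }
    for (int i = 0; i < ELEMS / 2; i++) {
        d[i] = n[2 * i] + n[2 * i + 1];             /* low half from n */
    }
    for (int i = 0; i < ELEMS / 2; i++) {
        d[i + ELEMS / 2] = m[2 * i] + m[2 * i + 1]; /* high half from m */
    }
}

int main(void)
{
    uint32_t n[ELEMS] = { 1, 2, 3, 4 };
    uint32_t m[ELEMS] = { 10, 20, 30, 40 };
    uint32_t d[ELEMS];

    addp_u32(d, n, m);
    printf("%u %u %u %u\n", d[0], d[1], d[2], d[3]);   /* 3 7 30 70 */
    return 0;
}
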
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 3e5e37abbe..ce26b8a71a 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -281,36 +281,6 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
-dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
-{
- return float16_chs(a);
-}
-
-float32 VFP_HELPER(neg, s)(float32 a)
-{
- return float32_chs(a);
-}
-
-float64 VFP_HELPER(neg, d)(float64 a)
-{
- return float64_chs(a);
-}
-
-dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
-{
- return float16_abs(a);
-}
-
-float32 VFP_HELPER(abs, s)(float32 a)
-{
- return float32_abs(a);
-}
-
-float64 VFP_HELPER(abs, d)(float64 a)
-{
- return float64_abs(a);
-}
-
dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
{
return float16_sqrt(a, &env->vfp.fp_status_f16);