author     Richard Henderson <richard.henderson@linaro.org>  2024-05-28 10:06:53 -0700
committer  Richard Henderson <richard.henderson@linaro.org>  2024-05-28 10:06:53 -0700
commit     f8e5c833f9180a49786604e426dd3d87a22652ea (patch)
tree       cdd1fdb238c97aa93a19f5b2bc40e54951ef8470
parent     ad10b4badc1dd5b28305f9b9f1168cf0aa3ae946 (diff)
parent     f240df3c31b40e4cf1af1f156a88efc1a1df406c (diff)
Merge tag 'pull-target-arm-20240528' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
* xlnx_dpdma: fix descriptor endianness bug
* hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers
* hw/arm/npcm7xx: remove setting of mp-affinity
* hw/char: Correct STM32L4x5 usart register CR2 field ADD_0 size
* hw/intc/arm_gic: Fix handling of NS view of GICC_APR<n>
* hw/input/tsc2005: Fix -Wchar-subscripts warning in tsc2005_txrx()
* hw: arm: Remove use of tabs in some source files
* docs/system: Remove ADC from raspi documentation
* target/arm: Start of the conversion of A64 SIMD to decodetree
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmZV5HsZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3j+CD/9V5kC3DJtovMiolr1z8YYI
# eRj0I/pKacgIzz9kVwzo+UVVgzXAi80VFO7xbe+aucCKs0c2s3wrUnUWkAaGHUYR
# DKhRIp017HKW8esgDVQsItn2030PLQLlhxpLvhSfN7NR2jHiJdE914Kb3h6XIEVE
# CqMRaYt9Vrh5o0e51VSzzccFK+kyYV1MDvNyx1/8F4KJpsMFeK0iy9WYrXx2UxlT
# dlrJZdrShIkOWiQB+bi6zQzjMveNmDicjBCgnC7TO2ayOl0AD24sNg/Z49w+4Hjb
# azUDYR45uuyQD5HJLyBsk5BcYhfyZttn2U5uTvNQ6SEfMuKUFEfdoSebTHngEb6t
# ObOdJW6+GmyaIaaJS99ea8u8jbe1r5zhQGJEBeEWOyGYTKUJ6Q0J+g6dZUdgniOp
# bvORX4qnIlMLMGGYT34410Wf0lsE88BHspcVX0WLGFLMZcEYsHhdgG6/f0p8D3nD
# m3R5+/BxUHK7A6OVe/1YU6jTqnfPBY6CGKSqEvXbJGlPp7LAjIxuUHBRxRnXU+Ad
# ohBwOIEEDNhGnEiiHFFK+wrc8BncXY4eSiJBCLlRaf1AcxCT6ibWXuUlpnWeAwNk
# E3Kmvq9BCdQZpIj7EsyvngTc5PsQrqK0FNIVuSVi38QQnqS/0oykvsPzgSlD6blP
# zcFIgG7aUiPOkdTxcPTYnA==
# =TjtM
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 28 May 2024 07:04:43 AM PDT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
* tag 'pull-target-arm-20240528' of https://git.linaro.org/people/pmaydell/qemu-arm: (42 commits)
target/arm: Convert disas_simd_3same_logic to decodetree
target/arm: Convert FMLAL, FMLSL to decodetree
target/arm: Use gvec for neon pmax, pmin
target/arm: Convert SMAXP, SMINP, UMAXP, UMINP to decodetree
target/arm: Use gvec for neon padd
target/arm: Convert ADDP to decodetree
target/arm: Use gvec for neon faddp, fmaxp, fminp
target/arm: Convert FMAXP, FMINP, FMAXNMP, FMINNMP to decodetree
target/arm: Convert FADDP to decodetree
target/arm: Convert FRECPS, FRSQRTS to decodetree
target/arm: Convert FABD to decodetree
target/arm: Convert FCMEQ, FCMGE, FCMGT, FACGE, FACGT to decodetree
target/arm: Convert FMLA, FMLS to decodetree
target/arm: Convert FNMUL to decodetree
target/arm: Expand vfp neg and abs inline
target/arm: Introduce vfp_load_reg16
target/arm: Convert FMAX, FMIN, FMAXNM, FMINNM to decodetree
target/arm: Convert FADD, FSUB, FDIV, FMUL to decodetree
target/arm: Convert FMULX to decodetree
target/arm: Convert Advanced SIMD copy to decodetree
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
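
A note on the stm32l4x5_usart change in this queue: CR2.ADD is an 8-bit
character-match address split into two declared fields, ADD[7:4] at bit 28 and
ADD[3:0] at bit 24, and the low field had been declared 1 bit wide instead of
4, silently dropping three address bits. A minimal sketch of why the declared
field width matters (plain C, not QEMU code; extract32 here mirrors the QEMU
bitops helper of the same name):

    #include <stdint.h>
    #include <stdio.h>

    /* Extract a 'length'-bit field starting at bit 'start' (length < 32). */
    static uint32_t extract32(uint32_t value, int start, int length)
    {
        return (value >> start) & ((1u << length) - 1);
    }

    int main(void)
    {
        uint32_t cr2 = 0xAu << 24;                        /* ADD[3:0] = 0xA */
        printf("width 1: 0x%x\n", extract32(cr2, 24, 1)); /* 0x0: bits lost */
        printf("width 4: 0x%x\n", extract32(cr2, 24, 4)); /* 0xA: correct */
        return 0;
    }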
-rw-r--r--   docs/system/arm/raspi.rst            1
-rw-r--r--   hw/arm/boot.c                        8
-rw-r--r--   hw/arm/npcm7xx.c                     3
-rw-r--r--   hw/char/omap_uart.c                 49
-rw-r--r--   hw/char/stm32l4x5_usart.c            2
-rw-r--r--   hw/dma/xlnx_dpdma.c                 68
-rw-r--r--   hw/gpio/zaurus.c                    61
-rw-r--r--   hw/input/tsc2005.c                 135
-rw-r--r--   hw/intc/arm_gic.c                    4
-rw-r--r--   target/arm/helper.h                 68
-rw-r--r--   target/arm/hvf/hvf.c               160
-rw-r--r--   target/arm/tcg/a64.decode          317
-rw-r--r--   target/arm/tcg/gengvec.c          1672
-rw-r--r--   target/arm/tcg/gengvec64.c         190
-rw-r--r--   target/arm/tcg/helper-a64.h         12
-rw-r--r--   target/arm/tcg/meson.build           2
-rw-r--r--   target/arm/tcg/neon_helper.c         5
-rw-r--r--   target/arm/tcg/t32.decode           25
-rw-r--r--   target/arm/tcg/translate-a64.c    3131
-rw-r--r--   target/arm/tcg/translate-a64.h       4
-rw-r--r--   target/arm/tcg/translate-neon.c    136
-rw-r--r--   target/arm/tcg/translate-sve.c     145
-rw-r--r--   target/arm/tcg/translate-vfp.c      93
-rw-r--r--   target/arm/tcg/translate.c        1592
-rw-r--r--   target/arm/tcg/translate.h          51
-rw-r--r--   target/arm/tcg/vec_helper.c        227
-rw-r--r--   target/arm/vfp_helper.c             30
27 files changed, 3877 insertions, 4314 deletions
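
The xlnx_dpdma change below addresses the descriptor endianness bug called out
in the queue summary: descriptors live in guest memory as little-endian 32-bit
words, so every field must be byte-swapped to host order after a DMA read and
back to little-endian before a write, or big-endian hosts see garbage. A
minimal sketch of the pattern (plain C with a hypothetical two-field
descriptor; le32_to_host stands in for QEMU's le32_to_cpu):

    #include <stdint.h>

    struct desc {
        uint32_t control;
        uint32_t xfer_size;
    };

    /* Byte-swap on big-endian hosts, no-op on little-endian ones. */
    static uint32_t le32_to_host(uint32_t v)
    {
        const union { uint32_t u; uint8_t b[4]; } probe = { .u = 1u };
        if (probe.b[0]) {
            return v;                          /* little-endian host */
        }
        return (v >> 24) | ((v >> 8) & 0xff00u) |
               ((v & 0xff00u) << 8) | (v << 24);
    }

    /* Convert all fields in place after reading the raw descriptor bytes. */
    static void desc_from_le(struct desc *d)
    {
        d->control   = le32_to_host(d->control);
        d->xfer_size = le32_to_host(d->xfer_size);
    }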
diff --git a/docs/system/arm/raspi.rst b/docs/system/arm/raspi.rst index fbec1da6a1..44eec3f1c3 100644 --- a/docs/system/arm/raspi.rst +++ b/docs/system/arm/raspi.rst @@ -40,7 +40,6 @@ Implemented devices Missing devices --------------- - * Analog to Digital Converter (ADC) * Pulse Width Modulation (PWM) * PCIE Root Port (raspi4b) * GENET Ethernet Controller (raspi4b) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 84ea6a807a..d480a7da02 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -347,13 +347,13 @@ static void set_kernel_args_old(const struct arm_boot_info *info, WRITE_WORD(p, info->ram_size / 4096); /* ramdisk_size */ WRITE_WORD(p, 0); -#define FLAG_READONLY 1 -#define FLAG_RDLOAD 4 -#define FLAG_RDPROMPT 8 +#define FLAG_READONLY 1 +#define FLAG_RDLOAD 4 +#define FLAG_RDPROMPT 8 /* flags */ WRITE_WORD(p, FLAG_READONLY | FLAG_RDLOAD | FLAG_RDPROMPT); /* rootdev */ - WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */ + WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */ /* video_num_cols */ WRITE_WORD(p, 0); /* video_num_rows */ diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index 9f2d96c733..cb7791301b 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -487,9 +487,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) /* CPUs */ for (i = 0; i < nc->num_cpus; i++) { - object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity", - arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS), - &error_abort); object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar", NPCM7XX_GIC_CPU_IF_ADDR, &error_abort); object_property_set_bool(OBJECT(&s->cpu[i]), "reset-hivecs", true, diff --git a/hw/char/omap_uart.c b/hw/char/omap_uart.c index 6848bddb4e..c2ef4c137e 100644 --- a/hw/char/omap_uart.c +++ b/hw/char/omap_uart.c @@ -61,7 +61,7 @@ struct omap_uart_s *omap_uart_init(hwaddr base, s->fclk = fclk; s->irq = irq; s->serial = serial_mm_init(get_system_memory(), base, 2, irq, - omap_clk_getrate(fclk)/16, + omap_clk_getrate(fclk) / 16, chr ?: qemu_chr_new(label, "null", NULL), DEVICE_NATIVE_ENDIAN); return s; @@ -76,27 +76,27 @@ static uint64_t omap_uart_read(void *opaque, hwaddr addr, unsigned size) } switch (addr) { - case 0x20: /* MDR1 */ + case 0x20: /* MDR1 */ return s->mdr[0]; - case 0x24: /* MDR2 */ + case 0x24: /* MDR2 */ return s->mdr[1]; - case 0x40: /* SCR */ + case 0x40: /* SCR */ return s->scr; - case 0x44: /* SSR */ + case 0x44: /* SSR */ return 0x0; - case 0x48: /* EBLR (OMAP2) */ + case 0x48: /* EBLR (OMAP2) */ return s->eblr; - case 0x4C: /* OSC_12M_SEL (OMAP1) */ + case 0x4C: /* OSC_12M_SEL (OMAP1) */ return s->clksel; - case 0x50: /* MVR */ + case 0x50: /* MVR */ return 0x30; - case 0x54: /* SYSC (OMAP2) */ + case 0x54: /* SYSC (OMAP2) */ return s->syscontrol; - case 0x58: /* SYSS (OMAP2) */ + case 0x58: /* SYSS (OMAP2) */ return 1; - case 0x5c: /* WER (OMAP2) */ + case 0x5c: /* WER (OMAP2) */ return s->wkup; - case 0x60: /* CFPS (OMAP2) */ + case 0x60: /* CFPS (OMAP2) */ return s->cfps; } @@ -115,35 +115,36 @@ static void omap_uart_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x20: /* MDR1 */ + case 0x20: /* MDR1 */ s->mdr[0] = value & 0x7f; break; - case 0x24: /* MDR2 */ + case 0x24: /* MDR2 */ s->mdr[1] = value & 0xff; break; - case 0x40: /* SCR */ + case 0x40: /* SCR */ s->scr = value & 0xff; break; - case 0x48: /* EBLR (OMAP2) */ + case 0x48: /* EBLR (OMAP2) */ s->eblr = value & 0xff; break; - case 0x4C: /* OSC_12M_SEL (OMAP1) */ + case 0x4C: /* OSC_12M_SEL (OMAP1) */ s->clksel = value & 1; break; - case 0x44: /* SSR */ - case 0x50: /* MVR */ - case 0x58: /* 
SYSS (OMAP2) */ + case 0x44: /* SSR */ + case 0x50: /* MVR */ + case 0x58: /* SYSS (OMAP2) */ OMAP_RO_REG(addr); break; - case 0x54: /* SYSC (OMAP2) */ + case 0x54: /* SYSC (OMAP2) */ s->syscontrol = value & 0x1d; - if (value & 2) + if (value & 2) { omap_uart_reset(s); + } break; - case 0x5c: /* WER (OMAP2) */ + case 0x5c: /* WER (OMAP2) */ s->wkup = value & 0x7f; break; - case 0x60: /* CFPS (OMAP2) */ + case 0x60: /* CFPS (OMAP2) */ s->cfps = value & 0xff; break; default: diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c index 02f666308c..fc5dcac0c4 100644 --- a/hw/char/stm32l4x5_usart.c +++ b/hw/char/stm32l4x5_usart.c @@ -56,7 +56,7 @@ REG32(CR1, 0x00) FIELD(CR1, UE, 0, 1) /* USART enable */ REG32(CR2, 0x04) FIELD(CR2, ADD_1, 28, 4) /* ADD[7:4] */ - FIELD(CR2, ADD_0, 24, 1) /* ADD[3:0] */ + FIELD(CR2, ADD_0, 24, 4) /* ADD[3:0] */ FIELD(CR2, RTOEN, 23, 1) /* Receiver timeout enable */ FIELD(CR2, ABRMOD, 21, 2) /* Auto baud rate mode */ FIELD(CR2, ABREN, 20, 1) /* Auto baud rate enable */ diff --git a/hw/dma/xlnx_dpdma.c b/hw/dma/xlnx_dpdma.c index 530717d188..dde4aeca40 100644 --- a/hw/dma/xlnx_dpdma.c +++ b/hw/dma/xlnx_dpdma.c @@ -614,6 +614,65 @@ static void xlnx_dpdma_register_types(void) type_register_static(&xlnx_dpdma_info); } +static MemTxResult xlnx_dpdma_read_descriptor(XlnxDPDMAState *s, + uint64_t desc_addr, + DPDMADescriptor *desc) +{ + MemTxResult res = dma_memory_read(&address_space_memory, desc_addr, + &desc, sizeof(DPDMADescriptor), + MEMTXATTRS_UNSPECIFIED); + if (res) { + return res; + } + + /* Convert from LE into host endianness. */ + desc->control = le32_to_cpu(desc->control); + desc->descriptor_id = le32_to_cpu(desc->descriptor_id); + desc->xfer_size = le32_to_cpu(desc->xfer_size); + desc->line_size_stride = le32_to_cpu(desc->line_size_stride); + desc->timestamp_lsb = le32_to_cpu(desc->timestamp_lsb); + desc->timestamp_msb = le32_to_cpu(desc->timestamp_msb); + desc->address_extension = le32_to_cpu(desc->address_extension); + desc->next_descriptor = le32_to_cpu(desc->next_descriptor); + desc->source_address = le32_to_cpu(desc->source_address); + desc->address_extension_23 = le32_to_cpu(desc->address_extension_23); + desc->address_extension_45 = le32_to_cpu(desc->address_extension_45); + desc->source_address2 = le32_to_cpu(desc->source_address2); + desc->source_address3 = le32_to_cpu(desc->source_address3); + desc->source_address4 = le32_to_cpu(desc->source_address4); + desc->source_address5 = le32_to_cpu(desc->source_address5); + desc->crc = le32_to_cpu(desc->crc); + + return res; +} + +static MemTxResult xlnx_dpdma_write_descriptor(uint64_t desc_addr, + DPDMADescriptor *desc) +{ + DPDMADescriptor tmp_desc = *desc; + + /* Convert from host endianness into LE. 
*/ + tmp_desc.control = cpu_to_le32(tmp_desc.control); + tmp_desc.descriptor_id = cpu_to_le32(tmp_desc.descriptor_id); + tmp_desc.xfer_size = cpu_to_le32(tmp_desc.xfer_size); + tmp_desc.line_size_stride = cpu_to_le32(tmp_desc.line_size_stride); + tmp_desc.timestamp_lsb = cpu_to_le32(tmp_desc.timestamp_lsb); + tmp_desc.timestamp_msb = cpu_to_le32(tmp_desc.timestamp_msb); + tmp_desc.address_extension = cpu_to_le32(tmp_desc.address_extension); + tmp_desc.next_descriptor = cpu_to_le32(tmp_desc.next_descriptor); + tmp_desc.source_address = cpu_to_le32(tmp_desc.source_address); + tmp_desc.address_extension_23 = cpu_to_le32(tmp_desc.address_extension_23); + tmp_desc.address_extension_45 = cpu_to_le32(tmp_desc.address_extension_45); + tmp_desc.source_address2 = cpu_to_le32(tmp_desc.source_address2); + tmp_desc.source_address3 = cpu_to_le32(tmp_desc.source_address3); + tmp_desc.source_address4 = cpu_to_le32(tmp_desc.source_address4); + tmp_desc.source_address5 = cpu_to_le32(tmp_desc.source_address5); + tmp_desc.crc = cpu_to_le32(tmp_desc.crc); + + return dma_memory_write(&address_space_memory, desc_addr, &tmp_desc, + sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED); +} + size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel, bool one_desc) { @@ -651,8 +710,7 @@ size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel, desc_addr = xlnx_dpdma_descriptor_next_address(s, channel); } - if (dma_memory_read(&address_space_memory, desc_addr, &desc, - sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED)) { + if (xlnx_dpdma_read_descriptor(s, desc_addr, &desc)) { s->registers[DPDMA_EISR] |= ((1 << 1) << channel); xlnx_dpdma_update_irq(s); s->operation_finished[channel] = true; @@ -755,8 +813,10 @@ size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel, /* The descriptor need to be updated when it's completed. 
*/ DPRINTF("update the descriptor with the done flag set.\n"); xlnx_dpdma_desc_set_done(&desc); - dma_memory_write(&address_space_memory, desc_addr, &desc, - sizeof(DPDMADescriptor), MEMTXATTRS_UNSPECIFIED); + if (xlnx_dpdma_write_descriptor(desc_addr, &desc)) { + DPRINTF("Can't write the descriptor.\n"); + /* TODO: check hardware behaviour for memory write failure */ + } } if (xlnx_dpdma_desc_completion_interrupt(&desc)) { diff --git a/hw/gpio/zaurus.c b/hw/gpio/zaurus.c index 5884804c58..7342440b95 100644 --- a/hw/gpio/zaurus.c +++ b/hw/gpio/zaurus.c @@ -49,19 +49,20 @@ struct ScoopInfo { uint16_t isr; }; -#define SCOOP_MCR 0x00 -#define SCOOP_CDR 0x04 -#define SCOOP_CSR 0x08 -#define SCOOP_CPR 0x0c -#define SCOOP_CCR 0x10 -#define SCOOP_IRR_IRM 0x14 -#define SCOOP_IMR 0x18 -#define SCOOP_ISR 0x1c -#define SCOOP_GPCR 0x20 -#define SCOOP_GPWR 0x24 -#define SCOOP_GPRR 0x28 - -static inline void scoop_gpio_handler_update(ScoopInfo *s) { +#define SCOOP_MCR 0x00 +#define SCOOP_CDR 0x04 +#define SCOOP_CSR 0x08 +#define SCOOP_CPR 0x0c +#define SCOOP_CCR 0x10 +#define SCOOP_IRR_IRM 0x14 +#define SCOOP_IMR 0x18 +#define SCOOP_ISR 0x1c +#define SCOOP_GPCR 0x20 +#define SCOOP_GPWR 0x24 +#define SCOOP_GPRR 0x28 + +static inline void scoop_gpio_handler_update(ScoopInfo *s) +{ uint32_t level, diff; int bit; level = s->gpio_level & s->gpio_dir; @@ -125,8 +126,9 @@ static void scoop_write(void *opaque, hwaddr addr, break; case SCOOP_CPR: s->power = value; - if (value & 0x80) + if (value & 0x80) { s->power |= 0x8040; + } break; case SCOOP_CCR: s->ccr = value; @@ -145,7 +147,7 @@ static void scoop_write(void *opaque, hwaddr addr, scoop_gpio_handler_update(s); break; case SCOOP_GPWR: - case SCOOP_GPRR: /* GPRR is probably R/O in real HW */ + case SCOOP_GPRR: /* GPRR is probably R/O in real HW */ s->gpio_level = value & s->gpio_dir; scoop_gpio_handler_update(s); break; @@ -166,10 +168,11 @@ static void scoop_gpio_set(void *opaque, int line, int level) { ScoopInfo *s = (ScoopInfo *) opaque; - if (level) + if (level) { s->gpio_level |= (1 << line); - else + } else { s->gpio_level &= ~(1 << line); + } } static void scoop_init(Object *obj) @@ -203,7 +206,7 @@ static int scoop_post_load(void *opaque, int version_id) return 0; } -static bool is_version_0 (void *opaque, int version_id) +static bool is_version_0(void *opaque, int version_id) { return version_id == 0; } @@ -265,7 +268,7 @@ type_init(scoop_register_types) /* Write the bootloader parameters memory area. 
*/ -#define MAGIC_CHG(a, b, c, d) ((d << 24) | (c << 16) | (b << 8) | a) +#define MAGIC_CHG(a, b, c, d) ((d << 24) | (c << 16) | (b << 8) | a) static struct QEMU_PACKED sl_param_info { uint32_t comadj_keyword; @@ -286,16 +289,16 @@ static struct QEMU_PACKED sl_param_info { uint32_t phad_keyword; int32_t phadadj; } zaurus_bootparam = { - .comadj_keyword = MAGIC_CHG('C', 'M', 'A', 'D'), - .comadj = 125, - .uuid_keyword = MAGIC_CHG('U', 'U', 'I', 'D'), - .uuid = { -1 }, - .touch_keyword = MAGIC_CHG('T', 'U', 'C', 'H'), - .touch_xp = -1, - .adadj_keyword = MAGIC_CHG('B', 'V', 'A', 'D'), - .adadj = -1, - .phad_keyword = MAGIC_CHG('P', 'H', 'A', 'D'), - .phadadj = 0x01, + .comadj_keyword = MAGIC_CHG('C', 'M', 'A', 'D'), + .comadj = 125, + .uuid_keyword = MAGIC_CHG('U', 'U', 'I', 'D'), + .uuid = { -1 }, + .touch_keyword = MAGIC_CHG('T', 'U', 'C', 'H'), + .touch_xp = -1, + .adadj_keyword = MAGIC_CHG('B', 'V', 'A', 'D'), + .adadj = -1, + .phad_keyword = MAGIC_CHG('P', 'H', 'A', 'D'), + .phadadj = 0x01, }; void sl_bootparam_write(hwaddr ptr) diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c index 941f163d36..54a15d2441 100644 --- a/hw/input/tsc2005.c +++ b/hw/input/tsc2005.c @@ -28,10 +28,10 @@ #include "migration/vmstate.h" #include "trace.h" -#define TSC_CUT_RESOLUTION(value, p) ((value) >> (16 - (p ? 12 : 10))) +#define TSC_CUT_RESOLUTION(value, p) ((value) >> (16 - (p ? 12 : 10))) typedef struct { - qemu_irq pint; /* Combination of the nPENIRQ and DAV signals */ + qemu_irq pint; /* Combination of the nPENIRQ and DAV signals */ QEMUTimer *timer; uint16_t model; @@ -63,7 +63,7 @@ typedef struct { } TSC2005State; enum { - TSC_MODE_XYZ_SCAN = 0x0, + TSC_MODE_XYZ_SCAN = 0x0, TSC_MODE_XY_SCAN, TSC_MODE_X, TSC_MODE_Y, @@ -82,100 +82,100 @@ enum { }; static const uint16_t mode_regs[16] = { - 0xf000, /* X, Y, Z scan */ - 0xc000, /* X, Y scan */ - 0x8000, /* X */ - 0x4000, /* Y */ - 0x3000, /* Z */ - 0x0800, /* AUX */ - 0x0400, /* TEMP1 */ - 0x0200, /* TEMP2 */ - 0x0800, /* AUX scan */ - 0x0040, /* X test */ - 0x0020, /* Y test */ - 0x0080, /* Short-circuit test */ - 0x0000, /* Reserved */ - 0x0000, /* X+, X- drivers */ - 0x0000, /* Y+, Y- drivers */ - 0x0000, /* Y+, X- drivers */ + 0xf000, /* X, Y, Z scan */ + 0xc000, /* X, Y scan */ + 0x8000, /* X */ + 0x4000, /* Y */ + 0x3000, /* Z */ + 0x0800, /* AUX */ + 0x0400, /* TEMP1 */ + 0x0200, /* TEMP2 */ + 0x0800, /* AUX scan */ + 0x0040, /* X test */ + 0x0020, /* Y test */ + 0x0080, /* Short-circuit test */ + 0x0000, /* Reserved */ + 0x0000, /* X+, X- drivers */ + 0x0000, /* Y+, Y- drivers */ + 0x0000, /* Y+, X- drivers */ }; -#define X_TRANSFORM(s) \ +#define X_TRANSFORM(s) \ ((s->y * s->tr[0] - s->x * s->tr[1]) / s->tr[2] + s->tr[3]) -#define Y_TRANSFORM(s) \ +#define Y_TRANSFORM(s) \ ((s->y * s->tr[4] - s->x * s->tr[5]) / s->tr[6] + s->tr[7]) -#define Z1_TRANSFORM(s) \ +#define Z1_TRANSFORM(s) \ ((400 - ((s)->x >> 7) + ((s)->pressure << 10)) << 4) -#define Z2_TRANSFORM(s) \ +#define Z2_TRANSFORM(s) \ ((4000 + ((s)->y >> 7) - ((s)->pressure << 10)) << 4) -#define AUX_VAL (700 << 4) /* +/- 3 at 12-bit */ -#define TEMP1_VAL (1264 << 4) /* +/- 5 at 12-bit */ -#define TEMP2_VAL (1531 << 4) /* +/- 5 at 12-bit */ +#define AUX_VAL (700 << 4) /* +/- 3 at 12-bit */ +#define TEMP1_VAL (1264 << 4) /* +/- 5 at 12-bit */ +#define TEMP2_VAL (1531 << 4) /* +/- 5 at 12-bit */ static uint16_t tsc2005_read(TSC2005State *s, int reg) { uint16_t ret; switch (reg) { - case 0x0: /* X */ + case 0x0: /* X */ s->dav &= ~mode_regs[TSC_MODE_X]; return 
TSC_CUT_RESOLUTION(X_TRANSFORM(s), s->precision) + (s->noise & 3); - case 0x1: /* Y */ + case 0x1: /* Y */ s->dav &= ~mode_regs[TSC_MODE_Y]; - s->noise ++; + s->noise++; return TSC_CUT_RESOLUTION(Y_TRANSFORM(s), s->precision) ^ (s->noise & 3); - case 0x2: /* Z1 */ + case 0x2: /* Z1 */ s->dav &= 0xdfff; return TSC_CUT_RESOLUTION(Z1_TRANSFORM(s), s->precision) - (s->noise & 3); - case 0x3: /* Z2 */ + case 0x3: /* Z2 */ s->dav &= 0xefff; return TSC_CUT_RESOLUTION(Z2_TRANSFORM(s), s->precision) | (s->noise & 3); - case 0x4: /* AUX */ + case 0x4: /* AUX */ s->dav &= ~mode_regs[TSC_MODE_AUX]; return TSC_CUT_RESOLUTION(AUX_VAL, s->precision); - case 0x5: /* TEMP1 */ + case 0x5: /* TEMP1 */ s->dav &= ~mode_regs[TSC_MODE_TEMP1]; return TSC_CUT_RESOLUTION(TEMP1_VAL, s->precision) - (s->noise & 5); - case 0x6: /* TEMP2 */ + case 0x6: /* TEMP2 */ s->dav &= 0xdfff; s->dav &= ~mode_regs[TSC_MODE_TEMP2]; return TSC_CUT_RESOLUTION(TEMP2_VAL, s->precision) ^ (s->noise & 3); - case 0x7: /* Status */ + case 0x7: /* Status */ ret = s->dav | (s->reset << 7) | (s->pdst << 2) | 0x0; s->dav &= ~(mode_regs[TSC_MODE_X_TEST] | mode_regs[TSC_MODE_Y_TEST] | mode_regs[TSC_MODE_TS_TEST]); s->reset = true; return ret; - case 0x8: /* AUX high threshold */ + case 0x8: /* AUX high threshold */ return s->aux_thr[1]; - case 0x9: /* AUX low threshold */ + case 0x9: /* AUX low threshold */ return s->aux_thr[0]; - case 0xa: /* TEMP high threshold */ + case 0xa: /* TEMP high threshold */ return s->temp_thr[1]; - case 0xb: /* TEMP low threshold */ + case 0xb: /* TEMP low threshold */ return s->temp_thr[0]; - case 0xc: /* CFR0 */ + case 0xc: /* CFR0 */ return (s->pressure << 15) | ((!s->busy) << 14) | - (s->nextprecision << 13) | s->timing[0]; - case 0xd: /* CFR1 */ + (s->nextprecision << 13) | s->timing[0]; + case 0xd: /* CFR1 */ return s->timing[1]; - case 0xe: /* CFR2 */ + case 0xe: /* CFR2 */ return (s->pin_func << 14) | s->filter; - case 0xf: /* Function select status */ + case 0xf: /* Function select status */ return s->function >= 0 ? 1 << s->function : 0; } @@ -200,13 +200,14 @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data) s->temp_thr[0] = data; break; - case 0xc: /* CFR0 */ + case 0xc: /* CFR0 */ s->host_mode = (data >> 15) != 0; if (s->enabled != !(data & 0x4000)) { s->enabled = !(data & 0x4000); trace_tsc2005_sense(s->enabled ? 
"enabled" : "disabled"); - if (s->busy && !s->enabled) + if (s->busy && !s->enabled) { timer_del(s->timer); + } s->busy = s->busy && s->enabled; } s->nextprecision = (data >> 13) & 1; @@ -216,10 +217,10 @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data) "tsc2005_write: illegal conversion clock setting\n"); } break; - case 0xd: /* CFR1 */ + case 0xd: /* CFR1 */ s->timing[1] = data & 0xf07; break; - case 0xe: /* CFR2 */ + case 0xe: /* CFR2 */ s->pin_func = (data >> 14) & 3; s->filter = data & 0x3fff; break; @@ -258,10 +259,12 @@ static void tsc2005_pin_update(TSC2005State *s) switch (s->nextfunction) { case TSC_MODE_XYZ_SCAN: case TSC_MODE_XY_SCAN: - if (!s->host_mode && s->dav) + if (!s->host_mode && s->dav) { s->enabled = false; - if (!s->pressure) + } + if (!s->pressure) { return; + } /* Fall through */ case TSC_MODE_AUX_SCAN: break; @@ -269,8 +272,9 @@ static void tsc2005_pin_update(TSC2005State *s) case TSC_MODE_X: case TSC_MODE_Y: case TSC_MODE_Z: - if (!s->pressure) + if (!s->pressure) { return; + } /* Fall through */ case TSC_MODE_AUX: case TSC_MODE_TEMP1: @@ -278,8 +282,9 @@ static void tsc2005_pin_update(TSC2005State *s) case TSC_MODE_X_TEST: case TSC_MODE_Y_TEST: case TSC_MODE_TS_TEST: - if (s->dav) + if (s->dav) { s->enabled = false; + } break; case TSC_MODE_RESERVED: @@ -290,13 +295,14 @@ static void tsc2005_pin_update(TSC2005State *s) return; } - if (!s->enabled || s->busy) + if (!s->enabled || s->busy) { return; + } s->busy = true; s->precision = s->nextprecision; s->function = s->nextfunction; - s->pdst = !s->pnd0; /* Synchronised on internal clock */ + s->pdst = !s->pnd0; /* Synchronised on internal clock */ expires = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (NANOSECONDS_PER_SECOND >> 7); timer_mod(s->timer, expires); @@ -331,7 +337,7 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value) TSC2005State *s = opaque; uint32_t ret = 0; - switch (s->state ++) { + switch (s->state++) { case 0: if (value & 0x80) { /* Command */ @@ -343,8 +349,9 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value) if (s->enabled != !(value & 1)) { s->enabled = !(value & 1); trace_tsc2005_sense(s->enabled ? "enabled" : "disabled"); - if (s->busy && !s->enabled) + if (s->busy && !s->enabled) { timer_del(s->timer); + } s->busy = s->busy && s->enabled; } tsc2005_pin_update(s); @@ -368,10 +375,11 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value) break; case 1: - if (s->command) + if (s->command) { ret = (s->data >> 8) & 0xff; - else + } else { s->data |= value << 8; + } break; case 2: @@ -406,14 +414,18 @@ uint32_t tsc2005_txrx(void *opaque, uint32_t value, int len) static void tsc2005_timer_tick(void *opaque) { TSC2005State *s = opaque; + unsigned int function = s->function; + + assert(function < ARRAY_SIZE(mode_regs)); /* Timer ticked -- a set of conversions has been finished. */ - if (!s->busy) + if (!s->busy) { return; + } s->busy = false; - s->dav |= mode_regs[s->function]; + s->dav |= mode_regs[function]; s->function = -1; tsc2005_pin_update(s); } @@ -435,8 +447,9 @@ static void tsc2005_touchscreen_event(void *opaque, * signaling TS events immediately, but for now we simulate * the first conversion delay for sake of correctness. 
*/ - if (p != s->pressure) + if (p != s->pressure) { tsc2005_pin_update(s); + } } static int tsc2005_post_load(void *opaque, int version_id) diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 074cf50af2..e4b8437f8b 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -1658,7 +1658,7 @@ static MemTxResult gic_cpu_read(GICState *s, int cpu, int offset, *data = s->h_apr[gic_get_vcpu_real_id(cpu)]; } else if (gic_cpu_ns_access(s, cpu, attrs)) { /* NS view of GICC_APR<n> is the top half of GIC_NSAPR<n> */ - *data = gic_apr_ns_view(s, regno, cpu); + *data = gic_apr_ns_view(s, cpu, regno); } else { *data = s->apr[regno][cpu]; } @@ -1746,7 +1746,7 @@ static MemTxResult gic_cpu_write(GICState *s, int cpu, int offset, s->h_apr[gic_get_vcpu_real_id(cpu)] = value; } else if (gic_cpu_ns_access(s, cpu, attrs)) { /* NS view of GICC_APR<n> is the top half of GIC_NSAPR<n> */ - gic_apr_write_ns_view(s, regno, cpu, value); + gic_apr_write_ns_view(s, cpu, regno, value); } else { s->apr[regno][cpu] = value; } diff --git a/target/arm/helper.h b/target/arm/helper.h index 2b02733305..f830531dd3 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -132,12 +132,6 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr) DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr) DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr) -DEF_HELPER_1(vfp_negh, f16, f16) -DEF_HELPER_1(vfp_negs, f32, f32) -DEF_HELPER_1(vfp_negd, f64, f64) -DEF_HELPER_1(vfp_absh, f16, f16) -DEF_HELPER_1(vfp_abss, f32, f32) -DEF_HELPER_1(vfp_absd, f64, f64) DEF_HELPER_2(vfp_sqrth, f16, f16, env) DEF_HELPER_2(vfp_sqrts, f32, f32, env) DEF_HELPER_2(vfp_sqrtd, f64, f64, env) @@ -360,8 +354,6 @@ DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) DEF_HELPER_2(neon_add_u8, i32, i32, i32) DEF_HELPER_2(neon_add_u16, i32, i32, i32) -DEF_HELPER_2(neon_padd_u8, i32, i32, i32) -DEF_HELPER_2(neon_padd_u16, i32, i32, i32) DEF_HELPER_2(neon_sub_u8, i32, i32, i32) DEF_HELPER_2(neon_sub_u16, i32, i32, i32) DEF_HELPER_2(neon_mul_u8, i32, i32, i32) @@ -656,13 +648,6 @@ DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -730,33 +715,43 @@ DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -772,9 +767,11 @@ DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -1042,6 +1039,47 @@ DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "tcg/helper-a64.h" #include "tcg/helper-sve.h" diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 08d0757438..45e2218be5 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -396,85 +396,85 @@ struct hvf_sreg_match { }; static struct hvf_sreg_match hvf_sreg_match[] = { - { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 
0, 7) }, - - { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) }, + { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) }, + + { 
HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) }, #ifdef SYNC_NO_RAW_REGS /* @@ -486,7 +486,7 @@ static struct hvf_sreg_match hvf_sreg_match[] = { { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) }, { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) }, #endif - { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) }, + { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) }, { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) }, { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) }, { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) }, diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 0e7656fd15..f48adef5bb 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -19,11 +19,53 @@ # This file is processed by scripts/decodetree.py # -&r rn -&ri rd imm -&rri_sf rd rn imm sf -&i imm - +%rd 0:5 +%esz_sd 22:1 !function=plus_2 +%esz_hsd 22:2 !function=xor_2 +%hl 11:1 21:1 +%hlm 11:1 20:2 + +&r rn +&ri rd imm +&rri_sf rd rn imm sf +&i imm +&rr_e rd rn esz 
+&rrr_e rd rn rm esz +&rrx_e rd rn rm idx esz +&qrr_e q rd rn esz +&qrrr_e q rd rn rm esz +&qrrx_e q rd rn rm idx esz +&qrrrr_e q rd rn rm ra esz + +@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1 +@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3 +@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd + +@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 +@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd +@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd + +@rrx_h ........ .. .. rm:4 .... . . rn:5 rd:5 &rrx_e esz=1 idx=%hlm +@rrx_s ........ .. . rm:5 .... . . rn:5 rd:5 &rrx_e esz=2 idx=%hl +@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3 + +@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0 +@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0 +@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0 +@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3 +@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3 + +@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0 +@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1 +@qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd +@qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e + +@qrrx_h . q:1 .. .... .. .. rm:4 .... . . rn:5 rd:5 \ + &qrrx_e esz=1 idx=%hlm +@qrrx_s . q:1 .. .... .. . rm:5 .... . . rn:5 rd:5 \ + &qrrx_e esz=2 idx=%hl +@qrrx_d . q:1 .. .... .. . rm:5 .... idx:1 . rn:5 rd:5 \ + &qrrx_e esz=3 ### Data Processing - Immediate @@ -590,3 +632,268 @@ CPYFE 00 011 0 01100 ..... .... 01 ..... ..... @cpy CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy + +### Cryptographic AES + +AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0 +AESD 01001110 00 10100 00101 10 ..... ..... @r2r_q1e0 +AESMC 01001110 00 10100 00110 10 ..... ..... @rr_q1e0 +AESIMC 01001110 00 10100 00111 10 ..... ..... @rr_q1e0 + +### Cryptographic three-register SHA + +SHA1C 0101 1110 000 ..... 000000 ..... ..... @rrr_q1e0 +SHA1P 0101 1110 000 ..... 000100 ..... ..... @rrr_q1e0 +SHA1M 0101 1110 000 ..... 001000 ..... ..... @rrr_q1e0 +SHA1SU0 0101 1110 000 ..... 001100 ..... ..... @rrr_q1e0 +SHA256H 0101 1110 000 ..... 010000 ..... ..... @rrr_q1e0 +SHA256H2 0101 1110 000 ..... 010100 ..... ..... @rrr_q1e0 +SHA256SU1 0101 1110 000 ..... 011000 ..... ..... @rrr_q1e0 + +### Cryptographic two-register SHA + +SHA1H 0101 1110 0010 1000 0000 10 ..... ..... @rr_q1e0 +SHA1SU1 0101 1110 0010 1000 0001 10 ..... ..... @rr_q1e0 +SHA256SU0 0101 1110 0010 1000 0010 10 ..... ..... @rr_q1e0 + +### Cryptographic three-register SHA512 + +SHA512H 1100 1110 011 ..... 100000 ..... ..... @rrr_q1e0 +SHA512H2 1100 1110 011 ..... 100001 ..... ..... @rrr_q1e0 +SHA512SU1 1100 1110 011 ..... 100010 ..... ..... @rrr_q1e0 +RAX1 1100 1110 011 ..... 100011 ..... ..... @rrr_q1e3 +SM3PARTW1 1100 1110 011 ..... 110000 ..... ..... @rrr_q1e0 +SM3PARTW2 1100 1110 011 ..... 110001 ..... ..... @rrr_q1e0 +SM4EKEY 1100 1110 011 ..... 110010 ..... ..... @rrr_q1e0 + +### Cryptographic two-register SHA512 + +SHA512SU0 1100 1110 110 00000 100000 ..... ..... @rr_q1e0 +SM4E 1100 1110 110 00000 100001 ..... ..... @r2r_q1e0 + +### Cryptographic four-register + +EOR3 1100 1110 000 ..... 0 ..... ..... ..... @rrrr_q1e3 +BCAX 1100 1110 001 ..... 0 ..... ..... ..... @rrrr_q1e3 +SM3SS1 1100 1110 010 ..... 0 ..... ..... ..... 
@rrrr_q1e3 + +### Cryptographic three-register, imm2 + +&crypto3i rd rn rm imm +@crypto3i ........ ... rm:5 .. imm:2 .. rn:5 rd:5 &crypto3i + +SM3TT1A 11001110 010 ..... 10 .. 00 ..... ..... @crypto3i +SM3TT1B 11001110 010 ..... 10 .. 01 ..... ..... @crypto3i +SM3TT2A 11001110 010 ..... 10 .. 10 ..... ..... @crypto3i +SM3TT2B 11001110 010 ..... 10 .. 11 ..... ..... @crypto3i + +### Cryptographic XAR + +XAR 1100 1110 100 rm:5 imm:6 rn:5 rd:5 + +### Advanced SIMD scalar copy + +DUP_element_s 0101 1110 000 imm:5 0 0000 1 rn:5 rd:5 + +### Advanced SIMD copy + +DUP_element_v 0 q:1 00 1110 000 imm:5 0 0000 1 rn:5 rd:5 +DUP_general 0 q:1 00 1110 000 imm:5 0 0001 1 rn:5 rd:5 +INS_general 0 1 00 1110 000 imm:5 0 0011 1 rn:5 rd:5 +SMOV 0 q:1 00 1110 000 imm:5 0 0101 1 rn:5 rd:5 +UMOV 0 q:1 00 1110 000 imm:5 0 0111 1 rn:5 rd:5 +INS_element 0 1 10 1110 000 di:5 0 si:4 1 rn:5 rd:5 + +### Advanced SIMD scalar three same + +FADD_s 0001 1110 ..1 ..... 0010 10 ..... ..... @rrr_hsd +FSUB_s 0001 1110 ..1 ..... 0011 10 ..... ..... @rrr_hsd +FDIV_s 0001 1110 ..1 ..... 0001 10 ..... ..... @rrr_hsd +FMUL_s 0001 1110 ..1 ..... 0000 10 ..... ..... @rrr_hsd +FNMUL_s 0001 1110 ..1 ..... 1000 10 ..... ..... @rrr_hsd + +FMAX_s 0001 1110 ..1 ..... 0100 10 ..... ..... @rrr_hsd +FMIN_s 0001 1110 ..1 ..... 0101 10 ..... ..... @rrr_hsd +FMAXNM_s 0001 1110 ..1 ..... 0110 10 ..... ..... @rrr_hsd +FMINNM_s 0001 1110 ..1 ..... 0111 10 ..... ..... @rrr_hsd + +FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h +FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd + +FCMEQ_s 0101 1110 010 ..... 00100 1 ..... ..... @rrr_h +FCMEQ_s 0101 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd + +FCMGE_s 0111 1110 010 ..... 00100 1 ..... ..... @rrr_h +FCMGE_s 0111 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd + +FCMGT_s 0111 1110 110 ..... 00100 1 ..... ..... @rrr_h +FCMGT_s 0111 1110 1.1 ..... 11100 1 ..... ..... @rrr_sd + +FACGE_s 0111 1110 010 ..... 00101 1 ..... ..... @rrr_h +FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd + +FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h +FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd + +FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h +FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd + +FRECPS_s 0101 1110 010 ..... 00111 1 ..... ..... @rrr_h +FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd + +FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h +FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd + +### Advanced SIMD scalar pairwise + +FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h +FADDP_s 0111 1110 0.11 0000 1101 10 ..... ..... @rr_sd + +FMAXP_s 0101 1110 0011 0000 1111 10 ..... ..... @rr_h +FMAXP_s 0111 1110 0.11 0000 1111 10 ..... ..... @rr_sd + +FMINP_s 0101 1110 1011 0000 1111 10 ..... ..... @rr_h +FMINP_s 0111 1110 1.11 0000 1111 10 ..... ..... @rr_sd + +FMAXNMP_s 0101 1110 0011 0000 1100 10 ..... ..... @rr_h +FMAXNMP_s 0111 1110 0.11 0000 1100 10 ..... ..... @rr_sd + +FMINNMP_s 0101 1110 1011 0000 1100 10 ..... ..... @rr_h +FMINNMP_s 0111 1110 1.11 0000 1100 10 ..... ..... @rr_sd + +ADDP_s 0101 1110 1111 0001 1011 10 ..... ..... @rr_d + +### Advanced SIMD three same + +FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h +FADD_v 0.00 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd + +FSUB_v 0.00 1110 110 ..... 00010 1 ..... ..... @qrrr_h +FSUB_v 0.00 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd + +FDIV_v 0.10 1110 010 ..... 00111 1 ..... ..... @qrrr_h +FDIV_v 0.10 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd + +FMUL_v 0.10 1110 010 ..... 
00011 1 ..... ..... @qrrr_h +FMUL_v 0.10 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd + +FMAX_v 0.00 1110 010 ..... 00110 1 ..... ..... @qrrr_h +FMAX_v 0.00 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMIN_v 0.00 1110 110 ..... 00110 1 ..... ..... @qrrr_h +FMIN_v 0.00 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMAXNM_v 0.00 1110 010 ..... 00000 1 ..... ..... @qrrr_h +FMAXNM_v 0.00 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd + +FMINNM_v 0.00 1110 110 ..... 00000 1 ..... ..... @qrrr_h +FMINNM_v 0.00 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd + +FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h +FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd + +FMLA_v 0.00 1110 010 ..... 00001 1 ..... ..... @qrrr_h +FMLA_v 0.00 1110 0.1 ..... 11001 1 ..... ..... @qrrr_sd + +FMLS_v 0.00 1110 110 ..... 00001 1 ..... ..... @qrrr_h +FMLS_v 0.00 1110 1.1 ..... 11001 1 ..... ..... @qrrr_sd + +FMLAL_v 0.00 1110 001 ..... 11101 1 ..... ..... @qrrr_h +FMLSL_v 0.00 1110 101 ..... 11101 1 ..... ..... @qrrr_h +FMLAL2_v 0.10 1110 001 ..... 11001 1 ..... ..... @qrrr_h +FMLSL2_v 0.10 1110 101 ..... 11001 1 ..... ..... @qrrr_h + +FCMEQ_v 0.00 1110 010 ..... 00100 1 ..... ..... @qrrr_h +FCMEQ_v 0.00 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd + +FCMGE_v 0.10 1110 010 ..... 00100 1 ..... ..... @qrrr_h +FCMGE_v 0.10 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd + +FCMGT_v 0.10 1110 110 ..... 00100 1 ..... ..... @qrrr_h +FCMGT_v 0.10 1110 1.1 ..... 11100 1 ..... ..... @qrrr_sd + +FACGE_v 0.10 1110 010 ..... 00101 1 ..... ..... @qrrr_h +FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd + +FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h +FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd + +FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h +FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd + +FRECPS_v 0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h +FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd + +FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h +FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd + +FADDP_v 0.10 1110 010 ..... 00010 1 ..... ..... @qrrr_h +FADDP_v 0.10 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd + +FMAXP_v 0.10 1110 010 ..... 00110 1 ..... ..... @qrrr_h +FMAXP_v 0.10 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMINP_v 0.10 1110 110 ..... 00110 1 ..... ..... @qrrr_h +FMINP_v 0.10 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMAXNMP_v 0.10 1110 010 ..... 00000 1 ..... ..... @qrrr_h +FMAXNMP_v 0.10 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd + +FMINNMP_v 0.10 1110 110 ..... 00000 1 ..... ..... @qrrr_h +FMINNMP_v 0.10 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd + +ADDP_v 0.00 1110 ..1 ..... 10111 1 ..... ..... @qrrr_e +SMAXP_v 0.00 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e +SMINP_v 0.00 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e +UMAXP_v 0.10 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e +UMINP_v 0.10 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e + +AND_v 0.00 1110 001 ..... 00011 1 ..... ..... @qrrr_b +BIC_v 0.00 1110 011 ..... 00011 1 ..... ..... @qrrr_b +ORR_v 0.00 1110 101 ..... 00011 1 ..... ..... @qrrr_b +ORN_v 0.00 1110 111 ..... 00011 1 ..... ..... @qrrr_b +EOR_v 0.10 1110 001 ..... 00011 1 ..... ..... @qrrr_b +BSL_v 0.10 1110 011 ..... 00011 1 ..... ..... @qrrr_b +BIT_v 0.10 1110 101 ..... 00011 1 ..... ..... @qrrr_b +BIF_v 0.10 1110 111 ..... 00011 1 ..... ..... @qrrr_b + +### Advanced SIMD scalar x indexed element + +FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h +FMUL_si 0101 1111 10 . ..... 1001 . 0 ..... ..... 
@rrx_s +FMUL_si 0101 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d + +FMLA_si 0101 1111 00 .. .... 0001 . 0 ..... ..... @rrx_h +FMLA_si 0101 1111 10 .. .... 0001 . 0 ..... ..... @rrx_s +FMLA_si 0101 1111 11 0. .... 0001 . 0 ..... ..... @rrx_d + +FMLS_si 0101 1111 00 .. .... 0101 . 0 ..... ..... @rrx_h +FMLS_si 0101 1111 10 .. .... 0101 . 0 ..... ..... @rrx_s +FMLS_si 0101 1111 11 0. .... 0101 . 0 ..... ..... @rrx_d + +FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h +FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s +FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d + +### Advanced SIMD vector x indexed element + +FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h +FMUL_vi 0.00 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s +FMUL_vi 0.00 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d + +FMLA_vi 0.00 1111 00 .. .... 0001 . 0 ..... ..... @qrrx_h +FMLA_vi 0.00 1111 10 . ..... 0001 . 0 ..... ..... @qrrx_s +FMLA_vi 0.00 1111 11 0 ..... 0001 . 0 ..... ..... @qrrx_d + +FMLS_vi 0.00 1111 00 .. .... 0101 . 0 ..... ..... @qrrx_h +FMLS_vi 0.00 1111 10 . ..... 0101 . 0 ..... ..... @qrrx_s +FMLS_vi 0.00 1111 11 0 ..... 0101 . 0 ..... ..... @qrrx_d + +FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h +FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s +FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d + +FMLAL_vi 0.00 1111 10 .. .... 0000 . 0 ..... ..... @qrrx_h +FMLSL_vi 0.00 1111 10 .. .... 0100 . 0 ..... ..... @qrrx_h +FMLAL2_vi 0.10 1111 10 .. .... 1000 . 0 ..... ..... @qrrx_h +FMLSL2_vi 0.10 1111 10 .. .... 1100 . 0 ..... ..... @qrrx_h diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c new file mode 100644 index 0000000000..22c9d17dce --- /dev/null +++ b/target/arm/tcg/gengvec.c @@ -0,0 +1,1672 @@ +/* + * ARM generic vector expansion + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "translate.h" + + +static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz, + gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, + opr_sz, max_sz, 0, fn); +} + +void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +#define GEN_CMP0(NAME, COND) \ + void NAME(unsigned vece, uint32_t d, uint32_t m, \ + uint32_t opr_sz, uint32_t max_sz) \ + { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } + +GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) +GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) +GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) +GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) +GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) + +#undef GEN_CMP0 + +static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_sari_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. 
+ */ + shift = MIN(shift, (8 << vece) - 1); + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} + +static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64, }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in all zeros as input to accumulate: nop. + */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } else { + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} + +/* + * Shift one less than the requested amount, and the low bit is + * the rounding bit. For the 8 and 16-bit operations, because we + * mask the low bit, we can perform a normal integer shift instead + * of a vector shift. 
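[Editor's note] The two accumulate expanders above differ only in how the architecturally valid shift of esize is folded away: the signed form clamps to esize-1 (all sign bits still accumulate), while the unsigned form contributes zero and degenerates to a move that clears the tail. A scalar sketch of those corner cases for bytes (model names are made up; assumes the compiler implements >> on negative values as an arithmetic shift, as GCC and Clang do):

    #include <stdint.h>

    static uint8_t ssra8_model(uint8_t d, int8_t a, unsigned sh) /* 1..8 */
    {
        if (sh > 7) {
            sh = 7;              /* shift by 8 == replicate the sign bit */
        }
        return (uint8_t)(d + (a >> sh));
    }

    static uint8_t usra8_model(uint8_t d, uint8_t a, unsigned sh) /* 1..8 */
    {
        return (sh == 8) ? d : (uint8_t)(d + (a >> sh));  /* 8: adds 0 */
    }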
+ */ +static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sar8i_i64(d, a, sh); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sar16i_i64(d, a, sh); + tcg_gen_vec_add16_i64(d, d, t); +} + +void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t; + + /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ + if (sh == 32) { + tcg_gen_movi_i32(d, 0); + return; + } + t = tcg_temp_new_i32(); + tcg_gen_extract_i32(t, a, sh - 1, 1); + tcg_gen_sari_i32(d, a, sh); + tcg_gen_add_i32(d, d, t); +} + + void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_extract_i64(t, a, sh - 1, 1); + tcg_gen_sari_i64(d, a, sh); + tcg_gen_add_i64(d, d, t); +} + +static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec ones = tcg_temp_new_vec_matching(d); + + tcg_gen_shri_vec(vece, t, a, sh - 1); + tcg_gen_dupi_vec(vece, ones, 1); + tcg_gen_and_vec(vece, t, t, ones); + tcg_gen_sari_vec(vece, d, a, sh); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srshr8_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srshr16_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srshr32_i32, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_srshr64_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. 
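[Editor's note] The rounding trick above — take bit sh-1 of the input as a +1 increment after the main shift — is what all four element widths implement. A scalar reference model (srshr32_model is an illustrative name; 1 <= sh <= 32, as the tszimm encoding guarantees):

    #include <stdint.h>

    static int32_t srshr32_model(int32_t a, unsigned sh)
    {
        if (sh == 32) {
            return 0;    /* (-1 + 1) >> 1 and (0 + 1) >> 1 are both 0 */
        }
        int32_t round = (a >> (sh - 1)) & 1;    /* the last bit shifted out */
        return (a >> sh) + round;               /* assumes arithmetic >> */
    }

For example srshr32_model(3, 1) gives (3 >> 1) + 1 == 2, matching the architectural (a + (1 << (sh-1))) >> sh.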
+ */ + tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_srshr8_i64(t, a, sh); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_srshr16_i64(t, a, sh); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + gen_srshr32_i32(t, a, sh); + tcg_gen_add_i32(d, d, t); +} + +static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_srshr64_i64(t, a, sh); + tcg_gen_add_i64(d, d, t); +} + +static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + gen_srshr_vec(vece, t, a, sh); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srsra8_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_srsra16_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_srsra32_i32, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_srsra64_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. With accumulation, this leaves D unchanged. + */ + if (shift == (8 << vece)) { + /* Nop, but we do need to clear the tail. 
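[Editor's note] Composition keeps the accumulating variants trivial: each srsraN above is just srshrN into a temporary plus an add, so the esize-shift case reduces to adding zero, leaving only the tail clear for the gvec move to do. In scalar terms, reusing the sketch above:

    static int32_t srsra32_model(int32_t d, int32_t a, unsigned sh)
    {
        return d + srshr32_model(a, sh);   /* sh == 32 adds 0: d unchanged */
    }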
*/ + tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_shr8i_i64(d, a, sh); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_shr16i_i64(d, a, sh); + tcg_gen_vec_add16_i64(d, d, t); +} + +void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t; + + /* Handle shift by the input size for the benefit of trans_URSHR_ri */ + if (sh == 32) { + tcg_gen_extract_i32(d, a, sh - 1, 1); + return; + } + t = tcg_temp_new_i32(); + tcg_gen_extract_i32(t, a, sh - 1, 1); + tcg_gen_shri_i32(d, a, sh); + tcg_gen_add_i32(d, d, t); +} + +void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_extract_i64(t, a, sh - 1, 1); + tcg_gen_shri_i64(d, a, sh); + tcg_gen_add_i64(d, d, t); +} + +static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec ones = tcg_temp_new_vec_matching(d); + + tcg_gen_shri_vec(vece, t, a, shift - 1); + tcg_gen_dupi_vec(vece, ones, 1); + tcg_gen_and_vec(vece, t, t, ones); + tcg_gen_shri_vec(vece, d, a, shift); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_urshr8_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urshr16_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urshr32_i32, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_urshr64_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in zero. With rounding, this produces a + * copy of the most significant bit. 
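[Editor's note] For the unsigned rounding shift the esize edge case is not zero: with sh == esize the true value is (a + 2^(esize-1)) >> esize, i.e. exactly the most significant bit of a, which is why the expander can emit a plain shift by esize-1. Scalar model (urshr32_model is illustrative):

    #include <stdint.h>

    static uint32_t urshr32_model(uint32_t a, unsigned sh)   /* 1..32 */
    {
        if (sh == 32) {
            return a >> 31;                /* carry-out of the rounding add */
        }
        return (a >> sh) + ((a >> (sh - 1)) & 1);
    }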
+ */ + tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + if (sh == 8) { + tcg_gen_vec_shr8i_i64(t, a, 7); + } else { + gen_urshr8_i64(t, a, sh); + } + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + if (sh == 16) { + tcg_gen_vec_shr16i_i64(t, a, 15); + } else { + gen_urshr16_i64(t, a, sh); + } + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + if (sh == 32) { + tcg_gen_shri_i32(t, a, 31); + } else { + gen_urshr32_i32(t, a, sh); + } + tcg_gen_add_i32(d, d, t); +} + +static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + if (sh == 64) { + tcg_gen_shri_i64(t, a, 63); + } else { + gen_urshr64_i64(t, a, sh); + } + tcg_gen_add_i64(d, d, t); +} + +static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + if (sh == (8 << vece)) { + tcg_gen_shri_vec(vece, t, a, sh - 1); + } else { + gen_urshr_vec(vece, t, a, sh); + } + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ursra8_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_ursra16_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_ursra32_i32, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_ursra64_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} + +static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); +} + +static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); +} + +static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = 
tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); +} + +void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* Shift of esize leaves destination unchanged. */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } else { + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} + +static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); +} + +static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); +} + +static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_shli_vec(vece, t, a, sh); + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); +} + +void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + 
.vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [0..esize-1]. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift < (8 << vece)); + + if (shift == 0) { + tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. + */ +void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +/* CMTST : test is "if (X & Y != 0)". 
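[Editor's note] Read the quoted condition as (X & Y) != 0 — in C the literal "X & Y != 0" would bind as X & (Y != 0). Each lane becomes all-ones when the operands share any set bit and all-zeros otherwise, which is what the negsetcond sequence below computes. A scalar sketch:

    #include <stdint.h>

    static uint32_t cmtst32_model(uint32_t a, uint32_t b)
    {
        return (a & b) ? UINT32_MAX : 0;   /* i.e. -(uint32_t)((a & b) != 0) */
    }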
*/ +static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_and_i32(d, a, b); + tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0)); +} + +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_and_i64(d, a, b); + tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0)); +} + +static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_and_vec(vece, d, a, b); + tcg_gen_dupi_vec(vece, a, 0); + tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); +} + +void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(); + TCGv_i32 rval = tcg_temp_new_i32(); + TCGv_i32 lsh = tcg_temp_new_i32(); + TCGv_i32 rsh = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 max = tcg_constant_i32(32); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(lsh, shift); + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_shl_i32(lval, src, lsh); + tcg_gen_shr_i32(rval, src, rsh); + tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); +} + +void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(); + TCGv_i64 rval = tcg_temp_new_i64(); + TCGv_i64 lsh = tcg_temp_new_i64(); + TCGv_i64 rsh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 max = tcg_constant_i64(64); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i64(lsh, shift); + tcg_gen_neg_i64(rsh, lsh); + tcg_gen_shl_i64(lval, src, lsh); + tcg_gen_shr_i64(rval, src, rsh); + tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); +} + +static void gen_ushl_vec(unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(dst); + TCGv_vec rval = tcg_temp_new_vec_matching(dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(dst); + TCGv_vec msk, max; + + tcg_gen_neg_vec(vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(lsh, shift); + } else { + msk = tcg_temp_new_vec_matching(dst); + tcg_gen_dupi_vec(vece, msk, 0xff); + tcg_gen_and_vec(vece, lsh, shift, msk); + tcg_gen_and_vec(vece, rsh, rsh, msk); + } + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. 
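[Editor's note] The movcond pair above implements the architectural USHL rule: the shift count is the signed low byte of the second operand, negative counts shift right, and any magnitude of esize or more produces zero. A portable scalar model that sidesteps the out-of-range shifts the TCG code is allowed to emit (ushl32_model is a made-up name):

    #include <stdint.h>

    static uint32_t ushl32_model(uint32_t src, uint32_t shift)
    {
        int8_t sh = (int8_t)shift;         /* only the low byte is significant */

        if (sh >= 32 || sh <= -32) {
            return 0;                      /* every bit shifted out */
        }
        return (sh >= 0) ? src << sh : src >> -sh;
    }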
+ */ + tcg_gen_shlv_vec(vece, lval, src, lsh); + tcg_gen_shrv_vec(vece, rval, src, rsh); + + max = tcg_temp_new_vec_matching(dst); + tcg_gen_dupi_vec(vece, max, 8 << vece); + + /* + * The choice of LT (signed) and GEU (unsigned) are biased toward + * the instructions of the x86_64 host. For MO_8, the whole byte + * is significant so we must use an unsigned compare; otherwise we + * have already masked to a byte and so a signed compare works. + * Other tcg hosts have a full set of comparisons and do not care. + */ + if (vece == MO_8) { + tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); + tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); + tcg_gen_andc_vec(vece, lval, lval, lsh); + tcg_gen_andc_vec(vece, rval, rval, rsh); + } else { + tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); + tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); + tcg_gen_and_vec(vece, lval, lval, lsh); + tcg_gen_and_vec(vece, rval, rval, rsh); + } + tcg_gen_or_vec(vece, dst, lval, rval); +} + +void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(); + TCGv_i32 rval = tcg_temp_new_i32(); + TCGv_i32 lsh = tcg_temp_new_i32(); + TCGv_i32 rsh = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 max = tcg_constant_i32(31); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(lsh, shift); + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_shl_i32(lval, src, lsh); + tcg_gen_umin_i32(rsh, rsh, max); + tcg_gen_sar_i32(rval, src, rsh); + tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); +} + +void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(); + TCGv_i64 rval = tcg_temp_new_i64(); + TCGv_i64 lsh = tcg_temp_new_i64(); + TCGv_i64 rsh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 max = tcg_constant_i64(63); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. 
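[Editor's note] SSHL differs from USHL only on the right-shift side: an arithmetic shift by esize or more must replicate the sign bit rather than produce zero, hence the umin clamp to 31/63 before the sar. Scalar model under the same hedges as the USHL sketch above:

    #include <stdint.h>

    static int32_t sshl32_model(int32_t src, uint32_t shift)
    {
        int8_t sh = (int8_t)shift;

        if (sh >= 32) {
            return 0;                      /* left over-shift: all bits gone */
        }
        if (sh >= 0) {
            return (int32_t)((uint32_t)src << sh);
        }
        if (sh <= -32) {
            sh = -31;                      /* clamp: result is all sign bits */
        }
        return src >> -sh;                 /* assumes arithmetic >> */
    }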
+ */ + tcg_gen_ext8s_i64(lsh, shift); + tcg_gen_neg_i64(rsh, lsh); + tcg_gen_shl_i64(lval, src, lsh); + tcg_gen_umin_i64(rsh, rsh, max); + tcg_gen_sar_i64(rval, src, rsh); + tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); +} + +static void gen_sshl_vec(unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(dst); + TCGv_vec rval = tcg_temp_new_vec_matching(dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(dst); + TCGv_vec tmp = tcg_temp_new_vec_matching(dst); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_neg_vec(vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(lsh, shift); + } else { + tcg_gen_dupi_vec(vece, tmp, 0xff); + tcg_gen_and_vec(vece, lsh, shift, tmp); + tcg_gen_and_vec(vece, rsh, rsh, tmp); + } + + /* Bound rsh so out of bound right shift gets -1. */ + tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); + tcg_gen_umin_vec(vece, rsh, rsh, tmp); + tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); + + tcg_gen_shlv_vec(vece, lval, src, lsh); + tcg_gen_sarv_vec(vece, rval, src, rsh); + + /* Select in-bound left shift. */ + tcg_gen_andc_vec(vece, lval, lval, tmp); + + /* Select between left and right shift. */ + if (vece == MO_8) { + tcg_gen_dupi_vec(vece, tmp, 0); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); + } else { + tcg_gen_dupi_vec(vece, tmp, 0x80); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); + } +} + +void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_usadd_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} + +void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_d, + .write_aofs = true, + 
.opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_ssadd_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} + +void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_ussub_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} + +void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_sssub_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} + +void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_d, + 
.opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_sub_i32(t, a, b); + tcg_gen_sub_i32(d, b, a); + tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); +} + +static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t, a, b); + tcg_gen_sub_i64(d, b, a); + tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); +} + +static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_smin_vec(vece, t, a, b); + tcg_gen_smax_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sabd_i32, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sabd_i64, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_sub_i32(t, a, b); + tcg_gen_sub_i32(d, b, a); + tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); +} + +static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t, a, b); + tcg_gen_sub_i64(d, b, a); + tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); +} + +static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_umin_vec(vece, t, a, b); + tcg_gen_umax_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uabd_i32, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_uabd_i64, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + gen_sabd_i32(t, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + gen_sabd_i64(t, a, b); + tcg_gen_add_i64(d, d, t); +} + +static void 
gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + gen_sabd_vec(vece, t, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_saba_i32, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_saba_i64, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + gen_uabd_i32(t, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + gen_uabd_i64(t, a, b); + tcg_gen_add_i64(d, d, t); +} + +static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + gen_uabd_vec(vece, t, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_uaba_i32, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_uaba_i64, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_addp_b, + gen_helper_gvec_addp_h, + gen_helper_gvec_addp_s, + gen_helper_gvec_addp_d, + }; + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_smaxp_b, + gen_helper_gvec_smaxp_h, + gen_helper_gvec_smaxp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_sminp_b, + 
gen_helper_gvec_sminp_h, + gen_helper_gvec_sminp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_umaxp_b, + gen_helper_gvec_umaxp_h, + gen_helper_gvec_umaxp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_uminp_b, + gen_helper_gvec_uminp_h, + gen_helper_gvec_uminp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c new file mode 100644 index 0000000000..093b498b13 --- /dev/null +++ b/target/arm/tcg/gengvec64.c @@ -0,0 +1,190 @@ +/* + * AArch64 generic vector expansion + * + * Copyright (c) 2013 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
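[Editor's note] The pairwise expanders above are all out-of-line because each result lane depends on two adjacent input lanes of the concatenation n:m, which does not map onto the per-lane gvec operations. The semantics, scalar-style (an illustrative ADDP over uint32_t lanes; lane count must be even, and this sketch ignores the in-place aliasing the real helpers must handle):

    #include <stddef.h>
    #include <stdint.h>

    /* First half of d pairs up lanes of n, second half pairs up lanes of m. */
    static void addp_model(uint32_t *d, const uint32_t *n,
                           const uint32_t *m, size_t lanes)
    {
        for (size_t i = 0; i < lanes / 2; i++) {
            d[i]             = n[2 * i] + n[2 * i + 1];
            d[lanes / 2 + i] = m[2 * i] + m[2 * i + 1];
        }
    }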
+ */ + +#include "qemu/osdep.h" +#include "translate.h" +#include "translate-a64.h" + + +static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_rotli_i64(d, m, 1); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(vece, d, m, 1); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); +} + +static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + uint64_t mask = dup_const(MO_8, 0xff >> sh); + + tcg_gen_xor_i64(t, n, m); + tcg_gen_shri_i64(d, t, sh); + tcg_gen_shli_i64(t, t, 8 - sh); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(t, t, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + uint64_t mask = dup_const(MO_16, 0xffff >> sh); + + tcg_gen_xor_i64(t, n, m); + tcg_gen_shri_i64(d, t, sh); + tcg_gen_shli_i64(t, t, 16 - sh); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(t, t, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) +{ + tcg_gen_xor_i32(d, n, m); + tcg_gen_rotri_i32(d, d, sh); +} + +static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + tcg_gen_xor_i64(d, n, m); + tcg_gen_rotri_i64(d, d, sh); +} + +static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, int64_t sh) +{ + tcg_gen_xor_vec(vece, d, n, m); + tcg_gen_rotri_vec(vece, d, d, sh); +} + +void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, int64_t shift, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3i ops[4] = { + { .fni8 = gen_xar8_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_b, + .opt_opc = vecop, + .vece = MO_8 }, + { .fni8 = gen_xar16_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_h, + .opt_opc = vecop, + .vece = MO_16 }, + { .fni4 = gen_xar_i32, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_s, + .opt_opc = vecop, + .vece = MO_32 }, + { .fni8 = gen_xar_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_gvec_xar_d, + .opt_opc = vecop, + .vece = MO_64 } + }; + int esize = 8 << vece; + + /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift <= esize); + shift &= esize - 1; + + if (shift == 0) { + /* xar with no rotate devolves to xor. 
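[Editor's note] Folding the two immediate ranges first (SVE2 uses 1..esize, AdvSIMD 0..esize-1) lets shift & (esize - 1) normalise both, after which XAR is literally a rotate-right of an XOR. Scalar model for byte elements (xar8_model is illustrative):

    #include <stdint.h>

    static uint8_t xar8_model(uint8_t n, uint8_t m, unsigned sh)
    {
        uint8_t t = n ^ m;

        sh &= 7;                           /* sh == esize behaves as 0 */
        if (sh == 0) {
            return t;                      /* plain XOR, as noted above */
        }
        return (uint8_t)((t >> sh) | (t << (8 - sh)));
    }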
*/ + tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, + shift, &ops[vece]); + } +} + +static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) +{ + tcg_gen_xor_i64(d, n, m); + tcg_gen_xor_i64(d, d, k); +} + +static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec k) +{ + tcg_gen_xor_vec(vece, d, n, m); + tcg_gen_xor_vec(vece, d, d, k); +} + +void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 op = { + .fni8 = gen_eor3_i64, + .fniv = gen_eor3_vec, + .fno = gen_helper_sve2_eor3, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); +} + +static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) +{ + tcg_gen_andc_i64(d, m, k); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec k) +{ + tcg_gen_andc_vec(vece, d, m, k); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 op = { + .fni8 = gen_bcax_i64, + .fniv = gen_bcax_vec, + .fno = gen_helper_sve2_bcax, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); +} + diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 0518165399..371388f61b 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -132,3 +132,15 @@ DEF_HELPER_4(cpye, void, env, i32, i32, i32) DEF_HELPER_4(cpyfp, void, env, i32, i32, i32) DEF_HELPER_4(cpyfm, void, env, i32, i32, i32) DEF_HELPER_4(cpyfe, void, env, i32, i32, i32) + +DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index 3b1a9f0fc5..508932a249 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -24,6 +24,7 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: gen_a64) arm_ss.add(files( 'cpu32.c', + 'gengvec.c', 'translate.c', 'translate-m-nocp.c', 'translate-mve.c', @@ -42,6 +43,7 @@ arm_ss.add(files( arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', + 'gengvec64.c', 'translate-a64.c', 'translate-sve.c', 'translate-sme.c', diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c index bc6c4a54e9..a0b51c8809 100644 --- a/target/arm/tcg/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -745,11 +745,6 @@ uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) return (a + b) ^ mask; } -#define NEON_FN(dest, src1, src2) dest = src1 + src2 -NEON_POP(padd_u8, neon_u8, 4) -NEON_POP(padd_u16, neon_u16, 2) -#undef NEON_FN - #define 
NEON_FN(dest, src1, src2) dest = src1 - src2 NEON_VOP(sub_u8, neon_u8, 4) NEON_VOP(sub_u16, neon_u16, 2) diff --git a/target/arm/tcg/t32.decode b/target/arm/tcg/t32.decode index f21ad0167a..d327178829 100644 --- a/target/arm/tcg/t32.decode +++ b/target/arm/tcg/t32.decode @@ -458,41 +458,41 @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos # Note that Load, unsigned (literal) overlaps all other load encodings. { { - NOP 1111 1000 -001 1111 1111 ------------ # PLD + PLD 1111 1000 -001 1111 1111 ------------ # (literal) LDRB_ri 1111 1000 .001 1111 .... ............ @ldst_ri_lit } { - NOP 1111 1000 1001 ---- 1111 ------------ # PLD + PLD 1111 1000 1001 ---- 1111 ------------ # (immediate T1) LDRB_ri 1111 1000 1001 .... .... ............ @ldst_ri_pos } LDRB_ri 1111 1000 0001 .... .... 1..1 ........ @ldst_ri_idx { - NOP 1111 1000 0001 ---- 1111 1100 -------- # PLD + PLD 1111 1000 0001 ---- 1111 1100 -------- # (immediate T2) LDRB_ri 1111 1000 0001 .... .... 1100 ........ @ldst_ri_neg } LDRBT_ri 1111 1000 0001 .... .... 1110 ........ @ldst_ri_unp { - NOP 1111 1000 0001 ---- 1111 000000 -- ---- # PLD + PLD 1111 1000 0001 ---- 1111 000000 -- ---- # (register) LDRB_rr 1111 1000 0001 .... .... 000000 .. .... @ldst_rr } } { { - NOP 1111 1000 -011 1111 1111 ------------ # PLD + PLD 1111 1000 -011 1111 1111 ------------ # (literal) LDRH_ri 1111 1000 .011 1111 .... ............ @ldst_ri_lit } { - NOP 1111 1000 1011 ---- 1111 ------------ # PLDW + PLDW 1111 1000 1011 ---- 1111 ------------ # (immediate T1) LDRH_ri 1111 1000 1011 .... .... ............ @ldst_ri_pos } LDRH_ri 1111 1000 0011 .... .... 1..1 ........ @ldst_ri_idx { - NOP 1111 1000 0011 ---- 1111 1100 -------- # PLDW + PLDW 1111 1000 0011 ---- 1111 1100 -------- # (immediate T2) LDRH_ri 1111 1000 0011 .... .... 1100 ........ @ldst_ri_neg } LDRHT_ri 1111 1000 0011 .... .... 1110 ........ @ldst_ri_unp { - NOP 1111 1000 0011 ---- 1111 000000 -- ---- # PLDW + PLDW 1111 1000 0011 ---- 1111 000000 -- ---- # (register) LDRH_rr 1111 1000 0011 .... .... 000000 .. .... @ldst_rr } } @@ -504,24 +504,23 @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos LDRT_ri 1111 1000 0101 .... .... 1110 ........ @ldst_ri_unp LDR_rr 1111 1000 0101 .... .... 000000 .. .... @ldst_rr } -# NOPs here are PLI. { { - NOP 1111 1001 -001 1111 1111 ------------ + PLI 1111 1001 -001 1111 1111 ------------ # (literal T3) LDRSB_ri 1111 1001 .001 1111 .... ............ @ldst_ri_lit } { - NOP 1111 1001 1001 ---- 1111 ------------ + PLI 1111 1001 1001 ---- 1111 ------------ # (immediate T1) LDRSB_ri 1111 1001 1001 .... .... ............ @ldst_ri_pos } LDRSB_ri 1111 1001 0001 .... .... 1..1 ........ @ldst_ri_idx { - NOP 1111 1001 0001 ---- 1111 1100 -------- + PLI 1111 1001 0001 ---- 1111 1100 -------- # (immediate T2) LDRSB_ri 1111 1001 0001 .... .... 1100 ........ @ldst_ri_neg } LDRSBT_ri 1111 1001 0001 .... .... 1110 ........ @ldst_ri_unp { - NOP 1111 1001 0001 ---- 1111 000000 -- ---- + PLI 1111 1001 0001 ---- 1111 000000 -- ---- # (register) LDRSB_rr 1111 1001 0001 .... .... 000000 .. .... @ldst_rr } } diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 4126aaa27e..9167e4d0bd 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1314,6 +1314,67 @@ bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) } /* + * Expanders for AdvSIMD translation functions. 
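[Editor's note] The expanders that follow share one validity rule: a 64-bit element size is only encodable in the full 128-bit (q=1) form, so the combination !a->q && a->esz == MO_64 must decode as UNDEF before any fp_access_check runs. As a standalone sketch of that gate (vector_form_valid is a made-up name; MO_64 has the value 3):

    /* 64-bit lanes require the 128-bit vector form. */
    static inline int vector_form_valid(int q, int esz /* MO_8..MO_64 */)
    {
        return q || esz != 3;
    }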
+ */ + +static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, + gen_helper_gvec_2 *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); + } + return true; +} + +static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3 *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); + } + return true; +} + +static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); + } + return true; +} + +static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) +{ + if (a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); + } + return true; +} + +static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); + } + return true; +} + +/* * This utility function is for doing register extension with an * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. @@ -4560,6 +4621,955 @@ static bool trans_EXTR(DisasContext *s, arg_extract *a) return true; } +/* + * Cryptographic AES, SHA, SHA512 + */ + +TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) +TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) +TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) +TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) + +TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) +TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) +TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) +TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) + +TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) +TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) +TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) + +TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) +TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) +TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) + +TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) +TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) +TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) +TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) +TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) +TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) +TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) + +TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) +TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) + +TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) +TRANS_FEAT(BCAX, aa64_sha3, 
do_gvec_fn4, a, gen_gvec_bcax) + +static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) +{ + if (!dc_isar_feature(aa64_sm3, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_op3 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + unsigned vsz, dofs; + + read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); + read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); + read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); + + tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); + tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); + tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); + tcg_gen_rotri_i32(tcg_res, tcg_res, 25); + + /* Clear the whole register first, then store bits [127:96]. */ + vsz = vec_full_reg_size(s); + dofs = vec_full_reg_offset(s, a->rd); + tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); + write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); + } + return true; +} + +static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); + } + return true; +} +TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) +TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) +TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) +TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) + +static bool trans_XAR(DisasContext *s, arg_XAR *a) +{ + if (!dc_isar_feature(aa64_sha3, s)) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), a->imm, 16, + vec_full_reg_size(s)); + } + return true; +} + +/* + * Advanced SIMD copy + */ + +static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) +{ + unsigned esz = ctz32(imm); + if (esz <= MO_64) { + *pesz = esz; + *pidx = imm >> (esz + 1); + return true; + } + return false; +} + +static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (fp_access_check(s)) { + /* + * This instruction just extracts the specified element and + * zero-extends it into the bottom of the destination register. + */ + TCGv_i64 tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, a->rn, idx, esz); + write_fp_dreg(s, a->rd, tmp); + } + return true; +} + +static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (esz == MO_64 && !a->q) { + return false; + } + if (fp_access_check(s)) { + tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), + vec_reg_offset(s, a->rn, idx, esz), + a->q ? 16 : 8, vec_full_reg_size(s)); + } + return true; +} + +static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (esz == MO_64 && !a->q) { + return false; + } + if (fp_access_check(s)) { + tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), + a->q ? 
16 : 8, vec_full_reg_size(s), + cpu_reg(s, a->rn)); + } + return true; +} + +static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (is_signed) { + if (esz == MO_64 || (esz == MO_32 && !a->q)) { + return false; + } + } else { + if (esz == MO_64 ? !a->q : a->q) { + return false; + } + } + if (fp_access_check(s)) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); + if (is_signed && !a->q) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + } + return true; +} + +TRANS(SMOV, do_smov_umov, a, MO_SIGN) +TRANS(UMOV, do_smov_umov, a, 0) + +static bool trans_INS_general(DisasContext *s, arg_INS_general *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (fp_access_check(s)) { + write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); + clear_vec_high(s, true, a->rd); + } + return true; +} + +static bool trans_INS_element(DisasContext *s, arg_INS_element *a) +{ + MemOp esz; + unsigned didx, sidx; + + if (!decode_esz_idx(a->di, &esz, &didx)) { + return false; + } + sidx = a->si >> esz; + if (fp_access_check(s)) { + TCGv_i64 tmp = tcg_temp_new_i64(); + + read_vec_element(s, tmp, a->rn, sidx, esz); + write_vec_element(s, tmp, a->rd, didx, esz); + + /* INS is considered a 128-bit write for SVE. */ + clear_vec_high(s, true, a->rd); + } + return true; +} + +/* + * Advanced SIMD three same + */ + +typedef struct FPScalar { + void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); + void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); + void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +} FPScalar; + +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = read_fp_dreg(s, a->rm); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rn); + TCGv_i32 t1 = read_fp_sreg(s, a->rm); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rn); + TCGv_i32 t1 = read_fp_hreg(s, a->rm); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + return false; + } + return true; +} + +static const FPScalar f_scalar_fadd = { + gen_helper_vfp_addh, + gen_helper_vfp_adds, + gen_helper_vfp_addd, +}; +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) + +static const FPScalar f_scalar_fsub = { + gen_helper_vfp_subh, + gen_helper_vfp_subs, + gen_helper_vfp_subd, +}; +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) + +static const FPScalar f_scalar_fdiv = { + gen_helper_vfp_divh, + gen_helper_vfp_divs, + gen_helper_vfp_divd, +}; +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) + +static const FPScalar f_scalar_fmul = { + gen_helper_vfp_mulh, + gen_helper_vfp_muls, + gen_helper_vfp_muld, +}; +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) + +static const FPScalar f_scalar_fmax = { + gen_helper_advsimd_maxh, + gen_helper_vfp_maxs, + gen_helper_vfp_maxd, +}; +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) + +static const FPScalar f_scalar_fmin = { + gen_helper_advsimd_minh, + gen_helper_vfp_mins, + gen_helper_vfp_mind, +}; 
+TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) + +static const FPScalar f_scalar_fmaxnm = { + gen_helper_advsimd_maxnumh, + gen_helper_vfp_maxnums, + gen_helper_vfp_maxnumd, +}; +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) + +static const FPScalar f_scalar_fminnm = { + gen_helper_advsimd_minnumh, + gen_helper_vfp_minnums, + gen_helper_vfp_minnumd, +}; +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) + +static const FPScalar f_scalar_fmulx = { + gen_helper_advsimd_mulxh, + gen_helper_vfp_mulxs, + gen_helper_vfp_mulxd, +}; +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) + +static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_mulh(d, n, m, s); + gen_vfp_negh(d, d); +} + +static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_muls(d, n, m, s); + gen_vfp_negs(d, d); +} + +static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_muld(d, n, m, s); + gen_vfp_negd(d, d); +} + +static const FPScalar f_scalar_fnmul = { + gen_fnmul_h, + gen_fnmul_s, + gen_fnmul_d, +}; +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) + +static const FPScalar f_scalar_fcmeq = { + gen_helper_advsimd_ceq_f16, + gen_helper_neon_ceq_f32, + gen_helper_neon_ceq_f64, +}; +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) + +static const FPScalar f_scalar_fcmge = { + gen_helper_advsimd_cge_f16, + gen_helper_neon_cge_f32, + gen_helper_neon_cge_f64, +}; +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) + +static const FPScalar f_scalar_fcmgt = { + gen_helper_advsimd_cgt_f16, + gen_helper_neon_cgt_f32, + gen_helper_neon_cgt_f64, +}; +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) + +static const FPScalar f_scalar_facge = { + gen_helper_advsimd_acge_f16, + gen_helper_neon_acge_f32, + gen_helper_neon_acge_f64, +}; +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) + +static const FPScalar f_scalar_facgt = { + gen_helper_advsimd_acgt_f16, + gen_helper_neon_acgt_f32, + gen_helper_neon_acgt_f64, +}; +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) + +static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subh(d, n, m, s); + gen_vfp_absh(d, d); +} + +static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subs(d, n, m, s); + gen_vfp_abss(d, d); +} + +static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_subd(d, n, m, s); + gen_vfp_absd(d, d); +} + +static const FPScalar f_scalar_fabd = { + gen_fabd_h, + gen_fabd_s, + gen_fabd_d, +}; +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) + +static const FPScalar f_scalar_frecps = { + gen_helper_recpsf_f16, + gen_helper_recpsf_f32, + gen_helper_recpsf_f64, +}; +TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) + +static const FPScalar f_scalar_frsqrts = { + gen_helper_rsqrtsf_f16, + gen_helper_rsqrtsf_f32, + gen_helper_rsqrtsf_f64, +}; +TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) + +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, + gen_helper_gvec_3_ptr * const fns[3]) +{ + MemOp esz = a->esz; + + switch (esz) { + case MO_64: + if (!a->q) { + return false; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + return false; + } + if (fp_access_check(s)) { + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, + esz == MO_16, 0, fns[esz - 1]); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { + gen_helper_gvec_fadd_h, + 
gen_helper_gvec_fadd_s, + gen_helper_gvec_fadd_d, +}; +TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd) + +static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { + gen_helper_gvec_fsub_h, + gen_helper_gvec_fsub_s, + gen_helper_gvec_fsub_d, +}; +TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub) + +static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { + gen_helper_gvec_fdiv_h, + gen_helper_gvec_fdiv_s, + gen_helper_gvec_fdiv_d, +}; +TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv) + +static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { + gen_helper_gvec_fmul_h, + gen_helper_gvec_fmul_s, + gen_helper_gvec_fmul_d, +}; +TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul) + +static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { + gen_helper_gvec_fmax_h, + gen_helper_gvec_fmax_s, + gen_helper_gvec_fmax_d, +}; +TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax) + +static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { + gen_helper_gvec_fmin_h, + gen_helper_gvec_fmin_s, + gen_helper_gvec_fmin_d, +}; +TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { + gen_helper_gvec_fmaxnum_h, + gen_helper_gvec_fmaxnum_s, + gen_helper_gvec_fmaxnum_d, +}; +TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm) + +static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { + gen_helper_gvec_fminnum_h, + gen_helper_gvec_fminnum_s, + gen_helper_gvec_fminnum_d, +}; +TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm) + +static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { + gen_helper_gvec_fmulx_h, + gen_helper_gvec_fmulx_s, + gen_helper_gvec_fmulx_d, +}; +TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx) + +static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { + gen_helper_gvec_vfma_h, + gen_helper_gvec_vfma_s, + gen_helper_gvec_vfma_d, +}; +TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla) + +static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { + gen_helper_gvec_vfms_h, + gen_helper_gvec_vfms_s, + gen_helper_gvec_vfms_d, +}; +TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls) + +static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { + gen_helper_gvec_fceq_h, + gen_helper_gvec_fceq_s, + gen_helper_gvec_fceq_d, +}; +TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq) + +static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { + gen_helper_gvec_fcge_h, + gen_helper_gvec_fcge_s, + gen_helper_gvec_fcge_d, +}; +TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge) + +static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { + gen_helper_gvec_fcgt_h, + gen_helper_gvec_fcgt_s, + gen_helper_gvec_fcgt_d, +}; +TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt) + +static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { + gen_helper_gvec_facge_h, + gen_helper_gvec_facge_s, + gen_helper_gvec_facge_d, +}; +TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge) + +static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { + gen_helper_gvec_facgt_h, + gen_helper_gvec_facgt_s, + gen_helper_gvec_facgt_d, +}; +TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt) + +static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { + gen_helper_gvec_fabd_h, + gen_helper_gvec_fabd_s, + gen_helper_gvec_fabd_d, +}; +TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd) + +static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { + gen_helper_gvec_recps_h, + gen_helper_gvec_recps_s, + gen_helper_gvec_recps_d, +}; +TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps) + +static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { + gen_helper_gvec_rsqrts_h, + gen_helper_gvec_rsqrts_s, + 
gen_helper_gvec_rsqrts_d, +}; +TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts) + +static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { + gen_helper_gvec_faddp_h, + gen_helper_gvec_faddp_s, + gen_helper_gvec_faddp_d, +}; +TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { + gen_helper_gvec_fmaxp_h, + gen_helper_gvec_fmaxp_s, + gen_helper_gvec_fmaxp_d, +}; +TRANS(FMAXP_v, do_fp3_vector, a, f_vector_fmaxp) + +static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { + gen_helper_gvec_fminp_h, + gen_helper_gvec_fminp_s, + gen_helper_gvec_fminp_d, +}; +TRANS(FMINP_v, do_fp3_vector, a, f_vector_fminp) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { + gen_helper_gvec_fmaxnump_h, + gen_helper_gvec_fmaxnump_s, + gen_helper_gvec_fmaxnump_d, +}; +TRANS(FMAXNMP_v, do_fp3_vector, a, f_vector_fmaxnmp) + +static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { + gen_helper_gvec_fminnump_h, + gen_helper_gvec_fminnump_s, + gen_helper_gvec_fminnump_d, +}; +TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp) + +static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) +{ + if (fp_access_check(s)) { + int data = (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), tcg_env, + a->q ? 16 : 8, vec_full_reg_size(s), + data, gen_helper_gvec_fmlal_a64); + } + return true; +} + +TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) +TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) +TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) +TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) + +TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) +TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) +TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) +TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) +TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) + +TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) +TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) +TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) +TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) +TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) + +static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) +{ + if (fp_access_check(s)) { + gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); + } + return true; +} + +TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) +TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) +TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) + +/* + * Advanced SIMD scalar/vector x indexed element + */ + +static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t1, a->rm, a->idx, MO_64); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rn); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rn); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 
+ write_fp_sreg(s, a->rd, t0); + } + break; + default: + g_assert_not_reached(); + } + return true; +} + +TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) +TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) + +static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rd); + TCGv_i64 t1 = read_fp_dreg(s, a->rn); + TCGv_i64 t2 = tcg_temp_new_i64(); + + read_vec_element(s, t2, a->rm, a->idx, MO_64); + if (neg) { + gen_vfp_negd(t1, t1); + } + gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rd); + TCGv_i32 t1 = read_fp_sreg(s, a->rn); + TCGv_i32 t2 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); + if (neg) { + gen_vfp_negs(t1, t1); + } + gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rd); + TCGv_i32 t1 = read_fp_hreg(s, a->rn); + TCGv_i32 t2 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); + if (neg) { + gen_vfp_negh(t1, t1); + } + gen_helper_advsimd_muladdh(t0, t1, t2, t0, + fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + g_assert_not_reached(); + } + return true; +} + +TRANS(FMLA_si, do_fmla_scalar_idx, a, false) +TRANS(FMLS_si, do_fmla_scalar_idx, a, true) + +static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_3_ptr * const fns[3]) +{ + MemOp esz = a->esz; + + switch (esz) { + case MO_64: + if (!a->q) { + return false; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + g_assert_not_reached(); + } + if (fp_access_check(s)) { + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, + esz == MO_16, a->idx, fns[esz - 1]); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { + gen_helper_gvec_fmul_idx_h, + gen_helper_gvec_fmul_idx_s, + gen_helper_gvec_fmul_idx_d, +}; +TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) + +static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { + gen_helper_gvec_fmulx_idx_h, + gen_helper_gvec_fmulx_idx_s, + gen_helper_gvec_fmulx_idx_d, +}; +TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) + +static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) +{ + static gen_helper_gvec_4_ptr * const fns[3] = { + gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_fmla_idx_s, + gen_helper_gvec_fmla_idx_d, + }; + MemOp esz = a->esz; + + switch (esz) { + case MO_64: + if (!a->q) { + return false; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + g_assert_not_reached(); + } + if (fp_access_check(s)) { + gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, + esz == MO_16, (a->idx << 1) | neg, + fns[esz - 1]); + } + return true; +} + +TRANS(FMLA_vi, do_fmla_vector_idx, a, false) +TRANS(FMLS_vi, do_fmla_vector_idx, a, true) + +static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) +{ + if (fp_access_check(s)) { + int data = (a->idx << 2) | (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, 
a->rm), tcg_env, + a->q ? 16 : 8, vec_full_reg_size(s), + data, gen_helper_gvec_fmlal_idx_a64); + } + return true; +} + +TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) +TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) +TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) +TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) + +/* + * Advanced SIMD scalar pairwise + */ + +static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rn, 1, MO_64); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, MO_32); + read_vec_element_i32(s, t1, a->rn, 1, MO_32); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, MO_16); + read_vec_element_i32(s, t1, a->rn, 1, MO_16); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + g_assert_not_reached(); + } + return true; +} + +TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) +TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) +TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) +TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) +TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) + +static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rn, 1, MO_64); + tcg_gen_add_i64(t0, t0, t1); + write_fp_dreg(s, a->rd, t0); + } + return true; +} + /* Shift a TCGv src by TCGv shift_amount, put result in dst. 
* Note that it is the caller's responsibility to ensure that the * shift amount is in range (ie 0..31 or 0..63) and provide the ARM @@ -6018,10 +7028,10 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) tcg_gen_mov_i32(tcg_res, tcg_op); break; case 0x1: /* FABS */ - tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); + gen_vfp_absh(tcg_res, tcg_op); break; case 0x2: /* FNEG */ - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); + gen_vfp_negh(tcg_res, tcg_op); break; case 0x3: /* FSQRT */ fpst = fpstatus_ptr(FPST_FPCR_F16); @@ -6072,10 +7082,10 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) tcg_gen_mov_i32(tcg_res, tcg_op); goto done; case 0x1: /* FABS */ - gen_helper_vfp_abss(tcg_res, tcg_op); + gen_vfp_abss(tcg_res, tcg_op); goto done; case 0x2: /* FNEG */ - gen_helper_vfp_negs(tcg_res, tcg_op); + gen_vfp_negs(tcg_res, tcg_op); goto done; case 0x3: /* FSQRT */ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); @@ -6147,10 +7157,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) switch (opcode) { case 0x1: /* FABS */ - gen_helper_vfp_absd(tcg_res, tcg_op); + gen_vfp_absd(tcg_res, tcg_op); goto done; case 0x2: /* FNEG */ - gen_helper_vfp_negd(tcg_res, tcg_op); + gen_vfp_negd(tcg_res, tcg_op); goto done; case 0x3: /* FSQRT */ gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); @@ -6359,200 +7369,6 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } } -/* Floating-point data-processing (2 source) - single precision */ -static void handle_fp_2src_single(DisasContext *s, int opcode, - int rd, int rn, int rm) -{ - TCGv_i32 tcg_op1; - TCGv_i32 tcg_op2; - TCGv_i32 tcg_res; - TCGv_ptr fpst; - - tcg_res = tcg_temp_new_i32(); - fpst = fpstatus_ptr(FPST_FPCR); - tcg_op1 = read_fp_sreg(s, rn); - tcg_op2 = read_fp_sreg(s, rm); - - switch (opcode) { - case 0x0: /* FMUL */ - gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FDIV */ - gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2: /* FADD */ - gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FSUB */ - gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FMAX */ - gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5: /* FMIN */ - gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FMINNM */ - gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FNMUL */ - gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_negs(tcg_res, tcg_res); - break; - } - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (2 source) - double precision */ -static void handle_fp_2src_double(DisasContext *s, int opcode, - int rd, int rn, int rm) -{ - TCGv_i64 tcg_op1; - TCGv_i64 tcg_op2; - TCGv_i64 tcg_res; - TCGv_ptr fpst; - - tcg_res = tcg_temp_new_i64(); - fpst = fpstatus_ptr(FPST_FPCR); - tcg_op1 = read_fp_dreg(s, rn); - tcg_op2 = read_fp_dreg(s, rm); - - switch (opcode) { - case 0x0: /* FMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FDIV */ - gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2: /* FADD */ - gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FSUB */ - gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FMAX */ - gen_helper_vfp_maxd(tcg_res, tcg_op1, 
tcg_op2, fpst); - break; - case 0x5: /* FMIN */ - gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FNMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_negd(tcg_res, tcg_res); - break; - } - - write_fp_dreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (2 source) - half precision */ -static void handle_fp_2src_half(DisasContext *s, int opcode, - int rd, int rn, int rm) -{ - TCGv_i32 tcg_op1; - TCGv_i32 tcg_op2; - TCGv_i32 tcg_res; - TCGv_ptr fpst; - - tcg_res = tcg_temp_new_i32(); - fpst = fpstatus_ptr(FPST_FPCR_F16); - tcg_op1 = read_fp_hreg(s, rn); - tcg_op2 = read_fp_hreg(s, rm); - - switch (opcode) { - case 0x0: /* FMUL */ - gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FDIV */ - gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FSUB */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FMAX */ - gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5: /* FMIN */ - gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FNMUL */ - gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); - tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); - break; - default: - g_assert_not_reached(); - } - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating point data-processing (2 source) - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+--------+-----+------+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | - * +---+---+---+-----------+------+---+------+--------+-----+------+------+ - */ -static void disas_fp_2src(DisasContext *s, uint32_t insn) -{ - int mos = extract32(insn, 29, 3); - int type = extract32(insn, 22, 2); - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int opcode = extract32(insn, 12, 4); - - if (opcode > 8 || mos) { - unallocated_encoding(s); - return; - } - - switch (type) { - case 0: - if (!fp_access_check(s)) { - return; - } - handle_fp_2src_single(s, opcode, rd, rn, rm); - break; - case 1: - if (!fp_access_check(s)) { - return; - } - handle_fp_2src_double(s, opcode, rd, rn, rm); - break; - case 3: - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_fp_2src_half(s, opcode, rd, rn, rm); - break; - default: - unallocated_encoding(s); - } -} - /* Floating-point data-processing (3 source) - single precision */ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, int rd, int rn, int rm, int ra) @@ -6573,11 +7389,11 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, * flipped if it is a negated-input. 
*/ if (o1 == true) { - gen_helper_vfp_negs(tcg_op3, tcg_op3); + gen_vfp_negs(tcg_op3, tcg_op3); } if (o0 != o1) { - gen_helper_vfp_negs(tcg_op1, tcg_op1); + gen_vfp_negs(tcg_op1, tcg_op1); } gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); @@ -6605,11 +7421,11 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, * flipped if it is a negated-input. */ if (o1 == true) { - gen_helper_vfp_negd(tcg_op3, tcg_op3); + gen_vfp_negd(tcg_op3, tcg_op3); } if (o0 != o1) { - gen_helper_vfp_negd(tcg_op1, tcg_op1); + gen_vfp_negd(tcg_op1, tcg_op1); } gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); @@ -7156,7 +7972,7 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn) break; case 2: /* Floating point data-processing (2 source) */ - disas_fp_2src(s, insn); + unallocated_encoding(s); /* in decodetree */ break; case 3: /* Floating point conditional select */ @@ -7618,268 +8434,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) write_fp_dreg(s, rd, tcg_res); } -/* DUP (Element, Vector) - * - * 31 30 29 21 20 16 15 10 9 5 4 0 - * +---+---+-------------------+--------+-------------+------+------+ - * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | - * +---+---+-------------------+--------+-------------+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - */ -static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, - int imm5) -{ - int size = ctz32(imm5); - int index; - - if (size > 3 || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - index = imm5 >> (size + 1); - tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), - vec_reg_offset(s, rn, index, size), - is_q ? 16 : 8, vec_full_reg_size(s)); -} - -/* DUP (element, scalar) - * 31 21 20 16 15 10 9 5 4 0 - * +-----------------------+--------+-------------+------+------+ - * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | - * +-----------------------+--------+-------------+------+------+ - */ -static void handle_simd_dupes(DisasContext *s, int rd, int rn, - int imm5) -{ - int size = ctz32(imm5); - int index; - TCGv_i64 tmp; - - if (size > 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - index = imm5 >> (size + 1); - - /* This instruction just extracts the specified element and - * zero-extends it into the bottom of the destination register. - */ - tmp = tcg_temp_new_i64(); - read_vec_element(s, tmp, rn, index, size); - write_fp_dreg(s, rd, tmp); -} - -/* DUP (General) - * - * 31 30 29 21 20 16 15 10 9 5 4 0 - * +---+---+-------------------+--------+-------------+------+------+ - * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | - * +---+---+-------------------+--------+-------------+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - */ -static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, - int imm5) -{ - int size = ctz32(imm5); - uint32_t dofs, oprsz, maxsz; - - if (size > 3 || ((size == 3) && !is_q)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - dofs = vec_full_reg_offset(s, rd); - oprsz = is_q ? 
16 : 8; - maxsz = vec_full_reg_size(s); - - tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); -} - -/* INS (Element) - * - * 31 21 20 16 15 14 11 10 9 5 4 0 - * +-----------------------+--------+------------+---+------+------+ - * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | - * +-----------------------+--------+------------+---+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - * index: encoded in imm5<4:size+1> - */ -static void handle_simd_inse(DisasContext *s, int rd, int rn, - int imm4, int imm5) -{ - int size = ctz32(imm5); - int src_index, dst_index; - TCGv_i64 tmp; - - if (size > 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - dst_index = extract32(imm5, 1+size, 5); - src_index = extract32(imm4, size, 4); - - tmp = tcg_temp_new_i64(); - - read_vec_element(s, tmp, rn, src_index, size); - write_vec_element(s, tmp, rd, dst_index, size); - - /* INS is considered a 128-bit write for SVE. */ - clear_vec_high(s, true, rd); -} - - -/* INS (General) - * - * 31 21 20 16 15 10 9 5 4 0 - * +-----------------------+--------+-------------+------+------+ - * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | - * +-----------------------+--------+-------------+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - * index: encoded in imm5<4:size+1> - */ -static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) -{ - int size = ctz32(imm5); - int idx; - - if (size > 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - idx = extract32(imm5, 1 + size, 4 - size); - write_vec_element(s, cpu_reg(s, rn), rd, idx, size); - - /* INS is considered a 128-bit write for SVE. */ - clear_vec_high(s, true, rd); -} - -/* - * UMOV (General) - * SMOV (General) - * - * 31 30 29 21 20 16 15 12 10 9 5 4 0 - * +---+---+-------------------+--------+-------------+------+------+ - * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | - * +---+---+-------------------+--------+-------------+------+------+ - * - * U: unsigned when set - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - */ -static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, - int rn, int rd, int imm5) -{ - int size = ctz32(imm5); - int element; - TCGv_i64 tcg_rd; - - /* Check for UnallocatedEncodings */ - if (is_signed) { - if (size > 2 || (size == 2 && !is_q)) { - unallocated_encoding(s); - return; - } - } else { - if (size > 3 - || (size < 3 && is_q) - || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - } - - if (!fp_access_check(s)) { - return; - } - - element = extract32(imm5, 1+size, 4); - - tcg_rd = cpu_reg(s, rd); - read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); - if (is_signed && !is_q) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); - } -} - -/* AdvSIMD copy - * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 - * +---+---+----+-----------------+------+---+------+---+------+------+ - * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | - * +---+---+----+-----------------+------+---+------+---+------+------+ - */ -static void disas_simd_copy(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int imm4 = extract32(insn, 11, 4); - int op = extract32(insn, 29, 1); - int is_q = extract32(insn, 30, 1); - int imm5 = extract32(insn, 16, 5); - - if (op) { - if (is_q) { - /* INS (element) */ - handle_simd_inse(s, rd, rn, imm4, imm5); - } else { - unallocated_encoding(s); - } - } else { - switch (imm4) { - case 0: - /* DUP (element - vector) */ - handle_simd_dupe(s, is_q, rd, rn, imm5); - break; - case 1: - /* DUP (general) */ - handle_simd_dupg(s, is_q, rd, rn, imm5); - break; - case 3: - if (is_q) { - /* INS (general) */ - handle_simd_insg(s, rd, rn, imm5); - } else { - unallocated_encoding(s); - } - break; - case 5: - case 7: - /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ - handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); - break; - default: - unallocated_encoding(s); - break; - } - } -} - /* AdvSIMD modified immediate * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ @@ -7904,27 +8458,31 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) bool is_q = extract32(insn, 30, 1); uint64_t imm = 0; - if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { - /* Check for FMOV (vector, immediate) - half-precision */ - if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { + if (o2) { + if (cmode != 0xf || is_neg) { unallocated_encoding(s); return; } - } - - if (!fp_access_check(s)) { - return; - } - - if (cmode == 15 && o2 && !is_neg) { /* FMOV (vector, immediate) - half-precision */ + if (!dc_isar_feature(aa64_fp16, s)) { + unallocated_encoding(s); + return; + } imm = vfp_expand_imm(MO_16, abcdefgh); /* now duplicate across the lanes */ imm = dup_const(MO_16, imm); } else { + if (cmode == 0xf && is_neg && !is_q) { + unallocated_encoding(s); + return; + } imm = asimd_imm_const(abcdefgh, cmode, is_neg); } + if (!fp_access_check(s)) { + return; + } + if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { /* MOVI or MVNI, with MVNI negation handled above. */ tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 
16 : 8, @@ -7939,176 +8497,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) } } -/* AdvSIMD scalar copy - * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 - * +-----+----+-----------------+------+---+------+---+------+------+ - * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | - * +-----+----+-----------------+------+---+------+---+------+------+ - */ -static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int imm4 = extract32(insn, 11, 4); - int imm5 = extract32(insn, 16, 5); - int op = extract32(insn, 29, 1); - - if (op != 0 || imm4 != 0) { - unallocated_encoding(s); - return; - } - - /* DUP (element, scalar) */ - handle_simd_dupes(s, rd, rn, imm5); -} - -/* AdvSIMD scalar pairwise - * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----+---+-----------+------+-----------+--------+-----+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | - * +-----+---+-----------+------+-----------+--------+-----+------+------+ - */ -static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) -{ - int u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - TCGv_ptr fpst; - - /* For some ops (the FP ones), size[1] is part of the encoding. - * For ADDP strictly it is not but size[1] is always 1 for valid - * encodings. - */ - opcode |= (extract32(size, 1, 1) << 5); - - switch (opcode) { - case 0x3b: /* ADDP */ - if (u || size != 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - - fpst = NULL; - break; - case 0xc: /* FMAXNMP */ - case 0xd: /* FADDP */ - case 0xf: /* FMAXP */ - case 0x2c: /* FMINNMP */ - case 0x2f: /* FMINP */ - /* FP op, size[0] is 32 or 64 bit*/ - if (!u) { - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } else { - size = MO_16; - } - } else { - size = extract32(size, 0, 1) ? MO_64 : MO_32; - } - - if (!fp_access_check(s)) { - return; - } - - fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); - break; - default: - unallocated_encoding(s); - return; - } - - if (size == MO_64) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, 0, MO_64); - read_vec_element(s, tcg_op2, rn, 1, MO_64); - - switch (opcode) { - case 0x3b: /* ADDP */ - tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); - break; - case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xd: /* FADDP */ - gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FMAXP */ - gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2f: /* FMINP */ - gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_fp_dreg(s, rd, tcg_res); - } else { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op1, rn, 0, size); - read_vec_element_i32(s, tcg_op2, rn, 1, size); - - if (size == MO_16) { - switch (opcode) { - case 0xc: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xd: /* FADDP */ - gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2c: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2f: /* FMINP */ - gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } else { - switch (opcode) { - case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xd: /* FADDP */ - gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FMAXP */ - gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2f: /* FMINP */ - gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } - - write_fp_sreg(s, rd, tcg_res); - } -} - /* * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) * @@ -8707,6 +9095,9 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, read_vec_element_i32(s, tcg_op, rn, pass, size); fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); if (is_scalar) { + if (size == MO_16 && !is_u) { + tcg_gen_ext16u_i32(tcg_op, tcg_op); + } write_fp_sreg(s, rd, tcg_op); } else { write_vec_element_i32(s, tcg_op, rd, pass, size); @@ -8972,183 +9363,6 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, } } -/* Handle the 3-same-operands float operations; shared by the scalar - * and vector encodings. The caller must filter out any encodings - * not allocated for the encoding it is dealing with. 
- */ -static void handle_3same_float(DisasContext *s, int size, int elements, - int fpopcode, int rd, int rn, int rm) -{ - int pass; - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - - for (pass = 0; pass < elements; pass++) { - if (size) { - /* Double */ - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - switch (fpopcode) { - case 0x39: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - gen_helper_vfp_negd(tcg_op1, tcg_op1); - /* fall through */ - case 0x19: /* FMLA */ - read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, - tcg_res, fpst); - break; - case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1a: /* FADD */ - gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1b: /* FMULX */ - gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1e: /* FMAX */ - gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1f: /* FRECPS */ - gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x38: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3a: /* FSUB */ - gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3e: /* FMIN */ - gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5b: /* FMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5d: /* FACGE */ - gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5f: /* FDIV */ - gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7a: /* FABD */ - gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_absd(tcg_res, tcg_res); - break; - case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_vec_element(s, tcg_res, rd, pass, MO_64); - } else { - /* Single */ - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); - read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); - - switch (fpopcode) { - case 0x39: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - gen_helper_vfp_negs(tcg_op1, tcg_op1); - /* fall through */ - case 0x19: /* FMLA */ - read_vec_element_i32(s, tcg_res, rd, pass, MO_32); - gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, - tcg_res, fpst); - break; - case 0x1a: /* FADD */ - gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1b: /* FMULX */ - gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1e: /* FMAX */ - gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1f: /* FRECPS */ - gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 
0x18: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x38: /* FMINNM */ - gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3a: /* FSUB */ - gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3e: /* FMIN */ - gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5b: /* FMUL */ - gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5d: /* FACGE */ - gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5f: /* FDIV */ - gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7a: /* FABD */ - gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_abss(tcg_res, tcg_res); - break; - case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - if (elements == 1) { - /* scalar single so clear high part */ - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - - tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); - write_vec_element(s, tcg_tmp, rd, pass, MO_64); - } else { - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - } - } - } - - clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd); -} - /* AdvSIMD scalar three same * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 * +-----+---+-----------+------+---+------+--------+---+------+------+ @@ -9165,33 +9379,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) bool u = extract32(insn, 29, 1); TCGv_i64 tcg_rd; - if (opcode >= 0x18) { - /* Floating point: U, size[1] and opcode indicate operation */ - int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); - switch (fpopcode) { - case 0x1b: /* FMULX */ - case 0x1f: /* FRECPS */ - case 0x3f: /* FRSQRTS */ - case 0x5d: /* FACGE */ - case 0x7d: /* FACGT */ - case 0x1c: /* FCMEQ */ - case 0x5c: /* FCMGE */ - case 0x7c: /* FCMGT */ - case 0x7a: /* FABD */ - break; - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); - return; - } - switch (opcode) { case 0x1: /* SQADD, UQADD */ case 0x5: /* SQSUB, UQSUB */ @@ -9308,95 +9495,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) write_fp_dreg(s, rd, tcg_rd); } -/* AdvSIMD scalar three same FP16 - * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 - * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ - * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | - * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ - * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 - * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 - */ -static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, - uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 3); - int rm = extract32(insn, 16, 5); - bool u = extract32(insn, 29, 1); - bool a = extract32(insn, 23, 1); - int fpopcode = opcode | (a << 3) | (u << 4); - TCGv_ptr fpst; - TCGv_i32 tcg_op1; - TCGv_i32 tcg_op2; - TCGv_i32 tcg_res; - - switch (fpopcode) { - case 0x03: /* FMULX */ - case 0x04: /* 
FCMEQ (reg) */ - case 0x07: /* FRECPS */ - case 0x0f: /* FRSQRTS */ - case 0x14: /* FCMGE (reg) */ - case 0x15: /* FACGE */ - case 0x1a: /* FABD */ - case 0x1c: /* FCMGT (reg) */ - case 0x1d: /* FACGT */ - break; - default: - unallocated_encoding(s); - return; - } - - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - } - - if (!fp_access_check(s)) { - return; - } - - fpst = fpstatus_ptr(FPST_FPCR_F16); - - tcg_op1 = read_fp_hreg(s, rn); - tcg_op2 = read_fp_hreg(s, rm); - tcg_res = tcg_temp_new_i32(); - - switch (fpopcode) { - case 0x03: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x04: /* FCMEQ (reg) */ - gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x07: /* FRECPS */ - gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x0f: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x14: /* FCMGE (reg) */ - gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); - break; - case 0x1c: /* FCMGT (reg) */ - gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_fp_sreg(s, rd, tcg_res); -} - /* AdvSIMD scalar three same extra * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ @@ -9528,10 +9626,10 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, } break; case 0x2f: /* FABS */ - gen_helper_vfp_absd(tcg_rd, tcg_rn); + gen_vfp_absd(tcg_rd, tcg_rn); break; case 0x6f: /* FNEG */ - gen_helper_vfp_negd(tcg_rd, tcg_rn); + gen_vfp_negd(tcg_rd, tcg_rn); break; case 0x7f: /* FSQRT */ gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); @@ -10821,284 +10919,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } -/* Logic op (opcode == 3) subgroup of C3.6.16. */ -static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - bool is_u = extract32(insn, 29, 1); - bool is_q = extract32(insn, 30, 1); - - if (!fp_access_check(s)) { - return; - } - - switch (size + 4 * is_u) { - case 0: /* AND */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); - return; - case 1: /* BIC */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); - return; - case 2: /* ORR */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); - return; - case 3: /* ORN */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); - return; - case 4: /* EOR */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); - return; - - case 5: /* BSL bitwise select */ - gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); - return; - case 6: /* BIT, bitwise insert if true */ - gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); - return; - case 7: /* BIF, bitwise insert if false */ - gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); - return; - - default: - g_assert_not_reached(); - } -} - -/* Pairwise op subgroup of C3.6.16. 
- * - * This is called directly or via the handle_3same_float for float pairwise - * operations where the opcode and size are calculated differently. - */ -static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, - int size, int rn, int rm, int rd) -{ - TCGv_ptr fpst; - int pass; - - /* Floating point operations need fpst */ - if (opcode >= 0x58) { - fpst = fpstatus_ptr(FPST_FPCR); - } else { - fpst = NULL; - } - - if (!fp_access_check(s)) { - return; - } - - /* These operations work on the concatenated rm:rn, with each pair of - * adjacent elements being operated on to produce an element in the result. - */ - if (size == 3) { - TCGv_i64 tcg_res[2]; - - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - int passreg = (pass == 0) ? rn : rm; - - read_vec_element(s, tcg_op1, passreg, 0, MO_64); - read_vec_element(s, tcg_op2, passreg, 1, MO_64); - tcg_res[pass] = tcg_temp_new_i64(); - - switch (opcode) { - case 0x17: /* ADDP */ - tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); - break; - case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5a: /* FADDP */ - gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5e: /* FMAXP */ - gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x78: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x7e: /* FMINP */ - gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } - - for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - } - } else { - int maxpass = is_q ? 4 : 2; - TCGv_i32 tcg_res[4]; - - for (pass = 0; pass < maxpass; pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - NeonGenTwoOpFn *genfn = NULL; - int passreg = pass < (maxpass / 2) ? rn : rm; - int passelt = (is_q && (pass & 1)) ? 
2 : 0; - - read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); - read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); - tcg_res[pass] = tcg_temp_new_i32(); - - switch (opcode) { - case 0x17: /* ADDP */ - { - static NeonGenTwoOpFn * const fns[3] = { - gen_helper_neon_padd_u8, - gen_helper_neon_padd_u16, - tcg_gen_add_i32, - }; - genfn = fns[size]; - break; - } - case 0x14: /* SMAXP, UMAXP */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, - { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x15: /* SMINP, UMINP */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, - { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, - }; - genfn = fns[size][u]; - break; - } - /* The FP operations are all on single floats (32 bit) */ - case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5a: /* FADDP */ - gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5e: /* FMAXP */ - gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x78: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x7e: /* FMINP */ - gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - /* FP ops called directly, otherwise call now */ - if (genfn) { - genfn(tcg_res[pass], tcg_op1, tcg_op2); - } - } - - for (pass = 0; pass < maxpass; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); - } - clear_vec_high(s, is_q, rd); - } -} - -/* Floating point op subgroup of C3.6.16. */ -static void disas_simd_3same_float(DisasContext *s, uint32_t insn) -{ - /* For floating point ops, the U, size[1] and opcode bits - * together indicate the operation. size[0] indicates single - * or double. - */ - int fpopcode = extract32(insn, 11, 5) - | (extract32(insn, 23, 1) << 5) - | (extract32(insn, 29, 1) << 6); - int is_q = extract32(insn, 30, 1); - int size = extract32(insn, 22, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - int datasize = is_q ? 128 : 64; - int esize = 32 << size; - int elements = datasize / esize; - - if (size == 1 && !is_q) { - unallocated_encoding(s); - return; - } - - switch (fpopcode) { - case 0x58: /* FMAXNMP */ - case 0x5a: /* FADDP */ - case 0x5e: /* FMAXP */ - case 0x78: /* FMINNMP */ - case 0x7e: /* FMINP */ - if (size && !is_q) { - unallocated_encoding(s); - return; - } - handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? 
MO_64 : MO_32, - rn, rm, rd); - return; - case 0x1b: /* FMULX */ - case 0x1f: /* FRECPS */ - case 0x3f: /* FRSQRTS */ - case 0x5d: /* FACGE */ - case 0x7d: /* FACGT */ - case 0x19: /* FMLA */ - case 0x39: /* FMLS */ - case 0x18: /* FMAXNM */ - case 0x1a: /* FADD */ - case 0x1c: /* FCMEQ */ - case 0x1e: /* FMAX */ - case 0x38: /* FMINNM */ - case 0x3a: /* FSUB */ - case 0x3e: /* FMIN */ - case 0x5b: /* FMUL */ - case 0x5c: /* FCMGE */ - case 0x5f: /* FDIV */ - case 0x7a: /* FABD */ - case 0x7c: /* FCMGT */ - if (!fp_access_check(s)) { - return; - } - handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); - return; - - case 0x1d: /* FMLAL */ - case 0x3d: /* FMLSL */ - case 0x59: /* FMLAL2 */ - case 0x79: /* FMLSL2 */ - if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { - unallocated_encoding(s); - return; - } - if (fp_access_check(s)) { - int is_s = extract32(insn, 23, 1); - int is_2 = extract32(insn, 29, 1); - int data = (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_a64); - } - return; - - default: - unallocated_encoding(s); - return; - } -} - /* Integer op subgroup of C3.6.16. */ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) { @@ -11364,244 +11184,17 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 11, 5); switch (opcode) { - case 0x3: /* logic ops */ - disas_simd_3same_logic(s, insn); - break; - case 0x17: /* ADDP */ - case 0x14: /* SMAXP, UMAXP */ - case 0x15: /* SMINP, UMINP */ - { - /* Pairwise operations */ - int is_q = extract32(insn, 30, 1); - int u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - if (opcode == 0x17) { - if (u || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - } else { - if (size == 3) { - unallocated_encoding(s); - return; - } - } - handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); - break; - } - case 0x18 ... 0x31: - /* floating point ops, sz[1] and U are part of opcode */ - disas_simd_3same_float(s, insn); - break; default: disas_simd_3same_int(s, insn); break; - } -} - -/* - * Advanced SIMD three same (ARMv8.2 FP16 variants) - * - * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 - * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | - * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ - * - * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE - * (register), FACGE, FABD, FCMGT (register) and FACGT. - * - */ -static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 11, 3); - int u = extract32(insn, 29, 1); - int a = extract32(insn, 23, 1); - int is_q = extract32(insn, 30, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - /* - * For these floating point ops, the U, a and opcode bits - * together indicate the operation. - */ - int fpopcode = opcode | (a << 3) | (u << 4); - int datasize = is_q ? 
128 : 64; - int elements = datasize / 16; - bool pairwise; - TCGv_ptr fpst; - int pass; - - switch (fpopcode) { - case 0x0: /* FMAXNM */ - case 0x1: /* FMLA */ - case 0x2: /* FADD */ - case 0x3: /* FMULX */ - case 0x4: /* FCMEQ */ - case 0x6: /* FMAX */ - case 0x7: /* FRECPS */ - case 0x8: /* FMINNM */ - case 0x9: /* FMLS */ - case 0xa: /* FSUB */ - case 0xe: /* FMIN */ - case 0xf: /* FRSQRTS */ - case 0x13: /* FMUL */ - case 0x14: /* FCMGE */ - case 0x15: /* FACGE */ - case 0x17: /* FDIV */ - case 0x1a: /* FABD */ - case 0x1c: /* FCMGT */ - case 0x1d: /* FACGT */ - pairwise = false; - break; - case 0x10: /* FMAXNMP */ - case 0x12: /* FADDP */ - case 0x16: /* FMAXP */ - case 0x18: /* FMINNMP */ - case 0x1e: /* FMINP */ - pairwise = true; - break; - default: - unallocated_encoding(s); - return; - } - - if (!dc_isar_feature(aa64_fp16, s)) { + case 0x3: /* logic ops */ + case 0x14: /* SMAXP, UMAXP */ + case 0x15: /* SMINP, UMINP */ + case 0x17: /* ADDP */ + case 0x18 ... 0x31: /* floating point ops */ unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - fpst = fpstatus_ptr(FPST_FPCR_F16); - - if (pairwise) { - int maxpass = is_q ? 8 : 4; - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res[8]; - - for (pass = 0; pass < maxpass; pass++) { - int passreg = pass < (maxpass / 2) ? rn : rm; - int passelt = (pass << 1) & (maxpass - 1); - - read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); - read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); - tcg_res[pass] = tcg_temp_new_i32(); - - switch (fpopcode) { - case 0x10: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, - fpst); - break; - case 0x12: /* FADDP */ - gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x16: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x18: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, - fpst); - break; - case 0x1e: /* FMINP */ - gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } - - for (pass = 0; pass < maxpass; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); - } - } else { - for (pass = 0; pass < elements; pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); - read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); - - switch (fpopcode) { - case 0x0: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FMLA */ - read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); - break; - case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FCMEQ */ - gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAX */ - gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FRECPS */ - gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x9: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 
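
The "separate negation" noted in the comment above works because IEEE-754 negation is a pure sign-bit flip: the translator XORs the sign bit of the multiplicand (0x8000 for a single f16 lane here, 0x80008000 for two packed lanes in the 32-bit paths) rather than calling a floating-point negate helper, which also leaves NaN payloads untouched. A minimal C sketch of the idea, with a hypothetical helper name:

#include <stdint.h>

/* Flip only the sign bit of an IEEE float16 held in the low 16 bits;
 * exponent, mantissa and any NaN payload are left as-is. */
static inline uint32_t f16_negate(uint32_t h)
{
    return h ^ 0x8000u;
}
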
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); - break; - case 0xa: /* FSUB */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xe: /* FMIN */ - gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x13: /* FMUL */ - gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x14: /* FCMGE */ - gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x17: /* FDIV */ - gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); - break; - case 0x1c: /* FCMGT */ - gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_vec_element_i32(s, tcg_res, rd, pass, MO_16); - } + break; } - - clear_vec_high(s, is_q, rd); } /* AdvSIMD three same extra @@ -12372,10 +11965,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x2f: /* FABS */ - gen_helper_vfp_abss(tcg_res, tcg_op); + gen_vfp_abss(tcg_res, tcg_op); break; case 0x6f: /* FNEG */ - gen_helper_vfp_negs(tcg_res, tcg_op); + gen_vfp_negs(tcg_res, tcg_op); break; case 0x7f: /* FSQRT */ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); @@ -12810,12 +12403,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x0c: /* SQDMULH */ case 0x0d: /* SQRDMULH */ break; - case 0x01: /* FMLA */ - case 0x05: /* FMLS */ - case 0x09: /* FMUL */ - case 0x19: /* FMULX */ - is_fp = 1; - break; case 0x1d: /* SQRDMLAH */ case 0x1f: /* SQRDMLSH */ if (!dc_isar_feature(aa64_rdm, s)) { @@ -12870,38 +12457,23 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } is_fp = 2; break; + default: case 0x00: /* FMLAL */ + case 0x01: /* FMLA */ case 0x04: /* FMLSL */ + case 0x05: /* FMLS */ + case 0x09: /* FMUL */ case 0x18: /* FMLAL2 */ + case 0x19: /* FMULX */ case 0x1c: /* FMLSL2 */ - if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { - unallocated_encoding(s); - return; - } - size = MO_16; - /* is_fp, but we pass tcg_env not fp_status. */ - break; - default: unallocated_encoding(s); return; } switch (is_fp) { case 1: /* normal fp */ - /* convert insn encoded size to MemOp size */ - switch (size) { - case 0: /* half-precision */ - size = MO_16; - is_fp16 = true; - break; - case MO_32: /* single precision */ - case MO_64: /* double precision */ - break; - default: - unallocated_encoding(s); - return; - } - break; + unallocated_encoding(s); /* in decodetree */ + return; case 2: /* complex fp */ /* Each indexable element is a complex pair. */ @@ -13012,22 +12584,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } return; - case 0x00: /* FMLAL */ - case 0x04: /* FMLSL */ - case 0x18: /* FMLAL2 */ - case 0x1c: /* FMLSL2 */ - { - int is_s = extract32(opcode, 2, 1); - int is_2 = u; - int data = (index << 2) | (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_env, - is_q ? 
16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_idx_a64); - } - return; - case 0x08: /* MUL */ if (!is_long && !is_scalar) { static gen_helper_gvec_3 * const fns[3] = { @@ -13080,42 +12636,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } if (size == 3) { - TCGv_i64 tcg_idx = tcg_temp_new_i64(); - int pass; - - assert(is_fp && is_q && !is_long); - - read_vec_element(s, tcg_idx, rm, index, MO_64); - - for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op, rn, pass, MO_64); - - switch (16 * u + opcode) { - case 0x05: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - gen_helper_vfp_negd(tcg_op, tcg_op); - /* fall through */ - case 0x01: /* FMLA */ - read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); - break; - case 0x09: /* FMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); - break; - case 0x19: /* FMULX */ - gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); - break; - default: - g_assert_not_reached(); - } - - write_vec_element(s, tcg_res, rd, pass, MO_64); - } - - clear_vec_high(s, !is_scalar, rd); + g_assert_not_reached(); } else if (!is_long) { /* 32 bit floating point, or 16 or 32 bit integer. * For the 16 bit scalar case we use the usual Neon helpers and @@ -13171,74 +12692,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) genfn(tcg_res, tcg_op, tcg_res); break; } - case 0x05: /* FMLS */ - case 0x01: /* FMLA */ - read_vec_element_i32(s, tcg_res, rd, pass, - is_scalar ? size : MO_32); - switch (size) { - case 1: - if (opcode == 0x5) { - /* As usual for ARM, separate negation for fused - * multiply-add */ - tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); - } - if (is_scalar) { - gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); - } else { - gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); - } - break; - case 2: - if (opcode == 0x5) { - /* As usual for ARM, separate negation for - * fused multiply-add */ - tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); - } - gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); - break; - default: - g_assert_not_reached(); - } - break; - case 0x09: /* FMUL */ - switch (size) { - case 1: - if (is_scalar) { - gen_helper_advsimd_mulh(tcg_res, tcg_op, - tcg_idx, fpst); - } else { - gen_helper_advsimd_mul2h(tcg_res, tcg_op, - tcg_idx, fpst); - } - break; - case 2: - gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); - break; - default: - g_assert_not_reached(); - } - break; - case 0x19: /* FMULX */ - switch (size) { - case 1: - if (is_scalar) { - gen_helper_advsimd_mulxh(tcg_res, tcg_op, - tcg_idx, fpst); - } else { - gen_helper_advsimd_mulx2h(tcg_res, tcg_op, - tcg_idx, fpst); - } - break; - case 2: - gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); - break; - default: - g_assert_not_reached(); - } - break; case 0x0c: /* SQDMULH */ if (size == 1) { gen_helper_neon_qdmulh_s16(tcg_res, tcg_env, @@ -13280,6 +12733,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; default: + case 0x01: /* FMLA */ + case 0x05: /* FMLS */ + case 0x09: /* FMUL */ + case 0x19: /* FMULX */ g_assert_not_reached(); } @@ -13453,461 +12910,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } } -/* Crypto AES - * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----------------+------+-----------+--------+-----+------+------+ 
- * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | - * +-----------------+------+-----------+--------+-----+------+------+ - */ -static void disas_crypto_aes(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - gen_helper_gvec_2 *genfn2 = NULL; - gen_helper_gvec_3 *genfn3 = NULL; - - if (!dc_isar_feature(aa64_aes, s) || size != 0) { - unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0x4: /* AESE */ - genfn3 = gen_helper_crypto_aese; - break; - case 0x6: /* AESMC */ - genfn2 = gen_helper_crypto_aesmc; - break; - case 0x5: /* AESD */ - genfn3 = gen_helper_crypto_aesd; - break; - case 0x7: /* AESIMC */ - genfn2 = gen_helper_crypto_aesimc; - break; - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - if (genfn2) { - gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2); - } else { - gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3); - } -} - -/* Crypto three-reg SHA - * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 - * +-----------------+------+---+------+---+--------+-----+------+------+ - * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | - * +-----------------+------+---+------+---+--------+-----+------+------+ - */ -static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 3); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - gen_helper_gvec_3 *genfn; - bool feature; - - if (size != 0) { - unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0: /* SHA1C */ - genfn = gen_helper_crypto_sha1c; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 1: /* SHA1P */ - genfn = gen_helper_crypto_sha1p; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 2: /* SHA1M */ - genfn = gen_helper_crypto_sha1m; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 3: /* SHA1SU0 */ - genfn = gen_helper_crypto_sha1su0; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 4: /* SHA256H */ - genfn = gen_helper_crypto_sha256h; - feature = dc_isar_feature(aa64_sha256, s); - break; - case 5: /* SHA256H2 */ - genfn = gen_helper_crypto_sha256h2; - feature = dc_isar_feature(aa64_sha256, s); - break; - case 6: /* SHA256SU1 */ - genfn = gen_helper_crypto_sha256su1; - feature = dc_isar_feature(aa64_sha256, s); - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); -} - -/* Crypto two-reg SHA - * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----------------+------+-----------+--------+-----+------+------+ - * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | - * +-----------------+------+-----------+--------+-----+------+------+ - */ -static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - gen_helper_gvec_2 *genfn; - bool feature; - - if (size != 0) { - unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0: /* SHA1H */ - feature = dc_isar_feature(aa64_sha1, s); - genfn = gen_helper_crypto_sha1h; - break; - case 1: /* SHA1SU1 */ - feature = dc_isar_feature(aa64_sha1, s); - genfn = 
gen_helper_crypto_sha1su1; - break; - case 2: /* SHA256SU0 */ - feature = dc_isar_feature(aa64_sha256, s); - genfn = gen_helper_crypto_sha256su0; - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); -} - -static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) -{ - tcg_gen_rotli_i64(d, m, 1); - tcg_gen_xor_i64(d, d, n); -} - -static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) -{ - tcg_gen_rotli_vec(vece, d, m, 1); - tcg_gen_xor_vec(vece, d, d, n); -} - -void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; - static const GVecGen3 op = { - .fni8 = gen_rax1_i64, - .fniv = gen_rax1_vec, - .opt_opc = vecop_list, - .fno = gen_helper_crypto_rax1, - .vece = MO_64, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); -} - -/* Crypto three-reg SHA512 - * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 - * +-----------------------+------+---+---+-----+--------+------+------+ - * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | - * +-----------------------+------+---+---+-----+--------+------+------+ - */ -static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 10, 2); - int o = extract32(insn, 14, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool feature; - gen_helper_gvec_3 *oolfn = NULL; - GVecGen3Fn *gvecfn = NULL; - - if (o == 0) { - switch (opcode) { - case 0: /* SHA512H */ - feature = dc_isar_feature(aa64_sha512, s); - oolfn = gen_helper_crypto_sha512h; - break; - case 1: /* SHA512H2 */ - feature = dc_isar_feature(aa64_sha512, s); - oolfn = gen_helper_crypto_sha512h2; - break; - case 2: /* SHA512SU1 */ - feature = dc_isar_feature(aa64_sha512, s); - oolfn = gen_helper_crypto_sha512su1; - break; - case 3: /* RAX1 */ - feature = dc_isar_feature(aa64_sha3, s); - gvecfn = gen_gvec_rax1; - break; - default: - g_assert_not_reached(); - } - } else { - switch (opcode) { - case 0: /* SM3PARTW1 */ - feature = dc_isar_feature(aa64_sm3, s); - oolfn = gen_helper_crypto_sm3partw1; - break; - case 1: /* SM3PARTW2 */ - feature = dc_isar_feature(aa64_sm3, s); - oolfn = gen_helper_crypto_sm3partw2; - break; - case 2: /* SM4EKEY */ - feature = dc_isar_feature(aa64_sm4, s); - oolfn = gen_helper_crypto_sm4ekey; - break; - default: - unallocated_encoding(s); - return; - } - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - if (oolfn) { - gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); - } else { - gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); - } -} - -/* Crypto two-reg SHA512 - * 31 12 11 10 9 5 4 0 - * +-----------------------------------------+--------+------+------+ - * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | - * +-----------------------------------------+--------+------+------+ - */ -static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 10, 2); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool feature; - - switch (opcode) { - case 0: /* SHA512SU0 */ - feature = dc_isar_feature(aa64_sha512, s); - break; - case 1: /* SM4E */ - feature = dc_isar_feature(aa64_sm4, s); - break; - default: - 
unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - switch (opcode) { - case 0: /* SHA512SU0 */ - gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); - break; - case 1: /* SM4E */ - gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); - break; - default: - g_assert_not_reached(); - } -} - -/* Crypto four-register - * 31 23 22 21 20 16 15 14 10 9 5 4 0 - * +-------------------+-----+------+---+------+------+------+ - * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | - * +-------------------+-----+------+---+------+------+------+ - */ -static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) -{ - int op0 = extract32(insn, 21, 2); - int rm = extract32(insn, 16, 5); - int ra = extract32(insn, 10, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool feature; - - switch (op0) { - case 0: /* EOR3 */ - case 1: /* BCAX */ - feature = dc_isar_feature(aa64_sha3, s); - break; - case 2: /* SM3SS1 */ - feature = dc_isar_feature(aa64_sm3, s); - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - if (op0 < 2) { - TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(); - tcg_op2 = tcg_temp_new_i64(); - tcg_op3 = tcg_temp_new_i64(); - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = tcg_temp_new_i64(); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - read_vec_element(s, tcg_op3, ra, pass, MO_64); - - if (op0 == 0) { - /* EOR3 */ - tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); - } else { - /* BCAX */ - tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); - } - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - } else { - TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; - - tcg_op1 = tcg_temp_new_i32(); - tcg_op2 = tcg_temp_new_i32(); - tcg_op3 = tcg_temp_new_i32(); - tcg_res = tcg_temp_new_i32(); - tcg_zero = tcg_constant_i32(0); - - read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); - read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); - read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); - - tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); - tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); - tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); - tcg_gen_rotri_i32(tcg_res, tcg_res, 25); - - write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); - write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); - write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); - write_vec_element_i32(s, tcg_res, rd, 3, MO_32); - } -} - -/* Crypto XAR - * 31 21 20 16 15 10 9 5 4 0 - * +-----------------------+------+--------+------+------+ - * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | - * +-----------------------+------+--------+------+------+ - */ -static void disas_crypto_xar(DisasContext *s, uint32_t insn) -{ - int rm = extract32(insn, 16, 5); - int imm6 = extract32(insn, 10, 6); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - if (!dc_isar_feature(aa64_sha3, s)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), imm6, 16, - vec_full_reg_size(s)); -} - -/* Crypto three-reg imm2 - * 
31 21 20 16 15 14 13 12 11 10 9 5 4 0 - * +-----------------------+------+-----+------+--------+------+------+ - * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | - * +-----------------------+------+-----+------+--------+------+------+ - */ -static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, - gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, - }; - int opcode = extract32(insn, 10, 2); - int imm2 = extract32(insn, 12, 2); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - if (!dc_isar_feature(aa64_sm3, s)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); -} - /* C3.6 Data processing - SIMD, inc Crypto * * As the decode gets a little complex we are using a table based @@ -13920,7 +12922,6 @@ static const AArch64DecodeTable data_proc_simd[] = { { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, - { 0x0e000400, 0x9fe08400, disas_simd_copy }, { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, @@ -13932,21 +12933,9 @@ static const AArch64DecodeTable data_proc_simd[] = { { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, - { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, - { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, - { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, - { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, - { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, - { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, - { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, - { 0xce000000, 0xff808000, disas_crypto_four_reg }, - { 0xce800000, 0xffe00000, disas_crypto_xar }, - { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, - { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, - { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, { 0x00000000, 0x00000000, NULL } }; diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index 7b811b8ac5..91750f0ca9 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -193,6 +193,10 @@ void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz); +void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz); void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c index 144f18ba22..18b048611b 100644 
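
Stepping back to the data_proc_simd[] table trimmed above: an AArch64DecodeTable entry matches when the instruction bits selected by the mask equal the pattern, and the table is scanned in order, which is why the comment notes that simd_mod_imm must precede simd_shift_imm. A sketch of the scan, shaped after the struct whose initializers appear above (treat the exact field and type names as assumptions read from this file):

/* Return the first handler whose fixed bits match insn; the table is
 * terminated by an all-zero mask entry. */
static AArch64DecodeFn *lookup(const AArch64DecodeTable *tbl, uint32_t insn)
{
    for (; tbl->mask != 0; tbl++) {
        if ((insn & tbl->mask) == tbl->pattern) {
            return tbl->disas_fn;
        }
    }
    return NULL;
}
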
--- a/target/arm/tcg/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -830,6 +830,11 @@ DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) +DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp) +DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp) +DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp) +DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp) +DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp) #define DO_3SAME_CMP(INSN, COND) \ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ @@ -1002,82 +1007,6 @@ DO_3SAME_32_ENV(VQSHL_U, qshl_u) DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) -static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) -{ - /* Operations handled pairwise 32 bits at a time */ - TCGv_i32 tmp, tmp2, tmp3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if (a->size == 3) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - assert(a->q == 0); /* enforced by decode patterns */ - - /* - * Note that we have to be careful not to clobber the source operands - * in the "vm == vd" case by storing the result of the first pass too - * early. Since Q is 0 there are always just two passes, so instead - * of a complicated loop over each pass we just unroll. - */ - tmp = tcg_temp_new_i32(); - tmp2 = tcg_temp_new_i32(); - tmp3 = tcg_temp_new_i32(); - - read_neon_element32(tmp, a->vn, 0, MO_32); - read_neon_element32(tmp2, a->vn, 1, MO_32); - fn(tmp, tmp, tmp2); - - read_neon_element32(tmp3, a->vm, 0, MO_32); - read_neon_element32(tmp2, a->vm, 1, MO_32); - fn(tmp3, tmp3, tmp2); - - write_neon_element32(tmp, a->vd, 0, MO_32); - write_neon_element32(tmp3, a->vd, 1, MO_32); - - return true; -} - -#define DO_3SAME_PAIR(INSN, func) \ - static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - static NeonGenTwoOpFn * const fns[] = { \ - gen_helper_neon_##func##8, \ - gen_helper_neon_##func##16, \ - gen_helper_neon_##func##32, \ - }; \ - if (a->size > 2) { \ - return false; \ - } \ - return do_3same_pair(s, a, fns[a->size]); \ - } - -/* 32-bit pairwise ops end up the same as the elementwise versions. 
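
To see why, recall the pairwise layout: with Q=0 there are exactly two 32-bit elements per register, so each output element combines the two elements of one source, and the per-pair combining function is just the ordinary two-operand op; the pairing lives in the surrounding read/write sequence, not in the helper. An illustrative plain-C model (names are hypothetical):

#include <stdint.h>

/* d[0] pairs the elements of n, d[1] those of m; 'op' is the same
 * function an elementwise max/min/add would use.  Both results are
 * computed before d is written, so d may alias n or m, mirroring the
 * ordering care taken in do_3same_pair above. */
static void pairwise_u32x2(uint32_t d[2], const uint32_t n[2],
                           const uint32_t m[2],
                           uint32_t (*op)(uint32_t, uint32_t))
{
    uint32_t r0 = op(n[0], n[1]);
    uint32_t r1 = op(m[0], m[1]);
    d[0] = r0;
    d[1] = r1;
}
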
*/ -#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 -#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 -#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 -#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 -#define gen_helper_neon_padd_u32 tcg_gen_add_i32 - -DO_3SAME_PAIR(VPMAX_S, pmax_s) -DO_3SAME_PAIR(VPMIN_S, pmin_s) -DO_3SAME_PAIR(VPMAX_U, pmax_u) -DO_3SAME_PAIR(VPMIN_U, pmin_u) -DO_3SAME_PAIR(VPADD, padd_u) - #define DO_3SAME_VQDMULH(INSN, FUNC) \ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ @@ -1144,6 +1073,9 @@ DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) +DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h) +DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h) +DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h) WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) @@ -1180,58 +1112,6 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) return do_3same(s, a, gen_VMINNM_fp32_3s); } -static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, - gen_helper_gvec_3_ptr *fn) -{ - /* FP pairwise operations */ - TCGv_ptr fpstatus; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - assert(a->q == 0); /* enforced by decode patterns */ - - - fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), - vfp_reg_offset(1, a->vn), - vfp_reg_offset(1, a->vm), - fpstatus, 8, 8, 0, fn); - - return true; -} - -/* - * For all the functions using this macro, size == 1 means fp16, - * which is an architecture extension we don't implement yet. - */ -#define DO_3S_FP_PAIR(INSN,FUNC) \ - static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size == MO_16) { \ - if (!dc_isar_feature(aa32_fp16_arith, s)) { \ - return false; \ - } \ - return do_3same_fp_pair(s, a, FUNC##h); \ - } \ - return do_3same_fp_pair(s, a, FUNC##s); \ - } - -DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd) -DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax) -DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin) - static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) { /* Handle a 2-reg-shift insn which can be vectorized. 
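
The do_3same_fp_pair/DO_3S_FP_PAIR code removed above is superseded by the DO_3S_FP_GVEC(VPADD/VPMAX/VPMIN, ...) lines added earlier in this file: the FP pairwise ops now go through out-of-line gvec helpers rather than an inline two-pass expansion. The element layout those helpers implement is the usual Neon pairwise one; a scalar model, ignoring FP rounding mode and exception state (names are illustrative):

/* Each result element is op() of an adjacent source pair: the first
 * half of the results pairs within n, the second half within m.
 * 'elts' is the (even) element count of one source; d must not
 * alias n or m in this simplified model. */
static void pairwise_f32(float *d, const float *n, const float *m,
                         int elts, float (*op)(float, float))
{
    for (int i = 0; i < elts / 2; i++) {
        d[i]            = op(n[2 * i], n[2 * i + 1]);
        d[elts / 2 + i] = op(m[2 * i], m[2 * i + 1]);
    }
}
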
*/ diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index ada05aa530..798ab2bfb1 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -527,94 +527,6 @@ TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) -static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - uint64_t mask = dup_const(MO_8, 0xff >> sh); - - tcg_gen_xor_i64(t, n, m); - tcg_gen_shri_i64(d, t, sh); - tcg_gen_shli_i64(t, t, 8 - sh); - tcg_gen_andi_i64(d, d, mask); - tcg_gen_andi_i64(t, t, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - uint64_t mask = dup_const(MO_16, 0xffff >> sh); - - tcg_gen_xor_i64(t, n, m); - tcg_gen_shri_i64(d, t, sh); - tcg_gen_shli_i64(t, t, 16 - sh); - tcg_gen_andi_i64(d, d, mask); - tcg_gen_andi_i64(t, t, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) -{ - tcg_gen_xor_i32(d, n, m); - tcg_gen_rotri_i32(d, d, sh); -} - -static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - tcg_gen_xor_i64(d, n, m); - tcg_gen_rotri_i64(d, d, sh); -} - -static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, int64_t sh) -{ - tcg_gen_xor_vec(vece, d, n, m); - tcg_gen_rotri_vec(vece, d, d, sh); -} - -void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, int64_t shift, - uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; - static const GVecGen3i ops[4] = { - { .fni8 = gen_xar8_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_b, - .opt_opc = vecop, - .vece = MO_8 }, - { .fni8 = gen_xar16_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_h, - .opt_opc = vecop, - .vece = MO_16 }, - { .fni4 = gen_xar_i32, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_s, - .opt_opc = vecop, - .vece = MO_32 }, - { .fni8 = gen_xar_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_gvec_xar_d, - .opt_opc = vecop, - .vece = MO_64 } - }; - int esize = 8 << vece; - - /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ - tcg_debug_assert(shift >= 0); - tcg_debug_assert(shift <= esize); - shift &= esize - 1; - - if (shift == 0) { - /* xar with no rotate devolves to xor. 
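
Concretely, XAR computes d = ror(n ^ m, sh) per element. The SVE2 immediate range is 1..esize, so sh == esize wraps to 0 after the "shift &= esize - 1" above, and a rotate by 0 leaves the bare XOR, which is the branch taken below. A scalar model of one 32-bit lane:

#include <stdint.h>

/* d = ror32(n ^ m, sh); the sh == 0 branch both avoids an undefined
 * 32-bit shift and is the "devolves to xor" case. */
static uint32_t xar32(uint32_t n, uint32_t m, unsigned sh)
{
    uint32_t x = n ^ m;
    sh &= 31;
    return sh ? (x >> sh) | (x << (32 - sh)) : x;
}
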
*/ - tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); - } else { - tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, - shift, &ops[vece]); - } -} - static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) { if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { @@ -629,61 +541,8 @@ static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) return true; } -static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) -{ - tcg_gen_xor_i64(d, n, m); - tcg_gen_xor_i64(d, d, k); -} - -static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, TCGv_vec k) -{ - tcg_gen_xor_vec(vece, d, n, m); - tcg_gen_xor_vec(vece, d, d, k); -} - -static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, - uint32_t a, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen4 op = { - .fni8 = gen_eor3_i64, - .fniv = gen_eor3_vec, - .fno = gen_helper_sve2_eor3, - .vece = MO_64, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); -} - -TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) - -static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) -{ - tcg_gen_andc_i64(d, m, k); - tcg_gen_xor_i64(d, d, n); -} - -static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, TCGv_vec k) -{ - tcg_gen_andc_vec(vece, d, m, k); - tcg_gen_xor_vec(vece, d, d, n); -} - -static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, - uint32_t a, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen4 op = { - .fni8 = gen_bcax_i64, - .fniv = gen_bcax_vec, - .fno = gen_helper_sve2_bcax, - .vece = MO_64, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); -} - -TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) +TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a) +TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a) static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, uint32_t a, uint32_t oprsz, uint32_t maxsz) diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c index b9af03b7c3..39ec971ff7 100644 --- a/target/arm/tcg/translate-vfp.c +++ b/target/arm/tcg/translate-vfp.c @@ -48,6 +48,12 @@ static inline void vfp_store_reg32(TCGv_i32 var, int reg) tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg)); } +static inline void vfp_load_reg16(TCGv_i32 var, int reg) +{ + tcg_gen_ld16u_i32(var, tcg_env, + vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2); +} + /* * The imm8 encodes the sign bit, enough bits to represent an exponent in * the range 01....1xx to 10....0xx, and the most significant 4 bits of @@ -902,8 +908,7 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) if (a->l) { /* VFP to general purpose register */ tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vn); - tcg_gen_andi_i32(tmp, tmp, 0xffff); + vfp_load_reg16(tmp, a->vn); store_reg(s, a->rt, tmp); } else { /* general purpose register to VFP */ @@ -1453,11 +1458,11 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, fd = tcg_temp_new_i32(); fpst = fpstatus_ptr(FPST_FPCR_F16); - vfp_load_reg32(f0, vn); - vfp_load_reg32(f1, vm); + vfp_load_reg16(f0, vn); + vfp_load_reg16(f1, vm); if (reads_vd) { - vfp_load_reg32(fd, vd); + vfp_load_reg16(fd, vd); } fn(fd, f0, f1, fpst); vfp_store_reg32(fd, vd); @@ -1633,7 +1638,7 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) } f0 = tcg_temp_new_i32(); - vfp_load_reg32(f0, vm); + 
vfp_load_reg16(f0, vm); fn(f0, f0); vfp_store_reg32(f0, vd); @@ -1763,7 +1768,7 @@ static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_mulh(tmp, vn, vm, fpst); - gen_helper_vfp_negh(tmp, tmp); + gen_vfp_negh(tmp, tmp); gen_helper_vfp_addh(vd, vd, tmp, fpst); } @@ -1781,7 +1786,7 @@ static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_muls(tmp, vn, vm, fpst); - gen_helper_vfp_negs(tmp, tmp); + gen_vfp_negs(tmp, tmp); gen_helper_vfp_adds(vd, vd, tmp, fpst); } @@ -1799,7 +1804,7 @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) TCGv_i64 tmp = tcg_temp_new_i64(); gen_helper_vfp_muld(tmp, vn, vm, fpst); - gen_helper_vfp_negd(tmp, tmp); + gen_vfp_negd(tmp, tmp); gen_helper_vfp_addd(vd, vd, tmp, fpst); } @@ -1819,7 +1824,7 @@ static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_mulh(tmp, vn, vm, fpst); - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(vd, vd); gen_helper_vfp_addh(vd, vd, tmp, fpst); } @@ -1839,7 +1844,7 @@ static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_muls(tmp, vn, vm, fpst); - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(vd, vd); gen_helper_vfp_adds(vd, vd, tmp, fpst); } @@ -1859,7 +1864,7 @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) TCGv_i64 tmp = tcg_temp_new_i64(); gen_helper_vfp_muld(tmp, vn, vm, fpst); - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(vd, vd); gen_helper_vfp_addd(vd, vd, tmp, fpst); } @@ -1874,8 +1879,8 @@ static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_mulh(tmp, vn, vm, fpst); - gen_helper_vfp_negh(tmp, tmp); - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(tmp, tmp); + gen_vfp_negh(vd, vd); gen_helper_vfp_addh(vd, vd, tmp, fpst); } @@ -1890,8 +1895,8 @@ static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_muls(tmp, vn, vm, fpst); - gen_helper_vfp_negs(tmp, tmp); - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(tmp, tmp); + gen_vfp_negs(vd, vd); gen_helper_vfp_adds(vd, vd, tmp, fpst); } @@ -1906,8 +1911,8 @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) TCGv_i64 tmp = tcg_temp_new_i64(); gen_helper_vfp_muld(tmp, vn, vm, fpst); - gen_helper_vfp_negd(tmp, tmp); - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(tmp, tmp); + gen_vfp_negd(vd, vd); gen_helper_vfp_addd(vd, vd, tmp, fpst); } @@ -1935,7 +1940,7 @@ static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ gen_helper_vfp_mulh(vd, vn, vm, fpst); - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(vd, vd); } static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a) @@ -1947,7 +1952,7 @@ static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ gen_helper_vfp_muls(vd, vn, vm, fpst); - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(vd, vd); } static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a) @@ -1959,7 +1964,7 @@ static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ gen_helper_vfp_muld(vd, vn, vm, fpst); - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(vd, vd); } static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp 
*a) @@ -2106,16 +2111,16 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i32(); - vfp_load_reg32(vn, a->vn); - vfp_load_reg32(vm, a->vm); + vfp_load_reg16(vn, a->vn); + vfp_load_reg16(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ - gen_helper_vfp_negh(vn, vn); + gen_vfp_negh(vn, vn); } - vfp_load_reg32(vd, a->vd); + vfp_load_reg16(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); @@ -2169,12 +2174,12 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) vfp_load_reg32(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ - gen_helper_vfp_negs(vn, vn); + gen_vfp_negs(vn, vn); } vfp_load_reg32(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); @@ -2234,12 +2239,12 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) vfp_load_reg64(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ - gen_helper_vfp_negd(vn, vn); + gen_vfp_negd(vn, vn); } vfp_load_reg64(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); @@ -2409,13 +2414,13 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64) -DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith) -DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2) -DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2) +DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith) +DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2) +DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2) -DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith) -DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2) -DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2) +DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith) +DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2) +DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2) static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) { @@ -2456,11 +2461,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i32(); - vfp_load_reg32(vd, a->vd); + vfp_load_reg16(vd, a->vd); if (a->z) { tcg_gen_movi_i32(vm, 0); } else { - vfp_load_reg32(vm, a->vm); + vfp_load_reg16(vm, a->vm); } if (a->e) { @@ -2700,7 +2705,7 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) } tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vm); + vfp_load_reg16(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); @@ -2773,7 +2778,7 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) } tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vm); + vfp_load_reg16(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rinth(tmp, tmp, fpst); @@ -2853,7 +2858,7 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) } tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vm); + vfp_load_reg16(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth_exact(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); @@ -3270,7 +3275,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) fpst = 
fpstatus_ptr(FPST_FPCR_F16); vm = tcg_temp_new_i32(); - vfp_load_reg32(vm, a->vm); + vfp_load_reg16(vm, a->vm); if (a->s) { if (a->rz) { @@ -3383,8 +3388,8 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a) /* Insert low half of Vm into high half of Vd */ rm = tcg_temp_new_i32(); rd = tcg_temp_new_i32(); - vfp_load_reg32(rm, a->vm); - vfp_load_reg32(rd, a->vd); + vfp_load_reg16(rm, a->vm); + vfp_load_reg16(rd, a->vd); tcg_gen_deposit_i32(rd, rd, rm, 16, 16); vfp_store_reg32(rd, a->vd); return true; diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index d605e10f11..c5bc691d92 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -2912,1594 +2912,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) gen_rfe(s, pc, load_cpu_field(spsr)); } -static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, - uint32_t opr_sz, uint32_t max_sz, - gen_helper_gvec_3_ptr *fn) -{ - TCGv_ptr qc_ptr = tcg_temp_new_ptr(); - - tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); - tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, - opr_sz, max_sz, 0, fn); -} - -void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static gen_helper_gvec_3_ptr * const fns[2] = { - gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 - }; - tcg_debug_assert(vece >= 1 && vece <= 2); - gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); -} - -void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static gen_helper_gvec_3_ptr * const fns[2] = { - gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 - }; - tcg_debug_assert(vece >= 1 && vece <= 2); - gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); -} - -#define GEN_CMP0(NAME, COND) \ - void NAME(unsigned vece, uint32_t d, uint32_t m, \ - uint32_t opr_sz, uint32_t max_sz) \ - { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } - -GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) -GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) -GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) -GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) -GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) - -#undef GEN_CMP0 - -static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_sari_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_sari_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { 
.fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_s, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize]. */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* - * Shifts larger than the element size are architecturally valid. - * Signed results in all sign bits. - */ - shift = MIN(shift, (8 << vece) - 1); - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); -} - -static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_shri_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_s, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64, }, - }; - - /* tszimm encoding produces immediates in the range [1..esize]. */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* - * Shifts larger than the element size are architecturally valid. - * Unsigned results in all zeros as input to accumulate: nop. - */ - if (shift < (8 << vece)) { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } else { - /* Nop, but we do need to clear the tail. */ - tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); - } -} - -/* - * Shift one less than the requested amount, and the low bit is - * the rounding bit. For the 8 and 16-bit operations, because we - * mask the low bit, we can perform a normal integer shift instead - * of a vector shift. 
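
In scalar terms the trick is: shift by one less than requested, keep the low bit as the rounding increment, then finish the shift and add the bit back. A model of the 32-bit signed case, assuming the usual arithmetic behaviour of >> on signed values:

#include <stdint.h>

/* Rounded signed shift right, 1 <= sh <= 32, following gen_srshr32_i32:
 * at sh == 32 the shifted value is all sign bits and adding the
 * rounding bit cancels it, so the result is always zero. */
static int32_t srshr32(int32_t a, unsigned sh)
{
    if (sh == 32) {
        return 0;
    }
    int32_t round = (a >> (sh - 1)) & 1;
    return (a >> sh) + round;
}
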
- */ -static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, sh - 1); - tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); - tcg_gen_vec_sar8i_i64(d, a, sh); - tcg_gen_vec_add8_i64(d, d, t); -} - -static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, sh - 1); - tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); - tcg_gen_vec_sar16i_i64(d, a, sh); - tcg_gen_vec_add16_i64(d, d, t); -} - -static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) -{ - TCGv_i32 t; - - /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ - if (sh == 32) { - tcg_gen_movi_i32(d, 0); - return; - } - t = tcg_temp_new_i32(); - tcg_gen_extract_i32(t, a, sh - 1, 1); - tcg_gen_sari_i32(d, a, sh); - tcg_gen_add_i32(d, d, t); -} - -static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_extract_i64(t, a, sh - 1, 1); - tcg_gen_sari_i64(d, a, sh); - tcg_gen_add_i64(d, d, t); -} - -static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec ones = tcg_temp_new_vec_matching(d); - - tcg_gen_shri_vec(vece, t, a, sh - 1); - tcg_gen_dupi_vec(vece, ones, 1); - tcg_gen_and_vec(vece, t, t, ones); - tcg_gen_sari_vec(vece, d, a, sh); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_srshr8_i64, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_srshr16_i64, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_srshr32_i32, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_s, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_srshr64_i64, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize] */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - if (shift == (8 << vece)) { - /* - * Shifts larger than the element size are architecturally valid. - * Signed results in all sign bits. With rounding, this produces - * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. - * I.e. always zero. 
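
Equivalently, the rounded shift is (x + 2^(sh-1)) >> sh. With sh equal to the element size and x a signed esize-bit value, x + 2^(esize-1) lies in [0, 2^esize - 1], which the final shift reduces to 0 for every input, hence the dup_imm of zero just below. A one-function check of the 8-bit case:

#include <stdint.h>

/* Returns 0 for every int8_t x: x + 128 is in [0, 255], so >> 8 == 0. */
static int32_t srshr8_by_full_width(int8_t x)
{
    return ((int32_t)x + 128) >> 8;
}
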
- */ - tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); - } else { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } -} - -static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - gen_srshr8_i64(t, a, sh); - tcg_gen_vec_add8_i64(d, d, t); -} - -static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - gen_srshr16_i64(t, a, sh); - tcg_gen_vec_add16_i64(d, d, t); -} - -static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - gen_srshr32_i32(t, a, sh); - tcg_gen_add_i32(d, d, t); -} - -static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - gen_srshr64_i64(t, a, sh); - tcg_gen_add_i64(d, d, t); -} - -static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - - gen_srshr_vec(vece, t, a, sh); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_srsra8_i64, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_b, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_8 }, - { .fni8 = gen_srsra16_i64, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_h, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_srsra32_i32, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_s, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_srsra64_i64, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize] */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* - * Shifts larger than the element size are architecturally valid. - * Signed results in all sign bits. With rounding, this produces - * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. - * I.e. always zero. With accumulation, this leaves D unchanged. - */ - if (shift == (8 << vece)) { - /* Nop, but we do need to clear the tail. 
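
The "tail" is the part of the vector register beyond the operation's opr_sz: AArch64 writes to a Vd must zero it, so even a no-op accumulate has to be emitted as a move of D onto itself to pick up that side effect. As a memory-level sketch of what the gvec move achieves, assuming a flat byte-array register file:

#include <stddef.h>
#include <string.h>

/* Model of tcg_gen_gvec_mov(vece, d, d, opr_sz, max_sz) when the data
 * itself is unchanged: only bytes [opr_sz, max_sz) get zeroed. */
static void clear_tail(unsigned char *vd, size_t opr_sz, size_t max_sz)
{
    memset(vd + opr_sz, 0, max_sz - opr_sz);
}
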
-
-static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, sh - 1);
-    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
-    tcg_gen_vec_shr8i_i64(d, a, sh);
-    tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, sh - 1);
-    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
-    tcg_gen_vec_shr16i_i64(d, a, sh);
-    tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
-    TCGv_i32 t;
-
-    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
-    if (sh == 32) {
-        tcg_gen_extract_i32(d, a, sh - 1, 1);
-        return;
-    }
-    t = tcg_temp_new_i32();
-    tcg_gen_extract_i32(t, a, sh - 1, 1);
-    tcg_gen_shri_i32(d, a, sh);
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_extract_i64(t, a, sh - 1, 1);
-    tcg_gen_shri_i64(d, a, sh);
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec ones = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_shri_vec(vece, t, a, shift - 1);
-    tcg_gen_dupi_vec(vece, ones, 1);
-    tcg_gen_and_vec(vece, t, t, ones);
-    tcg_gen_shri_vec(vece, d, a, shift);
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_urshr8_i64,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_urshr16_i64,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_urshr32_i32,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_s,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_urshr64_i64,
-          .fniv = gen_urshr_vec,
-          .fno = gen_helper_gvec_urshr_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize] */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    if (shift == (8 << vece)) {
-        /*
-         * Shifts larger than the element size are architecturally valid.
-         * Unsigned results in zero.  With rounding, this produces a
-         * copy of the most significant bit.
-         */
-        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
-    } else {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    }
-}
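/*
 * For reference, the unsigned rounded shift differs from the signed one
 * only at sh == esize: the plain shift result is zero, but the rounding
 * bit (a copy of the MSB) survives, which is why the code above emits a
 * single gvec shift by sh - 1 for that case. A scalar sketch:
 */
#include <stdint.h>

static uint32_t urshr32_model(uint32_t a, int sh)  /* 1 <= sh <= 32 */
{
    uint32_t round = (a >> (sh - 1)) & 1;
    return sh == 32 ? round : (a >> sh) + round;
}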
-
-static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    if (sh == 8) {
-        tcg_gen_vec_shr8i_i64(t, a, 7);
-    } else {
-        gen_urshr8_i64(t, a, sh);
-    }
-    tcg_gen_vec_add8_i64(d, d, t);
-}
-
-static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    if (sh == 16) {
-        tcg_gen_vec_shr16i_i64(t, a, 15);
-    } else {
-        gen_urshr16_i64(t, a, sh);
-    }
-    tcg_gen_vec_add16_i64(d, d, t);
-}
-
-static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    if (sh == 32) {
-        tcg_gen_shri_i32(t, a, 31);
-    } else {
-        gen_urshr32_i32(t, a, sh);
-    }
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    if (sh == 64) {
-        tcg_gen_shri_i64(t, a, 63);
-    } else {
-        gen_urshr64_i64(t, a, sh);
-    }
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    if (sh == (8 << vece)) {
-        tcg_gen_shri_vec(vece, t, a, sh - 1);
-    } else {
-        gen_urshr_vec(vece, t, a, sh);
-    }
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_shri_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen2i ops[4] = {
-        { .fni8 = gen_ursra8_i64,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_b,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni8 = gen_ursra16_i64,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_h,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_ursra32_i32,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_s,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_ursra64_i64,
-          .fniv = gen_ursra_vec,
-          .fno = gen_helper_gvec_ursra_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize] */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-}
-
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
-}
-
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
-}
-
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
-    tcg_gen_shri_vec(vece, t, a, sh);
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-}
-
-void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
-    const GVecGen2i ops[4] = {
-        { .fni8 = gen_shr8_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_b,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_shr16_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_h,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_shr32_ins_i32,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_s,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_shr64_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .fno = gen_helper_gvec_sri_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [1..esize]. */
-    tcg_debug_assert(shift > 0);
-    tcg_debug_assert(shift <= (8 << vece));
-
-    /* Shift of esize leaves destination unchanged. */
-    if (shift < (8 << vece)) {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    } else {
-        /* Nop, but we do need to clear the tail. */
-        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
-    }
-}
-
-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-}
-
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
-
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
-}
-
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_shli_vec(vece, t, a, sh);
-    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-}
-
-void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
-    const GVecGen2i ops[4] = {
-        { .fni8 = gen_shl8_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_b,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni8 = gen_shl16_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_h,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_shl32_ins_i32,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_s,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_shl64_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .fno = gen_helper_gvec_sli_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-
-    /* tszimm encoding produces immediates in the range [0..esize-1]. */
-    tcg_debug_assert(shift >= 0);
-    tcg_debug_assert(shift < (8 << vece));
-
-    if (shift == 0) {
-        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
-    } else {
-        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
-    }
-}
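/*
 * A scalar model of the "shift and insert" pattern used by SRI and SLI
 * above: only the bits produced by the shift are written, everything else
 * in the destination is preserved (a sketch of the right-insert case, not
 * the helper itself):
 */
#include <stdint.h>

static uint8_t sri8_model(uint8_t d, uint8_t a, int sh)  /* 1 <= sh <= 8 */
{
    if (sh == 8) {
        return d;                  /* every inserted bit was shifted out */
    }
    uint8_t mask = 0xff >> sh;     /* bits that come from the shift */
    return (d & ~mask) | ((uint8_t)(a >> sh) & mask);
}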
-
-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_add_u8(d, d, a);
-}
-
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_sub_u8(d, d, a);
-}
-
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_add_u16(d, d, a);
-}
-
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_sub_u16(d, d, a);
-}
-
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_sub_i32(d, d, a);
-}
-
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_sub_i64(d, d, a);
-}
-
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_sub_vec(vece, d, d, a);
-}
-
-/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
- * these tables are shared with AArch64 which does support them.
- */
-void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_mul_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fni4 = gen_mla8_i32,
-          .fniv = gen_mla_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni4 = gen_mla16_i32,
-          .fniv = gen_mla_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_mla32_i32,
-          .fniv = gen_mla_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_mla64_i64,
-          .fniv = gen_mla_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fni4 = gen_mls8_i32,
-          .fniv = gen_mls_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni4 = gen_mls16_i32,
-          .fniv = gen_mls_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_mls32_i32,
-          .fniv = gen_mls_vec,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_mls64_i64,
-          .fniv = gen_mls_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_and_i32(d, a, b);
-    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
-}
-
-void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_and_i64(d, a, b);
-    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
-}
-
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_and_vec(vece, d, a, b);
-    tcg_gen_dupi_vec(vece, a, 0);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
-
-void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
-    static const GVecGen3 ops[4] = {
-        { .fni4 = gen_helper_neon_tst_u8,
-          .fniv = gen_cmtst_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fni4 = gen_helper_neon_tst_u16,
-          .fniv = gen_cmtst_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_cmtst_i32,
-          .fniv = gen_cmtst_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_cmtst_i64,
-          .fniv = gen_cmtst_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
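/*
 * CMTST in one line: an element becomes all-ones when the two operands
 * share any set bit, which is what the negsetcond (0 - (x != 0)) sequence
 * above computes. A scalar model for illustration:
 */
#include <stdint.h>

static uint32_t cmtst32_model(uint32_t a, uint32_t b)
{
    return -(uint32_t)((a & b) != 0);   /* 0xffffffff or 0 */
}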
-
-void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
-{
-    TCGv_i32 lval = tcg_temp_new_i32();
-    TCGv_i32 rval = tcg_temp_new_i32();
-    TCGv_i32 lsh = tcg_temp_new_i32();
-    TCGv_i32 rsh = tcg_temp_new_i32();
-    TCGv_i32 zero = tcg_constant_i32(0);
-    TCGv_i32 max = tcg_constant_i32(32);
-
-    /*
-     * Rely on the TCG guarantee that out of range shifts produce
-     * unspecified results, not undefined behaviour (i.e. no trap).
-     * Discard out-of-range results after the fact.
-     */
-    tcg_gen_ext8s_i32(lsh, shift);
-    tcg_gen_neg_i32(rsh, lsh);
-    tcg_gen_shl_i32(lval, src, lsh);
-    tcg_gen_shr_i32(rval, src, rsh);
-    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
-    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
-}
-
-void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
-{
-    TCGv_i64 lval = tcg_temp_new_i64();
-    TCGv_i64 rval = tcg_temp_new_i64();
-    TCGv_i64 lsh = tcg_temp_new_i64();
-    TCGv_i64 rsh = tcg_temp_new_i64();
-    TCGv_i64 zero = tcg_constant_i64(0);
-    TCGv_i64 max = tcg_constant_i64(64);
-
-    /*
-     * Rely on the TCG guarantee that out of range shifts produce
-     * unspecified results, not undefined behaviour (i.e. no trap).
-     * Discard out-of-range results after the fact.
-     */
-    tcg_gen_ext8s_i64(lsh, shift);
-    tcg_gen_neg_i64(rsh, lsh);
-    tcg_gen_shl_i64(lval, src, lsh);
-    tcg_gen_shr_i64(rval, src, rsh);
-    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
-    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
-}
-
-static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
-                         TCGv_vec src, TCGv_vec shift)
-{
-    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
-    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
-    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
-    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
-    TCGv_vec msk, max;
-
-    tcg_gen_neg_vec(vece, rsh, shift);
-    if (vece == MO_8) {
-        tcg_gen_mov_vec(lsh, shift);
-    } else {
-        msk = tcg_temp_new_vec_matching(dst);
-        tcg_gen_dupi_vec(vece, msk, 0xff);
-        tcg_gen_and_vec(vece, lsh, shift, msk);
-        tcg_gen_and_vec(vece, rsh, rsh, msk);
-    }
-
-    /*
-     * Rely on the TCG guarantee that out of range shifts produce
-     * unspecified results, not undefined behaviour (i.e. no trap).
-     * Discard out-of-range results after the fact.
-     */
-    tcg_gen_shlv_vec(vece, lval, src, lsh);
-    tcg_gen_shrv_vec(vece, rval, src, rsh);
-
-    max = tcg_temp_new_vec_matching(dst);
-    tcg_gen_dupi_vec(vece, max, 8 << vece);
-
-    /*
-     * The choice of LT (signed) and GEU (unsigned) are biased toward
-     * the instructions of the x86_64 host.  For MO_8, the whole byte
-     * is significant so we must use an unsigned compare; otherwise we
-     * have already masked to a byte and so a signed compare works.
-     * Other tcg hosts have a full set of comparisons and do not care.
-     */
-    if (vece == MO_8) {
-        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
-        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
-        tcg_gen_andc_vec(vece, lval, lval, lsh);
-        tcg_gen_andc_vec(vece, rval, rval, rsh);
-    } else {
-        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
-        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
-        tcg_gen_and_vec(vece, lval, lval, lsh);
-        tcg_gen_and_vec(vece, rval, rval, rsh);
-    }
-    tcg_gen_or_vec(vece, dst, lval, rval);
-}
-
-void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_neg_vec, INDEX_op_shlv_vec,
-        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fniv = gen_ushl_vec,
-          .fno = gen_helper_gvec_ushl_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fniv = gen_ushl_vec,
-          .fno = gen_helper_gvec_ushl_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_ushl_i32,
-          .fniv = gen_ushl_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_ushl_i64,
-          .fniv = gen_ushl_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
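/*
 * A scalar model of USHL's semantics as implemented above: the shift
 * count is the signed low byte of the second operand, negative counts
 * shift right, and any magnitude >= esize yields zero. The TCG version
 * computes both candidate shifts and discards the out-of-range one with
 * movcond; plain C can simply branch (illustration only):
 */
#include <stdint.h>

static uint32_t ushl32_model(uint32_t src, int8_t shift)
{
    if (shift <= -32 || shift >= 32) {
        return 0;                        /* out of range either way */
    }
    return shift >= 0 ? src << shift : src >> -shift;
}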
-
-void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
-{
-    TCGv_i32 lval = tcg_temp_new_i32();
-    TCGv_i32 rval = tcg_temp_new_i32();
-    TCGv_i32 lsh = tcg_temp_new_i32();
-    TCGv_i32 rsh = tcg_temp_new_i32();
-    TCGv_i32 zero = tcg_constant_i32(0);
-    TCGv_i32 max = tcg_constant_i32(31);
-
-    /*
-     * Rely on the TCG guarantee that out of range shifts produce
-     * unspecified results, not undefined behaviour (i.e. no trap).
-     * Discard out-of-range results after the fact.
-     */
-    tcg_gen_ext8s_i32(lsh, shift);
-    tcg_gen_neg_i32(rsh, lsh);
-    tcg_gen_shl_i32(lval, src, lsh);
-    tcg_gen_umin_i32(rsh, rsh, max);
-    tcg_gen_sar_i32(rval, src, rsh);
-    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
-    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
-}
-
-void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
-{
-    TCGv_i64 lval = tcg_temp_new_i64();
-    TCGv_i64 rval = tcg_temp_new_i64();
-    TCGv_i64 lsh = tcg_temp_new_i64();
-    TCGv_i64 rsh = tcg_temp_new_i64();
-    TCGv_i64 zero = tcg_constant_i64(0);
-    TCGv_i64 max = tcg_constant_i64(63);
-
-    /*
-     * Rely on the TCG guarantee that out of range shifts produce
-     * unspecified results, not undefined behaviour (i.e. no trap).
-     * Discard out-of-range results after the fact.
-     */
-    tcg_gen_ext8s_i64(lsh, shift);
-    tcg_gen_neg_i64(rsh, lsh);
-    tcg_gen_shl_i64(lval, src, lsh);
-    tcg_gen_umin_i64(rsh, rsh, max);
-    tcg_gen_sar_i64(rval, src, rsh);
-    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
-    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
-}
-
-static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
-                         TCGv_vec src, TCGv_vec shift)
-{
-    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
-    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
-    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
-    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
-    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
-
-    /*
-     * Rely on the TCG guarantee that out of range shifts produce
-     * unspecified results, not undefined behaviour (i.e. no trap).
-     * Discard out-of-range results after the fact.
-     */
-    tcg_gen_neg_vec(vece, rsh, shift);
-    if (vece == MO_8) {
-        tcg_gen_mov_vec(lsh, shift);
-    } else {
-        tcg_gen_dupi_vec(vece, tmp, 0xff);
-        tcg_gen_and_vec(vece, lsh, shift, tmp);
-        tcg_gen_and_vec(vece, rsh, rsh, tmp);
-    }
-
-    /* Bound rsh so out of bound right shift gets -1. */
-    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
-    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
-    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
-
-    tcg_gen_shlv_vec(vece, lval, src, lsh);
-    tcg_gen_sarv_vec(vece, rval, src, rsh);
-
-    /* Select in-bound left shift. */
-    tcg_gen_andc_vec(vece, lval, lval, tmp);
-
-    /* Select between left and right shift. */
-    if (vece == MO_8) {
-        tcg_gen_dupi_vec(vece, tmp, 0);
-        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
-    } else {
-        tcg_gen_dupi_vec(vece, tmp, 0x80);
-        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
-    }
-}
-
-void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
-        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fniv = gen_sshl_vec,
-          .fno = gen_helper_gvec_sshl_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fniv = gen_sshl_vec,
-          .fno = gen_helper_gvec_sshl_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_sshl_i32,
-          .fniv = gen_sshl_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_sshl_i64,
-          .fniv = gen_sshl_vec,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
-                          TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec x = tcg_temp_new_vec_matching(t);
-    tcg_gen_add_vec(vece, x, a, b);
-    tcg_gen_usadd_vec(vece, t, a, b);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
-    tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen4 ops[4] = {
-        { .fniv = gen_uqadd_vec,
-          .fno = gen_helper_gvec_uqadd_b,
-          .write_aofs = true,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fniv = gen_uqadd_vec,
-          .fno = gen_helper_gvec_uqadd_h,
-          .write_aofs = true,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fniv = gen_uqadd_vec,
-          .fno = gen_helper_gvec_uqadd_s,
-          .write_aofs = true,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fniv = gen_uqadd_vec,
-          .fno = gen_helper_gvec_uqadd_d,
-          .write_aofs = true,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
-                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
-                          TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec x = tcg_temp_new_vec_matching(t);
-    tcg_gen_add_vec(vece, x, a, b);
-    tcg_gen_ssadd_vec(vece, t, a, b);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
-    tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-    };
-    static const GVecGen4 ops[4] = {
-        { .fniv = gen_sqadd_vec,
-          .fno = gen_helper_gvec_sqadd_b,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_8 },
-        { .fniv = gen_sqadd_vec,
-          .fno = gen_helper_gvec_sqadd_h,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_16 },
-        { .fniv = gen_sqadd_vec,
-          .fno = gen_helper_gvec_sqadd_s,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_32 },
-        { .fniv = gen_sqadd_vec,
-          .fno = gen_helper_gvec_sqadd_d,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
-                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
-                          TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec x = tcg_temp_new_vec_matching(t);
-    tcg_gen_sub_vec(vece, x, a, b);
-    tcg_gen_ussub_vec(vece, t, a, b);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
-    tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-    };
-    static const GVecGen4 ops[4] = {
-        { .fniv = gen_uqsub_vec,
-          .fno = gen_helper_gvec_uqsub_b,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_8 },
-        { .fniv = gen_uqsub_vec,
-          .fno = gen_helper_gvec_uqsub_h,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_16 },
-        { .fniv = gen_uqsub_vec,
-          .fno = gen_helper_gvec_uqsub_s,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_32 },
-        { .fniv = gen_uqsub_vec,
-          .fno = gen_helper_gvec_uqsub_d,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
-                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
-                          TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec x = tcg_temp_new_vec_matching(t);
-    tcg_gen_sub_vec(vece, x, a, b);
-    tcg_gen_sssub_vec(vece, t, a, b);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
-    tcg_gen_or_vec(vece, sat, sat, x);
-}
-
-void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-    };
-    static const GVecGen4 ops[4] = {
-        { .fniv = gen_sqsub_vec,
-          .fno = gen_helper_gvec_sqsub_b,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_8 },
-        { .fniv = gen_sqsub_vec,
-          .fno = gen_helper_gvec_sqsub_h,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_16 },
-        { .fniv = gen_sqsub_vec,
-          .fno = gen_helper_gvec_sqsub_s,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_32 },
-        { .fniv = gen_sqsub_vec,
-          .fno = gen_helper_gvec_sqsub_d,
-          .opt_opc = vecop_list,
-          .write_aofs = true,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
-                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
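/*
 * The four *_qc expansions above all follow one pattern: compute the
 * wrapped result and the saturated result, and OR "did they differ?"
 * into the sticky QC vector at vfp.qc. A scalar sketch for the unsigned
 * byte add (an illustration, not the helper itself):
 */
#include <stdbool.h>
#include <stdint.h>

static uint8_t uqadd8_model(uint8_t a, uint8_t b, bool *qc)
{
    uint8_t wrapped = (uint8_t)(a + b);
    uint8_t sat = wrapped < a ? 0xff : wrapped;  /* clamp on carry-out */
    *qc |= sat != wrapped;                       /* sticky saturation flag */
    return sat;
}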
-
-static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    tcg_gen_sub_i32(t, a, b);
-    tcg_gen_sub_i32(d, b, a);
-    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
-}
-
-static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_sub_i64(t, a, b);
-    tcg_gen_sub_i64(d, b, a);
-    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
-}
-
-static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_smin_vec(vece, t, a, b);
-    tcg_gen_smax_vec(vece, d, a, b);
-    tcg_gen_sub_vec(vece, d, d, t);
-}
-
-void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fniv = gen_sabd_vec,
-          .fno = gen_helper_gvec_sabd_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fniv = gen_sabd_vec,
-          .fno = gen_helper_gvec_sabd_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_sabd_i32,
-          .fniv = gen_sabd_vec,
-          .fno = gen_helper_gvec_sabd_s,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_sabd_i64,
-          .fniv = gen_sabd_vec,
-          .fno = gen_helper_gvec_sabd_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    tcg_gen_sub_i32(t, a, b);
-    tcg_gen_sub_i32(d, b, a);
-    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
-}
-
-static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_sub_i64(t, a, b);
-    tcg_gen_sub_i64(d, b, a);
-    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
-}
-
-static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_umin_vec(vece, t, a, b);
-    tcg_gen_umax_vec(vece, d, a, b);
-    tcg_gen_sub_vec(vece, d, d, t);
-}
-
-void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fniv = gen_uabd_vec,
-          .fno = gen_helper_gvec_uabd_b,
-          .opt_opc = vecop_list,
-          .vece = MO_8 },
-        { .fniv = gen_uabd_vec,
-          .fno = gen_helper_gvec_uabd_h,
-          .opt_opc = vecop_list,
-          .vece = MO_16 },
-        { .fni4 = gen_uabd_i32,
-          .fniv = gen_uabd_vec,
-          .fno = gen_helper_gvec_uabd_s,
-          .opt_opc = vecop_list,
-          .vece = MO_32 },
-        { .fni8 = gen_uabd_i64,
-          .fniv = gen_uabd_vec,
-          .fno = gen_helper_gvec_uabd_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
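/*
 * Absolute difference without a widening type, as the expansions above do
 * it: compute both a - b and b - a and keep the non-negative one (scalar
 * movcond), or subtract min from max (vector). A scalar sketch, using
 * unsigned arithmetic so the wrap-around subtraction is well defined in C:
 */
#include <stdint.h>

static uint32_t sabd32_model(int32_t a, int32_t b)
{
    return a < b ? (uint32_t)b - (uint32_t)a : (uint32_t)a - (uint32_t)b;
}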
-
-static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-    gen_sabd_i32(t, a, b);
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-    gen_sabd_i64(t, a, b);
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    gen_sabd_vec(vece, t, a, b);
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_sub_vec, INDEX_op_add_vec,
-        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fniv = gen_saba_vec,
-          .fno = gen_helper_gvec_saba_b,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fniv = gen_saba_vec,
-          .fno = gen_helper_gvec_saba_h,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_saba_i32,
-          .fniv = gen_saba_vec,
-          .fno = gen_helper_gvec_saba_s,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_saba_i64,
-          .fniv = gen_saba_vec,
-          .fno = gen_helper_gvec_saba_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
-static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-    gen_uabd_i32(t, a, b);
-    tcg_gen_add_i32(d, d, t);
-}
-
-static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-    gen_uabd_i64(t, a, b);
-    tcg_gen_add_i64(d, d, t);
-}
-
-static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    gen_uabd_vec(vece, t, a, b);
-    tcg_gen_add_vec(vece, d, d, t);
-}
-
-void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
-                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
-{
-    static const TCGOpcode vecop_list[] = {
-        INDEX_op_sub_vec, INDEX_op_add_vec,
-        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
-    };
-    static const GVecGen3 ops[4] = {
-        { .fniv = gen_uaba_vec,
-          .fno = gen_helper_gvec_uaba_b,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fniv = gen_uaba_vec,
-          .fno = gen_helper_gvec_uaba_h,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_uaba_i32,
-          .fniv = gen_uaba_vec,
-          .fno = gen_helper_gvec_uaba_s,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_uaba_i64,
-          .fniv = gen_uaba_vec,
-          .fno = gen_helper_gvec_uaba_d,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .opt_opc = vecop_list,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
-}
-
 static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
 {
     static const uint16_t mask[3] = {
@@ -8765,12 +7177,12 @@ static bool trans_PLD(DisasContext *s, arg_PLD *a)
     return ENABLE_ARCH_5TE;
 }
 
-static bool trans_PLDW(DisasContext *s, arg_PLD *a)
+static bool trans_PLDW(DisasContext *s, arg_PLDW *a)
 {
     return arm_dc_feature(s, ARM_FEATURE_V7MP);
 }
 
-static bool trans_PLI(DisasContext *s, arg_PLD *a)
+static bool trans_PLI(DisasContext *s, arg_PLI *a)
 {
     return ENABLE_ARCH_7;
 }
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index dc66ff2190..3abdbedfe5 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -252,6 +252,11 @@ static inline int shl_12(DisasContext *s, int x)
     return x << 12;
 }
 
+static inline int xor_2(DisasContext *s, int x)
+{
+    return x ^ 2;
+}
+
 static inline int neon_3same_fp_size(DisasContext *s, int x)
 {
     /* Convert 0==fp32, 1==fp16 into a MO_* value */
@@ -401,6 +406,36 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
  */
 uint64_t vfp_expand_imm(int size, uint8_t imm8);
 
+static inline void gen_vfp_absh(TCGv_i32 d, TCGv_i32 s)
+{
+    tcg_gen_andi_i32(d, s, INT16_MAX);
+}
+
+static inline void gen_vfp_abss(TCGv_i32 d, TCGv_i32 s)
+{
+    tcg_gen_andi_i32(d, s, INT32_MAX);
+}
+
+static inline void gen_vfp_absd(TCGv_i64 d, TCGv_i64 s)
+{
+    tcg_gen_andi_i64(d, s, INT64_MAX);
+}
+
+static inline void gen_vfp_negh(TCGv_i32 d, TCGv_i32 s)
+{
+    tcg_gen_xori_i32(d, s, 1u << 15);
+}
+
+static inline void gen_vfp_negs(TCGv_i32 d, TCGv_i32 s)
+{
+    tcg_gen_xori_i32(d, s, 1u << 31);
+}
+
+static inline void gen_vfp_negd(TCGv_i64 d, TCGv_i64 s)
+{
+    tcg_gen_xori_i64(d, s, 1ull << 63);
+}
+
 /* Vector operations shared between ARM and AArch64.  */
 void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    uint32_t opr_sz, uint32_t max_sz);
@@ -445,6 +480,11 @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz);
 
+void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh);
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh);
+void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh);
+void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh);
+
 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                     int64_t shift, uint32_t opr_sz, uint32_t max_sz);
 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
@@ -474,6 +514,17 @@ void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
 
+void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
  */
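/*
 * The new gen_vfp_abs and gen_vfp_neg inlines above act on the raw IEEE
 * bit pattern: fabs clears the sign bit, fneg flips it, with no FPU call
 * and no NaN special-casing needed. A sketch for the single-precision
 * layout (illustration only):
 */
#include <stdint.h>

static uint32_t f32_abs_bits(uint32_t bits)
{
    return bits & INT32_MAX;        /* clear bit 31, the sign bit */
}

static uint32_t f32_neg_bits(uint32_t bits)
{
    return bits ^ (1u << 31);       /* flip bit 31 */
}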
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 1f93510b85..56fea14edb 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -971,6 +971,11 @@ static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat)
     return -float32_eq_quiet(op1, op2, stat);
 }
 
+static uint64_t float64_ceq(float64 op1, float64 op2, float_status *stat)
+{
+    return -float64_eq_quiet(op1, op2, stat);
+}
+
 static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat)
 {
     return -float16_le(op2, op1, stat);
@@ -981,6 +986,11 @@ static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat)
 {
     return -float32_le(op2, op1, stat);
 }
 
+static uint64_t float64_cge(float64 op1, float64 op2, float_status *stat)
+{
+    return -float64_le(op2, op1, stat);
+}
+
 static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat)
 {
     return -float16_lt(op2, op1, stat);
@@ -991,6 +1001,11 @@ static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat)
 {
     return -float32_lt(op2, op1, stat);
 }
 
+static uint64_t float64_cgt(float64 op1, float64 op2, float_status *stat)
+{
+    return -float64_lt(op2, op1, stat);
+}
+
 static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat)
 {
     return -float16_le(float16_abs(op2), float16_abs(op1), stat);
@@ -1001,6 +1016,11 @@ static uint32_t float32_acge(float32 op1, float32 op2, float_status *stat)
 {
     return -float32_le(float32_abs(op2), float32_abs(op1), stat);
 }
 
+static uint64_t float64_acge(float64 op1, float64 op2, float_status *stat)
+{
+    return -float64_le(float64_abs(op2), float64_abs(op1), stat);
+}
+
 static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat)
 {
     return -float16_lt(float16_abs(op2), float16_abs(op1), stat);
@@ -1011,6 +1031,11 @@ static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat)
 {
     return -float32_lt(float32_abs(op2), float32_abs(op1), stat);
 }
 
+static uint64_t float64_acgt(float64 op1, float64 op2, float_status *stat)
+{
+    return -float64_lt(float64_abs(op2), float64_abs(op1), stat);
+}
+
 static int16_t vfp_tosszh(float16 x, void *fpstp)
 {
     float_status *fpst = fpstp;
@@ -1129,6 +1154,11 @@ static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
     return float32_abs(float32_sub(op1, op2, stat));
 }
 
+static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
+{
+    return float64_abs(float64_sub(op1, op2, stat));
+}
+
 /*
  * Reciprocal step. These are the AArch32 version which uses a
  * non-fused multiply-and-subtract.
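/*
 * The float64_ceq/cge/cgt helpers added above return 0 - bool, i.e. an
 * all-ones or all-zero 64-bit mask, which is how SIMD compares report
 * per-lane results. A sketch with the softfloat call replaced by a plain
 * C compare (so signalling-NaN behaviour is not identical):
 */
#include <stdint.h>

static uint64_t f64_ceq_model(double op1, double op2)
{
    return -(uint64_t)(op1 == op2);   /* all-ones if equal, else zero */
}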
@@ -1213,33 +1243,43 @@ DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
 
 DO_3OP(gvec_fabd_h, float16_abd, float16)
 DO_3OP(gvec_fabd_s, float32_abd, float32)
+DO_3OP(gvec_fabd_d, float64_abd, float64)
 
 DO_3OP(gvec_fceq_h, float16_ceq, float16)
 DO_3OP(gvec_fceq_s, float32_ceq, float32)
+DO_3OP(gvec_fceq_d, float64_ceq, float64)
 
 DO_3OP(gvec_fcge_h, float16_cge, float16)
 DO_3OP(gvec_fcge_s, float32_cge, float32)
+DO_3OP(gvec_fcge_d, float64_cge, float64)
 
 DO_3OP(gvec_fcgt_h, float16_cgt, float16)
 DO_3OP(gvec_fcgt_s, float32_cgt, float32)
+DO_3OP(gvec_fcgt_d, float64_cgt, float64)
 
 DO_3OP(gvec_facge_h, float16_acge, float16)
 DO_3OP(gvec_facge_s, float32_acge, float32)
+DO_3OP(gvec_facge_d, float64_acge, float64)
 
 DO_3OP(gvec_facgt_h, float16_acgt, float16)
 DO_3OP(gvec_facgt_s, float32_acgt, float32)
+DO_3OP(gvec_facgt_d, float64_acgt, float64)
 
 DO_3OP(gvec_fmax_h, float16_max, float16)
 DO_3OP(gvec_fmax_s, float32_max, float32)
+DO_3OP(gvec_fmax_d, float64_max, float64)
 
 DO_3OP(gvec_fmin_h, float16_min, float16)
 DO_3OP(gvec_fmin_s, float32_min, float32)
+DO_3OP(gvec_fmin_d, float64_min, float64)
 
 DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16)
 DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32)
+DO_3OP(gvec_fmaxnum_d, float64_maxnum, float64)
 
 DO_3OP(gvec_fminnum_h, float16_minnum, float16)
 DO_3OP(gvec_fminnum_s, float32_minnum, float32)
+DO_3OP(gvec_fminnum_d, float64_minnum, float64)
 
 DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
 DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
@@ -1248,6 +1288,13 @@ DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
 DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
 
 #ifdef TARGET_AARCH64
+DO_3OP(gvec_fdiv_h, float16_div, float16)
+DO_3OP(gvec_fdiv_s, float32_div, float32)
+DO_3OP(gvec_fdiv_d, float64_div, float64)
+
+DO_3OP(gvec_fmulx_h, helper_advsimd_mulxh, float16)
+DO_3OP(gvec_fmulx_s, helper_vfp_mulxs, float32)
+DO_3OP(gvec_fmulx_d, helper_vfp_mulxd, float64)
 
 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
 DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
@@ -1298,6 +1345,12 @@ static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
     return float32_muladd(op1, op2, dest, 0, stat);
 }
 
+static float64 float64_muladd_f(float64 dest, float64 op1, float64 op2,
+                                float_status *stat)
+{
+    return float64_muladd(op1, op2, dest, 0, stat);
+}
+
 static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
                                 float_status *stat)
 {
@@ -1310,6 +1363,12 @@ static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
     return float32_muladd(float32_chs(op1), op2, dest, 0, stat);
 }
 
+static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
+                                float_status *stat)
+{
+    return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
+}
+
 #define DO_MULADD(NAME, FUNC, TYPE) \
 void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
 { \
@@ -1329,9 +1388,11 @@ DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
 
 DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
 DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
+DO_MULADD(gvec_vfma_d, float64_muladd_f, float64)
 
 DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
 DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
+DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
 
 /* For the indexed ops, SVE applies the index per 128-bit vector segment.
  * For AdvSIMD, there is of course only one such vector segment.
@@ -1385,7 +1446,7 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8)
 
 #undef DO_MLA_IDX
 
-#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \
+#define DO_FMUL_IDX(NAME, ADD, MUL, TYPE, H) \
 void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
 { \
     intptr_t i, j, oprsz = simd_oprsz(desc); \
@@ -1395,33 +1456,37 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
     for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
         TYPE mm = m[H(i + idx)]; \
         for (j = 0; j < segment; j++) { \
-            d[i + j] = TYPE##_##ADD(d[i + j], \
-                                    TYPE##_mul(n[i + j], mm, stat), stat); \
+            d[i + j] = ADD(d[i + j], MUL(n[i + j], mm, stat), stat); \
         } \
     } \
     clear_tail(d, oprsz, simd_maxsz(desc)); \
 }
 
-#define float16_nop(N, M, S) (M)
-#define float32_nop(N, M, S) (M)
-#define float64_nop(N, M, S) (M)
+#define nop(N, M, S) (M)
+
+DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8)
+
+#ifdef TARGET_AARCH64
+
+DO_FMUL_IDX(gvec_fmulx_idx_h, nop, helper_advsimd_mulxh, float16, H2)
+DO_FMUL_IDX(gvec_fmulx_idx_s, nop, helper_vfp_mulxs, float32, H4)
+DO_FMUL_IDX(gvec_fmulx_idx_d, nop, helper_vfp_mulxd, float64, H8)
+
+#endif
 
-DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
-DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
-DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8)
+#undef nop
 
 /*
  * Non-fused multiply-accumulate operations, for Neon. NB that unlike
 * the fused ops below they assume accumulate both from and into Vd.
 */
-DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2)
-DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4)
-DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2)
-DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4)
-
-#undef float16_nop
-#undef float32_nop
-#undef float64_nop
+DO_FMUL_IDX(gvec_fmla_nf_idx_h, float16_add, float16_mul, float16, H2)
+DO_FMUL_IDX(gvec_fmla_nf_idx_s, float32_add, float32_mul, float32, H4)
+DO_FMUL_IDX(gvec_fmls_nf_idx_h, float16_sub, float16_mul, float16, H2)
+DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
 
 #undef DO_FMUL_IDX
 
 #define DO_FMLA_IDX(NAME, TYPE, H) \
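/*
 * Shape of the DO_FMUL_IDX loop above in plain C: one element of m is
 * broadcast per 128-bit segment, and the multiply (now a macro parameter,
 * so FMULX can reuse the same loop) is applied across that segment. A
 * sketch for 32-bit lanes, with the float_status plumbing dropped and the
 * lane count assumed to be a multiple of 4:
 */
#include <stddef.h>

static void fmul_idx_model(float *d, const float *n, const float *m,
                           size_t lanes, size_t idx)
{
    const size_t segment = 16 / sizeof(float);    /* 4 lanes per segment */
    for (size_t i = 0; i < lanes; i += segment) {
        float mm = m[i + idx];                    /* indexed element */
        for (size_t j = 0; j < segment; j++) {
            d[i + j] = n[i + j] * mm;
        }
    }
}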
@@ -2127,50 +2192,90 @@ DO_ABA(gvec_uaba_d, uint64_t)
 
 #undef DO_ABA
 
-#define DO_NEON_PAIRWISE(NAME, OP) \
-    void HELPER(NAME##s)(void *vd, void *vn, void *vm, \
-                         void *stat, uint32_t oprsz) \
-    { \
-        float_status *fpst = stat; \
-        float32 *d = vd; \
-        float32 *n = vn; \
-        float32 *m = vm; \
-        float32 r0, r1; \
-        \
-        /* Read all inputs before writing outputs in case vm == vd */ \
-        r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \
-        r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \
-        \
-        d[H4(0)] = r0; \
-        d[H4(1)] = r1; \
-    } \
-    \
-    void HELPER(NAME##h)(void *vd, void *vn, void *vm, \
-                         void *stat, uint32_t oprsz) \
-    { \
-        float_status *fpst = stat; \
-        float16 *d = vd; \
-        float16 *n = vn; \
-        float16 *m = vm; \
-        float16 r0, r1, r2, r3; \
-        \
-        /* Read all inputs before writing outputs in case vm == vd */ \
-        r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \
-        r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \
-        r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
-        r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
-        \
-        d[H2(0)] = r0; \
-        d[H2(1)] = r1; \
-        d[H2(2)] = r2; \
-        d[H2(3)] = r3; \
-    }
-
-DO_NEON_PAIRWISE(neon_padd, add)
-DO_NEON_PAIRWISE(neon_pmax, max)
-DO_NEON_PAIRWISE(neon_pmin, min)
-
-#undef DO_NEON_PAIRWISE
+#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    intptr_t oprsz = simd_oprsz(desc); \
+    intptr_t half = oprsz / sizeof(TYPE) / 2; \
+    TYPE *d = vd, *n = vn, *m = vm; \
+    if (unlikely(d == m)) { \
+        m = memcpy(&scratch, m, oprsz); \
+    } \
+    for (intptr_t i = 0; i < half; ++i) { \
+        d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)], stat); \
+    } \
+    for (intptr_t i = 0; i < half; ++i) { \
+        d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)], stat); \
+    } \
+    clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_3OP_PAIR(gvec_faddp_h, float16_add, float16, H2)
+DO_3OP_PAIR(gvec_faddp_s, float32_add, float32, H4)
+DO_3OP_PAIR(gvec_faddp_d, float64_add, float64, )
+
+DO_3OP_PAIR(gvec_fmaxp_h, float16_max, float16, H2)
+DO_3OP_PAIR(gvec_fmaxp_s, float32_max, float32, H4)
+DO_3OP_PAIR(gvec_fmaxp_d, float64_max, float64, )
+
+DO_3OP_PAIR(gvec_fminp_h, float16_min, float16, H2)
+DO_3OP_PAIR(gvec_fminp_s, float32_min, float32, H4)
+DO_3OP_PAIR(gvec_fminp_d, float64_min, float64, )
+
+DO_3OP_PAIR(gvec_fmaxnump_h, float16_maxnum, float16, H2)
+DO_3OP_PAIR(gvec_fmaxnump_s, float32_maxnum, float32, H4)
+DO_3OP_PAIR(gvec_fmaxnump_d, float64_maxnum, float64, )
+
+DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
+DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
+DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
+
+#undef DO_3OP_PAIR
+
+#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    intptr_t oprsz = simd_oprsz(desc); \
+    intptr_t half = oprsz / sizeof(TYPE) / 2; \
+    TYPE *d = vd, *n = vn, *m = vm; \
+    if (unlikely(d == m)) { \
+        m = memcpy(&scratch, m, oprsz); \
+    } \
+    for (intptr_t i = 0; i < half; ++i) { \
+        d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)]); \
+    } \
+    for (intptr_t i = 0; i < half; ++i) { \
+        d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)]); \
+    } \
+    clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+#define ADD(A, B) (A + B)
+DO_3OP_PAIR(gvec_addp_b, ADD, uint8_t, H1)
+DO_3OP_PAIR(gvec_addp_h, ADD, uint16_t, H2)
+DO_3OP_PAIR(gvec_addp_s, ADD, uint32_t, H4)
+DO_3OP_PAIR(gvec_addp_d, ADD, uint64_t, )
+#undef ADD
+
+DO_3OP_PAIR(gvec_smaxp_b, MAX, int8_t, H1)
+DO_3OP_PAIR(gvec_smaxp_h, MAX, int16_t, H2)
+DO_3OP_PAIR(gvec_smaxp_s, MAX, int32_t, H4)
+
+DO_3OP_PAIR(gvec_umaxp_b, MAX, uint8_t, H1)
+DO_3OP_PAIR(gvec_umaxp_h, MAX, uint16_t, H2)
+DO_3OP_PAIR(gvec_umaxp_s, MAX, uint32_t, H4)
+
+DO_3OP_PAIR(gvec_sminp_b, MIN, int8_t, H1)
+DO_3OP_PAIR(gvec_sminp_h, MIN, int16_t, H2)
+DO_3OP_PAIR(gvec_sminp_s, MIN, int32_t, H4)
+
+DO_3OP_PAIR(gvec_uminp_b, MIN, uint8_t, H1)
+DO_3OP_PAIR(gvec_uminp_h, MIN, uint16_t, H2)
+DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
+
+#undef DO_3OP_PAIR
 
 #define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
 void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 3e5e37abbe..ce26b8a71a 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -281,36 +281,6 @@ VFP_BINOP(minnum)
 VFP_BINOP(maxnum)
 #undef VFP_BINOP
 
-dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
-{
-    return float16_chs(a);
-}
-
-float32 VFP_HELPER(neg, s)(float32 a)
-{
-    return float32_chs(a);
-}
-
-float64 VFP_HELPER(neg, d)(float64 a)
-{
-    return float64_chs(a);
-}
-
-dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
-{
-    return float16_abs(a);
-}
-
-float32 VFP_HELPER(abs, s)(float32 a)
-{
-    return float32_abs(a);
-}
-
-float64 VFP_HELPER(abs, d)(float64 a)
-{
-    return float64_abs(a);
-}
-
 dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
 {
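/*
 * The DO_3OP_PAIR expansion above reduces adjacent pairs of the
 * concatenation n:m, so the low half of d comes from n and the high half
 * from m; the scratch copy handles d aliasing m. An integer sketch
 * (assumes an even lane count and that d does not alias m):
 */
#include <stdint.h>

static void addp_model(uint32_t *d, const uint32_t *n, const uint32_t *m,
                       int lanes)
{
    for (int i = 0; i < lanes / 2; i++) {
        d[i] = n[2 * i] + n[2 * i + 1];
        d[i + lanes / 2] = m[2 * i] + m[2 * i + 1];
    }
}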
     return float16_sqrt(a, &env->vfp.fp_status_f16);