aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-03-01 15:37:31 +0000
committerPeter Maydell <peter.maydell@linaro.org>2018-03-01 15:37:31 +0000
commit9db0855e8501365334e859370800c240d25322a2 (patch)
treebd4a8f0576258a43bf446035c26f9e5c1b1d2c40
parent8cb340c613ee3e626b070e0429c589f8a60ac657 (diff)
parentc22e580c2ad1cccef582e1490e732f254d4ac064 (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180301' into staging
target-arm queue: * update MAINTAINERS for Alistair's new email address * add Arm v8.2 FP16 arithmetic extension for linux-user * implement display connector emulation for vexpress board * xilinx_spips: Enable only two slaves when reading/writing with stripe * xilinx_spips: Use 8 dummy cycles with the QIOR/QIOR4 commands * hw: register: Run post_write hook on reset # gpg: Signature made Thu 01 Mar 2018 11:22:46 GMT # gpg: using RSA key 3C2525ED14360CDE # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" # gpg: aka "Peter Maydell <pmaydell@gmail.com>" # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20180301: (42 commits) MAINTAINERS: Update my email address linux-user: Report AArch64 FP16 support via hwcap bits target/arm: Enable ARM_V8_FP16 feature bit for the AArch64 "any" CPU arm/translate-a64: add all single op FP16 to handle_fp_1src_half arm/translate-a64: implement simd_scalar_three_reg_same_fp16 arm/translate-a64: add all FP16 ops in simd_scalar_pairwise arm/translate-a64: add FP16 FMOV to simd_mod_imm arm/translate-a64: add FP16 FRSQRTE to simd_two_reg_misc_fp16 arm/helper.c: re-factor rsqrte and add rsqrte_f16 arm/translate-a64: add FP16 FSQRT to simd_two_reg_misc_fp16 arm/translate-a64: add FP16 FRCPX to simd_two_reg_misc_fp16 arm/translate-a64: add FP16 FRECPE arm/helper.c: re-factor recpe and add recepe_f16 arm/translate-a64: add FP16 FNEG/FABS to simd_two_reg_misc_fp16 arm/translate-a64: add FP16 SCVTF/UCVFT to simd_two_reg_misc_fp16 arm/translate-a64: add FP16 FCMxx (zero) to simd_two_reg_misc_fp16 arm/translate-a64: add FCVTxx to simd_two_reg_misc_fp16 arm/translate-a64: add FP16 FPRINTx to simd_two_reg_misc_fp16 arm/translate-a64: initial decode for simd_two_reg_misc_fp16 arm/translate-a64: add FP16 x2 ops for simd_indexed ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS12
-rw-r--r--default-configs/arm-softmmu.mak2
-rw-r--r--hw/arm/vexpress.c6
-rw-r--r--hw/core/register.c8
-rw-r--r--hw/display/Makefile.objs1
-rw-r--r--hw/display/sii9022.c191
-rw-r--r--hw/display/trace-events5
-rw-r--r--hw/i2c/core.c18
-rw-r--r--hw/i2c/i2c-ddc.c4
-rw-r--r--hw/ssi/xilinx_spips.c43
-rw-r--r--include/exec/helper-head.h3
-rw-r--r--include/fpu/softfloat.h16
-rw-r--r--include/hw/i2c/i2c.h23
-rw-r--r--include/hw/register.h6
-rw-r--r--linux-user/elfload.c2
-rw-r--r--target/arm/cpu.h34
-rw-r--r--target/arm/cpu64.c1
-rw-r--r--target/arm/helper-a64.c269
-rw-r--r--target/arm/helper-a64.h33
-rw-r--r--target/arm/helper.c479
-rw-r--r--target/arm/helper.h14
-rw-r--r--target/arm/translate-a64.c1260
-rw-r--r--target/arm/translate.c12
23 files changed, 1976 insertions, 466 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 79fb830031..b7c4130388 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -556,7 +556,7 @@ F: hw/misc/arm_sysctl.c
Xilinx Zynq
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
-M: Alistair Francis <alistair.francis@xilinx.com>
+M: Alistair Francis <alistair@alistair23.me>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/*/xilinx_*
@@ -566,7 +566,7 @@ F: include/hw/misc/zynq*
X: hw/ssi/xilinx_*
Xilinx ZynqMP
-M: Alistair Francis <alistair.francis@xilinx.com>
+M: Alistair Francis <alistair@alistair23.me>
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
L: qemu-arm@nongnu.org
S: Maintained
@@ -1075,7 +1075,7 @@ T: git git://github.com/bonzini/qemu.git scsi-next
SSI
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
-M: Alistair Francis <alistair.francis@xilinx.com>
+M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/ssi/*
F: hw/block/m25p80.c
@@ -1084,7 +1084,7 @@ X: hw/ssi/xilinx_*
F: tests/m25p80-test.c
Xilinx SPI
-M: Alistair Francis <alistair.francis@xilinx.com>
+M: Alistair Francis <alistair@alistair23.me>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/ssi/xilinx_*
@@ -1254,7 +1254,7 @@ S: Maintained
F: hw/net/eepro100.c
Generic Loader
-M: Alistair Francis <alistair.francis@xilinx.com>
+M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/core/generic-loader.c
F: include/hw/core/generic-loader.h
@@ -1600,7 +1600,7 @@ F: tests/qmp-test.c
T: git git://repo.or.cz/qemu/armbru.git qapi-next
Register API
-M: Alistair Francis <alistair.francis@xilinx.com>
+M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/core/register.c
F: include/hw/register.h
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index ca34cf4462..54f855d072 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -21,6 +21,8 @@ CONFIG_STELLARIS_INPUT=y
CONFIG_STELLARIS_ENET=y
CONFIG_SSD0303=y
CONFIG_SSD0323=y
+CONFIG_DDC=y
+CONFIG_SII9022=y
CONFIG_ADS7846=y
CONFIG_MAX111X=y
CONFIG_SSI=y
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index dc5928ae1a..9fad79177a 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -29,6 +29,7 @@
#include "hw/arm/arm.h"
#include "hw/arm/primecell.h"
#include "hw/devices.h"
+#include "hw/i2c/i2c.h"
#include "net/net.h"
#include "sysemu/sysemu.h"
#include "hw/boards.h"
@@ -537,6 +538,7 @@ static void vexpress_common_init(MachineState *machine)
uint32_t sys_id;
DriveInfo *dinfo;
pflash_t *pflash0;
+ I2CBus *i2c;
ram_addr_t vram_size, sram_size;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *vram = g_new(MemoryRegion, 1);
@@ -628,7 +630,9 @@ static void vexpress_common_init(MachineState *machine)
sysbus_create_simple("sp804", map[VE_TIMER01], pic[2]);
sysbus_create_simple("sp804", map[VE_TIMER23], pic[3]);
- /* VE_SERIALDVI: not modelled */
+ dev = sysbus_create_simple("versatile_i2c", map[VE_SERIALDVI], NULL);
+ i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c");
+ i2c_create_slave(i2c, "sii9022", 0x39);
sysbus_create_simple("pl031", map[VE_RTC], pic[4]); /* RTC */
diff --git a/hw/core/register.c b/hw/core/register.c
index 900294b9c4..0741a1af32 100644
--- a/hw/core/register.c
+++ b/hw/core/register.c
@@ -159,13 +159,21 @@ uint64_t register_read(RegisterInfo *reg, uint64_t re, const char* prefix,
void register_reset(RegisterInfo *reg)
{
+ const RegisterAccessInfo *ac;
+
g_assert(reg);
if (!reg->data || !reg->access) {
return;
}
+ ac = reg->access;
+
register_write_val(reg, reg->access->reset);
+
+ if (ac->post_write) {
+ ac->post_write(reg, reg->access->reset);
+ }
}
void register_init(RegisterInfo *reg)
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index d3a4cb396e..3c7c75b94d 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -3,6 +3,7 @@ common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
common-obj-$(CONFIG_G364FB) += g364fb.o
common-obj-$(CONFIG_JAZZ_LED) += jazz_led.o
common-obj-$(CONFIG_PL110) += pl110.o
+common-obj-$(CONFIG_SII9022) += sii9022.o
common-obj-$(CONFIG_SSD0303) += ssd0303.o
common-obj-$(CONFIG_SSD0323) += ssd0323.o
common-obj-$(CONFIG_XEN) += xenfb.o
diff --git a/hw/display/sii9022.c b/hw/display/sii9022.c
new file mode 100644
index 0000000000..eaf11a6e7b
--- /dev/null
+++ b/hw/display/sii9022.c
@@ -0,0 +1,191 @@
+/*
+ * Silicon Image SiI9022
+ *
+ * This is a pretty hollow emulation: all we do is acknowledge that we
+ * exist (chip ID) and confirm that we get switched over into DDC mode
+ * so the emulated host can proceed to read out EDID data. All subsequent
+ * set-up of connectors etc will be acknowledged and ignored.
+ *
+ * Copyright (C) 2018 Linus Walleij
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/i2c-ddc.h"
+#include "trace.h"
+
+#define SII9022_SYS_CTRL_DATA 0x1a
+#define SII9022_SYS_CTRL_PWR_DWN 0x10
+#define SII9022_SYS_CTRL_AV_MUTE 0x08
+#define SII9022_SYS_CTRL_DDC_BUS_REQ 0x04
+#define SII9022_SYS_CTRL_DDC_BUS_GRTD 0x02
+#define SII9022_SYS_CTRL_OUTPUT_MODE 0x01
+#define SII9022_SYS_CTRL_OUTPUT_HDMI 1
+#define SII9022_SYS_CTRL_OUTPUT_DVI 0
+#define SII9022_REG_CHIPID 0x1b
+#define SII9022_INT_ENABLE 0x3c
+#define SII9022_INT_STATUS 0x3d
+#define SII9022_INT_STATUS_HOTPLUG 0x01;
+#define SII9022_INT_STATUS_PLUGGED 0x04;
+
+#define TYPE_SII9022 "sii9022"
+#define SII9022(obj) OBJECT_CHECK(sii9022_state, (obj), TYPE_SII9022)
+
+typedef struct sii9022_state {
+ I2CSlave parent_obj;
+ uint8_t ptr;
+ bool addr_byte;
+ bool ddc_req;
+ bool ddc_skip_finish;
+ bool ddc;
+} sii9022_state;
+
+static const VMStateDescription vmstate_sii9022 = {
+ .name = "sii9022",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_I2C_SLAVE(parent_obj, sii9022_state),
+ VMSTATE_UINT8(ptr, sii9022_state),
+ VMSTATE_BOOL(addr_byte, sii9022_state),
+ VMSTATE_BOOL(ddc_req, sii9022_state),
+ VMSTATE_BOOL(ddc_skip_finish, sii9022_state),
+ VMSTATE_BOOL(ddc, sii9022_state),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static int sii9022_event(I2CSlave *i2c, enum i2c_event event)
+{
+ sii9022_state *s = SII9022(i2c);
+
+ switch (event) {
+ case I2C_START_SEND:
+ s->addr_byte = true;
+ break;
+ case I2C_START_RECV:
+ break;
+ case I2C_FINISH:
+ break;
+ case I2C_NACK:
+ break;
+ }
+
+ return 0;
+}
+
+static int sii9022_rx(I2CSlave *i2c)
+{
+ sii9022_state *s = SII9022(i2c);
+ uint8_t res = 0x00;
+
+ switch (s->ptr) {
+ case SII9022_SYS_CTRL_DATA:
+ if (s->ddc_req) {
+ /* Acknowledge DDC bus request */
+ res = SII9022_SYS_CTRL_DDC_BUS_GRTD | SII9022_SYS_CTRL_DDC_BUS_REQ;
+ }
+ break;
+ case SII9022_REG_CHIPID:
+ res = 0xb0;
+ break;
+ case SII9022_INT_STATUS:
+ /* Something is cold-plugged in, no interrupts */
+ res = SII9022_INT_STATUS_PLUGGED;
+ break;
+ default:
+ break;
+ }
+
+ trace_sii9022_read_reg(s->ptr, res);
+ s->ptr++;
+
+ return res;
+}
+
+static int sii9022_tx(I2CSlave *i2c, uint8_t data)
+{
+ sii9022_state *s = SII9022(i2c);
+
+ if (s->addr_byte) {
+ s->ptr = data;
+ s->addr_byte = false;
+ return 0;
+ }
+
+ switch (s->ptr) {
+ case SII9022_SYS_CTRL_DATA:
+ if (data & SII9022_SYS_CTRL_DDC_BUS_REQ) {
+ s->ddc_req = true;
+ if (data & SII9022_SYS_CTRL_DDC_BUS_GRTD) {
+ s->ddc = true;
+ /* Skip this finish since we just switched to DDC */
+ s->ddc_skip_finish = true;
+ trace_sii9022_switch_mode("DDC");
+ }
+ } else {
+ s->ddc_req = false;
+ s->ddc = false;
+ trace_sii9022_switch_mode("normal");
+ }
+ break;
+ default:
+ break;
+ }
+
+ trace_sii9022_write_reg(s->ptr, data);
+ s->ptr++;
+
+ return 0;
+}
+
+static void sii9022_reset(DeviceState *dev)
+{
+ sii9022_state *s = SII9022(dev);
+
+ s->ptr = 0;
+ s->addr_byte = false;
+ s->ddc_req = false;
+ s->ddc_skip_finish = false;
+ s->ddc = false;
+}
+
+static void sii9022_realize(DeviceState *dev, Error **errp)
+{
+ I2CBus *bus;
+
+ bus = I2C_BUS(qdev_get_parent_bus(dev));
+ i2c_create_slave(bus, TYPE_I2CDDC, 0x50);
+}
+
+static void sii9022_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ I2CSlaveClass *k = I2C_SLAVE_CLASS(klass);
+
+ k->event = sii9022_event;
+ k->recv = sii9022_rx;
+ k->send = sii9022_tx;
+ dc->reset = sii9022_reset;
+ dc->realize = sii9022_realize;
+ dc->vmsd = &vmstate_sii9022;
+}
+
+static const TypeInfo sii9022_info = {
+ .name = TYPE_SII9022,
+ .parent = TYPE_I2C_SLAVE,
+ .instance_size = sizeof(sii9022_state),
+ .class_init = sii9022_class_init,
+};
+
+static void sii9022_register_types(void)
+{
+ type_register_static(&sii9022_info);
+}
+
+type_init(sii9022_register_types)
diff --git a/hw/display/trace-events b/hw/display/trace-events
index da498c1def..5a48c6cb6a 100644
--- a/hw/display/trace-events
+++ b/hw/display/trace-events
@@ -132,3 +132,8 @@ vga_cirrus_read_io(uint32_t addr, uint32_t val) "addr 0x%x, val 0x%x"
vga_cirrus_write_io(uint32_t addr, uint32_t val) "addr 0x%x, val 0x%x"
vga_cirrus_read_blt(uint32_t offset, uint32_t val) "offset 0x%x, val 0x%x"
vga_cirrus_write_blt(uint32_t offset, uint32_t val) "offset 0x%x, val 0x%x"
+
+# hw/display/sii9022.c
+sii9022_read_reg(uint8_t addr, uint8_t val) "addr 0x%02x, val 0x%02x"
+sii9022_write_reg(uint8_t addr, uint8_t val) "addr 0x%02x, val 0x%02x"
+sii9022_switch_mode(const char *mode) "mode: %s"
diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index 59068f157e..cfccefca3d 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -10,31 +10,13 @@
#include "qemu/osdep.h"
#include "hw/i2c/i2c.h"
-typedef struct I2CNode I2CNode;
-
-struct I2CNode {
- I2CSlave *elt;
- QLIST_ENTRY(I2CNode) next;
-};
-
#define I2C_BROADCAST 0x00
-struct I2CBus
-{
- BusState qbus;
- QLIST_HEAD(, I2CNode) current_devs;
- uint8_t saved_address;
- bool broadcast;
-};
-
static Property i2c_props[] = {
DEFINE_PROP_UINT8("address", struct I2CSlave, address, 0),
DEFINE_PROP_END_OF_LIST(),
};
-#define TYPE_I2C_BUS "i2c-bus"
-#define I2C_BUS(obj) OBJECT_CHECK(I2CBus, (obj), TYPE_I2C_BUS)
-
static const TypeInfo i2c_bus_info = {
.name = TYPE_I2C_BUS,
.parent = TYPE_BUS,
diff --git a/hw/i2c/i2c-ddc.c b/hw/i2c/i2c-ddc.c
index 199dac9e41..bec0c91e2d 100644
--- a/hw/i2c/i2c-ddc.c
+++ b/hw/i2c/i2c-ddc.c
@@ -259,12 +259,12 @@ static int i2c_ddc_tx(I2CSlave *i2c, uint8_t data)
s->reg = data;
s->firstbyte = false;
DPRINTF("[EDID] Written new pointer: %u\n", data);
- return 1;
+ return 0;
}
/* Ignore all writes */
s->reg++;
- return 1;
+ return 0;
}
static void i2c_ddc_init(Object *obj)
diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index 8af36ca3d4..426f971311 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -223,7 +223,7 @@ static void xilinx_spips_update_cs(XilinxSPIPS *s, int field)
{
int i;
- for (i = 0; i < s->num_cs; i++) {
+ for (i = 0; i < s->num_cs * s->num_busses; i++) {
bool old_state = s->cs_lines_state[i];
bool new_state = field & (1 << i);
@@ -234,7 +234,7 @@ static void xilinx_spips_update_cs(XilinxSPIPS *s, int field)
}
qemu_set_irq(s->cs_lines[i], !new_state);
}
- if (!(field & ((1 << s->num_cs) - 1))) {
+ if (!(field & ((1 << (s->num_cs * s->num_busses)) - 1))) {
s->snoop_state = SNOOP_CHECKING;
s->cmd_dummies = 0;
s->link_state = 1;
@@ -248,7 +248,40 @@ static void xlnx_zynqmp_qspips_update_cs_lines(XlnxZynqMPQSPIPS *s)
{
if (s->regs[R_GQSPI_GF_SNAPSHOT]) {
int field = ARRAY_FIELD_EX32(s->regs, GQSPI_GF_SNAPSHOT, CHIP_SELECT);
- xilinx_spips_update_cs(XILINX_SPIPS(s), field);
+ bool upper_cs_sel = field & (1 << 1);
+ bool lower_cs_sel = field & 1;
+ bool bus0_enabled;
+ bool bus1_enabled;
+ uint8_t buses;
+ int cs = 0;
+
+ buses = ARRAY_FIELD_EX32(s->regs, GQSPI_GF_SNAPSHOT, DATA_BUS_SELECT);
+ bus0_enabled = buses & 1;
+ bus1_enabled = buses & (1 << 1);
+
+ if (bus0_enabled && bus1_enabled) {
+ if (lower_cs_sel) {
+ cs |= 1;
+ }
+ if (upper_cs_sel) {
+ cs |= 1 << 3;
+ }
+ } else if (bus0_enabled) {
+ if (lower_cs_sel) {
+ cs |= 1;
+ }
+ if (upper_cs_sel) {
+ cs |= 1 << 1;
+ }
+ } else if (bus1_enabled) {
+ if (lower_cs_sel) {
+ cs |= 1 << 2;
+ }
+ if (upper_cs_sel) {
+ cs |= 1 << 3;
+ }
+ }
+ xilinx_spips_update_cs(XILINX_SPIPS(s), cs);
}
}
@@ -260,7 +293,7 @@ static void xilinx_spips_update_cs_lines(XilinxSPIPS *s)
if (num_effective_busses(s) == 2) {
/* Single bit chip-select for qspi */
field &= 0x1;
- field |= field << 1;
+ field |= field << 3;
/* Dual stack U-Page */
} else if (s->regs[R_LQSPI_CFG] & LQSPI_CFG_TWO_MEM &&
s->regs[R_LQSPI_STS] & LQSPI_CFG_U_PAGE) {
@@ -544,7 +577,7 @@ static int xilinx_spips_num_dummies(XilinxQSPIPS *qs, uint8_t command)
return 2;
case QIOR:
case QIOR_4:
- return 5;
+ return 4;
default:
return -1;
}
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
index e1fd08f2ba..15b6a68de3 100644
--- a/include/exec/helper-head.h
+++ b/include/exec/helper-head.h
@@ -26,6 +26,7 @@
#define dh_alias_int i32
#define dh_alias_i64 i64
#define dh_alias_s64 i64
+#define dh_alias_f16 i32
#define dh_alias_f32 i32
#define dh_alias_f64 i64
#define dh_alias_ptr ptr
@@ -38,6 +39,7 @@
#define dh_ctype_int int
#define dh_ctype_i64 uint64_t
#define dh_ctype_s64 int64_t
+#define dh_ctype_f16 float16
#define dh_ctype_f32 float32
#define dh_ctype_f64 float64
#define dh_ctype_ptr void *
@@ -94,6 +96,7 @@
#define dh_is_signed_s32 1
#define dh_is_signed_i64 0
#define dh_is_signed_s64 1
+#define dh_is_signed_f16 0
#define dh_is_signed_f32 0
#define dh_is_signed_f64 0
#define dh_is_signed_tl 0
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 9b7b5e34e2..27876e711c 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -306,8 +306,11 @@ static inline float16 float16_set_sign(float16 a, int sign)
}
#define float16_zero make_float16(0)
-#define float16_one make_float16(0x3c00)
#define float16_half make_float16(0x3800)
+#define float16_one make_float16(0x3c00)
+#define float16_one_point_five make_float16(0x3e00)
+#define float16_two make_float16(0x4000)
+#define float16_three make_float16(0x4200)
#define float16_infinity make_float16(0x7c00)
/*----------------------------------------------------------------------------
@@ -415,11 +418,13 @@ static inline float32 float32_set_sign(float32 a, int sign)
}
#define float32_zero make_float32(0)
-#define float32_one make_float32(0x3f800000)
#define float32_half make_float32(0x3f000000)
+#define float32_one make_float32(0x3f800000)
+#define float32_one_point_five make_float32(0x3fc00000)
+#define float32_two make_float32(0x40000000)
+#define float32_three make_float32(0x40400000)
#define float32_infinity make_float32(0x7f800000)
-
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/
@@ -526,9 +531,12 @@ static inline float64 float64_set_sign(float64 a, int sign)
}
#define float64_zero make_float64(0)
+#define float64_half make_float64(0x3fe0000000000000LL)
#define float64_one make_float64(0x3ff0000000000000LL)
+#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
+#define float64_two make_float64(0x4000000000000000ULL)
+#define float64_three make_float64(0x4008000000000000ULL)
#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
-#define float64_half make_float64(0x3fe0000000000000LL)
#define float64_infinity make_float64(0x7ff0000000000000LL)
/*----------------------------------------------------------------------------
diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index 24e95d0155..d727379b48 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -25,8 +25,7 @@ typedef struct I2CSlave I2CSlave;
#define I2C_SLAVE_GET_CLASS(obj) \
OBJECT_GET_CLASS(I2CSlaveClass, (obj), TYPE_I2C_SLAVE)
-typedef struct I2CSlaveClass
-{
+typedef struct I2CSlaveClass {
DeviceClass parent_class;
/* Callbacks provided by the device. */
@@ -50,14 +49,30 @@ typedef struct I2CSlaveClass
int (*event)(I2CSlave *s, enum i2c_event event);
} I2CSlaveClass;
-struct I2CSlave
-{
+struct I2CSlave {
DeviceState qdev;
/* Remaining fields for internal use by the I2C code. */
uint8_t address;
};
+#define TYPE_I2C_BUS "i2c-bus"
+#define I2C_BUS(obj) OBJECT_CHECK(I2CBus, (obj), TYPE_I2C_BUS)
+
+typedef struct I2CNode I2CNode;
+
+struct I2CNode {
+ I2CSlave *elt;
+ QLIST_ENTRY(I2CNode) next;
+};
+
+struct I2CBus {
+ BusState qbus;
+ QLIST_HEAD(, I2CNode) current_devs;
+ uint8_t saved_address;
+ bool broadcast;
+};
+
I2CBus *i2c_init_bus(DeviceState *parent, const char *name);
void i2c_set_slave_address(I2CSlave *dev, uint8_t address);
int i2c_bus_busy(I2CBus *bus);
diff --git a/include/hw/register.h b/include/hw/register.h
index de2414e6b4..5796584588 100644
--- a/include/hw/register.h
+++ b/include/hw/register.h
@@ -34,7 +34,7 @@ typedef struct RegisterInfoArray RegisterInfoArray;
* immediately before the actual write. The returned value is what is written,
* giving the handler a chance to modify the written value.
* @post_write: Post write callback. Passed the written value. Most write side
- * effects should be implemented here.
+ * effects should be implemented here. This is called during device reset.
*
* @post_read: Post read callback. Passes the value that is about to be returned
* for a read. The return value from this function is what is ultimately read,
@@ -135,8 +135,8 @@ uint64_t register_read(RegisterInfo *reg, uint64_t re, const char* prefix,
bool debug);
/**
- * reset a register
- * @reg: register to reset
+ * Resets a register. This will also call the post_write hook if it exists.
+ * @reg: The register to reset.
*/
void register_reset(RegisterInfo *reg);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 0208022445..6689089cd2 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -551,6 +551,8 @@ static uint32_t get_elf_hwcap(void)
GET_FEATURE(ARM_FEATURE_V8_SM3, ARM_HWCAP_A64_SM3);
GET_FEATURE(ARM_FEATURE_V8_SM4, ARM_HWCAP_A64_SM4);
GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512);
+ GET_FEATURE(ARM_FEATURE_V8_FP16,
+ ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
#undef GET_FEATURE
return hwcaps;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8c839faa8f..2b9740878b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -168,6 +168,7 @@ typedef struct {
* Qn = regs[n].d[1]:regs[n].d[0]
* Dn = regs[n].d[0]
* Sn = regs[n].d[0] bits 31..0
+ * Hn = regs[n].d[0] bits 15..0
*
* This corresponds to the architecturally defined mapping between
* the two execution states, and means we do not need to explicitly
@@ -537,19 +538,29 @@ typedef struct CPUARMState {
/* scratch space when Tn are not sufficient. */
uint32_t scratch[8];
- /* fp_status is the "normal" fp status. standard_fp_status retains
- * values corresponding to the ARM "Standard FPSCR Value", ie
- * default-NaN, flush-to-zero, round-to-nearest and is used by
- * any operations (generally Neon) which the architecture defines
- * as controlled by the standard FPSCR value rather than the FPSCR.
+ /* There are a number of distinct float control structures:
+ *
+ * fp_status: is the "normal" fp status.
+ * fp_status_fp16: used for half-precision calculations
+ * standard_fp_status : the ARM "Standard FPSCR Value"
+ *
+ * Half-precision operations are governed by a separate
+ * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
+ * status structure to control this.
+ *
+ * The "Standard FPSCR", ie default-NaN, flush-to-zero,
+ * round-to-nearest and is used by any operations (generally
+ * Neon) which the architecture defines as controlled by the
+ * standard FPSCR value rather than the FPSCR.
*
* To avoid having to transfer exception bits around, we simply
* say that the FPSCR cumulative exception flags are the logical
- * OR of the flags in the two fp statuses. This relies on the
+ * OR of the flags in the three fp statuses. This relies on the
* only thing which needs to read the exception flags being
* an explicit FPSCR read.
*/
float_status fp_status;
+ float_status fp_status_f16;
float_status standard_fp_status;
/* ZCR_EL[1-3] */
@@ -1189,12 +1200,20 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
uint32_t vfp_get_fpscr(CPUARMState *env);
void vfp_set_fpscr(CPUARMState *env, uint32_t val);
-/* For A64 the FPSCR is split into two logically distinct registers,
+/* FPCR, Floating Point Control Register
+ * FPSR, Floating Poiht Status Register
+ *
+ * For A64 the FPSCR is split into two logically distinct registers,
* FPCR and FPSR. However since they still use non-overlapping bits
* we store the underlying state in fpscr and just mask on read/write.
*/
#define FPSR_MASK 0xf800009f
#define FPCR_MASK 0x07f79f00
+
+#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
+#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
+#define FPCR_DN (1 << 25) /* Default NaN enable bit */
+
static inline uint32_t vfp_get_fpsr(CPUARMState *env)
{
return vfp_get_fpscr(env) & FPSR_MASK;
@@ -1408,6 +1427,7 @@ enum arm_features {
ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
};
static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 1c330adc28..9743bdc8c3 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -230,6 +230,7 @@ static void aarch64_any_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
+ set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */
cpu->dcz_blocksize = 7; /* 512 bytes */
}
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 10e08bdc1f..afb25ad20c 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -192,6 +192,10 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
*/
+#define float16_two make_float16(0x4000)
+#define float16_three make_float16(0x4200)
+#define float16_one_point_five make_float16(0x3e00)
+
#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)
@@ -200,6 +204,21 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
+float16 HELPER(recpsf_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float16_squash_input_denormal(a, fpst);
+ b = float16_squash_input_denormal(b, fpst);
+
+ a = float16_chs(a);
+ if ((float16_is_infinity(a) && float16_is_zero(b)) ||
+ (float16_is_infinity(b) && float16_is_zero(a))) {
+ return float16_two;
+ }
+ return float16_muladd(a, b, float16_two, 0, fpst);
+}
+
float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
float_status *fpst = fpstp;
@@ -230,6 +249,21 @@ float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_two, 0, fpst);
}
+float16 HELPER(rsqrtsf_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float16_squash_input_denormal(a, fpst);
+ b = float16_squash_input_denormal(b, fpst);
+
+ a = float16_chs(a);
+ if ((float16_is_infinity(a) && float16_is_zero(b)) ||
+ (float16_is_infinity(b) && float16_is_zero(a))) {
+ return float16_one_point_five;
+ }
+ return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
+}
+
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
float_status *fpst = fpstp;
@@ -322,6 +356,35 @@ uint64_t HELPER(neon_addlp_u16)(uint64_t a)
}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
+float16 HELPER(frecpx_f16)(float16 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ uint16_t val16, sbit;
+ int16_t exp;
+
+ if (float16_is_any_nan(a)) {
+ float16 nan = a;
+ if (float16_is_signaling_nan(a, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ nan = float16_maybe_silence_nan(a, fpst);
+ }
+ if (fpst->default_nan_mode) {
+ nan = float16_default_nan(fpst);
+ }
+ return nan;
+ }
+
+ val16 = float16_val(a);
+ sbit = 0x8000 & val16;
+ exp = extract32(val16, 10, 5);
+
+ if (exp == 0) {
+ return make_float16(deposit32(sbit, 10, 5, 0x1e));
+ } else {
+ return make_float16(deposit32(sbit, 10, 5, ~exp));
+ }
+}
+
float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
float_status *fpst = fpstp;
@@ -572,3 +635,209 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
{
return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
}
+
+/*
+ * AdvSIMD half-precision
+ */
+
+#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
+
+#define ADVSIMD_HALFOP(name) \
+float16 ADVSIMD_HELPER(name, h)(float16 a, float16 b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float16_ ## name(a, b, fpst); \
+}
+
+ADVSIMD_HALFOP(add)
+ADVSIMD_HALFOP(sub)
+ADVSIMD_HALFOP(mul)
+ADVSIMD_HALFOP(div)
+ADVSIMD_HALFOP(min)
+ADVSIMD_HALFOP(max)
+ADVSIMD_HALFOP(minnum)
+ADVSIMD_HALFOP(maxnum)
+
+#define ADVSIMD_TWOHALFOP(name) \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+{ \
+ float16 a1, a2, b1, b2; \
+ uint32_t r1, r2; \
+ float_status *fpst = fpstp; \
+ a1 = extract32(two_a, 0, 16); \
+ a2 = extract32(two_a, 16, 16); \
+ b1 = extract32(two_b, 0, 16); \
+ b2 = extract32(two_b, 16, 16); \
+ r1 = float16_ ## name(a1, b1, fpst); \
+ r2 = float16_ ## name(a2, b2, fpst); \
+ return deposit32(r1, 16, 16, r2); \
+}
+
+ADVSIMD_TWOHALFOP(add)
+ADVSIMD_TWOHALFOP(sub)
+ADVSIMD_TWOHALFOP(mul)
+ADVSIMD_TWOHALFOP(div)
+ADVSIMD_TWOHALFOP(min)
+ADVSIMD_TWOHALFOP(max)
+ADVSIMD_TWOHALFOP(minnum)
+ADVSIMD_TWOHALFOP(maxnum)
+
+/* Data processing - scalar floating-point and advanced SIMD */
+static float16 float16_mulx(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float16_squash_input_denormal(a, fpst);
+ b = float16_squash_input_denormal(b, fpst);
+
+ if ((float16_is_zero(a) && float16_is_infinity(b)) ||
+ (float16_is_infinity(a) && float16_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float16((1U << 14) |
+ ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
+ }
+ return float16_mul(a, b, fpst);
+}
+
+ADVSIMD_HALFOP(mulx)
+ADVSIMD_TWOHALFOP(mulx)
+
+/* fused multiply-accumulate */
+float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return float16_muladd(a, b, c, 0, fpst);
+}
+
+uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
+ uint32_t two_c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 a1, a2, b1, b2, c1, c2;
+ uint32_t r1, r2;
+ a1 = extract32(two_a, 0, 16);
+ a2 = extract32(two_a, 16, 16);
+ b1 = extract32(two_b, 0, 16);
+ b2 = extract32(two_b, 16, 16);
+ c1 = extract32(two_c, 0, 16);
+ c2 = extract32(two_c, 16, 16);
+ r1 = float16_muladd(a1, b1, c1, 0, fpst);
+ r2 = float16_muladd(a2, b2, c2, 0, fpst);
+ return deposit32(r1, 16, 16, r2);
+}
+
+/*
+ * Floating point comparisons produce an integer result. Softfloat
+ * routines return float_relation types which we convert to the 0/-1
+ * Neon requires.
+ */
+
+#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
+
+uint32_t HELPER(advsimd_ceq_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ int compare = float16_compare_quiet(a, b, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ int compare = float16_compare(a, b, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater ||
+ compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_cgt_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ int compare = float16_compare(a, b, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater);
+}
+
+uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 f0 = float16_abs(a);
+ float16 f1 = float16_abs(b);
+ int compare = float16_compare(f0, f1, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater ||
+ compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 f0 = float16_abs(a);
+ float16 f1 = float16_abs(b);
+ int compare = float16_compare(f0, f1, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater);
+}
+
+/* round to integral */
+float16 HELPER(advsimd_rinth_exact)(float16 x, void *fp_status)
+{
+ return float16_round_to_int(x, fp_status);
+}
+
+float16 HELPER(advsimd_rinth)(float16 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float16 ret;
+
+ ret = float16_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
+/*
+ * Half-precision floating point conversion functions
+ *
+ * There are a multitude of conversion functions with various
+ * different rounding modes. This is dealt with by the calling code
+ * setting the mode appropriately before calling the helper.
+ */
+
+uint32_t HELPER(advsimd_f16tosinth)(float16 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ /* Invalid if we are passed a NaN */
+ if (float16_is_any_nan(a)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_int16(a, fpst);
+}
+
+uint32_t HELPER(advsimd_f16touinth)(float16 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ /* Invalid if we are passed a NaN */
+ if (float16_is_any_nan(a)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_uint16(a, fpst);
+}
+
+/*
+ * Square Root and Reciprocal square root
+ */
+
+float16 HELPER(sqrt_f16)(float16 a, void *fpstp)
+{
+ float_status *s = fpstp;
+
+ return float16_sqrt(a, s);
+}
+
+
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 85d86741db..ef4ddfe9d8 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -29,8 +29,10 @@ DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
@@ -39,6 +41,7 @@ DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
@@ -48,3 +51,33 @@ DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
+DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
+DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
+DEF_HELPER_FLAGS_3(advsimd_minnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_addh, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_subh, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_mulh, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_divh, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, ptr)
+DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, ptr)
+DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, ptr)
+DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, ptr)
+DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, ptr)
+DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, ptr)
+DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_add2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_div2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_max2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_min2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
+DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
+DEF_HELPER_2(advsimd_rinth_exact, f16, f16, ptr)
+DEF_HELPER_2(advsimd_rinth, f16, f16, ptr)
+DEF_HELPER_2(advsimd_f16tosinth, i32, f16, ptr)
+DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
+DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c5bc69b961..c82f63d440 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11103,6 +11103,7 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
| (env->vfp.vec_stride << 20);
i = get_float_exception_flags(&env->vfp.fp_status);
i |= get_float_exception_flags(&env->vfp.standard_fp_status);
+ i |= get_float_exception_flags(&env->vfp.fp_status_f16);
fpscr |= vfp_exceptbits_from_host(i);
return fpscr;
}
@@ -11160,16 +11161,31 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
break;
}
set_float_rounding_mode(i, &env->vfp.fp_status);
+ set_float_rounding_mode(i, &env->vfp.fp_status_f16);
}
- if (changed & (1 << 24)) {
- set_flush_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status);
- set_flush_inputs_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status);
+ if (changed & FPCR_FZ16) {
+ bool ftz_enabled = val & FPCR_FZ16;
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
+ }
+ if (changed & FPCR_FZ) {
+ bool ftz_enabled = val & FPCR_FZ;
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
+ }
+ if (changed & FPCR_DN) {
+ bool dnan_enabled = val & FPCR_DN;
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
}
- if (changed & (1 << 25))
- set_default_nan_mode((val & (1 << 25)) != 0, &env->vfp.fp_status);
+ /* The exception flags are ORed together when we read fpscr so we
+ * only need to preserve the current state in one of our
+ * float_status values.
+ */
i = vfp_exceptbits_to_host(val);
set_float_exception_flags(i, &env->vfp.fp_status);
+ set_float_exception_flags(0, &env->vfp.fp_status_f16);
set_float_exception_flags(0, &env->vfp.standard_fp_status);
}
@@ -11286,8 +11302,10 @@ CONV_ITOF(vfp_##name##to##p, fsz, sign) \
CONV_FTOI(vfp_to##name##p, fsz, sign, ) \
CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero)
+FLOAT_CONVS(si, h, 16, )
FLOAT_CONVS(si, s, 32, )
FLOAT_CONVS(si, d, 64, )
+FLOAT_CONVS(ui, h, 16, u)
FLOAT_CONVS(ui, s, 32, u)
FLOAT_CONVS(ui, d, 64, u)
@@ -11370,6 +11388,8 @@ VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
VFP_CONV_FIX(uh, s, 32, 32, uint16)
VFP_CONV_FIX(ul, s, 32, 32, uint32)
VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
+VFP_CONV_FIX_A64(sl, h, 16, 32, int32)
+VFP_CONV_FIX_A64(ul, h, 16, 32, uint32)
#undef VFP_CONV_FIX
#undef VFP_CONV_FIX_FLOAT
#undef VFP_CONV_FLOAT_FIX_ROUND
@@ -11377,9 +11397,9 @@ VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
/* Set the current fp rounding mode and return the old one.
* The argument is a softfloat float_round_ value.
*/
-uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env)
+uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
{
- float_status *fp_status = &env->vfp.fp_status;
+ float_status *fp_status = fpstp;
uint32_t prev_rmode = get_float_rounding_mode(fp_status);
set_float_rounding_mode(rmode, fp_status);
@@ -11503,80 +11523,75 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
* int->float conversions at run-time. */
#define float64_256 make_float64(0x4070000000000000LL)
#define float64_512 make_float64(0x4080000000000000LL)
+#define float16_maxnorm make_float16(0x7bff)
#define float32_maxnorm make_float32(0x7f7fffff)
#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
/* Reciprocal functions
*
* The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM, see FPRecipEstimate()
+ * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
*/
-static float64 recip_estimate(float64 a, float_status *real_fp_status)
-{
- /* These calculations mustn't set any fp exception flags,
- * so we use a local copy of the fp_status.
- */
- float_status dummy_status = *real_fp_status;
- float_status *s = &dummy_status;
- /* q = (int)(a * 512.0) */
- float64 q = float64_mul(float64_512, a, s);
- int64_t q_int = float64_to_int64_round_to_zero(q, s);
-
- /* r = 1.0 / (((double)q + 0.5) / 512.0) */
- q = int64_to_float64(q_int, s);
- q = float64_add(q, float64_half, s);
- q = float64_div(q, float64_512, s);
- q = float64_div(float64_one, q, s);
-
- /* s = (int)(256.0 * r + 0.5) */
- q = float64_mul(q, float64_256, s);
- q = float64_add(q, float64_half, s);
- q_int = float64_to_int64_round_to_zero(q, s);
+/* See RecipEstimate()
+ *
+ * input is a 9 bit fixed point number
+ * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
+ * result range 256 .. 511 for a number from 1.0 to 511/256.
+ */
- /* return (double)s / 256.0 */
- return float64_div(int64_to_float64(q_int, s), float64_256, s);
+static int recip_estimate(int input)
+{
+ int a, b, r;
+ assert(256 <= input && input < 512);
+ a = (input * 2) + 1;
+ b = (1 << 19) / a;
+ r = (b + 1) >> 1;
+ assert(256 <= r && r < 512);
+ return r;
}
-/* Common wrapper to call recip_estimate */
-static float64 call_recip_estimate(float64 num, int off, float_status *fpst)
+/*
+ * Common wrapper to call recip_estimate
+ *
+ * The parameters are exponent and 64 bit fraction (without implicit
+ * bit) where the binary point is nominally at bit 52. Returns a
+ * float64 which can then be rounded to the appropriate size by the
+ * callee.
+ */
+
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
{
- uint64_t val64 = float64_val(num);
- uint64_t frac = extract64(val64, 0, 52);
- int64_t exp = extract64(val64, 52, 11);
- uint64_t sbit;
- float64 scaled, estimate;
+ uint32_t scaled, estimate;
+ uint64_t result_frac;
+ int result_exp;
- /* Generate the scaled number for the estimate function */
- if (exp == 0) {
+ /* Handle sub-normals */
+ if (*exp == 0) {
if (extract64(frac, 51, 1) == 0) {
- exp = -1;
- frac = extract64(frac, 0, 50) << 2;
+ *exp = -1;
+ frac <<= 2;
} else {
- frac = extract64(frac, 0, 51) << 1;
+ frac <<= 1;
}
}
- /* scaled = '0' : '01111111110' : fraction<51:44> : Zeros(44); */
- scaled = make_float64((0x3feULL << 52)
- | extract64(frac, 44, 8) << 44);
-
- estimate = recip_estimate(scaled, fpst);
-
- /* Build new result */
- val64 = float64_val(estimate);
- sbit = 0x8000000000000000ULL & val64;
- exp = off - exp;
- frac = extract64(val64, 0, 52);
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ estimate = recip_estimate(scaled);
- if (exp == 0) {
- frac = 1ULL << 51 | extract64(frac, 1, 51);
- } else if (exp == -1) {
- frac = 1ULL << 50 | extract64(frac, 2, 50);
- exp = 0;
+ result_exp = exp_off - *exp;
+ result_frac = deposit64(0, 44, 8, estimate);
+ if (result_exp == 0) {
+ result_frac = deposit64(result_frac >> 1, 51, 1, 1);
+ } else if (result_exp == -1) {
+ result_frac = deposit64(result_frac >> 2, 50, 2, 1);
+ result_exp = 0;
}
- return make_float64(sbit | (exp << 52) | frac);
+ *exp = result_exp;
+
+ return result_frac;
}
static bool round_to_inf(float_status *fpst, bool sign_bit)
@@ -11595,18 +11610,63 @@ static bool round_to_inf(float_status *fpst, bool sign_bit)
g_assert_not_reached();
}
+float16 HELPER(recpe_f16)(float16 input, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 f16 = float16_squash_input_denormal(input, fpst);
+ uint32_t f16_val = float16_val(f16);
+ uint32_t f16_sign = float16_is_neg(f16);
+ int f16_exp = extract32(f16_val, 10, 5);
+ uint32_t f16_frac = extract32(f16_val, 0, 10);
+ uint64_t f64_frac;
+
+ if (float16_is_any_nan(f16)) {
+ float16 nan = f16;
+ if (float16_is_signaling_nan(f16, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ nan = float16_maybe_silence_nan(f16, fpst);
+ }
+ if (fpst->default_nan_mode) {
+ nan = float16_default_nan(fpst);
+ }
+ return nan;
+ } else if (float16_is_infinity(f16)) {
+ return float16_set_sign(float16_zero, float16_is_neg(f16));
+ } else if (float16_is_zero(f16)) {
+ float_raise(float_flag_divbyzero, fpst);
+ return float16_set_sign(float16_infinity, float16_is_neg(f16));
+ } else if (float16_abs(f16) < (1 << 8)) {
+ /* Abs(value) < 2.0^-16 */
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
+ if (round_to_inf(fpst, f16_sign)) {
+ return float16_set_sign(float16_infinity, f16_sign);
+ } else {
+ return float16_set_sign(float16_maxnorm, f16_sign);
+ }
+ } else if (f16_exp >= 29 && fpst->flush_to_zero) {
+ float_raise(float_flag_underflow, fpst);
+ return float16_set_sign(float16_zero, float16_is_neg(f16));
+ }
+
+ f64_frac = call_recip_estimate(&f16_exp, 29,
+ ((uint64_t) f16_frac) << (52 - 10));
+
+ /* result = sign : result_exp<4:0> : fraction<51:42> */
+ f16_val = deposit32(0, 15, 1, f16_sign);
+ f16_val = deposit32(f16_val, 10, 5, f16_exp);
+ f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
+ return make_float16(f16_val);
+}
+
float32 HELPER(recpe_f32)(float32 input, void *fpstp)
{
float_status *fpst = fpstp;
float32 f32 = float32_squash_input_denormal(input, fpst);
uint32_t f32_val = float32_val(f32);
- uint32_t f32_sbit = 0x80000000ULL & f32_val;
- int32_t f32_exp = extract32(f32_val, 23, 8);
+ bool f32_sign = float32_is_neg(f32);
+ int f32_exp = extract32(f32_val, 23, 8);
uint32_t f32_frac = extract32(f32_val, 0, 23);
- float64 f64, r64;
- uint64_t r64_val;
- int64_t r64_exp;
- uint64_t r64_frac;
+ uint64_t f64_frac;
if (float32_is_any_nan(f32)) {
float32 nan = f32;
@@ -11623,30 +11683,27 @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp)
} else if (float32_is_zero(f32)) {
float_raise(float_flag_divbyzero, fpst);
return float32_set_sign(float32_infinity, float32_is_neg(f32));
- } else if ((f32_val & ~(1ULL << 31)) < (1ULL << 21)) {
+ } else if (float32_abs(f32) < (1ULL << 21)) {
/* Abs(value) < 2.0^-128 */
float_raise(float_flag_overflow | float_flag_inexact, fpst);
- if (round_to_inf(fpst, f32_sbit)) {
- return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ if (round_to_inf(fpst, f32_sign)) {
+ return float32_set_sign(float32_infinity, f32_sign);
} else {
- return float32_set_sign(float32_maxnorm, float32_is_neg(f32));
+ return float32_set_sign(float32_maxnorm, f32_sign);
}
} else if (f32_exp >= 253 && fpst->flush_to_zero) {
float_raise(float_flag_underflow, fpst);
return float32_set_sign(float32_zero, float32_is_neg(f32));
}
+ f64_frac = call_recip_estimate(&f32_exp, 253,
+ ((uint64_t) f32_frac) << (52 - 23));
- f64 = make_float64(((int64_t)(f32_exp) << 52) | (int64_t)(f32_frac) << 29);
- r64 = call_recip_estimate(f64, 253, fpst);
- r64_val = float64_val(r64);
- r64_exp = extract64(r64_val, 52, 11);
- r64_frac = extract64(r64_val, 0, 52);
-
- /* result = sign : result_exp<7:0> : fraction<51:29>; */
- return make_float32(f32_sbit |
- (r64_exp & 0xff) << 23 |
- extract64(r64_frac, 29, 24));
+ /* result = sign : result_exp<7:0> : fraction<51:29> */
+ f32_val = deposit32(0, 31, 1, f32_sign);
+ f32_val = deposit32(f32_val, 23, 8, f32_exp);
+ f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
+ return make_float32(f32_val);
}
float64 HELPER(recpe_f64)(float64 input, void *fpstp)
@@ -11654,12 +11711,9 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
float_status *fpst = fpstp;
float64 f64 = float64_squash_input_denormal(input, fpst);
uint64_t f64_val = float64_val(f64);
- uint64_t f64_sbit = 0x8000000000000000ULL & f64_val;
- int64_t f64_exp = extract64(f64_val, 52, 11);
- float64 r64;
- uint64_t r64_val;
- int64_t r64_exp;
- uint64_t r64_frac;
+ bool f64_sign = float64_is_neg(f64);
+ int f64_exp = extract64(f64_val, 52, 11);
+ uint64_t f64_frac = extract64(f64_val, 0, 52);
/* Deal with any special cases */
if (float64_is_any_nan(f64)) {
@@ -11680,80 +11734,119 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
} else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
/* Abs(value) < 2.0^-1024 */
float_raise(float_flag_overflow | float_flag_inexact, fpst);
- if (round_to_inf(fpst, f64_sbit)) {
- return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ if (round_to_inf(fpst, f64_sign)) {
+ return float64_set_sign(float64_infinity, f64_sign);
} else {
- return float64_set_sign(float64_maxnorm, float64_is_neg(f64));
+ return float64_set_sign(float64_maxnorm, f64_sign);
}
} else if (f64_exp >= 2045 && fpst->flush_to_zero) {
float_raise(float_flag_underflow, fpst);
return float64_set_sign(float64_zero, float64_is_neg(f64));
}
- r64 = call_recip_estimate(f64, 2045, fpst);
- r64_val = float64_val(r64);
- r64_exp = extract64(r64_val, 52, 11);
- r64_frac = extract64(r64_val, 0, 52);
+ f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
- /* result = sign : result_exp<10:0> : fraction<51:0> */
- return make_float64(f64_sbit |
- ((r64_exp & 0x7ff) << 52) |
- r64_frac);
+ /* result = sign : result_exp<10:0> : fraction<51:0>; */
+ f64_val = deposit64(0, 63, 1, f64_sign);
+ f64_val = deposit64(f64_val, 52, 11, f64_exp);
+ f64_val = deposit64(f64_val, 0, 52, f64_frac);
+ return make_float64(f64_val);
}
/* The algorithm that must be used to calculate the estimate
* is specified by the ARM ARM.
*/
-static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
-{
- /* These calculations mustn't set any fp exception flags,
- * so we use a local copy of the fp_status.
- */
- float_status dummy_status = *real_fp_status;
- float_status *s = &dummy_status;
- float64 q;
- int64_t q_int;
-
- if (float64_lt(a, float64_half, s)) {
- /* range 0.25 <= a < 0.5 */
-
- /* a in units of 1/512 rounded down */
- /* q0 = (int)(a * 512.0); */
- q = float64_mul(float64_512, a, s);
- q_int = float64_to_int64_round_to_zero(q, s);
-
- /* reciprocal root r */
- /* r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0); */
- q = int64_to_float64(q_int, s);
- q = float64_add(q, float64_half, s);
- q = float64_div(q, float64_512, s);
- q = float64_sqrt(q, s);
- q = float64_div(float64_one, q, s);
+
+static int do_recip_sqrt_estimate(int a)
+{
+ int b, estimate;
+
+ assert(128 <= a && a < 512);
+ if (a < 256) {
+ a = a * 2 + 1;
+ } else {
+ a = (a >> 1) << 1;
+ a = (a + 1) * 2;
+ }
+ b = 512;
+ while (a * (b + 1) * (b + 1) < (1 << 28)) {
+ b += 1;
+ }
+ estimate = (b + 1) / 2;
+ assert(256 <= estimate && estimate < 512);
+
+ return estimate;
+}
+
+
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
+{
+ int estimate;
+ uint32_t scaled;
+
+ if (*exp == 0) {
+ while (extract64(frac, 51, 1) == 0) {
+ frac = frac << 1;
+ *exp -= 1;
+ }
+ frac = extract64(frac, 0, 51) << 1;
+ }
+
+ if (*exp & 1) {
+ /* scaled = UInt('01':fraction<51:45>) */
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
} else {
- /* range 0.5 <= a < 1.0 */
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ }
+ estimate = do_recip_sqrt_estimate(scaled);
+
+ *exp = (exp_off - *exp) / 2;
+ return extract64(estimate, 0, 8) << 44;
+}
- /* a in units of 1/256 rounded down */
- /* q1 = (int)(a * 256.0); */
- q = float64_mul(float64_256, a, s);
- int64_t q_int = float64_to_int64_round_to_zero(q, s);
+float16 HELPER(rsqrte_f16)(float16 input, void *fpstp)
+{
+ float_status *s = fpstp;
+ float16 f16 = float16_squash_input_denormal(input, s);
+ uint16_t val = float16_val(f16);
+ bool f16_sign = float16_is_neg(f16);
+ int f16_exp = extract32(val, 10, 5);
+ uint16_t f16_frac = extract32(val, 0, 10);
+ uint64_t f64_frac;
- /* reciprocal root r */
- /* r = 1.0 /sqrt(((double)q1 + 0.5) / 256); */
- q = int64_to_float64(q_int, s);
- q = float64_add(q, float64_half, s);
- q = float64_div(q, float64_256, s);
- q = float64_sqrt(q, s);
- q = float64_div(float64_one, q, s);
+ if (float16_is_any_nan(f16)) {
+ float16 nan = f16;
+ if (float16_is_signaling_nan(f16, s)) {
+ float_raise(float_flag_invalid, s);
+ nan = float16_maybe_silence_nan(f16, s);
+ }
+ if (s->default_nan_mode) {
+ nan = float16_default_nan(s);
+ }
+ return nan;
+ } else if (float16_is_zero(f16)) {
+ float_raise(float_flag_divbyzero, s);
+ return float16_set_sign(float16_infinity, f16_sign);
+ } else if (f16_sign) {
+ float_raise(float_flag_invalid, s);
+ return float16_default_nan(s);
+ } else if (float16_is_infinity(f16)) {
+ return float16_zero;
}
- /* r in units of 1/256 rounded to nearest */
- /* s = (int)(256.0 * r + 0.5); */
- q = float64_mul(q, float64_256,s );
- q = float64_add(q, float64_half, s);
- q_int = float64_to_int64_round_to_zero(q, s);
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+
+ f64_frac = ((uint64_t) f16_frac) << (52 - 10);
+
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
- /* return (double)s / 256.0;*/
- return float64_div(int64_to_float64(q_int, s), float64_256, s);
+ /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
+ val = deposit32(0, 15, 1, f16_sign);
+ val = deposit32(val, 10, 5, f16_exp);
+ val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
+ return make_float16(val);
}
float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
@@ -11761,13 +11854,10 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
float_status *s = fpstp;
float32 f32 = float32_squash_input_denormal(input, s);
uint32_t val = float32_val(f32);
- uint32_t f32_sbit = 0x80000000 & val;
- int32_t f32_exp = extract32(val, 23, 8);
+ uint32_t f32_sign = float32_is_neg(f32);
+ int f32_exp = extract32(val, 23, 8);
uint32_t f32_frac = extract32(val, 0, 23);
uint64_t f64_frac;
- uint64_t val64;
- int result_exp;
- float64 f64;
if (float32_is_any_nan(f32)) {
float32 nan = f32;
@@ -11793,32 +11883,13 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
* preserving the parity of the exponent. */
f64_frac = ((uint64_t) f32_frac) << 29;
- if (f32_exp == 0) {
- while (extract64(f64_frac, 51, 1) == 0) {
- f64_frac = f64_frac << 1;
- f32_exp = f32_exp-1;
- }
- f64_frac = extract64(f64_frac, 0, 51) << 1;
- }
-
- if (extract64(f32_exp, 0, 1) == 0) {
- f64 = make_float64(((uint64_t) f32_sbit) << 32
- | (0x3feULL << 52)
- | f64_frac);
- } else {
- f64 = make_float64(((uint64_t) f32_sbit) << 32
- | (0x3fdULL << 52)
- | f64_frac);
- }
-
- result_exp = (380 - f32_exp) / 2;
- f64 = recip_sqrt_estimate(f64, s);
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
- val64 = float64_val(f64);
-
- val = ((result_exp & 0xff) << 23)
- | ((val64 >> 29) & 0x7fffff);
+ /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
+ val = deposit32(0, 31, 1, f32_sign);
+ val = deposit32(val, 23, 8, f32_exp);
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
return make_float32(val);
}
@@ -11827,11 +11898,9 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
float_status *s = fpstp;
float64 f64 = float64_squash_input_denormal(input, s);
uint64_t val = float64_val(f64);
- uint64_t f64_sbit = 0x8000000000000000ULL & val;
- int64_t f64_exp = extract64(val, 52, 11);
+ bool f64_sign = float64_is_neg(f64);
+ int f64_exp = extract64(val, 52, 11);
uint64_t f64_frac = extract64(val, 0, 52);
- int64_t result_exp;
- uint64_t result_frac;
if (float64_is_any_nan(f64)) {
float64 nan = f64;
@@ -11853,75 +11922,41 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
return float64_zero;
}
- /* Scale and normalize to a double-precision value between 0.25 and 1.0,
- * preserving the parity of the exponent. */
-
- if (f64_exp == 0) {
- while (extract64(f64_frac, 51, 1) == 0) {
- f64_frac = f64_frac << 1;
- f64_exp = f64_exp - 1;
- }
- f64_frac = extract64(f64_frac, 0, 51) << 1;
- }
-
- if (extract64(f64_exp, 0, 1) == 0) {
- f64 = make_float64(f64_sbit
- | (0x3feULL << 52)
- | f64_frac);
- } else {
- f64 = make_float64(f64_sbit
- | (0x3fdULL << 52)
- | f64_frac);
- }
-
- result_exp = (3068 - f64_exp) / 2;
-
- f64 = recip_sqrt_estimate(f64, s);
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
- result_frac = extract64(float64_val(f64), 0, 52);
-
- return make_float64(f64_sbit |
- ((result_exp & 0x7ff) << 52) |
- result_frac);
+ /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
+ val = deposit64(0, 61, 1, f64_sign);
+ val = deposit64(val, 52, 11, f64_exp);
+ val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
+ return make_float64(val);
}
uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
{
- float_status *s = fpstp;
- float64 f64;
+ /* float_status *s = fpstp; */
+ int input, estimate;
if ((a & 0x80000000) == 0) {
return 0xffffffff;
}
- f64 = make_float64((0x3feULL << 52)
- | ((int64_t)(a & 0x7fffffff) << 21));
-
- f64 = recip_estimate(f64, s);
+ input = extract32(a, 23, 9);
+ estimate = recip_estimate(input);
- return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
+ return deposit32(0, (32 - 9), 9, estimate);
}
uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
{
- float_status *fpst = fpstp;
- float64 f64;
+ int estimate;
if ((a & 0xc0000000) == 0) {
return 0xffffffff;
}
- if (a & 0x80000000) {
- f64 = make_float64((0x3feULL << 52)
- | ((uint64_t)(a & 0x7fffffff) << 21));
- } else { /* bits 31-30 == '01' */
- f64 = make_float64((0x3fdULL << 52)
- | ((uint64_t)(a & 0x3fffffff) << 22));
- }
-
- f64 = recip_sqrt_estimate(f64, fpst);
+ estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
- return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
+ return deposit32(0, 23, 9, estimate);
}
/* VFPv4 fused multiply-accumulate */
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 6383d7d09e..6dd8504ec3 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -120,17 +120,23 @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
+DEF_HELPER_2(vfp_uitoh, f16, i32, ptr)
DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
DEF_HELPER_2(vfp_uitod, f64, i32, ptr)
+DEF_HELPER_2(vfp_sitoh, f16, i32, ptr)
DEF_HELPER_2(vfp_sitos, f32, i32, ptr)
DEF_HELPER_2(vfp_sitod, f64, i32, ptr)
+DEF_HELPER_2(vfp_touih, i32, f16, ptr)
DEF_HELPER_2(vfp_touis, i32, f32, ptr)
DEF_HELPER_2(vfp_touid, i32, f64, ptr)
+DEF_HELPER_2(vfp_touizh, i32, f16, ptr)
DEF_HELPER_2(vfp_touizs, i32, f32, ptr)
DEF_HELPER_2(vfp_touizd, i32, f64, ptr)
+DEF_HELPER_2(vfp_tosih, i32, f16, ptr)
DEF_HELPER_2(vfp_tosis, i32, f32, ptr)
DEF_HELPER_2(vfp_tosid, i32, f64, ptr)
+DEF_HELPER_2(vfp_tosizh, i32, f16, ptr)
DEF_HELPER_2(vfp_tosizs, i32, f32, ptr)
DEF_HELPER_2(vfp_tosizd, i32, f64, ptr)
@@ -142,6 +148,8 @@ DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
@@ -166,8 +174,10 @@ DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr)
-DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env)
+DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
@@ -182,8 +192,10 @@ DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
+DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_2(recpe_u32, i32, i32, ptr)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 1c88539d62..32811dc8b0 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -637,16 +637,21 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
tcg_temp_free_i64(tmp);
}
-static TCGv_ptr get_fpstatus_ptr(void)
+static TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
TCGv_ptr statusptr = tcg_temp_new_ptr();
int offset;
- /* In A64 all instructions (both FP and Neon) use the FPCR;
- * there is no equivalent of the A32 Neon "standard FPSCR value"
- * and all operations use vfp.fp_status.
+ /* In A64 all instructions (both FP and Neon) use the FPCR; there
+ * is no equivalent of the A32 Neon "standard FPSCR value".
+ * However half-precision operations operate under a different
+ * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
*/
- offset = offsetof(CPUARMState, vfp.fp_status);
+ if (is_f16) {
+ offset = offsetof(CPUARMState, vfp.fp_status_f16);
+ } else {
+ offset = offsetof(CPUARMState, vfp.fp_status);
+ }
tcg_gen_addi_ptr(statusptr, cpu_env, offset);
return statusptr;
}
@@ -4423,7 +4428,7 @@ static void handle_fp_compare(DisasContext *s, bool is_double,
bool cmp_with_zero, bool signal_all_nans)
{
TCGv_i64 tcg_flags = tcg_temp_new_i64();
- TCGv_ptr fpst = get_fpstatus_ptr();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
if (is_double) {
TCGv_i64 tcg_vn, tcg_vm;
@@ -4591,6 +4596,65 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(t_true);
}
+/* Floating-point data-processing (1 source) - half precision */
+static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
+{
+ TCGv_ptr fpst = NULL;
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
+
+ switch (opcode) {
+ case 0x0: /* FMOV */
+ tcg_gen_mov_i32(tcg_res, tcg_op);
+ break;
+ case 0x1: /* FABS */
+ tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
+ break;
+ case 0x2: /* FNEG */
+ tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ break;
+ case 0x3: /* FSQRT */
+ gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
+ break;
+ case 0x8: /* FRINTN */
+ case 0x9: /* FRINTP */
+ case 0xa: /* FRINTM */
+ case 0xb: /* FRINTZ */
+ case 0xc: /* FRINTA */
+ {
+ TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+ fpst = get_fpstatus_ptr(true);
+
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
+
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ tcg_temp_free_i32(tcg_rmode);
+ break;
+ }
+ case 0xe: /* FRINTX */
+ fpst = get_fpstatus_ptr(true);
+ gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
+ break;
+ case 0xf: /* FRINTI */
+ fpst = get_fpstatus_ptr(true);
+ gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
+ break;
+ default:
+ abort();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+ if (fpst) {
+ tcg_temp_free_ptr(fpst);
+ }
+ tcg_temp_free_i32(tcg_op);
+ tcg_temp_free_i32(tcg_res);
+}
+
/* Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
@@ -4598,7 +4662,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
TCGv_i32 tcg_op;
TCGv_i32 tcg_res;
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(false);
tcg_op = read_fp_sreg(s, rn);
tcg_res = tcg_temp_new_i32();
@@ -4623,10 +4687,10 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rints(tcg_res, tcg_op, fpst);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
tcg_temp_free_i32(tcg_rmode);
break;
}
@@ -4660,7 +4724,7 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
return;
}
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(false);
tcg_op = read_fp_dreg(s, rn);
tcg_res = tcg_temp_new_i64();
@@ -4682,10 +4746,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rintd(tcg_res, tcg_op, fpst);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
tcg_temp_free_i32(tcg_rmode);
break;
}
@@ -4820,6 +4884,18 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
handle_fp_1src_double(s, opcode, rd, rn);
break;
+ case 3:
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ handle_fp_1src_half(s, opcode, rd, rn);
+ break;
default:
unallocated_encoding(s);
}
@@ -4840,7 +4916,7 @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
TCGv_ptr fpst;
tcg_res = tcg_temp_new_i32();
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(false);
tcg_op1 = read_fp_sreg(s, rn);
tcg_op2 = read_fp_sreg(s, rm);
@@ -4893,7 +4969,7 @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
TCGv_ptr fpst;
tcg_res = tcg_temp_new_i64();
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(false);
tcg_op1 = read_fp_dreg(s, rn);
tcg_op2 = read_fp_dreg(s, rm);
@@ -4979,7 +5055,7 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
{
TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
TCGv_i32 tcg_res = tcg_temp_new_i32();
- TCGv_ptr fpst = get_fpstatus_ptr();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
tcg_op1 = read_fp_sreg(s, rn);
tcg_op2 = read_fp_sreg(s, rm);
@@ -5017,7 +5093,7 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
{
TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
TCGv_i64 tcg_res = tcg_temp_new_i64();
- TCGv_ptr fpst = get_fpstatus_ptr();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
tcg_op1 = read_fp_dreg(s, rn);
tcg_op2 = read_fp_dreg(s, rm);
@@ -5158,7 +5234,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
TCGv_ptr tcg_fpstatus;
TCGv_i32 tcg_shift;
- tcg_fpstatus = get_fpstatus_ptr();
+ tcg_fpstatus = get_fpstatus_ptr(false);
tcg_shift = tcg_const_i32(64 - scale);
@@ -5212,7 +5288,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
if (is_double) {
TCGv_i64 tcg_double = read_fp_dreg(s, rn);
@@ -5259,7 +5335,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_temp_free_i32(tcg_single);
}
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_temp_free_i32(tcg_rmode);
if (!sf) {
@@ -5736,26 +5812,75 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_resh);
}
-static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
- int opc, bool is_min, TCGv_ptr fpst)
-{
- /* Helper function for disas_simd_across_lanes: do a single precision
- * min/max operation on the specified two inputs,
- * and return the result in tcg_elt1.
- */
- if (opc == 0xc) {
- if (is_min) {
- gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
- } else {
- gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
- }
+/*
+ * do_reduction_op helper
+ *
+ * This mirrors the Reduce() pseudocode in the ARM ARM. It is
+ * important for correct NaN propagation that we do these
+ * operations in exactly the order specified by the pseudocode.
+ *
+ * This is a recursive function, TCG temps should be freed by the
+ * calling function once it is done with the values.
+ */
+static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
+ int esize, int size, int vmap, TCGv_ptr fpst)
+{
+ if (esize == size) {
+ int element;
+ TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
+ TCGv_i32 tcg_elem;
+
+ /* We should have one register left here */
+ assert(ctpop8(vmap) == 1);
+ element = ctz32(vmap);
+ assert(element < 8);
+
+ tcg_elem = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_elem, rn, element, msize);
+ return tcg_elem;
} else {
- assert(opc == 0xf);
- if (is_min) {
- gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
- } else {
- gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
+ int bits = size / 2;
+ int shift = ctpop8(vmap) / 2;
+ int vmap_lo = (vmap >> shift) & vmap;
+ int vmap_hi = (vmap & ~vmap_lo);
+ TCGv_i32 tcg_hi, tcg_lo, tcg_res;
+
+ tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
+ tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
+ tcg_res = tcg_temp_new_i32();
+
+ switch (fpopcode) {
+ case 0x0c: /* fmaxnmv half-precision */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x0f: /* fmaxv half-precision */
+ gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x1c: /* fminnmv half-precision */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x1f: /* fminv half-precision */
+ gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x2c: /* fmaxnmv */
+ gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x2f: /* fmaxv */
+ gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x3c: /* fminnmv */
+ gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x3f: /* fminv */
+ gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ default:
+ g_assert_not_reached();
}
+
+ tcg_temp_free_i32(tcg_hi);
+ tcg_temp_free_i32(tcg_lo);
+ return tcg_res;
}
}
@@ -5797,16 +5922,21 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
break;
case 0xc: /* FMAXNMV, FMINNMV */
case 0xf: /* FMAXV, FMINV */
- if (!is_u || !is_q || extract32(size, 0, 1)) {
- unallocated_encoding(s);
- return;
- }
- /* Bit 1 of size field encodes min vs max, and actual size is always
- * 32 bits: adjust the size variable so following code can rely on it
+ /* Bit 1 of size field encodes min vs max and the actual size
+ * depends on the encoding of the U bit. If not set (and FP16
+ * enabled) then we do half-precision float instead of single
+ * precision.
*/
is_min = extract32(size, 1, 1);
is_fp = true;
- size = 2;
+ if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ size = 1;
+ } else if (!is_u || !is_q || extract32(size, 0, 1)) {
+ unallocated_encoding(s);
+ return;
+ } else {
+ size = 2;
+ }
break;
default:
unallocated_encoding(s);
@@ -5863,38 +5993,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
}
} else {
- /* Floating point ops which work on 32 bit (single) intermediates.
+ /* Floating point vector reduction ops which work across 32
+ * bit (single) or 16 bit (half-precision) intermediates.
* Note that correct NaN propagation requires that we do these
* operations in exactly the order specified by the pseudocode.
*/
- TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
- TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
- TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
- TCGv_ptr fpst = get_fpstatus_ptr();
-
- assert(esize == 32);
- assert(elements == 4);
-
- read_vec_element(s, tcg_elt, rn, 0, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
- read_vec_element(s, tcg_elt, rn, 1, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
-
- do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
-
- read_vec_element(s, tcg_elt, rn, 2, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
- read_vec_element(s, tcg_elt, rn, 3, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
-
- do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
-
- do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
-
- tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
- tcg_temp_free_i32(tcg_elt1);
- tcg_temp_free_i32(tcg_elt2);
- tcg_temp_free_i32(tcg_elt3);
+ TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
+ int fpopcode = opcode | is_min << 4 | is_u << 5;
+ int vmap = (1 << elements) - 1;
+ TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
+ (is_q ? 128 : 64), vmap, fpst);
+ tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
+ tcg_temp_free_i32(tcg_res32);
tcg_temp_free_ptr(fpst);
}
@@ -6195,6 +6305,8 @@ static void disas_simd_copy(DisasContext *s, uint32_t insn)
* MVNI - move inverted (shifted) imm into register
* ORR - bitwise OR of (shifted) imm with register
* BIC - bitwise clear of (shifted) imm with register
+ * With ARMv8.2 we also have:
+ * FMOV half-precision
*/
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
@@ -6209,8 +6321,11 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
uint64_t imm = 0;
if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
- unallocated_encoding(s);
- return;
+ /* Check for FMOV (vector, immediate) - half-precision */
+ if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
+ unallocated_encoding(s);
+ return;
+ }
}
if (!fp_access_check(s)) {
@@ -6268,19 +6383,29 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
imm |= 0x4000000000000000ULL;
}
} else {
- imm = (abcdefgh & 0x3f) << 19;
- if (abcdefgh & 0x80) {
- imm |= 0x80000000;
- }
- if (abcdefgh & 0x40) {
- imm |= 0x3e000000;
+ if (o2) {
+ /* FMOV (vector, immediate) - half-precision */
+ imm = vfp_expand_imm(MO_16, abcdefgh);
+ /* now duplicate across the lanes */
+ imm = bitfield_replicate(imm, 16);
} else {
- imm |= 0x40000000;
+ imm = (abcdefgh & 0x3f) << 19;
+ if (abcdefgh & 0x80) {
+ imm |= 0x80000000;
+ }
+ if (abcdefgh & 0x40) {
+ imm |= 0x3e000000;
+ } else {
+ imm |= 0x40000000;
+ }
+ imm |= (imm << 32);
}
- imm |= (imm << 32);
}
}
break;
+ default:
+ fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
+ g_assert_not_reached();
}
if (cmode_3_1 != 7 && is_neg) {
@@ -6362,24 +6487,30 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
case 0xf: /* FMAXP */
case 0x2c: /* FMINNMP */
case 0x2f: /* FMINP */
- /* FP op, size[0] is 32 or 64 bit */
+ /* FP op, size[0] is 32 or 64 bit*/
if (!u) {
- unallocated_encoding(s);
- return;
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ } else {
+ size = MO_16;
+ }
+ } else {
+ size = extract32(size, 0, 1) ? MO_64 : MO_32;
}
+
if (!fp_access_check(s)) {
return;
}
- size = extract32(size, 0, 1) ? 3 : 2;
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(size == MO_16);
break;
default:
unallocated_encoding(s);
return;
}
- if (size == 3) {
+ if (size == MO_64) {
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
TCGv_i64 tcg_res = tcg_temp_new_i64();
@@ -6420,27 +6551,49 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
- read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
- read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
+ read_vec_element_i32(s, tcg_op1, rn, 0, size);
+ read_vec_element_i32(s, tcg_op2, rn, 1, size);
- switch (opcode) {
- case 0xc: /* FMAXNMP */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xd: /* FADDP */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
+ if (size == MO_16) {
+ switch (opcode) {
+ case 0xc: /* FMAXNMP */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ switch (opcode) {
+ case 0xc: /* FMAXNMP */
+ gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
write_fp_sreg(s, rd, tcg_res);
@@ -6863,23 +7016,28 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
int elements, int is_signed,
int fracbits, int size)
{
- bool is_double = size == 3 ? true : false;
- TCGv_ptr tcg_fpst = get_fpstatus_ptr();
- TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
- TCGv_i64 tcg_int = tcg_temp_new_i64();
+ TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
+ TCGv_i32 tcg_shift = NULL;
+
TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
int pass;
- for (pass = 0; pass < elements; pass++) {
- read_vec_element(s, tcg_int, rn, pass, mop);
+ if (fracbits || size == MO_64) {
+ tcg_shift = tcg_const_i32(fracbits);
+ }
+
+ if (size == MO_64) {
+ TCGv_i64 tcg_int64 = tcg_temp_new_i64();
+ TCGv_i64 tcg_double = tcg_temp_new_i64();
+
+ for (pass = 0; pass < elements; pass++) {
+ read_vec_element(s, tcg_int64, rn, pass, mop);
- if (is_double) {
- TCGv_i64 tcg_double = tcg_temp_new_i64();
if (is_signed) {
- gen_helper_vfp_sqtod(tcg_double, tcg_int,
+ gen_helper_vfp_sqtod(tcg_double, tcg_int64,
tcg_shift, tcg_fpst);
} else {
- gen_helper_vfp_uqtod(tcg_double, tcg_int,
+ gen_helper_vfp_uqtod(tcg_double, tcg_int64,
tcg_shift, tcg_fpst);
}
if (elements == 1) {
@@ -6887,28 +7045,72 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
} else {
write_vec_element(s, tcg_double, rd, pass, MO_64);
}
- tcg_temp_free_i64(tcg_double);
- } else {
- TCGv_i32 tcg_single = tcg_temp_new_i32();
- if (is_signed) {
- gen_helper_vfp_sqtos(tcg_single, tcg_int,
- tcg_shift, tcg_fpst);
- } else {
- gen_helper_vfp_uqtos(tcg_single, tcg_int,
- tcg_shift, tcg_fpst);
+ }
+
+ tcg_temp_free_i64(tcg_int64);
+ tcg_temp_free_i64(tcg_double);
+
+ } else {
+ TCGv_i32 tcg_int32 = tcg_temp_new_i32();
+ TCGv_i32 tcg_float = tcg_temp_new_i32();
+
+ for (pass = 0; pass < elements; pass++) {
+ read_vec_element_i32(s, tcg_int32, rn, pass, mop);
+
+ switch (size) {
+ case MO_32:
+ if (fracbits) {
+ if (is_signed) {
+ gen_helper_vfp_sltos(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ } else {
+ gen_helper_vfp_ultos(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ }
+ } else {
+ if (is_signed) {
+ gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
+ } else {
+ gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
+ }
+ }
+ break;
+ case MO_16:
+ if (fracbits) {
+ if (is_signed) {
+ gen_helper_vfp_sltoh(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ } else {
+ gen_helper_vfp_ultoh(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ }
+ } else {
+ if (is_signed) {
+ gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
+ } else {
+ gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
+
if (elements == 1) {
- write_fp_sreg(s, rd, tcg_single);
+ write_fp_sreg(s, rd, tcg_float);
} else {
- write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_float, rd, pass, size);
}
- tcg_temp_free_i32(tcg_single);
}
+
+ tcg_temp_free_i32(tcg_int32);
+ tcg_temp_free_i32(tcg_float);
}
- tcg_temp_free_i64(tcg_int);
tcg_temp_free_ptr(tcg_fpst);
- tcg_temp_free_i32(tcg_shift);
+ if (tcg_shift) {
+ tcg_temp_free_i32(tcg_shift);
+ }
clear_vec_high(s, elements << size == 16, rd);
}
@@ -6979,8 +7181,8 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
assert(!(is_scalar && is_q));
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
- tcg_fpstatus = get_fpstatus_ptr();
+ tcg_fpstatus = get_fpstatus_ptr(false);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_shift = tcg_const_i32(fracbits);
if (is_double) {
@@ -7024,7 +7226,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
tcg_temp_free_ptr(tcg_fpstatus);
tcg_temp_free_i32(tcg_shift);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_temp_free_i32(tcg_rmode);
}
@@ -7239,8 +7441,7 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
/* Handle 64x64->64 opcodes which are shared between the scalar
* and vector 3-same groups. We cover every opcode where size == 3
* is valid in either the three-reg-same (integer, not pairwise)
- * or scalar-three-reg-same groups. (Some opcodes are not yet
- * implemented.)
+ * or scalar-three-reg-same groups.
*/
TCGCond cond;
@@ -7326,7 +7527,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
int fpopcode, int rd, int rn, int rm)
{
int pass;
- TCGv_ptr fpst = get_fpstatus_ptr();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
for (pass = 0; pass < elements; pass++) {
if (size) {
@@ -7672,6 +7873,104 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_rd);
}
+/* AdvSIMD scalar three same FP16
+ * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
+ * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
+ * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
+ * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
+ * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
+ * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
+ */
+static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
+ uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 3);
+ int rm = extract32(insn, 16, 5);
+ bool u = extract32(insn, 29, 1);
+ bool a = extract32(insn, 23, 1);
+ int fpopcode = opcode | (a << 3) | (u << 4);
+ TCGv_ptr fpst;
+ TCGv_i32 tcg_op1;
+ TCGv_i32 tcg_op2;
+ TCGv_i32 tcg_res;
+
+ switch (fpopcode) {
+ case 0x03: /* FMULX */
+ case 0x04: /* FCMEQ (reg) */
+ case 0x07: /* FRECPS */
+ case 0x0f: /* FRSQRTS */
+ case 0x14: /* FCMGE (reg) */
+ case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
+ case 0x1c: /* FCMGT (reg) */
+ case 0x1d: /* FACGT */
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ fpst = get_fpstatus_ptr(true);
+
+ tcg_op1 = tcg_temp_new_i32();
+ tcg_op2 = tcg_temp_new_i32();
+ tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
+ read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
+
+ switch (fpopcode) {
+ case 0x03: /* FMULX */
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x04: /* FCMEQ (reg) */
+ gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x07: /* FRECPS */
+ gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x0f: /* FRSQRTS */
+ gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x14: /* FCMGE (reg) */
+ gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x15: /* FACGE */
+ gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FABD */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
+ break;
+ case 0x1c: /* FCMGT (reg) */
+ gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1d: /* FACGT */
+ gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_ptr(fpst);
+}
+
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
@@ -7783,14 +8082,14 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
bool is_scalar, bool is_u, bool is_q,
int size, int rn, int rd)
{
- bool is_double = (size == 3);
+ bool is_double = (size == MO_64);
TCGv_ptr fpst;
if (!fp_access_check(s)) {
return;
}
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(size == MO_16);
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
@@ -7842,34 +8141,57 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
bool swap = false;
int pass, maxpasses;
- switch (opcode) {
- case 0x2e: /* FCMLT (zero) */
- swap = true;
- /* fall through */
- case 0x2c: /* FCMGT (zero) */
- genfn = gen_helper_neon_cgt_f32;
- break;
- case 0x2d: /* FCMEQ (zero) */
- genfn = gen_helper_neon_ceq_f32;
- break;
- case 0x6d: /* FCMLE (zero) */
- swap = true;
- /* fall through */
- case 0x6c: /* FCMGE (zero) */
- genfn = gen_helper_neon_cge_f32;
- break;
- default:
- g_assert_not_reached();
+ if (size == MO_16) {
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fall through */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_advsimd_cgt_f16;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_advsimd_ceq_f16;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_advsimd_cge_f16;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fall through */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_neon_cgt_f32;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_neon_ceq_f32;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_neon_cge_f32;
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
if (is_scalar) {
maxpasses = 1;
} else {
- maxpasses = is_q ? 4 : 2;
+ int vector_size = 8 << is_q;
+ maxpasses = vector_size >> size;
}
for (pass = 0; pass < maxpasses; pass++) {
- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
if (swap) {
genfn(tcg_res, tcg_zero, tcg_op, fpst);
} else {
@@ -7878,7 +8200,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
if (is_scalar) {
write_fp_sreg(s, rd, tcg_res);
} else {
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_res, rd, pass, size);
}
}
tcg_temp_free_i32(tcg_res);
@@ -7897,7 +8219,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
int size, int rn, int rd)
{
bool is_double = (size == 3);
- TCGv_ptr fpst = get_fpstatus_ptr();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
@@ -8295,8 +8617,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
if (is_fcvt) {
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
- tcg_fpstatus = get_fpstatus_ptr();
+ tcg_fpstatus = get_fpstatus_ptr(false);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
} else {
tcg_rmode = NULL;
tcg_fpstatus = NULL;
@@ -8361,7 +8683,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
}
if (is_fcvt) {
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_ptr(tcg_fpstatus);
}
@@ -9516,7 +9838,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
/* Floating point operations need fpst */
if (opcode >= 0x58) {
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(false);
} else {
fpst = NULL;
}
@@ -10190,6 +10512,200 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
}
}
+/*
+ * Advanced SIMD three same (ARMv8.2 FP16 variants)
+ *
+ * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
+ * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
+ * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
+ *
+ * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
+ * (register), FACGE, FABD, FCMGT (register) and FACGT.
+ *
+ */
+static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
+{
+ int opcode, fpopcode;
+ int is_q, u, a, rm, rn, rd;
+ int datasize, elements;
+ int pass;
+ TCGv_ptr fpst;
+ bool pairwise = false;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ /* For these floating point ops, the U, a and opcode bits
+ * together indicate the operation.
+ */
+ opcode = extract32(insn, 11, 3);
+ u = extract32(insn, 29, 1);
+ a = extract32(insn, 23, 1);
+ is_q = extract32(insn, 30, 1);
+ rm = extract32(insn, 16, 5);
+ rn = extract32(insn, 5, 5);
+ rd = extract32(insn, 0, 5);
+
+ fpopcode = opcode | (a << 3) | (u << 4);
+ datasize = is_q ? 128 : 64;
+ elements = datasize / 16;
+
+ switch (fpopcode) {
+ case 0x10: /* FMAXNMP */
+ case 0x12: /* FADDP */
+ case 0x16: /* FMAXP */
+ case 0x18: /* FMINNMP */
+ case 0x1e: /* FMINP */
+ pairwise = true;
+ break;
+ }
+
+ fpst = get_fpstatus_ptr(true);
+
+ if (pairwise) {
+ int maxpass = is_q ? 8 : 4;
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res[8];
+
+ for (pass = 0; pass < maxpass; pass++) {
+ int passreg = pass < (maxpass / 2) ? rn : rm;
+ int passelt = (pass << 1) & (maxpass - 1);
+
+ read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
+ read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (fpopcode) {
+ case 0x10: /* FMAXNMP */
+ gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
+ fpst);
+ break;
+ case 0x12: /* FADDP */
+ gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x16: /* FMAXP */
+ gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x18: /* FMINNMP */
+ gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
+ fpst);
+ break;
+ case 0x1e: /* FMINP */
+ gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ for (pass = 0; pass < maxpass; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+
+ } else {
+ for (pass = 0; pass < elements; pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
+ read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
+
+ switch (fpopcode) {
+ case 0x0: /* FMAXNM */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
+ fpst);
+ break;
+ case 0x2: /* FADD */
+ gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3: /* FMULX */
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x4: /* FCMEQ */
+ gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x6: /* FMAX */
+ gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7: /* FRECPS */
+ gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x8: /* FMINNM */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x9: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
+ fpst);
+ break;
+ case 0xa: /* FSUB */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xe: /* FMIN */
+ gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FRSQRTS */
+ gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x13: /* FMUL */
+ gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x14: /* FCMGE */
+ gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x15: /* FACGE */
+ gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x17: /* FDIV */
+ gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FABD */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
+ break;
+ case 0x1c: /* FCMGT */
+ gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1d: /* FACGT */
+ gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
+ __func__, insn, fpopcode, s->pc);
+ g_assert_not_reached();
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+ }
+
+ tcg_temp_free_ptr(fpst);
+
+ clear_vec_high(s, is_q, rd);
+}
+
static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
int size, int rn, int rd)
{
@@ -10675,14 +11191,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
- if (need_fpstatus) {
- tcg_fpstatus = get_fpstatus_ptr();
+ if (need_fpstatus || need_rmode) {
+ tcg_fpstatus = get_fpstatus_ptr(false);
} else {
tcg_fpstatus = NULL;
}
if (need_rmode) {
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
} else {
tcg_rmode = NULL;
}
@@ -10924,7 +11440,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
clear_vec_high(s, is_q, rd);
if (need_rmode) {
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_temp_free_i32(tcg_rmode);
}
if (need_fpstatus) {
@@ -10932,6 +11448,307 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
}
+/* AdvSIMD [scalar] two register miscellaneous (FP16)
+ *
+ * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
+ * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
+ * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
+ * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
+ * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
+ *
+ * This actually covers two groups where scalar access is governed by
+ * bit 28. A bunch of the instructions (float to integral) only exist
+ * in the vector form and are un-allocated for the scalar decode. Also
+ * in the scalar decode Q is always 1.
+ */
+static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
+{
+ int fpop, opcode, a, u;
+ int rn, rd;
+ bool is_q;
+ bool is_scalar;
+ bool only_in_vector = false;
+
+ int pass;
+ TCGv_i32 tcg_rmode = NULL;
+ TCGv_ptr tcg_fpstatus = NULL;
+ bool need_rmode = false;
+ bool need_fpst = true;
+ int rmode;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ rd = extract32(insn, 0, 5);
+ rn = extract32(insn, 5, 5);
+
+ a = extract32(insn, 23, 1);
+ u = extract32(insn, 29, 1);
+ is_scalar = extract32(insn, 28, 1);
+ is_q = extract32(insn, 30, 1);
+
+ opcode = extract32(insn, 12, 5);
+ fpop = deposit32(opcode, 5, 1, a);
+ fpop = deposit32(fpop, 6, 1, u);
+
+ rd = extract32(insn, 0, 5);
+ rn = extract32(insn, 5, 5);
+
+ switch (fpop) {
+ case 0x1d: /* SCVTF */
+ case 0x5d: /* UCVTF */
+ {
+ int elements;
+
+ if (is_scalar) {
+ elements = 1;
+ } else {
+ elements = (is_q ? 8 : 4);
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+ handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
+ return;
+ }
+ break;
+ case 0x2c: /* FCMGT (zero) */
+ case 0x2d: /* FCMEQ (zero) */
+ case 0x2e: /* FCMLT (zero) */
+ case 0x6c: /* FCMGE (zero) */
+ case 0x6d: /* FCMLE (zero) */
+ handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
+ return;
+ case 0x3d: /* FRECPE */
+ case 0x3f: /* FRECPX */
+ break;
+ case 0x18: /* FRINTN */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_TIEEVEN;
+ break;
+ case 0x19: /* FRINTM */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_NEGINF;
+ break;
+ case 0x38: /* FRINTP */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_POSINF;
+ break;
+ case 0x39: /* FRINTZ */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_ZERO;
+ break;
+ case 0x58: /* FRINTA */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x59: /* FRINTX */
+ case 0x79: /* FRINTI */
+ only_in_vector = true;
+ /* current rounding mode */
+ break;
+ case 0x1a: /* FCVTNS */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEEVEN;
+ break;
+ case 0x1b: /* FCVTMS */
+ need_rmode = true;
+ rmode = FPROUNDING_NEGINF;
+ break;
+ case 0x1c: /* FCVTAS */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x3a: /* FCVTPS */
+ need_rmode = true;
+ rmode = FPROUNDING_POSINF;
+ break;
+ case 0x3b: /* FCVTZS */
+ need_rmode = true;
+ rmode = FPROUNDING_ZERO;
+ break;
+ case 0x5a: /* FCVTNU */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEEVEN;
+ break;
+ case 0x5b: /* FCVTMU */
+ need_rmode = true;
+ rmode = FPROUNDING_NEGINF;
+ break;
+ case 0x5c: /* FCVTAU */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x7a: /* FCVTPU */
+ need_rmode = true;
+ rmode = FPROUNDING_POSINF;
+ break;
+ case 0x7b: /* FCVTZU */
+ need_rmode = true;
+ rmode = FPROUNDING_ZERO;
+ break;
+ case 0x2f: /* FABS */
+ case 0x6f: /* FNEG */
+ need_fpst = false;
+ break;
+ case 0x7d: /* FRSQRTE */
+ case 0x7f: /* FSQRT (vector) */
+ break;
+ default:
+ fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
+ g_assert_not_reached();
+ }
+
+
+ /* Check additional constraints for the scalar encoding */
+ if (is_scalar) {
+ if (!is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* FRINTxx is only in the vector form */
+ if (only_in_vector) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ if (need_rmode || need_fpst) {
+ tcg_fpstatus = get_fpstatus_ptr(true);
+ }
+
+ if (need_rmode) {
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ }
+
+ if (is_scalar) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
+
+ switch (fpop) {
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x3d: /* FRECPE */
+ gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x3f: /* FRECPX */
+ gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x6f: /* FNEG */
+ tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* limit any sign extension going on */
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ } else {
+ for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
+
+ switch (fpop) {
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x3d: /* FRECPE */
+ gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ case 0x58: /* FRINTA */
+ case 0x79: /* FRINTI */
+ gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x59: /* FRINTX */
+ gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x2f: /* FABS */
+ tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
+ break;
+ case 0x6f: /* FNEG */
+ tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x7f: /* FSQRT */
+ gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ }
+
+ clear_vec_high(s, is_q, rd);
+ }
+
+ if (tcg_rmode) {
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_rmode);
+ }
+
+ if (tcg_fpstatus) {
+ tcg_temp_free_ptr(tcg_fpstatus);
+ }
+}
+
/* AdvSIMD scalar x indexed element
* 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
* +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
@@ -10966,6 +11783,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
int rd = extract32(insn, 0, 5);
bool is_long = false;
bool is_fp = false;
+ bool is_fp16 = false;
int index;
TCGv_ptr fpst;
@@ -11012,7 +11830,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
/* fall through */
case 0x9: /* FMUL, FMULX */
- if (!extract32(size, 1, 1)) {
+ if (size == 1) {
unallocated_encoding(s);
return;
}
@@ -11024,18 +11842,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
if (is_fp) {
- /* low bit of size indicates single/double */
- size = extract32(size, 0, 1) ? 3 : 2;
- if (size == 2) {
+ /* convert insn encoded size to TCGMemOp size */
+ switch (size) {
+ case 2: /* single precision */
+ size = MO_32;
index = h << 1 | l;
- } else {
+ rm |= (m << 4);
+ break;
+ case 3: /* double precision */
+ size = MO_64;
if (l || !is_q) {
unallocated_encoding(s);
return;
}
index = h;
+ rm |= (m << 4);
+ break;
+ case 0: /* half precision */
+ size = MO_16;
+ index = h << 2 | l << 1 | m;
+ is_fp16 = true;
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default: /* unallocated */
+ unallocated_encoding(s);
+ return;
}
- rm |= (m << 4);
} else {
switch (size) {
case 1:
@@ -11056,7 +11890,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
if (is_fp) {
- fpst = get_fpstatus_ptr();
+ fpst = get_fpstatus_ptr(is_fp16);
} else {
fpst = NULL;
}
@@ -11158,18 +11992,67 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
break;
}
case 0x5: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negs(tcg_op, tcg_op);
- /* fall through */
case 0x1: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ read_vec_element_i32(s, tcg_res, rd, pass,
+ is_scalar ? size : MO_32);
+ switch (size) {
+ case 1:
+ if (opcode == 0x5) {
+ /* As usual for ARM, separate negation for fused
+ * multiply-add */
+ tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
+ }
+ if (is_scalar) {
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
+ tcg_res, fpst);
+ } else {
+ gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
+ tcg_res, fpst);
+ }
+ break;
+ case 2:
+ if (opcode == 0x5) {
+ /* As usual for ARM, separate negation for
+ * fused multiply-add */
+ tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
+ }
+ gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
+ tcg_res, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
break;
case 0x9: /* FMUL, FMULX */
- if (u) {
- gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
- } else {
- gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+ switch (size) {
+ case 1:
+ if (u) {
+ if (is_scalar) {
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ } else {
+ gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ }
+ } else {
+ if (is_scalar) {
+ gen_helper_advsimd_mulh(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ } else {
+ gen_helper_advsimd_mul2h(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ }
+ }
+ break;
+ case 2:
+ if (u) {
+ gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
break;
case 0xc: /* SQDMULH */
@@ -11937,6 +12820,9 @@ static const AArch64DecodeTable data_proc_simd[] = {
{ 0xce000000, 0xff808000, disas_crypto_four_reg },
{ 0xce800000, 0xffe00000, disas_crypto_xar },
{ 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
+ { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
+ { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
+ { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
{ 0x00000000, 0x00000000, NULL }
};
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 1270022289..aa6dcaa577 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3143,7 +3143,7 @@ static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
TCGv_i32 tcg_rmode;
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
if (dp) {
TCGv_i64 tcg_op;
@@ -3167,7 +3167,7 @@ static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
tcg_temp_free_i32(tcg_res);
}
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_ptr(fpst);
@@ -3184,7 +3184,7 @@ static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
tcg_shift = tcg_const_i32(0);
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
if (dp) {
TCGv_i64 tcg_double, tcg_res;
@@ -3222,7 +3222,7 @@ static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
tcg_temp_free_i32(tcg_single);
}
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_i32(tcg_shift);
@@ -3892,13 +3892,13 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
TCGv_ptr fpst = get_fpstatus_ptr(0);
TCGv_i32 tcg_rmode;
tcg_rmode = tcg_const_i32(float_round_to_zero);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
if (dp) {
gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
} else {
gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
}
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_ptr(fpst);
break;