diff options
29 files changed, 626 insertions, 492 deletions
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index e258463747..d404f31e02 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -155,6 +155,8 @@ static void aw_a10_realize(DeviceState *dev, Error **errp) } /* SD/MMC */ + object_property_set_link(OBJECT(&s->mmc0), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(&s->mmc0), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc0), 0, AW_A10_MMC0_BASE); sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc0), 0, qdev_get_gpio_in(dev, 32)); diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c index 341abe6718..88259a9c0d 100644 --- a/hw/arm/allwinner-h3.c +++ b/hw/arm/allwinner-h3.c @@ -349,6 +349,8 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->sid), 0, s->memmap[AW_H3_DEV_SID]); /* SD/MMC */ + object_property_set_link(OBJECT(&s->mmc0), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(&s->mmc0), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc0), 0, s->memmap[AW_H3_DEV_MMC0]); sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc0), 0, @@ -363,6 +365,8 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) qemu_check_nic_model(&nd_table[0], TYPE_AW_SUN8I_EMAC); qdev_set_nic_properties(DEVICE(&s->emac), &nd_table[0]); } + object_property_set_link(OBJECT(&s->emac), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(&s->emac), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(&s->emac), 0, s->memmap[AW_H3_DEV_EMAC]); sysbus_connect_irq(SYS_BUS_DEVICE(&s->emac), 0, diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 6264eab16b..a93da37dcb 100644 --- a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -1160,6 +1160,7 @@ static const TypeInfo armsse_info = { .name = TYPE_ARM_SSE, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(ARMSSE), + .class_size = sizeof(ARMSSEClass), .instance_init = armsse_init, .abstract = true, .interfaces = (InterfaceInfo[]) { diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index c3b9780f35..f2f4fc0264 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -30,6 +30,7 @@ #include "hw/audio/wm8750.h" #include "sysemu/block-backend.h" #include "sysemu/runstate.h" +#include "sysemu/dma.h" #include "exec/address-spaces.h" #include "ui/pixel_ops.h" #include "qemu/cutils.h" @@ -163,6 +164,8 @@ typedef struct mv88w8618_eth_state { MemoryRegion iomem; qemu_irq irq; + MemoryRegion *dma_mr; + AddressSpace dma_as; uint32_t smir; uint32_t icr; uint32_t imr; @@ -176,19 +179,21 @@ typedef struct mv88w8618_eth_state { NICConf conf; } mv88w8618_eth_state; -static void eth_rx_desc_put(uint32_t addr, mv88w8618_rx_desc *desc) +static void eth_rx_desc_put(AddressSpace *dma_as, uint32_t addr, + mv88w8618_rx_desc *desc) { cpu_to_le32s(&desc->cmdstat); cpu_to_le16s(&desc->bytes); cpu_to_le16s(&desc->buffer_size); cpu_to_le32s(&desc->buffer); cpu_to_le32s(&desc->next); - cpu_physical_memory_write(addr, desc, sizeof(*desc)); + dma_memory_write(dma_as, addr, desc, sizeof(*desc)); } -static void eth_rx_desc_get(uint32_t addr, mv88w8618_rx_desc *desc) +static void eth_rx_desc_get(AddressSpace *dma_as, uint32_t addr, + mv88w8618_rx_desc *desc) { - cpu_physical_memory_read(addr, desc, sizeof(*desc)); + dma_memory_read(dma_as, addr, desc, sizeof(*desc)); le32_to_cpus(&desc->cmdstat); le16_to_cpus(&desc->bytes); le16_to_cpus(&desc->buffer_size); @@ -209,9 +214,9 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) continue; } do { - eth_rx_desc_get(desc_addr, &desc); + eth_rx_desc_get(&s->dma_as, desc_addr, &desc); if ((desc.cmdstat & MP_ETH_RX_OWN) && desc.buffer_size >= size) { - cpu_physical_memory_write(desc.buffer + s->vlan_header, + dma_memory_write(&s->dma_as, desc.buffer + s->vlan_header, buf, size); desc.bytes = size + s->vlan_header; desc.cmdstat &= ~MP_ETH_RX_OWN; @@ -221,7 +226,7 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) if (s->icr & s->imr) { qemu_irq_raise(s->irq); } - eth_rx_desc_put(desc_addr, &desc); + eth_rx_desc_put(&s->dma_as, desc_addr, &desc); return size; } desc_addr = desc.next; @@ -230,19 +235,21 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size) return size; } -static void eth_tx_desc_put(uint32_t addr, mv88w8618_tx_desc *desc) +static void eth_tx_desc_put(AddressSpace *dma_as, uint32_t addr, + mv88w8618_tx_desc *desc) { cpu_to_le32s(&desc->cmdstat); cpu_to_le16s(&desc->res); cpu_to_le16s(&desc->bytes); cpu_to_le32s(&desc->buffer); cpu_to_le32s(&desc->next); - cpu_physical_memory_write(addr, desc, sizeof(*desc)); + dma_memory_write(dma_as, addr, desc, sizeof(*desc)); } -static void eth_tx_desc_get(uint32_t addr, mv88w8618_tx_desc *desc) +static void eth_tx_desc_get(AddressSpace *dma_as, uint32_t addr, + mv88w8618_tx_desc *desc) { - cpu_physical_memory_read(addr, desc, sizeof(*desc)); + dma_memory_read(dma_as, addr, desc, sizeof(*desc)); le32_to_cpus(&desc->cmdstat); le16_to_cpus(&desc->res); le16_to_cpus(&desc->bytes); @@ -259,17 +266,17 @@ static void eth_send(mv88w8618_eth_state *s, int queue_index) int len; do { - eth_tx_desc_get(desc_addr, &desc); + eth_tx_desc_get(&s->dma_as, desc_addr, &desc); next_desc = desc.next; if (desc.cmdstat & MP_ETH_TX_OWN) { len = desc.bytes; if (len < 2048) { - cpu_physical_memory_read(desc.buffer, buf, len); + dma_memory_read(&s->dma_as, desc.buffer, buf, len); qemu_send_packet(qemu_get_queue(s->nic), buf, len); } desc.cmdstat &= ~MP_ETH_TX_OWN; s->icr |= 1 << (MP_ETH_IRQ_TXLO_BIT - queue_index); - eth_tx_desc_put(desc_addr, &desc); + eth_tx_desc_put(&s->dma_as, desc_addr, &desc); } desc_addr = next_desc; } while (desc_addr != s->tx_queue[queue_index]); @@ -405,6 +412,12 @@ static void mv88w8618_eth_realize(DeviceState *dev, Error **errp) { mv88w8618_eth_state *s = MV88W8618_ETH(dev); + if (!s->dma_mr) { + error_setg(errp, TYPE_MV88W8618_ETH " 'dma-memory' link not set"); + return; + } + + address_space_init(&s->dma_as, s->dma_mr, "emac-dma"); s->nic = qemu_new_nic(&net_mv88w8618_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); } @@ -428,6 +441,8 @@ static const VMStateDescription mv88w8618_eth_vmsd = { static Property mv88w8618_eth_properties[] = { DEFINE_NIC_PROPERTIES(mv88w8618_eth_state, conf), + DEFINE_PROP_LINK("dma-memory", mv88w8618_eth_state, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; @@ -1653,6 +1668,8 @@ static void musicpal_init(MachineState *machine) qemu_check_nic_model(&nd_table[0], "mv88w8618"); dev = qdev_new(TYPE_MV88W8618_ETH); qdev_set_nic_properties(dev, &nd_table[0]); + object_property_set_link(OBJECT(dev), "dma-memory", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, MP_ETH_BASE); sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[MP_ETH_IRQ]); diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index f030a416fd..2a7d9a61fc 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -554,7 +554,7 @@ static void create_pcie(SBSAMachineState *sms) for (i = 0; i < GPEX_NUM_IRQS; i++) { sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, - qdev_get_gpio_in(sms->gic, irq + 1)); + qdev_get_gpio_in(sms->gic, irq + i)); gpex_set_irq_num(GPEX_HOST(dev), i, irq + i); } diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index 32aa7323d9..969ef0727c 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -222,18 +222,18 @@ static void zynq_init(MachineState *machine) 1, 0x0066, 0x0022, 0x0000, 0x0000, 0x0555, 0x2aa, 0); - /* Create slcr, keep a pointer to connect clocks */ - slcr = qdev_new("xilinx,zynq_slcr"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(slcr), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000); - /* Create the main clock source, and feed slcr with it */ zynq_machine->ps_clk = CLOCK(object_new(TYPE_CLOCK)); object_property_add_child(OBJECT(zynq_machine), "ps_clk", OBJECT(zynq_machine->ps_clk)); object_unref(OBJECT(zynq_machine->ps_clk)); clock_set_hz(zynq_machine->ps_clk, PS_CLK_FREQUENCY); + + /* Create slcr, keep a pointer to connect clocks */ + slcr = qdev_new("xilinx,zynq_slcr"); qdev_connect_clock_in(slcr, "ps_clk", zynq_machine->ps_clk); + sysbus_realize_and_unref(SYS_BUS_DEVICE(slcr), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000); dev = qdev_new(TYPE_A9MPCORE_PRIV); qdev_prop_set_uint32(dev, "num-cpu", 1); @@ -254,12 +254,22 @@ static void zynq_init(MachineState *machine) sysbus_create_simple(TYPE_CHIPIDEA, 0xE0002000, pic[53 - IRQ_OFFSET]); sysbus_create_simple(TYPE_CHIPIDEA, 0xE0003000, pic[76 - IRQ_OFFSET]); - dev = cadence_uart_create(0xE0000000, pic[59 - IRQ_OFFSET], serial_hd(0)); + dev = qdev_new(TYPE_CADENCE_UART); + busdev = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", serial_hd(0)); qdev_connect_clock_in(dev, "refclk", qdev_get_clock_out(slcr, "uart0_ref_clk")); - dev = cadence_uart_create(0xE0001000, pic[82 - IRQ_OFFSET], serial_hd(1)); + sysbus_realize_and_unref(busdev, &error_fatal); + sysbus_mmio_map(busdev, 0, 0xE0000000); + sysbus_connect_irq(busdev, 0, pic[59 - IRQ_OFFSET]); + dev = qdev_new(TYPE_CADENCE_UART); + busdev = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", serial_hd(1)); qdev_connect_clock_in(dev, "refclk", qdev_get_clock_out(slcr, "uart1_ref_clk")); + sysbus_realize_and_unref(busdev, &error_fatal); + sysbus_mmio_map(busdev, 0, 0xE0001000); + sysbus_connect_irq(busdev, 0, pic[82 - IRQ_OFFSET]); sysbus_create_varargs("cadence_ttc", 0xF8001000, pic[42-IRQ_OFFSET], pic[43-IRQ_OFFSET], pic[44-IRQ_OFFSET], NULL); diff --git a/hw/core/clock.c b/hw/core/clock.c index 3c0daf7d4c..7066282f7b 100644 --- a/hw/core/clock.c +++ b/hw/core/clock.c @@ -34,11 +34,16 @@ void clock_clear_callback(Clock *clk) clock_set_callback(clk, NULL, NULL); } -void clock_set(Clock *clk, uint64_t period) +bool clock_set(Clock *clk, uint64_t period) { + if (clk->period == period) { + return false; + } trace_clock_set(CLOCK_PATH(clk), CLOCK_PERIOD_TO_NS(clk->period), CLOCK_PERIOD_TO_NS(period)); clk->period = period; + + return true; } static void clock_propagate_period(Clock *clk, bool call_callbacks) diff --git a/hw/core/qdev-clock.c b/hw/core/qdev-clock.c index 5cc1e82e51..47ecb5b4fa 100644 --- a/hw/core/qdev-clock.c +++ b/hw/core/qdev-clock.c @@ -183,3 +183,9 @@ Clock *qdev_alias_clock(DeviceState *dev, const char *name, return ncl->clock; } + +void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source) +{ + assert(!dev->realized); + clock_set_source(qdev_get_clock_in(dev, name), source); +} diff --git a/hw/misc/unimp.c b/hw/misc/unimp.c index bc4084d344..6cfc5727f0 100644 --- a/hw/misc/unimp.c +++ b/hw/misc/unimp.c @@ -22,9 +22,9 @@ static uint64_t unimp_read(void *opaque, hwaddr offset, unsigned size) { UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque); - qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " - "(size %d, offset 0x%" HWADDR_PRIx ")\n", - s->name, size, offset); + qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " + "(size %d, offset 0x%0*" HWADDR_PRIx ")\n", + s->name, size, s->offset_fmt_width, offset); return 0; } @@ -34,9 +34,9 @@ static void unimp_write(void *opaque, hwaddr offset, UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque); qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " - "(size %d, value 0x%" PRIx64 - ", offset 0x%" HWADDR_PRIx ")\n", - s->name, size, value, offset); + "(size %d, offset 0x%0*" HWADDR_PRIx + ", value 0x%0*" PRIx64 ")\n", + s->name, size, s->offset_fmt_width, offset, size << 1, value); } static const MemoryRegionOps unimp_ops = { @@ -63,6 +63,8 @@ static void unimp_realize(DeviceState *dev, Error **errp) return; } + s->offset_fmt_width = DIV_ROUND_UP(64 - clz64(s->size - 1), 4); + memory_region_init_io(&s->iomem, OBJECT(s), &unimp_ops, s, s->name, s->size); sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c index 28637ff4c1..38d328587e 100644 --- a/hw/net/allwinner-sun8i-emac.c +++ b/hw/net/allwinner-sun8i-emac.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "migration/vmstate.h" #include "net/net.h" @@ -29,6 +30,7 @@ #include "net/checksum.h" #include "qemu/module.h" #include "exec/cpu-common.h" +#include "sysemu/dma.h" #include "hw/net/allwinner-sun8i-emac.h" /* EMAC register offsets */ @@ -337,12 +339,13 @@ static void allwinner_sun8i_emac_update_irq(AwSun8iEmacState *s) qemu_set_irq(s->irq, (s->int_sta & s->int_en) != 0); } -static uint32_t allwinner_sun8i_emac_next_desc(FrameDescriptor *desc, +static uint32_t allwinner_sun8i_emac_next_desc(AwSun8iEmacState *s, + FrameDescriptor *desc, size_t min_size) { uint32_t paddr = desc->next; - cpu_physical_memory_read(paddr, desc, sizeof(*desc)); + dma_memory_read(&s->dma_as, paddr, desc, sizeof(*desc)); if ((desc->status & DESC_STATUS_CTL) && (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) { @@ -352,7 +355,8 @@ static uint32_t allwinner_sun8i_emac_next_desc(FrameDescriptor *desc, } } -static uint32_t allwinner_sun8i_emac_get_desc(FrameDescriptor *desc, +static uint32_t allwinner_sun8i_emac_get_desc(AwSun8iEmacState *s, + FrameDescriptor *desc, uint32_t start_addr, size_t min_size) { @@ -360,7 +364,7 @@ static uint32_t allwinner_sun8i_emac_get_desc(FrameDescriptor *desc, /* Note that the list is a cycle. Last entry points back to the head. */ while (desc_addr != 0) { - cpu_physical_memory_read(desc_addr, desc, sizeof(*desc)); + dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc)); if ((desc->status & DESC_STATUS_CTL) && (desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) { @@ -379,20 +383,21 @@ static uint32_t allwinner_sun8i_emac_rx_desc(AwSun8iEmacState *s, FrameDescriptor *desc, size_t min_size) { - return allwinner_sun8i_emac_get_desc(desc, s->rx_desc_curr, min_size); + return allwinner_sun8i_emac_get_desc(s, desc, s->rx_desc_curr, min_size); } static uint32_t allwinner_sun8i_emac_tx_desc(AwSun8iEmacState *s, FrameDescriptor *desc, size_t min_size) { - return allwinner_sun8i_emac_get_desc(desc, s->tx_desc_head, min_size); + return allwinner_sun8i_emac_get_desc(s, desc, s->tx_desc_head, min_size); } -static void allwinner_sun8i_emac_flush_desc(FrameDescriptor *desc, +static void allwinner_sun8i_emac_flush_desc(AwSun8iEmacState *s, + FrameDescriptor *desc, uint32_t phys_addr) { - cpu_physical_memory_write(phys_addr, desc, sizeof(*desc)); + dma_memory_write(&s->dma_as, phys_addr, desc, sizeof(*desc)); } static bool allwinner_sun8i_emac_can_receive(NetClientState *nc) @@ -450,8 +455,8 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc, << RX_DESC_STATUS_FRM_LEN_SHIFT; } - cpu_physical_memory_write(desc.addr, buf, desc_bytes); - allwinner_sun8i_emac_flush_desc(&desc, s->rx_desc_curr); + dma_memory_write(&s->dma_as, desc.addr, buf, desc_bytes); + allwinner_sun8i_emac_flush_desc(s, &desc, s->rx_desc_curr); trace_allwinner_sun8i_emac_receive(s->rx_desc_curr, desc.addr, desc_bytes); @@ -465,7 +470,7 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc, bytes_left -= desc_bytes; /* Move to the next descriptor */ - s->rx_desc_curr = allwinner_sun8i_emac_next_desc(&desc, 64); + s->rx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 64); if (!s->rx_desc_curr) { /* Not enough buffer space available */ s->int_sta |= INT_STA_RX_BUF_UA; @@ -501,10 +506,10 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s) desc.status |= TX_DESC_STATUS_LENGTH_ERR; break; } - cpu_physical_memory_read(desc.addr, packet_buf + packet_bytes, bytes); + dma_memory_read(&s->dma_as, desc.addr, packet_buf + packet_bytes, bytes); packet_bytes += bytes; desc.status &= ~DESC_STATUS_CTL; - allwinner_sun8i_emac_flush_desc(&desc, s->tx_desc_curr); + allwinner_sun8i_emac_flush_desc(s, &desc, s->tx_desc_curr); /* After the last descriptor, send the packet */ if (desc.status2 & TX_DESC_STATUS2_LAST_DESC) { @@ -519,7 +524,7 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s) packet_bytes = 0; transmitted++; } - s->tx_desc_curr = allwinner_sun8i_emac_next_desc(&desc, 0); + s->tx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 0); } /* Raise transmit completed interrupt */ @@ -623,7 +628,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset, break; case REG_TX_CUR_BUF: /* Transmit Current Buffer */ if (s->tx_desc_curr != 0) { - cpu_physical_memory_read(s->tx_desc_curr, &desc, sizeof(desc)); + dma_memory_read(&s->dma_as, s->tx_desc_curr, &desc, sizeof(desc)); value = desc.addr; } else { value = 0; @@ -636,7 +641,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset, break; case REG_RX_CUR_BUF: /* Receive Current Buffer */ if (s->rx_desc_curr != 0) { - cpu_physical_memory_read(s->rx_desc_curr, &desc, sizeof(desc)); + dma_memory_read(&s->dma_as, s->rx_desc_curr, &desc, sizeof(desc)); value = desc.addr; } else { value = 0; @@ -790,6 +795,13 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp) { AwSun8iEmacState *s = AW_SUN8I_EMAC(dev); + if (!s->dma_mr) { + error_setg(errp, TYPE_AW_SUN8I_EMAC " 'dma-memory' link not set"); + return; + } + + address_space_init(&s->dma_as, s->dma_mr, "emac-dma"); + qemu_macaddr_default_if_unset(&s->conf.macaddr); s->nic = qemu_new_nic(&net_allwinner_sun8i_emac_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); @@ -799,6 +811,8 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp) static Property allwinner_sun8i_emac_properties[] = { DEFINE_NIC_PROPERTIES(AwSun8iEmacState, conf), DEFINE_PROP_UINT8("phy-addr", AwSun8iEmacState, mii_phy_addr, 0), + DEFINE_PROP_LINK("dma-memory", AwSun8iEmacState, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c index f9eb92c09e..e82afb75eb 100644 --- a/hw/sd/allwinner-sdhost.c +++ b/hw/sd/allwinner-sdhost.c @@ -21,7 +21,10 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/units.h" +#include "qapi/error.h" #include "sysemu/blockdev.h" +#include "sysemu/dma.h" +#include "hw/qdev-properties.h" #include "hw/irq.h" #include "hw/sd/allwinner-sdhost.h" #include "migration/vmstate.h" @@ -306,7 +309,7 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState *s, uint8_t buf[1024]; /* Read descriptor */ - cpu_physical_memory_read(desc_addr, desc, sizeof(*desc)); + dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc)); if (desc->size == 0) { desc->size = klass->max_desc_size; } else if (desc->size > klass->max_desc_size) { @@ -331,22 +334,24 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState *s, /* Write to SD bus */ if (is_write) { - cpu_physical_memory_read((desc->addr & DESC_SIZE_MASK) + num_done, - buf, buf_bytes); + dma_memory_read(&s->dma_as, + (desc->addr & DESC_SIZE_MASK) + num_done, + buf, buf_bytes); sdbus_write_data(&s->sdbus, buf, buf_bytes); /* Read from SD bus */ } else { sdbus_read_data(&s->sdbus, buf, buf_bytes); - cpu_physical_memory_write((desc->addr & DESC_SIZE_MASK) + num_done, - buf, buf_bytes); + dma_memory_write(&s->dma_as, + (desc->addr & DESC_SIZE_MASK) + num_done, + buf, buf_bytes); } num_done += buf_bytes; } /* Clear hold flag and flush descriptor */ desc->status &= ~DESC_STATUS_HOLD; - cpu_physical_memory_write(desc_addr, desc, sizeof(*desc)); + dma_memory_write(&s->dma_as, desc_addr, desc, sizeof(*desc)); return num_done; } @@ -721,6 +726,12 @@ static const VMStateDescription vmstate_allwinner_sdhost = { } }; +static Property allwinner_sdhost_properties[] = { + DEFINE_PROP_LINK("dma-memory", AwSdHostState, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + static void allwinner_sdhost_init(Object *obj) { AwSdHostState *s = AW_SDHOST(obj); @@ -734,6 +745,18 @@ static void allwinner_sdhost_init(Object *obj) sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq); } +static void allwinner_sdhost_realize(DeviceState *dev, Error **errp) +{ + AwSdHostState *s = AW_SDHOST(dev); + + if (!s->dma_mr) { + error_setg(errp, TYPE_AW_SDHOST " 'dma-memory' link not set"); + return; + } + + address_space_init(&s->dma_as, s->dma_mr, "sdhost-dma"); +} + static void allwinner_sdhost_reset(DeviceState *dev) { AwSdHostState *s = AW_SDHOST(dev); @@ -792,6 +815,8 @@ static void allwinner_sdhost_class_init(ObjectClass *klass, void *data) dc->reset = allwinner_sdhost_reset; dc->vmsd = &vmstate_allwinner_sdhost; + dc->realize = allwinner_sdhost_realize; + device_class_set_props(dc, allwinner_sdhost_properties); } static void allwinner_sdhost_sun4i_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h index 529816286d..347b977ae5 100644 --- a/include/hw/arm/armsse.h +++ b/include/hw/arm/armsse.h @@ -220,7 +220,7 @@ typedef struct ARMSSE { typedef struct ARMSSEInfo ARMSSEInfo; typedef struct ARMSSEClass { - DeviceClass parent_class; + SysBusDeviceClass parent_class; const ARMSSEInfo *info; } ARMSSEClass; diff --git a/include/hw/char/cadence_uart.h b/include/hw/char/cadence_uart.h index ed7b58d31d..dabc49ea4f 100644 --- a/include/hw/char/cadence_uart.h +++ b/include/hw/char/cadence_uart.h @@ -53,21 +53,4 @@ typedef struct { Clock *refclk; } CadenceUARTState; -static inline DeviceState *cadence_uart_create(hwaddr addr, - qemu_irq irq, - Chardev *chr) -{ - DeviceState *dev; - SysBusDevice *s; - - dev = qdev_new(TYPE_CADENCE_UART); - s = SYS_BUS_DEVICE(dev); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(s, &error_fatal); - sysbus_mmio_map(s, 0, addr); - sysbus_connect_irq(s, 0, irq); - - return dev; -} - #endif diff --git a/include/hw/clock.h b/include/hw/clock.h index f822a94220..9ecd78b2c3 100644 --- a/include/hw/clock.h +++ b/include/hw/clock.h @@ -127,17 +127,19 @@ void clock_set_source(Clock *clk, Clock *src); * @value: the clock's value, 0 means unclocked * * Set the local cached period value of @clk to @value. + * + * @return: true if the clock is changed. */ -void clock_set(Clock *clk, uint64_t value); +bool clock_set(Clock *clk, uint64_t value); -static inline void clock_set_hz(Clock *clk, unsigned hz) +static inline bool clock_set_hz(Clock *clk, unsigned hz) { - clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz)); + return clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz)); } -static inline void clock_set_ns(Clock *clk, unsigned ns) +static inline bool clock_set_ns(Clock *clk, unsigned ns) { - clock_set(clk, CLOCK_PERIOD_FROM_NS(ns)); + return clock_set(clk, CLOCK_PERIOD_FROM_NS(ns)); } /** @@ -163,8 +165,9 @@ void clock_propagate(Clock *clk); */ static inline void clock_update(Clock *clk, uint64_t value) { - clock_set(clk, value); - clock_propagate(clk); + if (clock_set(clk, value)) { + clock_propagate(clk); + } } static inline void clock_update_hz(Clock *clk, unsigned hz) @@ -209,17 +212,4 @@ static inline bool clock_is_enabled(const Clock *clk) return clock_get(clk) != 0; } -static inline void clock_init(Clock *clk, uint64_t value) -{ - clock_set(clk, value); -} -static inline void clock_init_hz(Clock *clk, uint64_t value) -{ - clock_set_hz(clk, value); -} -static inline void clock_init_ns(Clock *clk, uint64_t value) -{ - clock_set_ns(clk, value); -} - #endif /* QEMU_HW_CLOCK_H */ diff --git a/include/hw/misc/unimp.h b/include/hw/misc/unimp.h index 4c1d13c9bf..c63968a2cd 100644 --- a/include/hw/misc/unimp.h +++ b/include/hw/misc/unimp.h @@ -20,6 +20,7 @@ typedef struct { SysBusDevice parent_obj; MemoryRegion iomem; + unsigned offset_fmt_width; char *name; uint64_t size; } UnimplementedDeviceState; diff --git a/include/hw/net/allwinner-sun8i-emac.h b/include/hw/net/allwinner-sun8i-emac.h index eda034e96b..dd1d7b96cd 100644 --- a/include/hw/net/allwinner-sun8i-emac.h +++ b/include/hw/net/allwinner-sun8i-emac.h @@ -49,6 +49,12 @@ typedef struct AwSun8iEmacState { /** Interrupt output signal to notify CPU */ qemu_irq irq; + /** Memory region where DMA transfers are done */ + MemoryRegion *dma_mr; + + /** Address space used internally for DMA transfers */ + AddressSpace dma_as; + /** Generic Network Interface Controller (NIC) for networking API */ NICState *nic; diff --git a/include/hw/qdev-clock.h b/include/hw/qdev-clock.h index a340f65ff9..64ca4d266f 100644 --- a/include/hw/qdev-clock.h +++ b/include/hw/qdev-clock.h @@ -70,12 +70,10 @@ Clock *qdev_get_clock_out(DeviceState *dev, const char *name); * * Set the source clock of input clock @name of device @dev to @source. * @source period update will be propagated to @name clock. + * + * Must be called before @dev is realized. */ -static inline void qdev_connect_clock_in(DeviceState *dev, const char *name, - Clock *source) -{ - clock_set_source(qdev_get_clock_in(dev, name), source); -} +void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source); /** * qdev_alias_clock: diff --git a/include/hw/sd/allwinner-sdhost.h b/include/hw/sd/allwinner-sdhost.h index d94606a853..839732ebf3 100644 --- a/include/hw/sd/allwinner-sdhost.h +++ b/include/hw/sd/allwinner-sdhost.h @@ -71,6 +71,12 @@ typedef struct AwSdHostState { /** Interrupt output signal to notify CPU */ qemu_irq irq; + /** Memory region where DMA transfers are done */ + MemoryRegion *dma_mr; + + /** Address space used internally for DMA transfers */ + AddressSpace dma_as; + /** Number of bytes left in current DMA transfer */ uint32_t transfer_cnt; diff --git a/include/qemu/int128.h b/include/qemu/int128.h index 5c9890db8b..76ea405922 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -63,6 +63,11 @@ static inline Int128 int128_rshift(Int128 a, int n) return a >> n; } +static inline Int128 int128_lshift(Int128 a, int n) +{ + return a << n; +} + static inline Int128 int128_add(Int128 a, Int128 b) { return a + b; @@ -217,6 +222,17 @@ static inline Int128 int128_rshift(Int128 a, int n) } } +static inline Int128 int128_lshift(Int128 a, int n) +{ + uint64_t l = a.lo << (n & 63); + if (n >= 64) { + return int128_make128(0, l); + } else if (n > 0) { + return int128_make128(l, (a.hi << n) | (a.lo >> (64 - n))); + } + return a; +} + static inline Int128 int128_add(Int128 a, Int128 b) { uint64_t lo = a.lo + b.lo; diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 63c4a087ca..4411c47120 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/helper.c b/target/arm/helper.c index 6b4f0eb533..44d666627a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5105,7 +5105,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, diff --git a/target/arm/helper.h b/target/arm/helper.h index 759639a63a..3ca73a1764 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -758,6 +758,34 @@ DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 104752041f..891306f5b0 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -514,11 +514,12 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) } /* Record a tag check failure. */ -static void mte_check_fail(CPUARMState *env, int mmu_idx, +static void mte_check_fail(CPUARMState *env, uint32_t desc, uint64_t dirty_ptr, uintptr_t ra) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); - int el, reg_el, tcf, select; + int el, reg_el, tcf, select, is_write, syn; uint64_t sctlr; reg_el = regime_el(env, arm_mmu_idx); @@ -546,9 +547,10 @@ static void mte_check_fail(CPUARMState *env, int mmu_idx, */ cpu_restore_state(env_cpu(env), ra, true); env->exception.vaddress = dirty_ptr; - raise_exception(env, EXCP_DATA_ABORT, - syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), - exception_target_el(env)); + + is_write = FIELD_EX32(desc, MTEDESC, WRITE); + syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11); + raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env)); /* noreturn, but fall through to the assert anyway */ case 0: @@ -639,8 +641,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc, } if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { - int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); - mte_check_fail(env, mmu_idx, ptr, ra); + mte_check_fail(env, desc, ptr, ra); } return useronly_clean_ptr(ptr); @@ -810,7 +811,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, fail_ofs = tag_first + n * TAG_GRANULE - ptr; fail_ofs = ROUND_UP(fail_ofs, esize); - mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + mte_check_fail(env, desc, ptr + fail_ofs, ra); } done: @@ -922,7 +923,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) fail: /* Locate the first nibble that differs. */ i = ctz64(mem_tag ^ ptr_tag) >> 4; - mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra); done: return useronly_clean_ptr(ptr); diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 4f580a25e7..6425396ac1 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -150,13 +150,17 @@ @rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri # Two register operand, one immediate operand, with predicate, -# element size encoded as TSZHL. User must fill in imm. -@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \ - &rpri_esz rn=%reg_movprfx esz=%tszimm_esz +# element size encoded as TSZHL. +@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \ + &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl +@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \ + &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr # Similarly without predicate. -@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \ - &rri_esz esz=%tszimm16_esz +@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \ + &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl +@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \ + &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr # Two register operand, one immediate operand, with 4-bit predicate. # User must fill in imm. @@ -289,14 +293,10 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn ### SVE Shift by Immediate - Predicated Group # SVE bitwise shift by immediate (predicated) -ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr -LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr -LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shl -ASRD 00000100 .. 000 100 100 ... .. ... ..... \ - @rdn_pg_tszimm imm=%tszimm_shr +ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr +LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr +LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... @rdn_pg_tszimm_shl +ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr # SVE bitwise shift by vector (predicated) ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm @@ -400,12 +400,9 @@ RDVL 00000100 101 11111 01010 imm:s6 rd:5 ### SVE Bitwise Shift - Unpredicated Group # SVE bitwise shift by immediate (unpredicated) -ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shr -LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shr -LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \ - @rd_rn_tszimm imm=%tszimm16_shl +ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr +LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr +LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl # SVE bitwise shift by wide elements (unpredicated) # Note esz != 3 diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 382fa82bc8..4758d46f34 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc) return flags; } -/* Store zero into every active element of Zd. We will use this for two - * and three-operand predicated instructions for which logic dictates a - * zero result. In particular, logical shift by element size, which is - * otherwise undefined on the host. - * - * For element sizes smaller than uint64_t, we use tables to expand - * the N bits of the controlling predicate to a byte mask, and clear - * those bytes. +/* + * Copy Zn into Zd, and store zero into inactive elements. + * If inv, store zeros into the active elements. */ -void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_b(pg[H1(i)]); - } -} - -void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_h(pg[H1(i)]); - } -} - -void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - d[i] &= ~expand_pred_s(pg[H1(i)]); - } -} - -void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc) -{ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; - uint64_t *d = vd; - uint8_t *pg = vg; - for (i = 0; i < opr_sz; i += 1) { - if (pg[H1(i)] & 1) { - d[i] = 0; - } - } -} - -/* Copy Zn into Zd, and store zero into inactive elements. */ void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_b(pg[H1(i)]); + d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv); } } void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_h(pg[H1(i)]); + d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv); } } void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t inv = -(uint64_t)(simd_data(desc) & 1); uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & expand_pred_s(pg[H1(i)]); + d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv); } } @@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; uint8_t *pg = vg; + uint8_t inv = simd_data(desc); + for (i = 0; i < opr_sz; i += 1) { - d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); + d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1); } } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 0fc5e12fab..4ba6918b60 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -678,6 +678,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, tcg_temp_free_ptr(fpst); } +/* Expand a 3-operand + qc + operation using an out-of-line helper. */ +static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, + int rm, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), qc_ptr, + is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); + tcg_temp_free_ptr(qc_ptr); +} + /* Set ZF and NF based on a 64 bit result. This is alas fiddlier * than the 32 bit equivalent. */ @@ -1156,18 +1170,18 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, * unallocated-encoding checks (otherwise the syndrome information * for the resulting exception will be incorrect). */ -static inline bool fp_access_check(DisasContext *s) +static bool fp_access_check(DisasContext *s) { - assert(!s->fp_access_checked); - s->fp_access_checked = true; + if (s->fp_excp_el) { + assert(!s->fp_access_checked); + s->fp_access_checked = true; - if (!s->fp_excp_el) { - return true; + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + return false; } - - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); - return false; + s->fp_access_checked = true; + return true; } /* Check that SVE access is enabled. If it is, return true. @@ -1176,10 +1190,14 @@ static inline bool fp_access_check(DisasContext *s) bool sve_access_check(DisasContext *s) { if (s->sve_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(), - s->sve_excp_el); + assert(!s->sve_access_checked); + s->sve_access_checked = true; + + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_sve_access_trap(), s->sve_excp_el); return false; } + s->sve_access_checked = true; return fp_access_check(s); } @@ -11730,6 +11748,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; + case 0x16: /* SQDMULH, SQRDMULH */ + { + static gen_helper_gvec_3_ptr * const fns[2][2] = { + { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, + { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, + }; + gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); + } + return; case 0x11: if (!u) { /* CMTST */ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); @@ -11841,16 +11868,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genenvfn = fns[size][u]; break; } - case 0x16: /* SQDMULH, SQRDMULH */ - { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, - }; - assert(size == 1 || size == 2); - genenvfn = fns[size - 1][u]; - break; - } default: g_assert_not_reached(); } @@ -13484,6 +13501,56 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) data, gen_helper_gvec_fmlal_idx_a64); } return; + + case 0x08: /* MUL */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_3 * const fns[3] = { + gen_helper_gvec_mul_idx_h, + gen_helper_gvec_mul_idx_s, + gen_helper_gvec_mul_idx_d, + }; + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; + + case 0x10: /* MLA */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_4 * const fns[3] = { + gen_helper_gvec_mla_idx_h, + gen_helper_gvec_mla_idx_s, + gen_helper_gvec_mla_idx_d, + }; + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; + + case 0x14: /* MLS */ + if (!is_long && !is_scalar) { + static gen_helper_gvec_4 * const fns[3] = { + gen_helper_gvec_mls_idx_h, + gen_helper_gvec_mls_idx_s, + gen_helper_gvec_mls_idx_d, + }; + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), + index, fns[size - 1]); + return; + } + break; } if (size == 3) { @@ -14529,6 +14596,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) s->base.pc_next += 4; s->fp_access_checked = false; + s->sve_access_checked = false; if (dc_isar_feature(aa64_bti, s)) { if (s->base.num_insns == 1) { diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index d97cb37d83..15ad6c7d32 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -142,35 +142,76 @@ static int pred_gvec_reg_size(DisasContext *s) return size_for_gvec(pred_full_reg_size(s)); } +/* Invoke an out-of-line helper on 2 Zregs. */ +static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, + int rd, int rn, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vsz, vsz, data, fn); +} + +/* Invoke an out-of-line helper on 3 Zregs. */ +static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int rm, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vsz, vsz, data, fn); +} + +/* Invoke an out-of-line helper on 2 Zregs and a predicate. */ +static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int pg, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); +} + +/* Invoke an out-of-line helper on 3 Zregs and a predicate. */ +static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, + int rd, int rn, int rm, int pg, int data) +{ + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); +} + /* Invoke a vector expander on two Zregs. */ -static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) +static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn, + int esz, int rd, int rn) { - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), vsz, vsz); - } - return true; + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vsz, vsz); } /* Invoke a vector expander on three Zregs. */ -static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn, - int esz, int rd, int rn, int rm) +static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, + int esz, int rd, int rn, int rm) { - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vsz, vsz); - } - return true; + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), vsz, vsz); } /* Invoke a vector move on two Zregs. */ static bool do_mov_z(DisasContext *s, int rd, int rn) { - return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn); + if (sve_access_check(s)) { + gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn); + } + return true; } /* Initialize a Zreg with replications of a 64-bit immediate. */ @@ -180,52 +221,27 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); } -/* Invoke a vector expander on two Pregs. */ -static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) -{ - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(esz, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), psz, psz); - } - return true; -} - /* Invoke a vector expander on three Pregs. */ -static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn, - int esz, int rd, int rn, int rm) +static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, + int rd, int rn, int rm) { - if (sve_access_check(s)) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(esz, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), psz, psz); - } - return true; + unsigned psz = pred_gvec_reg_size(s); + gvec_fn(MO_64, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), + pred_full_reg_offset(s, rm), psz, psz); } -/* Invoke a vector operation on four Pregs. */ -static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op, - int rd, int rn, int rm, int rg) +/* Invoke a vector move on two Pregs. */ +static bool do_mov_p(DisasContext *s, int rd, int rn) { if (sve_access_check(s)) { unsigned psz = pred_gvec_reg_size(s); - tcg_gen_gvec_4(pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), - pred_full_reg_offset(s, rg), - psz, psz, gvec_op); + tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), psz, psz); } return true; } -/* Invoke a vector move on two Pregs. */ -static bool do_mov_p(DisasContext *s, int rd, int rn) -{ - return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn); -} - /* Set the cpu flags as per a return from an SVE helper. */ static void do_pred_flags(TCGv_i32 t) { @@ -273,24 +289,32 @@ const uint64_t pred_esz_masks[4] = { *** SVE Logical - Unpredicated Group */ +static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn) +{ + if (sve_access_check(s)) { + gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm); + } + return true; +} + static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_and); } static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_or); } static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_xor); } static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_andc); } /* @@ -299,32 +323,32 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_add); } static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_sub); } static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_ssadd); } static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_sssub); } static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_usadd); } static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) { - return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm); + return do_zzz_fn(s, a, tcg_gen_gvec_ussub); } /* @@ -333,16 +357,11 @@ static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn) { - unsigned vsz = vec_full_reg_size(s); if (fn == NULL) { return false; } if (sve_access_check(s)) { - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - vsz, vsz, 0, fn); + gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0); } return true; } @@ -356,12 +375,7 @@ static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d }; - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); + gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); } #define DO_ZPZZ(NAME, name) \ @@ -433,11 +447,7 @@ static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - vsz, vsz, 0, fn); + gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0); } return true; } @@ -608,48 +618,29 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a) *** SVE Shift by Immediate - Predicated Group */ -/* Store zero into every active element of Zd. We will use this for two - * and three-operand predicated instructions for which logic dictates a - * zero result. +/* + * Copy Zn into Zd, storing zeros into inactive elements. + * If invert, store zeros into the active elements. */ -static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz) -{ - static gen_helper_gvec_2 * const fns[4] = { - gen_helper_sve_clr_b, gen_helper_sve_clr_h, - gen_helper_sve_clr_s, gen_helper_sve_clr_d, - }; - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); - } - return true; -} - -/* Copy Zn into Zd, storing zeros into inactive elements. */ -static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz) +static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, + int esz, bool invert) { static gen_helper_gvec_3 * const fns[4] = { gen_helper_sve_movz_b, gen_helper_sve_movz_h, gen_helper_sve_movz_s, gen_helper_sve_movz_d, }; - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - pred_full_reg_offset(s, pg), - vsz, vsz, 0, fns[esz]); + + if (sve_access_check(s)) { + gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); + } + return true; } static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - vsz, vsz, a->imm, fn); + gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); } return true; } @@ -682,7 +673,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For logical shifts, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -700,7 +691,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For logical shifts, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -718,7 +709,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a) /* Shift by element size is architecturally valid. For arithmetic right shift for division, it is a zeroing operation. */ if (a->imm >= (8 << a->esz)) { - return do_clr_zp(s, a->rd, a->pg, a->esz); + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); } else { return do_zpzi_ool(s, a, fns[a->esz]); } @@ -799,11 +790,7 @@ static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); } return true; } @@ -977,11 +964,7 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a) static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, a->imm, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); } return true; } @@ -1022,10 +1005,7 @@ static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); } return true; } @@ -1042,11 +1022,7 @@ static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a) return false; } if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); } return true; } @@ -1068,6 +1044,11 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, int mofs = pred_full_reg_offset(s, a->rm); int gofs = pred_full_reg_offset(s, a->pg); + if (!a->s) { + tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); + return true; + } + if (psz == 8) { /* Do the operation and the flags generation in temps. */ TCGv_i64 pd = tcg_temp_new_i64(); @@ -1127,19 +1108,24 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_and_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->rn == a->rm) { - if (a->pg == a->rn) { - return do_mov_p(s, a->rd, a->rn); - } else { - return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg); + + if (!a->s) { + if (!sve_access_check(s)) { + return true; + } + if (a->rn == a->rm) { + if (a->pg == a->rn) { + do_mov_p(s, a->rd, a->rn); + } else { + gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); + } + return true; + } else if (a->pg == a->rn || a->pg == a->rm) { + gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); + return true; } - } else if (a->pg == a->rn || a->pg == a->rm) { - return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1163,13 +1149,14 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_bic_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->pg == a->rn) { - return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); + + if (!a->s && a->pg == a->rn) { + if (sve_access_check(s)) { + gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); + } + return true; } + return do_pppp_flags(s, a, &op); } static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1193,41 +1180,22 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_eor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } -} - -static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) -{ - tcg_gen_and_i64(pn, pn, pg); - tcg_gen_andc_i64(pm, pm, pg); - tcg_gen_or_i64(pd, pn, pm); -} - -static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, - TCGv_vec pm, TCGv_vec pg) -{ - tcg_gen_and_vec(vece, pn, pn, pg); - tcg_gen_andc_vec(vece, pm, pm, pg); - tcg_gen_or_vec(vece, pd, pn, pm); + return do_pppp_flags(s, a, &op); } static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) { - static const GVecGen4 op = { - .fni8 = gen_sel_pg_i64, - .fniv = gen_sel_pg_vec, - .fno = gen_helper_sve_sel_pppp, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; if (a->s) { return false; - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + if (sve_access_check(s)) { + unsigned psz = pred_gvec_reg_size(s); + tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->pg), + pred_full_reg_offset(s, a->rn), + pred_full_reg_offset(s, a->rm), psz, psz); + } + return true; } static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1251,13 +1219,11 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orr_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else if (a->pg == a->rn && a->rn == a->rm) { + + if (!a->s && a->pg == a->rn && a->rn == a->rm) { return do_mov_p(s, a->rd, a->rn); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); } + return do_pppp_flags(s, a, &op); } static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1281,11 +1247,7 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orn_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1309,11 +1271,7 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) @@ -1337,11 +1295,7 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nand_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (a->s) { - return do_pppp_flags(s, a, &op); - } else { - return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg); - } + return do_pppp_flags(s, a, &op); } /* @@ -2103,10 +2057,7 @@ static bool trans_REV_v(DisasContext *s, arg_rr_esz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); } return true; } @@ -2119,11 +2070,7 @@ static bool trans_TBL(DisasContext *s, arg_rrr_esz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->esz]); + gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); } return true; } @@ -2296,11 +2243,7 @@ static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data, gen_helper_gvec_3 *fn) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, data, fn); + gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); } return true; } @@ -2745,12 +2688,8 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a) static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a) { if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - vsz, vsz, a->esz, gen_helper_sve_splice); + gen_gvec_ool_zzzp(s, gen_helper_sve_splice, + a->rd, a->rn, a->rm, a->pg, 0); } return true; } @@ -3429,11 +3368,7 @@ static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, 0, fns[a->u][a->sz]); + gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0); } return true; } @@ -3446,11 +3381,7 @@ static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a) }; if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, a->index, fns[a->u][a->sz]); + gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index); } return true; } @@ -5093,8 +5024,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Zero the inactive elements. */ gen_set_label(over); - do_movz_zpz(s, a->rd, a->rd, a->pg, esz); - return true; + return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); } static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, @@ -5877,8 +5807,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a) static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a) { - if (sve_access_check(s)) { - do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz); - } - return true; + return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false); } diff --git a/target/arm/translate.h b/target/arm/translate.h index 6d6d4c0f42..423b0e08df 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -64,6 +64,7 @@ typedef struct DisasContext { * that it is set at the point where we actually touch the FP regs. */ bool fp_access_checked; + bool sve_access_checked; /* ARMv8 single-step state (this is distinct from the QEMU gdbstub * single-step support). */ diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 7d76412ee0..a6c53d2ab6 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -37,19 +37,24 @@ #endif /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ -static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, - int16_t src3, uint32_t *sat) +static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3, + bool neg, bool round, uint32_t *sat) { - /* Simplify: + /* + * Simplify: * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15 */ int32_t ret = (int32_t)src1 * src2; - ret = ((int32_t)src3 << 15) + ret + (1 << 14); + if (neg) { + ret = -ret; + } + ret += ((int32_t)src3 << 15) + (round << 14); ret >>= 15; + if (ret != (int16_t)ret) { *sat = 1; - ret = (ret < 0 ? -0x8000 : 0x7fff); + ret = (ret < 0 ? INT16_MIN : INT16_MAX); } return ret; } @@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat); - uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); + uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat); + uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16, + false, true, sat); return deposit32(e1, 16, 16, e2); } @@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ -static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2, - int16_t src3, uint32_t *sat) -{ - /* Similarly, using subtraction: - * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 - * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15 - */ - int32_t ret = (int32_t)src1 * src2; - ret = ((int32_t)src3 << 15) - ret + (1 << 14); - ret >>= 15; - if (ret != (int16_t)ret) { - *sat = 1; - ret = (ret < 0 ? -0x8000 : 0x7fff); - } - return ret; -} - uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat); - uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); + uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat); + uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16, + true, true, sat); return deposit32(e1, 16, 16, e2); } @@ -115,19 +104,47 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ -static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2, - int32_t src3, uint32_t *sat) +static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3, + bool neg, bool round, uint32_t *sat) { /* Simplify similarly to int_qrdmlah_s16 above. */ int64_t ret = (int64_t)src1 * src2; - ret = ((int64_t)src3 << 31) + ret + (1 << 30); + if (neg) { + ret = -ret; + } + ret += ((int64_t)src3 << 31) + (round << 30); ret >>= 31; + if (ret != (int32_t)ret) { *sat = 1; ret = (ret < 0 ? INT32_MIN : INT32_MAX); @@ -139,7 +156,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, int32_t src2, int32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - return inl_qrdmlah_s32(src1, src2, src3, sat); + return do_sqrdmlah_s(src1, src2, src3, false, true, sat); } void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, @@ -152,31 +169,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ -static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2, - int32_t src3, uint32_t *sat) -{ - /* Simplify similarly to int_qrdmlsh_s16 above. */ - int64_t ret = (int64_t)src1 * src2; - ret = ((int64_t)src3 << 31) - ret + (1 << 30); - ret >>= 31; - if (ret != (int32_t)ret) { - *sat = 1; - ret = (ret < 0 ? INT32_MIN : INT32_MAX); - } - return ret; -} - uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, int32_t src2, int32_t src3) { uint32_t *sat = &env->vfp.qc[0]; - return inl_qrdmlsh_s32(src1, src2, src3, sat); + return do_sqrdmlah_s(src1, src2, src3, true, true, sat); } void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, @@ -189,7 +191,31 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq); + d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -733,6 +759,52 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) */ #define DO_MUL_IDX(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ + intptr_t idx = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ + TYPE mm = m[H(i + idx)]; \ + for (j = 0; j < segment; j++) { \ + d[i + j] = n[i + j] * mm; \ + } \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2) +DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4) +DO_MUL_IDX(gvec_mul_idx_d, uint64_t, ) + +#undef DO_MUL_IDX + +#define DO_MLA_IDX(NAME, TYPE, OP, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ + intptr_t idx = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm, *a = va; \ + for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ + TYPE mm = m[H(i + idx)]; \ + for (j = 0; j < segment; j++) { \ + d[i + j] = a[i + j] OP n[i + j] * mm; \ + } \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2) +DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4) +DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, ) + +DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2) +DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4) +DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, ) + +#undef DO_MLA_IDX + +#define DO_FMUL_IDX(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ @@ -747,11 +819,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } -DO_MUL_IDX(gvec_fmul_idx_h, float16, H2) -DO_MUL_IDX(gvec_fmul_idx_s, float32, H4) -DO_MUL_IDX(gvec_fmul_idx_d, float64, ) +DO_FMUL_IDX(gvec_fmul_idx_h, float16, H2) +DO_FMUL_IDX(gvec_fmul_idx_s, float32, H4) +DO_FMUL_IDX(gvec_fmul_idx_d, float64, ) -#undef DO_MUL_IDX +#undef DO_FMUL_IDX #define DO_FMLA_IDX(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ |