aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2024-10-31 16:34:25 +0000
committerPeter Maydell <peter.maydell@linaro.org>2024-10-31 16:34:25 +0000
commit92ec7805190313c9e628f8fc4eb4f932c15247bd (patch)
tree8dcef66d9a36386f3e274c919c6618934ba0a761 /hw
parentea8ae47bdd2024dc2596f16b27f27fd4dcc08776 (diff)
parentc128d39edeff337220fc536a3e935bcba01ecb49 (diff)
Merge tag 'pull-riscv-to-apply-20241031-1' of https://github.com/alistair23/qemu into staging
RISC-V PR for 9.2 * Fix an access to VXSAT * Expose RV32 cpu to RV64 QEMU * Don't clear PLIC pending bits on IRQ lowering * Make PLIC zeroth priority register read-only * Set vtype.vill on CPU reset * Check and update APLIC pending when write sourcecfg * Avoid dropping charecters with HTIF * Apply FIFO backpressure to guests using SiFive UART * Support for control flow integrity extensions * Support for the IOMMU with the virt machine * set 'aia_mode' to default in error path * clarify how 'riscv-aia' default works # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEaukCtqfKh31tZZKWr3yVEwxTgBMFAmci/tQACgkQr3yVEwxT # gBNPAQ//dZKjjJm4Sh+UFdUslivBJYtL1rl2UUG2UqiNn/UoYh/vcHoSArljHTjt # 8riEStnaQqXziOpMIJjIMLJ4KoiIk2SMvjNfFtcmPiPZEDEpjsTxfUxBFsBee+fI # 4KNQKKFeljq4pa+VzVvXEqzCNJIzCThFXTZhZmer00M91HPA8ZQIHpv2JL1sWlgZ # /HW24XEDFLGc/JsR55fxpPftlAqP+BfOrqMmbWy7x2Y+G8WI05hM2zTP/W8pnIz3 # z0GCRYSBlADtrp+3RqzTwQfK5pXoFc0iDktWVYlhoXaeEmOwo8IYxTjrvBGhnBq+ # ySX1DzTa23QmOIxSYYvCRuOxyOK9ziNn+EQ9FiFBt1h1o251CYMil1bwmYXMCMNJ # rZwF1HfUx0g2GQW1ZOqh1eeyLO29JiOdV3hxlDO7X4bbISNgU6il5MXmnvf0/XVW # Af3YhALeeDbHgHL1iVfjafzaviQc9+YrEX13eX6N2AjcgE5a3F7XNmGfFpFJ+mfQ # CPgiwVBXat6UpBUGAt14UM+6wzp+crSgQR5IEGth+mKMKdkWoykvo7A2oHdu39zn # 2cdzsshg2qcLLUPTFy06OOTXX382kCWXuykhHOjZ4uu2SJJ7R0W3PlYV8HSde2Vu # Rj+89ZlUSICJNXXweQB39r87hNbtRuDIO22V0B9XrApQbJj6/yE= # =rPaa # -----END PGP SIGNATURE----- # gpg: Signature made Thu 31 Oct 2024 03:51:48 GMT # gpg: using RSA key 6AE902B6A7CA877D6D659296AF7C95130C538013 # gpg: Good signature from "Alistair Francis <alistair@alistair23.me>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: 6AE9 02B6 A7CA 877D 6D65 9296 AF7C 9513 0C53 8013 * tag 'pull-riscv-to-apply-20241031-1' of https://github.com/alistair23/qemu: (50 commits) target/riscv: Fix vcompress with rvv_ta_all_1s target/riscv/kvm: clarify how 'riscv-aia' default works target/riscv/kvm: set 'aia_mode' to default in error path docs/specs: add riscv-iommu qtest/riscv-iommu-test: add init queues test hw/riscv/riscv-iommu: add DBG support hw/riscv/riscv-iommu: add ATS support hw/riscv/riscv-iommu: add Address Translation Cache (IOATC) test/qtest: add riscv-iommu-pci tests hw/riscv/virt.c: support for RISC-V IOMMU PCIDevice hotplug hw/riscv: add riscv-iommu-pci reference device pci-ids.rst: add Red Hat pci-id for RISC-V IOMMU device hw/riscv: add RISC-V IOMMU base emulation hw/riscv: add riscv-iommu-bits.h exec/memtxattr: add process identifier to the transaction attributes target/riscv: Expose zicfiss extension as a cpu property disas/riscv: enable disassembly for compressed sspush/sspopchk disas/riscv: enable disassembly for zicfiss instructions target/riscv: compressed encodings for sspush and sspopchk target/riscv: implement zicfiss instructions ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/char/riscv_htif.c12
-rw-r--r--hw/char/sifive_uart.c97
-rw-r--r--hw/intc/riscv_aplic.c51
-rw-r--r--hw/intc/sifive_plic.c15
-rw-r--r--hw/riscv/Kconfig4
-rw-r--r--hw/riscv/boot.c35
-rw-r--r--hw/riscv/meson.build1
-rw-r--r--hw/riscv/riscv-iommu-bits.h421
-rw-r--r--hw/riscv/riscv-iommu-pci.c202
-rw-r--r--hw/riscv/riscv-iommu.c2399
-rw-r--r--hw/riscv/riscv-iommu.h130
-rw-r--r--hw/riscv/sifive_u.c3
-rw-r--r--hw/riscv/trace-events17
-rw-r--r--hw/riscv/trace.h1
-rw-r--r--hw/riscv/virt.c33
15 files changed, 3377 insertions, 44 deletions
diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
index 54fd55c3e6..0345088e8b 100644
--- a/hw/char/riscv_htif.c
+++ b/hw/char/riscv_htif.c
@@ -217,7 +217,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) {
uint8_t ch;
cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1);
- qemu_chr_fe_write(&s->chr, &ch, 1);
+ /*
+ * XXX this blocks the entire thread. Rewrite to use
+ * qemu_chr_fe_write and background I/O callbacks
+ */
+ qemu_chr_fe_write_all(&s->chr, &ch, 1);
resp = 0x100 | (uint8_t)payload;
} else {
qemu_log_mask(LOG_UNIMP,
@@ -236,7 +240,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
return;
} else if (cmd == HTIF_CONSOLE_CMD_PUTC) {
uint8_t ch = (uint8_t)payload;
- qemu_chr_fe_write(&s->chr, &ch, 1);
+ /*
+ * XXX this blocks the entire thread. Rewrite to use
+ * qemu_chr_fe_write and background I/O callbacks
+ */
+ qemu_chr_fe_write_all(&s->chr, &ch, 1);
resp = 0x100 | (uint8_t)payload;
} else {
qemu_log("HTIF device %d: unknown command\n", device);
diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c
index 7fc6787f69..aeb45d3601 100644
--- a/hw/char/sifive_uart.c
+++ b/hw/char/sifive_uart.c
@@ -26,6 +26,8 @@
#include "hw/char/sifive_uart.h"
#include "hw/qdev-properties-system.h"
+#define TX_INTERRUPT_TRIGGER_DELAY_NS 100
+
/*
* Not yet implemented:
*
@@ -64,6 +66,72 @@ static void sifive_uart_update_irq(SiFiveUARTState *s)
}
}
+static gboolean sifive_uart_xmit(void *do_not_use, GIOCondition cond,
+ void *opaque)
+{
+ SiFiveUARTState *s = opaque;
+ int ret;
+ const uint8_t *characters;
+ uint32_t numptr = 0;
+
+ /* Instantly drain the FIFO when there's no back-end */
+ if (!qemu_chr_fe_backend_connected(&s->chr)) {
+ fifo8_reset(&s->tx_fifo);
+ return G_SOURCE_REMOVE;
+ }
+
+ if (fifo8_is_empty(&s->tx_fifo)) {
+ return G_SOURCE_REMOVE;
+ }
+
+ /* Don't pop the FIFO in case the write fails */
+ characters = fifo8_peek_bufptr(&s->tx_fifo,
+ fifo8_num_used(&s->tx_fifo), &numptr);
+ ret = qemu_chr_fe_write(&s->chr, characters, numptr);
+
+ if (ret >= 0) {
+ /* We wrote the data, actually pop the fifo */
+ fifo8_pop_bufptr(&s->tx_fifo, ret, NULL);
+ }
+
+ if (!fifo8_is_empty(&s->tx_fifo)) {
+ guint r = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
+ sifive_uart_xmit, s);
+ if (!r) {
+ fifo8_reset(&s->tx_fifo);
+ return G_SOURCE_REMOVE;
+ }
+ }
+
+ /* Clear the TX Full bit */
+ if (!fifo8_is_full(&s->tx_fifo)) {
+ s->txfifo &= ~SIFIVE_UART_TXFIFO_FULL;
+ }
+
+ sifive_uart_update_irq(s);
+ return G_SOURCE_REMOVE;
+}
+
+static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf,
+ int size)
+{
+ uint64_t current_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ if (size > fifo8_num_free(&s->tx_fifo)) {
+ size = fifo8_num_free(&s->tx_fifo);
+ qemu_log_mask(LOG_GUEST_ERROR, "sifive_uart: TX FIFO overflow");
+ }
+
+ fifo8_push_all(&s->tx_fifo, buf, size);
+
+ if (fifo8_is_full(&s->tx_fifo)) {
+ s->txfifo |= SIFIVE_UART_TXFIFO_FULL;
+ }
+
+ timer_mod(s->fifo_trigger_handle, current_time +
+ TX_INTERRUPT_TRIGGER_DELAY_NS);
+}
+
static uint64_t
sifive_uart_read(void *opaque, hwaddr addr, unsigned int size)
{
@@ -82,7 +150,7 @@ sifive_uart_read(void *opaque, hwaddr addr, unsigned int size)
return 0x80000000;
case SIFIVE_UART_TXFIFO:
- return 0; /* Should check tx fifo */
+ return s->txfifo;
case SIFIVE_UART_IE:
return s->ie;
case SIFIVE_UART_IP:
@@ -106,12 +174,10 @@ sifive_uart_write(void *opaque, hwaddr addr,
{
SiFiveUARTState *s = opaque;
uint32_t value = val64;
- unsigned char ch = value;
switch (addr) {
case SIFIVE_UART_TXFIFO:
- qemu_chr_fe_write(&s->chr, &ch, 1);
- sifive_uart_update_irq(s);
+ sifive_uart_write_tx_fifo(s, (uint8_t *) &value, 1);
return;
case SIFIVE_UART_IE:
s->ie = val64;
@@ -131,6 +197,13 @@ sifive_uart_write(void *opaque, hwaddr addr,
__func__, (int)addr, (int)value);
}
+static void fifo_trigger_update(void *opaque)
+{
+ SiFiveUARTState *s = opaque;
+
+ sifive_uart_xmit(NULL, G_IO_OUT, s);
+}
+
static const MemoryRegionOps sifive_uart_ops = {
.read = sifive_uart_read,
.write = sifive_uart_write,
@@ -197,6 +270,9 @@ static void sifive_uart_realize(DeviceState *dev, Error **errp)
{
SiFiveUARTState *s = SIFIVE_UART(dev);
+ s->fifo_trigger_handle = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ fifo_trigger_update, s);
+
qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx,
sifive_uart_event, sifive_uart_be_change, s,
NULL, true);
@@ -206,12 +282,18 @@ static void sifive_uart_realize(DeviceState *dev, Error **errp)
static void sifive_uart_reset_enter(Object *obj, ResetType type)
{
SiFiveUARTState *s = SIFIVE_UART(obj);
+
+ s->txfifo = 0;
s->ie = 0;
s->ip = 0;
s->txctrl = 0;
s->rxctrl = 0;
s->div = 0;
+
s->rx_fifo_len = 0;
+
+ memset(s->rx_fifo, 0, SIFIVE_UART_RX_FIFO_SIZE);
+ fifo8_create(&s->tx_fifo, SIFIVE_UART_TX_FIFO_SIZE);
}
static void sifive_uart_reset_hold(Object *obj, ResetType type)
@@ -222,8 +304,8 @@ static void sifive_uart_reset_hold(Object *obj, ResetType type)
static const VMStateDescription vmstate_sifive_uart = {
.name = TYPE_SIFIVE_UART,
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.fields = (const VMStateField[]) {
VMSTATE_UINT8_ARRAY(rx_fifo, SiFiveUARTState,
SIFIVE_UART_RX_FIFO_SIZE),
@@ -233,6 +315,9 @@ static const VMStateDescription vmstate_sifive_uart = {
VMSTATE_UINT32(txctrl, SiFiveUARTState),
VMSTATE_UINT32(rxctrl, SiFiveUARTState),
VMSTATE_UINT32(div, SiFiveUARTState),
+ VMSTATE_UINT32(txfifo, SiFiveUARTState),
+ VMSTATE_FIFO8(tx_fifo, SiFiveUARTState),
+ VMSTATE_TIMER_PTR(fifo_trigger_handle, SiFiveUARTState),
VMSTATE_END_OF_LIST()
},
};
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 32edd6d07b..4a262c82f0 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -159,31 +159,42 @@ static bool is_kvm_aia(bool msimode)
return kvm_irqchip_in_kernel() && msimode;
}
+static bool riscv_aplic_irq_rectified_val(RISCVAPLICState *aplic,
+ uint32_t irq)
+{
+ uint32_t sourcecfg, sm, raw_input, irq_inverted;
+
+ if (!irq || aplic->num_irqs <= irq) {
+ return false;
+ }
+
+ sourcecfg = aplic->sourcecfg[irq];
+ if (sourcecfg & APLIC_SOURCECFG_D) {
+ return false;
+ }
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE) {
+ return false;
+ }
+
+ raw_input = (aplic->state[irq] & APLIC_ISTATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+
+ return !!(raw_input ^ irq_inverted);
+}
+
static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic,
uint32_t word)
{
- uint32_t i, irq, sourcecfg, sm, raw_input, irq_inverted, ret = 0;
+ uint32_t i, irq, rectified_val, ret = 0;
for (i = 0; i < 32; i++) {
irq = word * 32 + i;
- if (!irq || aplic->num_irqs <= irq) {
- continue;
- }
- sourcecfg = aplic->sourcecfg[irq];
- if (sourcecfg & APLIC_SOURCECFG_D) {
- continue;
- }
-
- sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (sm == APLIC_SOURCECFG_SM_INACTIVE) {
- continue;
- }
-
- raw_input = (aplic->state[irq] & APLIC_ISTATE_INPUT) ? 1 : 0;
- irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
- sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
- ret |= (raw_input ^ irq_inverted) << i;
+ rectified_val = riscv_aplic_irq_rectified_val(aplic, irq);
+ ret |= rectified_val << i;
}
return ret;
@@ -702,6 +713,10 @@ static void riscv_aplic_write(void *opaque, hwaddr addr, uint64_t value,
(aplic->sourcecfg[irq] == 0)) {
riscv_aplic_set_pending_raw(aplic, irq, false);
riscv_aplic_set_enabled_raw(aplic, irq, false);
+ } else {
+ if (riscv_aplic_irq_rectified_val(aplic, irq)) {
+ riscv_aplic_set_pending_raw(aplic, irq, true);
+ }
}
} else if (aplic->mmode && aplic->msimode &&
(addr == APLIC_MMSICFGADDR)) {
diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
index 7f43e96310..ed74490dba 100644
--- a/hw/intc/sifive_plic.c
+++ b/hw/intc/sifive_plic.c
@@ -189,8 +189,13 @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value,
if (addr_between(addr, plic->priority_base, plic->num_sources << 2)) {
uint32_t irq = (addr - plic->priority_base) >> 2;
-
- if (((plic->num_priorities + 1) & plic->num_priorities) == 0) {
+ if (irq == 0) {
+ /* IRQ 0 source priority is reserved */
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Invalid source priority write 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ return;
+ } else if (((plic->num_priorities + 1) & plic->num_priorities) == 0) {
/*
* if "num_priorities + 1" is power-of-2, make each register bit of
* interrupt priority WARL (Write-Any-Read-Legal). Just filter
@@ -349,8 +354,10 @@ static void sifive_plic_irq_request(void *opaque, int irq, int level)
{
SiFivePLICState *s = opaque;
- sifive_plic_set_pending(s, irq, level > 0);
- sifive_plic_update(s);
+ if (level > 0) {
+ sifive_plic_set_pending(s, irq, true);
+ sifive_plic_update(s);
+ }
}
static void sifive_plic_realize(DeviceState *dev, Error **errp)
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 44695ff9f2..2e88467c4a 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -1,3 +1,6 @@
+config RISCV_IOMMU
+ bool
+
config RISCV_NUMA
bool
@@ -47,6 +50,7 @@ config RISCV_VIRT
select SERIAL_MM
select RISCV_ACLINT
select RISCV_APLIC
+ select RISCV_IOMMU
select RISCV_IMSIC
select SIFIVE_PLIC
select SIFIVE_TEST
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 9115ecd91f..2e319168db 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -343,27 +343,33 @@ void riscv_load_fdt(hwaddr fdt_addr, void *fdt)
rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize));
}
-void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base,
- hwaddr rom_size, uint32_t reset_vec_size,
+void riscv_rom_copy_firmware_info(MachineState *machine,
+ RISCVHartArrayState *harts,
+ hwaddr rom_base, hwaddr rom_size,
+ uint32_t reset_vec_size,
uint64_t kernel_entry)
{
+ struct fw_dynamic_info32 dinfo32;
struct fw_dynamic_info dinfo;
size_t dinfo_len;
- if (sizeof(dinfo.magic) == 4) {
- dinfo.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE);
- dinfo.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION);
- dinfo.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S);
- dinfo.next_addr = cpu_to_le32(kernel_entry);
+ if (riscv_is_32bit(harts)) {
+ dinfo32.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE);
+ dinfo32.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION);
+ dinfo32.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S);
+ dinfo32.next_addr = cpu_to_le32(kernel_entry);
+ dinfo32.options = 0;
+ dinfo32.boot_hart = 0;
+ dinfo_len = sizeof(dinfo32);
} else {
dinfo.magic = cpu_to_le64(FW_DYNAMIC_INFO_MAGIC_VALUE);
dinfo.version = cpu_to_le64(FW_DYNAMIC_INFO_VERSION);
dinfo.next_mode = cpu_to_le64(FW_DYNAMIC_INFO_NEXT_MODE_S);
dinfo.next_addr = cpu_to_le64(kernel_entry);
+ dinfo.options = 0;
+ dinfo.boot_hart = 0;
+ dinfo_len = sizeof(dinfo);
}
- dinfo.options = 0;
- dinfo.boot_hart = 0;
- dinfo_len = sizeof(dinfo);
/**
* copy the dynamic firmware info. This information is specific to
@@ -375,7 +381,10 @@ void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base,
exit(1);
}
- rom_add_blob_fixed_as("mrom.finfo", &dinfo, dinfo_len,
+ rom_add_blob_fixed_as("mrom.finfo",
+ riscv_is_32bit(harts) ?
+ (void *)&dinfo32 : (void *)&dinfo,
+ dinfo_len,
rom_base + reset_vec_size,
&address_space_memory);
}
@@ -431,7 +440,9 @@ void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts
}
rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
rom_base, &address_space_memory);
- riscv_rom_copy_firmware_info(machine, rom_base, rom_size, sizeof(reset_vec),
+ riscv_rom_copy_firmware_info(machine, harts,
+ rom_base, rom_size,
+ sizeof(reset_vec),
kernel_entry);
}
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index f872674093..adbef8a9b2 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c'))
riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c', 'riscv-iommu-pci.c'))
hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
new file mode 100644
index 0000000000..6359ae0353
--- /dev/null
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -0,0 +1,421 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2022-2023 Rivos Inc.
+ * Copyright © 2023 FORTH-ICS/CARV
+ * Copyright © 2023 RISC-V IOMMU Task Group
+ *
+ * RISC-V IOMMU - Register Layout and Data Structures.
+ *
+ * Based on the IOMMU spec version 1.0, 3/2023
+ * https://github.com/riscv-non-isa/riscv-iommu
+ */
+
+#ifndef HW_RISCV_IOMMU_BITS_H
+#define HW_RISCV_IOMMU_BITS_H
+
+#define RISCV_IOMMU_SPEC_DOT_VER 0x010
+
+#ifndef GENMASK_ULL
+#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l))
+#endif
+
+/*
+ * struct riscv_iommu_fq_record - Fault/Event Queue Record
+ * See section 3.2 for more info.
+ */
+struct riscv_iommu_fq_record {
+ uint64_t hdr;
+ uint64_t _reserved;
+ uint64_t iotval;
+ uint64_t iotval2;
+};
+/* Header fields */
+#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0)
+#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32)
+#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34)
+#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40)
+
+/*
+ * struct riscv_iommu_pq_record - PCIe Page Request record
+ * For more information on the PCIe Page Request queue, see chapter 3.3.
+ */
+struct riscv_iommu_pq_record {
+ uint64_t hdr;
+ uint64_t payload;
+};
+/* Header fields */
+#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32)
+#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33)
+#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34)
+#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40)
+/* Payload fields */
+#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0)
+
+/* Common field positions */
+#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10)
+#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0)
+#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0)
+#define RISCV_IOMMU_QUEUE_ENABLE BIT(0)
+#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1)
+#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8)
+#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9)
+#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16)
+#define RISCV_IOMMU_QUEUE_BUSY BIT(17)
+#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0)
+#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60)
+
+/* 5.3 IOMMU Capabilities (64bits) */
+#define RISCV_IOMMU_REG_CAP 0x0000
+#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0)
+#define RISCV_IOMMU_CAP_SV32 BIT_ULL(8)
+#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9)
+#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10)
+#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11)
+#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16)
+#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17)
+#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18)
+#define RISCV_IOMMU_CAP_SV57X4 BIT_ULL(19)
+#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22)
+#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23)
+#define RISCV_IOMMU_CAP_ATS BIT_ULL(25)
+#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26)
+#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28)
+#define RISCV_IOMMU_CAP_DBG BIT_ULL(31)
+#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32)
+#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38)
+#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39)
+#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40)
+
+/* 5.4 Features control register (32bits) */
+#define RISCV_IOMMU_REG_FCTL 0x0008
+#define RISCV_IOMMU_FCTL_BE BIT(0)
+#define RISCV_IOMMU_FCTL_WSI BIT(1)
+#define RISCV_IOMMU_FCTL_GXL BIT(2)
+
+/* 5.5 Device-directory-table pointer (64bits) */
+#define RISCV_IOMMU_REG_DDTP 0x0010
+#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0)
+#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4)
+#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD
+
+enum riscv_iommu_ddtp_modes {
+ RISCV_IOMMU_DDTP_MODE_OFF = 0,
+ RISCV_IOMMU_DDTP_MODE_BARE = 1,
+ RISCV_IOMMU_DDTP_MODE_1LVL = 2,
+ RISCV_IOMMU_DDTP_MODE_2LVL = 3,
+ RISCV_IOMMU_DDTP_MODE_3LVL = 4,
+ RISCV_IOMMU_DDTP_MODE_MAX = 4
+};
+
+/* 5.6 Command Queue Base (64bits) */
+#define RISCV_IOMMU_REG_CQB 0x0018
+#define RISCV_IOMMU_CQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD
+#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.7 Command Queue head (32bits) */
+#define RISCV_IOMMU_REG_CQH 0x0020
+
+/* 5.8 Command Queue tail (32bits) */
+#define RISCV_IOMMU_REG_CQT 0x0024
+
+/* 5.9 Fault Queue Base (64bits) */
+#define RISCV_IOMMU_REG_FQB 0x0028
+#define RISCV_IOMMU_FQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD
+#define RISCV_IOMMU_FQB_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.10 Fault Queue Head (32bits) */
+#define RISCV_IOMMU_REG_FQH 0x0030
+
+/* 5.11 Fault Queue tail (32bits) */
+#define RISCV_IOMMU_REG_FQT 0x0034
+
+/* 5.12 Page Request Queue base (64bits) */
+#define RISCV_IOMMU_REG_PQB 0x0038
+#define RISCV_IOMMU_PQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD
+#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.13 Page Request Queue head (32bits) */
+#define RISCV_IOMMU_REG_PQH 0x0040
+
+/* 5.14 Page Request Queue tail (32bits) */
+#define RISCV_IOMMU_REG_PQT 0x0044
+
+/* 5.15 Command Queue CSR (32bits) */
+#define RISCV_IOMMU_REG_CQCSR 0x0048
+#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE
+#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE
+#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT
+#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9)
+#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10)
+#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11)
+#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE
+#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
+
+/* 5.16 Fault Queue CSR (32bits) */
+#define RISCV_IOMMU_REG_FQCSR 0x004C
+#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE
+#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE
+#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT
+#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW
+#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE
+#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
+
+/* 5.17 Page Request Queue CSR (32bits) */
+#define RISCV_IOMMU_REG_PQCSR 0x0050
+#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE
+#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE
+#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT
+#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW
+#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE
+#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
+
+/* 5.18 Interrupt Pending Status (32bits) */
+#define RISCV_IOMMU_REG_IPSR 0x0054
+#define RISCV_IOMMU_IPSR_CIP BIT(0)
+#define RISCV_IOMMU_IPSR_FIP BIT(1)
+#define RISCV_IOMMU_IPSR_PIP BIT(3)
+
+enum {
+ RISCV_IOMMU_INTR_CQ,
+ RISCV_IOMMU_INTR_FQ,
+ RISCV_IOMMU_INTR_PM,
+ RISCV_IOMMU_INTR_PQ,
+ RISCV_IOMMU_INTR_COUNT
+};
+
+/* 5.24 Translation request IOVA (64bits) */
+#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258
+
+/* 5.25 Translation request control (64bits) */
+#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260
+#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0)
+#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3)
+#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40)
+
+/* 5.26 Translation request response (64bits) */
+#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268
+#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0)
+#define RISCV_IOMMU_TR_RESPONSE_S BIT_ULL(9)
+#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.27 Interrupt cause to vector (64bits) */
+#define RISCV_IOMMU_REG_ICVEC 0x02F8
+#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0)
+#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4)
+#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8)
+#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12)
+
+/* 5.28 MSI Configuration table (32 * 64bits) */
+#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300
+
+#define RISCV_IOMMU_REG_SIZE 0x1000
+
+#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0)
+#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD
+
+/* Struct riscv_iommu_dc - Device Context - section 2.1 */
+struct riscv_iommu_dc {
+ uint64_t tc;
+ uint64_t iohgatp;
+ uint64_t ta;
+ uint64_t fsc;
+ uint64_t msiptp;
+ uint64_t msi_addr_mask;
+ uint64_t msi_addr_pattern;
+ uint64_t _reserved;
+};
+
+/* Translation control fields */
+#define RISCV_IOMMU_DC_TC_V BIT_ULL(0)
+#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1)
+#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2)
+#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3)
+#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4)
+#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5)
+#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6)
+#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7)
+#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8)
+#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9)
+#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10)
+#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11)
+
+/* Second-stage (aka G-stage) context fields */
+#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44)
+#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD
+
+enum riscv_iommu_dc_iohgatp_modes {
+ RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10
+};
+
+/* Translation attributes fields */
+#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12)
+
+/* First-stage context fields */
+#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD
+
+/* Generic I/O MMU command structure - check section 3.1 */
+struct riscv_iommu_command {
+ uint64_t dword0;
+ uint64_t dword1;
+};
+
+#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0)
+#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7)
+
+#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1
+#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0
+#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1
+#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10)
+#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32)
+#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33)
+#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44)
+
+#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2
+#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0
+#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10)
+#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32)
+
+#define RISCV_IOMMU_CMD_IODIR_OPCODE 3
+#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0
+#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1
+#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33)
+#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40)
+
+/* 3.1.4 I/O MMU PCIe ATS */
+#define RISCV_IOMMU_CMD_ATS_OPCODE 4
+#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0
+#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1
+#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32)
+#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33)
+#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40)
+#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56)
+/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */
+
+/* ATS.PRGR payload */
+#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44)
+
+enum riscv_iommu_dc_fsc_atp_modes {
+ RISCV_IOMMU_DC_FSC_MODE_BARE = 0,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3
+};
+
+enum riscv_iommu_fq_causes {
+ RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1,
+ RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4,
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5,
+ RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6,
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7,
+ RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12,
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13,
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15,
+ RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20,
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21,
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23,
+ RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256,
+ RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257,
+ RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258,
+ RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259,
+ RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260,
+ RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261,
+ RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262,
+ RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263,
+ RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264,
+ RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265,
+ RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266,
+ RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267,
+ RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268,
+ RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269,
+ RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270,
+ RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271,
+ RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272,
+ RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273,
+ RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274
+};
+
+/* MSI page table pointer */
+#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD
+#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0
+#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1
+
+/* Translation attributes fields */
+#define RISCV_IOMMU_PC_TA_V BIT_ULL(0)
+#define RISCV_IOMMU_PC_TA_RESERVED GENMASK_ULL(63, 32)
+
+/* First stage context fields */
+#define RISCV_IOMMU_PC_FSC_PPN GENMASK_ULL(43, 0)
+#define RISCV_IOMMU_PC_FSC_RESERVED GENMASK_ULL(59, 44)
+
+enum riscv_iommu_fq_ttypes {
+ RISCV_IOMMU_FQ_TTYPE_NONE = 0,
+ RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1,
+ RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2,
+ RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3,
+ RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5,
+ RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6,
+ RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7,
+ RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8,
+ RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9,
+};
+
+/* Header fields */
+#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32)
+#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33)
+#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34)
+#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40)
+
+/* Payload fields */
+#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0)
+#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1)
+#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2)
+#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0)
+#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3)
+#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12)
+
+
+/*
+ * struct riscv_iommu_msi_pte - MSI Page Table Entry
+ */
+struct riscv_iommu_msi_pte {
+ uint64_t pte;
+ uint64_t mrif_info;
+};
+
+/* Fields on pte */
+#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0)
+#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1)
+
+#define RISCV_IOMMU_MSI_PTE_M_MRIF 1
+#define RISCV_IOMMU_MSI_PTE_M_BASIC 3
+
+/* When M == 1 (MRIF mode) */
+#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7)
+/* When M == 3 (basic mode) */
+#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD
+#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63)
+
+/* Fields on mrif_info */
+#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0)
+#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD
+#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60)
+
+#endif /* HW_RISCV_IOMMU_BITS_H */
diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c
new file mode 100644
index 0000000000..a42242532d
--- /dev/null
+++ b/hw/riscv/riscv-iommu-pci.c
@@ -0,0 +1,202 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/host-utils.h"
+#include "qom/object.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+
+/* RISC-V IOMMU PCI Device Emulation */
+/* Value installed as the PCI class_id of the device class */
+#define RISCV_PCI_CLASS_SYSTEM_IOMMU 0x0806
+
+/*
+ * 4 MSIx vectors for ICVEC, one for MRIF. The spec mentions in
+ * the "Placement and data flow" section that:
+ *
+ * "The interfaces related to recording an incoming MSI in a memory-resident
+ * interrupt file (MRIF) are implementation-specific. The partitioning of
+ * responsibility between the IOMMU and the IO bridge for recording the
+ * incoming MSI in an MRIF and generating the associated notice MSI are
+ * implementation-specific."
+ *
+ * We're making a design decision to create the MSIx for MRIF in the
+ * IOMMU MSIx emulation.
+ */
+#define RISCV_IOMMU_PCI_MSIX_VECTORS 5
+
+/*
+ * 4 vectors that can be used by civ, fiv, pmiv and piv. Number of
+ * vectors is represented by 2^N, where N = number of writable bits
+ * in each cause. For 4 vectors we'll write 0b11 (3) in each reg,
+ * i.e. one hex nibble per cause, giving 0x3333.
+ */
+#define RISCV_IOMMU_PCI_ICVEC_VECTORS 0x3333
+
+/*
+ * PCI front-end state: wraps the common IOMMU core in a PCI function.
+ * vendor_id, device_id and revision are backed by the "vendor-id",
+ * "device-id" and "revision" properties and are copied into PCI config
+ * space at realize time.
+ */
+typedef struct RISCVIOMMUStatePci {
+ PCIDevice pci; /* Parent PCIe device state */
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint8_t revision;
+ MemoryRegion bar0; /* PCI BAR (including MSI-x config) */
+ RISCVIOMMUState iommu; /* common IOMMU state */
+} RISCVIOMMUStatePci;
+
+/*
+ * Interrupt delivery callback for the IOMMU core: signal MSI-X @vector on
+ * the enclosing PCI function, but only while MSI-X is enabled on it.
+ */
+static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector)
+{
+    RISCVIOMMUStatePci *pci_state =
+        container_of(iommu, RISCVIOMMUStatePci, iommu);
+    PCIDevice *pdev = &(pci_state->pci);
+
+    if (!msix_enabled(pdev)) {
+        return;
+    }
+
+    msix_notify(pdev, vector);
+}
+
+/*
+ * Realize the PCI front-end of the RISC-V IOMMU.
+ *
+ * Copies the PCI IDs from the device properties into config space,
+ * realizes the embedded IOMMU core, maps the core register region into
+ * BAR0, initializes MSI-X (tolerating -ENOTSUP) and finally hooks the
+ * IOMMU up to the PCI root bus.
+ *
+ * @dev:  PCI device being realized
+ * @errp: receives the error on failure; the function returns early then
+ */
+static void riscv_iommu_pci_realize(PCIDevice *dev, Error **errp)
+{
+    RISCVIOMMUStatePci *s = DO_UPCAST(RISCVIOMMUStatePci, pci, dev);
+    RISCVIOMMUState *iommu = &s->iommu;
+    uint8_t *pci_conf = dev->config;
+    Error *err = NULL;
+    PCIBus *bus;
+    int ret;
+
+    pci_set_word(pci_conf + PCI_VENDOR_ID, s->vendor_id);
+    pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, s->vendor_id);
+    pci_set_word(pci_conf + PCI_DEVICE_ID, s->device_id);
+    pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, s->device_id);
+    pci_set_byte(pci_conf + PCI_REVISION_ID, s->revision);
+
+    /* Set device id for trace / debug */
+    DEVICE(iommu)->id = g_strdup_printf("%02x:%02x.%01x",
+        pci_dev_bus_num(dev), PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+
+    /*
+     * Stop here if the core fails to realize: errp is already set, so
+     * continuing would both use an unrealized device and risk setting
+     * errp a second time further down.
+     */
+    if (!qdev_realize(DEVICE(iommu), NULL, errp)) {
+        return;
+    }
+
+    memory_region_init(&s->bar0, OBJECT(s), "riscv-iommu-bar0",
+        QEMU_ALIGN_UP(memory_region_size(&iommu->regs_mr), TARGET_PAGE_SIZE));
+    memory_region_add_subregion(&s->bar0, 0, &iommu->regs_mr);
+
+    pcie_endpoint_cap_init(dev, 0);
+
+    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+                     PCI_BASE_ADDRESS_MEM_TYPE_64, &s->bar0);
+
+    /* MSI-X table and PBA both live inside BAR0, 256 bytes apart */
+    ret = msix_init(dev, RISCV_IOMMU_PCI_MSIX_VECTORS,
+                    &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG,
+                    &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG + 256, 0, &err);
+
+    if (ret == -ENOTSUP) {
+        /*
+         * MSI-x is not supported by the platform.
+         * Driver should use timer/polling based notification handlers.
+         */
+        warn_report_err(err);
+    } else if (ret < 0) {
+        error_propagate(errp, err);
+        return;
+    } else {
+        /* Mark all ICVEC MSIx vectors as used */
+        for (int i = 0; i < RISCV_IOMMU_PCI_MSIX_VECTORS; i++) {
+            msix_vector_use(dev, i);
+        }
+
+        iommu->notify = riscv_iommu_pci_notify;
+    }
+
+    bus = pci_device_root_bus(dev);
+    if (!bus) {
+        error_setg(errp, "can't find PCIe root port for %02x:%02x.%x",
+                   pci_bus_num(pci_get_bus(dev)), PCI_SLOT(dev->devfn),
+                   PCI_FUNC(dev->devfn));
+        return;
+    }
+
+    riscv_iommu_pci_setup_iommu(iommu, bus, errp);
+}
+
+/* PCI exit hook: reset the IOMMU hooks of the device's root bus to NULL. */
+static void riscv_iommu_pci_exit(PCIDevice *pci_dev)
+{
+    PCIBus *root_bus = pci_device_root_bus(pci_dev);
+
+    pci_setup_iommu(root_bus, NULL, NULL);
+}
+
+/* Migration descriptor: the device is flagged as unmigratable. */
+static const VMStateDescription riscv_iommu_vmstate = {
+ .name = "riscv-iommu",
+ .unmigratable = 1
+};
+
+/*
+ * Instance init: create the embedded IOMMU core as child "iommu", alias
+ * all of its properties onto the PCI device object and advertise the
+ * ICVEC vectors available through the PCI front-end.
+ */
+static void riscv_iommu_pci_init(Object *obj)
+{
+    RISCVIOMMUStatePci *pci_state = RISCV_IOMMU_PCI(obj);
+    RISCVIOMMUState *core = &pci_state->iommu;
+
+    object_initialize_child(obj, "iommu", core, TYPE_RISCV_IOMMU);
+    qdev_alias_all_properties(DEVICE(core), obj);
+
+    core->icvec_avail_vectors = RISCV_IOMMU_PCI_ICVEC_VECTORS;
+}
+
+/*
+ * User-settable PCI identification. The defaults are the Red Hat vendor
+ * ID, the RISC-V IOMMU device ID and revision 0x01.
+ */
+static Property riscv_iommu_pci_properties[] = {
+ DEFINE_PROP_UINT16("vendor-id", RISCVIOMMUStatePci, vendor_id,
+ PCI_VENDOR_ID_REDHAT),
+ DEFINE_PROP_UINT16("device-id", RISCVIOMMUStatePci, device_id,
+ PCI_DEVICE_ID_REDHAT_RISCV_IOMMU),
+ DEFINE_PROP_UINT8("revision", RISCVIOMMUStatePci, revision, 0x01),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class init: install the PCI hooks and the generic device attributes. */
+static void riscv_iommu_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* PCI-level identification and lifecycle hooks */
+    pci_class->class_id = RISCV_PCI_CLASS_SYSTEM_IOMMU;
+    pci_class->realize = riscv_iommu_pci_realize;
+    pci_class->exit = riscv_iommu_pci_exit;
+
+    /* Generic device attributes */
+    dev_class->desc = "RISCV-IOMMU DMA Remapping device";
+    dev_class->vmsd = &riscv_iommu_vmstate;
+    dev_class->user_creatable = true;
+    dev_class->hotpluggable = false;
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+    device_class_set_props(dev_class, riscv_iommu_pci_properties);
+}
+
+/* QOM type description: a PCI device implementing the PCIe interface. */
+static const TypeInfo riscv_iommu_pci = {
+ .name = TYPE_RISCV_IOMMU_PCI,
+ .parent = TYPE_PCI_DEVICE,
+ .class_init = riscv_iommu_pci_class_init,
+ .instance_init = riscv_iommu_pci_init,
+ .instance_size = sizeof(RISCVIOMMUStatePci),
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_PCIE_DEVICE },
+ { },
+ },
+};
+
+/* Register the PCI IOMMU type; hooked in through type_init() below. */
+static void riscv_iommu_register_pci_types(void)
+{
+ type_register_static(&riscv_iommu_pci);
+}
+
+type_init(riscv_iommu_register_pci_types);
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
new file mode 100644
index 0000000000..feb650549a
--- /dev/null
+++ b/hw/riscv/riscv-iommu.c
@@ -0,0 +1,2399 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2021-2023, Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+/*
+ * NOTE(review): presumably entry-count limits for the device context and
+ * IOTLB caches; the users are outside this chunk, confirm there.
+ */
+#define LIMIT_CACHE_CTX (1U << 7)
+#define LIMIT_CACHE_IOT (1U << 20)
+
+/* Physical page number conversions */
+#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
+#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
+
+/*
+ * Device assigned I/O address space.
+ * Instances are linked together through the @list entry.
+ */
+struct RISCVIOMMUSpace {
+ IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */
+ AddressSpace iova_as; /* IOVA address space for attached device */
+ RISCVIOMMUState *iommu; /* Managing IOMMU device state */
+ uint32_t devid; /* Requester identifier, AKA device_id */
+ bool notifier; /* IOMMU unmap notifier enabled */
+ QLIST_ENTRY(RISCVIOMMUSpace) list;
+};
+
+/*
+ * Device translation context state.
+ * devid and process_id identify the context; the remaining fields hold
+ * values loaded from the device context (tc, ta, satp, gatp, MSI fields).
+ */
+struct RISCVIOMMUContext {
+ uint64_t devid:24; /* Requester Id, AKA device_id */
+ uint64_t process_id:20; /* Process ID. PASID for PCIe */
+ uint64_t tc; /* Translation Control */
+ uint64_t ta; /* Translation Attributes */
+ uint64_t satp; /* S-Stage address translation and protection */
+ uint64_t gatp; /* G-Stage address translation and protection */
+ uint64_t msi_addr_mask; /* MSI filtering - address mask */
+ uint64_t msi_addr_pattern; /* MSI filtering - address pattern */
+ uint64_t msiptp; /* MSI redirection page table pointer */
+};
+
+/*
+ * Address translation cache entry.
+ * Bitfield widths: 44-bit page numbers, 20-bit PSCID, 16-bit GSCID and
+ * 2 permission bits.
+ */
+struct RISCVIOMMUEntry {
+ uint64_t iova:44; /* IOVA Page Number */
+ uint64_t pscid:20; /* Process Soft-Context identifier */
+ uint64_t phys:44; /* Physical Page Number */
+ uint64_t gscid:16; /* Guest Soft-Context identifier */
+ uint64_t perm:2; /* IOMMU_RW flags */
+};
+
+/* IOMMU index for transactions without process_id specified. */
+#define RISCV_IOMMU_NOPROCID 0
+
+/*
+ * Extract the interrupt vector number assigned to @vec_type from an ICVEC
+ * register value. Each interrupt source owns one 4-bit field; an unknown
+ * @vec_type is a programming error and aborts.
+ */
+static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
+{
+    uint32_t vector;
+
+    if (vec_type == RISCV_IOMMU_INTR_CQ) {
+        vector = icvec & RISCV_IOMMU_ICVEC_CIV;
+    } else if (vec_type == RISCV_IOMMU_INTR_FQ) {
+        vector = (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
+    } else if (vec_type == RISCV_IOMMU_INTR_PM) {
+        vector = (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
+    } else if (vec_type == RISCV_IOMMU_INTR_PQ) {
+        vector = (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
+    } else {
+        g_assert_not_reached();
+    }
+
+    return vector;
+}
+
+/*
+ * Mark interrupt source @vec_type pending in IPSR and, if the value
+ * returned by the IPSR update did not already have that bit set, deliver
+ * the vector selected by ICVEC through the notify callback. Nothing is
+ * done when FCTL.WSI is set or no notify callback is installed.
+ */
+static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
+{
+    const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL);
+    uint32_t icvec_val, ipsr_val, msi_vector;
+
+    if (fctl & RISCV_IOMMU_FCTL_WSI) {
+        return;
+    }
+    if (!s->notify) {
+        return;
+    }
+
+    icvec_val = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
+    ipsr_val = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR,
+                                     (1 << vec_type), 0);
+
+    if (ipsr_val & (1 << vec_type)) {
+        return;
+    }
+
+    msi_vector = riscv_iommu_get_icvec_vector(icvec_val, vec_type);
+    s->notify(s, msi_vector);
+    trace_riscv_iommu_notify_int_vector(vec_type, msi_vector);
+}
+
+/*
+ * Post a fault record to the fault queue.
+ *
+ * The record is traced unconditionally. It is dropped when the queue is
+ * not active (FQON clear) or already flagged with overflow / memory fault.
+ * Otherwise the record is written at the tail slot and FQT advanced; a
+ * full queue sets FQOF and a failed DMA write sets FQMF. In all three
+ * cases an interrupt is raised when FIE is set.
+ */
+static void riscv_iommu_fault(RISCVIOMMUState *s,
+                              struct riscv_iommu_fq_record *ev)
+{
+    const uint32_t fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+    const uint32_t fqh =
+        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
+    const uint32_t fqt =
+        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
+    const uint32_t fqt_next = (fqt + 1) & s->fq_mask;
+    const uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);
+    const dma_addr_t slot = s->fq_addr + fqt * sizeof(*ev);
+
+    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+                          PCI_FUNC(devid), ev->hdr, ev->iotval);
+
+    if (!(fqcsr & RISCV_IOMMU_FQCSR_FQON)) {
+        return;
+    }
+    if (fqcsr & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF)) {
+        return;
+    }
+
+    if (fqh == fqt_next) {
+        /* Queue full */
+        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
+                              RISCV_IOMMU_FQCSR_FQOF, 0);
+    } else if (dma_memory_write(s->target_as, slot, ev, sizeof(*ev),
+                                MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+        /* Could not store the record */
+        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
+                              RISCV_IOMMU_FQCSR_FQMF, 0);
+    } else {
+        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, fqt_next);
+    }
+
+    if (fqcsr & RISCV_IOMMU_FQCSR_FIE) {
+        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
+    }
+}
+
+/*
+ * Post a page request record to the page request queue.
+ *
+ * The record is traced unconditionally. It is dropped when the queue is
+ * not active (PQON clear) or already flagged with overflow / memory fault.
+ * Otherwise the record is written at the tail slot and PQT advanced; a
+ * full queue sets PQOF and a failed DMA write sets PQMF. In all three
+ * cases an interrupt is raised when PIE is set.
+ */
+static void riscv_iommu_pri(RISCVIOMMUState *s,
+                            struct riscv_iommu_pq_record *pr)
+{
+    const uint32_t pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+    const uint32_t pqh =
+        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
+    const uint32_t pqt =
+        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
+    const uint32_t pqt_next = (pqt + 1) & s->pq_mask;
+    const uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);
+    const dma_addr_t slot = s->pq_addr + pqt * sizeof(*pr);
+
+    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+                          PCI_FUNC(devid), pr->payload);
+
+    if (!(pqcsr & RISCV_IOMMU_PQCSR_PQON)) {
+        return;
+    }
+    if (pqcsr & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF)) {
+        return;
+    }
+
+    if (pqh == pqt_next) {
+        /* Queue full */
+        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
+                              RISCV_IOMMU_PQCSR_PQOF, 0);
+    } else if (dma_memory_write(s->target_as, slot, pr, sizeof(*pr),
+                                MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+        /* Could not store the record */
+        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
+                              RISCV_IOMMU_PQCSR_PQMF, 0);
+    } else {
+        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, pqt_next);
+    }
+
+    if (pqcsr & RISCV_IOMMU_PQCSR_PIE) {
+        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
+    }
+}
+
+/*
+ * Portable implementation of pext_u64, bit-mask extraction.
+ *
+ * Gathers the bits of @val selected by the set bits of @ext and packs
+ * them, lowest selected bit first, into the low bits of the result.
+ */
+static uint64_t _pext_u64(uint64_t val, uint64_t ext)
+{
+    uint64_t packed = 0;
+    unsigned out_pos = 0;
+
+    for (; ext != 0; ext >>= 1, val >>= 1) {
+        if (!(ext & 1)) {
+            continue;
+        }
+        packed |= (val & 1) << out_pos;
+        out_pos++;
+    }
+
+    return packed;
+}
+
+/*
+ * Check if GPA matches MSI/MRIF pattern.
+ *
+ * Returns true only when MSI support is enabled, the context uses the
+ * flat MSI page table mode, and the page number of @gpa equals the MSI
+ * address pattern in every bit not covered by the MSI address mask.
+ */
+static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+                                  dma_addr_t gpa)
+{
+    if (s->enable_msi &&
+        get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) ==
+        RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
+        /* In MSI range (as defined by AIA IMSIC rules) iff no bit differs */
+        return !((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) &
+                 ~ctx->msi_addr_mask);
+    }
+
+    /* MSI disabled, or invalid MSI/MRIF mode */
+    return false;
+}
+
+/*
+ * RISCV IOMMU Address Translation Lookup - Page Table Walk
+ *
+ * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
+ * Both implementations could be merged into a single helper function in the
+ * future. They are kept separate for now, as error reporting and flow
+ * specifics are sufficiently different to justify separate implementations.
+ *
+ * On entry iotlb->iova and iotlb->perm describe the request. On success
+ * iotlb->translated_addr, iotlb->addr_mask and iotlb->perm are updated;
+ * iotlb->target_as is redirected to the trap address space for MSI writes.
+ *
+ * @s : IOMMU Device State
+ * @ctx : Translation context for device id and process address space id.
+ * @iotlb : translation data: physical address and access mode.
+ * @return : success (0) or fault cause code.
+ */
+static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ IOMMUTLBEntry *iotlb)
+{
+ dma_addr_t addr, base;
+ uint64_t satp, gatp, pte;
+ bool en_s, en_g;
+ /* Per-stage walk parameters and progress, indexed by S_STAGE/G_STAGE */
+ struct {
+ unsigned char step;
+ unsigned char levels;
+ unsigned char ptidxbits;
+ unsigned char ptesize;
+ } sc[2];
+ /* Translation stage phase */
+ enum {
+ S_STAGE = 0,
+ G_STAGE = 1,
+ } pass;
+ MemTxResult ret;
+
+ /* At this point satp/gatp hold only the MODE fields */
+ satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
+ gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+
+ en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
+ en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
+
+ /*
+ * Early check for MSI address match when IOVA == GPA.
+ * Note that the (!en_s) condition means that the MSI
+ * page table may only be used when guest pages are
+ * mapped using the g-stage page table, whether single-
+ * or two-stage paging is enabled. It's unavoidable though,
+ * because the spec mandates that we do a first-stage
+ * translation before we check the MSI page table, which
+ * means we can't do an early MSI check unless we have
+ * strictly !en_s.
+ */
+ if (!en_s && (iotlb->perm & IOMMU_WO) &&
+ riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
+ iotlb->target_as = &s->trap_as;
+ iotlb->translated_addr = iotlb->iova;
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ return 0;
+ }
+
+ /* Exit early for pass-through mode. */
+ if (!(en_s || en_g)) {
+ iotlb->translated_addr = iotlb->iova;
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ /* Allow R/W in pass-through mode */
+ iotlb->perm = IOMMU_RW;
+ return 0;
+ }
+
+ /* S/G translation parameters. */
+ for (pass = 0; pass < 2; pass++) {
+ uint32_t sv_mode;
+
+ sc[pass].step = 0;
+ if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
+ (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
+ /* 32bit mode for GXL/SXL == 1 */
+ switch (pass ? gatp : satp) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
+ sc[pass].levels = 0;
+ sc[pass].ptidxbits = 0;
+ sc[pass].ptesize = 0;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 2;
+ sc[pass].ptidxbits = 10;
+ sc[pass].ptesize = 4;
+ break;
+ default:
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ } else {
+ /* 64bit mode for GXL/SXL == 0 */
+ switch (pass ? gatp : satp) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
+ sc[pass].levels = 0;
+ sc[pass].ptidxbits = 0;
+ sc[pass].ptesize = 0;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 3;
+ sc[pass].ptidxbits = 9;
+ sc[pass].ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 4;
+ sc[pass].ptidxbits = 9;
+ sc[pass].ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 5;
+ sc[pass].ptidxbits = 9;
+ sc[pass].ptesize = 8;
+ break;
+ default:
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ }
+ };
+
+ /* S/G stages translation tables root pointers */
+ /* From here on satp/gatp hold root table physical addresses */
+ gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
+ satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
+ /* With both stages enabled, the S-stage root itself is G-translated first */
+ addr = (en_s && en_g) ? satp : iotlb->iova;
+ base = en_g ? gatp : satp;
+ pass = en_g ? G_STAGE : S_STAGE;
+
+ do {
+ /* The first level of a G-stage walk uses a 2-bit wider index */
+ const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
+ const unsigned va_bits = widened + sc[pass].ptidxbits;
+ const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
+ (sc[pass].levels - 1 - sc[pass].step);
+ const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
+ const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
+ const bool ade =
+ ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);
+
+ /* Address range check before first level lookup */
+ if (!sc[pass].step) {
+ const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
+ if ((addr & va_mask) != addr) {
+ return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
+ }
+ }
+
+ /* Read page table entry */
+ if (sc[pass].ptesize == 4) {
+ uint32_t pte32 = 0;
+ ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
+ MEMTXATTRS_UNSPECIFIED);
+ pte = pte32;
+ } else {
+ ret = ldq_le_dma(s->target_as, pte_addr, &pte,
+ MEMTXATTRS_UNSPECIFIED);
+ }
+ if (ret != MEMTX_OK) {
+ return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
+ : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
+ }
+
+ sc[pass].step++;
+ hwaddr ppn = pte >> PTE_PPN_SHIFT;
+
+ /* Every "break" below leaves the loop and reports a page fault */
+ if (!(pte & PTE_V)) {
+ break; /* Invalid PTE */
+ } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
+ base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
+ break; /* Reserved leaf PTE flags: PTE_W */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
+ break; /* Reserved leaf PTE flags: PTE_W + PTE_X */
+ } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
+ break; /* Misaligned PPN */
+ } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
+ break; /* Read access check failed */
+ } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
+ break; /* Write access check failed */
+ } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
+ break; /* Access bit not set */
+ } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
+ break; /* Dirty bit not set */
+ } else {
+ /* Leaf PTE, translation completed. */
+ sc[pass].step = sc[pass].levels;
+ base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
+ /* Update address mask based on smallest translation granularity */
+ iotlb->addr_mask &= (1ULL << va_skip) - 1;
+ /* Continue with S-Stage translation? */
+ if (pass && sc[0].step != sc[0].levels) {
+ pass = S_STAGE;
+ addr = iotlb->iova;
+ continue;
+ }
+ /* Translation phase completed (GPA or SPA) */
+ iotlb->translated_addr = base;
+ iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
+ : IOMMU_RO;
+
+ /* Check MSI GPA address match */
+ if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
+ riscv_iommu_msi_check(s, ctx, base)) {
+ /* Trap MSI writes and return GPA address. */
+ iotlb->target_as = &s->trap_as;
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ return 0;
+ }
+
+ /* Continue with G-Stage translation? */
+ if (!pass && en_g) {
+ pass = G_STAGE;
+ addr = base;
+ base = gatp;
+ sc[pass].step = 0;
+ continue;
+ }
+
+ return 0;
+ }
+
+ if (sc[pass].step == sc[pass].levels) {
+ break; /* Can't find leaf PTE */
+ }
+
+ /* Continue with G-Stage translation? */
+ if (!pass && en_g) {
+ pass = G_STAGE;
+ addr = base;
+ base = gatp;
+ sc[pass].step = 0;
+ }
+ } while (1);
+
+ /* Walk failed: pick the fault cause by access type and failing stage */
+ return (iotlb->perm & IOMMU_WO) ?
+ (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
+ (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
+}
+
+/*
+ * Build a fault queue record and hand it to riscv_iommu_fault().
+ *
+ * When the device context has DTF (disable translation fault) set, only
+ * the causes listed in the switch below are still reported; every other
+ * cause is silently dropped.
+ *
+ * @s:          IOMMU device state
+ * @ctx:        translation context; supplies devid, process_id and tc
+ * @fault_type: transaction type stored in the TTYPE header field
+ * @cause:      fault cause stored in the CAUSE header field
+ * @pv:         true when ctx->process_id is valid for this transaction
+ * @iotval:     value for the record's iotval word
+ * @iotval2:    value for the record's iotval2 word
+ */
+static void riscv_iommu_report_fault(RISCVIOMMUState *s,
+                                     RISCVIOMMUContext *ctx,
+                                     uint32_t fault_type, uint32_t cause,
+                                     bool pv,
+                                     uint64_t iotval, uint64_t iotval2)
+{
+    struct riscv_iommu_fq_record ev = { 0 };
+
+    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
+        switch (cause) {
+        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
+        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
+        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
+        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
+        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
+        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
+        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
+            break;
+        default:
+            /* DTF prevents reporting a fault for this given cause */
+            return;
+        }
+    }
+
+    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
+    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
+    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
+
+    /*
+     * PV tells the consumer whether the PID field is meaningful, so it
+     * must follow @pv rather than being set unconditionally.
+     */
+    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);
+    if (pv) {
+        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
+    }
+
+    ev.iotval = iotval;
+    ev.iotval2 = iotval2;
+
+    riscv_iommu_fault(s, &ev);
+}
+
+/*
+ * Redirect MSI write for given GPA.
+ *
+ * Looks up the MSI PTE selected by @gpa in the context's MSI page table.
+ * In basic mode the write is forwarded to the page named by the PTE. In
+ * MRIF mode the pending bit for interrupt identity @data is set in the
+ * MRIF and, if the matching enable bit is set, a notification MSI is
+ * written. Any failure is reported through riscv_iommu_report_fault().
+ *
+ * @s:     IOMMU device state
+ * @ctx:   translation context of the requester
+ * @gpa:   guest physical address of the trapped MSI write
+ * @data:  value being written
+ * @size:  size of the write in bytes
+ * @attrs: memory transaction attributes of the write
+ * @return: MEMTX_OK on success, otherwise the failing MemTxResult
+ */
+static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
+ unsigned size, MemTxAttrs attrs)
+{
+ MemTxResult res;
+ dma_addr_t addr;
+ uint64_t intn;
+ uint32_t n190;
+ uint64_t pte[2];
+ int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
+ int cause;
+
+ /* Interrupt File Number */
+ intn = _pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
+ if (intn >= 256) {
+ /* Interrupt file number out of range */
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ goto err;
+ }
+
+ /* fetch MSI PTE */
+ addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
+ addr = addr | (intn * sizeof(pte));
+ res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
+ MEMTXATTRS_UNSPECIFIED);
+ if (res != MEMTX_OK) {
+ if (res == MEMTX_DECODE_ERROR) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
+ } else {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ }
+ goto err;
+ }
+
+ /* The PTE words are little-endian in memory */
+ le64_to_cpus(&pte[0]);
+ le64_to_cpus(&pte[1]);
+
+ if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
+ /*
+ * The spec mentions that: "If msipte.C == 1, then further
+ * processing to interpret the PTE is implementation
+ * defined.". We'll abort with cause = 262 for this
+ * case too.
+ */
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
+ goto err;
+ }
+
+ switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
+ case RISCV_IOMMU_MSI_PTE_M_BASIC:
+ /* MSI Pass-through mode */
+ addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
+
+ trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
+ PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
+ gpa, addr);
+
+ /*
+ * NOTE(review): the first @size bytes of the host-endian @data are
+ * written as-is; confirm behaviour on big-endian hosts.
+ */
+ res = dma_memory_write(s->target_as, addr, &data, size, attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
+ goto err;
+ }
+
+ return MEMTX_OK;
+ case RISCV_IOMMU_MSI_PTE_M_MRIF:
+ /* MRIF mode, continue. */
+ break;
+ default:
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
+ goto err;
+ }
+
+ /*
+ * Report an error for interrupt identities exceeding the maximum allowed
+ * for an IMSIC interrupt file (2047) or destination address is not 32-bit
+ * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
+ */
+ if ((data > 2047) || (gpa & 3)) {
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
+ goto err;
+ }
+
+ /* MSI MRIF mode, non atomic pending bit update */
+
+ /* MRIF pending bit address */
+ addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
+ /* Bits 10:6 of the identity select a 16-byte slot (pending + enable) */
+ addr = addr | ((data & 0x7c0) >> 3);
+
+ trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
+ PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
+ gpa, addr);
+
+ /* MRIF pending bit mask */
+ data = 1ULL << (data & 0x03f);
+ /*
+ * NOTE(review): unlike pte[] above, the MRIF words and the notification
+ * payload below are accessed without le/cpu conversion; confirm on
+ * big-endian hosts.
+ */
+ res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ goto err;
+ }
+
+ intn = intn | data;
+ res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
+ goto err;
+ }
+
+ /* Get MRIF enable bits */
+ addr = addr + sizeof(intn);
+ res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ goto err;
+ }
+
+ if (!(intn & data)) {
+ /* notification disabled, MRIF update completed. */
+ return MEMTX_OK;
+ }
+
+ /* Send notification message */
+ addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
+ /* 11-bit notification id: NID supplies bits 9:0, NID_MSB bit 10 */
+ n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
+ (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
+
+ res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
+ goto err;
+ }
+
+ trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
+
+ return MEMTX_OK;
+
+err:
+ /* Common failure exit: log the fault, then return the failing result */
+ riscv_iommu_report_fault(s, ctx, fault_type, cause,
+ !!ctx->process_id, 0, 0);
+ return res;
+}
+
+/*
+ * Check device context configuration as described by the
+ * riscv-iommu spec section "Device-context configuration
+ * checks".
+ *
+ * @s:   IOMMU device state; supplies the capability bits (s->cap)
+ * @ctx: device context to validate (tc, msiptp, gatp and satp are read)
+ * @return: true when the context is acceptable, false when it is
+ *          misconfigured for this IOMMU
+ */
+static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
+                                            RISCVIOMMUContext *ctx)
+{
+    /*
+     * Sv32 and Sv39 share mode encoding 8 (SXL selects which one is
+     * meant), so the Sv39 constant doubles as the Sv32 mode value here.
+     * The mode must never be compared against RISCV_IOMMU_CAP_SV32: that
+     * is a capability mask for s->cap, not a mode encoding.
+     */
+    const uint32_t mode_sv32 = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
+    uint32_t fsc_mode, msi_mode;
+    uint64_t gatp;
+
+    /* ATS/PRI related controls require the ATS capability */
+    if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
+        (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
+         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
+         ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
+        return false;
+    }
+
+    /* T2GPA and EN_PRI depend on EN_ATS */
+    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
+        (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
+         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
+        return false;
+    }
+
+    /* PRPR depends on EN_PRI */
+    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
+        ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
+        return false;
+    }
+
+    if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
+        ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
+        return false;
+    }
+
+    if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
+        msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
+
+        if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
+            msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
+            return false;
+        }
+    }
+
+    /* T2GPA needs a G-stage page table */
+    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+    if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
+        gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
+        return false;
+    }
+
+    fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
+
+    if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
+        /* fsc holds a process directory pointer: check the PDT mode */
+        switch (fsc_mode) {
+        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
+            if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
+                return false;
+            }
+            break;
+        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
+            if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
+                return false;
+            }
+            break;
+        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
+            if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
+                return false;
+            }
+            break;
+        }
+    } else {
+        /* DC.tc.PDTV is 0 */
+        if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
+            return false;
+        }
+
+        if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
+            if (fsc_mode == mode_sv32 &&
+                !(s->cap & RISCV_IOMMU_CAP_SV32)) {
+                return false;
+            }
+        } else {
+            switch (fsc_mode) {
+            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
+                if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
+                    return false;
+                }
+                break;
+            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
+                if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
+                    return false;
+                }
+                break;
+            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
+                if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
+                    return false;
+                }
+                break;
+            }
+        }
+    }
+
+    /*
+     * CAP_END is always zero (only one endianness). FCTL_BE is
+     * always zero (little-endian accesses). Thus TC_SBE must
+     * always be LE, i.e. zero.
+     */
+    if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Validate process context (PC) according to section
+ * "Process-context configuration checks".
+ *
+ * @s:   IOMMU device state; supplies the capability bits (s->cap)
+ * @ctx: context holding the process-context ta and fsc (in ctx->satp)
+ *       values, plus the device-context tc
+ * @return: true when the process context is acceptable
+ */
+static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
+                                             RISCVIOMMUContext *ctx)
+{
+    /*
+     * Sv32 and Sv39 share mode encoding 8 (SXL selects which one is
+     * meant), so the Sv39 constant doubles as the Sv32 mode value here.
+     * The mode must never be compared against RISCV_IOMMU_CAP_SV32: that
+     * is a capability mask for s->cap, not a mode encoding.
+     */
+    const uint32_t mode_sv32 = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
+    uint32_t mode;
+
+    /* Reserved bits must be zero */
+    if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
+        return false;
+    }
+
+    if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
+        return false;
+    }
+
+    /* Only Bare and the Sv* first-stage modes are legal encodings */
+    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
+    switch (mode) {
+    case RISCV_IOMMU_DC_FSC_MODE_BARE:
+    /* sv39 and sv32 modes have the same value (8) */
+    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
+    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
+    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
+        break;
+    default:
+        return false;
+    }
+
+    /* The selected mode must be advertised in the capabilities */
+    if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
+        if (mode == mode_sv32 &&
+            !(s->cap & RISCV_IOMMU_CAP_SV32)) {
+            return false;
+        }
+    } else {
+        switch (mode) {
+        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
+            if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
+                return false;
+            }
+            break;
+        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
+            if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
+                return false;
+            }
+            break;
+        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
+            if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
+                return false;
+            }
+            break;
+        }
+    }
+
+    return true;
+}
+
+/*
+ * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
+ *
+ * @s : IOMMU Device State
+ * @ctx : Device Translation Context with devid and process_id set.
+ * @return : success or fault code.
+ */
+static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
+{
+ const uint64_t ddtp = s->ddtp;
+ unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
+ dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
+ struct riscv_iommu_dc dc;
+ /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
+ const int dc_fmt = !s->enable_msi;
+ const size_t dc_len = sizeof(dc) >> dc_fmt;
+ unsigned depth;
+ uint64_t de;
+
+ switch (mode) {
+ case RISCV_IOMMU_DDTP_MODE_OFF:
+ return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
+
+ case RISCV_IOMMU_DDTP_MODE_BARE:
+ /* mock up pass-through translation context */
+ ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
+ RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
+ ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
+ RISCV_IOMMU_DC_FSC_MODE_BARE);
+
+ ctx->tc = RISCV_IOMMU_DC_TC_V;
+ if (s->enable_ats) {
+ ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
+ }
+
+ ctx->ta = 0;
+ ctx->msiptp = 0;
+ return 0;
+
+ case RISCV_IOMMU_DDTP_MODE_1LVL:
+ depth = 0;
+ break;
+
+ case RISCV_IOMMU_DDTP_MODE_2LVL:
+ depth = 1;
+ break;
+
+ case RISCV_IOMMU_DDTP_MODE_3LVL:
+ depth = 2;
+ break;
+
+ default:
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+
+ /*
+ * Check supported device id width (in bits).
+ * See IOMMU Specification, Chapter 6. Software guidelines.
+ * - if extended device-context format is used:
+ * 1LVL: 6, 2LVL: 15, 3LVL: 24
+ * - if base device-context format is used:
+ * 1LVL: 7, 2LVL: 16, 3LVL: 24
+ */
+ if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
+ return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
+ }
+
+ /* Device directory tree walk */
+ for (; depth-- > 0; ) {
+ /*
+ * Select device id index bits based on device directory tree level
+ * and device context format.
+ * See IOMMU Specification, Chapter 2. Data Structures.
+ * - if extended device-context format is used:
+ * device index: [23:15][14:6][5:0]
+ * - if base device-context format is used:
+ * device index: [23:16][15:7][6:0]
+ */
+ const int split = depth * 9 + 6 + dc_fmt;
+ addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
+ if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
+ }
+ le64_to_cpus(&de);
+ if (!(de & RISCV_IOMMU_DDTE_VALID)) {
+ /* invalid directory entry */
+ return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+ }
+ if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
+ /* reserved bits set */
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
+ }
+
+ /* index into device context entry page */
+ addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
+
+ memset(&dc, 0, sizeof(dc));
+ if (dma_memory_read(s->target_as, addr, &dc, dc_len,
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
+ }
+
+ /* Set translation context. */
+ ctx->tc = le64_to_cpu(dc.tc);
+ ctx->gatp = le64_to_cpu(dc.iohgatp);
+ ctx->satp = le64_to_cpu(dc.fsc);
+ ctx->ta = le64_to_cpu(dc.ta);
+ ctx->msiptp = le64_to_cpu(dc.msiptp);
+ ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
+ ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
+
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+ }
+
+ if (!riscv_iommu_validate_device_ctx(s, ctx)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+
+ /* FSC field checks */
+ mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
+ addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
+
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
+ if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
+ /* PID is disabled */
+ return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
+ }
+ if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
+ /* Invalid translation mode */
+ return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+ }
+ return 0;
+ }
+
+ if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
+ /* No default process_id enabled, set BARE mode */
+ ctx->satp = 0ULL;
+ return 0;
+ } else {
+ /* Use default process_id #0 */
+ ctx->process_id = 0;
+ }
+ }
+
+ if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
+ /* No S-Stage translation, done. */
+ return 0;
+ }
+
+ /* FSC.TC.PDTV enabled */
+ if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
+ /* Invalid PDTP.MODE */
+ return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
+ }
+
+ for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
+ /*
+ * Select process id index bits based on process directory tree
+ * level. See IOMMU Specification, 2.2. Process-Directory-Table.
+ */
+ const int split = depth * 9 + 8;
+ addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
+ if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
+ }
+ le64_to_cpus(&de);
+ if (!(de & RISCV_IOMMU_PC_TA_V)) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
+ }
+ addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
+ }
+
+ /* Leaf entry in PDT */
+ addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
+ if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
+ }
+
+ /* Use FSC and TA from process directory entry. */
+ ctx->ta = le64_to_cpu(dc.ta);
+ ctx->satp = le64_to_cpu(dc.fsc);
+
+ if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
+ }
+
+ if (!riscv_iommu_validate_process_ctx(s, ctx)) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
+ }
+
+ return 0;
+}
+
+/* Translation Context cache support */
+/* GHashTable equality callback: contexts are keyed by {devid, process_id}. */
+static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
+{
+    const RISCVIOMMUContext *lhs = v1;
+    const RISCVIOMMUContext *rhs = v2;
+
+    return !(lhs->devid != rhs->devid ||
+             lhs->process_id != rhs->process_id);
+}
+
+/*
+ * GHashTable hash callback for translation contexts.
+ *
+ * The device id (assumed to be at most 24 bits wide) supplies the low part
+ * of the hash and the process id is added in above bit 24.
+ */
+static guint riscv_iommu_ctx_hash(gconstpointer v)
+{
+    const RISCVIOMMUContext *entry = v;
+    const guint devid_part = (guint)(entry->devid);
+    const guint procid_part = (guint)(entry->process_id) << 24;
+
+    return devid_part + procid_part;
+}
+
+/*
+ * GHFunc: drop the valid bit of a cached context whose device id and
+ * process id both match the ones given in @data.
+ */
+static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
+                                               gpointer data)
+{
+    RISCVIOMMUContext *entry = value;
+    const RISCVIOMMUContext *match = data;
+
+    if (!(entry->tc & RISCV_IOMMU_DC_TC_V)) {
+        return;
+    }
+    if (entry->devid != match->devid ||
+        entry->process_id != match->process_id) {
+        return;
+    }
+    entry->tc &= ~RISCV_IOMMU_DC_TC_V;
+}
+
+/*
+ * GHFunc: drop the valid bit of a cached context whose device id matches
+ * the one given in @data, regardless of its process id.
+ */
+static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
+                                        gpointer data)
+{
+    RISCVIOMMUContext *entry = value;
+    const RISCVIOMMUContext *match = data;
+
+    if (!(entry->tc & RISCV_IOMMU_DC_TC_V)) {
+        return;
+    }
+    if (entry->devid != match->devid) {
+        return;
+    }
+    entry->tc &= ~RISCV_IOMMU_DC_TC_V;
+}
+
+/* GHFunc: drop the valid bit of every cached context that still has it. */
+static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
+                                      gpointer data)
+{
+    RISCVIOMMUContext *entry = value;
+
+    if (!(entry->tc & RISCV_IOMMU_DC_TC_V)) {
+        return;
+    }
+    entry->tc &= ~RISCV_IOMMU_DC_TC_V;
+}
+
+/*
+ * Run the invalidation callback @func over every cached translation
+ * context, handing it a match pattern built from @devid and @process_id.
+ * A reference to the cache is held for the duration of the iteration.
+ */
+static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
+                                  uint32_t devid, uint32_t process_id)
+{
+    RISCVIOMMUContext match = {
+        .devid = devid,
+        .process_id = process_id,
+    };
+    GHashTable *cache = g_hash_table_ref(s->ctx_cache);
+
+    g_hash_table_foreach(cache, func, &match);
+    g_hash_table_unref(cache);
+}
+
+/*
+ * Find or allocate translation context for a given {device_id, process_id}.
+ *
+ * A cached context is reused only while its TC.V bit is still set;
+ * otherwise a fresh one is fetched from the directory structures and stored
+ * in the cache (which is swapped for an empty table once it has grown to
+ * LIMIT_CACHE_CTX entries).
+ *
+ * On success *ref receives the referenced cache table, to be released with
+ * riscv_iommu_ctx_put(). On failure the fault is reported, *ref is set to
+ * NULL and NULL is returned.
+ */
+static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
+                                          unsigned devid, unsigned process_id,
+                                          void **ref)
+{
+    RISCVIOMMUContext probe = {
+        .devid = devid,
+        .process_id = process_id,
+    };
+    GHashTable *cache = g_hash_table_ref(s->ctx_cache);
+    RISCVIOMMUContext *found = g_hash_table_lookup(cache, &probe);
+    RISCVIOMMUContext *fresh;
+    int fault;
+
+    if (found != NULL && (found->tc & RISCV_IOMMU_DC_TC_V) != 0) {
+        *ref = cache;
+        return found;
+    }
+
+    fresh = g_new0(RISCVIOMMUContext, 1);
+    fresh->devid = devid;
+    fresh->process_id = process_id;
+
+    fault = riscv_iommu_ctx_fetch(s, fresh);
+    if (fault) {
+        g_hash_table_unref(cache);
+        *ref = NULL;
+
+        riscv_iommu_report_fault(s, fresh, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
+                                 fault, !!process_id, 0, 0);
+
+        g_free(fresh);
+        return NULL;
+    }
+
+    if (g_hash_table_size(cache) >= LIMIT_CACHE_CTX) {
+        /* Cache is full: start over with an empty table. */
+        g_hash_table_unref(cache);
+        cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
+                                      riscv_iommu_ctx_equal,
+                                      g_free, NULL);
+        /* One reference for the caller, the initial one for the state. */
+        g_hash_table_ref(cache);
+        g_hash_table_unref(qatomic_xchg(&s->ctx_cache, cache));
+    }
+
+    g_hash_table_add(cache, fresh);
+    *ref = cache;
+    return fresh;
+}
+
+/* Release the cache reference handed out by riscv_iommu_ctx(), if any. */
+static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
+{
+    if (ref == NULL) {
+        return;
+    }
+    g_hash_table_unref((GHashTable *)ref);
+}
+
+/*
+ * Find or allocate address space for a given device.
+ *
+ * The list of known spaces is searched by device id first; when there is no
+ * match a new RISCVIOMMUSpace with its own IOMMU memory region is created
+ * and put at the head of the list.
+ */
+static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
+{
+    RISCVIOMMUSpace *space;
+    char label[64];
+
+    /* FIXME: PCIe bus remapping for attached endpoints. */
+    devid |= s->bus << 8;
+
+    QLIST_FOREACH(space, &s->spaces, list) {
+        if (space->devid == devid) {
+            return &space->iova_as;
+        }
+    }
+
+    space = g_new0(RISCVIOMMUSpace, 1);
+    space->iommu = s;
+    space->devid = devid;
+
+    snprintf(label, sizeof(label), "riscv-iommu-%04x:%02x.%d-iova",
+             PCI_BUS_NUM(space->devid), PCI_SLOT(space->devid),
+             PCI_FUNC(space->devid));
+
+    /* IOVA address space, untranslated addresses */
+    memory_region_init_iommu(&space->iova_mr, sizeof(space->iova_mr),
+                             TYPE_RISCV_IOMMU_MEMORY_REGION,
+                             OBJECT(space), "riscv_iommu", UINT64_MAX);
+    address_space_init(&space->iova_as, MEMORY_REGION(&space->iova_mr), label);
+
+    QLIST_INSERT_HEAD(&s->spaces, space, list);
+
+    trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(space->devid),
+                          PCI_SLOT(space->devid), PCI_FUNC(space->devid));
+
+    return &space->iova_as;
+}
+
+/* Translation Object cache support */
+/* GHashTable equality callback: entries are keyed by {gscid, pscid, iova}. */
+static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
+{
+    const RISCVIOMMUEntry *lhs = v1;
+    const RISCVIOMMUEntry *rhs = v2;
+
+    return !(lhs->gscid != rhs->gscid ||
+             lhs->pscid != rhs->pscid ||
+             lhs->iova != rhs->iova);
+}
+
+/* GHashTable hash callback: the stored iova, truncated to guint. */
+static guint riscv_iommu_iot_hash(gconstpointer v)
+{
+    const RISCVIOMMUEntry *entry = v;
+
+    return (guint)entry->iova;
+}
+
+/*
+ * GV: 1 PSCV: 1 AV: 1
+ * GHFunc: revoke a cached translation matching GSCID, PSCID and IOVA.
+ */
+static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
+                                             gpointer data)
+{
+    RISCVIOMMUEntry *entry = value;
+    const RISCVIOMMUEntry *match = data;
+
+    if (entry->gscid != match->gscid ||
+        entry->pscid != match->pscid ||
+        entry->iova != match->iova) {
+        return;
+    }
+    entry->perm = IOMMU_NONE;
+}
+
+/*
+ * GV: 1 PSCV: 1 AV: 0
+ * GHFunc: revoke a cached translation matching GSCID and PSCID.
+ */
+static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value,
+                                        gpointer data)
+{
+    RISCVIOMMUEntry *entry = value;
+    const RISCVIOMMUEntry *match = data;
+
+    if (entry->gscid != match->gscid || entry->pscid != match->pscid) {
+        return;
+    }
+    entry->perm = IOMMU_NONE;
+}
+
+/*
+ * GV: 1 GVMA: 1
+ * GHFunc: revoke a cached translation matching GSCID. The cache is
+ * simplified and does no GPA matching, so the address is not compared.
+ */
+static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value,
+                                            gpointer data)
+{
+    RISCVIOMMUEntry *entry = value;
+    const RISCVIOMMUEntry *match = data;
+
+    if (entry->gscid != match->gscid) {
+        return;
+    }
+    entry->perm = IOMMU_NONE;
+}
+
+/*
+ * GV: 1 GVMA: 0
+ * GHFunc: revoke a cached translation matching GSCID.
+ */
+static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value,
+                                        gpointer data)
+{
+    RISCVIOMMUEntry *entry = value;
+    const RISCVIOMMUEntry *match = data;
+
+    if (entry->gscid != match->gscid) {
+        return;
+    }
+    entry->perm = IOMMU_NONE;
+}
+
+/*
+ * GV: 0
+ * GHFunc: revoke every cached translation unconditionally.
+ */
+static void riscv_iommu_iot_inval_all(gpointer key, gpointer value,
+                                      gpointer data)
+{
+    RISCVIOMMUEntry *entry = value;
+
+    entry->perm = IOMMU_NONE;
+}
+
+/*
+ * Look up the cached translation for @iova under the GSCID / PSCID of @ctx.
+ * Returns the cached entry or NULL. Caller should keep ref-count for
+ * iot_cache object.
+ */
+static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
+                                               GHashTable *iot_cache,
+                                               hwaddr iova)
+{
+    RISCVIOMMUEntry probe = {
+        .iova = PPN_DOWN(iova),
+        .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
+        .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
+    };
+    RISCVIOMMUEntry *hit = g_hash_table_lookup(iot_cache, &probe);
+
+    return hit;
+}
+
+/*
+ * Insert a translation entry into the IOTLB cache.
+ *
+ * @s         : IOMMU Device State
+ * @iot_cache : cache table the caller holds a reference on
+ * @iot       : heap-allocated entry; ownership always passes to this
+ *              function
+ *
+ * The entry is either stored in a cache table (which frees it through its
+ * key destroy function) or, when caching is disabled (iot_limit == 0),
+ * freed right away so that it does not leak. When the current cache has
+ * reached iot_limit entries it is replaced by an empty table first.
+ *
+ * Caller should keep ref-count for iot_cache object.
+ */
+static void riscv_iommu_iot_update(RISCVIOMMUState *s,
+                                   GHashTable *iot_cache,
+                                   RISCVIOMMUEntry *iot)
+{
+    if (!s->iot_limit) {
+        /* Caching disabled: nobody else will ever release this entry. */
+        g_free(iot);
+        return;
+    }
+
+    if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
+        /*
+         * The new table's initial reference belongs to the device state;
+         * the state's reference on the old table is dropped here, the
+         * caller's own reference keeps the old table alive until released.
+         */
+        iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
+                                          riscv_iommu_iot_equal,
+                                          g_free, NULL);
+        g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
+    }
+    g_hash_table_add(iot_cache, iot);
+}
+
+/*
+ * Run the invalidation callback @func over every cached translation,
+ * handing it a match pattern built from @gscid, @pscid and the page number
+ * of @iova. A reference to the cache is held while iterating.
+ */
+static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
+                                  uint32_t gscid, uint32_t pscid, hwaddr iova)
+{
+    RISCVIOMMUEntry match = {
+        .iova = PPN_DOWN(iova),
+        .pscid = pscid,
+        .gscid = gscid,
+    };
+    GHashTable *cache = g_hash_table_ref(s->iot_cache);
+
+    g_hash_table_foreach(cache, func, &match);
+    g_hash_table_unref(cache);
+}
+
+/*
+ * Translate the request described by @iotlb within translation context @ctx.
+ *
+ * @s            : IOMMU Device State
+ * @ctx          : translation context of the requesting device / process
+ * @iotlb        : in: iova and requested permission (IOMMU_NONE marks an
+ *                 ATS request); out: translated address, mask, permission
+ * @enable_cache : allow the result to be stored in the translation cache
+ * @return       : 0 on success, fault cause otherwise
+ *
+ * The translation cache is consulted first; on a miss the address is
+ * resolved through riscv_iommu_spa_fetch(). Faults are either turned into a
+ * page request (ATS request with automatic page-request generation enabled)
+ * or reported through the fault queue.
+ */
+static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+                                 IOMMUTLBEntry *iotlb, bool enable_cache)
+{
+    RISCVIOMMUEntry *iot;
+    IOMMUAccessFlags perm;
+    bool enable_pid;
+    bool enable_pri;
+    GHashTable *iot_cache;
+    int fault;
+
+    iot_cache = g_hash_table_ref(s->iot_cache);
+    /*
+     * TC[32] is reserved for custom extensions, used here to temporarily
+     * enable automatic page-request generation for ATS queries.
+     */
+    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
+    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
+
+    /* Check for ATS request. */
+    if (iotlb->perm == IOMMU_NONE) {
+        /* Check if ATS is disabled. */
+        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
+            enable_pri = false;
+            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
+            goto done;
+        }
+    }
+
+    /* Entries invalidated in place carry IOMMU_NONE and count as a miss. */
+    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova);
+    perm = iot ? iot->perm : IOMMU_NONE;
+    if (perm != IOMMU_NONE) {
+        iotlb->translated_addr = PPN_PHYS(iot->phys);
+        iotlb->addr_mask = ~TARGET_PAGE_MASK;
+        iotlb->perm = perm;
+        fault = 0;
+        goto done;
+    }
+
+    /* Translate using device directory / page table information. */
+    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
+
+    if (!fault && iotlb->target_as == &s->trap_as) {
+        /* Do not cache trapped MSI translations */
+        goto done;
+    }
+
+    /*
+     * We made an implementation choice to not cache identity-mapped
+     * translations, as allowed by the specification, to avoid
+     * translation cache evictions for other devices sharing the
+     * IOMMU hardware model.
+     */
+    if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
+        iot = g_new0(RISCVIOMMUEntry, 1);
+        iot->iova = PPN_DOWN(iotlb->iova);
+        iot->phys = PPN_DOWN(iotlb->translated_addr);
+        iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
+        iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
+        iot->perm = iotlb->perm;
+        riscv_iommu_iot_update(s, iot_cache, iot);
+    }
+
+done:
+    g_hash_table_unref(iot_cache);
+
+    if (enable_pri && fault) {
+        struct riscv_iommu_pq_record pr = {0};
+        if (enable_pid) {
+            pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
+                               RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
+        }
+        pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
+        pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
+                     RISCV_IOMMU_PREQ_PAYLOAD_M;
+        riscv_iommu_pri(s, &pr);
+        return fault;
+    }
+
+    if (fault) {
+        unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
+
+        /*
+         * Classify by the write bit alone: IOMMU_RW also contains the read
+         * bit, so testing against it would label read-only requests as
+         * writes and make the read branch unreachable.
+         */
+        if (iotlb->perm & IOMMU_WO) {
+            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
+        } else if (iotlb->perm & IOMMU_RO) {
+            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
+        }
+
+        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
+                                 iotlb->iova, iotlb->translated_addr);
+        return fault;
+    }
+
+    return 0;
+}
+
+/* IOMMU Command Interface */
+
+/*
+ * IOFENCE.C: when @notify is set, write the 32-bit @data to @addr in the
+ * target address space and return the result of that write; otherwise
+ * there is nothing to do.
+ */
+static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
+                                       uint64_t addr, uint32_t data)
+{
+    /*
+     * ATS processing in this implementation of the IOMMU is synchronous,
+     * no need to wait for completions here.
+     */
+    if (notify) {
+        return dma_memory_write(s->target_as, addr, &data, sizeof(data),
+                                MEMTXATTRS_UNSPECIFIED);
+    }
+
+    return MEMTX_OK;
+}
+
+/*
+ * Common handler for ATS commands: notify the IOMMU notifiers registered on
+ * the address space of the device addressed by @cmd.
+ *
+ * @s        : IOMMU Device State
+ * @cmd      : ATS command; dword0 carries the device and process ids
+ * @flag     : notifier event type to deliver
+ * @perm     : access flags stored in the delivered entry
+ * @trace_fn : trace hook called once per notified notifier
+ *
+ * Nothing happens when the device has no address space yet or the space has
+ * no notifier attached. When the command's PV bit is set only notifiers
+ * whose iommu_idx equals the command's PID are notified.
+ */
+static void riscv_iommu_ats(RISCVIOMMUState *s,
+                            struct riscv_iommu_command *cmd,
+                            IOMMUNotifierFlag flag,
+                            IOMMUAccessFlags perm,
+                            void (*trace_fn)(const char *id))
+{
+    RISCVIOMMUSpace *as = NULL;
+    IOMMUNotifier *n;
+    /*
+     * Start from an all-zero event so that fields this function does not
+     * set explicitly (e.g. translated_addr) never reach a notifier with
+     * indeterminate contents.
+     */
+    IOMMUTLBEvent event = {
+        .type = flag,
+        .entry = {
+            .target_as = s->target_as,
+            .perm = perm,
+        },
+    };
+    uint32_t pid;
+    uint32_t devid;
+    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
+
+    if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
+        /* Use device segment and requester id */
+        devid = get_field(cmd->dword0,
+            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
+    } else {
+        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
+    }
+
+    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);
+
+    QLIST_FOREACH(as, &s->spaces, list) {
+        if (as->devid == devid) {
+            break;
+        }
+    }
+
+    if (!as || !as->notifier) {
+        return;
+    }
+
+    IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
+        if (!pv || n->iommu_idx == pid) {
+            /* Cover the whole range the notifier is registered for. */
+            event.entry.iova = n->start;
+            event.entry.addr_mask = n->end - n->start;
+            trace_fn(as->iova_mr.parent_obj.name);
+            memory_region_notify_iommu_one(n, &event);
+        }
+    }
+}
+
+/*
+ * ATS.INVAL: deliver a device-IOTLB unmap event with IOMMU_NONE permission
+ * to the notifiers of the device addressed by @cmd.
+ */
+static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
+                                  struct riscv_iommu_command *cmd)
+{
+    /* Plain call: a void function must not return an expression in ISO C. */
+    riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
+                    trace_riscv_iommu_ats_inval);
+}
+
+/*
+ * ATS.PRGR: deliver a map event to the notifiers of the device addressed by
+ * @cmd. The response code from dword1 is carried in the access flags:
+ * zero becomes IOMMU_RW, any other value becomes IOMMU_NONE.
+ */
+static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
+                                 struct riscv_iommu_command *cmd)
+{
+    unsigned resp_code = get_field(cmd->dword1,
+                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
+
+    /* Using the access flag to carry response code information */
+    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
+
+    /* Plain call: a void function must not return an expression in ISO C. */
+    riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
+                    trace_riscv_iommu_ats_prgr);
+}
+
+/*
+ * Process a write to the DDTP register.
+ *
+ * The requested value is accepted only for an allowed DDTP.MODE transition:
+ *   {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
+ *   {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
+ * (keeping the current mode is always allowed). An accepted value is
+ * reduced to its PPN and MODE fields; a rejected one is replaced by the
+ * previous DDTP. The result is cached in s->ddtp and written back to the
+ * register.
+ */
+static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
+{
+    const uint64_t current = s->ddtp;
+    const uint64_t requested = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
+    const unsigned req_mode = get_field(requested, RISCV_IOMMU_DDTP_MODE);
+    const unsigned cur_mode = get_field(current, RISCV_IOMMU_DDTP_MODE);
+    uint64_t result = current;
+    bool allowed;
+
+    switch (req_mode) {
+    case RISCV_IOMMU_DDTP_MODE_OFF:
+    case RISCV_IOMMU_DDTP_MODE_BARE:
+        allowed = true;
+        break;
+
+    case RISCV_IOMMU_DDTP_MODE_1LVL:
+    case RISCV_IOMMU_DDTP_MODE_2LVL:
+    case RISCV_IOMMU_DDTP_MODE_3LVL:
+        allowed = req_mode == cur_mode ||
+                  cur_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
+                  cur_mode == RISCV_IOMMU_DDTP_MODE_BARE;
+        break;
+
+    default:
+        allowed = req_mode == cur_mode;
+        break;
+    }
+
+    if (allowed) {
+        /* clear reserved and busy bits, report back sanitized version */
+        result = set_field(requested & RISCV_IOMMU_DDTP_PPN,
+                           RISCV_IOMMU_DDTP_MODE, req_mode);
+    }
+
+    s->ddtp = result;
+    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, result);
+}
+
+/*
+ * Command function and opcode field.
+ * Combines a function code (shifted left by 7) with an opcode into a single
+ * value; used as the case labels when dispatching on the combined
+ * FUNC | OPCODE field extracted from a command's dword0.
+ */
+#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
+
+static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
+{ /* Fetch and execute queued commands from the head index up to the tail index. */
+ struct riscv_iommu_command cmd; /* command currently being executed */
+ MemTxResult res;
+ dma_addr_t addr;
+ uint32_t tail, head, ctrl; /* ctrl is the CQCSR value sampled on entry */
+ uint64_t cmd_opcode; /* combined FUNC | OPCODE field of dword0 */
+ GHFunc func; /* cache invalidation callback selected by the command */
+
+ ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+ tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
+ head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
+
+ /*
+ * Check for pending error or queue processing disabled: nothing is
+ * executed unless CQON is set and neither CMD_ILL nor CQMF is pending.
+ */
+ if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
+ !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
+ return;
+ }
+
+ while (tail != head) { /* one command per iteration */
+ addr = s->cq_addr + head * sizeof(cmd);
+ res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
+ MEMTXATTRS_UNSPECIFIED);
+
+ if (res != MEMTX_OK) { /* command fetch failed: flag CQMF and stop */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CQMF, 0);
+ goto fault;
+ }
+
+ trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
+
+ cmd_opcode = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
+
+ switch (cmd_opcode) {
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
+ RISCV_IOMMU_CMD_IOFENCE_OPCODE):
+ res = riscv_iommu_iofence(s,
+ cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
+
+ if (res != MEMTX_OK) { /* completion write failed: flag CQMF and stop */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CQMF, 0);
+ goto fault;
+ }
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
+ if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
+ /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
+ goto cmd_ill;
+ } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
+ /* invalidate all cache mappings */
+ func = riscv_iommu_iot_inval_all;
+ } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
+ /* invalidate cache matching GSCID */
+ func = riscv_iommu_iot_inval_gscid;
+ } else {
+ /* invalidate cache matching GSCID and ADDR (GPA) */
+ func = riscv_iommu_iot_inval_gscid_gpa;
+ }
+ riscv_iommu_iot_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
+ cmd.dword1 << 2 & TARGET_PAGE_MASK);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
+ /* invalidate all cache mappings, simplified model */
+ func = riscv_iommu_iot_inval_all;
+ } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
+ /* invalidate cache matching GSCID, simplified model */
+ func = riscv_iommu_iot_inval_gscid;
+ } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
+ /* invalidate cache matching GSCID and PSCID */
+ func = riscv_iommu_iot_inval_pscid;
+ } else {
+ /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
+ func = riscv_iommu_iot_inval_pscid_iova;
+ }
+ riscv_iommu_iot_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
+ cmd.dword1 << 2 & TARGET_PAGE_MASK);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
+ RISCV_IOMMU_CMD_IODIR_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
+ /* invalidate all device context cache mappings */
+ func = riscv_iommu_ctx_inval_all;
+ } else {
+ /* invalidate all device context matching DID */
+ func = riscv_iommu_ctx_inval_devid;
+ }
+ riscv_iommu_ctx_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
+ RISCV_IOMMU_CMD_IODIR_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
+ /* illegal command arguments IODIR_PDT & DV == 0 */
+ goto cmd_ill;
+ } else {
+ func = riscv_iommu_ctx_inval_devid_procid; /* match both DID and PID */
+ }
+ riscv_iommu_ctx_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
+ break;
+
+ /* ATS commands: treated as illegal unless ATS support is enabled */
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
+ RISCV_IOMMU_CMD_ATS_OPCODE):
+ if (!s->enable_ats) {
+ goto cmd_ill;
+ }
+
+ riscv_iommu_ats_inval(s, &cmd);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
+ RISCV_IOMMU_CMD_ATS_OPCODE):
+ if (!s->enable_ats) {
+ goto cmd_ill;
+ }
+
+ riscv_iommu_ats_prgr(s, &cmd);
+ break;
+
+ default:
+ cmd_ill: /* also the target for recognised commands with illegal arguments */
+ /* Invalid instruction, do not advance instruction index. */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CMD_ILL, 0);
+ goto fault;
+ }
+
+ /* Advance and update head pointer after command completes. */
+ head = (head + 1) & s->cq_mask;
+ riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
+ }
+ return;
+
+fault: /* raise the command queue interrupt if CIE was set in the CQCSR value sampled on entry */
+ if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
+ }
+}
+
+/*
+ * Process a write to CQCSR: turn the command queue on or off according to
+ * CQEN and always clear the BUSY bit.
+ *
+ * Turning the queue on latches its base address and index mask from CQB,
+ * resets the head and tail indices and clears stale error state; turning it
+ * off clears CQON.
+ */
+static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
+{
+    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+    const bool want_on = (csr & RISCV_IOMMU_CQCSR_CQEN) != 0;
+    const bool is_on = (csr & RISCV_IOMMU_CQCSR_CQON) != 0;
+    uint32_t set_bits = 0;
+    uint32_t clr_bits = RISCV_IOMMU_CQCSR_BUSY;
+
+    if (want_on && !is_on) {
+        const uint64_t cqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
+
+        s->cq_mask = (2ULL << get_field(cqb, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
+        s->cq_addr = PPN_PHYS(get_field(cqb, RISCV_IOMMU_CQB_PPN));
+        /* Index bits above the queue size become read-only. */
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
+        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
+        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
+        set_bits = RISCV_IOMMU_CQCSR_CQON;
+        clr_bits |= RISCV_IOMMU_CQCSR_CQMF |
+                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
+                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
+    } else if (!want_on && is_on) {
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
+        clr_bits |= RISCV_IOMMU_CQCSR_CQON;
+    }
+
+    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, set_bits, clr_bits);
+}
+
+/*
+ * Process a write to FQCSR: turn the fault queue on or off according to
+ * FQEN and always clear the BUSY bit.
+ *
+ * Turning the queue on latches its base address and index mask from FQB,
+ * resets the head and tail indices and clears stale error state; turning it
+ * off clears FQON.
+ */
+static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
+{
+    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+    const bool want_on = (csr & RISCV_IOMMU_FQCSR_FQEN) != 0;
+    const bool is_on = (csr & RISCV_IOMMU_FQCSR_FQON) != 0;
+    uint32_t set_bits = 0;
+    uint32_t clr_bits = RISCV_IOMMU_FQCSR_BUSY;
+
+    if (want_on && !is_on) {
+        const uint64_t fqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
+
+        s->fq_mask = (2ULL << get_field(fqb, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
+        s->fq_addr = PPN_PHYS(get_field(fqb, RISCV_IOMMU_FQB_PPN));
+        /* Index bits above the queue size become read-only. */
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
+        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
+        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
+        set_bits = RISCV_IOMMU_FQCSR_FQON;
+        clr_bits |= RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF;
+    } else if (!want_on && is_on) {
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
+        clr_bits |= RISCV_IOMMU_FQCSR_FQON;
+    }
+
+    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, set_bits, clr_bits);
+}
+
+/*
+ * Process a write to PQCSR: turn the page-request queue on or off according
+ * to PQEN and always clear the BUSY bit.
+ *
+ * Turning the queue on latches its base address and index mask from PQB,
+ * resets the head and tail indices and clears stale error state; turning it
+ * off clears PQON.
+ */
+static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
+{
+    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+    const bool want_on = (csr & RISCV_IOMMU_PQCSR_PQEN) != 0;
+    const bool is_on = (csr & RISCV_IOMMU_PQCSR_PQON) != 0;
+    uint32_t set_bits = 0;
+    uint32_t clr_bits = RISCV_IOMMU_PQCSR_BUSY;
+
+    if (want_on && !is_on) {
+        const uint64_t pqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
+
+        s->pq_mask = (2ULL << get_field(pqb, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
+        s->pq_addr = PPN_PHYS(get_field(pqb, RISCV_IOMMU_PQB_PPN));
+        /* Index bits above the queue size become read-only. */
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
+        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
+        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
+        set_bits = RISCV_IOMMU_PQCSR_PQON;
+        clr_bits |= RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF;
+    } else if (!want_on && is_on) {
+        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
+        clr_bits |= RISCV_IOMMU_PQCSR_PQON;
+    }
+
+    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, set_bits, clr_bits);
+}
+
+/*
+ * Debug translation interface: service a request posted via TR_REQ_CTL.
+ *
+ * Nothing is done unless GO_BUSY is set. Otherwise the IOVA from
+ * TR_REQ_IOVA is translated for the device / process ids taken from
+ * TR_REQ_CTL (without populating the translation cache), the outcome is
+ * written to TR_RESPONSE and GO_BUSY is cleared:
+ * - failure: the FAULT bit plus the fault cause shifted left by 10
+ *   (DMA_DISABLED when no translation context could be obtained),
+ * - success: the translated page number placed in the PPN field.
+ */
+static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
+{
+    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
+    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
+    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
+    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
+    RISCVIOMMUContext *ctx;
+    void *ref;
+
+    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
+        return;
+    }
+
+    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
+    if (ctx == NULL) {
+        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
+                              RISCV_IOMMU_TR_RESPONSE_FAULT |
+                              (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
+    } else {
+        IOMMUTLBEntry iotlb = {
+            .iova = iova,
+            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
+            .addr_mask = ~0,
+            .target_as = NULL,
+        };
+        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
+        if (fault) {
+            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
+        } else {
+            /*
+             * Place the page number into the PPN field rather than masking
+             * the bare page number with the field mask, which would drop
+             * its low bits and leave the rest misplaced.
+             * NOTE(review): assumes the PPN field does not start at bit 0,
+             * as the fault encoding above suggests - confirm against the
+             * register definitions.
+             *
+             * We do not support superpages (> 4kbs) for now, so the S bit
+             * is left clear in the response.
+             */
+            iova = iotlb.translated_addr & ~iotlb.addr_mask;
+            iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN,
+                             iova >> TARGET_PAGE_BITS);
+            iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
+        }
+        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
+    }
+
+    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
+                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
+    riscv_iommu_ctx_put(s, ref);
+}
+
+typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
+
+/*
+ * Handle a write to ICVEC: each of the CIV, FIV, PMIV and PIV fields of
+ * @data is limited to the corresponding field of s->icvec_avail_vectors
+ * before the register is updated.
+ */
+static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
+{
+    const uint64_t avail = s->icvec_avail_vectors;
+    const uint64_t civ = MIN(data & RISCV_IOMMU_ICVEC_CIV,
+                             avail & RISCV_IOMMU_ICVEC_CIV);
+    const uint64_t fiv = MIN(data & RISCV_IOMMU_ICVEC_FIV,
+                             avail & RISCV_IOMMU_ICVEC_FIV);
+    const uint64_t pmiv = MIN(data & RISCV_IOMMU_ICVEC_PMIV,
+                              avail & RISCV_IOMMU_ICVEC_PMIV);
+    const uint64_t piv = MIN(data & RISCV_IOMMU_ICVEC_PIV,
+                             avail & RISCV_IOMMU_ICVEC_PIV);
+    const uint64_t icvec = civ | fiv | pmiv | piv;
+
+    trace_riscv_iommu_icvec_write(data, icvec);
+
+    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
+}
+
+/*
+ * Handle a write to IPSR.
+ *
+ * For each of CIP, FIP and PIP: the bit ends up set only if it is set in
+ * @data, the matching queue has its interrupt enable bit set and at least
+ * one of that queue's interrupt conditions is still flagged in its CSR.
+ * In every other case the bit is cleared.
+ */
+static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
+{
+    uint32_t ipsr_set = 0;
+    uint32_t ipsr_clr = 0;
+    bool cq_pending = false;
+    bool fq_pending = false;
+    bool pq_pending = false;
+
+    if (data & RISCV_IOMMU_IPSR_CIP) {
+        const uint32_t cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+
+        cq_pending = (cqcsr & RISCV_IOMMU_CQCSR_CIE) &&
+                     (cqcsr & (RISCV_IOMMU_CQCSR_FENCE_W_IP |
+                               RISCV_IOMMU_CQCSR_CMD_ILL |
+                               RISCV_IOMMU_CQCSR_CMD_TO |
+                               RISCV_IOMMU_CQCSR_CQMF));
+    }
+    if (cq_pending) {
+        ipsr_set |= RISCV_IOMMU_IPSR_CIP;
+    } else {
+        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
+    }
+
+    if (data & RISCV_IOMMU_IPSR_FIP) {
+        const uint32_t fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+
+        fq_pending = (fqcsr & RISCV_IOMMU_FQCSR_FIE) &&
+                     (fqcsr & (RISCV_IOMMU_FQCSR_FQOF |
+                               RISCV_IOMMU_FQCSR_FQMF));
+    }
+    if (fq_pending) {
+        ipsr_set |= RISCV_IOMMU_IPSR_FIP;
+    } else {
+        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
+    }
+
+    if (data & RISCV_IOMMU_IPSR_PIP) {
+        const uint32_t pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+
+        pq_pending = (pqcsr & RISCV_IOMMU_PQCSR_PIE) &&
+                     (pqcsr & (RISCV_IOMMU_PQCSR_PQOF |
+                               RISCV_IOMMU_PQCSR_PQMF));
+    }
+    if (pq_pending) {
+        ipsr_set |= RISCV_IOMMU_IPSR_PIP;
+    } else {
+        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
+    }
+
+    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
+}
+
+/*
+ * Compute the value the register at 'reg_addr' takes when 'data' is written
+ * to it and store it, in little-endian, at 'dest':
+ * - bits marked read-only keep their current value,
+ * - all other bits are taken from 'data',
+ * - bits marked write-clear are cleared wherever 'data' has a 1.
+ */
+static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
+                                      void *dest, hwaddr reg_addr,
+                                      int size, uint64_t data)
+{
+    const uint64_t ro_mask = ldn_le_p(&s->regs_ro[reg_addr], size);
+    const uint64_t wc_mask = ldn_le_p(&s->regs_wc[reg_addr], size);
+    const uint64_t current = ldn_le_p(&s->regs_rw[reg_addr], size);
+    const uint64_t kept = current & ro_mask;
+    const uint64_t written = data & ~ro_mask;
+    const uint64_t cleared = data & wc_mask;
+
+    stn_le_p(dest, size, (kept | written) & ~cleared);
+}
+
+/*
+ * MMIO write handler for the IOMMU register file.
+ *
+ * Misaligned accesses and accesses reaching RISCV_IOMMU_REG_MSI_CONFIG or
+ * beyond are rejected. ICVEC and IPSR writes are handed to their dedicated
+ * helpers. Every other write is merged into the register file; registers
+ * with side effects additionally get their BUSY bit set and their
+ * processing function run.
+ */
+static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
+                                          uint64_t data, unsigned size,
+                                          MemTxAttrs attrs)
+{
+    RISCVIOMMUState *s = opaque;
+    riscv_iommu_process_fn *handler = NULL;
+    uint32_t reg_base = addr & ~3;
+    uint32_t busy_bit = 0;
+    uint64_t merged = 0;
+
+    if (addr & (size - 1)) {
+        /* Unsupported MMIO alignment or access size */
+        return MEMTX_ERROR;
+    }
+
+    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
+        /* Unsupported MMIO access location. */
+        return MEMTX_ACCESS_ERROR;
+    }
+
+    /* Track actionable MMIO write. */
+    switch (reg_base) {
+    case RISCV_IOMMU_REG_DDTP:
+    case RISCV_IOMMU_REG_DDTP + 4:
+        /* Either half of DDTP: BUSY lives in the low word. */
+        reg_base = RISCV_IOMMU_REG_DDTP;
+        busy_bit = RISCV_IOMMU_DDTP_BUSY;
+        handler = riscv_iommu_process_ddtp;
+        break;
+
+    case RISCV_IOMMU_REG_CQT:
+        handler = riscv_iommu_process_cq_tail;
+        break;
+
+    case RISCV_IOMMU_REG_CQCSR:
+        busy_bit = RISCV_IOMMU_CQCSR_BUSY;
+        handler = riscv_iommu_process_cq_control;
+        break;
+
+    case RISCV_IOMMU_REG_FQCSR:
+        busy_bit = RISCV_IOMMU_FQCSR_BUSY;
+        handler = riscv_iommu_process_fq_control;
+        break;
+
+    case RISCV_IOMMU_REG_PQCSR:
+        busy_bit = RISCV_IOMMU_PQCSR_BUSY;
+        handler = riscv_iommu_process_pq_control;
+        break;
+
+    case RISCV_IOMMU_REG_ICVEC:
+        /*
+         * ICVEC has a special write procedure: compute the merged value
+         * (stored as LE, hence the conversion to host endianness) and let
+         * the helper update the register.
+         */
+        riscv_iommu_write_reg_val(s, &merged, addr, size, data);
+        riscv_iommu_update_icvec(s, le64_to_cpu(merged));
+        return MEMTX_OK;
+
+    case RISCV_IOMMU_REG_IPSR:
+        /* Same procedure as ICVEC, with the IPSR helper. */
+        riscv_iommu_write_reg_val(s, &merged, addr, size, data);
+        riscv_iommu_update_ipsr(s, le64_to_cpu(merged));
+        return MEMTX_OK;
+
+    case RISCV_IOMMU_REG_TR_REQ_CTL:
+        busy_bit = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
+        handler = riscv_iommu_process_dbg;
+        break;
+
+    default:
+        break;
+    }
+
+    /*
+     * Registers update might be not synchronized with core logic.
+     * If system software updates register when relevant BUSY bit
+     * is set IOMMU behavior of additional writes to the register
+     * is UNSPECIFIED.
+     */
+    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
+
+    /* Busy flag update, MSB 4-byte register. */
+    if (busy_bit != 0) {
+        const uint32_t current = ldl_le_p(&s->regs_rw[reg_base]);
+
+        stl_le_p(&s->regs_rw[reg_base], current | busy_bit);
+    }
+
+    if (handler != NULL) {
+        handler(s);
+    }
+
+    return MEMTX_OK;
+}
+
+/*
+ * MMIO read handler for the IOMMU register window.
+ *
+ * An access that is not aligned to its own size fails with MEMTX_ERROR;
+ * one that reaches past RISCV_IOMMU_REG_MSI_CONFIG fails with
+ * MEMTX_ACCESS_ERROR. Otherwise the little-endian value held in the
+ * regs_rw backing store at @addr is returned through @data.
+ */
+static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
+    uint64_t *data, unsigned size, MemTxAttrs attrs)
+{
+    RISCVIOMMUState *iommu = opaque;
+    const hwaddr misalignment = addr & (size - 1);
+
+    if (misalignment) {
+        /* Unsupported MMIO alignment. */
+        return MEMTX_ERROR;
+    }
+
+    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
+        return MEMTX_ACCESS_ERROR;
+    }
+
+    *data = ldn_le_p(iommu->regs_rw + addr, size);
+
+    return MEMTX_OK;
+}
+
+/* Access callbacks and size constraints of the IOMMU register window. */
+static const MemoryRegionOps riscv_iommu_mmio_ops = {
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .write_with_attrs = riscv_iommu_mmio_write,
+    .read_with_attrs = riscv_iommu_mmio_read,
+    /* Access sizes accepted from the guest: 4 to 8 bytes. */
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    /* Access sizes the callbacks implement; unaligned ones are not. */
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+        .unaligned = false,
+    },
+};
+
+/*
+ * Translations matching the MSI pattern check are redirected to the
+ * "riscv-iommu-trap" memory region as untranslated addresses, for additional
+ * MSI/MRIF interception by the IOMMU interrupt remapping implementation.
+ * Note: Device emulation code generating an MSI is expected to provide valid
+ * memory transaction attributes with requester_id set.
+ */
+/*
+ * Write handler of the "riscv-iommu-trap" region.
+ *
+ * Writes carrying unspecified transaction attributes are refused. For the
+ * rest, the device context is looked up from the requester id (combined
+ * with the configured bus number) and the write is handed to
+ * riscv_iommu_msi_write(). A missing context yields MEMTX_ACCESS_ERROR.
+ */
+static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
+    uint64_t data, unsigned size, MemTxAttrs attrs)
+{
+    RISCVIOMMUState *iommu = (RISCVIOMMUState *)opaque;
+    RISCVIOMMUContext *dev_ctx;
+    MemTxResult status = MEMTX_ACCESS_ERROR;
+    uint32_t devid;
+    void *ref;
+
+    if (attrs.unspecified) {
+        return MEMTX_ACCESS_ERROR;
+    }
+
+    /* FIXME: PCIe bus remapping for attached endpoints. */
+    devid = attrs.requester_id;
+    devid |= iommu->bus << 8;
+
+    dev_ctx = riscv_iommu_ctx(iommu, devid, 0, &ref);
+    if (dev_ctx != NULL) {
+        status = riscv_iommu_msi_write(iommu, dev_ctx, addr, data, size,
+                                       attrs);
+    }
+
+    /* The reference is released whether or not a context was found. */
+    riscv_iommu_ctx_put(iommu, ref);
+    return status;
+}
+
+/* Reads from the MSI trap region are never valid: always fail the access. */
+static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
+                                         uint64_t *data, unsigned size,
+                                         MemTxAttrs attrs)
+{
+    return MEMTX_ACCESS_ERROR;
+}
+
+/* Access callbacks and size constraints of the MSI/MRIF trap region. */
+static const MemoryRegionOps riscv_iommu_trap_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .write_with_attrs = riscv_iommu_trap_write,
+    .read_with_attrs = riscv_iommu_trap_read,
+    /* Access sizes accepted from the initiator: 4 to 8 bytes. */
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    /* Access sizes the callbacks implement, unaligned included. */
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+        .unaligned = true,
+    },
+};
+
+/*
+ * Realize the IOMMU core device.
+ *
+ * Derives the capability word from the device properties, allocates the
+ * register backing store with its read-only and write-1-to-clear masks,
+ * programs the power-on register values, and creates the downstream and
+ * MSI-trap address spaces together with the translation caches.
+ */
+static void riscv_iommu_realize(DeviceState *dev, Error **errp)
+{
+    RISCVIOMMUState *s = RISCV_IOMMU(dev);
+    uint64_t cap = s->version & RISCV_IOMMU_CAP_VERSION;
+    uint64_t reset_mode;
+
+    /* Optional capabilities, selected through device properties. */
+    if (s->enable_msi) {
+        cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
+    }
+    if (s->enable_ats) {
+        cap |= RISCV_IOMMU_CAP_ATS;
+    }
+    if (s->enable_s_stage) {
+        cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
+               RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
+    }
+    if (s->enable_g_stage) {
+        cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
+               RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
+    }
+
+    /* Enable translation debug interface */
+    cap |= RISCV_IOMMU_CAP_DBG;
+
+    /* Report QEMU target physical address space limits */
+    cap = set_field(cap, RISCV_IOMMU_CAP_PAS, TARGET_PHYS_ADDR_SPACE_BITS);
+
+    /* TODO: method to report supported PID bits */
+    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
+    cap |= RISCV_IOMMU_CAP_PD8;
+
+    s->cap = cap;
+
+    /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
+    if (s->enable_off) {
+        reset_mode = RISCV_IOMMU_DDTP_MODE_OFF;
+    } else {
+        reset_mode = RISCV_IOMMU_DDTP_MODE_BARE;
+    }
+    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, reset_mode);
+
+    /* register storage */
+    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+
+    /* Mark all registers read-only */
+    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
+
+    /*
+     * Register complete MMIO space, including MSI/PBA registers.
+     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
+     * managed directly by the PCIDevice implementation.
+     */
+    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
+                          "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
+
+    /*
+     * Set power-on register state. Writable fields are opened up by
+     * clearing their bits in the read-only mask.
+     */
+    stq_le_p(s->regs_rw + RISCV_IOMMU_REG_CAP, s->cap);
+    stq_le_p(s->regs_rw + RISCV_IOMMU_REG_FCTL, 0);
+    stq_le_p(s->regs_ro + RISCV_IOMMU_REG_FCTL,
+             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
+    stq_le_p(s->regs_ro + RISCV_IOMMU_REG_DDTP,
+             ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
+    stq_le_p(s->regs_ro + RISCV_IOMMU_REG_CQB,
+             ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
+    stq_le_p(s->regs_ro + RISCV_IOMMU_REG_FQB,
+             ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
+    stq_le_p(s->regs_ro + RISCV_IOMMU_REG_PQB,
+             ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
+
+    /* Queue control/status: W1C fault bits, read-only ON/BUSY bits. */
+    stl_le_p(s->regs_wc + RISCV_IOMMU_REG_CQCSR,
+             RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO |
+             RISCV_IOMMU_CQCSR_CMD_ILL);
+    stl_le_p(s->regs_ro + RISCV_IOMMU_REG_CQCSR,
+             RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY);
+    stl_le_p(s->regs_wc + RISCV_IOMMU_REG_FQCSR,
+             RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF);
+    stl_le_p(s->regs_ro + RISCV_IOMMU_REG_FQCSR,
+             RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY);
+    stl_le_p(s->regs_wc + RISCV_IOMMU_REG_PQCSR,
+             RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF);
+    stl_le_p(s->regs_ro + RISCV_IOMMU_REG_PQCSR,
+             RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY);
+    stl_le_p(s->regs_wc + RISCV_IOMMU_REG_IPSR, ~0);
+    stl_le_p(s->regs_ro + RISCV_IOMMU_REG_ICVEC, 0);
+    stq_le_p(s->regs_rw + RISCV_IOMMU_REG_DDTP, s->ddtp);
+
+    /* If debug registers enabled. */
+    if (s->cap & RISCV_IOMMU_CAP_DBG) {
+        stq_le_p(s->regs_ro + RISCV_IOMMU_REG_TR_REQ_IOVA, 0);
+        stq_le_p(s->regs_ro + RISCV_IOMMU_REG_TR_REQ_CTL,
+                 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
+    }
+
+    /* Memory region for downstream access, if specified. */
+    if (!s->target_mr) {
+        /* Fallback to global system memory. */
+        s->target_as = &address_space_memory;
+    } else {
+        s->target_as = g_new0(AddressSpace, 1);
+        address_space_init(s->target_as, s->target_mr,
+                           "riscv-iommu-downstream");
+    }
+
+    /* Memory region for untranslated MRIF/MSI writes */
+    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
+                          "riscv-iommu-trap", ~0ULL);
+    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
+
+    /* Device translation context cache */
+    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
+                                         riscv_iommu_ctx_equal,
+                                         g_free, NULL);
+
+    /* IO translated address cache */
+    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
+                                         riscv_iommu_iot_equal,
+                                         g_free, NULL);
+
+    /* Not yet linked to any sibling IOMMU; no address spaces created. */
+    s->iommus.le_next = NULL;
+    s->iommus.le_prev = NULL;
+    QLIST_INIT(&s->spaces);
+}
+
+/* Unrealize the IOMMU core: drop the references on both translation caches. */
+static void riscv_iommu_unrealize(DeviceState *dev)
+{
+    RISCVIOMMUState *iommu = RISCV_IOMMU(dev);
+    GHashTable *iot = iommu->iot_cache;
+    GHashTable *ctx = iommu->ctx_cache;
+
+    g_hash_table_unref(iot);
+    g_hash_table_unref(ctx);
+}
+
+static Property riscv_iommu_properties[] = { /* qdev properties of the IOMMU core device */
+ DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
+ RISCV_IOMMU_SPEC_DOT_VER), /* masked into the capability version field at realize */
+ DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), /* OR-ed (shifted by 8) into the requester id of trapped MSI writes */
+ DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
+ LIMIT_CACHE_IOT), /* IO translation cache size limit */
+ DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), /* advertise MSI_FLAT and MSI_MRIF capabilities */
+ DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE), /* advertise the ATS capability */
+ DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), /* out-of-reset DDTP mode: OFF when set, BARE otherwise */
+ DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), /* advertise Sv32/Sv39/Sv48/Sv57 */
+ DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), /* advertise Sv32x4/Sv39x4/Sv48x4/Sv57x4 */
+ DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
+ TYPE_MEMORY_REGION, MemoryRegion *), /* optional downstream region; system memory is used when unset */
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class initializer: install realize/unrealize hooks and the property list. */
+static void riscv_iommu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->realize = riscv_iommu_realize;
+    device_class->unrealize = riscv_iommu_unrealize;
+
+    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
+    device_class->user_creatable = false;
+
+    device_class_set_props(device_class, riscv_iommu_properties);
+}
+
+/* QOM type description of the IOMMU core device. */
+static const TypeInfo riscv_iommu_info = {
+    .parent = TYPE_DEVICE,
+    .name = TYPE_RISCV_IOMMU,
+    .class_init = riscv_iommu_class_init,
+    .instance_size = sizeof(RISCVIOMMUState),
+};
+
+/* Printable names of the IOMMUAccessFlags values, indexed by flag & IOMMU_RW. */
+static const char *IOMMU_FLAG_STR[] = {
+    [IOMMU_NONE] = "NA",
+    [IOMMU_RO] = "RO",
+    [IOMMU_WO] = "WR",
+    [IOMMU_RW] = "RW",
+};
+
+/* RISC-V IOMMU Memory Region - Address Translation Space */
+/*
+ * IOMMU memory region translate callback.
+ *
+ * Looks up the translation context of the device/process pair and runs the
+ * translation. When no context is available or the translation reports
+ * a failure, the returned entry has an empty mask and no permissions.
+ */
+static IOMMUTLBEntry riscv_iommu_memory_region_translate(
+    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
+    IOMMUAccessFlags flag, int iommu_idx)
+{
+    RISCVIOMMUSpace *space = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+    RISCVIOMMUState *iommu = space->iommu;
+    RISCVIOMMUContext *ctx;
+    void *ref;
+    IOMMUTLBEntry entry = {
+        .iova = addr,
+        .target_as = iommu->target_as,
+        .addr_mask = ~0ULL,
+        .perm = flag,
+    };
+
+    ctx = riscv_iommu_ctx(iommu, space->devid, iommu_idx, &ref);
+
+    /*
+     * Either no usable context (translation disabled or invalid), or the
+     * translation itself was disabled or reported a fault.
+     */
+    if (ctx == NULL || riscv_iommu_translate(iommu, ctx, &entry, true)) {
+        entry.addr_mask = 0;
+        entry.perm = IOMMU_NONE;
+    }
+
+    /* Trace all dma translations with original access flags. */
+    trace_riscv_iommu_dma(iommu->parent_obj.id, PCI_BUS_NUM(space->devid),
+                          PCI_SLOT(space->devid), PCI_FUNC(space->devid),
+                          iommu_idx, IOMMU_FLAG_STR[flag & IOMMU_RW],
+                          entry.iova, entry.translated_addr);
+
+    riscv_iommu_ctx_put(iommu, ref);
+
+    return entry;
+}
+
+/*
+ * Notifier flag change callback: record whether the address space has a
+ * notifier attached. Transitions between two non-empty flag sets leave the
+ * state untouched. Always returns 0.
+ */
+static int riscv_iommu_memory_region_notify(
+    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
+    IOMMUNotifierFlag new, Error **errp)
+{
+    RISCVIOMMUSpace *space = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+    const bool first_added = (old == IOMMU_NOTIFIER_NONE);
+    const bool last_removed = !first_added && (new == IOMMU_NOTIFIER_NONE);
+
+    if (first_added) {
+        space->notifier = true;
+        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
+    } else if (last_removed) {
+        space->notifier = false;
+        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
+    }
+
+    return 0;
+}
+
+/* True when the device's PCI class code is 0x0806 (the IOMMU class code). */
+static inline bool pci_is_iommu(PCIDevice *pdev)
+{
+    const uint16_t class_code = pci_get_word(&pdev->config[PCI_CLASS_DEVICE]);
+
+    return class_code == 0x0806;
+}
+
+/*
+ * PCI get_address_space callback.
+ *
+ * @bus:    PCI bus the requesting function sits on
+ * @opaque: a RISCVIOMMUState registered for @bus
+ * @devfn:  device/function number of the requester
+ *
+ * An IOMMU function itself is not translated and gets the IOMMU's
+ * downstream address space. For any other function, every IOMMU linked on
+ * the bus is asked in list order for an address space; if none provides
+ * one, global system memory is returned.
+ */
+static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
+{
+    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
+    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
+    AddressSpace *as = NULL;
+
+    if (pdev && pci_is_iommu(pdev)) {
+        return s->target_as;
+    }
+
+    /*
+     * Find first registered IOMMU device.
+     *
+     * QLIST's le_prev holds the address of the previous element's le_next
+     * field, so dereferencing it yields the current element again and
+     * would never make progress. Recover the previous element from the
+     * field address instead. The first IOMMU has le_prev == NULL.
+     */
+    while (s->iommus.le_prev) {
+        s = container_of(s->iommus.le_prev, RISCVIOMMUState, iommus.le_next);
+    }
+
+    /* Find first matching IOMMU */
+    while (s != NULL && as == NULL) {
+        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
+        s = s->iommus.le_next;
+    }
+
+    return as ? as : &address_space_memory;
+}
+
+/* PCI IOMMU hooks: only the address space lookup is provided. */
+static const PCIIOMMUOps riscv_iommu_ops = {
+    .get_address_space = riscv_iommu_find_as
+};
+
+/*
+ * Attach @iommu to PCI bus @bus.
+ *
+ * If the bus is already served by a RISC-V IOMMU, the new instance is
+ * linked after the one registered with the bus. If the bus has no IOMMU
+ * at all, @iommu is registered as its IOMMU. Any other situation is
+ * reported through @errp.
+ */
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+                                 Error **errp)
+{
+    const PCIIOMMUOps *ops = bus->iommu_ops;
+
+    if (ops && ops->get_address_space == riscv_iommu_find_as) {
+        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
+        RISCVIOMMUState *registered = (RISCVIOMMUState *)bus->iommu_opaque;
+
+        QLIST_INSERT_AFTER(registered, iommu, iommus);
+        return;
+    }
+
+    if (ops || bus->iommu_opaque) {
+        /* The bus is already claimed by a different IOMMU implementation. */
+        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
+                   pci_bus_num(bus));
+        return;
+    }
+
+    pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
+}
+
+/*
+ * Map transaction attributes to an IOMMU index: the process id carried by
+ * the attributes, or RISCV_IOMMU_NOPROCID when they are unspecified.
+ */
+static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
+                                           MemTxAttrs attrs)
+{
+    if (attrs.unspecified) {
+        return RISCV_IOMMU_NOPROCID;
+    }
+
+    return (int)attrs.pid;
+}
+
+/* Number of IOMMU indexes: two to the power of the supported PID width. */
+static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
+{
+    RISCVIOMMUSpace *space = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+    RISCVIOMMUState *iommu = space->iommu;
+
+    return 1 << iommu->pid_bits;
+}
+
+/* Class initializer of the IOMMU memory region: install its callbacks. */
+static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
+{
+    IOMMUMemoryRegionClass *mr_class = IOMMU_MEMORY_REGION_CLASS(klass);
+
+    /* Index handling (one index per process id). */
+    mr_class->num_indexes = riscv_iommu_memory_region_index_len;
+    mr_class->attrs_to_index = riscv_iommu_memory_region_index;
+
+    /* Translation and notifier bookkeeping. */
+    mr_class->translate = riscv_iommu_memory_region_translate;
+    mr_class->notify_flag_changed = riscv_iommu_memory_region_notify;
+}
+
+/* QOM type description of the RISC-V IOMMU memory region. */
+static const TypeInfo riscv_iommu_memory_region_info = {
+    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .class_init = riscv_iommu_memory_region_init,
+};
+
+/* Register the IOMMU memory region type and the IOMMU core device type. */
+static void riscv_iommu_register_mr_types(void)
+{
+    const TypeInfo *const memory_region_type = &riscv_iommu_memory_region_info;
+    const TypeInfo *const device_type = &riscv_iommu_info;
+
+    type_register_static(memory_region_type);
+    type_register_static(device_type);
+}
+
+type_init(riscv_iommu_register_mr_types);
diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h
new file mode 100644
index 0000000000..da3f03440c
--- /dev/null
+++ b/hw/riscv/riscv-iommu.h
@@ -0,0 +1,130 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_STATE_H
+#define HW_RISCV_IOMMU_STATE_H
+
+#include "qom/object.h"
+#include "hw/riscv/iommu.h"
+
+struct RISCVIOMMUState { /* Per-instance state of the emulated RISC-V IOMMU core device */
+ /*< private >*/
+ DeviceState parent_obj;
+
+ /*< public >*/
+ uint32_t version; /* Reported interface version number */
+ uint32_t pid_bits; /* process identifier width */
+ uint32_t bus; /* PCI bus mapping for non-root endpoints */
+
+ uint64_t cap; /* IOMMU supported capabilities */
+ uint64_t fctl; /* IOMMU enabled features */
+ uint64_t icvec_avail_vectors; /* Available interrupt vectors in ICVEC */
+
+ bool enable_off; /* Enable out-of-reset OFF mode (DMA disabled) */
+ bool enable_msi; /* Enable MSI remapping */
+ bool enable_ats; /* Enable ATS support */
+ bool enable_s_stage; /* Enable S/VS-Stage translation */
+ bool enable_g_stage; /* Enable G-Stage translation */
+
+ /* IOMMU Internal State */
+ uint64_t ddtp; /* Validated Device Directory Tree Root Pointer */
+
+ dma_addr_t cq_addr; /* Command queue base physical address */
+ dma_addr_t fq_addr; /* Fault/event queue base physical address */
+ dma_addr_t pq_addr; /* Page request queue base physical address */
+
+ uint32_t cq_mask; /* Command queue index bit mask */
+ uint32_t fq_mask; /* Fault/event queue index bit mask */
+ uint32_t pq_mask; /* Page request queue index bit mask */
+
+ /* interrupt notifier */
+ void (*notify)(RISCVIOMMUState *iommu, unsigned vector); /* invoked with the interrupt vector to signal; presumably set by the PCI/sys wrapper device - confirm */
+
+ /* IOMMU State Machine */
+ QemuThread core_proc; /* Background processing thread */
+ QemuCond core_cond; /* Background processing wake up signal */
+ unsigned core_exec; /* Processing thread execution actions */
+
+ /* IOMMU target address space */
+ AddressSpace *target_as; /* Built over target_mr at realize, or global system memory when target_mr is unset */
+ MemoryRegion *target_mr; /* Optional downstream region ("downstream-mr" link property) */
+
+ /* MSI / MRIF access trap */
+ AddressSpace trap_as; /* Address space rooted at trap_mr */
+ MemoryRegion trap_mr; /* I/O region served by the MSI trap read/write handlers */
+
+ GHashTable *ctx_cache; /* Device translation Context Cache */
+
+ GHashTable *iot_cache; /* IO Translated Address Cache */
+ unsigned iot_limit; /* IO Translation Cache size limit */
+
+ /* MMIO Hardware Interface */
+ MemoryRegion regs_mr; /* MMIO register window, RISCV_IOMMU_REG_SIZE bytes */
+ uint8_t *regs_rw; /* register state (user write) */
+ uint8_t *regs_wc; /* write-1-to-clear mask */
+ uint8_t *regs_ro; /* read-only mask */
+
+ QLIST_ENTRY(RISCVIOMMUState) iommus; /* Link to the other IOMMUs attached to the same PCI bus */
+ QLIST_HEAD(, RISCVIOMMUSpace) spaces; /* List of RISCVIOMMUSpace entries owned by this IOMMU */
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+ Error **errp);
+
+/* private helpers */
+
+/* Register helper functions */
+/*
+ * Read-modify-write a 32-bit little-endian register at byte offset @idx:
+ * clear the @clr bits, then set the @set bits. Returns the previous value.
+ */
+static inline uint32_t riscv_iommu_reg_mod32(RISCVIOMMUState *s,
+    unsigned idx, uint32_t set, uint32_t clr)
+{
+    uint8_t *reg = &s->regs_rw[idx];
+    const uint32_t previous = ldl_le_p(reg);
+    const uint32_t updated = set | (previous & ~clr);
+
+    stl_le_p(reg, updated);
+    return previous;
+}
+
+/* Store @set into the 32-bit little-endian register at byte offset @idx. */
+static inline void riscv_iommu_reg_set32(RISCVIOMMUState *s, unsigned idx,
+                                         uint32_t set)
+{
+    uint8_t *reg = &s->regs_rw[idx];
+
+    stl_le_p(reg, set);
+}
+
+/* Load the 32-bit little-endian register at byte offset @idx. */
+static inline uint32_t riscv_iommu_reg_get32(RISCVIOMMUState *s, unsigned idx)
+{
+    const uint8_t *reg = &s->regs_rw[idx];
+
+    return ldl_le_p(reg);
+}
+
+/*
+ * Read-modify-write a 64-bit little-endian register at byte offset @idx:
+ * clear the @clr bits, then set the @set bits. Returns the previous value.
+ */
+static inline uint64_t riscv_iommu_reg_mod64(RISCVIOMMUState *s, unsigned idx,
+                                             uint64_t set, uint64_t clr)
+{
+    uint8_t *reg = &s->regs_rw[idx];
+    const uint64_t previous = ldq_le_p(reg);
+    const uint64_t updated = set | (previous & ~clr);
+
+    stq_le_p(reg, updated);
+    return previous;
+}
+
+/* Store @set into the 64-bit little-endian register at byte offset @idx. */
+static inline void riscv_iommu_reg_set64(RISCVIOMMUState *s, unsigned idx,
+                                         uint64_t set)
+{
+    uint8_t *reg = &s->regs_rw[idx];
+
+    stq_le_p(reg, set);
+}
+
+/* Load the 64-bit little-endian register at byte offset @idx. */
+static inline uint64_t riscv_iommu_reg_get64(RISCVIOMMUState *s,
+                                             unsigned idx)
+{
+    const uint8_t *reg = &s->regs_rw[idx];
+
+    return ldq_le_p(reg);
+}
+#endif
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 9b3dcf3a7a..c5e74126b1 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -645,7 +645,8 @@ static void sifive_u_machine_init(MachineState *machine)
rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
memmap[SIFIVE_U_DEV_MROM].base, &address_space_memory);
- riscv_rom_copy_firmware_info(machine, memmap[SIFIVE_U_DEV_MROM].base,
+ riscv_rom_copy_firmware_info(machine, &s->soc.u_cpus,
+ memmap[SIFIVE_U_DEV_MROM].base,
memmap[SIFIVE_U_DEV_MROM].size,
sizeof(reset_vec), kernel_entry);
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
new file mode 100644
index 0000000000..0527c56c91
--- /dev/null
+++ b/hw/riscv/trace-events
@@ -0,0 +1,17 @@
+# See documentation at docs/devel/tracing.rst
+
+# riscv-iommu.c
+riscv_iommu_new(const char *id, unsigned b, unsigned d, unsigned f) "%s: device attached %04x:%02x.%d"
+riscv_iommu_flt(const char *id, unsigned b, unsigned d, unsigned f, uint64_t reason, uint64_t iova) "%s: fault %04x:%02x.%u reason: 0x%"PRIx64" iova: 0x%"PRIx64
+riscv_iommu_pri(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: page request %04x:%02x.%u iova: 0x%"PRIx64
+riscv_iommu_dma(const char *id, unsigned b, unsigned d, unsigned f, unsigned pasid, const char *dir, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u #%u %s 0x%"PRIx64" -> 0x%"PRIx64
+riscv_iommu_msi(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u MSI 0x%"PRIx64" -> 0x%"PRIx64
+riscv_iommu_mrif_notification(const char *id, uint32_t nid, uint64_t phys) "%s: sent MRIF notification 0x%x to 0x%"PRIx64
+riscv_iommu_cmd(const char *id, uint64_t l, uint64_t u) "%s: command 0x%"PRIx64" 0x%"PRIx64
+riscv_iommu_notifier_add(const char *id) "%s: dev-iotlb notifier added"
+riscv_iommu_notifier_del(const char *id) "%s: dev-iotlb notifier removed"
+riscv_iommu_notify_int_vector(uint32_t cause, uint32_t vector) "Interrupt cause 0x%x sent via vector 0x%x"
+riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0x%x actual 0x%x"
+riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64
+riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate"
+riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response"
diff --git a/hw/riscv/trace.h b/hw/riscv/trace.h
new file mode 100644
index 0000000000..8c0e3ca1f3
--- /dev/null
+++ b/hw/riscv/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_riscv.h"
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index ee3129f3b3..45a8c4f819 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -32,6 +32,7 @@
#include "hw/core/sysbus-fdt.h"
#include "target/riscv/pmu.h"
#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/iommu.h"
#include "hw/riscv/virt.h"
#include "hw/riscv/boot.h"
#include "hw/riscv/numa.h"
@@ -1032,6 +1033,30 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf)
bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf);
}
+/*
+ * Describe a PCI-attached RISC-V IOMMU in the machine device tree.
+ *
+ * Adds an "iommu@<bdf>" node below the PCI host node and sets the host
+ * node's "iommu-map" so that every requester id except @bdf itself is
+ * mapped to the new IOMMU.
+ */
+static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf)
+{
+    static const char compatible[] = "riscv,pci-iommu";
+    void *fdt = MACHINE(s)->fdt;
+    g_autofree char *pci_node =
+        g_strdup_printf("/soc/pci@%lx",
+                        (long) virt_memmap[VIRT_PCIE_ECAM].base);
+    g_autofree char *iommu_node =
+        g_strdup_printf("%s/iommu@%x", pci_node, bdf);
+    uint32_t iommu_phandle = qemu_fdt_alloc_phandle(fdt);
+
+    qemu_fdt_add_subnode(fdt, iommu_node);
+
+    /* The compatible string is stored with its terminating NUL. */
+    qemu_fdt_setprop(fdt, iommu_node, "compatible",
+                     compatible, sizeof(compatible));
+    qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
+    qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
+    qemu_fdt_setprop_cells(fdt, iommu_node, "reg",
+                           bdf << 8, 0, 0, 0, 0);
+
+    /* Two ranges: ids below @bdf, and ids above it up to 0xffff. */
+    qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map",
+                           0, iommu_phandle, 0, bdf,
+                           bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf);
+}
+
static void finalize_fdt(RISCVVirtState *s)
{
uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
@@ -1738,9 +1763,11 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
MachineClass *mc = MACHINE_GET_CLASS(machine);
if (device_is_dynamic_sysbus(mc, dev) ||
- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) {
return HOTPLUG_HANDLER(machine);
}
+
return NULL;
}
@@ -1761,6 +1788,10 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
}
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) {
+ create_fdt_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
+ }
}
static void virt_machine_class_init(ObjectClass *oc, void *data)