aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-09-26 19:47:00 +0100
committerPeter Maydell <peter.maydell@linaro.org>2016-09-26 19:47:00 +0100
commit7cfdc02dae0d2ff58c897496cfdbbafc0eda0f3f (patch)
tree4ab986d748b8e4f914a388fe449366f9812b0f7d
parent3b71ec8516bb50e9a743645bf139571de0b39f61 (diff)
parentfb9f592623b0f9bb82a88d68d7921fb581918ef5 (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pc: fixes and features beginning of guest error handling for virtio devices amd iommu pc compat fixes Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Fri 23 Sep 2016 23:02:09 BST # gpg: using RSA key 0x281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: hw/i386: AMD IOMMU IVRS table hw/i386: Introduce AMD IOMMU hw/i386/trace-events: Add AMD IOMMU trace events hw/pci: Prepare for AMD IOMMU virtio: handle virtqueue_get_head() errors virtio: handle virtqueue_num_heads() errors virtio: handle virtqueue_read_next_desc() errors virtio: use unsigned int for virtqueue_get_avail_bytes() index virtio: handle virtqueue_get_avail_bytes() errors virtio: handle virtqueue_map_desc() errors virtio: migrate vdev->broken flag virtio: stop virtqueue processing if device is broken virtio: fix stray tab character target-i386: turn off CPU.l3-cache only for 2.7 and older machine types pc: clean up COMPAT macro chaining virtio: add check for descriptor's mapped address tests: add /vhost-user/flags-mismatch test tests: add a simple /vhost-user/multiqueue test tests: add /vhost-user/connect-fail test Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/acpi/aml-build.c2
-rw-r--r--hw/i386/Makefile.objs1
-rw-r--r--hw/i386/acpi-build.c76
-rw-r--r--hw/i386/amd_iommu.c1202
-rw-r--r--hw/i386/amd_iommu.h289
-rw-r--r--hw/i386/intel_iommu.c1
-rw-r--r--hw/i386/trace-events29
-rw-r--r--hw/i386/x86-iommu.c6
-rw-r--r--hw/virtio/virtio.c237
-rw-r--r--include/hw/acpi/aml-build.h1
-rw-r--r--include/hw/i386/pc.h9
-rw-r--r--include/hw/i386/x86-iommu.h12
-rw-r--r--include/hw/pci/pci.h3
-rw-r--r--include/hw/virtio/virtio.h3
-rw-r--r--tests/Makefile.include2
-rw-r--r--tests/vhost-user-test.c208
16 files changed, 2009 insertions, 72 deletions
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index db3e914fb4..b2a1e4033b 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -226,7 +226,7 @@ static void build_extop_package(GArray *package, uint8_t op)
build_prepend_byte(package, 0x5B); /* ExtOpPrefix */
}
-static void build_append_int_noprefix(GArray *table, uint64_t value, int size)
+void build_append_int_noprefix(GArray *table, uint64_t value, int size)
{
int i;
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 90e94ffefd..909ead6a77 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -3,6 +3,7 @@ obj-y += multiboot.o
obj-y += pc.o pc_piix.o pc_q35.o
obj-y += pc_sysfw.o
obj-y += x86-iommu.o intel_iommu.o
+obj-y += amd_iommu.o
obj-$(CONFIG_XEN) += ../xenpv/ xen/
obj-y += kvmvapic.o
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 433febafdd..c20bc71a67 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -59,7 +59,8 @@
#include "qapi/qmp/qint.h"
#include "qom/qom-qobject.h"
-#include "hw/i386/x86-iommu.h"
+#include "hw/i386/amd_iommu.h"
+#include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
@@ -2562,6 +2563,62 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
build_header(linker, table_data, (void *)(table_data->data + dmar_start),
"DMAR", table_data->len - dmar_start, 1, NULL, NULL);
}
+/*
+ * IVRS table as specified in AMD IOMMU Specification v2.62, Section 5.2
+ * accessible here http://support.amd.com/TechDocs/48882_IOMMU.pdf
+ */
+static void
+build_amd_iommu(GArray *table_data, BIOSLinker *linker)
+{
+ int iommu_start = table_data->len;
+ AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
+
+ /* IVRS header */
+ acpi_data_push(table_data, sizeof(AcpiTableHeader));
+ /* IVinfo - IO virtualization information common to all
+ * IOMMU units in a system
+ */
+ build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+ /* reserved */
+ build_append_int_noprefix(table_data, 0, 8);
+
+ /* IVHD definition - type 10h */
+ build_append_int_noprefix(table_data, 0x10, 1);
+ /* virtualization flags */
+ build_append_int_noprefix(table_data,
+ (1UL << 0) | /* HtTunEn */
+ (1UL << 4) | /* iotblSup */
+ (1UL << 6) | /* PrefSup */
+ (1UL << 7), /* PPRSup */
+ 1);
+ /* IVHD length */
+ build_append_int_noprefix(table_data, 0x24, 2);
+ /* DeviceID */
+ build_append_int_noprefix(table_data, s->devid, 2);
+ /* Capability offset */
+ build_append_int_noprefix(table_data, s->capab_offset, 2);
+ /* IOMMU base address */
+ build_append_int_noprefix(table_data, s->mmio.addr, 8);
+ /* PCI Segment Group */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* IOMMU info */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* IOMMU Feature Reporting */
+ build_append_int_noprefix(table_data,
+ (48UL << 30) | /* HATS */
+ (48UL << 28) | /* GATS */
+ (1UL << 2), /* GTSup */
+ 4);
+ /*
+ * Type 1 device entry reporting all devices
+ * These are 4-byte device entries currently reporting the range of
+ * Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte)
+ */
+ build_append_int_noprefix(table_data, 0x0000001, 4);
+
+ build_header(linker, table_data, (void *)(table_data->data + iommu_start),
+ "IVRS", table_data->len - iommu_start, 1, NULL, NULL);
+}
static GArray *
build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
@@ -2622,11 +2679,6 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
return true;
}
-static bool acpi_has_iommu(void)
-{
- return !!x86_iommu_get_default();
-}
-
static
void acpi_build(AcpiBuildTables *tables, MachineState *machine)
{
@@ -2706,9 +2758,15 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_mcfg_q35(tables_blob, tables->linker, &mcfg);
}
- if (acpi_has_iommu()) {
- acpi_add_table(table_offsets, tables_blob);
- build_dmar_q35(tables_blob, tables->linker);
+ if (x86_iommu_get_default()) {
+ IommuType IOMMUType = x86_iommu_get_type();
+ if (IOMMUType == TYPE_AMD) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_amd_iommu(tables_blob, tables->linker);
+ } else if (IOMMUType == TYPE_INTEL) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_dmar_q35(tables_blob, tables->linker);
+ }
}
if (pcms->acpi_nvdimm_state.is_enabled) {
nvdimm_build_acpi(table_offsets, tables_blob, tables->linker,
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
new file mode 100644
index 0000000000..a91a1798cb
--- /dev/null
+++ b/hw/i386/amd_iommu.c
@@ -0,0 +1,1202 @@
+/*
+ * QEMU emulation of AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, <davidkiarie4@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Cache implementation inspired by hw/i386/intel_iommu.c
+ */
+#include "qemu/osdep.h"
+#include "hw/i386/amd_iommu.h"
+#include "trace.h"
+
+/* used AMD-Vi MMIO registers */
+const char *amdvi_mmio_low[] = {
+ "AMDVI_MMIO_DEVTAB_BASE",
+ "AMDVI_MMIO_CMDBUF_BASE",
+ "AMDVI_MMIO_EVTLOG_BASE",
+ "AMDVI_MMIO_CONTROL",
+ "AMDVI_MMIO_EXCL_BASE",
+ "AMDVI_MMIO_EXCL_LIMIT",
+ "AMDVI_MMIO_EXT_FEATURES",
+ "AMDVI_MMIO_PPR_BASE",
+ "UNHANDLED"
+};
+const char *amdvi_mmio_high[] = {
+ "AMDVI_MMIO_COMMAND_HEAD",
+ "AMDVI_MMIO_COMMAND_TAIL",
+ "AMDVI_MMIO_EVTLOG_HEAD",
+ "AMDVI_MMIO_EVTLOG_TAIL",
+ "AMDVI_MMIO_STATUS",
+ "AMDVI_MMIO_PPR_HEAD",
+ "AMDVI_MMIO_PPR_TAIL",
+ "UNHANDLED"
+};
+
+struct AMDVIAddressSpace {
+ uint8_t bus_num; /* bus number */
+ uint8_t devfn; /* device function */
+ AMDVIState *iommu_state; /* AMDVI - one per machine */
+ MemoryRegion iommu; /* Device's address translation region */
+ MemoryRegion iommu_ir; /* Device's interrupt remapping region */
+ AddressSpace as; /* device's corresponding address space */
+};
+
+/* AMDVI cache entry */
+typedef struct AMDVIIOTLBEntry {
+ uint16_t domid; /* assigned domain id */
+ uint16_t devid; /* device owning entry */
+ uint64_t perms; /* access permissions */
+ uint64_t translated_addr; /* translated address */
+ uint64_t page_mask; /* physical page size */
+} AMDVIIOTLBEntry;
+
+/* configure MMIO registers at startup/reset */
+static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
+ uint64_t romask, uint64_t w1cmask)
+{
+ stq_le_p(&s->mmior[addr], val);
+ stq_le_p(&s->romask[addr], romask);
+ stq_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
+{
+ return lduw_le_p(&s->mmior[addr]);
+}
+
+static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
+{
+ return ldl_le_p(&s->mmior[addr]);
+}
+
+static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
+{
+ return ldq_le_p(&s->mmior[addr]);
+}
+
+/* internal write */
+static void amdvi_writeq_raw(AMDVIState *s, uint64_t val, hwaddr addr)
+{
+ stq_le_p(&s->mmior[addr], val);
+}
+
+/* external write */
+static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
+{
+ uint16_t romask = lduw_le_p(&s->romask[addr]);
+ uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
+ uint16_t oldval = lduw_le_p(&s->mmior[addr]);
+ stw_le_p(&s->mmior[addr],
+ ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
+{
+ uint32_t romask = ldl_le_p(&s->romask[addr]);
+ uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
+ uint32_t oldval = ldl_le_p(&s->mmior[addr]);
+ stl_le_p(&s->mmior[addr],
+ ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ uint64_t romask = ldq_le_p(&s->romask[addr]);
+ uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
+ uint32_t oldval = ldq_le_p(&s->mmior[addr]);
+ stq_le_p(&s->mmior[addr],
+ ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+/* OR a 64-bit register with a 64-bit value */
+static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ return amdvi_readq(s, addr) | val;
+}
+
+/* OR a 64-bit register with a 64-bit value storing result in the register */
+static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
+}
+
+/* AND a 64-bit register with a 64-bit value storing result in the register */
+static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
+}
+
+static void amdvi_generate_msi_interrupt(AMDVIState *s)
+{
+ MSIMessage msg;
+ MemTxAttrs attrs;
+
+ attrs.requester_id = pci_requester_id(&s->pci.dev);
+
+ if (msi_enabled(&s->pci.dev)) {
+ msg = msi_get_message(&s->pci.dev, 0);
+ address_space_stl_le(&address_space_memory, msg.address, msg.data,
+ attrs, NULL);
+ }
+}
+
+static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
+{
+ /* event logging not enabled */
+ if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
+ AMDVI_MMIO_STATUS_EVT_OVF)) {
+ return;
+ }
+
+ /* event log buffer full */
+ if (s->evtlog_tail >= s->evtlog_len) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
+ /* generate interrupt */
+ amdvi_generate_msi_interrupt(s);
+ return;
+ }
+
+ if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
+ &evt, AMDVI_EVENT_LEN)) {
+ trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
+ }
+
+ s->evtlog_tail += AMDVI_EVENT_LEN;
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
+ amdvi_generate_msi_interrupt(s);
+}
+
+static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
+ int length)
+{
+ int index = start / 64, bitpos = start % 64;
+ uint64_t mask = ((1 << length) - 1) << bitpos;
+ buffer[index] &= ~mask;
+ buffer[index] |= (value << bitpos) & mask;
+}
+/*
+ * AMDVi event structure
+ * 0:15 -> DeviceID
+ * 55:63 -> event type + miscellaneous info
+ * 63:127 -> related address
+ */
+static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
+ uint16_t info)
+{
+ amdvi_setevent_bits(evt, devid, 0, 16);
+ amdvi_setevent_bits(evt, info, 55, 8);
+ amdvi_setevent_bits(evt, addr, 63, 64);
+}
+/* log an error encountered during a page walk
+ *
+ * @addr: virtual address in translation request
+ */
+static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
+ hwaddr addr, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
+ amdvi_encode_event(evt, devid, addr, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+/*
+ * log a master abort accessing device table
+ * @devtab : address of device table entry
+ * @info : error flags
+ */
+static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
+ hwaddr devtab, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
+
+ amdvi_encode_event(evt, devid, devtab, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+/* log an event trying to access command buffer
+ * @addr : address that couldn't be accessed
+ */
+static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
+{
+ uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;
+
+ amdvi_encode_event(evt, 0, addr, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+/* log an illegal comand event
+ * @addr : address of illegal command
+ */
+static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
+ hwaddr addr)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
+ amdvi_encode_event(evt, 0, addr, info);
+ amdvi_log_event(s, evt);
+}
+/* log an error accessing device table
+ *
+ * @devid : device owning the table entry
+ * @devtab : address of device table entry
+ * @info : error flags
+ */
+static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
+ hwaddr addr, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
+ amdvi_encode_event(evt, devid, addr, info);
+ amdvi_log_event(s, evt);
+}
+/* log an error accessing a PTE entry
+ * @addr : address that couldn't be accessed
+ */
+static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
+ hwaddr addr, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
+ amdvi_encode_event(evt, devid, addr, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+
+static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
+{
+ return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+}
+
+static guint amdvi_uint64_hash(gconstpointer v)
+{
+ return (guint)*(const uint64_t *)v;
+}
+
+static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
+ uint64_t devid)
+{
+ uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
+ ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ return g_hash_table_lookup(s->iotlb, &key);
+}
+
+static void amdvi_iotlb_reset(AMDVIState *s)
+{
+ assert(s->iotlb);
+ trace_amdvi_iotlb_reset();
+ g_hash_table_remove_all(s->iotlb);
+}
+
+static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
+ uint16_t devid = *(uint16_t *)user_data;
+ return entry->devid == devid;
+}
+
+static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
+ uint64_t devid)
+{
+ uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
+ ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ g_hash_table_remove(s->iotlb, &key);
+}
+
+static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
+ uint64_t gpa, IOMMUTLBEntry to_cache,
+ uint16_t domid)
+{
+ AMDVIIOTLBEntry *entry = g_malloc(sizeof(*entry));
+ uint64_t *key = g_malloc(sizeof(key));
+ uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+
+ /* don't cache erroneous translations */
+ if (to_cache.perm != IOMMU_NONE) {
+ trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), gpa, to_cache.translated_addr);
+
+ if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
+ amdvi_iotlb_reset(s);
+ }
+
+ entry->domid = domid;
+ entry->perms = to_cache.perm;
+ entry->translated_addr = to_cache.translated_addr;
+ entry->page_mask = to_cache.addr_mask;
+ *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ g_hash_table_replace(s->iotlb, key, entry);
+ }
+}
+
+static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
+{
+ /* pad the last 3 bits */
+ hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
+ uint64_t data = cpu_to_le64(cmd[1]);
+
+ if (extract64(cmd[0], 51, 8)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+ if (extract64(cmd[0], 0, 1)) {
+ if (dma_memory_write(&address_space_memory, addr, &data,
+ AMDVI_COMPLETION_DATA_SIZE)) {
+ trace_amdvi_completion_wait_fail(addr);
+ }
+ }
+ /* set completion interrupt */
+ if (extract64(cmd[0], 1, 1)) {
+ amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
+ /* generate interrupt */
+ amdvi_generate_msi_interrupt(s);
+ }
+ trace_amdvi_completion_wait(addr, data);
+}
+
+/* log error without aborting since linux seems to be using reserved bits */
+static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
+{
+ uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
+
+ /* This command should invalidate internal caches of which there isn't */
+ if (extract64(cmd[0], 15, 16) || cmd[1]) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+ trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid));
+}
+
+static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 15, 16) || extract64(cmd[0], 19, 8) ||
+ extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
+ || extract64(cmd[1], 47, 16)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+ trace_amdvi_ppr_exec();
+}
+
+static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 0, 60) || cmd[1]) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+
+ amdvi_iotlb_reset(s);
+ trace_amdvi_all_inval();
+}
+
+static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
+ uint16_t domid = *(uint16_t *)user_data;
+ return entry->domid == domid;
+}
+
+/* we don't have devid - we can't remove pages by address */
+static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
+{
+ uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
+
+ if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 16, 12) ||
+ extract64(cmd[0], 3, 10)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+
+ g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
+ &domid);
+ trace_amdvi_pages_inval(domid);
+}
+
+static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 20, 8) ||
+ extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
+ extract64(cmd[1], 5, 7)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+
+ trace_amdvi_prefetch_pages();
+}
+
+static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 16, 16) || cmd[1]) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ trace_amdvi_intr_inval();
+}
+
+/* FIXME: Try to work with the specified size instead of all the pages
+ * when the S bit is on
+ */
+static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
+{
+
+ uint16_t devid = extract64(cmd[0], 0, 16);
+ if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 9)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ if (extract64(cmd[1], 0, 1)) {
+ g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
+ &devid);
+ } else {
+ amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
+ cpu_to_le16(extract64(cmd[1], 0, 16)));
+ }
+ trace_amdvi_iotlb_inval();
+}
+
+/* not honouring reserved bits is regarded as an illegal command */
+static void amdvi_cmdbuf_exec(AMDVIState *s)
+{
+ uint64_t cmd[2];
+
+ if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
+ cmd, AMDVI_COMMAND_SIZE)) {
+ trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
+ amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ switch (extract64(cmd[0], 60, 4)) {
+ case AMDVI_CMD_COMPLETION_WAIT:
+ amdvi_completion_wait(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
+ amdvi_inval_devtab_entry(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_AMDVI_PAGES:
+ amdvi_inval_pages(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_IOTLB_PAGES:
+ iommu_inval_iotlb(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_INTR_TABLE:
+ amdvi_inval_inttable(s, cmd);
+ break;
+ case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
+ amdvi_prefetch_pages(s, cmd);
+ break;
+ case AMDVI_CMD_COMPLETE_PPR_REQUEST:
+ amdvi_complete_ppr(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_AMDVI_ALL:
+ amdvi_inval_all(s, cmd);
+ break;
+ default:
+ trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4));
+ /* log illegal command */
+ amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+}
+
+static void amdvi_cmdbuf_run(AMDVIState *s)
+{
+ if (!s->cmdbuf_enabled) {
+ trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
+ return;
+ }
+
+ /* check if there is work to do. */
+ while (s->cmdbuf_head != s->cmdbuf_tail) {
+ trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
+ amdvi_cmdbuf_exec(s);
+ s->cmdbuf_head += AMDVI_COMMAND_SIZE;
+ amdvi_writeq_raw(s, s->cmdbuf_head, AMDVI_MMIO_COMMAND_HEAD);
+
+ /* wrap head pointer */
+ if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
+ s->cmdbuf_head = 0;
+ }
+ }
+}
+
+static void amdvi_mmio_trace(hwaddr addr, unsigned size)
+{
+ uint8_t index = (addr & ~0x2000) / 8;
+
+ if ((addr & 0x2000)) {
+ /* high table */
+ index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
+ trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+ } else {
+ index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
+ trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+ }
+}
+
+static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+ AMDVIState *s = opaque;
+
+ uint64_t val = -1;
+ if (addr + size > AMDVI_MMIO_SIZE) {
+ trace_amdvi_mmio_read("error: addr outside region: max ",
+ (uint64_t)AMDVI_MMIO_SIZE, addr, size);
+ return (uint64_t)-1;
+ }
+
+ if (size == 2) {
+ val = amdvi_readw(s, addr);
+ } else if (size == 4) {
+ val = amdvi_readl(s, addr);
+ } else if (size == 8) {
+ val = amdvi_readq(s, addr);
+ }
+ amdvi_mmio_trace(addr, size);
+
+ return val;
+}
+
+static void amdvi_handle_control_write(AMDVIState *s)
+{
+ unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
+ s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
+
+ s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
+ s->evtlog_enabled = s->enabled && !!(control &
+ AMDVI_MMIO_CONTROL_EVENTLOGEN);
+
+ s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
+ s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
+ s->cmdbuf_enabled = s->enabled && !!(control &
+ AMDVI_MMIO_CONTROL_CMDBUFLEN);
+
+ /* update the flags depending on the control register */
+ if (s->cmdbuf_enabled) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
+ } else {
+ amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
+ }
+ if (s->evtlog_enabled) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
+ } else {
+ amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
+ }
+
+ trace_amdvi_control_status(control);
+ amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_devtab_write(AMDVIState *s)
+
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
+ s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
+
+ /* set device table length */
+ s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 *
+ (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
+ AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
+}
+
+static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
+{
+ s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
+ & AMDVI_MMIO_CMDBUF_HEAD_MASK;
+ amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
+{
+ s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
+ & AMDVI_MMIO_CMDBUF_BASE_MASK;
+ s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
+ & AMDVI_MMIO_CMDBUF_SIZE_MASK);
+ s->cmdbuf_head = s->cmdbuf_tail = 0;
+}
+
+static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
+{
+ s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
+ & AMDVI_MMIO_CMDBUF_TAIL_MASK;
+ amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_excllim_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
+ s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
+ AMDVI_MMIO_EXCL_LIMIT_LOW;
+}
+
+static inline void amdvi_handle_evtbase_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
+ s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
+ s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
+ & AMDVI_MMIO_EVTLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_evttail_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
+ s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
+}
+
+static inline void amdvi_handle_evthead_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
+ s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprbase_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
+ s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
+ s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
+ & AMDVI_MMIO_PPRLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_pprhead_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
+ s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprtail_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
+ s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
+}
+
+/* FIXME: something might go wrong if System Software writes in chunks
+ * of one byte but linux writes in chunks of 4 bytes so currently it
+ * works correctly with linux but will definitely be busted if software
+ * reads/writes 8 bytes
+ */
+static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
+ hwaddr addr)
+{
+ if (size == 2) {
+ amdvi_writew(s, addr, val);
+ } else if (size == 4) {
+ amdvi_writel(s, addr, val);
+ } else if (size == 8) {
+ amdvi_writeq(s, addr, val);
+ }
+}
+
+static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned size)
+{
+ AMDVIState *s = opaque;
+ unsigned long offset = addr & 0x07;
+
+ if (addr + size > AMDVI_MMIO_SIZE) {
+ trace_amdvi_mmio_write("error: addr outside region: max ",
+ (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
+ return;
+ }
+
+ amdvi_mmio_trace(addr, size);
+ switch (addr & ~0x07) {
+ case AMDVI_MMIO_CONTROL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_control_write(s);
+ break;
+ case AMDVI_MMIO_DEVICE_TABLE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ /* set device table address
+ * This also suffers from inability to tell whether software
+ * is done writing
+ */
+ if (offset || (size == 8)) {
+ amdvi_handle_devtab_write(s);
+ }
+ break;
+ case AMDVI_MMIO_COMMAND_HEAD:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_cmdhead_write(s);
+ break;
+ case AMDVI_MMIO_COMMAND_BASE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ /* FIXME - make sure System Software has finished writing incase
+ * it writes in chucks less than 8 bytes in a robust way.As for
+ * now, this hacks works for the linux driver
+ */
+ if (offset || (size == 8)) {
+ amdvi_handle_cmdbase_write(s);
+ }
+ break;
+ case AMDVI_MMIO_COMMAND_TAIL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_cmdtail_write(s);
+ break;
+ case AMDVI_MMIO_EVENT_BASE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_evtbase_write(s);
+ break;
+ case AMDVI_MMIO_EVENT_HEAD:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_evthead_write(s);
+ break;
+ case AMDVI_MMIO_EVENT_TAIL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_evttail_write(s);
+ break;
+ case AMDVI_MMIO_EXCL_LIMIT:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_excllim_write(s);
+ break;
+ /* PPR log base - unused for now */
+ case AMDVI_MMIO_PPR_BASE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_pprbase_write(s);
+ break;
+ /* PPR log head - also unused for now */
+ case AMDVI_MMIO_PPR_HEAD:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_pprhead_write(s);
+ break;
+ /* PPR log tail - unused for now */
+ case AMDVI_MMIO_PPR_TAIL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_pprtail_write(s);
+ break;
+ }
+}
+
+static inline uint64_t amdvi_get_perms(uint64_t entry)
+{
+ return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
+ AMDVI_DEV_PERM_SHIFT;
+}
+
+/* a valid entry should have V = 1 and reserved bits honoured */
+static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
+ uint64_t *dte)
+{
+ if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
+ || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
+ || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
+ amdvi_log_illegaldevtab_error(s, devid,
+ s->devtab +
+ devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
+ return false;
+ }
+
+ return dte[0] & AMDVI_DEV_VALID;
+}
+
+/* get a device table entry given the devid */
+static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
+{
+ uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
+
+ if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
+ AMDVI_DEVTAB_ENTRY_SIZE)) {
+ trace_amdvi_dte_get_fail(s->devtab, offset);
+ /* log error accessing dte */
+ amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
+ return false;
+ }
+
+ *entry = le64_to_cpu(*entry);
+ if (!amdvi_validate_dte(s, devid, entry)) {
+ trace_amdvi_invalid_dte(entry[0]);
+ return false;
+ }
+
+ return true;
+}
+
+/* get pte translation mode */
+static inline uint8_t get_pte_translation_mode(uint64_t pte)
+{
+ return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
+}
+
+static inline uint64_t pte_override_page_mask(uint64_t pte)
+{
+ uint8_t page_mask = 12;
+ uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK;
+ /* find the first zero bit */
+ while (addr & 1) {
+ page_mask++;
+ addr = addr >> 1;
+ }
+
+ return ~((1ULL << page_mask) - 1);
+}
+
+static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
+{
+ return ~((1UL << ((oldlevel * 9) + 3)) - 1);
+}
+
+static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
+ uint16_t devid)
+{
+ uint64_t pte;
+
+ if (dma_memory_read(&address_space_memory, pte_addr, &pte, sizeof(pte))) {
+ trace_amdvi_get_pte_hwerror(pte_addr);
+ amdvi_log_pagetab_error(s, devid, pte_addr, 0);
+ pte = 0;
+ return pte;
+ }
+
+ pte = le64_to_cpu(pte);
+ return pte;
+}
+
+static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
+ IOMMUTLBEntry *ret, unsigned perms,
+ hwaddr addr)
+{
+ unsigned level, present, pte_perms, oldlevel;
+ uint64_t pte = dte[0], pte_addr, page_mask;
+
+ /* make sure the DTE has TV = 1 */
+ if (pte & AMDVI_DEV_TRANSLATION_VALID) {
+ level = get_pte_translation_mode(pte);
+ if (level >= 7) {
+ trace_amdvi_mode_invalid(level, addr);
+ return;
+ }
+ if (level == 0) {
+ goto no_remap;
+ }
+
+ /* we are at the leaf page table or page table encodes a huge page */
+ while (level > 0) {
+ pte_perms = amdvi_get_perms(pte);
+ present = pte & 1;
+ if (!present || perms != (perms & pte_perms)) {
+ amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
+ trace_amdvi_page_fault(addr);
+ return;
+ }
+
+ /* go to the next lower level */
+ pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+ /* add offset and load pte */
+ pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
+ pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
+ if (!pte) {
+ return;
+ }
+ oldlevel = level;
+ level = get_pte_translation_mode(pte);
+ if (level == 0x7) {
+ break;
+ }
+ }
+
+ if (level == 0x7) {
+ page_mask = pte_override_page_mask(pte);
+ } else {
+ page_mask = pte_get_page_mask(oldlevel);
+ }
+
+ /* get access permissions from pte */
+ ret->iova = addr & page_mask;
+ ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
+ ret->addr_mask = ~page_mask;
+ ret->perm = amdvi_get_perms(pte);
+ return;
+ }
+no_remap:
+ ret->iova = addr & AMDVI_PAGE_MASK_4K;
+ ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret->perm = amdvi_get_perms(pte);
+}
+
+static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
+ bool is_write, IOMMUTLBEntry *ret)
+{
+ AMDVIState *s = as->iommu_state;
+ uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
+ AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
+ uint64_t entry[4];
+
+ if (iotlb_entry) {
+ trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
+ ret->iova = addr & ~iotlb_entry->page_mask;
+ ret->translated_addr = iotlb_entry->translated_addr;
+ ret->addr_mask = iotlb_entry->page_mask;
+ ret->perm = iotlb_entry->perms;
+ return;
+ }
+
+ /* devices with V = 0 are not translated */
+ if (!amdvi_get_dte(s, devid, entry)) {
+ goto out;
+ }
+
+ amdvi_page_walk(as, entry, ret,
+ is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);
+
+ amdvi_update_iotlb(s, devid, addr, *ret,
+ entry[1] & AMDVI_DEV_DOMID_ID_MASK);
+ return;
+
+out:
+ ret->iova = addr & AMDVI_PAGE_MASK_4K;
+ ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret->perm = IOMMU_RW;
+}
+
+static inline bool amdvi_is_interrupt_addr(hwaddr addr)
+{
+ return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
+}
+
+static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
+ bool is_write)
+{
+ AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+ AMDVIState *s = as->iommu_state;
+ IOMMUTLBEntry ret = {
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = ~(hwaddr)0,
+ .perm = IOMMU_NONE
+ };
+
+ if (!s->enabled) {
+ /* AMDVI disabled - corresponds to iommu=off not
+ * failure to provide any parameter
+ */
+ ret.iova = addr & AMDVI_PAGE_MASK_4K;
+ ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret.perm = IOMMU_RW;
+ return ret;
+ } else if (amdvi_is_interrupt_addr(addr)) {
+ ret.iova = addr & AMDVI_PAGE_MASK_4K;
+ ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret.perm = IOMMU_WO;
+ return ret;
+ }
+
+ amdvi_do_translate(as, addr, is_write, &ret);
+ trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
+ PCI_FUNC(as->devfn), addr, ret.translated_addr);
+ return ret;
+}
+
+static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+ AMDVIState *s = opaque;
+ AMDVIAddressSpace **iommu_as;
+ int bus_num = pci_bus_num(bus);
+
+ iommu_as = s->address_spaces[bus_num];
+
+ /* allocate memory during the first run */
+ if (!iommu_as) {
+ iommu_as = g_malloc0(sizeof(AMDVIAddressSpace *) * PCI_DEVFN_MAX);
+ s->address_spaces[bus_num] = iommu_as;
+ }
+
+ /* set up AMD-Vi region */
+ if (!iommu_as[devfn]) {
+ iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
+ iommu_as[devfn]->bus_num = (uint8_t)bus_num;
+ iommu_as[devfn]->devfn = (uint8_t)devfn;
+ iommu_as[devfn]->iommu_state = s;
+
+ memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
+ &s->iommu_ops, "amd-iommu", UINT64_MAX);
+ address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
+ "amd-iommu");
+ }
+ return &iommu_as[devfn]->as;
+}
+
+static const MemoryRegionOps mmio_mem_ops = {
+ .read = amdvi_mmio_read,
+ .write = amdvi_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ }
+};
+
+static void amdvi_iommu_notify_started(MemoryRegion *iommu)
+{
+ AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+
+ hw_error("device %02x.%02x.%x requires iommu notifier which is not "
+ "currently supported", as->bus_num, PCI_SLOT(as->devfn),
+ PCI_FUNC(as->devfn));
+}
+
+static void amdvi_init(AMDVIState *s)
+{
+ amdvi_iotlb_reset(s);
+
+ s->iommu_ops.translate = amdvi_translate;
+ s->iommu_ops.notify_started = amdvi_iommu_notify_started;
+ s->devtab_len = 0;
+ s->cmdbuf_len = 0;
+ s->cmdbuf_head = 0;
+ s->cmdbuf_tail = 0;
+ s->evtlog_head = 0;
+ s->evtlog_tail = 0;
+ s->excl_enabled = false;
+ s->excl_allow = false;
+ s->mmio_enabled = false;
+ s->enabled = false;
+ s->ats_enabled = false;
+ s->cmdbuf_enabled = false;
+
+ /* reset MMIO */
+ memset(s->mmior, 0, AMDVI_MMIO_SIZE);
+ amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
+ 0xffffffffffffffef, 0);
+ amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
+
+ /* reset device ident */
+ pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
+ pci_config_set_prog_interface(s->pci.dev.config, 00);
+ pci_config_set_device_id(s->pci.dev.config, s->devid);
+ pci_config_set_class(s->pci.dev.config, 0x0806);
+
+ /* reset AMDVI specific capabilities, all r/o */
+ pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
+ s->mmio.addr & ~(0xffff0000));
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
+ (s->mmio.addr & ~(0xffff)) >> 16);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
+ 0xff000000);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
+ AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
+}
+
+static void amdvi_reset(DeviceState *dev)
+{
+ AMDVIState *s = AMD_IOMMU_DEVICE(dev);
+
+ msi_reset(&s->pci.dev);
+ amdvi_init(s);
+}
+
+static void amdvi_realize(DeviceState *dev, Error **err)
+{
+ AMDVIState *s = AMD_IOMMU_DEVICE(dev);
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
+ PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+ s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
+ amdvi_uint64_equal, g_free, g_free);
+
+ /* This device should take care of IOMMU PCI properties */
+ x86_iommu->type = TYPE_AMD;
+ qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
+ object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
+ s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE);
+ pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE);
+ pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE);
+
+ /* set up MMIO */
+ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
+ AMDVI_MMIO_SIZE);
+
+ sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
+ sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
+ pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+ s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
+ msi_init(&s->pci.dev, 0, 1, true, false, err);
+ amdvi_init(s);
+}
+
+static const VMStateDescription vmstate_amdvi = {
+ .name = "amd-iommu",
+ .unmigratable = 1
+};
+
+static void amdvi_instance_init(Object *klass)
+{
+ AMDVIState *s = AMD_IOMMU_DEVICE(klass);
+
+ object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
+}
+
+static void amdvi_class_init(ObjectClass *klass, void* data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ X86IOMMUClass *dc_class = X86_IOMMU_CLASS(klass);
+
+ dc->reset = amdvi_reset;
+ dc->vmsd = &vmstate_amdvi;
+ dc->hotpluggable = false;
+ dc_class->realize = amdvi_realize;
+}
+
+static const TypeInfo amdvi = {
+ .name = TYPE_AMD_IOMMU_DEVICE,
+ .parent = TYPE_X86_IOMMU_DEVICE,
+ .instance_size = sizeof(AMDVIState),
+ .instance_init = amdvi_instance_init,
+ .class_init = amdvi_class_init
+};
+
+static const TypeInfo amdviPCI = {
+ .name = "AMDVI-PCI",
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(AMDVIPCIState),
+};
+
+static void amdviPCI_register_types(void)
+{
+ type_register_static(&amdviPCI);
+ type_register_static(&amdvi);
+}
+
+type_init(amdviPCI_register_types);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
new file mode 100644
index 0000000000..884926e9e7
--- /dev/null
+++ b/hw/i386/amd_iommu.h
@@ -0,0 +1,289 @@
+/*
+ * QEMU emulation of an AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, <davidkiarie4@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef AMD_IOMMU_H_
+#define AMD_IOMMU_H_
+
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
+#include "hw/sysbus.h"
+#include "sysemu/dma.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/i386/x86-iommu.h"
+
+/* Capability registers */
+#define AMDVI_CAPAB_BAR_LOW 0x04
+#define AMDVI_CAPAB_BAR_HIGH 0x08
+#define AMDVI_CAPAB_RANGE 0x0C
+#define AMDVI_CAPAB_MISC 0x10
+
+#define AMDVI_CAPAB_SIZE 0x18
+#define AMDVI_CAPAB_REG_SIZE 0x04
+
+/* Capability header data */
+#define AMDVI_CAPAB_ID_SEC 0xf
+#define AMDVI_CAPAB_FLAT_EXT (1 << 28)
+#define AMDVI_CAPAB_EFR_SUP (1 << 27)
+#define AMDVI_CAPAB_FLAG_NPCACHE (1 << 26)
+#define AMDVI_CAPAB_FLAG_HTTUNNEL (1 << 25)
+#define AMDVI_CAPAB_FLAG_IOTLBSUP (1 << 24)
+#define AMDVI_CAPAB_INIT_TYPE (3 << 16)
+
+/* No. of used MMIO registers */
+#define AMDVI_MMIO_REGS_HIGH 8
+#define AMDVI_MMIO_REGS_LOW 7
+
+/* MMIO registers */
+#define AMDVI_MMIO_DEVICE_TABLE 0x0000
+#define AMDVI_MMIO_COMMAND_BASE 0x0008
+#define AMDVI_MMIO_EVENT_BASE 0x0010
+#define AMDVI_MMIO_CONTROL 0x0018
+#define AMDVI_MMIO_EXCL_BASE 0x0020
+#define AMDVI_MMIO_EXCL_LIMIT 0x0028
+#define AMDVI_MMIO_EXT_FEATURES 0x0030
+#define AMDVI_MMIO_COMMAND_HEAD 0x2000
+#define AMDVI_MMIO_COMMAND_TAIL 0x2008
+#define AMDVI_MMIO_EVENT_HEAD 0x2010
+#define AMDVI_MMIO_EVENT_TAIL 0x2018
+#define AMDVI_MMIO_STATUS 0x2020
+#define AMDVI_MMIO_PPR_BASE 0x0038
+#define AMDVI_MMIO_PPR_HEAD 0x2030
+#define AMDVI_MMIO_PPR_TAIL 0x2038
+
+#define AMDVI_MMIO_SIZE 0x4000
+
+#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1)
+#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \
+ AMDVI_MMIO_DEVTAB_SIZE_MASK)
+#define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32
+#define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096
+
+/* some of this are similar but just for readability */
+#define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7)
+#define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f
+#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f)
+#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+
+#define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7)
+#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK
+#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK
+#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f)
+#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+
+#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7)
+#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK
+#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK
+
+#define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0)
+#define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1)
+#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff
+
+/* mmio control register flags */
+#define AMDVI_MMIO_CONTROL_AMDVIEN (1ULL << 0)
+#define AMDVI_MMIO_CONTROL_HTTUNEN (1ULL << 1)
+#define AMDVI_MMIO_CONTROL_EVENTLOGEN (1ULL << 2)
+#define AMDVI_MMIO_CONTROL_EVENTINTEN (1ULL << 3)
+#define AMDVI_MMIO_CONTROL_COMWAITINTEN (1ULL << 4)
+#define AMDVI_MMIO_CONTROL_CMDBUFLEN (1ULL << 12)
+
+/* MMIO status register bits */
+#define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4)
+#define AMDVI_MMIO_STATUS_EVT_RUN (1 << 3)
+#define AMDVI_MMIO_STATUS_COMP_INT (1 << 2)
+#define AMDVI_MMIO_STATUS_EVT_OVF (1 << 0)
+
+#define AMDVI_CMDBUF_ID_BYTE 0x07
+#define AMDVI_CMDBUF_ID_RSHIFT 4
+
+#define AMDVI_CMD_COMPLETION_WAIT 0x01
+#define AMDVI_CMD_INVAL_DEVTAB_ENTRY 0x02
+#define AMDVI_CMD_INVAL_AMDVI_PAGES 0x03
+#define AMDVI_CMD_INVAL_IOTLB_PAGES 0x04
+#define AMDVI_CMD_INVAL_INTR_TABLE 0x05
+#define AMDVI_CMD_PREFETCH_AMDVI_PAGES 0x06
+#define AMDVI_CMD_COMPLETE_PPR_REQUEST 0x07
+#define AMDVI_CMD_INVAL_AMDVI_ALL 0x08
+
+#define AMDVI_DEVTAB_ENTRY_SIZE 32
+
+/* Device table entry bits 0:63 */
+#define AMDVI_DEV_VALID (1ULL << 0)
+#define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1)
+#define AMDVI_DEV_MODE_MASK 0x7
+#define AMDVI_DEV_MODE_RSHIFT 9
+#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000
+#define AMDVI_DEV_PT_ROOT_RSHIFT 12
+#define AMDVI_DEV_PERM_SHIFT 61
+#define AMDVI_DEV_PERM_READ (1ULL << 61)
+#define AMDVI_DEV_PERM_WRITE (1ULL << 62)
+
+/* Device table entry bits 64:127 */
+#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1)
+
+/* Event codes and flags, as stored in the info field */
+#define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12)
+#define AMDVI_EVENT_IOPF (0x2U << 12)
+#define AMDVI_EVENT_IOPF_I (1U << 3)
+#define AMDVI_EVENT_DEV_TAB_HW_ERROR (0x3U << 12)
+#define AMDVI_EVENT_PAGE_TAB_HW_ERROR (0x4U << 12)
+#define AMDVI_EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12)
+#define AMDVI_EVENT_COMMAND_HW_ERROR (0x6U << 12)
+
+#define AMDVI_EVENT_LEN 16
+#define AMDVI_PERM_READ (1 << 0)
+#define AMDVI_PERM_WRITE (1 << 1)
+
+#define AMDVI_FEATURE_PREFETCH (1ULL << 0) /* page prefetch */
+#define AMDVI_FEATURE_PPR (1ULL << 1) /* PPR Support */
+#define AMDVI_FEATURE_GT (1ULL << 4) /* Guest Translation */
+#define AMDVI_FEATURE_IA (1ULL << 6) /* inval all support */
+#define AMDVI_FEATURE_GA (1ULL << 7) /* guest VAPIC support */
+#define AMDVI_FEATURE_HE (1ULL << 8) /* hardware error regs */
+#define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */
+
+/* reserved DTE bits */
+#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc
+#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100
+#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000
+
+/* AMDVI paging mode */
+#define AMDVI_GATS_MODE (6ULL << 12)
+#define AMDVI_HATS_MODE (6ULL << 10)
+
+/* IOTLB */
+#define AMDVI_IOTLB_MAX_SIZE 1024
+#define AMDVI_DEVID_SHIFT 36
+
+/* extended feature support */
+#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
+ AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
+ AMDVI_GATS_MODE | AMDVI_HATS_MODE)
+
+/* capabilities header */
+#define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \
+ AMDVI_CAPAB_FLAG_NPCACHE | AMDVI_CAPAB_FLAG_IOTLBSUP \
+ | AMDVI_CAPAB_ID_SEC | AMDVI_CAPAB_INIT_TYPE | \
+ AMDVI_CAPAB_FLAG_HTTUNNEL | AMDVI_CAPAB_EFR_SUP)
+
+/* AMDVI default address */
+#define AMDVI_BASE_ADDR 0xfed80000
+
+/* page management constants */
+#define AMDVI_PAGE_SHIFT 12
+#define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT)
+
+#define AMDVI_PAGE_SHIFT_4K 12
+#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1))
+
+#define AMDVI_MAX_VA_ADDR (48UL << 5)
+#define AMDVI_MAX_PH_ADDR (40UL << 8)
+#define AMDVI_MAX_GVA_ADDR (48UL << 15)
+
+/* Completion Wait data size */
+#define AMDVI_COMPLETION_DATA_SIZE 8
+
+#define AMDVI_COMMAND_SIZE 16
+/* Completion Wait data size */
+#define AMDVI_COMPLETION_DATA_SIZE 8
+
+#define AMDVI_COMMAND_SIZE 16
+
+#define AMDVI_INT_ADDR_FIRST 0xfee00000
+#define AMDVI_INT_ADDR_LAST 0xfeefffff
+
+#define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
+#define AMD_IOMMU_DEVICE(obj)\
+ OBJECT_CHECK(AMDVIState, (obj), TYPE_AMD_IOMMU_DEVICE)
+
+#define TYPE_AMD_IOMMU_PCI "AMDVI-PCI"
+
+typedef struct AMDVIAddressSpace AMDVIAddressSpace;
+
+/* functions to steal PCI config space */
+typedef struct AMDVIPCIState {
+ PCIDevice dev; /* The PCI device itself */
+} AMDVIPCIState;
+
+typedef struct AMDVIState {
+ X86IOMMUState iommu; /* IOMMU bus device */
+ AMDVIPCIState pci; /* IOMMU PCI device */
+
+ uint32_t version;
+ uint32_t capab_offset; /* capability offset pointer */
+
+ uint64_t mmio_addr;
+
+ uint32_t devid; /* auto-assigned devid */
+
+ bool enabled; /* IOMMU enabled */
+ bool ats_enabled; /* address translation enabled */
+ bool cmdbuf_enabled; /* command buffer enabled */
+ bool evtlog_enabled; /* event log enabled */
+ bool excl_enabled;
+
+ hwaddr devtab; /* base address device table */
+ size_t devtab_len; /* device table length */
+
+ hwaddr cmdbuf; /* command buffer base address */
+ uint64_t cmdbuf_len; /* command buffer length */
+ uint32_t cmdbuf_head; /* current IOMMU read position */
+ uint32_t cmdbuf_tail; /* next Software write position */
+ bool completion_wait_intr;
+
+ hwaddr evtlog; /* base address event log */
+ bool evtlog_intr;
+ uint32_t evtlog_len; /* event log length */
+ uint32_t evtlog_head; /* current IOMMU write position */
+ uint32_t evtlog_tail; /* current Software read position */
+
+ /* unused for now */
+ hwaddr excl_base; /* base DVA - IOMMU exclusion range */
+ hwaddr excl_limit; /* limit of IOMMU exclusion range */
+ bool excl_allow; /* translate accesses to the exclusion range */
+ bool excl_enable; /* exclusion range enabled */
+
+ hwaddr ppr_log; /* base address ppr log */
+ uint32_t pprlog_len; /* ppr log len */
+ uint32_t pprlog_head; /* ppr log head */
+ uint32_t pprlog_tail; /* ppr log tail */
+
+ MemoryRegion mmio; /* MMIO region */
+ uint8_t mmior[AMDVI_MMIO_SIZE]; /* read/write MMIO */
+ uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */
+ uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */
+ bool mmio_enabled;
+
+ /* IOMMU function */
+ MemoryRegionIOMMUOps iommu_ops;
+
+ /* for each served device */
+ AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
+
+ /* IOTLB */
+ GHashTable *iotlb;
+} AMDVIState;
+
+#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 28c31a2cdf..d6e02c821a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2453,6 +2453,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
VTD_DPRINTF(GENERAL, "");
+ x86_iommu->type = TYPE_INTEL;
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 5b99eba7b9..1938b988d9 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -13,3 +13,32 @@ mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
# hw/i386/x86-iommu.c
x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
+
+# hw/i386/amd_iommu.c
+amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
+amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) " update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_completion_wait_fail(uint64_t addr) "error: fail to write at address 0x%"PRIx64
+amdvi_mmio_write(const char *reg, uint64_t addr, unsigned size, uint64_t val, uint64_t offset) "%s write addr 0x%"PRIx64", size %u, val 0x%"PRIx64", offset 0x%"PRIx64
+amdvi_mmio_read(const char *reg, uint64_t addr, unsigned size, uint64_t offset) "%s read addr 0x%"PRIx64", size %u offset 0x%"PRIx64
+amdvi_command_error(uint64_t status) "error: Executing commands with command buffer disabled 0x%"PRIx64
+amdvi_command_read_fail(uint64_t addr, uint32_t head) "error: fail to access memory at 0x%"PRIx64" + 0x%"PRIx32
+amdvi_command_exec(uint32_t head, uint32_t tail, uint64_t buf) "command buffer head at 0x%"PRIx32" command buffer tail at 0x%"PRIx32" command buffer base at 0x%"PRIx64
+amdvi_unhandled_command(uint8_t type) "unhandled command 0x%"PRIx8
+amdvi_intr_inval(void) "Interrupt table invalidated"
+amdvi_iotlb_inval(void) "IOTLB pages invalidated"
+amdvi_prefetch_pages(void) "Pre-fetch of AMD-Vi pages requested"
+amdvi_pages_inval(uint16_t domid) "AMD-Vi pages for domain 0x%"PRIx16 " invalidated"
+amdvi_all_inval(void) "Invalidation of all AMD-Vi cache requested "
+amdvi_ppr_exec(void) "Execution of PPR queue requested "
+amdvi_devtab_inval(uint8_t bus, uint8_t slot, uint8_t func) "device table entry for devid: %02x:%02x.%x invalidated"
+amdvi_completion_wait(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64
+amdvi_control_status(uint64_t val) "MMIO_STATUS state 0x%"PRIx64
+amdvi_iotlb_reset(void) "IOTLB exceed size limit - reset "
+amdvi_completion_wait_exec(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64
+amdvi_dte_get_fail(uint64_t addr, uint32_t offset) "error: failed to access Device Entry devtab 0x%"PRIx64" offset 0x%"PRIx32
+amdvi_invalid_dte(uint64_t addr) "PTE entry at 0x%"PRIx64" is invalid "
+amdvi_get_pte_hwerror(uint64_t addr) "hardware error eccessing PTE at addr 0x%"PRIx64
+amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: translation level 0x%"PRIx8" translating addr 0x%"PRIx64
+amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical address 0x%"PRIx64
+amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index ce26b2a71d..2278af7c32 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -71,6 +71,11 @@ X86IOMMUState *x86_iommu_get_default(void)
return x86_iommu_default;
}
+IommuType x86_iommu_get_type(void)
+{
+ return x86_iommu_default->type;
+}
+
static void x86_iommu_realize(DeviceState *dev, Error **errp)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
@@ -79,6 +84,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
if (x86_class->realize) {
x86_class->realize(dev, errp);
}
+
x86_iommu_set_default(X86_IOMMU_DEVICE(dev));
}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fcf3358d6c..18ce333457 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -303,6 +303,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
virtqueue_unmap_sg(vq, elem, len);
+ if (unlikely(vq->vdev->broken)) {
+ return;
+ }
+
idx = (idx + vq->used_idx) % vq->vring.num;
uelem.id = elem->index;
@@ -313,6 +317,12 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
uint16_t old, new;
+
+ if (unlikely(vq->vdev->broken)) {
+ vq->inuse -= count;
+ return;
+ }
+
/* Make sure buffer is written before we update index. */
smp_wmb();
trace_virtqueue_flush(vq, count);
@@ -337,9 +347,9 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
/* Check it isn't doing very strange things with descriptor numbers. */
if (num_heads > vq->vring.num) {
- error_report("Guest moved used index from %u to %u",
+ virtio_error(vq->vdev, "Guest moved used index from %u to %u",
idx, vq->shadow_avail_idx);
- exit(1);
+ return -EINVAL;
}
/* On success, callers read a descriptor at vq->last_avail_idx.
* Make sure descriptor read does not bypass avail index read. */
@@ -350,45 +360,49 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
return num_heads;
}
-static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
+static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
+ unsigned int *head)
{
- unsigned int head;
-
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
- head = vring_avail_ring(vq, idx % vq->vring.num);
+ *head = vring_avail_ring(vq, idx % vq->vring.num);
/* If their number is silly, that's a fatal mistake. */
- if (head >= vq->vring.num) {
- error_report("Guest says index %u is available", head);
- exit(1);
+ if (*head >= vq->vring.num) {
+ virtio_error(vq->vdev, "Guest says index %u is available", *head);
+ return false;
}
- return head;
+ return true;
}
-static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
- hwaddr desc_pa, unsigned int max)
-{
- unsigned int next;
+enum {
+ VIRTQUEUE_READ_DESC_ERROR = -1,
+ VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
+ VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
+};
+static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
+ hwaddr desc_pa, unsigned int max,
+ unsigned int *next)
+{
/* If this descriptor says it doesn't chain, we're done. */
if (!(desc->flags & VRING_DESC_F_NEXT)) {
- return max;
+ return VIRTQUEUE_READ_DESC_DONE;
}
/* Check they're not leading us off end of descriptors. */
- next = desc->next;
+ *next = desc->next;
/* Make sure compiler knows to grab that: we don't want it changing! */
smp_wmb();
- if (next >= max) {
- error_report("Desc next is %u", next);
- exit(1);
+ if (*next >= max) {
+ virtio_error(vdev, "Desc next is %u", *next);
+ return VIRTQUEUE_READ_DESC_ERROR;
}
- vring_desc_read(vdev, desc, desc_pa, next);
- return next;
+ vring_desc_read(vdev, desc, desc_pa, *next);
+ return VIRTQUEUE_READ_DESC_MORE;
}
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
@@ -397,33 +411,38 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
{
unsigned int idx;
unsigned int total_bufs, in_total, out_total;
+ int rc;
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;
- while (virtqueue_num_heads(vq, idx)) {
+ while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
VirtIODevice *vdev = vq->vdev;
unsigned int max, num_bufs, indirect = 0;
VRingDesc desc;
hwaddr desc_pa;
- int i;
+ unsigned int i;
max = vq->vring.num;
num_bufs = total_bufs;
- i = virtqueue_get_head(vq, idx++);
+
+ if (!virtqueue_get_head(vq, idx++, &i)) {
+ goto err;
+ }
+
desc_pa = vq->vring.desc;
vring_desc_read(vdev, &desc, desc_pa, i);
if (desc.flags & VRING_DESC_F_INDIRECT) {
if (desc.len % sizeof(VRingDesc)) {
- error_report("Invalid size for indirect buffer table");
- exit(1);
+ virtio_error(vdev, "Invalid size for indirect buffer table");
+ goto err;
}
/* If we've got too many, that implies a descriptor loop. */
if (num_bufs >= max) {
- error_report("Looped descriptor");
- exit(1);
+ virtio_error(vdev, "Looped descriptor");
+ goto err;
}
/* loop over the indirect descriptor table */
@@ -437,8 +456,8 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
do {
/* If we've got too many, that implies a descriptor loop. */
if (++num_bufs > max) {
- error_report("Looped descriptor");
- exit(1);
+ virtio_error(vdev, "Looped descriptor");
+ goto err;
}
if (desc.flags & VRING_DESC_F_WRITE) {
@@ -449,13 +468,24 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
goto done;
}
- } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
+
+ rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ goto err;
+ }
if (!indirect)
total_bufs = num_bufs;
else
total_bufs++;
}
+
+ if (rc < 0) {
+ goto err;
+ }
+
done:
if (in_bytes) {
*in_bytes = in_total;
@@ -463,6 +493,11 @@ done:
if (out_bytes) {
*out_bytes = out_total;
}
+ return;
+
+err:
+ in_total = out_total = 0;
+ goto done;
}
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
@@ -474,27 +509,35 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
return in_bytes <= in_total && out_bytes <= out_total;
}
-static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
+static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
+ hwaddr *addr, struct iovec *iov,
unsigned int max_num_sg, bool is_write,
hwaddr pa, size_t sz)
{
+ bool ok = false;
unsigned num_sg = *p_num_sg;
assert(num_sg <= max_num_sg);
if (!sz) {
- error_report("virtio: zero sized buffers are not allowed");
- exit(1);
+ virtio_error(vdev, "virtio: zero sized buffers are not allowed");
+ goto out;
}
while (sz) {
hwaddr len = sz;
if (num_sg == max_num_sg) {
- error_report("virtio: too many write descriptors in indirect table");
- exit(1);
+ virtio_error(vdev, "virtio: too many write descriptors in "
+ "indirect table");
+ goto out;
}
iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
+ if (!iov[num_sg].iov_base) {
+ virtio_error(vdev, "virtio: bogus descriptor or out of resources");
+ goto out;
+ }
+
iov[num_sg].iov_len = len;
addr[num_sg] = pa;
@@ -502,7 +545,28 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
pa += len;
num_sg++;
}
+ ok = true;
+
+out:
*p_num_sg = num_sg;
+ return ok;
+}
+
+/* Only used by error code paths before we have a VirtQueueElement (therefore
+ * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
+ * yet.
+ */
+static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
+ struct iovec *iov)
+{
+ unsigned int i;
+
+ for (i = 0; i < out_num + in_num; i++) {
+ int is_write = i >= out_num;
+
+ cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
+ iov++;
+ }
}
static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
@@ -577,7 +641,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
hwaddr addr[VIRTQUEUE_MAX_SIZE];
struct iovec iov[VIRTQUEUE_MAX_SIZE];
VRingDesc desc;
+ int rc;
+ if (unlikely(vdev->broken)) {
+ return NULL;
+ }
if (virtio_queue_empty(vq)) {
return NULL;
}
@@ -591,20 +659,24 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
max = vq->vring.num;
if (vq->inuse >= vq->vring.num) {
- error_report("Virtqueue size exceeded");
- exit(1);
+ virtio_error(vdev, "Virtqueue size exceeded");
+ return NULL;
+ }
+
+ if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
+ return NULL;
}
- i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vq->last_avail_idx);
}
+ i = head;
vring_desc_read(vdev, &desc, desc_pa, i);
if (desc.flags & VRING_DESC_F_INDIRECT) {
if (desc.len % sizeof(VRingDesc)) {
- error_report("Invalid size for indirect buffer table");
- exit(1);
+ virtio_error(vdev, "Invalid size for indirect buffer table");
+ return NULL;
}
/* loop over the indirect descriptor table */
@@ -616,24 +688,38 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
/* Collect all the descriptors */
do {
+ bool map_ok;
+
if (desc.flags & VRING_DESC_F_WRITE) {
- virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
- VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
+ map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+ iov + out_num,
+ VIRTQUEUE_MAX_SIZE - out_num, true,
+ desc.addr, desc.len);
} else {
if (in_num) {
- error_report("Incorrect order for descriptors");
- exit(1);
+ virtio_error(vdev, "Incorrect order for descriptors");
+ goto err_undo_map;
}
- virtqueue_map_desc(&out_num, addr, iov,
- VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
+ map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+ VIRTQUEUE_MAX_SIZE, false,
+ desc.addr, desc.len);
+ }
+ if (!map_ok) {
+ goto err_undo_map;
}
/* If we've got too many, that implies a descriptor loop. */
if ((in_num + out_num) > max) {
- error_report("Looped descriptor");
- exit(1);
+ virtio_error(vdev, "Looped descriptor");
+ goto err_undo_map;
}
- } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
+
+ rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ goto err_undo_map;
+ }
/* Now copy what we have collected and mapped */
elem = virtqueue_alloc_element(sz, out_num, in_num);
@@ -651,6 +737,10 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
return elem;
+
+err_undo_map:
+ virtqueue_undo_map_desc(out_num, in_num, iov);
+ return NULL;
}
/* Reading and writing a structure directly to QEMUFile is *awful*, but
@@ -742,6 +832,10 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+ if (unlikely(vdev->broken)) {
+ return;
+ }
+
if (k->notify) {
k->notify(qbus->parent, vector);
}
@@ -825,6 +919,7 @@ void virtio_reset(void *opaque)
k->reset(vdev);
}
+ vdev->broken = false;
vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->status = 0;
@@ -1132,6 +1227,10 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
if (vq->vring.desc && vq->handle_output) {
VirtIODevice *vdev = vq->vdev;
+ if (unlikely(vdev->broken)) {
+ return;
+ }
+
trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
vq->handle_output(vdev, vq);
}
@@ -1316,6 +1415,13 @@ static bool virtio_extra_state_needed(void *opaque)
k->has_extra_state(qbus->parent);
}
+static bool virtio_broken_needed(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+
+ return vdev->broken;
+}
+
static const VMStateDescription vmstate_virtqueue = {
.name = "virtqueue_state",
.version_id = 1,
@@ -1430,6 +1536,17 @@ static const VMStateDescription vmstate_virtio_64bit_features = {
}
};
+static const VMStateDescription vmstate_virtio_broken = {
+ .name = "virtio/broken",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_broken_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(broken, VirtIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_virtio = {
.name = "virtio",
.version_id = 1,
@@ -1443,6 +1560,7 @@ static const VMStateDescription vmstate_virtio = {
&vmstate_virtio_64bit_features,
&vmstate_virtio_virtqueues,
&vmstate_virtio_ringsize,
+ &vmstate_virtio_broken,
&vmstate_virtio_extra_state,
NULL
}
@@ -1608,7 +1726,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
"inconsistent with Host index 0x%x",
i, vdev->vq[i].last_avail_idx);
return -1;
- }
+ }
if (k->load_queue) {
ret = k->load_queue(qbus->parent, i, f);
if (ret)
@@ -1753,6 +1871,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->config_vector = VIRTIO_NO_VECTOR;
vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
vdev->vm_running = runstate_is_running();
+ vdev->broken = false;
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
vdev->vq[i].vector = VIRTIO_NO_VECTOR;
vdev->vq[i].vdev = vdev;
@@ -1939,6 +2058,22 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
vdev->bus_name = g_strdup(bus_name);
}
+void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_vreport(fmt, ap);
+ va_end(ap);
+
+ vdev->broken = true;
+
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
+ virtio_notify_config(vdev);
+ }
+}
+
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index e5f087803f..559326cbd5 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -367,6 +367,7 @@ Aml *aml_sizeof(Aml *arg);
Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
Aml *aml_object_type(Aml *object);
+void build_append_int_noprefix(GArray *table, uint64_t value, int size);
void
build_header(BIOSLinker *linker, GArray *table_data,
AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index ab8e319505..29a6c9b60b 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -367,17 +367,15 @@ int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
#define PC_COMPAT_2_8 \
+
+#define PC_COMPAT_2_7 \
+ HW_COMPAT_2_7 \
{\
.driver = TYPE_X86_CPU,\
.property = "l3-cache",\
.value = "off",\
},
-
-#define PC_COMPAT_2_7 \
- PC_COMPAT_2_8 \
- HW_COMPAT_2_7
-
#define PC_COMPAT_2_6 \
HW_COMPAT_2_6 \
{\
@@ -405,7 +403,6 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
},
#define PC_COMPAT_2_5 \
- PC_COMPAT_2_6 \
HW_COMPAT_2_5
/* Helper for setting model-id for CPU models that changed model-id
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index c48e8dd597..0c89d9835b 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -37,6 +37,12 @@
typedef struct X86IOMMUState X86IOMMUState;
typedef struct X86IOMMUClass X86IOMMUClass;
+typedef enum IommuType {
+ TYPE_INTEL,
+ TYPE_AMD,
+ TYPE_NONE
+} IommuType;
+
struct X86IOMMUClass {
SysBusDeviceClass parent;
/* Intel/AMD specific realize() hook */
@@ -67,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;
struct X86IOMMUState {
SysBusDevice busdev;
bool intr_supported; /* Whether vIOMMU supports IR */
+ IommuType type; /* IOMMU type - AMD/Intel */
QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
};
@@ -76,6 +83,11 @@ struct X86IOMMUState {
*/
X86IOMMUState *x86_iommu_get_default(void);
+/*
+ * x86_iommu_get_type - get IOMMU type
+ */
+IommuType x86_iommu_get_type(void);
+
/**
* x86_iommu_iec_register_notifier - register IEC (Interrupt Entry
* Cache) notifiers
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e8b83bbb1e..772692f1b2 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -13,9 +13,12 @@
/* PCI bus */
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)
#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn))
+#define PCI_BUS_MAX 256
+#define PCI_DEVFN_MAX 256
#define PCI_SLOT_MAX 32
#define PCI_FUNC_MAX 8
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index f05559d569..888c8debe6 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -87,6 +87,7 @@ struct VirtIODevice
VirtQueue *vq;
uint16_t device_id;
bool vm_running;
+ bool broken; /* device in invalid state, needs reset */
VMChangeStateEntry *vmstate;
char *bus_name;
uint8_t device_endian;
@@ -135,6 +136,8 @@ void virtio_init(VirtIODevice *vdev, const char *name,
uint16_t device_id, size_t config_size);
void virtio_cleanup(VirtIODevice *vdev);
+void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+
/* Set the child bus name. */
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
diff --git a/tests/Makefile.include b/tests/Makefile.include
index d8101b3543..2aa78c1820 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -656,7 +656,7 @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
tests/postcopy-test$(EXESUF): tests/postcopy-test.o
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y)
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) $(libqos-pc-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index b89a551126..a39846e6fd 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -20,6 +20,11 @@
#include "libqos/pci-pc.h"
#include "libqos/virtio-pci.h"
+#include "libqos/pci-pc.h"
+#include "libqos/virtio-pci.h"
+#include "libqos/malloc-pc.h"
+#include "hw/virtio/virtio-net.h"
+
#include <linux/vhost.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_net.h>
@@ -50,6 +55,7 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_LOG_PAGE 0x1000
@@ -72,6 +78,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_MAX
} VhostUserRequest;
@@ -123,6 +130,13 @@ static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_VERSION (0x1)
/*****************************************************************************/
+enum {
+ TEST_FLAGS_OK,
+ TEST_FLAGS_DISCONNECT,
+ TEST_FLAGS_BAD,
+ TEST_FLAGS_END,
+};
+
typedef struct TestServer {
gchar *socket_path;
gchar *mig_path;
@@ -135,6 +149,9 @@ typedef struct TestServer {
CompatGCond data_cond;
int log_fd;
uint64_t rings;
+ bool test_fail;
+ int test_flags;
+ int queues;
} TestServer;
static const char *tmpfs;
@@ -249,6 +266,12 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
uint8_t *p = (uint8_t *) &msg;
int fd;
+ if (s->test_fail) {
+ qemu_chr_disconnect(chr);
+ /* now switch to non-failure */
+ s->test_fail = false;
+ }
+
if (size != VHOST_USER_HDR_SIZE) {
g_test_message("Wrong message size received %d\n", size);
return;
@@ -274,6 +297,13 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
msg.size = sizeof(m.payload.u64);
msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL |
0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+ if (s->queues > 1) {
+ msg.payload.u64 |= 0x1ULL << VIRTIO_NET_F_MQ;
+ }
+ if (s->test_flags >= TEST_FLAGS_BAD) {
+ msg.payload.u64 = 0;
+ s->test_flags = TEST_FLAGS_END;
+ }
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
@@ -281,6 +311,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
case VHOST_USER_SET_FEATURES:
g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
!=, 0ULL);
+ if (s->test_flags == TEST_FLAGS_DISCONNECT) {
+ qemu_chr_disconnect(chr);
+ s->test_flags = TEST_FLAGS_BAD;
+ }
break;
case VHOST_USER_GET_PROTOCOL_FEATURES:
@@ -288,6 +322,9 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
msg.flags |= VHOST_USER_REPLY_MASK;
msg.size = sizeof(m.payload.u64);
msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+ if (s->queues > 1) {
+ msg.payload.u64 |= 1 << VHOST_USER_PROTOCOL_F_MQ;
+ }
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
@@ -300,7 +337,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
- assert(msg.payload.state.index < 2);
+ assert(msg.payload.state.index < s->queues * 2);
s->rings &= ~(0x1ULL << msg.payload.state.index);
break;
@@ -340,10 +377,18 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
break;
case VHOST_USER_SET_VRING_BASE:
- assert(msg.payload.state.index < 2);
+ assert(msg.payload.state.index < s->queues * 2);
s->rings |= 0x1ULL << msg.payload.state.index;
break;
+ case VHOST_USER_GET_QUEUE_NUM:
+ msg.flags |= VHOST_USER_REPLY_MASK;
+ msg.size = sizeof(m.payload.u64);
+ msg.payload.u64 = s->queues;
+ p = (uint8_t *) &msg;
+ qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+ break;
+
default:
break;
}
@@ -390,10 +435,21 @@ static TestServer *test_server_new(const gchar *name)
g_cond_init(&server->data_cond);
server->log_fd = -1;
+ server->queues = 1;
return server;
}
+static void chr_event(void *opaque, int event)
+{
+ TestServer *s = opaque;
+
+ if (s->test_flags == TEST_FLAGS_END &&
+ event == CHR_EVENT_CLOSED) {
+ s->test_flags = TEST_FLAGS_OK;
+ }
+}
+
static void test_server_create_chr(TestServer *server, const gchar *opt)
{
gchar *chr_path;
@@ -402,7 +458,8 @@ static void test_server_create_chr(TestServer *server, const gchar *opt)
server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
g_free(chr_path);
- qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);
+ qemu_chr_add_handlers(server->chr, chr_can_read, chr_read,
+ chr_event, server);
}
static void test_server_listen(TestServer *server)
@@ -641,7 +698,6 @@ static void test_migrate(void)
global_qtest = global;
}
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
static void wait_for_rings_started(TestServer *s, size_t count)
{
gint64 end_time;
@@ -659,6 +715,7 @@ static void wait_for_rings_started(TestServer *s, size_t count)
g_mutex_unlock(&s->data_mutex);
}
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
static gboolean
reconnect_cb(gpointer user_data)
{
@@ -715,8 +772,144 @@ static void test_reconnect(void)
g_test_trap_assert_passed();
g_free(path);
}
+
+static void test_connect_fail_subprocess(void)
+{
+ TestServer *s = test_server_new("connect-fail");
+ char *cmd;
+
+ s->test_fail = true;
+ g_thread_new("connect", connect_thread, s);
+ cmd = GET_QEMU_CMDE(s, 2, ",server", "");
+ qtest_start(cmd);
+ g_free(cmd);
+
+ init_virtio_dev(s);
+ wait_for_fds(s);
+ wait_for_rings_started(s, 2);
+
+ qtest_end();
+ test_server_free(s);
+}
+
+static void test_connect_fail(void)
+{
+ gchar *path = g_strdup_printf("/%s/vhost-user/connect-fail/subprocess",
+ qtest_get_arch());
+ g_test_trap_subprocess(path, 0, 0);
+ g_test_trap_assert_passed();
+ g_free(path);
+}
+
+static void test_flags_mismatch_subprocess(void)
+{
+ TestServer *s = test_server_new("flags-mismatch");
+ char *cmd;
+
+ s->test_flags = TEST_FLAGS_DISCONNECT;
+ g_thread_new("connect", connect_thread, s);
+ cmd = GET_QEMU_CMDE(s, 2, ",server", "");
+ qtest_start(cmd);
+ g_free(cmd);
+
+ init_virtio_dev(s);
+ wait_for_fds(s);
+ wait_for_rings_started(s, 2);
+
+ qtest_end();
+ test_server_free(s);
+}
+
+static void test_flags_mismatch(void)
+{
+ gchar *path = g_strdup_printf("/%s/vhost-user/flags-mismatch/subprocess",
+ qtest_get_arch());
+ g_test_trap_subprocess(path, 0, 0);
+ g_test_trap_assert_passed();
+ g_free(path);
+}
+
#endif
+static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot)
+{
+ QVirtioPCIDevice *dev;
+
+ dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
+ g_assert(dev != NULL);
+ g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET);
+
+ qvirtio_pci_device_enable(dev);
+ qvirtio_reset(&qvirtio_pci, &dev->vdev);
+ qvirtio_set_acknowledge(&qvirtio_pci, &dev->vdev);
+ qvirtio_set_driver(&qvirtio_pci, &dev->vdev);
+
+ return dev;
+}
+
+static void driver_init(const QVirtioBus *bus, QVirtioDevice *dev)
+{
+ uint32_t features;
+
+ features = qvirtio_get_features(bus, dev);
+ features = features & ~(QVIRTIO_F_BAD_FEATURE |
+ (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1u << VIRTIO_RING_F_EVENT_IDX));
+ qvirtio_set_features(bus, dev, features);
+
+ qvirtio_set_driver_ok(bus, dev);
+}
+
+#define PCI_SLOT 0x04
+
+static void test_multiqueue(void)
+{
+ const int queues = 2;
+ TestServer *s = test_server_new("mq");
+ QVirtioPCIDevice *dev;
+ QPCIBus *bus;
+ QVirtQueuePCI *vq[queues * 2];
+ QGuestAllocator *alloc;
+ char *cmd;
+ int i;
+
+ s->queues = queues;
+ test_server_listen(s);
+
+ cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d "
+ "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d",
+ 512, 512, root, s->chr_name,
+ s->socket_path, "", s->chr_name,
+ queues, queues * 2 + 2);
+ qtest_start(cmd);
+ g_free(cmd);
+
+ bus = qpci_init_pc();
+ dev = virtio_net_pci_init(bus, PCI_SLOT);
+
+ alloc = pc_alloc_init();
+ for (i = 0; i < queues * 2; i++) {
+ vq[i] = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev,
+ alloc, i);
+ }
+
+ driver_init(&qvirtio_pci, &dev->vdev);
+ wait_for_rings_started(s, queues * 2);
+
+ /* End test */
+ for (i = 0; i < queues * 2; i++) {
+ qvirtqueue_cleanup(&qvirtio_pci, &vq[i]->vq, alloc);
+ }
+ pc_alloc_uninit(alloc);
+ qvirtio_pci_device_disable(dev);
+ g_free(dev->pdev);
+ g_free(dev);
+ qpci_free_pc(bus);
+ qtest_end();
+
+ test_server_free(s);
+}
+
int main(int argc, char **argv)
{
QTestState *s = NULL;
@@ -762,10 +955,17 @@ int main(int argc, char **argv)
qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
qtest_add_func("/vhost-user/migrate", test_migrate);
+ qtest_add_func("/vhost-user/multiqueue", test_multiqueue);
#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
qtest_add_func("/vhost-user/reconnect/subprocess",
test_reconnect_subprocess);
qtest_add_func("/vhost-user/reconnect", test_reconnect);
+ qtest_add_func("/vhost-user/connect-fail/subprocess",
+ test_connect_fail_subprocess);
+ qtest_add_func("/vhost-user/connect-fail", test_connect_fail);
+ qtest_add_func("/vhost-user/flags-mismatch/subprocess",
+ test_flags_mismatch_subprocess);
+ qtest_add_func("/vhost-user/flags-mismatch", test_flags_mismatch);
#endif
ret = g_test_run();