Diffstat (limited to 'include')
-rw-r--r--   include/hw/acpi/cxl.h                    |  28
-rw-r--r--   include/hw/boards.h                      |   2
-rw-r--r--   include/hw/cxl/cxl.h                     |  61
-rw-r--r--   include/hw/cxl/cxl_component.h           | 223
-rw-r--r--   include/hw/cxl/cxl_device.h              | 268
-rw-r--r--   include/hw/cxl/cxl_pci.h                 | 167
-rw-r--r--   include/hw/i386/intel_iommu.h            |   1
-rw-r--r--   include/hw/i386/microvm.h                |   4
-rw-r--r--   include/hw/i386/pc.h                     |   2
-rw-r--r--   include/hw/i386/x86.h                    |   4
-rw-r--r--   include/hw/pci/pci.h                     |  14
-rw-r--r--   include/hw/pci/pci_bridge.h              |  20
-rw-r--r--   include/hw/pci/pci_bus.h                 |   7
-rw-r--r--   include/hw/pci/pci_ids.h                 |   1
-rw-r--r--   include/hw/pci/pcie_host.h               |   6
-rw-r--r--   include/hw/pci/pcie_port.h               |   2
-rw-r--r--   include/hw/virtio/vhost-user.h           |  43
-rw-r--r--   include/hw/virtio/vhost-vsock-common.h   |   2
-rw-r--r--   include/hw/virtio/vhost.h                | 132
-rw-r--r--   include/hw/virtio/virtio-gpu.h           |   3
-rw-r--r--   include/hw/virtio/virtio-pci.h           | 255
-rw-r--r--   include/hw/virtio/virtio.h               |   7
22 files changed, 1227 insertions, 25 deletions
diff --git a/include/hw/acpi/cxl.h b/include/hw/acpi/cxl.h
new file mode 100644
index 0000000000..0c496538c0
--- /dev/null
+++ b/include/hw/acpi/cxl.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ACPI_CXL_H
+#define HW_ACPI_CXL_H
+
+#include "hw/acpi/bios-linker-loader.h"
+
+void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data,
+ BIOSLinker *linker, const char *oem_id,
+ const char *oem_table_id);
+void build_cxl_osc_method(Aml *dev);
+
+#endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 7b416c9787..fa57bac4fb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -269,6 +269,7 @@ struct MachineClass {
bool ignore_boot_device_suffixes;
bool smbus_no_migration_support;
bool nvdimm_supported;
+ bool cxl_supported;
bool numa_mem_supported;
bool auto_enable_numa;
SMPCompatProps smp_props;
@@ -359,6 +360,7 @@ struct MachineState {
CPUArchIdList *possible_cpus;
CpuTopology smp;
struct NVDIMMState *nvdimms_state;
+ struct CXLState *cxl_devices_state;
struct NumaState *numa_state;
};
diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
new file mode 100644
index 0000000000..21d28ca110
--- /dev/null
+++ b/include/hw/cxl/cxl.h
@@ -0,0 +1,61 @@
+/*
+ * QEMU CXL Support
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_H
+#define CXL_H
+
+
+#include "qapi/qapi-types-machine.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_host.h"
+#include "cxl_pci.h"
+#include "cxl_component.h"
+#include "cxl_device.h"
+
+#define CXL_COMPONENT_REG_BAR_IDX 0
+#define CXL_DEVICE_REG_BAR_IDX 2
+
+#define CXL_WINDOW_MAX 10
+
+typedef struct CXLFixedWindow {
+ uint64_t size;
+ char **targets;
+ struct PXBDev *target_hbs[8];
+ uint8_t num_targets;
+ uint8_t enc_int_ways;
+ uint8_t enc_int_gran;
+ /* Todo: XOR based interleaving */
+ MemoryRegion mr;
+ hwaddr base;
+} CXLFixedWindow;
+
+typedef struct CXLState {
+ bool is_enabled;
+ MemoryRegion host_mr;
+ unsigned int next_mr_idx;
+ GList *fixed_windows;
+} CXLState;
+
+struct CXLHost {
+ PCIHostState parent_obj;
+
+ CXLComponentState cxl_cstate;
+};
+
+#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
+OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
+
+void cxl_fixed_memory_window_config(MachineState *ms,
+ CXLFixedMemoryWindowOptions *object,
+ Error **errp);
+void cxl_fixed_memory_window_link_targets(Error **errp);
+
+extern const MemoryRegionOps cfmws_ops;
+
+#endif
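
The CXLFixedWindow fields above describe a CXL Fixed Memory Window: up to eight target host bridges plus encoded interleave ways and granularity. As a rough illustration of how an offset into such a window could be steered to one of the interleaved host bridges, here is a hedged sketch; it assumes plain modulo interleaving (the XOR-based scheme is still a TODO above), that num_targets matches the configured interleave ways, and that enc_int_gran follows the same 1 << (ig + 8) rule as cxl_decode_ig() in cxl_component.h. The real routing is done by the cfmws_ops handlers and may differ.

#include "hw/cxl/cxl.h"

/* Illustrative only: pick the target host bridge for an offset into a
 * fixed memory window's memory region. */
static struct PXBDev *example_cfmws_target(const CXLFixedWindow *fw,
                                           hwaddr addr)
{
    /* Granularity in bytes, same rule as cxl_decode_ig(). */
    uint64_t gran = 1ULL << (fw->enc_int_gran + 8);
    unsigned idx = (addr / gran) % fw->num_targets;

    return fw->target_hbs[idx];
}
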
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
new file mode 100644
index 0000000000..70b5018156
--- /dev/null
+++ b/include/hw/cxl/cxl_component.h
@@ -0,0 +1,223 @@
+/*
+ * QEMU CXL Component
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_COMPONENT_H
+#define CXL_COMPONENT_H
+
+/* CXL 2.0 - 8.2.4 */
+#define CXL2_COMPONENT_IO_REGION_SIZE 0x1000
+#define CXL2_COMPONENT_CM_REGION_SIZE 0x1000
+#define CXL2_COMPONENT_BLOCK_SIZE 0x10000
+
+#include "qemu/compiler.h"
+#include "qemu/range.h"
+#include "qemu/typedefs.h"
+#include "hw/register.h"
+
+enum reg_type {
+ CXL2_DEVICE,
+ CXL2_TYPE3_DEVICE,
+ CXL2_LOGICAL_DEVICE,
+ CXL2_ROOT_PORT,
+ CXL2_UPSTREAM_PORT,
+ CXL2_DOWNSTREAM_PORT
+};
+
+/*
+ * Capability registers are defined at the top of the CXL.cache/mem region and
+ * are packed. For our purposes we will always define the caps in the same
+ * order.
+ * See CXL 2.0 - 8.2.5 Table 142 for details.
+ */
+
+/* CXL 2.0 - 8.2.5.1 */
+REG32(CXL_CAPABILITY_HEADER, 0)
+ FIELD(CXL_CAPABILITY_HEADER, ID, 0, 16)
+ FIELD(CXL_CAPABILITY_HEADER, VERSION, 16, 4)
+ FIELD(CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 20, 4)
+ FIELD(CXL_CAPABILITY_HEADER, ARRAY_SIZE, 24, 8)
+
+#define CXLx_CAPABILITY_HEADER(type, offset) \
+ REG32(CXL_##type##_CAPABILITY_HEADER, offset) \
+ FIELD(CXL_##type##_CAPABILITY_HEADER, ID, 0, 16) \
+ FIELD(CXL_##type##_CAPABILITY_HEADER, VERSION, 16, 4) \
+ FIELD(CXL_##type##_CAPABILITY_HEADER, PTR, 20, 12)
+CXLx_CAPABILITY_HEADER(RAS, 0x4)
+CXLx_CAPABILITY_HEADER(LINK, 0x8)
+CXLx_CAPABILITY_HEADER(HDM, 0xc)
+CXLx_CAPABILITY_HEADER(EXTSEC, 0x10)
+CXLx_CAPABILITY_HEADER(SNOOP, 0x14)
+
+/*
+ * Capability structures contain the actual registers that the CXL component
+ * implements. Some of these are specific to certain types of components, but
+ * this implementation leaves enough space regardless.
+ */
+/* 8.2.5.9 - CXL RAS Capability Structure */
+
+/* Give ample space for caps before this */
+#define CXL_RAS_REGISTERS_OFFSET 0x80
+#define CXL_RAS_REGISTERS_SIZE 0x58
+REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET)
+REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4)
+REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8)
+REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc)
+REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10)
+REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14)
+/* Offset 0x18 - 0x58 reserved for RAS logs */
+
+/* 8.2.5.10 - CXL Security Capability Structure */
+#define CXL_SEC_REGISTERS_OFFSET \
+ (CXL_RAS_REGISTERS_OFFSET + CXL_RAS_REGISTERS_SIZE)
+#define CXL_SEC_REGISTERS_SIZE 0 /* We don't implement 1.1 downstream ports */
+
+/* 8.2.5.11 - CXL Link Capability Structure */
+#define CXL_LINK_REGISTERS_OFFSET \
+ (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE)
+#define CXL_LINK_REGISTERS_SIZE 0x38
+
+/* 8.2.5.12 - CXL HDM Decoder Capability Structure */
+#define HDM_DECODE_MAX 10 /* 8.2.5.12.1 */
+#define CXL_HDM_REGISTERS_OFFSET \
+ (CXL_LINK_REGISTERS_OFFSET + CXL_LINK_REGISTERS_SIZE)
+#define CXL_HDM_REGISTERS_SIZE (0x10 + 0x20 * HDM_DECODE_MAX)
+#define HDM_DECODER_INIT(n) \
+ REG32(CXL_HDM_DECODER##n##_BASE_LO, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x10) \
+ FIELD(CXL_HDM_DECODER##n##_BASE_LO, L, 28, 4) \
+ REG32(CXL_HDM_DECODER##n##_BASE_HI, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x14) \
+ REG32(CXL_HDM_DECODER##n##_SIZE_LO, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x18) \
+ REG32(CXL_HDM_DECODER##n##_SIZE_HI, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x1C) \
+ REG32(CXL_HDM_DECODER##n##_CTRL, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x20) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, IG, 0, 4) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, IW, 4, 4) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, LOCK_ON_COMMIT, 8, 1) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, COMMIT, 9, 1) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, COMMITTED, 10, 1) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, ERR, 11, 1) \
+ FIELD(CXL_HDM_DECODER##n##_CTRL, TYPE, 12, 1) \
+ REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \
+ REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI, \
+ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28)
+
+REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET)
+ FIELD(CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, 0, 4)
+ FIELD(CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 4, 4)
+ FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 8, 1)
+ FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 9, 1)
+ FIELD(CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 10, 1)
+REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4)
+ FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1)
+ FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1)
+
+HDM_DECODER_INIT(0);
+
+/* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */
+#define EXTSEC_ENTRY_MAX 256
+#define CXL_EXTSEC_REGISTERS_OFFSET \
+ (CXL_HDM_REGISTERS_OFFSET + CXL_HDM_REGISTERS_SIZE)
+#define CXL_EXTSEC_REGISTERS_SIZE (8 * EXTSEC_ENTRY_MAX + 4)
+
+/* 8.2.5.14 - CXL IDE Capability Structure */
+#define CXL_IDE_REGISTERS_OFFSET \
+ (CXL_EXTSEC_REGISTERS_OFFSET + CXL_EXTSEC_REGISTERS_SIZE)
+#define CXL_IDE_REGISTERS_SIZE 0x20
+
+/* 8.2.5.15 - CXL Snoop Filter Capability Structure */
+#define CXL_SNOOP_REGISTERS_OFFSET \
+ (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
+#define CXL_SNOOP_REGISTERS_SIZE 0x8
+
+QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
+ "No space for registers");
+
+typedef struct component_registers {
+ /*
+ * Main memory region to be registered with QEMU core.
+ */
+ MemoryRegion component_registers;
+
+ /*
+ * 8.2.4 Table 141:
+ * 0x0000 - 0x0fff CXL.io registers
+ * 0x1000 - 0x1fff CXL.cache and CXL.mem
+ * 0x2000 - 0xdfff Implementation specific
+ * 0xe000 - 0xe3ff CXL ARB/MUX registers
+ * 0xe400 - 0xffff RSVD
+ */
+ uint32_t io_registers[CXL2_COMPONENT_IO_REGION_SIZE >> 2];
+ MemoryRegion io;
+
+ uint32_t cache_mem_registers[CXL2_COMPONENT_CM_REGION_SIZE >> 2];
+ uint32_t cache_mem_regs_write_mask[CXL2_COMPONENT_CM_REGION_SIZE >> 2];
+ MemoryRegion cache_mem;
+
+ MemoryRegion impl_specific;
+ MemoryRegion arb_mux;
+ MemoryRegion rsvd;
+
+ /* special_ops is used for any component that needs any specific handling */
+ MemoryRegionOps *special_ops;
+} ComponentRegisters;
+
+/*
+ * A CXL component represents all entities in a CXL hierarchy. This includes
+ * host bridges, root ports, upstream/downstream switch ports, and devices.
+ */
+typedef struct cxl_component {
+ ComponentRegisters crb;
+ union {
+ struct {
+ Range dvsecs[CXL20_MAX_DVSEC];
+ uint16_t dvsec_offset;
+ struct PCIDevice *pdev;
+ };
+ };
+} CXLComponentState;
+
+void cxl_component_register_block_init(Object *obj,
+ CXLComponentState *cxl_cstate,
+ const char *type);
+void cxl_component_register_init_common(uint32_t *reg_state,
+ uint32_t *write_msk,
+ enum reg_type type);
+
+void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
+ enum reg_type cxl_dev_type, uint16_t length,
+ uint16_t type, uint8_t rev, uint8_t *body);
+
+static inline int cxl_decoder_count_enc(int count)
+{
+ switch (count) {
+ case 1: return 0;
+ case 2: return 1;
+ case 4: return 2;
+ case 6: return 3;
+ case 8: return 4;
+ case 10: return 5;
+ }
+ return 0;
+}
+
+uint8_t cxl_interleave_ways_enc(int iw, Error **errp);
+uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp);
+
+static inline hwaddr cxl_decode_ig(int ig)
+{
+ return 1 << (ig + 8);
+}
+
+CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb);
+
+#endif
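
cxl_decode_ig() above turns the encoded 4-bit interleave granularity back into bytes (1 << (ig + 8)). Below is a minimal sketch of the matching encoder, assuming only power-of-two granularities from 256 B to 16 KiB are valid; the declared cxl_interleave_granularity_enc() may differ, e.g. in how it reports errors through errp.

#include <stdbool.h>
#include <stdint.h>

/* Same rule as cxl_decode_ig() above: granularity in bytes = 1 << (ig + 8). */
static inline uint64_t example_decode_ig(int ig)
{
    return 1ULL << (ig + 8);
}

/*
 * Hypothetical inverse of cxl_decode_ig(): encode a granularity in bytes into
 * the 4-bit IG field.  The 256 B .. 16 KiB power-of-two limit is an assumption
 * based on CXL 2.0 8.2.5.12, not something this header states.
 */
static inline bool example_encode_ig(uint64_t gran, uint8_t *enc)
{
    if (gran < 256 || gran > 16 * 1024 || (gran & (gran - 1))) {
        return false;
    }
    *enc = __builtin_ctzll(gran) - 8;   /* log2(gran) - 8 */
    return true;
}

Round-tripping shows the relationship: example_decode_ig(0) == 256, and example_encode_ig(256, &e) stores 0 in e.
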
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
new file mode 100644
index 0000000000..1e141b6621
--- /dev/null
+++ b/include/hw/cxl/cxl_device.h
@@ -0,0 +1,268 @@
+/*
+ * QEMU CXL Devices
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_DEVICE_H
+#define CXL_DEVICE_H
+
+#include "hw/register.h"
+
+/*
+ * The following is how a CXL device's Memory Device registers are laid out.
+ * The only requirement from the spec is that the capabilities array and the
+ * capability headers start at offset 0 and are contiguously packed. The headers
+ * themselves provide offsets to the register fields. For this emulation, the
+ * actual registers will start at offset 0x80 (m == 0x80). No secondary
+ * mailbox is implemented which means that the offset of the start of the
+ * mailbox payload (n) is given by
+ * n = m + sizeof(mailbox registers) + sizeof(device registers).
+ *
+ * +---------------------------------+
+ * | |
+ * | Memory Device Registers |
+ * | |
+ * n + PAYLOAD_SIZE_MAX -----------------------------------
+ * ^ | |
+ * | | |
+ * | | |
+ * | | |
+ * | | |
+ * | | Mailbox Payload |
+ * | | |
+ * | | |
+ * | | |
+ * n -----------------------------------
+ * ^ | Mailbox Registers |
+ * | | |
+ * | -----------------------------------
+ * | | |
+ * | | Device Registers |
+ * | | |
+ * m ---------------------------------->
+ * ^ | Memory Device Capability Header|
+ * | -----------------------------------
+ * | | Mailbox Capability Header |
+ * | -----------------------------------
+ * | | Device Capability Header |
+ * | -----------------------------------
+ * | | Device Cap Array Register |
+ * 0 +---------------------------------+
+ *
+ */
+
+#define CXL_DEVICE_CAP_HDR1_OFFSET 0x10 /* Figure 138 */
+#define CXL_DEVICE_CAP_REG_SIZE 0x10 /* 8.2.8.2 */
+#define CXL_DEVICE_CAPS_MAX 4 /* 8.2.8.2.1 + 8.2.8.5 */
+#define CXL_CAPS_SIZE \
+ (CXL_DEVICE_CAP_REG_SIZE * (CXL_DEVICE_CAPS_MAX + 1)) /* +1 for header */
+
+#define CXL_DEVICE_STATUS_REGISTERS_OFFSET 0x80 /* Read comment above */
+#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8 /* 8.2.8.3.1 */
+
+#define CXL_MAILBOX_REGISTERS_OFFSET \
+ (CXL_DEVICE_STATUS_REGISTERS_OFFSET + CXL_DEVICE_STATUS_REGISTERS_LENGTH)
+#define CXL_MAILBOX_REGISTERS_SIZE 0x20 /* 8.2.8.4, Figure 139 */
+#define CXL_MAILBOX_PAYLOAD_SHIFT 11
+#define CXL_MAILBOX_MAX_PAYLOAD_SIZE (1 << CXL_MAILBOX_PAYLOAD_SHIFT)
+#define CXL_MAILBOX_REGISTERS_LENGTH \
+ (CXL_MAILBOX_REGISTERS_SIZE + CXL_MAILBOX_MAX_PAYLOAD_SIZE)
+
+#define CXL_MEMORY_DEVICE_REGISTERS_OFFSET \
+ (CXL_MAILBOX_REGISTERS_OFFSET + CXL_MAILBOX_REGISTERS_LENGTH)
+#define CXL_MEMORY_DEVICE_REGISTERS_LENGTH 0x8
+
+#define CXL_MMIO_SIZE \
+ (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \
+ CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH)
+
+typedef struct cxl_device_state {
+ MemoryRegion device_registers;
+
+ /* mmio for device capabilities array - 8.2.8.2 */
+ MemoryRegion device;
+ MemoryRegion memory_device;
+ struct {
+ MemoryRegion caps;
+ union {
+ uint32_t caps_reg_state32[CXL_CAPS_SIZE / 4];
+ uint64_t caps_reg_state64[CXL_CAPS_SIZE / 8];
+ };
+ };
+
+ /* mmio for the mailbox registers 8.2.8.4 */
+ struct {
+ MemoryRegion mailbox;
+ uint16_t payload_size;
+ union {
+ uint8_t mbox_reg_state[CXL_MAILBOX_REGISTERS_LENGTH];
+ uint16_t mbox_reg_state16[CXL_MAILBOX_REGISTERS_LENGTH / 2];
+ uint32_t mbox_reg_state32[CXL_MAILBOX_REGISTERS_LENGTH / 4];
+ uint64_t mbox_reg_state64[CXL_MAILBOX_REGISTERS_LENGTH / 8];
+ };
+ struct cel_log {
+ uint16_t opcode;
+ uint16_t effect;
+ } cel_log[1 << 16];
+ size_t cel_size;
+ };
+
+ struct {
+ bool set;
+ uint64_t last_set;
+ uint64_t host_set;
+ } timestamp;
+
+ /* memory region for persistent memory, HDM */
+ uint64_t pmem_size;
+} CXLDeviceState;
+
+/* Initialize the register block for a device */
+void cxl_device_register_block_init(Object *obj, CXLDeviceState *dev);
+
+/* Set up default values for the register block */
+void cxl_device_register_init_common(CXLDeviceState *dev);
+
+/*
+ * CXL 2.0 - 8.2.8.1 including errata F4
+ * Documented as a 128 bit register, but only 64 bit accesses are supported
+ * and the second 64 bits are currently reserved.
+ */
+REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit register but 64 bit accesses */
+ FIELD(CXL_DEV_CAP_ARRAY, CAP_ID, 0, 16)
+ FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8)
+ FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16)
+
+/*
+ * Helper macro to initialize capability headers for CXL devices.
+ *
+ * In 8.2.8.2, this is listed as a 128b register, but in 8.2.8, it says:
+ * > No registers defined in Section 8.2.8 are larger than 64-bits wide so that
+ * > is the maximum access size allowed for these registers. If this rule is not
+ * > followed, the behavior is undefined
+ *
+ * CXL 2.0 Errata F4 states further that the layouts in the specification are
+ * shown as greater than 128 bits, but implementations are expected to
+ * use any size of access up to 64 bits.
+ *
+ * Here we've chosen to make it 4 dwords. The spec allows any power-of-2
+ * access size up to 64 bits to be used for a register.
+ */
+#define CXL_DEVICE_CAPABILITY_HEADER_REGISTER(n, offset) \
+ REG32(CXL_DEV_##n##_CAP_HDR0, offset) \
+ FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_ID, 0, 16) \
+ FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_VERSION, 16, 8) \
+ REG32(CXL_DEV_##n##_CAP_HDR1, offset + 4) \
+ FIELD(CXL_DEV_##n##_CAP_HDR1, CAP_OFFSET, 0, 32) \
+ REG32(CXL_DEV_##n##_CAP_HDR2, offset + 8) \
+ FIELD(CXL_DEV_##n##_CAP_HDR2, CAP_LENGTH, 0, 32)
+
+CXL_DEVICE_CAPABILITY_HEADER_REGISTER(DEVICE_STATUS, CXL_DEVICE_CAP_HDR1_OFFSET)
+CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MAILBOX, CXL_DEVICE_CAP_HDR1_OFFSET + \
+ CXL_DEVICE_CAP_REG_SIZE)
+CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE,
+ CXL_DEVICE_CAP_HDR1_OFFSET +
+ CXL_DEVICE_CAP_REG_SIZE * 2)
+
+int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate);
+void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
+
+#define cxl_device_cap_init(dstate, reg, cap_id) \
+ do { \
+ uint32_t *cap_hdrs = dstate->caps_reg_state32; \
+ int which = R_CXL_DEV_##reg##_CAP_HDR0; \
+ cap_hdrs[which] = \
+ FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, \
+ CAP_ID, cap_id); \
+ cap_hdrs[which] = FIELD_DP32( \
+ cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1); \
+ cap_hdrs[which + 1] = \
+ FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1, \
+ CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET); \
+ cap_hdrs[which + 2] = \
+ FIELD_DP32(cap_hdrs[which + 2], CXL_DEV_##reg##_CAP_HDR2, \
+ CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \
+ } while (0)
+
+/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */
+REG32(CXL_DEV_MAILBOX_CAP, 0)
+ FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5)
+ FIELD(CXL_DEV_MAILBOX_CAP, INT_CAP, 5, 1)
+ FIELD(CXL_DEV_MAILBOX_CAP, BG_INT_CAP, 6, 1)
+ FIELD(CXL_DEV_MAILBOX_CAP, MSI_N, 7, 4)
+
+/* CXL 2.0 8.2.8.4.4 Mailbox Control Register */
+REG32(CXL_DEV_MAILBOX_CTRL, 4)
+ FIELD(CXL_DEV_MAILBOX_CTRL, DOORBELL, 0, 1)
+ FIELD(CXL_DEV_MAILBOX_CTRL, INT_EN, 1, 1)
+ FIELD(CXL_DEV_MAILBOX_CTRL, BG_INT_EN, 2, 1)
+
+/* CXL 2.0 8.2.8.4.5 Command Register */
+REG64(CXL_DEV_MAILBOX_CMD, 8)
+ FIELD(CXL_DEV_MAILBOX_CMD, COMMAND, 0, 8)
+ FIELD(CXL_DEV_MAILBOX_CMD, COMMAND_SET, 8, 8)
+ FIELD(CXL_DEV_MAILBOX_CMD, LENGTH, 16, 20)
+
+/* CXL 2.0 8.2.8.4.6 Mailbox Status Register */
+REG64(CXL_DEV_MAILBOX_STS, 0x10)
+ FIELD(CXL_DEV_MAILBOX_STS, BG_OP, 0, 1)
+ FIELD(CXL_DEV_MAILBOX_STS, ERRNO, 32, 16)
+ FIELD(CXL_DEV_MAILBOX_STS, VENDOR_ERRNO, 48, 16)
+
+/* CXL 2.0 8.2.8.4.7 Background Command Status Register */
+REG64(CXL_DEV_BG_CMD_STS, 0x18)
+ FIELD(CXL_DEV_BG_CMD_STS, OP, 0, 16)
+ FIELD(CXL_DEV_BG_CMD_STS, PERCENTAGE_COMP, 16, 7)
+ FIELD(CXL_DEV_BG_CMD_STS, RET_CODE, 32, 16)
+ FIELD(CXL_DEV_BG_CMD_STS, VENDOR_RET_CODE, 48, 16)
+
+/* CXL 2.0 8.2.8.4.8 Command Payload Registers */
+REG32(CXL_DEV_CMD_PAYLOAD, 0x20)
+
+REG64(CXL_MEM_DEV_STS, 0)
+ FIELD(CXL_MEM_DEV_STS, FATAL, 0, 1)
+ FIELD(CXL_MEM_DEV_STS, FW_HALT, 1, 1)
+ FIELD(CXL_MEM_DEV_STS, MEDIA_STATUS, 2, 2)
+ FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1)
+ FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3)
+
+struct CXLType3Dev {
+ /* Private */
+ PCIDevice parent_obj;
+
+ /* Properties */
+ HostMemoryBackend *hostmem;
+ HostMemoryBackend *lsa;
+
+ /* State */
+ AddressSpace hostmem_as;
+ CXLComponentState cxl_cstate;
+ CXLDeviceState cxl_dstate;
+};
+
+#define TYPE_CXL_TYPE3 "cxl-type3"
+OBJECT_DECLARE_TYPE(CXLType3Dev, CXLType3Class, CXL_TYPE3)
+
+struct CXLType3Class {
+ /* Private */
+ PCIDeviceClass parent_class;
+
+ /* public */
+ uint64_t (*get_lsa_size)(CXLType3Dev *ct3d);
+
+ uint64_t (*get_lsa)(CXLType3Dev *ct3d, void *buf, uint64_t size,
+ uint64_t offset);
+ void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size,
+ uint64_t offset);
+};
+
+MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+ unsigned size, MemTxAttrs attrs);
+MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+ unsigned size, MemTxAttrs attrs);
+
+#endif
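
The register layout comment at the top of cxl_device.h can be checked directly against the offset macros: m is 0x80, the mailbox registers start at m + 8, and the mailbox payload (n in the diagram) follows the 0x20 bytes of mailbox registers. A standalone sketch that restates the macros from the header and prints the derived offsets (0x80, 0x88, 0xa8 and 0x8a8 respectively):

#include <stdio.h>

/* Restated from cxl_device.h above so this sketch compiles on its own. */
#define CXL_DEVICE_STATUS_REGISTERS_OFFSET 0x80
#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8
#define CXL_MAILBOX_REGISTERS_OFFSET \
    (CXL_DEVICE_STATUS_REGISTERS_OFFSET + CXL_DEVICE_STATUS_REGISTERS_LENGTH)
#define CXL_MAILBOX_REGISTERS_SIZE 0x20
#define CXL_MAILBOX_MAX_PAYLOAD_SIZE (1 << 11)
#define CXL_MAILBOX_REGISTERS_LENGTH \
    (CXL_MAILBOX_REGISTERS_SIZE + CXL_MAILBOX_MAX_PAYLOAD_SIZE)
#define CXL_MEMORY_DEVICE_REGISTERS_OFFSET \
    (CXL_MAILBOX_REGISTERS_OFFSET + CXL_MAILBOX_REGISTERS_LENGTH)

int main(void)
{
    printf("m (device status regs)  = 0x%x\n", CXL_DEVICE_STATUS_REGISTERS_OFFSET);
    printf("mailbox regs            = 0x%x\n", CXL_MAILBOX_REGISTERS_OFFSET);
    printf("n (mailbox payload)     = 0x%x\n",
           CXL_MAILBOX_REGISTERS_OFFSET + CXL_MAILBOX_REGISTERS_SIZE);
    printf("memory device regs      = 0x%x\n", CXL_MEMORY_DEVICE_REGISTERS_OFFSET);
    return 0;
}
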
diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h
new file mode 100644
index 0000000000..01cf002096
--- /dev/null
+++ b/include/hw/cxl/cxl_pci.h
@@ -0,0 +1,167 @@
+/*
+ * QEMU CXL PCI interfaces
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_PCI_H
+#define CXL_PCI_H
+
+#include "qemu/compiler.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pcie.h"
+
+#define CXL_VENDOR_ID 0x1e98
+
+#define PCIE_DVSEC_HEADER1_OFFSET 0x4 /* Offset from start of extend cap */
+#define PCIE_DVSEC_ID_OFFSET 0x8
+
+#define PCIE_CXL_DEVICE_DVSEC_LENGTH 0x38
+#define PCIE_CXL1_DEVICE_DVSEC_REVID 0
+#define PCIE_CXL2_DEVICE_DVSEC_REVID 1
+
+#define EXTENSIONS_PORT_DVSEC_LENGTH 0x28
+#define EXTENSIONS_PORT_DVSEC_REVID 0
+
+#define GPF_PORT_DVSEC_LENGTH 0x10
+#define GPF_PORT_DVSEC_REVID 0
+
+#define GPF_DEVICE_DVSEC_LENGTH 0x10
+#define GPF_DEVICE_DVSEC_REVID 0
+
+#define PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0 0x14
+#define PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0 1
+
+#define REG_LOC_DVSEC_LENGTH 0x24
+#define REG_LOC_DVSEC_REVID 0
+
+enum {
+ PCIE_CXL_DEVICE_DVSEC = 0,
+ NON_CXL_FUNCTION_MAP_DVSEC = 2,
+ EXTENSIONS_PORT_DVSEC = 3,
+ GPF_PORT_DVSEC = 4,
+ GPF_DEVICE_DVSEC = 5,
+ PCIE_FLEXBUS_PORT_DVSEC = 7,
+ REG_LOC_DVSEC = 8,
+ MLD_DVSEC = 9,
+ CXL20_MAX_DVSEC
+};
+
+typedef struct DVSECHeader {
+ uint32_t cap_hdr;
+ uint32_t dv_hdr1;
+ uint16_t dv_hdr2;
+} QEMU_PACKED DVSECHeader;
+QEMU_BUILD_BUG_ON(sizeof(DVSECHeader) != 10);
+
+/*
+ * CXL 2.0 devices must implement certain DVSEC IDs, and can [optionally]
+ * implement others.
+ *
+ * CXL 2.0 Device: 0, [2], 5, 8
+ * CXL 2.0 RP: 3, 4, 7, 8
+ * CXL 2.0 Upstream Port: [2], 7, 8
+ * CXL 2.0 Downstream Port: 3, 4, 7, 8
+ */
+
+/* CXL 2.0 - 8.1.3 (ID 0001) */
+typedef struct CXLDVSECDevice {
+ DVSECHeader hdr;
+ uint16_t cap;
+ uint16_t ctrl;
+ uint16_t status;
+ uint16_t ctrl2;
+ uint16_t status2;
+ uint16_t lock;
+ uint16_t cap2;
+ uint32_t range1_size_hi;
+ uint32_t range1_size_lo;
+ uint32_t range1_base_hi;
+ uint32_t range1_base_lo;
+ uint32_t range2_size_hi;
+ uint32_t range2_size_lo;
+ uint32_t range2_base_hi;
+ uint32_t range2_base_lo;
+} CXLDVSECDevice;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDevice) != 0x38);
+
+/* CXL 2.0 - 8.1.5 (ID 0003) */
+typedef struct CXLDVSECPortExtensions {
+ DVSECHeader hdr;
+ uint16_t status;
+ uint16_t control;
+ uint8_t alt_bus_base;
+ uint8_t alt_bus_limit;
+ uint16_t alt_memory_base;
+ uint16_t alt_memory_limit;
+ uint16_t alt_prefetch_base;
+ uint16_t alt_prefetch_limit;
+ uint32_t alt_prefetch_base_high;
+ uint32_t alt_prefetch_limit_high;
+ uint32_t rcrb_base;
+ uint32_t rcrb_base_high;
+} CXLDVSECPortExtensions;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortExtensions) != 0x28);
+
+#define PORT_CONTROL_OFFSET 0xc
+#define PORT_CONTROL_UNMASK_SBR 1
+#define PORT_CONTROL_ALT_MEMID_EN 4
+
+/* CXL 2.0 - 8.1.6 GPF DVSEC (ID 0004) */
+typedef struct CXLDVSECPortGPF {
+ DVSECHeader hdr;
+ uint16_t rsvd;
+ uint16_t phase1_ctrl;
+ uint16_t phase2_ctrl;
+} CXLDVSECPortGPF;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortGPF) != 0x10);
+
+/* CXL 2.0 - 8.1.7 GPF DVSEC for CXL Device */
+typedef struct CXLDVSECDeviceGPF {
+ DVSECHeader hdr;
+ uint16_t phase2_duration;
+ uint32_t phase2_power;
+} CXLDVSECDeviceGPF;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDeviceGPF) != 0x10);
+
+/* CXL 2.0 - 8.1.8/8.2.1.3 Flex Bus DVSEC (ID 0007) */
+typedef struct CXLDVSECPortFlexBus {
+ DVSECHeader hdr;
+ uint16_t cap;
+ uint16_t ctrl;
+ uint16_t status;
+ uint32_t rcvd_mod_ts_data_phase1;
+} CXLDVSECPortFlexBus;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortFlexBus) != 0x14);
+
+/* CXL 2.0 - 8.1.9 Register Locator DVSEC (ID 0008) */
+typedef struct CXLDVSECRegisterLocator {
+ DVSECHeader hdr;
+ uint16_t rsvd;
+ uint32_t reg0_base_lo;
+ uint32_t reg0_base_hi;
+ uint32_t reg1_base_lo;
+ uint32_t reg1_base_hi;
+ uint32_t reg2_base_lo;
+ uint32_t reg2_base_hi;
+} CXLDVSECRegisterLocator;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECRegisterLocator) != 0x24);
+
+/* BAR Equivalence Indicator */
+#define BEI_BAR_10H 0
+#define BEI_BAR_14H 1
+#define BEI_BAR_18H 2
+#define BEI_BAR_1cH 3
+#define BEI_BAR_20H 4
+#define BEI_BAR_24H 5
+
+/* Register Block Identifier */
+#define RBI_EMPTY 0
+#define RBI_COMPONENT_REG (1 << 8)
+#define RBI_BAR_VIRT_ACL (2 << 8)
+#define RBI_CXL_DEVICE_REG (3 << 8)
+
+#endif
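
The BEI and RBI defines at the bottom go together with CXLDVSECRegisterLocator: each regN_base_lo/hi pair carries a BAR indicator, a Register Block Identifier and the 64-bit block offset. A hedged sketch of how such an entry could be unpacked; the field positions (BIR in bits [2:0], identifier in bits [15:8], offset in bits [63:16]) are my reading of CXL 2.0 8.1.9 rather than something stated in this header.

#include <stdint.h>

struct example_reg_block {
    uint8_t  bir;       /* which BAR: BEI_BAR_10H is the BAR at config 0x10, ... */
    uint16_t rbi;       /* RBI_COMPONENT_REG, RBI_CXL_DEVICE_REG, ... */
    uint64_t offset;    /* offset of the register block within that BAR */
};

/* Assumed layout of one Register Block entry (CXL 2.0 8.1.9). */
static struct example_reg_block example_unpack_reg_block(uint32_t lo, uint32_t hi)
{
    struct example_reg_block b = {
        .bir    = lo & 0x7,
        .rbi    = lo & 0xff00,
        .offset = ((uint64_t)hi << 32) | (lo & 0xffff0000),
    };
    return b;
}
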
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index bfa982a419..67653b0f9b 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -267,6 +267,7 @@ struct IntelIOMMUState {
bool buggy_eim; /* Force buggy EIM unless eim=off */
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
bool dma_drain; /* Whether DMA r/w draining enabled */
+ bool dma_translation; /* Whether DMA translation supported */
/*
* Protects IOMMU states in general. Currently it protects the
diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
index efcbd926fd..fad97a891d 100644
--- a/include/hw/i386/microvm.h
+++ b/include/hw/i386/microvm.h
@@ -67,8 +67,6 @@
#define PCIE_ECAM_SIZE 0x10000000
/* Machine type options */
-#define MICROVM_MACHINE_PIT "pit"
-#define MICROVM_MACHINE_PIC "pic"
#define MICROVM_MACHINE_RTC "rtc"
#define MICROVM_MACHINE_PCIE "pcie"
#define MICROVM_MACHINE_IOAPIC2 "ioapic2"
@@ -86,8 +84,6 @@ struct MicrovmMachineState {
X86MachineState parent;
/* Machine type options */
- OnOffAuto pic;
- OnOffAuto pit;
OnOffAuto rtc;
OnOffAuto pcie;
OnOffAuto ioapic2;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index aff8add155..ffcac5121e 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -46,7 +46,6 @@ typedef struct PCMachineState {
bool acpi_build_enabled;
bool smbus_enabled;
bool sata_enabled;
- bool pit_enabled;
bool hpet_enabled;
bool i8042_enabled;
bool default_bus_bypass_iommu;
@@ -64,7 +63,6 @@ typedef struct PCMachineState {
#define PC_MACHINE_VMPORT "vmport"
#define PC_MACHINE_SMBUS "smbus"
#define PC_MACHINE_SATA "sata"
-#define PC_MACHINE_PIT "pit"
#define PC_MACHINE_I8042 "i8042"
#define PC_MACHINE_MAX_FW_SIZE "max-fw-size"
#define PC_MACHINE_SMBIOS_EP "smbios-entry-point-type"
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 4841a49f86..9089bdd99c 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -65,6 +65,8 @@ struct X86MachineState {
OnOffAuto smm;
OnOffAuto acpi;
+ OnOffAuto pit;
+ OnOffAuto pic;
char *oem_id;
char *oem_table_id;
@@ -84,6 +86,8 @@ struct X86MachineState {
#define X86_MACHINE_SMM "smm"
#define X86_MACHINE_ACPI "acpi"
+#define X86_MACHINE_PIT "pit"
+#define X86_MACHINE_PIC "pic"
#define X86_MACHINE_OEM_ID "x-oem-id"
#define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id"
#define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit"
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 3a32b8dd40..44dacfa224 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -194,6 +194,8 @@ enum {
QEMU_PCIE_LNKSTA_DLLLA = (1 << QEMU_PCIE_LNKSTA_DLLLA_BITNR),
#define QEMU_PCIE_EXTCAP_INIT_BITNR 9
QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
+#define QEMU_PCIE_CXL_BITNR 10
+ QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
};
#define TYPE_PCI_DEVICE "pci-device"
@@ -201,6 +203,12 @@ typedef struct PCIDeviceClass PCIDeviceClass;
DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass,
PCI_DEVICE, TYPE_PCI_DEVICE)
+/*
+ * Implemented by devices that can be plugged on CXL buses. In the spec, this is
+ * actually a "CXL Component, but we name it device to match the PCI naming.
+ */
+#define INTERFACE_CXL_DEVICE "cxl-device"
+
/* Implemented by devices that can be plugged on PCI Express buses */
#define INTERFACE_PCIE_DEVICE "pci-express-device"
@@ -400,6 +408,7 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
#define TYPE_PCI_BUS "PCI"
OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS)
#define TYPE_PCIE_BUS "PCIE"
+#define TYPE_CXL_BUS "CXL"
typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque);
typedef void (*pci_bus_fn)(PCIBus *b, void *opaque);
@@ -762,6 +771,11 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
pci_irq_deassert(pci_dev);
}
+static inline int pci_is_cxl(const PCIDevice *d)
+{
+ return d->cap_present & QEMU_PCIE_CAP_CXL;
+}
+
static inline int pci_is_express(const PCIDevice *d)
{
return d->cap_present & QEMU_PCI_CAP_EXPRESS;
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 30691a6e57..ba4bafac7c 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -28,6 +28,7 @@
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
+#include "hw/cxl/cxl.h"
#include "qom/object.h"
typedef struct PCIBridgeWindows PCIBridgeWindows;
@@ -80,6 +81,25 @@ struct PCIBridge {
#define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr"
#define PCI_BRIDGE_DEV_PROP_MSI "msi"
#define PCI_BRIDGE_DEV_PROP_SHPC "shpc"
+typedef struct CXLHost CXLHost;
+
+struct PXBDev {
+ /*< private >*/
+ PCIDevice parent_obj;
+ /*< public >*/
+
+ uint8_t bus_nr;
+ uint16_t numa_node;
+ bool bypass_iommu;
+ struct cxl_dev {
+ CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
+ } cxl;
+};
+
+typedef struct PXBDev PXBDev;
+#define TYPE_PXB_CXL_DEVICE "pxb-cxl"
+DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV,
+ TYPE_PXB_CXL_DEVICE)
int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
uint16_t svid, uint16_t ssid,
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 347440d42c..eb94e7e85c 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -24,6 +24,8 @@ enum PCIBusFlags {
PCI_BUS_IS_ROOT = 0x0001,
/* PCIe extended configuration space is accessible on this bus */
PCI_BUS_EXTENDED_CONFIG_SPACE = 0x0002,
+ /* This is a CXL bus */
+ PCI_BUS_CXL = 0x0004,
};
struct PCIBus {
@@ -53,6 +55,11 @@ struct PCIBus {
Notifier machine_done;
};
+static inline bool pci_bus_is_cxl(PCIBus *bus)
+{
+ return !!(bus->flags & PCI_BUS_CXL);
+}
+
static inline bool pci_bus_is_root(PCIBus *bus)
{
return !!(bus->flags & PCI_BUS_IS_ROOT);
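
pci_bus_is_cxl() pairs with the pci_is_cxl() helper added to pci.h earlier in this series. A hypothetical plug-time check sketches how the two could be combined; QEMU's actual checks and error wording may differ.

#include "qapi/error.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"

static bool example_check_cxl_bus(PCIBus *bus, PCIDevice *dev, Error **errp)
{
    if (pci_bus_is_cxl(bus) && !pci_is_cxl(dev)) {
        error_setg(errp, "non-CXL device plugged into a CXL bus");
        return false;
    }
    return true;
}
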
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 11abe22d46..898083b86f 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -53,6 +53,7 @@
#define PCI_BASE_CLASS_MEMORY 0x05
#define PCI_CLASS_MEMORY_RAM 0x0500
#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_CXL 0x0502
#define PCI_CLASS_MEMORY_OTHER 0x0580
#define PCI_BASE_CLASS_BRIDGE 0x06
diff --git a/include/hw/pci/pcie_host.h b/include/hw/pci/pcie_host.h
index 076457b270..82d92177da 100644
--- a/include/hw/pci/pcie_host.h
+++ b/include/hw/pci/pcie_host.h
@@ -60,15 +60,15 @@ void pcie_host_mmcfg_update(PCIExpressHost *e,
/*
* PCI express ECAM (Enhanced Configuration Address Mapping) format.
* AKA mmcfg address
- * bit 20 - 28: bus number
+ * bit 20 - 27: bus number
* bit 15 - 19: device number
* bit 12 - 14: function number
* bit 0 - 11: offset in configuration space of a given device
*/
-#define PCIE_MMCFG_SIZE_MAX (1ULL << 29)
+#define PCIE_MMCFG_SIZE_MAX (1ULL << 28)
#define PCIE_MMCFG_SIZE_MIN (1ULL << 20)
#define PCIE_MMCFG_BUS_BIT 20
-#define PCIE_MMCFG_BUS_MASK 0x1ff
+#define PCIE_MMCFG_BUS_MASK 0xff
#define PCIE_MMCFG_DEVFN_BIT 12
#define PCIE_MMCFG_DEVFN_MASK 0xff
#define PCIE_MMCFG_CONFOFFSET_MASK 0xfff
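
The corrected ECAM layout (8-bit bus number, so at most 256 MiB of MMCFG) can be decoded with the masks above. A small sketch follows, with the macros restated from pcie_host.h so it stands alone; the in-tree code has its own accessors, this is just an illustration of the bit layout described in the comment.

#include <stdint.h>

#define PCIE_MMCFG_BUS_BIT          20
#define PCIE_MMCFG_BUS_MASK         0xff
#define PCIE_MMCFG_DEVFN_BIT        12
#define PCIE_MMCFG_DEVFN_MASK       0xff
#define PCIE_MMCFG_CONFOFFSET_MASK  0xfff

/* Split an mmcfg offset into bus, devfn and config-space offset. */
static inline void example_mmcfg_decode(uint64_t addr, uint8_t *bus,
                                        uint8_t *devfn, uint16_t *cfg_off)
{
    *bus     = (addr >> PCIE_MMCFG_BUS_BIT) & PCIE_MMCFG_BUS_MASK;
    *devfn   = (addr >> PCIE_MMCFG_DEVFN_BIT) & PCIE_MMCFG_DEVFN_MASK;
    *cfg_off = addr & PCIE_MMCFG_CONFOFFSET_MASK;
}
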
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index e25b289ce8..7b8193061a 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -39,6 +39,8 @@ struct PCIEPort {
void pcie_port_init_reg(PCIDevice *d);
+PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn);
+
#define TYPE_PCIE_SLOT "pcie-slot"
OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT)
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index e44a41bb70..c6e693cd3f 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -11,20 +11,61 @@
#include "chardev/char-fe.h"
#include "hw/virtio/virtio.h"
+/**
+ * VhostUserHostNotifier - notifier information for one queue
+ * @rcu: rcu_head for cleanup
+ * @mr: memory region of notifier
+ * @addr: current mapped address
+ * @unmap_addr: address to be un-mapped
+ * @idx: virtioqueue index
+ *
+ * The VhostUserHostNotifier entries are re-used. When an old mapping
+ * is to be released it is moved to @unmap_addr and @addr is replaced.
+ * Once the RCU process has completed the unmap, @unmap_addr is
+ * cleared.
+ */
typedef struct VhostUserHostNotifier {
struct rcu_head rcu;
MemoryRegion mr;
void *addr;
void *unmap_addr;
+ int idx;
} VhostUserHostNotifier;
+/**
+ * VhostUserState - shared state for all vhost-user devices
+ * @chr: the character backend for the socket
+ * @notifiers: GPtrArray of @VhostUserHostNotifier
+ * @memory_slots: number of memory slots the vhost-user backend supports
+ */
typedef struct VhostUserState {
CharBackend *chr;
- VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX];
+ GPtrArray *notifiers;
int memory_slots;
+ bool supports_config;
} VhostUserState;
+/**
+ * vhost_user_init() - initialise shared vhost_user state
+ * @user: allocated area for storing shared state
+ * @chr: the chardev for the vhost socket
+ * @errp: error handle
+ *
+ * User can either directly g_new() space for the state or embed
+ * VhostUserState in their larger device structure and just point to
+ * it.
+ *
+ * Return: true on success, false on error while setting errp.
+ */
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp);
+
+/**
+ * vhost_user_cleanup() - cleanup state
+ * @user: ptr to the user state
+ *
+ * Cleans up shared state and notifiers; the caller is responsible for
+ * freeing the @VhostUserState memory itself.
+ */
void vhost_user_cleanup(VhostUserState *user);
#endif
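
VhostUserState now keeps a growable GPtrArray of per-queue notifiers instead of a fixed VIRTIO_QUEUE_MAX array, with each VhostUserHostNotifier recording its own @idx. A hedged sketch of what an index-based lookup on such an array could look like; the names are illustrative, the real helper lives in hw/virtio/vhost-user.c and may differ.

#include "hw/virtio/vhost-user.h"

/* Illustrative only: fetch the notifier for queue @idx, growing the array
 * with NULL slots as needed so indices stay stable. */
static VhostUserHostNotifier *example_notifier_get(VhostUserState *user, int idx)
{
    VhostUserHostNotifier *n;

    if (idx >= (int)user->notifiers->len) {
        g_ptr_array_set_size(user->notifiers, idx + 1);
    }

    n = g_ptr_array_index(user->notifiers, idx);
    if (!n) {
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_index(user->notifiers, idx) = n;
    }
    return n;
}
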
diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h
index 456a9c2365..93c782101d 100644
--- a/include/hw/virtio/vhost-vsock-common.h
+++ b/include/hw/virtio/vhost-vsock-common.h
@@ -44,7 +44,7 @@ int vhost_vsock_common_start(VirtIODevice *vdev);
void vhost_vsock_common_stop(VirtIODevice *vdev);
int vhost_vsock_common_pre_save(void *opaque);
int vhost_vsock_common_post_load(void *opaque, int version_id);
-void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name);
+void vhost_vsock_common_realize(VirtIODevice *vdev);
void vhost_vsock_common_unrealize(VirtIODevice *vdev);
uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features,
Error **errp);
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 58a73e7b7a..b291fe4e24 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -61,6 +61,12 @@ typedef struct VhostDevConfigOps {
} VhostDevConfigOps;
struct vhost_memory;
+
+/**
+ * struct vhost_dev - common vhost_dev structure
+ * @vhost_ops: backend specific ops
+ * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier)
+ */
struct vhost_dev {
VirtIODevice *vdev;
MemoryListener memory_listener;
@@ -108,15 +114,129 @@ struct vhost_net {
NetClientState *nc;
};
+/**
+ * vhost_dev_init() - initialise the vhost interface
+ * @hdev: the common vhost_dev structure
+ * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa)
+ * @backend_type: type of backend
+ * @busyloop_timeout: timeout for polling virtqueue
+ * @errp: error handle
+ *
+ * The initialisation of the vhost device will trigger the
+ * initialisation of the backend and potentially capability
+ * negotiation of the backend interface. Configuration of the VirtIO
+ * device itself won't happen until the interface is started.
+ *
+ * Return: 0 on success, non-zero on error while setting errp.
+ */
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type,
uint32_t busyloop_timeout, Error **errp);
+
+/**
+ * vhost_dev_cleanup() - tear down and cleanup vhost interface
+ * @hdev: the common vhost_dev structure
+ */
void vhost_dev_cleanup(struct vhost_dev *hdev);
-int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
+
+/**
+ * vhost_dev_enable_notifiers() - enable event notifiers
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ *
+ * Enable notifications directly to the vhost device rather than being
+ * triggered by QEMU itself. Notifications should be enabled before
+ * the vhost device is started via @vhost_dev_start.
+ *
+ * Return: 0 on success, < 0 on error.
+ */
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+
+/**
+ * vhost_dev_disable_notifiers - disable event notifications
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ *
+ * Disable direct notifications to vhost device.
+ */
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+/**
+ * vhost_dev_start() - start the vhost device
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ *
+ * Starts the vhost device. From this point VirtIO feature negotiation
+ * can start and the device can start processing VirtIO transactions.
+ *
+ * Return: 0 on success, < 0 on error.
+ */
+int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
+
+/**
+ * vhost_dev_stop() - stop the vhost device
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ *
+ * Stop the vhost device. After the device is stopped the notifiers
+ * can be disabled (@vhost_dev_disable_notifiers) and the device can
+ * be torn down (@vhost_dev_cleanup).
+ */
+void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
+
+/**
+ * DOC: vhost device configuration handling
+ *
+ * The VirtIO device configuration space is used for rarely changing
+ * or initialisation time parameters. The configuration can be updated
+ * by either the guest driver or the device itself. If the device can
+ * change the configuration over time the vhost handler should
+ * register a @VhostDevConfigOps structure with
+ * @vhost_dev_set_config_notifier so the guest can be notified. Some
+ * devices register a handler anyway and will signal an error if an
+ * unexpected config change happens.
+ */
+
+/**
+ * vhost_dev_get_config() - fetch device configuration
+ * @hdev: common vhost_dev structure
+ * @config: pointer to device appropriate config structure
+ * @config_len: size of device appropriate config structure
+ *
+ * Return: 0 on success, < 0 on error while setting errp
+ */
+int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
+ uint32_t config_len, Error **errp);
+
+/**
+ * vhost_dev_set_config() - set device configuration
+ * @hdev: common vhost_dev structure
+ * @data: pointer to data to set
+ * @offset: offset into configuration space
+ * @size: length of set
+ * @flags: @VhostSetConfigType flags
+ *
+ * By use of @offset/@size a subset of the configuration space can be
+ * written to. The @flags are used to indicate if it is a normal
+ * transaction or related to migration.
+ *
+ * Return: 0 on success, non-zero on error
+ */
+int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags);
+
+/**
+ * vhost_dev_set_config_notifier() - register VhostDevConfigOps
+ * @hdev: common vhost_dev structure
+ * @ops: notifier ops
+ *
+ * If the device is expected to change configuration a notifier can be
+ * set up to handle the case.
+ */
+void vhost_dev_set_config_notifier(struct vhost_dev *dev,
+ const VhostDevConfigOps *ops);
+
+
/* Test and clear masked event pending status.
* Should be called after unmask to avoid losing events.
*/
@@ -136,14 +256,6 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
struct vhost_vring_file *file);
int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
-int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
- uint32_t config_len, Error **errp);
-int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
- uint32_t offset, uint32_t size, uint32_t flags);
-/* notifier callback in case vhost device config space changed
- */
-void vhost_dev_set_config_notifier(struct vhost_dev *dev,
- const VhostDevConfigOps *ops);
void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
void vhost_dev_free_inflight(struct vhost_inflight *inflight);
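
Taken together, the new kernel-doc comments spell out a fixed ordering for the vhost lifecycle: init, enable notifiers, start, and the reverse on the way down. A condensed sketch of that ordering follows (error handling trimmed, backend type chosen arbitrarily; this is not lifted from a real device model).

#include "hw/virtio/vhost.h"

static int example_vhost_bringup(struct vhost_dev *hdev, VirtIODevice *vdev,
                                 void *opaque, Error **errp)
{
    int r;

    r = vhost_dev_init(hdev, opaque, VHOST_BACKEND_TYPE_KERNEL, 0, errp);
    if (r < 0) {
        return r;
    }
    r = vhost_dev_enable_notifiers(hdev, vdev);   /* before vhost_dev_start() */
    if (r < 0) {
        goto err_cleanup;
    }
    r = vhost_dev_start(hdev, vdev);
    if (r < 0) {
        goto err_notifiers;
    }
    return 0;

err_notifiers:
    vhost_dev_disable_notifiers(hdev, vdev);
err_cleanup:
    vhost_dev_cleanup(hdev);
    return r;
}

/* Tear-down mirrors the bring-up in reverse, as the comments describe. */
static void example_vhost_teardown(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    vhost_dev_stop(hdev, vdev);
    vhost_dev_disable_notifiers(hdev, vdev);
    vhost_dev_cleanup(hdev);
}
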
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 2179b75703..afff9e158e 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -22,6 +22,7 @@
#include "sysemu/vhost-user-backend.h"
#include "standard-headers/linux/virtio_gpu.h"
+#include "standard-headers/linux/virtio_ids.h"
#include "qom/object.h"
#define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base"
@@ -37,8 +38,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL)
#define TYPE_VHOST_USER_GPU "vhost-user-gpu"
OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU)
-#define VIRTIO_ID_GPU 16
-
struct virtio_gpu_simple_resource {
uint32_t resource_id;
uint32_t width;
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
new file mode 100644
index 0000000000..2446dcd9ae
--- /dev/null
+++ b/include/hw/virtio/virtio-pci.h
@@ -0,0 +1,255 @@
+/*
+ * Virtio PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Paul Brook <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_PCI_H
+#define QEMU_VIRTIO_PCI_H
+
+#include "hw/pci/msi.h"
+#include "hw/virtio/virtio-bus.h"
+#include "qom/object.h"
+
+
+/* virtio-pci-bus */
+
+typedef struct VirtioBusState VirtioPCIBusState;
+typedef struct VirtioBusClass VirtioPCIBusClass;
+
+#define TYPE_VIRTIO_PCI_BUS "virtio-pci-bus"
+DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass,
+ VIRTIO_PCI_BUS, TYPE_VIRTIO_PCI_BUS)
+
+enum {
+ VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
+ VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
+ VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
+ VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
+ VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
+ VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
+ VIRTIO_PCI_FLAG_ATS_BIT,
+ VIRTIO_PCI_FLAG_INIT_DEVERR_BIT,
+ VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT,
+ VIRTIO_PCI_FLAG_INIT_PM_BIT,
+ VIRTIO_PCI_FLAG_INIT_FLR_BIT,
+ VIRTIO_PCI_FLAG_AER_BIT,
+ VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT,
+};
+
+/* Need to activate work-arounds for buggy guests at vmstate load. */
+#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \
+ (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT)
+
+/* Performance improves when virtqueue kick processing is decoupled from the
+ * vcpu thread using ioeventfd for some devices. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
+
+/* virtio version flags */
+#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
+
+/* migrate extra state */
+#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)
+
+/* have pio notification for modern device ? */
+#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
+ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)
+
+/* page per vq flag to be used by split drivers within guests */
+#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
+ (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
+
+/* address space translation service */
+#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
+
+/* Init error enabling flags */
+#define VIRTIO_PCI_FLAG_INIT_DEVERR (1 << VIRTIO_PCI_FLAG_INIT_DEVERR_BIT)
+
+/* Init Link Control register */
+#define VIRTIO_PCI_FLAG_INIT_LNKCTL (1 << VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT)
+
+/* Init Power Management */
+#define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT)
+
+/* Init Function Level Reset capability */
+#define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT)
+
+/* Advanced Error Reporting capability */
+#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT)
+
+/* Page Aligned Address space Translation Service */
+#define VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED \
+ (1 << VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT)
+
+typedef struct {
+ MSIMessage msg;
+ int virq;
+ unsigned int users;
+} VirtIOIRQFD;
+
+/*
+ * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
+ */
+#define TYPE_VIRTIO_PCI "virtio-pci"
+OBJECT_DECLARE_TYPE(VirtIOPCIProxy, VirtioPCIClass, VIRTIO_PCI)
+
+struct VirtioPCIClass {
+ PCIDeviceClass parent_class;
+ DeviceRealize parent_dc_realize;
+ void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
+};
+
+typedef struct VirtIOPCIRegion {
+ MemoryRegion mr;
+ uint32_t offset;
+ uint32_t size;
+ uint32_t type;
+} VirtIOPCIRegion;
+
+typedef struct VirtIOPCIQueue {
+ uint16_t num;
+ bool enabled;
+ uint32_t desc[2];
+ uint32_t avail[2];
+ uint32_t used[2];
+} VirtIOPCIQueue;
+
+struct VirtIOPCIProxy {
+ PCIDevice pci_dev;
+ MemoryRegion bar;
+ union {
+ struct {
+ VirtIOPCIRegion common;
+ VirtIOPCIRegion isr;
+ VirtIOPCIRegion device;
+ VirtIOPCIRegion notify;
+ VirtIOPCIRegion notify_pio;
+ };
+ VirtIOPCIRegion regs[5];
+ };
+ MemoryRegion modern_bar;
+ MemoryRegion io_bar;
+ uint32_t legacy_io_bar_idx;
+ uint32_t msix_bar_idx;
+ uint32_t modern_io_bar_idx;
+ uint32_t modern_mem_bar_idx;
+ int config_cap;
+ uint32_t flags;
+ bool disable_modern;
+ bool ignore_backend_features;
+ OnOffAuto disable_legacy;
+ uint32_t class_code;
+ uint32_t nvectors;
+ uint32_t dfselect;
+ uint32_t gfselect;
+ uint32_t guest_features[2];
+ VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
+
+ VirtIOIRQFD *vector_irqfd;
+ int nvqs_with_notifiers;
+ VirtioBusState bus;
+};
+
+static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy)
+{
+ return !proxy->disable_modern;
+}
+
+static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy)
+{
+ return proxy->disable_legacy == ON_OFF_AUTO_OFF;
+}
+
+static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy)
+{
+ proxy->disable_modern = false;
+ proxy->disable_legacy = ON_OFF_AUTO_ON;
+}
+
+static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy)
+{
+ proxy->disable_modern = true;
+}
+
+/*
+ * virtio-input-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_INPUT_PCI "virtio-input-pci"
+
+/* Virtio ABI version, if we increment this, we break the guest driver. */
+#define VIRTIO_PCI_ABI_VERSION 0
+
+/* Input for virtio_pci_types_register() */
+typedef struct VirtioPCIDeviceTypeInfo {
+ /*
+ * Common base class for the subclasses below.
+ *
+ * Required only if transitional_name or non_transitional_name is set.
+ *
+ * We need a separate base type instead of making all types
+ * inherit from generic_name for two reasons:
+ * 1) generic_name implements INTERFACE_PCIE_DEVICE, but
+ * transitional_name does not.
+ * 2) generic_name has the "disable-legacy" and "disable-modern"
+ * properties, transitional_name and non_transitional_name don't.
+ */
+ const char *base_name;
+ /*
+ * Generic device type. Optional.
+ *
+ * Supports both transitional and non-transitional modes,
+ * using the disable-legacy and disable-modern properties.
+ * If disable-legacy=auto, (non-)transitional mode is selected
+ * depending on the bus where the device is plugged.
+ *
+ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE,
+ * but PCI Express is supported only in non-transitional mode.
+ *
+ * The only type implemented by QEMU 3.1 and older.
+ */
+ const char *generic_name;
+ /*
+ * The transitional device type. Optional.
+ *
+ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE.
+ */
+ const char *transitional_name;
+ /*
+ * The non-transitional device type. Optional.
+ *
+ * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only.
+ */
+ const char *non_transitional_name;
+
+ /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */
+ const char *parent;
+
+ /* Same as TypeInfo fields: */
+ size_t instance_size;
+ size_t class_size;
+ void (*instance_init)(Object *obj);
+ void (*class_init)(ObjectClass *klass, void *data);
+ InterfaceInfo *interfaces;
+} VirtioPCIDeviceTypeInfo;
+
+/* Register virtio-pci type(s). @t must be static. */
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
+
+/**
+ * virtio_pci_optimal_num_queues:
+ * @fixed_queues: number of queues that are always present
+ *
+ * Returns: The optimal number of queues for a multi-queue device, excluding
+ * @fixed_queues.
+ */
+unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
+
+#endif
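
The VirtioPCIDeviceTypeInfo comments describe how a single proxy implementation fans out into generic, transitional and non-transitional QOM types. A hedged sketch of how a hypothetical "virtio-foo" proxy could use virtio_pci_types_register(); the names, struct and callbacks are invented for illustration, but real users such as the in-tree virtio-*-pci devices follow the same shape.

#include "qemu/osdep.h"
#include "qemu/module.h"
#include "hw/virtio/virtio-pci.h"

/* Hypothetical proxy type built on VirtIOPCIProxy. */
typedef struct VirtIOFooPCI {
    VirtIOPCIProxy parent_obj;
    /* the wrapped VirtIODevice state would live here */
} VirtIOFooPCI;

static void virtio_foo_pci_instance_init(Object *obj)
{
    /* virtio_instance_init_common() would initialise the child vdev here */
}

static const VirtioPCIDeviceTypeInfo virtio_foo_pci_info = {
    .base_name             = "virtio-foo-pci-base",
    .generic_name          = "virtio-foo-pci",
    .transitional_name     = "virtio-foo-pci-transitional",
    .non_transitional_name = "virtio-foo-pci-non-transitional",
    .instance_size         = sizeof(VirtIOFooPCI),
    .instance_init         = virtio_foo_pci_instance_init,
};

static void virtio_foo_pci_register_types(void)
{
    virtio_pci_types_register(&virtio_foo_pci_info);
}
type_init(virtio_foo_pci_register_types)
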
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index b62a35fdca..db1c0ddf6b 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -22,6 +22,7 @@
#include "standard-headers/linux/virtio_config.h"
#include "standard-headers/linux/virtio_ring.h"
#include "qom/object.h"
+#include "hw/virtio/vhost.h"
/* A guest should never accept this. It implies negotiation is broken. */
#define VIRTIO_F_BAD_FEATURE 30
@@ -102,6 +103,7 @@ struct VirtIODevice
bool started;
bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
bool disable_legacy_check;
+ bool vhost_started;
VMChangeStateEntry *vmstate;
char *bus_name;
uint8_t device_endian;
@@ -160,13 +162,14 @@ struct VirtioDeviceClass {
int (*post_load)(VirtIODevice *vdev);
const VMStateDescription *vmsd;
bool (*primary_unplug_pending)(void *opaque);
+ struct vhost_dev *(*get_vhost)(VirtIODevice *vdev);
};
void virtio_instance_init_common(Object *proxy_obj, void *data,
size_t vdev_size, const char *vdev_name);
-void virtio_init(VirtIODevice *vdev, const char *name,
- uint16_t device_id, size_t config_size);
+void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size);
+
void virtio_cleanup(VirtIODevice *vdev);
void virtio_error(VirtIODevice *vdev, const char *fmt, ...) G_GNUC_PRINTF(2, 3);