diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/hw/acpi/cxl.h | 28 | ||||
-rw-r--r-- | include/hw/boards.h | 2 | ||||
-rw-r--r-- | include/hw/cxl/cxl.h | 61 | ||||
-rw-r--r-- | include/hw/cxl/cxl_component.h | 223 | ||||
-rw-r--r-- | include/hw/cxl/cxl_device.h | 268 | ||||
-rw-r--r-- | include/hw/cxl/cxl_pci.h | 167 | ||||
-rw-r--r-- | include/hw/i386/intel_iommu.h | 1 | ||||
-rw-r--r-- | include/hw/i386/microvm.h | 4 | ||||
-rw-r--r-- | include/hw/i386/pc.h | 2 | ||||
-rw-r--r-- | include/hw/i386/x86.h | 4 | ||||
-rw-r--r-- | include/hw/pci/pci.h | 14 | ||||
-rw-r--r-- | include/hw/pci/pci_bridge.h | 20 | ||||
-rw-r--r-- | include/hw/pci/pci_bus.h | 7 | ||||
-rw-r--r-- | include/hw/pci/pci_ids.h | 1 | ||||
-rw-r--r-- | include/hw/pci/pcie_host.h | 6 | ||||
-rw-r--r-- | include/hw/pci/pcie_port.h | 2 | ||||
-rw-r--r-- | include/hw/virtio/vhost-user.h | 43 | ||||
-rw-r--r-- | include/hw/virtio/vhost-vsock-common.h | 2 | ||||
-rw-r--r-- | include/hw/virtio/vhost.h | 132 | ||||
-rw-r--r-- | include/hw/virtio/virtio-gpu.h | 3 | ||||
-rw-r--r-- | include/hw/virtio/virtio-pci.h | 255 | ||||
-rw-r--r-- | include/hw/virtio/virtio.h | 7 |
22 files changed, 1227 insertions, 25 deletions
diff --git a/include/hw/acpi/cxl.h b/include/hw/acpi/cxl.h new file mode 100644 index 0000000000..0c496538c0 --- /dev/null +++ b/include/hw/acpi/cxl.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HW_ACPI_CXL_H +#define HW_ACPI_CXL_H + +#include "hw/acpi/bios-linker-loader.h" + +void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, const char *oem_id, + const char *oem_table_id); +void build_cxl_osc_method(Aml *dev); + +#endif diff --git a/include/hw/boards.h b/include/hw/boards.h index 7b416c9787..fa57bac4fb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -269,6 +269,7 @@ struct MachineClass { bool ignore_boot_device_suffixes; bool smbus_no_migration_support; bool nvdimm_supported; + bool cxl_supported; bool numa_mem_supported; bool auto_enable_numa; SMPCompatProps smp_props; @@ -359,6 +360,7 @@ struct MachineState { CPUArchIdList *possible_cpus; CpuTopology smp; struct NVDIMMState *nvdimms_state; + struct CXLState *cxl_devices_state; struct NumaState *numa_state; }; diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h new file mode 100644 index 0000000000..21d28ca110 --- /dev/null +++ b/include/hw/cxl/cxl.h @@ -0,0 +1,61 @@ +/* + * QEMU CXL Support + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the 
terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_H +#define CXL_H + + +#include "qapi/qapi-types-machine.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "cxl_pci.h" +#include "cxl_component.h" +#include "cxl_device.h" + +#define CXL_COMPONENT_REG_BAR_IDX 0 +#define CXL_DEVICE_REG_BAR_IDX 2 + +#define CXL_WINDOW_MAX 10 + +typedef struct CXLFixedWindow { + uint64_t size; + char **targets; + struct PXBDev *target_hbs[8]; + uint8_t num_targets; + uint8_t enc_int_ways; + uint8_t enc_int_gran; + /* Todo: XOR based interleaving */ + MemoryRegion mr; + hwaddr base; +} CXLFixedWindow; + +typedef struct CXLState { + bool is_enabled; + MemoryRegion host_mr; + unsigned int next_mr_idx; + GList *fixed_windows; +} CXLState; + +struct CXLHost { + PCIHostState parent_obj; + + CXLComponentState cxl_cstate; +}; + +#define TYPE_PXB_CXL_HOST "pxb-cxl-host" +OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST) + +void cxl_fixed_memory_window_config(MachineState *ms, + CXLFixedMemoryWindowOptions *object, + Error **errp); +void cxl_fixed_memory_window_link_targets(Error **errp); + +extern const MemoryRegionOps cfmws_ops; + +#endif diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h new file mode 100644 index 0000000000..70b5018156 --- /dev/null +++ b/include/hw/cxl/cxl_component.h @@ -0,0 +1,223 @@ +/* + * QEMU CXL Component + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. 
+ */ + +#ifndef CXL_COMPONENT_H +#define CXL_COMPONENT_H + +/* CXL 2.0 - 8.2.4 */ +#define CXL2_COMPONENT_IO_REGION_SIZE 0x1000 +#define CXL2_COMPONENT_CM_REGION_SIZE 0x1000 +#define CXL2_COMPONENT_BLOCK_SIZE 0x10000 + +#include "qemu/compiler.h" +#include "qemu/range.h" +#include "qemu/typedefs.h" +#include "hw/register.h" + +enum reg_type { + CXL2_DEVICE, + CXL2_TYPE3_DEVICE, + CXL2_LOGICAL_DEVICE, + CXL2_ROOT_PORT, + CXL2_UPSTREAM_PORT, + CXL2_DOWNSTREAM_PORT +}; + +/* + * Capability registers are defined at the top of the CXL.cache/mem region and + * are packed. For our purposes we will always define the caps in the same + * order. + * CXL 2.0 - 8.2.5 Table 142 for details. + */ + +/* CXL 2.0 - 8.2.5.1 */ +REG32(CXL_CAPABILITY_HEADER, 0) + FIELD(CXL_CAPABILITY_HEADER, ID, 0, 16) + FIELD(CXL_CAPABILITY_HEADER, VERSION, 16, 4) + FIELD(CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 20, 4) + FIELD(CXL_CAPABILITY_HEADER, ARRAY_SIZE, 24, 8) + +#define CXLx_CAPABILITY_HEADER(type, offset) \ + REG32(CXL_##type##_CAPABILITY_HEADER, offset) \ + FIELD(CXL_##type##_CAPABILITY_HEADER, ID, 0, 16) \ + FIELD(CXL_##type##_CAPABILITY_HEADER, VERSION, 16, 4) \ + FIELD(CXL_##type##_CAPABILITY_HEADER, PTR, 20, 12) +CXLx_CAPABILITY_HEADER(RAS, 0x4) +CXLx_CAPABILITY_HEADER(LINK, 0x8) +CXLx_CAPABILITY_HEADER(HDM, 0xc) +CXLx_CAPABILITY_HEADER(EXTSEC, 0x10) +CXLx_CAPABILITY_HEADER(SNOOP, 0x14) + +/* + * Capability structures contain the actual registers that the CXL component + * implements. Some of these are specific to certain types of components, but + * this implementation leaves enough space regardless. 
+ */ +/* 8.2.5.9 - CXL RAS Capability Structure */ + +/* Give ample space for caps before this */ +#define CXL_RAS_REGISTERS_OFFSET 0x80 +#define CXL_RAS_REGISTERS_SIZE 0x58 +REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET) +REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4) +REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8) +REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc) +REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10) +REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14) +/* Offset 0x18 - 0x58 reserved for RAS logs */ + +/* 8.2.5.10 - CXL Security Capability Structure */ +#define CXL_SEC_REGISTERS_OFFSET \ + (CXL_RAS_REGISTERS_OFFSET + CXL_RAS_REGISTERS_SIZE) +#define CXL_SEC_REGISTERS_SIZE 0 /* We don't implement 1.1 downstream ports */ + +/* 8.2.5.11 - CXL Link Capability Structure */ +#define CXL_LINK_REGISTERS_OFFSET \ + (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE) +#define CXL_LINK_REGISTERS_SIZE 0x38 + +/* 8.2.5.12 - CXL HDM Decoder Capability Structure */ +#define HDM_DECODE_MAX 10 /* 8.2.5.12.1 */ +#define CXL_HDM_REGISTERS_OFFSET \ + (CXL_LINK_REGISTERS_OFFSET + CXL_LINK_REGISTERS_SIZE) +#define CXL_HDM_REGISTERS_SIZE (0x10 + 0x20 * HDM_DECODE_MAX) +#define HDM_DECODER_INIT(n) \ + REG32(CXL_HDM_DECODER##n##_BASE_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x10) \ + FIELD(CXL_HDM_DECODER##n##_BASE_LO, L, 28, 4) \ + REG32(CXL_HDM_DECODER##n##_BASE_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x14) \ + REG32(CXL_HDM_DECODER##n##_SIZE_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x18) \ + REG32(CXL_HDM_DECODER##n##_SIZE_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x1C) \ + REG32(CXL_HDM_DECODER##n##_CTRL, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x20) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, IG, 0, 4) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, IW, 4, 4) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, LOCK_ON_COMMIT, 8, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, COMMIT, 9, 1) \ + 
FIELD(CXL_HDM_DECODER##n##_CTRL, COMMITTED, 10, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, ERR, 11, 1) \ + FIELD(CXL_HDM_DECODER##n##_CTRL, TYPE, 12, 1) \ + REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ + REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28) + +REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET) + FIELD(CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, 0, 4) + FIELD(CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 4, 4) + FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 8, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 9, 1) + FIELD(CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 10, 1) +REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4) + FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1) + FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1) + +HDM_DECODER_INIT(0); + +/* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */ +#define EXTSEC_ENTRY_MAX 256 +#define CXL_EXTSEC_REGISTERS_OFFSET \ + (CXL_HDM_REGISTERS_OFFSET + CXL_HDM_REGISTERS_SIZE) +#define CXL_EXTSEC_REGISTERS_SIZE (8 * EXTSEC_ENTRY_MAX + 4) + +/* 8.2.5.14 - CXL IDE Capability Structure */ +#define CXL_IDE_REGISTERS_OFFSET \ + (CXL_EXTSEC_REGISTERS_OFFSET + CXL_EXTSEC_REGISTERS_SIZE) +#define CXL_IDE_REGISTERS_SIZE 0x20 + +/* 8.2.5.15 - CXL Snoop Filter Capability Structure */ +#define CXL_SNOOP_REGISTERS_OFFSET \ + (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE) +#define CXL_SNOOP_REGISTERS_SIZE 0x8 + +QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE) >= 0x1000, + "No space for registers"); + +typedef struct component_registers { + /* + * Main memory region to be registered with QEMU core. 
+ */ + MemoryRegion component_registers; + + /* + * 8.2.4 Table 141: + * 0x0000 - 0x0fff CXL.io registers + * 0x1000 - 0x1fff CXL.cache and CXL.mem + * 0x2000 - 0xdfff Implementation specific + * 0xe000 - 0xe3ff CXL ARB/MUX registers + * 0xe400 - 0xffff RSVD + */ + uint32_t io_registers[CXL2_COMPONENT_IO_REGION_SIZE >> 2]; + MemoryRegion io; + + uint32_t cache_mem_registers[CXL2_COMPONENT_CM_REGION_SIZE >> 2]; + uint32_t cache_mem_regs_write_mask[CXL2_COMPONENT_CM_REGION_SIZE >> 2]; + MemoryRegion cache_mem; + + MemoryRegion impl_specific; + MemoryRegion arb_mux; + MemoryRegion rsvd; + + /* special_ops is used for any component that needs any specific handling */ + MemoryRegionOps *special_ops; +} ComponentRegisters; + +/* + * A CXL component represents all entities in a CXL hierarchy. This includes, + * host bridges, root ports, upstream/downstream switch ports, and devices + */ +typedef struct cxl_component { + ComponentRegisters crb; + union { + struct { + Range dvsecs[CXL20_MAX_DVSEC]; + uint16_t dvsec_offset; + struct PCIDevice *pdev; + }; + }; +} CXLComponentState; + +void cxl_component_register_block_init(Object *obj, + CXLComponentState *cxl_cstate, + const char *type); +void cxl_component_register_init_common(uint32_t *reg_state, + uint32_t *write_msk, + enum reg_type type); + +void cxl_component_create_dvsec(CXLComponentState *cxl_cstate, + enum reg_type cxl_dev_type, uint16_t length, + uint16_t type, uint8_t rev, uint8_t *body); + +static inline int cxl_decoder_count_enc(int count) +{ + switch (count) { + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 6: return 3; + case 8: return 4; + case 10: return 5; + } + return 0; +} + +uint8_t cxl_interleave_ways_enc(int iw, Error **errp); +uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp); + +static inline hwaddr cxl_decode_ig(int ig) +{ + return 1 << (ig + 8); +} + +CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb); + +#endif diff --git a/include/hw/cxl/cxl_device.h 
b/include/hw/cxl/cxl_device.h new file mode 100644 index 0000000000..1e141b6621 --- /dev/null +++ b/include/hw/cxl/cxl_device.h @@ -0,0 +1,268 @@ +/* + * QEMU CXL Devices + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_DEVICE_H +#define CXL_DEVICE_H + +#include "hw/register.h" + +/* + * The following is how a CXL device's Memory Device registers are laid out. + * The only requirement from the spec is that the capabilities array and the + * capability headers start at offset 0 and are contiguously packed. The headers + * themselves provide offsets to the register fields. For this emulation, the + * actual registers * will start at offset 0x80 (m == 0x80). No secondary + * mailbox is implemented which means that the offset of the start of the + * mailbox payload (n) is given by + * n = m + sizeof(mailbox registers) + sizeof(device registers). + * + * +---------------------------------+ + * | | + * | Memory Device Registers | + * | | + * n + PAYLOAD_SIZE_MAX ----------------------------------- + * ^ | | + * | | | + * | | | + * | | | + * | | | + * | | Mailbox Payload | + * | | | + * | | | + * | | | + * n ----------------------------------- + * ^ | Mailbox Registers | + * | | | + * | ----------------------------------- + * | | | + * | | Device Registers | + * | | | + * m ----------------------------------> + * ^ | Memory Device Capability Header| + * | ----------------------------------- + * | | Mailbox Capability Header | + * | ----------------------------------- + * | | Device Capability Header | + * | ----------------------------------- + * | | Device Cap Array Register | + * 0 +---------------------------------+ + * + */ + +#define CXL_DEVICE_CAP_HDR1_OFFSET 0x10 /* Figure 138 */ +#define CXL_DEVICE_CAP_REG_SIZE 0x10 /* 8.2.8.2 */ +#define CXL_DEVICE_CAPS_MAX 4 /* 8.2.8.2.1 + 8.2.8.5 */ +#define CXL_CAPS_SIZE \ + (CXL_DEVICE_CAP_REG_SIZE 
* (CXL_DEVICE_CAPS_MAX + 1)) /* +1 for header */ + +#define CXL_DEVICE_STATUS_REGISTERS_OFFSET 0x80 /* Read comment above */ +#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8 /* 8.2.8.3.1 */ + +#define CXL_MAILBOX_REGISTERS_OFFSET \ + (CXL_DEVICE_STATUS_REGISTERS_OFFSET + CXL_DEVICE_STATUS_REGISTERS_LENGTH) +#define CXL_MAILBOX_REGISTERS_SIZE 0x20 /* 8.2.8.4, Figure 139 */ +#define CXL_MAILBOX_PAYLOAD_SHIFT 11 +#define CXL_MAILBOX_MAX_PAYLOAD_SIZE (1 << CXL_MAILBOX_PAYLOAD_SHIFT) +#define CXL_MAILBOX_REGISTERS_LENGTH \ + (CXL_MAILBOX_REGISTERS_SIZE + CXL_MAILBOX_MAX_PAYLOAD_SIZE) + +#define CXL_MEMORY_DEVICE_REGISTERS_OFFSET \ + (CXL_MAILBOX_REGISTERS_OFFSET + CXL_MAILBOX_REGISTERS_LENGTH) +#define CXL_MEMORY_DEVICE_REGISTERS_LENGTH 0x8 + +#define CXL_MMIO_SIZE \ + (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \ + CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH) + +typedef struct cxl_device_state { + MemoryRegion device_registers; + + /* mmio for device capabilities array - 8.2.8.2 */ + MemoryRegion device; + MemoryRegion memory_device; + struct { + MemoryRegion caps; + union { + uint32_t caps_reg_state32[CXL_CAPS_SIZE / 4]; + uint64_t caps_reg_state64[CXL_CAPS_SIZE / 8]; + }; + }; + + /* mmio for the mailbox registers 8.2.8.4 */ + struct { + MemoryRegion mailbox; + uint16_t payload_size; + union { + uint8_t mbox_reg_state[CXL_MAILBOX_REGISTERS_LENGTH]; + uint16_t mbox_reg_state16[CXL_MAILBOX_REGISTERS_LENGTH / 2]; + uint32_t mbox_reg_state32[CXL_MAILBOX_REGISTERS_LENGTH / 4]; + uint64_t mbox_reg_state64[CXL_MAILBOX_REGISTERS_LENGTH / 8]; + }; + struct cel_log { + uint16_t opcode; + uint16_t effect; + } cel_log[1 << 16]; + size_t cel_size; + }; + + struct { + bool set; + uint64_t last_set; + uint64_t host_set; + } timestamp; + + /* memory region for persistent memory, HDM */ + uint64_t pmem_size; +} CXLDeviceState; + +/* Initialize the register block for a device */ +void cxl_device_register_block_init(Object *obj, CXLDeviceState 
*dev); + +/* Set up default values for the register block */ +void cxl_device_register_init_common(CXLDeviceState *dev); + +/* + * CXL 2.0 - 8.2.8.1 including errata F4 + * Documented as a 128 bit register, but 64 bit accesses and the second + * 64 bits are currently reserved. + */ +REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit register but 64 bit accesses */ + FIELD(CXL_DEV_CAP_ARRAY, CAP_ID, 0, 16) + FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8) + FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16) + +/* + * Helper macro to initialize capability headers for CXL devices. + * + * In 8.2.8.2, this is listed as a 128b register, but in 8.2.8, it says: + * > No registers defined in Section 8.2.8 are larger than 64-bits wide so that + * > is the maximum access size allowed for these registers. If this rule is not + * > followed, the behavior is undefined + * + * CXL 2.0 Errata F4 states further that the layouts in the specification are + * shown as greater than 128 bits, but implementations are expected to + * use any size of access up to 64 bits. + * + * Here we've chosen to make it 4 dwords. The spec allows any pow2 multiple + * access to be used for a register up to 64 bits.
+ */ +#define CXL_DEVICE_CAPABILITY_HEADER_REGISTER(n, offset) \ + REG32(CXL_DEV_##n##_CAP_HDR0, offset) \ + FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_ID, 0, 16) \ + FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_VERSION, 16, 8) \ + REG32(CXL_DEV_##n##_CAP_HDR1, offset + 4) \ + FIELD(CXL_DEV_##n##_CAP_HDR1, CAP_OFFSET, 0, 32) \ + REG32(CXL_DEV_##n##_CAP_HDR2, offset + 8) \ + FIELD(CXL_DEV_##n##_CAP_HDR2, CAP_LENGTH, 0, 32) + +CXL_DEVICE_CAPABILITY_HEADER_REGISTER(DEVICE_STATUS, CXL_DEVICE_CAP_HDR1_OFFSET) +CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MAILBOX, CXL_DEVICE_CAP_HDR1_OFFSET + \ + CXL_DEVICE_CAP_REG_SIZE) +CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE, + CXL_DEVICE_CAP_HDR1_OFFSET + + CXL_DEVICE_CAP_REG_SIZE * 2) + +int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate); +void cxl_process_mailbox(CXLDeviceState *cxl_dstate); + +#define cxl_device_cap_init(dstate, reg, cap_id) \ + do { \ + uint32_t *cap_hdrs = dstate->caps_reg_state32; \ + int which = R_CXL_DEV_##reg##_CAP_HDR0; \ + cap_hdrs[which] = \ + FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, \ + CAP_ID, cap_id); \ + cap_hdrs[which] = FIELD_DP32( \ + cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1); \ + cap_hdrs[which + 1] = \ + FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1, \ + CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET); \ + cap_hdrs[which + 2] = \ + FIELD_DP32(cap_hdrs[which + 2], CXL_DEV_##reg##_CAP_HDR2, \ + CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \ + } while (0) + +/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */ +REG32(CXL_DEV_MAILBOX_CAP, 0) + FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5) + FIELD(CXL_DEV_MAILBOX_CAP, INT_CAP, 5, 1) + FIELD(CXL_DEV_MAILBOX_CAP, BG_INT_CAP, 6, 1) + FIELD(CXL_DEV_MAILBOX_CAP, MSI_N, 7, 4) + +/* CXL 2.0 8.2.8.4.4 Mailbox Control Register */ +REG32(CXL_DEV_MAILBOX_CTRL, 4) + FIELD(CXL_DEV_MAILBOX_CTRL, DOORBELL, 0, 1) + FIELD(CXL_DEV_MAILBOX_CTRL, INT_EN, 1, 1) + FIELD(CXL_DEV_MAILBOX_CTRL, BG_INT_EN, 2, 1) + +/* CXL 2.0 8.2.8.4.5 Command Register 
*/ +REG64(CXL_DEV_MAILBOX_CMD, 8) + FIELD(CXL_DEV_MAILBOX_CMD, COMMAND, 0, 8) + FIELD(CXL_DEV_MAILBOX_CMD, COMMAND_SET, 8, 8) + FIELD(CXL_DEV_MAILBOX_CMD, LENGTH, 16, 20) + +/* CXL 2.0 8.2.8.4.6 Mailbox Status Register */ +REG64(CXL_DEV_MAILBOX_STS, 0x10) + FIELD(CXL_DEV_MAILBOX_STS, BG_OP, 0, 1) + FIELD(CXL_DEV_MAILBOX_STS, ERRNO, 32, 16) + FIELD(CXL_DEV_MAILBOX_STS, VENDOR_ERRNO, 48, 16) + +/* CXL 2.0 8.2.8.4.7 Background Command Status Register */ +REG64(CXL_DEV_BG_CMD_STS, 0x18) + FIELD(CXL_DEV_BG_CMD_STS, OP, 0, 16) + FIELD(CXL_DEV_BG_CMD_STS, PERCENTAGE_COMP, 16, 7) + FIELD(CXL_DEV_BG_CMD_STS, RET_CODE, 32, 16) + FIELD(CXL_DEV_BG_CMD_STS, VENDOR_RET_CODE, 48, 16) + +/* CXL 2.0 8.2.8.4.8 Command Payload Registers */ +REG32(CXL_DEV_CMD_PAYLOAD, 0x20) + +REG64(CXL_MEM_DEV_STS, 0) + FIELD(CXL_MEM_DEV_STS, FATAL, 0, 1) + FIELD(CXL_MEM_DEV_STS, FW_HALT, 1, 1) + FIELD(CXL_MEM_DEV_STS, MEDIA_STATUS, 2, 2) + FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1) + FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3) + +struct CXLType3Dev { + /* Private */ + PCIDevice parent_obj; + + /* Properties */ + HostMemoryBackend *hostmem; + HostMemoryBackend *lsa; + + /* State */ + AddressSpace hostmem_as; + CXLComponentState cxl_cstate; + CXLDeviceState cxl_dstate; +}; + +#define TYPE_CXL_TYPE3 "cxl-type3" +OBJECT_DECLARE_TYPE(CXLType3Dev, CXLType3Class, CXL_TYPE3) + +struct CXLType3Class { + /* Private */ + PCIDeviceClass parent_class; + + /* public */ + uint64_t (*get_lsa_size)(CXLType3Dev *ct3d); + + uint64_t (*get_lsa)(CXLType3Dev *ct3d, void *buf, uint64_t size, + uint64_t offset); + void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size, + uint64_t offset); +}; + +MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs); +MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs); + +#endif diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h new file mode 100644 index 
0000000000..01cf002096 --- /dev/null +++ b/include/hw/cxl/cxl_pci.h @@ -0,0 +1,167 @@ +/* + * QEMU CXL PCI interfaces + * + * Copyright (c) 2020 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_PCI_H +#define CXL_PCI_H + +#include "qemu/compiler.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie.h" + +#define CXL_VENDOR_ID 0x1e98 + +#define PCIE_DVSEC_HEADER1_OFFSET 0x4 /* Offset from start of extended cap */ +#define PCIE_DVSEC_ID_OFFSET 0x8 + +#define PCIE_CXL_DEVICE_DVSEC_LENGTH 0x38 +#define PCIE_CXL1_DEVICE_DVSEC_REVID 0 +#define PCIE_CXL2_DEVICE_DVSEC_REVID 1 + +#define EXTENSIONS_PORT_DVSEC_LENGTH 0x28 +#define EXTENSIONS_PORT_DVSEC_REVID 0 + +#define GPF_PORT_DVSEC_LENGTH 0x10 +#define GPF_PORT_DVSEC_REVID 0 + +#define GPF_DEVICE_DVSEC_LENGTH 0x10 +#define GPF_DEVICE_DVSEC_REVID 0 + +#define PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0 0x14 +#define PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0 1 + +#define REG_LOC_DVSEC_LENGTH 0x24 +#define REG_LOC_DVSEC_REVID 0 + +enum { + PCIE_CXL_DEVICE_DVSEC = 0, + NON_CXL_FUNCTION_MAP_DVSEC = 2, + EXTENSIONS_PORT_DVSEC = 3, + GPF_PORT_DVSEC = 4, + GPF_DEVICE_DVSEC = 5, + PCIE_FLEXBUS_PORT_DVSEC = 7, + REG_LOC_DVSEC = 8, + MLD_DVSEC = 9, + CXL20_MAX_DVSEC +}; + +typedef struct DVSECHeader { + uint32_t cap_hdr; + uint32_t dv_hdr1; + uint16_t dv_hdr2; +} QEMU_PACKED DVSECHeader; +QEMU_BUILD_BUG_ON(sizeof(DVSECHeader) != 10); + +/* + * CXL 2.0 devices must implement certain DVSEC IDs, and can [optionally] + * implement others.
+ * + * CXL 2.0 Device: 0, [2], 5, 8 + * CXL 2.0 RP: 3, 4, 7, 8 + * CXL 2.0 Upstream Port: [2], 7, 8 + * CXL 2.0 Downstream Port: 3, 4, 7, 8 + */ + +/* CXL 2.0 - 8.1.3 (ID 0000) */ +typedef struct CXLDVSECDevice { + DVSECHeader hdr; + uint16_t cap; + uint16_t ctrl; + uint16_t status; + uint16_t ctrl2; + uint16_t status2; + uint16_t lock; + uint16_t cap2; + uint32_t range1_size_hi; + uint32_t range1_size_lo; + uint32_t range1_base_hi; + uint32_t range1_base_lo; + uint32_t range2_size_hi; + uint32_t range2_size_lo; + uint32_t range2_base_hi; + uint32_t range2_base_lo; +} CXLDVSECDevice; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDevice) != 0x38); + +/* CXL 2.0 - 8.1.5 (ID 0003) */ +typedef struct CXLDVSECPortExtensions { + DVSECHeader hdr; + uint16_t status; + uint16_t control; + uint8_t alt_bus_base; + uint8_t alt_bus_limit; + uint16_t alt_memory_base; + uint16_t alt_memory_limit; + uint16_t alt_prefetch_base; + uint16_t alt_prefetch_limit; + uint32_t alt_prefetch_base_high; + uint32_t alt_prefetch_limit_high; + uint32_t rcrb_base; + uint32_t rcrb_base_high; +} CXLDVSECPortExtensions; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortExtensions) != 0x28); + +#define PORT_CONTROL_OFFSET 0xc +#define PORT_CONTROL_UNMASK_SBR 1 +#define PORT_CONTROL_ALT_MEMID_EN 4 + +/* CXL 2.0 - 8.1.6 GPF DVSEC (ID 0004) */ +typedef struct CXLDVSECPortGPF { + DVSECHeader hdr; + uint16_t rsvd; + uint16_t phase1_ctrl; + uint16_t phase2_ctrl; +} CXLDVSECPortGPF; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortGPF) != 0x10); + +/* CXL 2.0 - 8.1.7 GPF DVSEC for CXL Device (ID 0005) */ +typedef struct CXLDVSECDeviceGPF { + DVSECHeader hdr; + uint16_t phase2_duration; + uint32_t phase2_power; +} CXLDVSECDeviceGPF; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDeviceGPF) != 0x10); + +/* CXL 2.0 - 8.1.8/8.2.1.3 Flex Bus DVSEC (ID 0007) */ +typedef struct CXLDVSECPortFlexBus { + DVSECHeader hdr; + uint16_t cap; + uint16_t ctrl; + uint16_t status; + uint32_t rcvd_mod_ts_data_phase1; +} CXLDVSECPortFlexBus;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortFlexBus) != 0x14); + +/* CXL 2.0 - 8.1.9 Register Locator DVSEC (ID 0008) */ +typedef struct CXLDVSECRegisterLocator { + DVSECHeader hdr; + uint16_t rsvd; + uint32_t reg0_base_lo; + uint32_t reg0_base_hi; + uint32_t reg1_base_lo; + uint32_t reg1_base_hi; + uint32_t reg2_base_lo; + uint32_t reg2_base_hi; +} CXLDVSECRegisterLocator; +QEMU_BUILD_BUG_ON(sizeof(CXLDVSECRegisterLocator) != 0x24); + +/* BAR Equivalence Indicator */ +#define BEI_BAR_10H 0 +#define BEI_BAR_14H 1 +#define BEI_BAR_18H 2 +#define BEI_BAR_1cH 3 +#define BEI_BAR_20H 4 +#define BEI_BAR_24H 5 + +/* Register Block Identifier */ +#define RBI_EMPTY 0 +#define RBI_COMPONENT_REG (1 << 8) +#define RBI_BAR_VIRT_ACL (2 << 8) +#define RBI_CXL_DEVICE_REG (3 << 8) + +#endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index bfa982a419..67653b0f9b 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -267,6 +267,7 @@ struct IntelIOMMUState { bool buggy_eim; /* Force buggy EIM unless eim=off */ uint8_t aw_bits; /* Host/IOVA address width (in bits) */ bool dma_drain; /* Whether DMA r/w draining enabled */ + bool dma_translation; /* Whether DMA translation supported */ /* * Protects IOMMU states in general. 
Currently it protects the diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index efcbd926fd..fad97a891d 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -67,8 +67,6 @@ #define PCIE_ECAM_SIZE 0x10000000 /* Machine type options */ -#define MICROVM_MACHINE_PIT "pit" -#define MICROVM_MACHINE_PIC "pic" #define MICROVM_MACHINE_RTC "rtc" #define MICROVM_MACHINE_PCIE "pcie" #define MICROVM_MACHINE_IOAPIC2 "ioapic2" @@ -86,8 +84,6 @@ struct MicrovmMachineState { X86MachineState parent; /* Machine type options */ - OnOffAuto pic; - OnOffAuto pit; OnOffAuto rtc; OnOffAuto pcie; OnOffAuto ioapic2; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index aff8add155..ffcac5121e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -46,7 +46,6 @@ typedef struct PCMachineState { bool acpi_build_enabled; bool smbus_enabled; bool sata_enabled; - bool pit_enabled; bool hpet_enabled; bool i8042_enabled; bool default_bus_bypass_iommu; @@ -64,7 +63,6 @@ typedef struct PCMachineState { #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMBUS "smbus" #define PC_MACHINE_SATA "sata" -#define PC_MACHINE_PIT "pit" #define PC_MACHINE_I8042 "i8042" #define PC_MACHINE_MAX_FW_SIZE "max-fw-size" #define PC_MACHINE_SMBIOS_EP "smbios-entry-point-type" diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 4841a49f86..9089bdd99c 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -65,6 +65,8 @@ struct X86MachineState { OnOffAuto smm; OnOffAuto acpi; + OnOffAuto pit; + OnOffAuto pic; char *oem_id; char *oem_table_id; @@ -84,6 +86,8 @@ struct X86MachineState { #define X86_MACHINE_SMM "smm" #define X86_MACHINE_ACPI "acpi" +#define X86_MACHINE_PIT "pit" +#define X86_MACHINE_PIC "pic" #define X86_MACHINE_OEM_ID "x-oem-id" #define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id" #define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit" diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 3a32b8dd40..44dacfa224 100644 
--- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -194,6 +194,8 @@ enum { QEMU_PCIE_LNKSTA_DLLLA = (1 << QEMU_PCIE_LNKSTA_DLLLA_BITNR), #define QEMU_PCIE_EXTCAP_INIT_BITNR 9 QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), +#define QEMU_PCIE_CXL_BITNR 10 + QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), }; #define TYPE_PCI_DEVICE "pci-device" @@ -201,6 +203,12 @@ typedef struct PCIDeviceClass PCIDeviceClass; DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass, PCI_DEVICE, TYPE_PCI_DEVICE) +/* + * Implemented by devices that can be plugged on CXL buses. In the spec, this is + * actually a "CXL Component", but we name it device to match the PCI naming. + */ +#define INTERFACE_CXL_DEVICE "cxl-device" + /* Implemented by devices that can be plugged on PCI Express buses */ #define INTERFACE_PCIE_DEVICE "pci-express-device" @@ -400,6 +408,7 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); #define TYPE_PCI_BUS "PCI" OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS) #define TYPE_PCIE_BUS "PCIE" +#define TYPE_CXL_BUS "CXL" typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque); typedef void (*pci_bus_fn)(PCIBus *b, void *opaque); @@ -762,6 +771,11 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) pci_irq_deassert(pci_dev); } +static inline int pci_is_cxl(const PCIDevice *d) +{ + return d->cap_present & QEMU_PCIE_CAP_CXL; +} + static inline int pci_is_express(const PCIDevice *d) { return d->cap_present & QEMU_PCI_CAP_EXPRESS; diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 30691a6e57..ba4bafac7c 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -28,6 +28,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" +#include "hw/cxl/cxl.h" #include "qom/object.h" typedef struct PCIBridgeWindows PCIBridgeWindows; @@ -80,6 +81,25 @@ struct PCIBridge { #define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr" #define PCI_BRIDGE_DEV_PROP_MSI "msi" #define PCI_BRIDGE_DEV_PROP_SHPC
"shpc" +typedef struct CXLHost CXLHost; + +struct PXBDev { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + uint8_t bus_nr; + uint16_t numa_node; + bool bypass_iommu; + struct cxl_dev { + CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */ + } cxl; +}; + +typedef struct PXBDev PXBDev; +#define TYPE_PXB_CXL_DEVICE "pxb-cxl" +DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV, + TYPE_PXB_CXL_DEVICE) int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset, uint16_t svid, uint16_t ssid, diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index 347440d42c..eb94e7e85c 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -24,6 +24,8 @@ enum PCIBusFlags { PCI_BUS_IS_ROOT = 0x0001, /* PCIe extended configuration space is accessible on this bus */ PCI_BUS_EXTENDED_CONFIG_SPACE = 0x0002, + /* This is a CXL Type BUS */ + PCI_BUS_CXL = 0x0004, }; struct PCIBus { @@ -53,6 +55,11 @@ struct PCIBus { Notifier machine_done; }; +static inline bool pci_bus_is_cxl(PCIBus *bus) +{ + return !!(bus->flags & PCI_BUS_CXL); +} + static inline bool pci_bus_is_root(PCIBus *bus) { return !!(bus->flags & PCI_BUS_IS_ROOT); diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 11abe22d46..898083b86f 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -53,6 +53,7 @@ #define PCI_BASE_CLASS_MEMORY 0x05 #define PCI_CLASS_MEMORY_RAM 0x0500 #define PCI_CLASS_MEMORY_FLASH 0x0501 +#define PCI_CLASS_MEMORY_CXL 0x0502 #define PCI_CLASS_MEMORY_OTHER 0x0580 #define PCI_BASE_CLASS_BRIDGE 0x06 diff --git a/include/hw/pci/pcie_host.h b/include/hw/pci/pcie_host.h index 076457b270..82d92177da 100644 --- a/include/hw/pci/pcie_host.h +++ b/include/hw/pci/pcie_host.h @@ -60,15 +60,15 @@ void pcie_host_mmcfg_update(PCIExpressHost *e, /* * PCI express ECAM (Enhanced Configuration Address Mapping) format. 
* AKA mmcfg address - * bit 20 - 28: bus number + * bit 20 - 27: bus number * bit 15 - 19: device number * bit 12 - 14: function number * bit 0 - 11: offset in configuration space of a given device */ -#define PCIE_MMCFG_SIZE_MAX (1ULL << 29) +#define PCIE_MMCFG_SIZE_MAX (1ULL << 28) #define PCIE_MMCFG_SIZE_MIN (1ULL << 20) #define PCIE_MMCFG_BUS_BIT 20 -#define PCIE_MMCFG_BUS_MASK 0x1ff +#define PCIE_MMCFG_BUS_MASK 0xff #define PCIE_MMCFG_DEVFN_BIT 12 #define PCIE_MMCFG_DEVFN_MASK 0xff #define PCIE_MMCFG_CONFOFFSET_MASK 0xfff diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index e25b289ce8..7b8193061a 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -39,6 +39,8 @@ struct PCIEPort { void pcie_port_init_reg(PCIDevice *d); +PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn); + #define TYPE_PCIE_SLOT "pcie-slot" OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT) diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index e44a41bb70..c6e693cd3f 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -11,20 +11,61 @@ #include "chardev/char-fe.h" #include "hw/virtio/virtio.h" +/** + * VhostUserHostNotifier - notifier information for one queue + * @rcu: rcu_head for cleanup + * @mr: memory region of notifier + * @addr: current mapped address + * @unmap_addr: address to be un-mapped + * @idx: virtqueue index + * + * The VhostUserHostNotifier entries are re-used. When an old mapping + * is to be released it is moved to @unmap_addr and @addr is replaced. + * Once the RCU process has completed the unmap @unmap_addr is + * cleared. 
+ */ typedef struct VhostUserHostNotifier { struct rcu_head rcu; MemoryRegion mr; void *addr; void *unmap_addr; + int idx; } VhostUserHostNotifier; +/** + * VhostUserState - shared state for all vhost-user devices + * @chr: the character backend for the socket + * @notifiers: GPtrArray of @VhostUserHostNotifier + * @memory_slots: + */ typedef struct VhostUserState { CharBackend *chr; - VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX]; + GPtrArray *notifiers; int memory_slots; + bool supports_config; } VhostUserState; +/** + * vhost_user_init() - initialise shared vhost_user state + * @user: allocated area for storing shared state + * @chr: the chardev for the vhost socket + * @errp: error handle + * + * User can either directly g_new() space for the state or embed + * VhostUserState in their larger device structure and just point to + * it. + * + * Return: true on success, false on error while setting errp. + */ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp); + +/** + * vhost_user_cleanup() - cleanup state + * @user: ptr to user state + * + * Cleans up shared state and notifiers, caller is responsible for + * freeing the @VhostUserState memory itself. 
+ */ void vhost_user_cleanup(VhostUserState *user); #endif diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h index 456a9c2365..93c782101d 100644 --- a/include/hw/virtio/vhost-vsock-common.h +++ b/include/hw/virtio/vhost-vsock-common.h @@ -44,7 +44,7 @@ int vhost_vsock_common_start(VirtIODevice *vdev); void vhost_vsock_common_stop(VirtIODevice *vdev); int vhost_vsock_common_pre_save(void *opaque); int vhost_vsock_common_post_load(void *opaque, int version_id); -void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name); +void vhost_vsock_common_realize(VirtIODevice *vdev); void vhost_vsock_common_unrealize(VirtIODevice *vdev); uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features, Error **errp); diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 58a73e7b7a..b291fe4e24 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -61,6 +61,12 @@ typedef struct VhostDevConfigOps { } VhostDevConfigOps; struct vhost_memory; + +/** + * struct vhost_dev - common vhost_dev structure + * @vhost_ops: backend specific ops + * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier) + */ struct vhost_dev { VirtIODevice *vdev; MemoryListener memory_listener; @@ -108,15 +114,129 @@ struct vhost_net { NetClientState *nc; }; +/** + * vhost_dev_init() - initialise the vhost interface + * @hdev: the common vhost_dev structure + * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa) + * @backend_type: type of backend + * @busyloop_timeout: timeout for polling virtqueue + * @errp: error handle + * + * The initialisation of the vhost device will trigger the + * initialisation of the backend and potentially capability + * negotiation of backend interface. Configuration of the VirtIO + * itself won't happen until the interface is started. + * + * Return: 0 on success, non-zero on error while setting errp. 
+ */ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout, Error **errp); + +/** + * vhost_dev_cleanup() - tear down and cleanup vhost interface + * @hdev: the common vhost_dev structure + */ void vhost_dev_cleanup(struct vhost_dev *hdev); -int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * vhost_dev_enable_notifiers() - enable event notifiers + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Enable notifications directly to the vhost device rather than being + * triggered by QEMU itself. Notifications should be enabled before + * the vhost device is started via @vhost_dev_start. + * + * Return: 0 on success, < 0 on error. + */ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * vhost_dev_disable_notifiers - disable event notifications + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Disable direct notifications to vhost device. + */ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); +/** + * vhost_dev_start() - start the vhost device + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Starts the vhost device. From this point VirtIO feature negotiation + * can start and the device can start processing VirtIO transactions. + * + * Return: 0 on success, < 0 on error. + */ +int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * vhost_dev_stop() - stop the vhost device + * @hdev: common vhost_dev structure + * @vdev: the VirtIODevice structure + * + * Stop the vhost device. After the device is stopped the notifiers + * can be disabled (@vhost_dev_disable_notifiers) and the device can + * be torn down (@vhost_dev_cleanup). 
+ */ +void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); + +/** + * DOC: vhost device configuration handling + * + * The VirtIO device configuration space is used for rarely changing + * or initialisation time parameters. The configuration can be updated + * by either the guest driver or the device itself. If the device can + * change the configuration over time the vhost handler should + * register a @VhostDevConfigOps structure with + * @vhost_dev_set_config_notifier so the guest can be notified. Some + * devices register a handler anyway and will signal an error if an + * unexpected config change happens. + */ + +/** + * vhost_dev_get_config() - fetch device configuration + * @hdev: common vhost_dev structure + * @config: pointer to device appropriate config structure + * @config_len: size of device appropriate config structure + * + * Return: 0 on success, < 0 on error while setting errp + */ +int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, + uint32_t config_len, Error **errp); + +/** + * vhost_dev_set_config() - set device configuration + * @dev: common vhost_dev structure + * @data: pointer to data to set + * @offset: offset into configuration space + * @size: length of set + * @flags: @VhostSetConfigType flags + * + * By use of @offset/@size a subset of the configuration space can be + * written to. The @flags are used to indicate if it is a normal + * transaction or related to migration. + * + * Return: 0 on success, non-zero on error + */ +int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); + +/** + * vhost_dev_set_config_notifier() - register VhostDevConfigOps + * @dev: common vhost_dev structure + * @ops: notifier ops + * + * If the device is expected to change configuration a notifier can be + * set up to handle the case. 
+ */ +void vhost_dev_set_config_notifier(struct vhost_dev *dev, + const VhostDevConfigOps *ops); + + /* Test and clear masked event pending status. * Should be called after unmask to avoid losing events. */ @@ -136,14 +256,6 @@ int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); -int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, - uint32_t config_len, Error **errp); -int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, - uint32_t offset, uint32_t size, uint32_t flags); -/* notifier callback in case vhost device config space changed - */ -void vhost_dev_set_config_notifier(struct vhost_dev *dev, - const VhostDevConfigOps *ops); void vhost_dev_reset_inflight(struct vhost_inflight *inflight); void vhost_dev_free_inflight(struct vhost_inflight *inflight); diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 2179b75703..afff9e158e 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -22,6 +22,7 @@ #include "sysemu/vhost-user-backend.h" #include "standard-headers/linux/virtio_gpu.h" +#include "standard-headers/linux/virtio_ids.h" #include "qom/object.h" #define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base" @@ -37,8 +38,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL) #define TYPE_VHOST_USER_GPU "vhost-user-gpu" OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU) -#define VIRTIO_ID_GPU 16 - struct virtio_gpu_simple_resource { uint32_t resource_id; uint32_t width; diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h new file mode 100644 index 0000000000..2446dcd9ae --- /dev/null +++ b/include/hw/virtio/virtio-pci.h @@ -0,0 +1,255 @@ +/* + * Virtio PCI Bindings + * + * Copyright IBM, Corp. 
2007 + * Copyright (c) 2009 CodeSourcery + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Paul Brook <paul@codesourcery.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef QEMU_VIRTIO_PCI_H +#define QEMU_VIRTIO_PCI_H + +#include "hw/pci/msi.h" +#include "hw/virtio/virtio-bus.h" +#include "qom/object.h" + + +/* virtio-pci-bus */ + +typedef struct VirtioBusState VirtioPCIBusState; +typedef struct VirtioBusClass VirtioPCIBusClass; + +#define TYPE_VIRTIO_PCI_BUS "virtio-pci-bus" +DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass, + VIRTIO_PCI_BUS, TYPE_VIRTIO_PCI_BUS) + +enum { + VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, + VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, + VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, + VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, + VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, + VIRTIO_PCI_FLAG_ATS_BIT, + VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, + VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, + VIRTIO_PCI_FLAG_INIT_PM_BIT, + VIRTIO_PCI_FLAG_INIT_FLR_BIT, + VIRTIO_PCI_FLAG_AER_BIT, + VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, +}; + +/* Need to activate work-arounds for buggy guests at vmstate load. */ +#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \ + (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT) + +/* Performance improves when virtqueue kick processing is decoupled from the + * vcpu thread using ioeventfd for some devices. */ +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) + +/* virtio version flags */ +#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) + +/* migrate extra state */ +#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) + +/* have pio notification for modern device ? 
*/ +#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ + (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) + +/* page per vq flag to be used by split drivers within guests */ +#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \ + (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT) + +/* address space translation service */ +#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT) + +/* Init error enabling flags */ +#define VIRTIO_PCI_FLAG_INIT_DEVERR (1 << VIRTIO_PCI_FLAG_INIT_DEVERR_BIT) + +/* Init Link Control register */ +#define VIRTIO_PCI_FLAG_INIT_LNKCTL (1 << VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT) + +/* Init Power Management */ +#define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT) + +/* Init Function Level Reset capability */ +#define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT) + +/* Advanced Error Reporting capability */ +#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT) + +/* Page Aligned Address space Translation Service */ +#define VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED \ + (1 << VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT) + +typedef struct { + MSIMessage msg; + int virq; + unsigned int users; +} VirtIOIRQFD; + +/* + * virtio-pci: This is the PCIDevice which has a virtio-pci-bus. 
+ */ +#define TYPE_VIRTIO_PCI "virtio-pci" +OBJECT_DECLARE_TYPE(VirtIOPCIProxy, VirtioPCIClass, VIRTIO_PCI) + +struct VirtioPCIClass { + PCIDeviceClass parent_class; + DeviceRealize parent_dc_realize; + void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp); +}; + +typedef struct VirtIOPCIRegion { + MemoryRegion mr; + uint32_t offset; + uint32_t size; + uint32_t type; +} VirtIOPCIRegion; + +typedef struct VirtIOPCIQueue { + uint16_t num; + bool enabled; + uint32_t desc[2]; + uint32_t avail[2]; + uint32_t used[2]; +} VirtIOPCIQueue; + +struct VirtIOPCIProxy { + PCIDevice pci_dev; + MemoryRegion bar; + union { + struct { + VirtIOPCIRegion common; + VirtIOPCIRegion isr; + VirtIOPCIRegion device; + VirtIOPCIRegion notify; + VirtIOPCIRegion notify_pio; + }; + VirtIOPCIRegion regs[5]; + }; + MemoryRegion modern_bar; + MemoryRegion io_bar; + uint32_t legacy_io_bar_idx; + uint32_t msix_bar_idx; + uint32_t modern_io_bar_idx; + uint32_t modern_mem_bar_idx; + int config_cap; + uint32_t flags; + bool disable_modern; + bool ignore_backend_features; + OnOffAuto disable_legacy; + uint32_t class_code; + uint32_t nvectors; + uint32_t dfselect; + uint32_t gfselect; + uint32_t guest_features[2]; + VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX]; + + VirtIOIRQFD *vector_irqfd; + int nvqs_with_notifiers; + VirtioBusState bus; +}; + +static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) +{ + return !proxy->disable_modern; +} + +static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy) +{ + return proxy->disable_legacy == ON_OFF_AUTO_OFF; +} + +static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy) +{ + proxy->disable_modern = false; + proxy->disable_legacy = ON_OFF_AUTO_ON; +} + +static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) +{ + proxy->disable_modern = true; +} + +/* + * virtio-input-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_INPUT_PCI "virtio-input-pci" + +/* Virtio ABI version, if we increment this, we break the guest driver. 
*/ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* Input for virtio_pci_types_register() */ +typedef struct VirtioPCIDeviceTypeInfo { + /* + * Common base class for the subclasses below. + * + * Required only if transitional_name or non_transitional_name is set. + * + * We need a separate base type instead of making all types + * inherit from generic_name for two reasons: + * 1) generic_name implements INTERFACE_PCIE_DEVICE, but + * transitional_name does not. + * 2) generic_name has the "disable-legacy" and "disable-modern" + * properties, transitional_name and non_transitional name don't. + */ + const char *base_name; + /* + * Generic device type. Optional. + * + * Supports both transitional and non-transitional modes, + * using the disable-legacy and disable-modern properties. + * If disable-legacy=auto, (non-)transitional mode is selected + * depending on the bus where the device is plugged. + * + * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE, + * but PCI Express is supported only in non-transitional mode. + * + * The only type implemented by QEMU 3.1 and older. + */ + const char *generic_name; + /* + * The transitional device type. Optional. + * + * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE. + */ + const char *transitional_name; + /* + * The non-transitional device type. Optional. + * + * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only. + */ + const char *non_transitional_name; + + /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */ + const char *parent; + + /* Same as TypeInfo fields: */ + size_t instance_size; + size_t class_size; + void (*instance_init)(Object *obj); + void (*class_init)(ObjectClass *klass, void *data); + InterfaceInfo *interfaces; +} VirtioPCIDeviceTypeInfo; + +/* Register virtio-pci type(s). @t must be static. 
*/ +void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + +/** + * virtio_pci_optimal_num_queues: + * @fixed_queues: number of queues that are always present + * + * Returns: The optimal number of queues for a multi-queue device, excluding + * @fixed_queues. + */ +unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); + +#endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index b62a35fdca..db1c0ddf6b 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -22,6 +22,7 @@ #include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_ring.h" #include "qom/object.h" +#include "hw/virtio/vhost.h" /* A guest should never accept this. It implies negotiation is broken. */ #define VIRTIO_F_BAD_FEATURE 30 @@ -102,6 +103,7 @@ struct VirtIODevice bool started; bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ bool disable_legacy_check; + bool vhost_started; VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian; @@ -160,13 +162,14 @@ struct VirtioDeviceClass { int (*post_load)(VirtIODevice *vdev); const VMStateDescription *vmsd; bool (*primary_unplug_pending)(void *opaque); + struct vhost_dev *(*get_vhost)(VirtIODevice *vdev); }; void virtio_instance_init_common(Object *proxy_obj, void *data, size_t vdev_size, const char *vdev_name); -void virtio_init(VirtIODevice *vdev, const char *name, - uint16_t device_id, size_t config_size); +void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size); + void virtio_cleanup(VirtIODevice *vdev); void virtio_error(VirtIODevice *vdev, const char *fmt, ...) G_GNUC_PRINTF(2, 3); |