aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rwxr-xr-xconfigure4
-rw-r--r--docs/about/deprecated.rst8
-rw-r--r--docs/interop/vhost-user.rst20
-rw-r--r--docs/pcie_sriov.txt115
-rw-r--r--docs/specs/acpi_erst.rst200
-rw-r--r--docs/specs/index.rst1
-rw-r--r--docs/specs/pci-ids.txt1
-rw-r--r--hw/acpi/aml-build.c8
-rw-r--r--hw/acpi/erst.c5
-rw-r--r--hw/acpi/pcihp.c12
-rw-r--r--hw/i386/acpi-build.c8
-rw-r--r--hw/i386/acpi-microvm.c6
-rw-r--r--hw/i386/intel_iommu.c14
-rw-r--r--hw/i386/intel_iommu_internal.h1
-rw-r--r--hw/i386/pc.c30
-rw-r--r--hw/i386/pc_piix.c1
-rw-r--r--hw/i386/x86.c16
-rw-r--r--hw/misc/pvpanic-isa.c4
-rw-r--r--hw/misc/pvpanic-pci.c4
-rw-r--r--hw/misc/pvpanic.c5
-rw-r--r--hw/net/virtio-net.c13
-rw-r--r--hw/pci-bridge/pci_expander_bridge.c6
-rw-r--r--hw/pci-bridge/xio3130_downstream.c5
-rw-r--r--hw/pci-bridge/xio3130_upstream.c2
-rw-r--r--hw/pci/meson.build1
-rw-r--r--hw/pci/pci.c104
-rw-r--r--hw/pci/pcie.c16
-rw-r--r--hw/pci/pcie_sriov.c302
-rw-r--r--hw/pci/trace-events5
-rw-r--r--hw/smbios/smbios.c80
-rw-r--r--hw/virtio/trace-events4
-rw-r--r--hw/virtio/vhost-user-i2c.c11
-rw-r--r--hw/virtio/vhost-user.c61
-rw-r--r--hw/virtio/vhost-vdpa.c21
-rw-r--r--hw/virtio/vhost-vsock-common.c10
-rw-r--r--hw/virtio/vhost.c6
-rw-r--r--hw/virtio/virtio-bus.c12
-rw-r--r--hw/virtio/virtio-iommu.c99
-rw-r--r--include/hw/acpi/acpi-defs.h1
-rw-r--r--include/hw/i386/intel_iommu.h1
-rw-r--r--include/hw/i386/pc.h2
-rw-r--r--include/hw/i386/x86.h2
-rw-r--r--include/hw/input/i8042.h15
-rw-r--r--include/hw/misc/pvpanic.h8
-rw-r--r--include/hw/pci-bridge/xio3130_downstream.h15
-rw-r--r--include/hw/pci/pci.h12
-rw-r--r--include/hw/pci/pci_regs.h1
-rw-r--r--include/hw/pci/pcie.h7
-rw-r--r--include/hw/pci/pcie_sriov.h77
-rw-r--r--include/hw/virtio/vhost-user-i2c.h3
-rw-r--r--include/hw/virtio/vhost-user.h3
-rw-r--r--include/hw/virtio/virtio-iommu.h1
-rw-r--r--include/qemu/event_notifier.h1
-rw-r--r--include/qemu/typedefs.h2
-rw-r--r--include/standard-headers/linux/pvpanic.h9
-rw-r--r--meson.build2
-rw-r--r--qemu-options.hx3
-rw-r--r--qom/object.c6
-rwxr-xr-xscripts/update-linux-headers.sh3
-rw-r--r--tests/data/acpi/q35/FACPbin244 -> 244 bytes
-rw-r--r--tests/data/acpi/q35/FACP.nosmmbin244 -> 244 bytes
-rw-r--r--tests/data/acpi/q35/FACP.slicbin244 -> 244 bytes
-rw-r--r--tests/data/acpi/q35/FACP.xapicbin244 -> 244 bytes
-rw-r--r--tests/qtest/virtio-iommu-test.c2
-rw-r--r--util/event_notifier-posix.c5
66 files changed, 1229 insertions, 174 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 56460c886d..38d1ac8803 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1819,7 +1819,6 @@ F: docs/specs/acpi_hw_reduced_hotplug.rst
ACPI/VIOT
M: Jean-Philippe Brucker <jean-philippe@linaro.org>
-R: Ani Sinha <ani@anisinha.ca>
S: Supported
F: hw/acpi/viot.c
F: hw/acpi/viot.h
diff --git a/configure b/configure
index 81618708e4..886000346a 100755
--- a/configure
+++ b/configure
@@ -1659,8 +1659,8 @@ fi
# vhost interdependencies and host support
# vhost backends
-if test "$vhost_user" = "yes" && test "$linux" != "yes"; then
- error_exit "vhost-user is only available on Linux"
+if test "$vhost_user" = "yes" && test "$mingw32" = "yes"; then
+ error_exit "vhost-user is not available on Windows"
fi
test "$vhost_vdpa" = "" && vhost_vdpa=$linux
if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 85773db631..cf02ef6821 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -324,6 +324,14 @@ machine is hardly emulated at all (e.g. neither the LCD nor the USB part had
been implemented), so there is not much value added by this board. Use the
``ref405ep`` machine instead.
+``pc-i440fx-1.4`` up to ``pc-i440fx-1.7`` (since 7.0)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+These old machine types are quite neglected nowadays and thus might have
+various pitfalls with regards to live migration. Use a newer machine type
+instead.
+
+
Backend options
---------------
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index edc3ad84a3..4dbc84fd00 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -38,6 +38,26 @@ conventions <backend_conventions>`.
*Master* and *slave* can be either a client (i.e. connecting) or
server (listening) in the socket communication.
+Support for platforms other than Linux
+--------------------------------------
+
+While vhost-user was initially developed targeting Linux, nowadays it
+is supported on any platform that provides the following features:
+
+- A way for requesting shared memory represented by a file descriptor
+ so it can be passed over a UNIX domain socket and then mapped by the
+ other process.
+
+- AF_UNIX sockets with SCM_RIGHTS, so QEMU and the other process can
+ exchange messages through it, including ancillary data when needed.
+
+- Either eventfd or pipe/pipe2. On platforms where eventfd is not
+ available, QEMU will automatically fall back to pipe2 or, as a last
+ resort, pipe. Each file descriptor will be used for receiving or
+ sending events by reading or writing (respectively) an 8-byte value
+ to the corresponding it. The 8-value itself has no meaning and
+ should not be interpreted.
+
Message Specification
=====================
diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
new file mode 100644
index 0000000000..f5e891e1d4
--- /dev/null
+++ b/docs/pcie_sriov.txt
@@ -0,0 +1,115 @@
+PCI SR/IOV EMULATION SUPPORT
+============================
+
+Description
+===========
+SR/IOV (Single Root I/O Virtualization) is an optional extended capability
+of a PCI Express device. It allows a single physical function (PF) to appear as multiple
+virtual functions (VFs) for the main purpose of eliminating software
+overhead in I/O from virtual machines.
+
+Qemu now implements the basic common functionality to enable an emulated device
+to support SR/IOV. Yet no fully implemented devices exists in Qemu, but a
+proof-of-concept hack of the Intel igb can be found here:
+
+git://github.com/knuto/qemu.git sriov_patches_v5
+
+Implementation
+==============
+Implementing emulation of an SR/IOV capable device typically consists of
+implementing support for two types of device classes; the "normal" physical device
+(PF) and the virtual device (VF). From Qemu's perspective, the VFs are just
+like other devices, except that some of their properties are derived from
+the PF.
+
+A virtual function is different from a physical function in that the BAR
+space for all VFs are defined by the BAR registers in the PFs SR/IOV
+capability. All VFs have the same BARs and BAR sizes.
+
+Accesses to these virtual BARs then is computed as
+
+ <VF BAR start> + <VF number> * <BAR sz> + <offset>
+
+From our emulation perspective this means that there is a separate call for
+setting up a BAR for a VF.
+
+1) To enable SR/IOV support in the PF, it must be a PCI Express device so
+ you would need to add a PCI Express capability in the normal PCI
+ capability list. You might also want to add an ARI (Alternative
+ Routing-ID Interpretation) capability to indicate that your device
+ supports functions beyond it's "own" function space (0-7),
+ which is necessary to support more than 7 functions, or
+ if functions extends beyond offset 7 because they are placed at an
+ offset > 1 or have stride > 1.
+
+ ...
+ #include "hw/pci/pcie.h"
+ #include "hw/pci/pcie_sriov.h"
+
+ pci_your_pf_dev_realize( ... )
+ {
+ ...
+ int ret = pcie_endpoint_cap_init(d, 0x70);
+ ...
+ pcie_ari_init(d, 0x100, 1);
+ ...
+
+ /* Add and initialize the SR/IOV capability */
+ pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
+ vf_devid, initial_vfs, total_vfs,
+ fun_offset, stride);
+
+ /* Set up individual VF BARs (parameters as for normal BARs) */
+ pcie_sriov_pf_init_vf_bar( ... )
+ ...
+ }
+
+ For cleanup, you simply call:
+
+ pcie_sriov_pf_exit(device);
+
+ which will delete all the virtual functions and associated resources.
+
+2) Similarly in the implementation of the virtual function, you need to
+ make it a PCI Express device and add a similar set of capabilities
+ except for the SR/IOV capability. Then you need to set up the VF BARs as
+ subregions of the PFs SR/IOV VF BARs by calling
+ pcie_sriov_vf_register_bar() instead of the normal pci_register_bar() call:
+
+ pci_your_vf_dev_realize( ... )
+ {
+ ...
+ int ret = pcie_endpoint_cap_init(d, 0x60);
+ ...
+ pcie_ari_init(d, 0x100, 1);
+ ...
+ memory_region_init(mr, ... )
+ pcie_sriov_vf_register_bar(d, bar_nr, mr);
+ ...
+ }
+
+Testing on Linux guest
+======================
+The easiest is if your device driver supports sysfs based SR/IOV
+enabling. Support for this was added in kernel v.3.8, so not all drivers
+support it yet.
+
+To enable 4 VFs for a device at 01:00.0:
+
+ modprobe yourdriver
+ echo 4 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs
+
+You should now see 4 VFs with lspci.
+To turn SR/IOV off again - the standard requires you to turn it off before you can enable
+another VF count, and the emulation enforces this:
+
+ echo 0 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs
+
+Older drivers typically provide a max_vfs module parameter
+to enable it at load time:
+
+ modprobe yourdriver max_vfs=4
+
+To disable the VFs again then, you simply have to unload the driver:
+
+ rmmod yourdriver
diff --git a/docs/specs/acpi_erst.rst b/docs/specs/acpi_erst.rst
new file mode 100644
index 0000000000..a8a9d22d25
--- /dev/null
+++ b/docs/specs/acpi_erst.rst
@@ -0,0 +1,200 @@
+ACPI ERST DEVICE
+================
+
+The ACPI ERST device is utilized to support the ACPI Error Record
+Serialization Table, ERST, functionality. This feature is designed for
+storing error records in persistent storage for future reference
+and/or debugging.
+
+The ACPI specification[1], in Chapter "ACPI Platform Error Interfaces
+(APEI)", and specifically subsection "Error Serialization", outlines a
+method for storing error records into persistent storage.
+
+The format of error records is described in the UEFI specification[2],
+in Appendix N "Common Platform Error Record".
+
+While the ACPI specification allows for an NVRAM "mode" (see
+GET_ERROR_LOG_ADDRESS_RANGE_ATTRIBUTES) where non-volatile RAM is
+directly exposed for direct access by the OS/guest, this device
+implements the non-NVRAM "mode". This non-NVRAM "mode" is what is
+implemented by most BIOS (since flash memory requires programming
+operations in order to update its contents). Furthermore, as of the
+time of this writing, Linux only supports the non-NVRAM "mode".
+
+
+Background/Motivation
+---------------------
+
+Linux uses the persistent storage filesystem, pstore, to record
+information (eg. dmesg tail) upon panics and shutdowns. Pstore is
+independent of, and runs before, kdump. In certain scenarios (ie.
+hosts/guests with root filesystems on NFS/iSCSI where networking
+software and/or hardware fails, and thus kdump fails), pstore may
+contain information available for post-mortem debugging.
+
+Two common storage backends for the pstore filesystem are ACPI ERST
+and UEFI. Most BIOS implement ACPI ERST. UEFI is not utilized in all
+guests. With QEMU supporting ACPI ERST, it becomes a viable pstore
+storage backend for virtual machines (as it is now for bare metal
+machines).
+
+Enabling support for ACPI ERST facilitates a consistent method to
+capture kernel panic information in a wide range of guests: from
+resource-constrained microvms to very large guests, and in particular,
+in direct-boot environments (which would lack UEFI run-time services).
+
+Note that Microsoft Windows also utilizes the ACPI ERST for certain
+crash information, if available[3].
+
+
+Configuration|Usage
+-------------------
+
+To use ACPI ERST, a memory-backend-file object and acpi-erst device
+can be created, for example:
+
+ qemu ...
+ -object memory-backend-file,id=erstnvram,mem-path=acpi-erst.backing,size=0x10000,share=on \
+ -device acpi-erst,memdev=erstnvram
+
+For proper operation, the ACPI ERST device needs a memory-backend-file
+object with the following parameters:
+
+ - id: The id of the memory-backend-file object is used to associate
+ this memory with the acpi-erst device.
+ - size: The size of the ACPI ERST backing storage. This parameter is
+ required.
+ - mem-path: The location of the ACPI ERST backing storage file. This
+ parameter is also required.
+ - share: The share=on parameter is required so that updates to the
+ ERST backing store are written to the file.
+
+and ERST device:
+
+ - memdev: Is the object id of the memory-backend-file.
+ - record_size: Specifies the size of the records (or slots) in the
+ backend storage. Must be a power of two value greater than or
+ equal to 4096 (PAGE_SIZE).
+
+
+PCI Interface
+-------------
+
+The ERST device is a PCI device with two BARs, one for accessing the
+programming registers, and the other for accessing the record exchange
+buffer.
+
+BAR0 contains the programming interface consisting of ACTION and VALUE
+64-bit registers. All ERST actions/operations/side effects happen on
+the write to the ACTION, by design. Any data needed by the action must
+be placed into VALUE prior to writing ACTION. Reading the VALUE
+simply returns the register contents, which can be updated by a
+previous ACTION.
+
+BAR1 contains the 8KiB record exchange buffer, which is the
+implemented maximum record size.
+
+
+Backend Storage Format
+----------------------
+
+The backend storage is divided into fixed size "slots", 8KiB in
+length, with each slot storing a single record. Not all slots need to
+be occupied, and they need not be occupied in a contiguous fashion.
+The ability to clear/erase specific records allows for the formation
+of unoccupied slots.
+
+Slot 0 contains a backend storage header that identifies the contents
+as ERST and also facilitates efficient access to the records.
+Depending upon the size of the backend storage, additional slots will
+be designated to be a part of the slot 0 header. For example, at 8KiB,
+the slot 0 header can accomodate 1021 records. Thus a storage size
+of 8MiB (8KiB * 1024) requires an additional slot for use by the
+header. In this scenario, slot 0 and slot 1 form the backend storage
+header, and records can be stored starting at slot 2.
+
+Below is an example layout of the backend storage format (for storage
+size less than 8MiB). The size of the storage is a multiple of 8KiB,
+and contains N number of slots to store records. The example below
+shows two records (in CPER format) in the backend storage, while the
+remaining slots are empty/available.
+
+::
+
+ Slot Record
+ <------------------ 8KiB -------------------->
+ +--------------------------------------------+
+ 0 | storage header |
+ +--------------------------------------------+
+ 1 | empty/available |
+ +--------------------------------------------+
+ 2 | CPER |
+ +--------------------------------------------+
+ 3 | CPER |
+ +--------------------------------------------+
+ ... | |
+ +--------------------------------------------+
+ N | empty/available |
+ +--------------------------------------------+
+
+The storage header consists of some basic information and an array
+of CPER record_id's to efficiently access records in the backend
+storage.
+
+All fields in the header are stored in little endian format.
+
+::
+
+ +--------------------------------------------+
+ | magic | 0x0000
+ +--------------------------------------------+
+ | record_offset | record_size | 0x0008
+ +--------------------------------------------+
+ | record_count | reserved | version | 0x0010
+ +--------------------------------------------+
+ | record_id[0] | 0x0018
+ +--------------------------------------------+
+ | record_id[1] | 0x0020
+ +--------------------------------------------+
+ | record_id[...] |
+ +--------------------------------------------+
+ | record_id[N] | 0x1FF8
+ +--------------------------------------------+
+
+The 'magic' field contains the value 0x524F545354535245.
+
+The 'record_size' field contains the value 0x2000, 8KiB.
+
+The 'record_offset' field points to the first record_id in the array,
+0x0018.
+
+The 'version' field contains 0x0100, the first version.
+
+The 'record_count' field contains the number of valid records in the
+backend storage.
+
+The 'record_id' array fields are the 64-bit record identifiers of the
+CPER record in the corresponding slot. Stated differently, the
+location of a CPER record_id in the record_id[] array provides the
+slot index for the corresponding record in the backend storage.
+
+Note that, for example, with a backend storage less than 8MiB, slot 0
+contains the header, so the record_id[0] will never contain a valid
+CPER record_id. Instead slot 1 is the first available slot and thus
+record_id_[1] may contain a CPER.
+
+A 'record_id' of all 0s or all 1s indicates an invalid record (ie. the
+slot is available).
+
+
+References
+----------
+
+[1] "Advanced Configuration and Power Interface Specification",
+ version 4.0, June 2009.
+
+[2] "Unified Extensible Firmware Interface Specification",
+ version 2.1, October 2008.
+
+[3] "Windows Hardware Error Architecture", specfically
+ "Error Record Persistence Mechanism".
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 2a35700fb3..e10684bf53 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -18,4 +18,5 @@ guest hardware that is specific to QEMU.
acpi_mem_hotplug
acpi_pci_hotplug
acpi_nvdimm
+ acpi_erst
sev-guest-firmware
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index 5e407a6f32..dd6859d039 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -65,6 +65,7 @@ PCI devices (other than virtio):
1b36:000f mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
1b36:0010 PCIe NVMe device (-device nvme)
1b36:0011 PCI PVPanic device (-device pvpanic-pci)
+1b36:0012 PCI ACPI ERST device (-device acpi-erst)
All these devices are documented in docs/specs.
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 8966e16320..1773cf55f1 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2152,7 +2152,13 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */
build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */
build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */
- build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */
+ /* IAPC_BOOT_ARCH */
+ if (f->rev == 1) {
+ build_append_int_noprefix(tbl, 0, 2);
+ } else {
+ /* since ACPI v2.0 */
+ build_append_int_noprefix(tbl, f->iapc_boot_arch, 2);
+ }
build_append_int_noprefix(tbl, 0, 1); /* Reserved */
build_append_int_noprefix(tbl, f->flags, 4); /* Flags */
diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
index c0a23cf467..de509c2b48 100644
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@@ -80,11 +80,6 @@
#define UEFI_CPER_RECORD_MIN_SIZE 128U
#define UEFI_CPER_RECORD_LENGTH_OFFSET 20U
#define UEFI_CPER_RECORD_ID_OFFSET 96U
-#define IS_UEFI_CPER_RECORD(ptr) \
- (((ptr)[0] == 'C') && \
- ((ptr)[1] == 'P') && \
- ((ptr)[2] == 'E') && \
- ((ptr)[3] == 'R'))
/*
* NOTE that when accessing CPER fields within a record, memcpy()
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 6befd23e16..6351bd3424 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -32,6 +32,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pcie_port.h"
+#include "hw/pci-bridge/xio3130_downstream.h"
#include "hw/i386/acpi-build.h"
#include "hw/acpi/acpi.h"
#include "hw/pci/pci_bus.h"
@@ -336,6 +337,8 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
{
PCIDevice *pdev = PCI_DEVICE(dev);
int slot = PCI_SLOT(pdev->devfn);
+ PCIDevice *bridge;
+ PCIBus *bus;
int bsel;
/* Don't send event when device is enabled during qemu machine creation:
@@ -365,7 +368,14 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
return;
}
- bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
+ bus = pci_get_bus(pdev);
+ bridge = pci_bridge_get_device(bus);
+ if (object_dynamic_cast(OBJECT(bridge), TYPE_PCIE_ROOT_PORT) ||
+ object_dynamic_cast(OBJECT(bridge), TYPE_XIO3130_DOWNSTREAM)) {
+ pcie_cap_slot_enable_power(bridge);
+ }
+
+ bsel = acpi_pcihp_get_bsel(bus);
g_assert(bsel >= 0);
s->acpi_pcihp_pci_status[bsel].up |= (1U << slot);
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ebd47aa26f..4ad4d7286c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -38,6 +38,7 @@
#include "hw/nvram/fw_cfg.h"
#include "hw/acpi/bios-linker-loader.h"
#include "hw/isa/isa.h"
+#include "hw/input/i8042.h"
#include "hw/block/fdc.h"
#include "hw/acpi/memory_hotplug.h"
#include "sysemu/tpm.h"
@@ -192,6 +193,13 @@ static void init_common_fadt_data(MachineState *ms, Object *o,
.address = object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK, NULL)
},
};
+
+ /*
+ * ACPI v2, Table 5-10 - Fixed ACPI Description Table Boot Architecture
+ * Flags, bit offset 1 - 8042.
+ */
+ fadt.iapc_boot_arch = iapc_boot_arch_8042();
+
*data = fadt;
}
diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c
index 68ca7e7fc2..fb09185cbd 100644
--- a/hw/i386/acpi-microvm.c
+++ b/hw/i386/acpi-microvm.c
@@ -37,6 +37,7 @@
#include "hw/pci/pcie_host.h"
#include "hw/usb/xhci.h"
#include "hw/virtio/virtio-mmio.h"
+#include "hw/input/i8042.h"
#include "acpi-common.h"
#include "acpi-microvm.h"
@@ -187,6 +188,11 @@ static void acpi_build_microvm(AcpiBuildTables *tables,
.address = GED_MMIO_BASE_REGS + ACPI_GED_REG_RESET,
},
.reset_val = ACPI_GED_RESET_VALUE,
+ /*
+ * ACPI v2, Table 5-10 - Fixed ACPI Description Table Boot Architecture
+ * Flags, bit offset 1 - 8042.
+ */
+ .iapc_boot_arch = iapc_boot_arch_8042(),
};
table_offsets = g_array_new(false, true /* clear */,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4c6c016388..32471a44cb 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3030,6 +3030,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
+ /* TODO: add support for VFIO and vhost users */
+ if (s->snoop_control) {
+ error_setg_errno(errp, -ENOTSUP,
+ "Snoop Control with vhost or VFIO is not supported");
+ return -ENOTSUP;
+ }
+
/* Update per-address-space notifier flags */
vtd_as->notifier_flags = new;
@@ -3113,6 +3120,7 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
+ DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -3643,7 +3651,7 @@ static void vtd_init(IntelIOMMUState *s)
vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
x86_iommu->dt_supported);
- if (s->scalable_mode) {
+ if (s->scalable_mode || s->snoop_control) {
vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
@@ -3674,6 +3682,10 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
+ if (s->snoop_control) {
+ s->ecap |= VTD_ECAP_SC;
+ }
+
vtd_reset_caches(s);
/* Define registers with default values and bit semantics */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index a6c788049b..1ff13b40f9 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -188,6 +188,7 @@
#define VTD_ECAP_IR (1ULL << 3)
#define VTD_ECAP_EIM (1ULL << 4)
#define VTD_ECAP_PT (1ULL << 6)
+#define VTD_ECAP_SC (1ULL << 7)
#define VTD_ECAP_MHMV (15ULL << 20)
#define VTD_ECAP_SRS (1ULL << 31)
#define VTD_ECAP_SMTS (1ULL << 43)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c8696ac01e..fd55fc725c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -318,8 +318,6 @@ GlobalProperty pc_compat_2_0[] = {
{ "pci-serial-4x", "prog_if", "0" },
{ "virtio-net-pci", "guest_announce", "off" },
{ "ICH9-LPC", "memory-hotplug-support", "off" },
- { "xio3130-downstream", COMPAT_PROP_PCP, "off" },
- { "ioh3420", COMPAT_PROP_PCP, "off" },
};
const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0);
@@ -1014,7 +1012,8 @@ static const MemoryRegionOps ioportF0_io_ops = {
},
};
-static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport)
+static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl,
+ bool create_i8042, bool no_vmport)
{
int i;
DriveInfo *fd[MAX_FD];
@@ -1036,6 +1035,10 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport)
}
}
+ if (!create_i8042) {
+ return;
+ }
+
i8042 = isa_create_simple(isa_bus, "i8042");
if (!no_vmport) {
isa_create_simple(isa_bus, TYPE_VMPORT);
@@ -1131,7 +1134,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
i8257_dma_init(isa_bus, 0);
/* Super I/O */
- pc_superio_init(isa_bus, create_fdctrl, pcms->vmport != ON_OFF_AUTO_ON);
+ pc_superio_init(isa_bus, create_fdctrl, pcms->i8042_enabled,
+ pcms->vmport != ON_OFF_AUTO_ON);
}
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
@@ -1512,6 +1516,20 @@ static void pc_machine_set_hpet(Object *obj, bool value, Error **errp)
pcms->hpet_enabled = value;
}
+static bool pc_machine_get_i8042(Object *obj, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ return pcms->i8042_enabled;
+}
+
+static void pc_machine_set_i8042(Object *obj, bool value, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ pcms->i8042_enabled = value;
+}
+
static bool pc_machine_get_default_bus_bypass_iommu(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
@@ -1641,6 +1659,7 @@ static void pc_machine_initfn(Object *obj)
pcms->smbus_enabled = true;
pcms->sata_enabled = true;
pcms->pit_enabled = true;
+ pcms->i8042_enabled = true;
pcms->max_fw_size = 8 * MiB;
#ifdef CONFIG_HPET
pcms->hpet_enabled = true;
@@ -1777,6 +1796,9 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "hpet",
"Enable/disable high precision event timer emulation");
+ object_class_property_add_bool(oc, PC_MACHINE_I8042,
+ pc_machine_get_i8042, pc_machine_set_i8042);
+
object_class_property_add_bool(oc, "default-bus-bypass-iommu",
pc_machine_get_default_bus_bypass_iommu,
pc_machine_set_default_bus_bypass_iommu);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8d33cf689d..b72c03d0a6 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -757,6 +757,7 @@ static void pc_i440fx_1_7_machine_options(MachineClass *m)
m->hw_version = "1.7.0";
m->default_machine_opts = NULL;
m->option_rom_has_mr = true;
+ m->deprecation_reason = "old and unattended - use a newer version instead";
compat_props_add(m->compat_props, pc_compat_1_7, pc_compat_1_7_len);
pcmc->smbios_defaults = false;
pcmc->gigabyte_align = false;
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index b84840a1bb..4cf107baea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -83,24 +83,11 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info,
uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms,
unsigned int cpu_index)
{
- X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms);
X86CPUTopoInfo topo_info;
- uint32_t correct_id;
- static bool warned;
init_topo_info(&topo_info, x86ms);
- correct_id = x86_apicid_from_cpu_idx(&topo_info, cpu_index);
- if (x86mc->compat_apic_id_mode) {
- if (cpu_index != correct_id && !warned && !qtest_enabled()) {
- error_report("APIC IDs set in compatibility mode, "
- "CPU topology won't match the configuration");
- warned = true;
- }
- return cpu_index;
- } else {
- return correct_id;
- }
+ return x86_apicid_from_cpu_idx(&topo_info, cpu_index);
}
@@ -1330,7 +1317,6 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
- x86mc->compat_apic_id_mode = false;
x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c
index a39fcdd1fc..b84d4d458d 100644
--- a/hw/misc/pvpanic-isa.c
+++ b/hw/misc/pvpanic-isa.c
@@ -21,6 +21,7 @@
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "hw/isa/isa.h"
+#include "standard-headers/linux/pvpanic.h"
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE)
@@ -64,7 +65,8 @@ static void pvpanic_isa_realizefn(DeviceState *dev, Error **errp)
static Property pvpanic_isa_properties[] = {
DEFINE_PROP_UINT16(PVPANIC_IOPORT_PROP, PVPanicISAState, ioport, 0x505),
- DEFINE_PROP_UINT8("events", PVPanicISAState, pvpanic.events, PVPANIC_PANICKED | PVPANIC_CRASHLOADED),
+ DEFINE_PROP_UINT8("events", PVPanicISAState, pvpanic.events,
+ PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c
index 62e1be68c1..99cf7e2041 100644
--- a/hw/misc/pvpanic-pci.c
+++ b/hw/misc/pvpanic-pci.c
@@ -21,6 +21,7 @@
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "hw/pci/pci.h"
+#include "standard-headers/linux/pvpanic.h"
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE)
@@ -53,7 +54,8 @@ static void pvpanic_pci_realizefn(PCIDevice *dev, Error **errp)
}
static Property pvpanic_pci_properties[] = {
- DEFINE_PROP_UINT8("events", PVPanicPCIState, pvpanic.events, PVPANIC_PANICKED | PVPANIC_CRASHLOADED),
+ DEFINE_PROP_UINT8("events", PVPanicPCIState, pvpanic.events,
+ PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index e2cb4a5d28..1540e9091a 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -21,12 +21,13 @@
#include "hw/qdev-properties.h"
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
+#include "standard-headers/linux/pvpanic.h"
static void handle_event(int event)
{
static bool logged;
- if (event & ~(PVPANIC_PANICKED | PVPANIC_CRASHLOADED) && !logged) {
+ if (event & ~(PVPANIC_PANICKED | PVPANIC_CRASH_LOADED) && !logged) {
qemu_log_mask(LOG_GUEST_ERROR, "pvpanic: unknown event %#x.\n", event);
logged = true;
}
@@ -36,7 +37,7 @@ static void handle_event(int event)
return;
}
- if (event & PVPANIC_CRASHLOADED) {
+ if (event & PVPANIC_CRASH_LOADED) {
qemu_system_guest_crashloaded(NULL);
return;
}
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index cf8ab0f8af..b02a0632df 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -628,17 +628,20 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n)
NetClientState *peer = n->nic_conf.peers.ncs[0];
/*
- * Backends other than vhost-user don't support max queue size.
+ * Backends other than vhost-user or vhost-vdpa don't support max queue
+ * size.
*/
if (!peer) {
return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}
- if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+ switch(peer->info->type) {
+ case NET_CLIENT_DRIVER_VHOST_USER:
+ case NET_CLIENT_DRIVER_VHOST_VDPA:
+ return VIRTQUEUE_MAX_SIZE;
+ default:
return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
- }
-
- return VIRTQUEUE_MAX_SIZE;
+ };
}
static int peer_attach(VirtIONet *n, int index)
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 10e6e7c2ab..de932286b5 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -193,6 +193,12 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
PCIDevice *pxb = pci_get_bus(pci_dev)->parent_dev;
/*
+ * First carry out normal swizzle to handle
+ * multple root ports on a pxb instance.
+ */
+ pin = pci_swizzle_map_irq_fn(pci_dev, pin);
+
+ /*
* The bios does not index the pxb slot number when
* it computes the IRQ because it resides on bus 0
* and not on the current bus.
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index 04aae72cd6..05e2b06c0c 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -28,6 +28,7 @@
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/module.h"
+#include "hw/pci-bridge/xio3130_downstream.h"
#define PCI_DEVICE_ID_TI_XIO3130D 0x8233 /* downstream port */
#define XIO3130_REVISION 0x1
@@ -84,7 +85,7 @@ static void xio3130_downstream_realize(PCIDevice *d, Error **errp)
XIO3130_SSVID_SVID, XIO3130_SSVID_SSID,
errp);
if (rc < 0) {
- goto err_bridge;
+ goto err_msi;
}
rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_DOWNSTREAM,
@@ -173,7 +174,7 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data)
}
static const TypeInfo xio3130_downstream_info = {
- .name = "xio3130-downstream",
+ .name = TYPE_XIO3130_DOWNSTREAM,
.parent = TYPE_PCIE_SLOT,
.class_init = xio3130_downstream_class_init,
.interfaces = (InterfaceInfo[]) {
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index 5cd3af4fbc..5ff46ef050 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -75,7 +75,7 @@ static void xio3130_upstream_realize(PCIDevice *d, Error **errp)
XIO3130_SSVID_SVID, XIO3130_SSVID_SSID,
errp);
if (rc < 0) {
- goto err_bridge;
+ goto err_msi;
}
rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_UPSTREAM,
diff --git a/hw/pci/meson.build b/hw/pci/meson.build
index 5c4bbac817..bcc9c75919 100644
--- a/hw/pci/meson.build
+++ b/hw/pci/meson.build
@@ -5,6 +5,7 @@ pci_ss.add(files(
'pci.c',
'pci_bridge.c',
'pci_host.c',
+ 'pcie_sriov.c',
'shpc.c',
'slotid_cap.c'
))
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 5d30f9ca60..5cb1232e27 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -239,6 +239,9 @@ int pci_bar(PCIDevice *d, int reg)
{
uint8_t type;
+ /* PCIe virtual functions do not have their own BARs */
+ assert(!pci_is_vf(d));
+
if (reg != PCI_ROM_SLOT)
return PCI_BASE_ADDRESS_0 + reg * 4;
@@ -304,10 +307,30 @@ void pci_device_deassert_intx(PCIDevice *dev)
}
}
-static void pci_do_device_reset(PCIDevice *dev)
+static void pci_reset_regions(PCIDevice *dev)
{
int r;
+ if (pci_is_vf(dev)) {
+ return;
+ }
+
+ for (r = 0; r < PCI_NUM_REGIONS; ++r) {
+ PCIIORegion *region = &dev->io_regions[r];
+ if (!region->size) {
+ continue;
+ }
+ if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(dev->config + pci_bar(dev, r), region->type);
+ } else {
+ pci_set_long(dev->config + pci_bar(dev, r), region->type);
+ }
+ }
+}
+
+static void pci_do_device_reset(PCIDevice *dev)
+{
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -323,19 +346,7 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
- for (r = 0; r < PCI_NUM_REGIONS; ++r) {
- PCIIORegion *region = &dev->io_regions[r];
- if (!region->size) {
- continue;
- }
-
- if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(dev->config + pci_bar(dev, r), region->type);
- } else {
- pci_set_long(dev->config + pci_bar(dev, r), region->type);
- }
- }
+ pci_reset_regions(dev);
pci_update_mappings(dev);
msi_reset(dev);
@@ -885,6 +896,16 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/*
+ * With SR/IOV and ARI, a device at function 0 need not be a multifunction
+ * device, as it may just be a VF that ended up with function 0 in
+ * the legacy PCI interpretation. Avoid failing in such cases:
+ */
+ if (pci_is_vf(dev) &&
+ dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ return;
+ }
+
+ /*
* multifunction bit is interpreted in two ways as follows.
* - all functions must set the bit to 1.
* Example: Intel X53
@@ -1078,11 +1099,12 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
return NULL;
} else if (!pci_bus_devfn_available(bus, devfn)) {
error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " in use by %s",
+ " in use by %s,id=%s",
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
- bus->devices[devfn]->name);
+ bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
return NULL;
} else if (dev->hotplugged &&
+ !pci_is_vf(pci_dev) &&
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" new func %s cannot be exposed to guest.",
@@ -1191,6 +1213,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
+ assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1294,11 +1317,45 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num)
return pci_dev->io_regions[region_num].addr;
}
-static pcibus_t pci_bar_address(PCIDevice *d,
- int reg, uint8_t type, pcibus_t size)
+static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
+ uint8_t type, pcibus_t size)
+{
+ pcibus_t new_addr;
+ if (!pci_is_vf(d)) {
+ int bar = pci_bar(d, reg);
+ if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ new_addr = pci_get_quad(d->config + bar);
+ } else {
+ new_addr = pci_get_long(d->config + bar);
+ }
+ } else {
+ PCIDevice *pf = d->exp.sriov_vf.pf;
+ uint16_t sriov_cap = pf->exp.sriov_cap;
+ int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4;
+ uint16_t vf_offset =
+ pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
+ uint16_t vf_stride =
+ pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
+ uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+
+ if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ new_addr = pci_get_quad(pf->config + bar);
+ } else {
+ new_addr = pci_get_long(pf->config + bar);
+ }
+ new_addr += vf_num * size;
+ }
+ /* The ROM slot has a specific enable bit, keep it intact */
+ if (reg != PCI_ROM_SLOT) {
+ new_addr &= ~(size - 1);
+ }
+ return new_addr;
+}
+
+pcibus_t pci_bar_address(PCIDevice *d,
+ int reg, uint8_t type, pcibus_t size)
{
pcibus_t new_addr, last_addr;
- int bar = pci_bar(d, reg);
uint16_t cmd = pci_get_word(d->config + PCI_COMMAND);
Object *machine = qdev_get_machine();
ObjectClass *oc = object_get_class(machine);
@@ -1309,7 +1366,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
if (!(cmd & PCI_COMMAND_IO)) {
return PCI_BAR_UNMAPPED;
}
- new_addr = pci_get_long(d->config + bar) & ~(size - 1);
+ new_addr = pci_config_get_bar_addr(d, reg, type, size);
last_addr = new_addr + size - 1;
/* Check if 32 bit BAR wraps around explicitly.
* TODO: make priorities correct and remove this work around.
@@ -1324,11 +1381,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
if (!(cmd & PCI_COMMAND_MEMORY)) {
return PCI_BAR_UNMAPPED;
}
- if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- new_addr = pci_get_quad(d->config + bar);
- } else {
- new_addr = pci_get_long(d->config + bar);
- }
+ new_addr = pci_config_get_bar_addr(d, reg, type, size);
/* the ROM slot has a specific enable bit */
if (reg == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE)) {
return PCI_BAR_UNMAPPED;
@@ -1473,6 +1526,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
msi_write_config(d, addr, val_in, l);
msix_write_config(d, addr, val_in, l);
+ pcie_sriov_config_write(d, addr, val_in, l);
}
/***********************************************************/
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index d7d73a31e4..67a5d67372 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -366,6 +366,17 @@ static void hotplug_event_clear(PCIDevice *dev)
}
}
+void pcie_cap_slot_enable_power(PCIDevice *dev)
+{
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP);
+
+ if (sltcap & PCI_EXP_SLTCAP_PCP) {
+ pci_set_word_by_mask(exp_cap + PCI_EXP_SLTCTL,
+ PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON);
+ }
+}
+
static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
bool *power = opaque;
@@ -446,6 +457,11 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
PCIDevice *pci_dev = PCI_DEVICE(dev);
uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
+ if (pci_is_vf(pci_dev)) {
+ /* Virtual function cannot be physically disconnected */
+ return;
+ }
+
/* Don't send event when device is enabled during qemu machine creation:
* it is present on boot, no hotplug event is necessary. We do send an
* event when the device is disabled later. */
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
new file mode 100644
index 0000000000..87abad6ac8
--- /dev/null
+++ b/hw/pci/pcie_sriov.c
@@ -0,0 +1,302 @@
+/*
+ * pcie_sriov.c:
+ *
+ * Implementation of SR/IOV emulation support.
+ *
+ * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pcie.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/qdev-properties.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+static PCIDevice *register_vf(PCIDevice *pf, int devfn,
+ const char *name, uint16_t vf_num);
+static void unregister_vfs(PCIDevice *dev);
+
+void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride)
+{
+ uint8_t *cfg = dev->config + offset;
+ uint8_t *wmask;
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
+ offset, PCI_EXT_CAP_SRIOV_SIZEOF);
+ dev->exp.sriov_cap = offset;
+ dev->exp.sriov_pf.num_vfs = 0;
+ dev->exp.sriov_pf.vfname = g_strdup(vfname);
+ dev->exp.sriov_pf.vf = NULL;
+
+ pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
+ pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride);
+
+ /*
+ * Mandatory page sizes to support.
+ * Device implementations can call pcie_sriov_pf_add_sup_pgsize()
+ * to set more bits:
+ */
+ pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, SRIOV_SUP_PGSIZE_MINREQ);
+
+ /*
+ * Default is to use 4K pages, software can modify it
+ * to any of the supported bits
+ */
+ pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1);
+
+ /* Set up device ID and initial/total number of VFs available */
+ pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id);
+ pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs);
+ pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs);
+ pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0);
+
+ /* Write enable control bits */
+ wmask = dev->wmask + offset;
+ pci_set_word(wmask + PCI_SRIOV_CTRL,
+ PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
+ pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff);
+ pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
+
+ qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+}
+
+void pcie_sriov_pf_exit(PCIDevice *dev)
+{
+ unregister_vfs(dev);
+ g_free((char *)dev->exp.sriov_pf.vfname);
+ dev->exp.sriov_pf.vfname = NULL;
+}
+
+void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
+ uint8_t type, dma_addr_t size)
+{
+ uint32_t addr;
+ uint64_t wmask;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ assert(sriov_cap > 0);
+ assert(region_num >= 0);
+ assert(region_num < PCI_NUM_REGIONS);
+ assert(region_num != PCI_ROM_SLOT);
+
+ wmask = ~(size - 1);
+ addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4;
+
+ pci_set_long(dev->config + addr, type);
+ if (!(type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(dev->wmask + addr, wmask);
+ pci_set_quad(dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(dev->cmask + addr, 0xffffffff);
+ }
+ dev->exp.sriov_pf.vf_bar_type[region_num] = type;
+}
+
+void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
+ MemoryRegion *memory)
+{
+ PCIIORegion *r;
+ PCIBus *bus = pci_get_bus(dev);
+ uint8_t type;
+ pcibus_t size = memory_region_size(memory);
+
+ assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
+ assert(region_num >= 0);
+ assert(region_num < PCI_NUM_REGIONS);
+ type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
+
+ if (!is_power_of_2(size)) {
+ error_report("%s: PCI region size must be a power"
+ " of two - type=0x%x, size=0x%"FMT_PCIBUS,
+ __func__, type, size);
+ exit(1);
+ }
+
+ r = &dev->io_regions[region_num];
+ r->memory = memory;
+ r->address_space =
+ type & PCI_BASE_ADDRESS_SPACE_IO
+ ? bus->address_space_io
+ : bus->address_space_mem;
+ r->size = size;
+ r->type = type;
+
+ r->addr = pci_bar_address(dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
+}
+
+static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
+ uint16_t vf_num)
+{
+ PCIDevice *dev = pci_new(devfn, name);
+ dev->exp.sriov_vf.pf = pf;
+ dev->exp.sriov_vf.vf_number = vf_num;
+ PCIBus *bus = pci_get_bus(pf);
+ Error *local_err = NULL;
+
+ qdev_realize(&dev->qdev, &bus->qbus, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return NULL;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(dev->config, 0xffff);
+ pci_config_set_device_id(dev->config, 0xffff);
+
+ return dev;
+}
+
+static void register_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs;
+ uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint16_t vf_offset =
+ pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
+ uint16_t vf_stride =
+ pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
+ int32_t devfn = dev->devfn + vf_offset;
+
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+
+ dev->exp.sriov_pf.vf = g_malloc(sizeof(PCIDevice *) * num_vfs);
+ assert(dev->exp.sriov_pf.vf);
+
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
+ dev->exp.sriov_pf.vfname, i);
+ if (!dev->exp.sriov_pf.vf[i]) {
+ num_vfs = i;
+ break;
+ }
+ devfn += vf_stride;
+ }
+ dev->exp.sriov_pf.num_vfs = num_vfs;
+}
+
+static void unregister_vfs(PCIDevice *dev)
+{
+ Error *local_err = NULL;
+ uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
+ uint16_t i;
+
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ object_property_set_bool(OBJECT(vf), "realized", false, &local_err);
+ if (local_err) {
+ fprintf(stderr, "Failed to unplug: %s\n",
+ error_get_pretty(local_err));
+ error_free(local_err);
+ }
+ object_unparent(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+ dev->exp.sriov_pf.num_vfs = 0;
+ pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0);
+}
+
+void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len)
+{
+ uint32_t off;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ if (!sriov_cap || address < sriov_cap) {
+ return;
+ }
+ off = address - sriov_cap;
+ if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) {
+ return;
+ }
+
+ trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), off, val, len);
+
+ if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
+ if (dev->exp.sriov_pf.num_vfs) {
+ if (!(val & PCI_SRIOV_CTRL_VFE)) {
+ unregister_vfs(dev);
+ }
+ } else {
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
+ }
+ }
+ }
+}
+
+
+/* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */
+void pcie_sriov_pf_disable_vfs(PCIDevice *dev)
+{
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ if (sriov_cap) {
+ uint32_t val = pci_get_byte(dev->config + sriov_cap + PCI_SRIOV_CTRL);
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ val &= ~PCI_SRIOV_CTRL_VFE;
+ pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1);
+ }
+ }
+}
+
+/* Add optional supported page sizes to the mask of supported page sizes */
+void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
+{
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ uint8_t *wmask = dev->wmask + dev->exp.sriov_cap;
+
+ uint16_t sup_pgsize = pci_get_word(cfg + PCI_SRIOV_SUP_PGSIZE);
+
+ sup_pgsize |= opt_sup_pgsize;
+
+ /*
+ * Make sure the new bits are set, and that system page size
+ * also can be set to any of the new values according to spec:
+ */
+ pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize);
+ pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize);
+}
+
+
+uint16_t pcie_sriov_vf_number(PCIDevice *dev)
+{
+ assert(pci_is_vf(dev));
+ return dev->exp.sriov_vf.vf_number;
+}
+
+PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
+{
+ return dev->exp.sriov_vf.pf;
+}
+
+PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
+{
+ assert(!pci_is_vf(dev));
+ if (n < dev->exp.sriov_pf.num_vfs) {
+ return dev->exp.sriov_pf.vf[n];
+ }
+ return NULL;
+}
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 7570752c40..aaf46bc92d 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -10,3 +10,8 @@ pci_cfg_write(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsig
# msix.c
msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d masked %d"
+
+# hw/pci/pcie_sriov.c
+sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 6013df1698..60349ee402 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -104,9 +104,11 @@ static struct {
const char *sock_pfx, *manufacturer, *version, *serial, *asset, *part;
uint64_t max_speed;
uint64_t current_speed;
+ uint64_t processor_id;
} type4 = {
.max_speed = DEFAULT_CPU_SPEED,
- .current_speed = DEFAULT_CPU_SPEED
+ .current_speed = DEFAULT_CPU_SPEED,
+ .processor_id = 0,
};
static struct {
@@ -327,6 +329,10 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = {
.name = "part",
.type = QEMU_OPT_STRING,
.help = "part number",
+ }, {
+ .name = "processor-id",
+ .type = QEMU_OPT_NUMBER,
+ .help = "processor id",
},
{ /* end of list */ }
};
@@ -549,9 +555,23 @@ bool smbios_skip_table(uint8_t type, bool required_table)
return true;
}
+#define T0_BASE 0x000
+#define T1_BASE 0x100
+#define T2_BASE 0x200
+#define T3_BASE 0x300
+#define T4_BASE 0x400
+#define T11_BASE 0xe00
+
+#define T16_BASE 0x1000
+#define T17_BASE 0x1100
+#define T19_BASE 0x1300
+#define T32_BASE 0x2000
+#define T41_BASE 0x2900
+#define T127_BASE 0x7F00
+
static void smbios_build_type_0_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(0, 0x000, false); /* optional, leave up to BIOS */
+ SMBIOS_BUILD_TABLE_PRE(0, T0_BASE, false); /* optional, leave up to BIOS */
SMBIOS_TABLE_SET_STR(0, vendor_str, type0.vendor);
SMBIOS_TABLE_SET_STR(0, bios_version_str, type0.version);
@@ -599,7 +619,7 @@ static void smbios_encode_uuid(struct smbios_uuid *uuid, QemuUUID *in)
static void smbios_build_type_1_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(1, 0x100, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(1, T1_BASE, true); /* required */
SMBIOS_TABLE_SET_STR(1, manufacturer_str, type1.manufacturer);
SMBIOS_TABLE_SET_STR(1, product_name_str, type1.product);
@@ -619,7 +639,7 @@ static void smbios_build_type_1_table(void)
static void smbios_build_type_2_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */
+ SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, false); /* optional */
SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer);
SMBIOS_TABLE_SET_STR(2, product_str, type2.product);
@@ -637,7 +657,7 @@ static void smbios_build_type_2_table(void)
static void smbios_build_type_3_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(3, 0x300, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(3, T3_BASE, true); /* required */
SMBIOS_TABLE_SET_STR(3, manufacturer_str, type3.manufacturer);
t->type = 0x01; /* Other */
@@ -662,15 +682,20 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
{
char sock_str[128];
- SMBIOS_BUILD_TABLE_PRE(4, 0x400 + instance, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(4, T4_BASE + instance, true); /* required */
snprintf(sock_str, sizeof(sock_str), "%s%2x", type4.sock_pfx, instance);
SMBIOS_TABLE_SET_STR(4, socket_designation_str, sock_str);
t->processor_type = 0x03; /* CPU */
t->processor_family = 0x01; /* Other */
SMBIOS_TABLE_SET_STR(4, processor_manufacturer_str, type4.manufacturer);
- t->processor_id[0] = cpu_to_le32(smbios_cpuid_version);
- t->processor_id[1] = cpu_to_le32(smbios_cpuid_features);
+ if (type4.processor_id == 0) {
+ t->processor_id[0] = cpu_to_le32(smbios_cpuid_version);
+ t->processor_id[1] = cpu_to_le32(smbios_cpuid_features);
+ } else {
+ t->processor_id[0] = cpu_to_le32((uint32_t)type4.processor_id);
+ t->processor_id[1] = cpu_to_le32(type4.processor_id >> 32);
+ }
SMBIOS_TABLE_SET_STR(4, processor_version_str, type4.version);
t->voltage = 0;
t->external_clock = cpu_to_le16(0); /* Unknown */
@@ -702,7 +727,7 @@ static void smbios_build_type_11_table(void)
return;
}
- SMBIOS_BUILD_TABLE_PRE(11, 0xe00, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(11, T11_BASE, true); /* required */
snprintf(count_str, sizeof(count_str), "%zu", type11.nvalues);
t->count = type11.nvalues;
@@ -722,7 +747,7 @@ static void smbios_build_type_16_table(unsigned dimm_cnt)
{
uint64_t size_kb;
- SMBIOS_BUILD_TABLE_PRE(16, 0x1000, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(16, T16_BASE, true); /* required */
t->location = 0x01; /* Other */
t->use = 0x03; /* System memory */
@@ -749,7 +774,7 @@ static void smbios_build_type_17_table(unsigned instance, uint64_t size)
char loc_str[128];
uint64_t size_mb;
- SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(17, T17_BASE + instance, true); /* required */
t->physical_memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */
t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */
@@ -785,12 +810,13 @@ static void smbios_build_type_17_table(unsigned instance, uint64_t size)
SMBIOS_BUILD_TABLE_POST;
}
-static void smbios_build_type_19_table(unsigned instance,
+static void smbios_build_type_19_table(unsigned instance, unsigned offset,
uint64_t start, uint64_t size)
{
uint64_t end, start_kb, end_kb;
- SMBIOS_BUILD_TABLE_PRE(19, 0x1300 + instance, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(19, T19_BASE + offset + instance,
+ true); /* required */
end = start + size - 1;
assert(end > start);
@@ -814,7 +840,7 @@ static void smbios_build_type_19_table(unsigned instance,
static void smbios_build_type_32_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(32, 0x2000, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(32, T32_BASE, true); /* required */
memset(t->reserved, 0, 6);
t->boot_status = 0; /* No errors detected */
@@ -828,7 +854,7 @@ static void smbios_build_type_41_table(Error **errp)
struct type41_instance *t41;
QTAILQ_FOREACH(t41, &type41, next) {
- SMBIOS_BUILD_TABLE_PRE(41, 0x2900 + instance, true);
+ SMBIOS_BUILD_TABLE_PRE(41, T41_BASE + instance, true);
SMBIOS_TABLE_SET_STR(41, reference_designation_str, t41->designation);
t->device_type = t41->kind;
@@ -871,7 +897,7 @@ static void smbios_build_type_41_table(Error **errp)
static void smbios_build_type_127_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(127, 0x7F00, true); /* required */
+ SMBIOS_BUILD_TABLE_PRE(127, T127_BASE, true); /* required */
SMBIOS_BUILD_TABLE_POST;
}
@@ -982,7 +1008,7 @@ void smbios_get_tables(MachineState *ms,
uint8_t **anchor, size_t *anchor_len,
Error **errp)
{
- unsigned i, dimm_cnt;
+ unsigned i, dimm_cnt, offset;
if (smbios_legacy) {
*tables = *anchor = NULL;
@@ -1012,6 +1038,16 @@ void smbios_get_tables(MachineState *ms,
dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ;
+ /*
+ * The offset determines if we need to keep additional space betweeen
+ * table 17 and table 19 header handle numbers so that they do
+ * not overlap. For example, for a VM with larger than 8 TB guest
+ * memory and DIMM like chunks of 16 GiB, the default space between
+ * the two tables (T19_BASE - T17_BASE = 512) is not enough.
+ */
+ offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \
+ dimm_cnt - (T19_BASE - T17_BASE) : 0;
+
smbios_build_type_16_table(dimm_cnt);
for (i = 0; i < dimm_cnt; i++) {
@@ -1019,10 +1055,16 @@ void smbios_get_tables(MachineState *ms,
}
for (i = 0; i < mem_array_size; i++) {
- smbios_build_type_19_table(i, mem_array[i].address,
+ smbios_build_type_19_table(i, offset, mem_array[i].address,
mem_array[i].length);
}
+ /*
+ * make sure 16 bit handle numbers in the headers of tables 19
+ * and 32 do not overlap.
+ */
+ assert((mem_array_size + offset) < (T32_BASE - T19_BASE));
+
smbios_build_type_32_table();
smbios_build_type_38_table();
smbios_build_type_41_table(errp);
@@ -1292,6 +1334,8 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
save_opt(&type4.serial, opts, "serial");
save_opt(&type4.asset, opts, "asset");
save_opt(&type4.part, opts, "part");
+ /* If the value is 0, it will take the value from the CPU model. */
+ type4.processor_id = qemu_opt_get_number(opts, "processor-id", 0);
type4.max_speed = qemu_opt_get_number(opts, "max-speed",
DEFAULT_CPU_SPEED);
type4.current_speed = qemu_opt_get_number(opts, "current-speed",
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index f7ad6be5fb..a5102eac9e 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -89,9 +89,11 @@ virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
# virtio-iommu.c
virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_system_reset(void) "system reset!"
virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
virtio_iommu_device_status(uint8_t status) "driver status = %d"
-virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_start, uint32_t domain_end, uint32_t probe_size) "page_size_mask=0x%"PRIx64" input range start=0x%"PRIx64" input range end=0x%"PRIx64" domain range start=%d domain range end=%d probe_size=0x%x"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_start, uint32_t domain_end, uint32_t probe_size, uint8_t bypass) "page_size_mask=0x%"PRIx64" input range start=0x%"PRIx64" input range end=0x%"PRIx64" domain range start=%d domain range end=%d probe_size=0x%x bypass=0x%x"
+virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x"
virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c
index d172632bb0..42c7f6d9e5 100644
--- a/hw/virtio/vhost-user-i2c.c
+++ b/hw/virtio/vhost-user-i2c.c
@@ -19,6 +19,11 @@
#define VIRTIO_ID_I2C_ADAPTER 34
#endif
+static const int feature_bits[] = {
+ VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
+ VHOST_INVALID_FEATURE_BIT
+};
+
static void vu_i2c_start(VirtIODevice *vdev)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -113,8 +118,10 @@ static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status)
static uint64_t vu_i2c_get_features(VirtIODevice *vdev,
uint64_t requested_features, Error **errp)
{
- /* No feature bits used yet */
- return requested_features;
+ VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+
+ virtio_add_feature(&requested_features, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST);
+ return vhost_get_features(&i2c->vhost_dev, feature_bits, requested_features);
}
static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 662853513e..6abbc9da32 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -25,6 +25,7 @@
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
+#include "exec/ramblock.h"
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -1162,37 +1163,32 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev,
return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}
-static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
- int queue_idx)
+static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
- struct vhost_user *u = dev->opaque;
- VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
- VirtIODevice *vdev = dev->vdev;
-
- if (n->addr && !n->set) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
- n->set = true;
- }
+ assert(n && n->unmap_addr);
+ munmap(n->unmap_addr, qemu_real_host_page_size);
+ n->unmap_addr = NULL;
}
-static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
- int queue_idx)
+static void vhost_user_host_notifier_remove(VhostUserState *user,
+ VirtIODevice *vdev, int queue_idx)
{
- struct vhost_user *u = dev->opaque;
- VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
- VirtIODevice *vdev = dev->vdev;
+ VhostUserHostNotifier *n = &user->notifier[queue_idx];
- if (n->addr && n->set) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
- n->set = false;
+ if (n->addr) {
+ if (vdev) {
+ virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
+ }
+ assert(!n->unmap_addr);
+ n->unmap_addr = n->addr;
+ n->addr = NULL;
+ call_rcu(n, vhost_user_host_notifier_free, rcu);
}
}
static int vhost_user_set_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
- vhost_user_host_notifier_restore(dev, ring->index);
-
return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}
@@ -1235,8 +1231,9 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
.payload.state = *ring,
.hdr.size = sizeof(msg.payload.state),
};
+ struct vhost_user *u = dev->opaque;
- vhost_user_host_notifier_remove(dev, ring->index);
+ vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index);
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
@@ -1522,12 +1519,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
n = &user->notifier[queue_idx];
- if (n->addr) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
- object_unparent(OBJECT(&n->mr));
- munmap(n->addr, page_size);
- n->addr = NULL;
- }
+ vhost_user_host_notifier_remove(user, vdev, queue_idx);
if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
return 0;
@@ -1546,9 +1538,12 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
user, queue_idx);
- if (!n->mr.ram) /* Don't init again after suspend. */
+ if (!n->mr.ram) { /* Don't init again after suspend. */
memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
page_size, addr);
+ } else {
+ n->mr.ram_block->host = addr;
+ }
g_free(name);
if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
@@ -1558,7 +1553,6 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
}
n->addr = addr;
- n->set = true;
return 0;
}
@@ -2522,17 +2516,16 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
void vhost_user_cleanup(VhostUserState *user)
{
int i;
+ VhostUserHostNotifier *n;
if (!user->chr) {
return;
}
memory_region_transaction_begin();
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- if (user->notifier[i].addr) {
- object_unparent(OBJECT(&user->notifier[i].mr));
- munmap(user->notifier[i].addr, qemu_real_host_page_size);
- user->notifier[i].addr = NULL;
- }
+ n = &user->notifier[i];
+ vhost_user_host_notifier_remove(user, NULL, i);
+ object_unparent(OBJECT(&n->mr));
}
memory_region_transaction_commit();
user->chr = NULL;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 04ea43704f..6c67d5f034 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -395,15 +395,6 @@ static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
}
}
-static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
-{
- int i;
-
- for (i = 0; i < n; i++) {
- vhost_vdpa_host_notifier_uninit(dev, i);
- }
-}
-
static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
size_t page_size = qemu_real_host_page_size;
@@ -431,6 +422,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
g_free(name);
if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
+ object_unparent(OBJECT(&n->mr));
munmap(addr, page_size);
goto err;
}
@@ -442,6 +434,15 @@ err:
return -1;
}
+static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
+{
+ int i;
+
+ for (i = dev->vq_index; i < dev->vq_index + n; i++) {
+ vhost_vdpa_host_notifier_uninit(dev, i);
+ }
+}
+
static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
int i;
@@ -455,7 +456,7 @@ static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
return;
err:
- vhost_vdpa_host_notifiers_uninit(dev, i);
+ vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
return;
}
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 3f3771274e..ed706681ac 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -153,19 +153,23 @@ static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc)
if (elem->out_num) {
error_report("invalid vhost-vsock event virtqueue element with "
"out buffers");
- goto out;
+ goto err;
}
if (iov_from_buf(elem->in_sg, elem->in_num, 0,
&event, sizeof(event)) != sizeof(event)) {
error_report("vhost-vsock event virtqueue element is too short");
- goto out;
+ goto err;
}
virtqueue_push(vq, elem, sizeof(event));
virtio_notify(VIRTIO_DEVICE(vvc), vq);
-out:
+ g_free(elem);
+ return;
+
+err:
+ virtqueue_detach_element(vq, elem, 0);
g_free(elem);
}
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 7b03efccec..b643f42ea4 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1287,7 +1287,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
return r;
}
- file.fd = event_notifier_get_fd(&vq->masked_notifier);
+ file.fd = event_notifier_get_wfd(&vq->masked_notifier);
r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
if (r) {
VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
@@ -1542,9 +1542,9 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
if (mask) {
assert(vdev->use_guest_notifier_mask);
- file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
+ file.fd = event_notifier_get_wfd(&hdev->vqs[index].masked_notifier);
} else {
- file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
+ file.fd = event_notifier_get_wfd(virtio_queue_get_guest_notifier(vvq));
}
file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index d23db98c56..0f69d1c742 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ bool vdev_has_iommu;
Error *local_err = NULL;
DPRINTF("%s: plug device.\n", qbus->name);
@@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
return;
}
- if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
- error_setg(errp, "iommu_platform=true is not supported by the device");
- return;
- }
-
if (klass->device_plugged != NULL) {
klass->device_plugged(qbus->parent, &local_err);
}
@@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
return;
}
+ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
if (klass->get_dma_as != NULL && has_iommu) {
virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM);
vdev->dma_as = klass->get_dma_as(qbus->parent);
+ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
+ error_setg(errp,
+ "iommu_platform=true is not supported by the device");
+ return;
+ }
} else {
vdev->dma_as = &address_space_memory;
}
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index aa9c16a17b..239fe97b12 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -24,6 +24,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"
@@ -42,6 +43,7 @@
typedef struct VirtIOIOMMUDomain {
uint32_t id;
+ bool bypass;
GTree *mappings;
QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;
@@ -257,12 +259,16 @@ static void virtio_iommu_put_endpoint(gpointer data)
}
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
- uint32_t domain_id)
+ uint32_t domain_id,
+ bool bypass)
{
VirtIOIOMMUDomain *domain;
domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
if (domain) {
+ if (domain->bypass != bypass) {
+ return NULL;
+ }
return domain;
}
domain = g_malloc0(sizeof(*domain));
@@ -270,6 +276,7 @@ static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
NULL, (GDestroyNotify)g_free,
(GDestroyNotify)g_free);
+ domain->bypass = bypass;
g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
QLIST_INIT(&domain->endpoint_list);
trace_virtio_iommu_get_domain(domain_id);
@@ -333,11 +340,16 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
{
uint32_t domain_id = le32_to_cpu(req->domain);
uint32_t ep_id = le32_to_cpu(req->endpoint);
+ uint32_t flags = le32_to_cpu(req->flags);
VirtIOIOMMUDomain *domain;
VirtIOIOMMUEndpoint *ep;
trace_virtio_iommu_attach(domain_id, ep_id);
+ if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
+ return VIRTIO_IOMMU_S_INVAL;
+ }
+
ep = virtio_iommu_get_endpoint(s, ep_id);
if (!ep) {
return VIRTIO_IOMMU_S_NOENT;
@@ -355,7 +367,12 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
}
}
- domain = virtio_iommu_get_domain(s, domain_id);
+ domain = virtio_iommu_get_domain(s, domain_id,
+ flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
+ if (!domain) {
+ /* Incompatible bypass flag */
+ return VIRTIO_IOMMU_S_INVAL;
+ }
QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
ep->domain = domain;
@@ -418,6 +435,10 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
return VIRTIO_IOMMU_S_NOENT;
}
+ if (domain->bypass) {
+ return VIRTIO_IOMMU_S_INVAL;
+ }
+
interval = g_malloc0(sizeof(*interval));
interval->low = virt_start;
@@ -463,6 +484,11 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
if (!domain) {
return VIRTIO_IOMMU_S_NOENT;
}
+
+ if (domain->bypass) {
+ return VIRTIO_IOMMU_S_INVAL;
+ }
+
interval.low = virt_start;
interval.high = virt_end;
@@ -728,8 +754,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
.perm = IOMMU_NONE,
};
- bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,
- VIRTIO_IOMMU_F_BYPASS);
+ bypass_allowed = s->config.bypass;
sid = virtio_iommu_get_bdf(sdev);
@@ -780,6 +805,9 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
entry.perm = flag;
}
goto unlock;
+ } else if (ep->domain->bypass) {
+ entry.perm = flag;
+ goto unlock;
}
found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
@@ -831,13 +859,37 @@ static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
out_config->probe_size = cpu_to_le32(dev_config->probe_size);
+ out_config->bypass = dev_config->bypass;
trace_virtio_iommu_get_config(dev_config->page_size_mask,
dev_config->input_range.start,
dev_config->input_range.end,
dev_config->domain_range.start,
dev_config->domain_range.end,
- dev_config->probe_size);
+ dev_config->probe_size,
+ dev_config->bypass);
+}
+
+static void virtio_iommu_set_config(VirtIODevice *vdev,
+ const uint8_t *config_data)
+{
+ VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
+ struct virtio_iommu_config *dev_config = &dev->config;
+ const struct virtio_iommu_config *in_config = (void *)config_data;
+
+ if (in_config->bypass != dev_config->bypass) {
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
+ virtio_error(vdev, "cannot set config.bypass");
+ return;
+ } else if (in_config->bypass != 0 && in_config->bypass != 1) {
+ virtio_error(vdev, "invalid config.bypass value '%u'",
+ in_config->bypass);
+ return;
+ }
+ dev_config->bypass = in_config->bypass;
+ }
+
+ trace_virtio_iommu_set_config(in_config->bypass);
}
static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
@@ -963,6 +1015,19 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
return 0;
}
+static void virtio_iommu_system_reset(void *opaque)
+{
+ VirtIOIOMMU *s = opaque;
+
+ trace_virtio_iommu_system_reset();
+
+ /*
+ * config.bypass is sticky across device reset, but should be restored on
+ * system reset
+ */
+ s->config.bypass = s->boot_bypass;
+}
+
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -988,9 +1053,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
- virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
+ virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);
qemu_mutex_init(&s->mutex);
@@ -1001,6 +1066,8 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
} else {
error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
}
+
+ qemu_register_reset(virtio_iommu_system_reset, s);
}
static void virtio_iommu_device_unrealize(DeviceState *dev)
@@ -1008,6 +1075,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+ qemu_unregister_reset(virtio_iommu_system_reset, s);
+
g_hash_table_destroy(s->as_by_busptr);
if (s->domains) {
g_tree_destroy(s->domains);
@@ -1098,8 +1167,8 @@ static const VMStateDescription vmstate_endpoint = {
static const VMStateDescription vmstate_domain = {
.name = "domain",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.pre_load = domain_preload,
.fields = (VMStateField[]) {
VMSTATE_UINT32(id, VirtIOIOMMUDomain),
@@ -1108,6 +1177,7 @@ static const VMStateDescription vmstate_domain = {
VirtIOIOMMUInterval, VirtIOIOMMUMapping),
VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
vmstate_endpoint, VirtIOIOMMUEndpoint, next),
+ VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
VMSTATE_END_OF_LIST()
}
};
@@ -1141,21 +1211,22 @@ static int iommu_post_load(void *opaque, int version_id)
static const VMStateDescription vmstate_virtio_iommu_device = {
.name = "virtio-iommu-device",
- .minimum_version_id = 1,
- .version_id = 1,
+ .minimum_version_id = 2,
+ .version_id = 2,
.post_load = iommu_post_load,
.fields = (VMStateField[]) {
- VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 1,
+ VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
&vmstate_domain, VirtIOIOMMUDomain),
+ VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
VMSTATE_END_OF_LIST()
},
};
static const VMStateDescription vmstate_virtio_iommu = {
.name = "virtio-iommu",
- .minimum_version_id = 1,
+ .minimum_version_id = 2,
.priority = MIG_PRI_IOMMU,
- .version_id = 1,
+ .version_id = 2,
.fields = (VMStateField[]) {
VMSTATE_VIRTIO_DEVICE,
VMSTATE_END_OF_LIST()
@@ -1164,6 +1235,7 @@ static const VMStateDescription vmstate_virtio_iommu = {
static Property virtio_iommu_properties[] = {
DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
+ DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1180,6 +1252,7 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data)
vdc->unrealize = virtio_iommu_device_unrealize;
vdc->reset = virtio_iommu_device_reset;
vdc->get_config = virtio_iommu_get_config;
+ vdc->set_config = virtio_iommu_set_config;
vdc->get_features = virtio_iommu_get_features;
vdc->set_status = virtio_iommu_set_status;
vdc->vmsd = &vmstate_virtio_iommu_device;
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index c97e8633ad..2b42e4192b 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -77,6 +77,7 @@ typedef struct AcpiFadtData {
uint16_t plvl2_lat; /* P_LVL2_LAT */
uint16_t plvl3_lat; /* P_LVL3_LAT */
uint16_t arm_boot_arch; /* ARM_BOOT_ARCH */
+ uint16_t iapc_boot_arch; /* IAPC_BOOT_ARCH */
uint8_t minor_ver; /* FADT Minor Version */
/*
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 41783ee46d..3b5ac869db 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -228,6 +228,7 @@ struct IntelIOMMUState {
bool caching_mode; /* RO - is cap CM enabled? */
bool scalable_mode; /* RO - is Scalable Mode supported? */
+ bool snoop_control; /* RO - is SNP filed supported? */
dma_addr_t root; /* Current root table pointer */
bool root_scalable; /* Type of root table (scalable or not) */
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9c9f4ac748..1a27de9c8b 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -48,6 +48,7 @@ typedef struct PCMachineState {
bool sata_enabled;
bool pit_enabled;
bool hpet_enabled;
+ bool i8042_enabled;
bool default_bus_bypass_iommu;
uint64_t max_fw_size;
@@ -64,6 +65,7 @@ typedef struct PCMachineState {
#define PC_MACHINE_SMBUS "smbus"
#define PC_MACHINE_SATA "sata"
#define PC_MACHINE_PIT "pit"
+#define PC_MACHINE_I8042 "i8042"
#define PC_MACHINE_MAX_FW_SIZE "max-fw-size"
#define PC_MACHINE_SMBIOS_EP "smbios-entry-point-type"
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index a145a30370..916cc325ee 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -35,8 +35,6 @@ struct X86MachineClass {
/* TSC rate migration: */
bool save_tsc_khz;
- /* Enables contiguous-apic-ID mode */
- bool compat_apic_id_mode;
/* use DMA capable linuxboot option rom */
bool fwcfg_dma_enabled;
};
diff --git a/include/hw/input/i8042.h b/include/hw/input/i8042.h
index 1d90432dae..e070f546e4 100644
--- a/include/hw/input/i8042.h
+++ b/include/hw/input/i8042.h
@@ -23,4 +23,19 @@ void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
void i8042_isa_mouse_fake_event(ISAKBDState *isa);
void i8042_setup_a20_line(ISADevice *dev, qemu_irq a20_out);
+static inline bool i8042_present(void)
+{
+ bool amb = false;
+ return object_resolve_path_type("", TYPE_I8042, &amb) || amb;
+}
+
+/*
+ * ACPI v2, Table 5-10 - Fixed ACPI Description Table Boot Architecture
+ * Flags, bit offset 1 - 8042.
+ */
+static inline uint16_t iapc_boot_arch_8042(void)
+{
+ return i8042_present() ? 0x1 << 1 : 0x0 ;
+}
+
#endif /* HW_INPUT_I8042_H */
diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
index ca3c5bb533..7f16cc9b16 100644
--- a/include/hw/misc/pvpanic.h
+++ b/include/hw/misc/pvpanic.h
@@ -22,14 +22,6 @@
#define PVPANIC_IOPORT_PROP "ioport"
-/* The bit of supported pv event, TODO: include uapi header and remove this */
-#define PVPANIC_F_PANICKED 0
-#define PVPANIC_F_CRASHLOADED 1
-
-/* The pv event value */
-#define PVPANIC_PANICKED (1 << PVPANIC_F_PANICKED)
-#define PVPANIC_CRASHLOADED (1 << PVPANIC_F_CRASHLOADED)
-
/*
* PVPanicState for any device type
*/
diff --git a/include/hw/pci-bridge/xio3130_downstream.h b/include/hw/pci-bridge/xio3130_downstream.h
new file mode 100644
index 0000000000..1d10139aea
--- /dev/null
+++ b/include/hw/pci-bridge/xio3130_downstream.h
@@ -0,0 +1,15 @@
+/*
+ * TI X3130 pci express downstream port switch
+ *
+ * Copyright (C) 2022 Igor Mammedov <imammedo@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H
+#define HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H
+
+#define TYPE_XIO3130_DOWNSTREAM "xio3130-downstream"
+
+#endif
+
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index c3f3c90473..3a32b8dd40 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -7,9 +7,6 @@
/* PCI includes legacy ISA access. */
#include "hw/isa/isa.h"
-#include "hw/pci/pcie.h"
-#include "qom/object.h"
-
extern bool pci_available;
/* PCI bus */
@@ -157,6 +154,7 @@ enum {
#define QEMU_PCI_VGA_IO_HI_SIZE 0x20
#include "hw/pci/pci_regs.h"
+#include "hw/pci/pcie.h"
/* PCI HEADER_TYPE */
#define PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
@@ -499,6 +497,9 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+pcibus_t pci_bar_address(PCIDevice *d,
+ int reg, uint8_t type, pcibus_t size);
+
static inline void
pci_set_byte(uint8_t *config, uint8_t val)
{
@@ -779,6 +780,11 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d)
return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT;
}
+static inline int pci_is_vf(const PCIDevice *d)
+{
+ return d->exp.sriov_vf.pf != NULL;
+}
+
static inline uint32_t pci_config_size(const PCIDevice *d)
{
return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h
index 77ba64b931..a590140962 100644
--- a/include/hw/pci/pci_regs.h
+++ b/include/hw/pci/pci_regs.h
@@ -4,5 +4,6 @@
#include "standard-headers/linux/pci_regs.h"
#define PCI_PM_CAP_VER_1_1 0x0002 /* PCI PM spec ver. 1.1 */
+#define PCI_PM_CAP_VER_1_2 0x0003 /* PCI PM spec ver. 1.2 */
#endif
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 6063bee0ec..798a262a0a 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -24,6 +24,7 @@
#include "hw/pci/pci_regs.h"
#include "hw/pci/pcie_regs.h"
#include "hw/pci/pcie_aer.h"
+#include "hw/pci/pcie_sriov.h"
#include "hw/hotplug.h"
typedef enum {
@@ -81,6 +82,11 @@ struct PCIExpressDevice {
/* ACS */
uint16_t acs_cap;
+
+ /* SR/IOV */
+ uint16_t sriov_cap;
+ PCIESriovPF sriov_pf;
+ PCIESriovVF sriov_vf;
};
#define COMPAT_PROP_PCP "power_controller_present"
@@ -112,6 +118,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len);
int pcie_cap_slot_post_load(void *opaque, int version_id);
void pcie_cap_slot_push_attention_button(PCIDevice *dev);
+void pcie_cap_slot_enable_power(PCIDevice *dev);
void pcie_cap_root_init(PCIDevice *dev);
void pcie_cap_root_reset(PCIDevice *dev);
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
new file mode 100644
index 0000000000..80f5c84e75
--- /dev/null
+++ b/include/hw/pci/pcie_sriov.h
@@ -0,0 +1,77 @@
+/*
+ * pcie_sriov.h:
+ *
+ * Implementation of SR/IOV emulation support.
+ *
+ * Copyright (c) 2015 Knut Omang <knut.omang@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_PCIE_SRIOV_H
+#define QEMU_PCIE_SRIOV_H
+
+struct PCIESriovPF {
+ uint16_t num_vfs; /* Number of virtual functions created */
+ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
+ const char *vfname; /* Reference to the device type used for the VFs */
+ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
+};
+
+struct PCIESriovVF {
+ PCIDevice *pf; /* Pointer back to owner physical function */
+ uint16_t vf_number; /* Logical VF number of this function */
+};
+
+void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride);
+void pcie_sriov_pf_exit(PCIDevice *dev);
+
+/* Set up a VF bar in the SR/IOV bar area */
+void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
+ uint8_t type, dma_addr_t size);
+
+/* Instantiate a bar for a VF */
+void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
+ MemoryRegion *memory);
+
+/*
+ * Default (minimal) page size support values
+ * as required by the SR/IOV standard:
+ * 0x553 << 12 = 0x553000 = 4K + 8K + 64K + 256K + 1M + 4M
+ */
+#define SRIOV_SUP_PGSIZE_MINREQ 0x553
+
+/*
+ * Optionally add supported page sizes to the mask of supported page sizes
+ * Page size values are interpreted as opt_sup_pgsize << 12.
+ */
+void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize);
+
+/* SR/IOV capability config write handler */
+void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len);
+
+/* Reset SR/IOV VF Enable bit to unregister all VFs */
+void pcie_sriov_pf_disable_vfs(PCIDevice *dev);
+
+/* Get logical VF number of a VF - only valid for VFs */
+uint16_t pcie_sriov_vf_number(PCIDevice *dev);
+
+/*
+ * Get the physical function that owns this VF.
+ * Returns NULL if dev is not a virtual function
+ */
+PCIDevice *pcie_sriov_get_pf(PCIDevice *dev);
+
+/*
+ * Get the n-th VF of this physical function - only valid for PF.
+ * Returns NULL if index is invalid
+ */
+PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n);
+
+#endif /* QEMU_PCIE_SRIOV_H */
diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h
index deae47a76d..d8372f3b43 100644
--- a/include/hw/virtio/vhost-user-i2c.h
+++ b/include/hw/virtio/vhost-user-i2c.h
@@ -25,4 +25,7 @@ struct VHostUserI2C {
bool connected;
};
+/* Virtio Feature bits */
+#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0
+
#endif /* _QEMU_VHOST_USER_I2C_H */
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index a9abca3288..e44a41bb70 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -12,9 +12,10 @@
#include "hw/virtio/virtio.h"
typedef struct VhostUserHostNotifier {
+ struct rcu_head rcu;
MemoryRegion mr;
void *addr;
- bool set;
+ void *unmap_addr;
} VhostUserHostNotifier;
typedef struct VhostUserState {
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index e2339e5b72..84391f8448 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -58,6 +58,7 @@ struct VirtIOIOMMU {
GTree *domains;
QemuMutex mutex;
GTree *endpoints;
+ bool boot_bypass;
};
#endif
diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h
index b79add035d..8a4ff308e1 100644
--- a/include/qemu/event_notifier.h
+++ b/include/qemu/event_notifier.h
@@ -38,6 +38,7 @@ int event_notifier_test_and_clear(EventNotifier *);
#ifdef CONFIG_POSIX
void event_notifier_init_fd(EventNotifier *, int fd);
int event_notifier_get_fd(const EventNotifier *);
+int event_notifier_get_wfd(const EventNotifier *);
#else
HANDLE event_notifier_get_handle(EventNotifier *);
#endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index c564f54c11..42f4ceb701 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -88,6 +88,8 @@ typedef struct PCIDevice PCIDevice;
typedef struct PCIEAERErr PCIEAERErr;
typedef struct PCIEAERLog PCIEAERLog;
typedef struct PCIEAERMsg PCIEAERMsg;
+typedef struct PCIESriovPF PCIESriovPF;
+typedef struct PCIESriovVF PCIESriovVF;
typedef struct PCIEPort PCIEPort;
typedef struct PCIESlot PCIESlot;
typedef struct PCIExpressDevice PCIExpressDevice;
diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/linux/pvpanic.h
new file mode 100644
index 0000000000..54b7485390
--- /dev/null
+++ b/include/standard-headers/linux/pvpanic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef __PVPANIC_H__
+#define __PVPANIC_H__
+
+#define PVPANIC_PANICKED (1 << 0)
+#define PVPANIC_CRASH_LOADED (1 << 1)
+
+#endif /* __PVPANIC_H__ */
diff --git a/meson.build b/meson.build
index 0763c3b6a0..2d6601467f 100644
--- a/meson.build
+++ b/meson.build
@@ -2725,7 +2725,7 @@ if have_system or have_user
endif
vhost_user = not_found
-if 'CONFIG_VHOST_USER' in config_host
+if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host
libvhost_user = subproject('libvhost-user')
vhost_user = libvhost_user.get_variable('vhost_user_dep')
endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 094a6c1d7c..5ce0ada75e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2537,6 +2537,7 @@ DEF("smbios", HAS_ARG, QEMU_OPTION_smbios,
" specify SMBIOS type 3 fields\n"
"-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str]\n"
" [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n"
+ " [,processor-id=%d]\n"
" specify SMBIOS type 4 fields\n"
"-smbios type=11[,value=str][,path=filename]\n"
" specify SMBIOS type 11 fields\n"
@@ -2562,7 +2563,7 @@ SRST
``-smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str][,sku=str]``
Specify SMBIOS type 3 fields
-``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str]``
+``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]``
Specify SMBIOS type 4 fields
``-smbios type=11[,value=str][,path=filename]``
diff --git a/qom/object.c b/qom/object.c
index 4048a2fef1..d34608558e 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -1168,10 +1168,14 @@ GSList *object_class_get_list_sorted(const char *implements_type,
Object *object_ref(void *objptr)
{
Object *obj = OBJECT(objptr);
+ uint32_t ref;
+
if (!obj) {
return NULL;
}
- qatomic_inc(&obj->ref);
+ ref = qatomic_fetch_inc(&obj->ref);
+ /* Assert waaay before the integer overflows */
+ g_assert(ref < INT_MAX);
return obj;
}
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index fe850763c5..839a5ec614 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -214,7 +214,8 @@ for i in "$tmpdir"/include/linux/*virtio*.h \
"$tmpdir/include/linux/const.h" \
"$tmpdir/include/linux/kernel.h" \
"$tmpdir/include/linux/vhost_types.h" \
- "$tmpdir/include/linux/sysinfo.h"; do
+ "$tmpdir/include/linux/sysinfo.h" \
+ "$tmpdir/include/misc/pvpanic.h"; do
cp_portable "$i" "$output/include/standard-headers/linux"
done
mkdir -p "$output/include/standard-headers/drm"
diff --git a/tests/data/acpi/q35/FACP b/tests/data/acpi/q35/FACP
index f6a864cc86..a8f6a89611 100644
--- a/tests/data/acpi/q35/FACP
+++ b/tests/data/acpi/q35/FACP
Binary files differ
diff --git a/tests/data/acpi/q35/FACP.nosmm b/tests/data/acpi/q35/FACP.nosmm
index 6a9aa5f370..c4e6d18ee5 100644
--- a/tests/data/acpi/q35/FACP.nosmm
+++ b/tests/data/acpi/q35/FACP.nosmm
Binary files differ
diff --git a/tests/data/acpi/q35/FACP.slic b/tests/data/acpi/q35/FACP.slic
index 15986e095c..48bbb1cf5a 100644
--- a/tests/data/acpi/q35/FACP.slic
+++ b/tests/data/acpi/q35/FACP.slic
Binary files differ
diff --git a/tests/data/acpi/q35/FACP.xapic b/tests/data/acpi/q35/FACP.xapic
index 2d3659c9c6..31fa5dd19c 100644
--- a/tests/data/acpi/q35/FACP.xapic
+++ b/tests/data/acpi/q35/FACP.xapic
Binary files differ
diff --git a/tests/qtest/virtio-iommu-test.c b/tests/qtest/virtio-iommu-test.c
index 47e68388a0..068e7a9e6c 100644
--- a/tests/qtest/virtio-iommu-test.c
+++ b/tests/qtest/virtio-iommu-test.c
@@ -31,11 +31,13 @@ static void pci_config(void *obj, void *data, QGuestAllocator *t_alloc)
uint64_t input_range_end = qvirtio_config_readq(dev, 16);
uint32_t domain_range_start = qvirtio_config_readl(dev, 24);
uint32_t domain_range_end = qvirtio_config_readl(dev, 28);
+ uint8_t bypass = qvirtio_config_readb(dev, 36);
g_assert_cmpint(input_range_start, ==, 0);
g_assert_cmphex(input_range_end, ==, UINT64_MAX);
g_assert_cmpint(domain_range_start, ==, 0);
g_assert_cmpint(domain_range_end, ==, UINT32_MAX);
+ g_assert_cmpint(bypass, ==, 1);
}
static int read_tail_status(struct virtio_iommu_req_tail *buffer)
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index 8307013c5d..16294e98d4 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -99,6 +99,11 @@ int event_notifier_get_fd(const EventNotifier *e)
return e->rfd;
}
+int event_notifier_get_wfd(const EventNotifier *e)
+{
+ return e->wfd;
+}
+
int event_notifier_set(EventNotifier *e)
{
static const uint64_t value = 1;