aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/s390x/meson.build1
-rw-r--r--hw/s390x/s390-pci-bus.c91
-rw-r--r--hw/s390x/s390-pci-bus.h372
-rw-r--r--hw/s390x/s390-pci-inst.c78
-rw-r--r--hw/s390x/s390-pci-inst.h312
-rw-r--r--hw/s390x/s390-pci-vfio.c276
-rw-r--r--hw/s390x/s390-virtio-ccw.c2
-rw-r--r--hw/s390x/trace-events5
-rw-r--r--hw/vfio/common.c508
-rw-r--r--hw/vfio/meson.build1
-rw-r--r--hw/vfio/migration.c933
-rw-r--r--hw/vfio/pci.c87
-rw-r--r--hw/vfio/pci.h1
-rw-r--r--hw/vfio/platform.c7
-rw-r--r--hw/vfio/trace-events21
15 files changed, 1922 insertions, 773 deletions
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index 948ceae7a7..f4663a8355 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -27,6 +27,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
))
s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files('s390-virtio-ccw.c'))
s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c'))
+s390x_ss.add(when: 'CONFIG_LINUX', if_true: files('s390-pci-vfio.c'))
virtio_ss = ss.source_set()
virtio_ss.add(files('virtio-ccw.c'))
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index fb4cee87a4..48a3be802f 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -15,8 +15,9 @@
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "cpu.h"
-#include "s390-pci-bus.h"
-#include "s390-pci-inst.h"
+#include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-inst.h"
+#include "hw/s390x/s390-pci-vfio.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/pci/pci_bridge.h"
@@ -737,6 +738,57 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
object_unref(OBJECT(iommu));
}
+S390PCIGroup *s390_group_create(int id)
+{
+ S390PCIGroup *group;
+ S390pciState *s = s390_get_phb();
+
+ group = g_new0(S390PCIGroup, 1);
+ group->id = id;
+ QTAILQ_INSERT_TAIL(&s->zpci_groups, group, link);
+ return group;
+}
+
+S390PCIGroup *s390_group_find(int id)
+{
+ S390PCIGroup *group;
+ S390pciState *s = s390_get_phb();
+
+ QTAILQ_FOREACH(group, &s->zpci_groups, link) {
+ if (group->id == id) {
+ return group;
+ }
+ }
+ return NULL;
+}
+
+static void s390_pci_init_default_group(void)
+{
+ S390PCIGroup *group;
+ ClpRspQueryPciGrp *resgrp;
+
+ group = s390_group_create(ZPCI_DEFAULT_FN_GRP);
+ resgrp = &group->zpci_group;
+ resgrp->fr = 1;
+ stq_p(&resgrp->dasm, 0);
+ stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
+ stw_p(&resgrp->mui, DEFAULT_MUI);
+ stw_p(&resgrp->i, 128);
+ stw_p(&resgrp->maxstbl, 128);
+ resgrp->version = 0;
+}
+
+static void set_pbdev_info(S390PCIBusDevice *pbdev)
+{
+ pbdev->zpci_fn.sdma = ZPCI_SDMA_ADDR;
+ pbdev->zpci_fn.edma = ZPCI_EDMA_ADDR;
+ pbdev->zpci_fn.pchid = 0;
+ pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
+ pbdev->zpci_fn.fid = pbdev->fid;
+ pbdev->zpci_fn.uid = pbdev->uid;
+ pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
+}
+
static void s390_pcihost_realize(DeviceState *dev, Error **errp)
{
PCIBus *b;
@@ -764,11 +816,25 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
s->bus_no = 0;
QTAILQ_INIT(&s->pending_sei);
QTAILQ_INIT(&s->zpci_devs);
+ QTAILQ_INIT(&s->zpci_dma_limit);
+ QTAILQ_INIT(&s->zpci_groups);
+ s390_pci_init_default_group();
css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
S390_ADAPTER_SUPPRESSIBLE, errp);
}
+static void s390_pcihost_unrealize(DeviceState *dev)
+{
+ S390PCIGroup *group;
+ S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
+
+ while (!QTAILQ_EMPTY(&s->zpci_groups)) {
+ group = QTAILQ_FIRST(&s->zpci_groups);
+ QTAILQ_REMOVE(&s->zpci_groups, group, link);
+ }
+}
+
static int s390_pci_msix_init(S390PCIBusDevice *pbdev)
{
char *name;
@@ -797,7 +863,8 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev)
name = g_strdup_printf("msix-s390-%04x", pbdev->uid);
memory_region_init_io(&pbdev->msix_notify_mr, OBJECT(pbdev),
&s390_msi_ctrl_ops, pbdev, name, PAGE_SIZE);
- memory_region_add_subregion(&pbdev->iommu->mr, ZPCI_MSI_ADDR,
+ memory_region_add_subregion(&pbdev->iommu->mr,
+ pbdev->pci_group->zpci_group.msia,
&pbdev->msix_notify_mr);
g_free(name);
@@ -941,17 +1008,21 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
}
}
+ pbdev->pdev = pdev;
+ pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
+ pbdev->iommu->pbdev = pbdev;
+ pbdev->state = ZPCI_FS_DISABLED;
+ set_pbdev_info(pbdev);
+
if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
pbdev->fh |= FH_SHM_VFIO;
+ pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
+ /* Fill in CLP information passed via the vfio region */
+ s390_pci_get_clp_info(pbdev);
} else {
pbdev->fh |= FH_SHM_EMUL;
}
- pbdev->pdev = pdev;
- pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
- pbdev->iommu->pbdev = pbdev;
- pbdev->state = ZPCI_FS_DISABLED;
-
if (s390_pci_msix_init(pbdev)) {
error_setg(errp, "MSI-X support is mandatory "
"in the S390 architecture");
@@ -1004,6 +1075,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev->fid = 0;
QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
g_hash_table_remove(s->zpci_table, &pbdev->idx);
+ if (pbdev->iommu->dma_limit) {
+ s390_pci_end_dma_count(s, pbdev->iommu->dma_limit);
+ }
qdev_unrealize(dev);
}
}
@@ -1123,6 +1197,7 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data)
dc->reset = s390_pcihost_reset;
dc->realize = s390_pcihost_realize;
+ dc->unrealize = s390_pcihost_unrealize;
hc->pre_plug = s390_pcihost_pre_plug;
hc->plug = s390_pcihost_plug;
hc->unplug_request = s390_pcihost_unplug_request;
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
deleted file mode 100644
index 97464d0ad3..0000000000
--- a/hw/s390x/s390-pci-bus.h
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * s390 PCI BUS definitions
- *
- * Copyright 2014 IBM Corp.
- * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
- * Hong Bo Li <lihbbj@cn.ibm.com>
- * Yi Min Zhao <zyimin@cn.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or (at
- * your option) any later version. See the COPYING file in the top-level
- * directory.
- */
-
-#ifndef HW_S390_PCI_BUS_H
-#define HW_S390_PCI_BUS_H
-
-#include "hw/pci/pci.h"
-#include "hw/pci/pci_host.h"
-#include "hw/s390x/sclp.h"
-#include "hw/s390x/s390_flic.h"
-#include "hw/s390x/css.h"
-#include "qom/object.h"
-
-#define TYPE_S390_PCI_HOST_BRIDGE "s390-pcihost"
-#define TYPE_S390_PCI_BUS "s390-pcibus"
-#define TYPE_S390_PCI_DEVICE "zpci"
-#define TYPE_S390_PCI_IOMMU "s390-pci-iommu"
-#define TYPE_S390_IOMMU_MEMORY_REGION "s390-iommu-memory-region"
-#define FH_MASK_ENABLE 0x80000000
-#define FH_MASK_INSTANCE 0x7f000000
-#define FH_MASK_SHM 0x00ff0000
-#define FH_MASK_INDEX 0x0000ffff
-#define FH_SHM_VFIO 0x00010000
-#define FH_SHM_EMUL 0x00020000
-#define ZPCI_MAX_FID 0xffffffff
-#define ZPCI_MAX_UID 0xffff
-#define UID_UNDEFINED 0
-#define UID_CHECKING_ENABLED 0x01
-
-OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE)
-OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS)
-OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE)
-OBJECT_DECLARE_SIMPLE_TYPE(S390PCIIOMMU, S390_PCI_IOMMU)
-
-#define HP_EVENT_TO_CONFIGURED 0x0301
-#define HP_EVENT_RESERVED_TO_STANDBY 0x0302
-#define HP_EVENT_DECONFIGURE_REQUEST 0x0303
-#define HP_EVENT_CONFIGURED_TO_STBRES 0x0304
-#define HP_EVENT_STANDBY_TO_RESERVED 0x0308
-
-#define ERR_EVENT_INVALAS 0x1
-#define ERR_EVENT_OORANGE 0x2
-#define ERR_EVENT_INVALTF 0x3
-#define ERR_EVENT_TPROTE 0x4
-#define ERR_EVENT_APROTE 0x5
-#define ERR_EVENT_KEYE 0x6
-#define ERR_EVENT_INVALTE 0x7
-#define ERR_EVENT_INVALTL 0x8
-#define ERR_EVENT_TT 0x9
-#define ERR_EVENT_INVALMS 0xa
-#define ERR_EVENT_SERR 0xb
-#define ERR_EVENT_NOMSI 0x10
-#define ERR_EVENT_INVALBV 0x11
-#define ERR_EVENT_AIBV 0x12
-#define ERR_EVENT_AIRERR 0x13
-#define ERR_EVENT_FMBA 0x2a
-#define ERR_EVENT_FMBUP 0x2b
-#define ERR_EVENT_FMBPRO 0x2c
-#define ERR_EVENT_CCONF 0x30
-#define ERR_EVENT_SERVAC 0x3a
-#define ERR_EVENT_PERMERR 0x3b
-
-#define ERR_EVENT_Q_BIT 0x2
-#define ERR_EVENT_MVN_OFFSET 16
-
-#define ZPCI_MSI_VEC_BITS 11
-#define ZPCI_MSI_VEC_MASK 0x7ff
-
-#define ZPCI_MSI_ADDR 0xfe00000000000000ULL
-#define ZPCI_SDMA_ADDR 0x100000000ULL
-#define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL
-
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-#define PAGE_DEFAULT_ACC 0
-#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
-
-/* I/O Translation Anchor (IOTA) */
-enum ZpciIoatDtype {
- ZPCI_IOTA_STO = 0,
- ZPCI_IOTA_RTTO = 1,
- ZPCI_IOTA_RSTO = 2,
- ZPCI_IOTA_RFTO = 3,
- ZPCI_IOTA_PFAA = 4,
- ZPCI_IOTA_IOPFAA = 5,
- ZPCI_IOTA_IOPTO = 7
-};
-
-#define ZPCI_IOTA_IOT_ENABLED 0x800ULL
-#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2)
-#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2)
-#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2)
-#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2)
-#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2)
-#define ZPCI_IOTA_FS_4K 0
-#define ZPCI_IOTA_FS_1M 1
-#define ZPCI_IOTA_FS_2G 2
-#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
-
-#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
-#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
-#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
-#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
-#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY |\
- ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
-
-/* I/O Region and segment tables */
-#define ZPCI_INDEX_MASK 0x7ffULL
-
-#define ZPCI_TABLE_TYPE_MASK 0xc
-#define ZPCI_TABLE_TYPE_RFX 0xc
-#define ZPCI_TABLE_TYPE_RSX 0x8
-#define ZPCI_TABLE_TYPE_RTX 0x4
-#define ZPCI_TABLE_TYPE_SX 0x0
-
-#define ZPCI_TABLE_LEN_RFX 0x3
-#define ZPCI_TABLE_LEN_RSX 0x3
-#define ZPCI_TABLE_LEN_RTX 0x3
-
-#define ZPCI_TABLE_OFFSET_MASK 0xc0
-#define ZPCI_TABLE_SIZE 0x4000
-#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE
-#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long))
-#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
-
-#define ZPCI_TABLE_BITS 11
-#define ZPCI_PT_BITS 8
-#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT)
-#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
-
-#define ZPCI_RTE_FLAG_MASK 0x3fffULL
-#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK)
-#define ZPCI_STE_FLAG_MASK 0x7ffULL
-#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK)
-
-#define ZPCI_SFAA_MASK (~((1ULL << 20) - 1))
-
-/* I/O Page tables */
-#define ZPCI_PTE_VALID_MASK 0x400
-#define ZPCI_PTE_INVALID 0x400
-#define ZPCI_PTE_VALID 0x000
-#define ZPCI_PT_SIZE 0x800
-#define ZPCI_PT_ALIGN ZPCI_PT_SIZE
-#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
-#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1)
-
-#define ZPCI_PTE_FLAG_MASK 0xfffULL
-#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK)
-
-/* Shared bits */
-#define ZPCI_TABLE_VALID 0x00
-#define ZPCI_TABLE_INVALID 0x20
-#define ZPCI_TABLE_PROTECTED 0x200
-#define ZPCI_TABLE_UNPROTECTED 0x000
-#define ZPCI_TABLE_FC 0x400
-
-#define ZPCI_TABLE_VALID_MASK 0x20
-#define ZPCI_TABLE_PROT_MASK 0x200
-
-#define ZPCI_ETT_RT 1
-#define ZPCI_ETT_ST 0
-#define ZPCI_ETT_PT -1
-
-/* PCI Function States
- *
- * reserved: default; device has just been plugged or is in progress of being
- * unplugged
- * standby: device is present but not configured; transition from any
- * configured state/to this state via sclp configure/deconfigure
- *
- * The following states make up the "configured" meta-state:
- * disabled: device is configured but not enabled; transition between this
- * state and enabled via clp enable/disable
- * enbaled: device is ready for use; transition to disabled via clp disable;
- * may enter an error state
- * blocked: ignore all DMA and interrupts; transition back to enabled or from
- * error state via mpcifc
- * error: an error occurred; transition back to enabled via mpcifc
- * permanent error: an unrecoverable error occurred; transition to standby via
- * sclp deconfigure
- */
-typedef enum {
- ZPCI_FS_RESERVED,
- ZPCI_FS_STANDBY,
- ZPCI_FS_DISABLED,
- ZPCI_FS_ENABLED,
- ZPCI_FS_BLOCKED,
- ZPCI_FS_ERROR,
- ZPCI_FS_PERMANENT_ERROR,
-} ZpciState;
-
-typedef struct SeiContainer {
- QTAILQ_ENTRY(SeiContainer) link;
- uint32_t fid;
- uint32_t fh;
- uint8_t cc;
- uint16_t pec;
- uint64_t faddr;
- uint32_t e;
-} SeiContainer;
-
-typedef struct PciCcdfErr {
- uint32_t reserved1;
- uint32_t fh;
- uint32_t fid;
- uint32_t e;
- uint64_t faddr;
- uint32_t reserved3;
- uint16_t reserved4;
- uint16_t pec;
-} QEMU_PACKED PciCcdfErr;
-
-typedef struct PciCcdfAvail {
- uint32_t reserved1;
- uint32_t fh;
- uint32_t fid;
- uint32_t reserved2;
- uint32_t reserved3;
- uint32_t reserved4;
- uint32_t reserved5;
- uint16_t reserved6;
- uint16_t pec;
-} QEMU_PACKED PciCcdfAvail;
-
-typedef struct ChscSeiNt2Res {
- uint16_t length;
- uint16_t code;
- uint16_t reserved1;
- uint8_t reserved2;
- uint8_t nt;
- uint8_t flags;
- uint8_t reserved3;
- uint8_t reserved4;
- uint8_t cc;
- uint32_t reserved5[13];
- uint8_t ccdf[4016];
-} QEMU_PACKED ChscSeiNt2Res;
-
-typedef struct S390MsixInfo {
- uint8_t table_bar;
- uint8_t pba_bar;
- uint16_t entries;
- uint32_t table_offset;
- uint32_t pba_offset;
-} S390MsixInfo;
-
-typedef struct S390IOTLBEntry {
- uint64_t iova;
- uint64_t translated_addr;
- uint64_t len;
- uint64_t perm;
-} S390IOTLBEntry;
-
-struct S390PCIIOMMU {
- Object parent_obj;
- S390PCIBusDevice *pbdev;
- AddressSpace as;
- MemoryRegion mr;
- IOMMUMemoryRegion iommu_mr;
- bool enabled;
- uint64_t g_iota;
- uint64_t pba;
- uint64_t pal;
- GHashTable *iotlb;
-};
-
-typedef struct S390PCIIOMMUTable {
- uint64_t key;
- S390PCIIOMMU *iommu[PCI_SLOT_MAX];
-} S390PCIIOMMUTable;
-
-/* Function Measurement Block */
-#define DEFAULT_MUI 4000
-#define UPDATE_U_BIT 0x1ULL
-#define FMBK_MASK 0xfULL
-
-typedef struct ZpciFmbFmt0 {
- uint64_t dma_rbytes;
- uint64_t dma_wbytes;
-} ZpciFmbFmt0;
-
-#define ZPCI_FMB_CNT_LD 0
-#define ZPCI_FMB_CNT_ST 1
-#define ZPCI_FMB_CNT_STB 2
-#define ZPCI_FMB_CNT_RPCIT 3
-#define ZPCI_FMB_CNT_MAX 4
-
-#define ZPCI_FMB_FORMAT 0
-
-typedef struct ZpciFmb {
- uint32_t format;
- uint32_t sample;
- uint64_t last_update;
- uint64_t counter[ZPCI_FMB_CNT_MAX];
- ZpciFmbFmt0 fmt0;
-} ZpciFmb;
-QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb");
-
-struct S390PCIBusDevice {
- DeviceState qdev;
- PCIDevice *pdev;
- ZpciState state;
- char *target;
- uint16_t uid;
- uint32_t idx;
- uint32_t fh;
- uint32_t fid;
- bool fid_defined;
- uint64_t fmb_addr;
- ZpciFmb fmb;
- QEMUTimer *fmb_timer;
- uint8_t isc;
- uint16_t noi;
- uint16_t maxstbl;
- uint8_t sum;
- S390MsixInfo msix;
- AdapterRoutes routes;
- S390PCIIOMMU *iommu;
- MemoryRegion msix_notify_mr;
- IndAddr *summary_ind;
- IndAddr *indicator;
- bool pci_unplug_request_processed;
- bool unplug_requested;
- QTAILQ_ENTRY(S390PCIBusDevice) link;
-};
-
-struct S390PCIBus {
- BusState qbus;
-};
-
-struct S390pciState {
- PCIHostState parent_obj;
- uint32_t next_idx;
- int bus_no;
- S390PCIBus *bus;
- GHashTable *iommu_table;
- GHashTable *zpci_table;
- QTAILQ_HEAD(, SeiContainer) pending_sei;
- QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
-};
-
-S390pciState *s390_get_phb(void);
-int pci_chsc_sei_nt2_get_event(void *res);
-int pci_chsc_sei_nt2_have_event(void);
-void s390_pci_sclp_configure(SCCB *sccb);
-void s390_pci_sclp_deconfigure(SCCB *sccb);
-void s390_pci_iommu_enable(S390PCIIOMMU *iommu);
-void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
-void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
- uint64_t faddr, uint32_t e);
-uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
- S390IOTLBEntry *entry);
-S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
-S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
-S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
-S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
- const char *target);
-S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
- S390PCIBusDevice *pbdev);
-
-#endif
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 2f7a7d7bd1..58cd041d17 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -13,12 +13,12 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "s390-pci-inst.h"
-#include "s390-pci-bus.h"
#include "exec/memop.h"
#include "exec/memory-internal.h"
#include "qemu/error-report.h"
#include "sysemu/hw_accel.h"
+#include "hw/s390x/s390-pci-inst.h"
+#include "hw/s390x/s390-pci-bus.h"
#include "hw/s390x/tod.h"
#ifndef DEBUG_S390PCI_INST
@@ -32,6 +32,20 @@
} \
} while (0)
+static inline void inc_dma_avail(S390PCIIOMMU *iommu)
+{
+ if (iommu->dma_limit) {
+ iommu->dma_limit->avail++;
+ }
+}
+
+static inline void dec_dma_avail(S390PCIIOMMU *iommu)
+{
+ if (iommu->dma_limit) {
+ iommu->dma_limit->avail--;
+ }
+}
+
static void s390_set_status_code(CPUS390XState *env,
uint8_t r, uint64_t status_code)
{
@@ -267,6 +281,8 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
goto out;
}
+ memcpy(resquery, &pbdev->zpci_fn, sizeof(*resquery));
+
for (i = 0; i < PCI_BAR_COUNT; i++) {
uint32_t data = pci_get_long(pbdev->pdev->config +
PCI_BASE_ADDRESS_0 + (i * 4));
@@ -280,25 +296,23 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
resquery->bar_size[i]);
}
- stq_p(&resquery->sdma, ZPCI_SDMA_ADDR);
- stq_p(&resquery->edma, ZPCI_EDMA_ADDR);
- stl_p(&resquery->fid, pbdev->fid);
- stw_p(&resquery->pchid, 0);
- stw_p(&resquery->ug, 1);
- stl_p(&resquery->uid, pbdev->uid);
stw_p(&resquery->hdr.rsp, CLP_RC_OK);
break;
}
case CLP_QUERY_PCI_FNGRP: {
ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh;
- resgrp->fr = 1;
- stq_p(&resgrp->dasm, 0);
- stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
- stw_p(&resgrp->mui, DEFAULT_MUI);
- stw_p(&resgrp->i, 128);
- stw_p(&resgrp->maxstbl, 128);
- resgrp->version = 0;
+ ClpReqQueryPciGrp *reqgrp = (ClpReqQueryPciGrp *)reqh;
+ S390PCIGroup *group;
+
+ group = s390_group_find(reqgrp->g);
+ if (!group) {
+ /* We do not allow access to unknown groups */
+ /* The group must have been obtained with a vfio device */
+ stw_p(&resgrp->hdr.rsp, CLP_RC_QUERYPCIFG_PFGID);
+ goto out;
+ }
+ memcpy(resgrp, &group->zpci_group, sizeof(ClpRspQueryPciGrp));
stw_p(&resgrp->hdr.rsp, CLP_RC_OK);
break;
}
@@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
return 0;
}
-static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
+static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
+ S390IOTLBEntry *entry)
{
S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
IOMMUTLBEntry notify = {
@@ -585,14 +600,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
if (entry->perm == IOMMU_NONE) {
if (!cache) {
- return;
+ goto out;
}
g_hash_table_remove(iommu->iotlb, &entry->iova);
+ inc_dma_avail(iommu);
} else {
if (cache) {
if (cache->perm == entry->perm &&
cache->translated_addr == entry->translated_addr) {
- return;
+ goto out;
}
notify.perm = IOMMU_NONE;
@@ -606,9 +622,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
cache->len = PAGE_SIZE;
cache->perm = entry->perm;
g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
+ dec_dma_avail(iommu);
}
memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
+
+out:
+ return iommu->dma_limit ? iommu->dma_limit->avail : 1;
}
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
@@ -620,6 +640,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
S390PCIIOMMU *iommu;
S390IOTLBEntry entry;
hwaddr start, end;
+ uint32_t dma_avail;
if (env->psw.mask & PSW_MASK_PSTATE) {
s390_program_interrupt(env, PGM_PRIVILEGED, ra);
@@ -658,6 +679,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
}
iommu = pbdev->iommu;
+ if (iommu->dma_limit) {
+ dma_avail = iommu->dma_limit->avail;
+ } else {
+ dma_avail = 1;
+ }
if (!iommu->g_iota) {
error = ERR_EVENT_INVALAS;
goto err;
@@ -675,8 +701,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
}
start += entry.len;
- while (entry.iova < start && entry.iova < end) {
- s390_pci_update_iotlb(iommu, &entry);
+ while (entry.iova < start && entry.iova < end &&
+ (dma_avail > 0 || entry.perm == IOMMU_NONE)) {
+ dma_avail = s390_pci_update_iotlb(iommu, &entry);
entry.iova += PAGE_SIZE;
entry.translated_addr += PAGE_SIZE;
}
@@ -689,7 +716,13 @@ err:
s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
} else {
pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++;
- setcc(cpu, ZPCI_PCI_LS_OK);
+ if (dma_avail > 0) {
+ setcc(cpu, ZPCI_PCI_LS_OK);
+ } else {
+ /* vfio DMA mappings are exhausted, trigger a RPCIT */
+ setcc(cpu, ZPCI_PCI_LS_ERR);
+ s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES);
+ }
}
return 0;
}
@@ -754,7 +787,8 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
}
/* Length must be greater than 8, a multiple of 8 */
/* and not greater than maxstbl */
- if ((len <= 8) || (len % 8) || (len > pbdev->maxstbl)) {
+ if ((len <= 8) || (len % 8) ||
+ (len > pbdev->pci_group->zpci_group.maxstbl)) {
goto specification_error;
}
/* Do not cross a 4K-byte boundary */
diff --git a/hw/s390x/s390-pci-inst.h b/hw/s390x/s390-pci-inst.h
deleted file mode 100644
index fa3bf8b5aa..0000000000
--- a/hw/s390x/s390-pci-inst.h
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * s390 PCI instruction definitions
- *
- * Copyright 2014 IBM Corp.
- * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
- * Hong Bo Li <lihbbj@cn.ibm.com>
- * Yi Min Zhao <zyimin@cn.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or (at
- * your option) any later version. See the COPYING file in the top-level
- * directory.
- */
-
-#ifndef HW_S390_PCI_INST_H
-#define HW_S390_PCI_INST_H
-
-#include "s390-pci-bus.h"
-#include "sysemu/dma.h"
-
-/* CLP common request & response block size */
-#define CLP_BLK_SIZE 4096
-#define PCI_BAR_COUNT 6
-#define PCI_MAX_FUNCTIONS 4096
-
-typedef struct ClpReqHdr {
- uint16_t len;
- uint16_t cmd;
-} QEMU_PACKED ClpReqHdr;
-
-typedef struct ClpRspHdr {
- uint16_t len;
- uint16_t rsp;
-} QEMU_PACKED ClpRspHdr;
-
-/* CLP Response Codes */
-#define CLP_RC_OK 0x0010 /* Command request successfully */
-#define CLP_RC_CMD 0x0020 /* Command code not recognized */
-#define CLP_RC_PERM 0x0030 /* Command not authorized */
-#define CLP_RC_FMT 0x0040 /* Invalid command request format */
-#define CLP_RC_LEN 0x0050 /* Invalid command request length */
-#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */
-#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */
-#define CLP_RC_NODATA 0x0080 /* No data available */
-#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */
-
-/*
- * Call Logical Processor - Command Codes
- */
-#define CLP_LIST_PCI 0x0002
-#define CLP_QUERY_PCI_FN 0x0003
-#define CLP_QUERY_PCI_FNGRP 0x0004
-#define CLP_SET_PCI_FN 0x0005
-
-/* PCI function handle list entry */
-typedef struct ClpFhListEntry {
- uint16_t device_id;
- uint16_t vendor_id;
-#define CLP_FHLIST_MASK_CONFIG 0x80000000
- uint32_t config;
- uint32_t fid;
- uint32_t fh;
-} QEMU_PACKED ClpFhListEntry;
-
-#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */
-#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */
-#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */
-#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */
-#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */
-#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */
-#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */
-#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */
-#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */
-#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */
-
-/* request or response block header length */
-#define LIST_PCI_HDR_LEN 32
-
-/* Number of function handles fitting in response block */
-#define CLP_FH_LIST_NR_ENTRIES \
- ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \
- / sizeof(ClpFhListEntry))
-
-#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */
-#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
-
-#define CLP_UTIL_STR_LEN 64
-
-#define CLP_MASK_FMT 0xf0000000
-
-/* List PCI functions request */
-typedef struct ClpReqListPci {
- ClpReqHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
- uint64_t resume_token;
- uint64_t reserved2;
-} QEMU_PACKED ClpReqListPci;
-
-/* List PCI functions response */
-typedef struct ClpRspListPci {
- ClpRspHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
- uint64_t resume_token;
- uint32_t mdd;
- uint16_t max_fn;
- uint8_t flags;
- uint8_t entry_size;
- ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES];
-} QEMU_PACKED ClpRspListPci;
-
-/* Query PCI function request */
-typedef struct ClpReqQueryPci {
- ClpReqHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
- uint32_t fh; /* function handle */
- uint32_t reserved2;
- uint64_t reserved3;
-} QEMU_PACKED ClpReqQueryPci;
-
-/* Query PCI function response */
-typedef struct ClpRspQueryPci {
- ClpRspHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
- uint16_t vfn; /* virtual fn number */
-#define CLP_RSP_QPCI_MASK_UTIL 0x100
-#define CLP_RSP_QPCI_MASK_PFGID 0xff
- uint16_t ug;
- uint32_t fid; /* pci function id */
- uint8_t bar_size[PCI_BAR_COUNT];
- uint16_t pchid;
- uint32_t bar[PCI_BAR_COUNT];
- uint64_t reserved2;
- uint64_t sdma; /* start dma as */
- uint64_t edma; /* end dma as */
- uint32_t reserved3[11];
- uint32_t uid;
- uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */
-} QEMU_PACKED ClpRspQueryPci;
-
-/* Query PCI function group request */
-typedef struct ClpReqQueryPciGrp {
- ClpReqHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
-#define CLP_REQ_QPCIG_MASK_PFGID 0xff
- uint32_t g;
- uint32_t reserved2;
- uint64_t reserved3;
-} QEMU_PACKED ClpReqQueryPciGrp;
-
-/* Query PCI function group response */
-typedef struct ClpRspQueryPciGrp {
- ClpRspHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
-#define CLP_RSP_QPCIG_MASK_NOI 0xfff
- uint16_t i;
- uint8_t version;
-#define CLP_RSP_QPCIG_MASK_FRAME 0x2
-#define CLP_RSP_QPCIG_MASK_REFRESH 0x1
- uint8_t fr;
- uint16_t maxstbl;
- uint16_t mui;
- uint64_t reserved3;
- uint64_t dasm; /* dma address space mask */
- uint64_t msia; /* MSI address */
- uint64_t reserved4;
- uint64_t reserved5;
-} QEMU_PACKED ClpRspQueryPciGrp;
-
-/* Set PCI function request */
-typedef struct ClpReqSetPci {
- ClpReqHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
- uint32_t fh; /* function handle */
- uint16_t reserved2;
- uint8_t oc; /* operation controls */
- uint8_t ndas; /* number of dma spaces */
- uint64_t reserved3;
-} QEMU_PACKED ClpReqSetPci;
-
-/* Set PCI function response */
-typedef struct ClpRspSetPci {
- ClpRspHdr hdr;
- uint32_t fmt;
- uint64_t reserved1;
- uint32_t fh; /* function handle */
- uint32_t reserved3;
- uint64_t reserved4;
-} QEMU_PACKED ClpRspSetPci;
-
-typedef struct ClpReqRspListPci {
- ClpReqListPci request;
- ClpRspListPci response;
-} QEMU_PACKED ClpReqRspListPci;
-
-typedef struct ClpReqRspSetPci {
- ClpReqSetPci request;
- ClpRspSetPci response;
-} QEMU_PACKED ClpReqRspSetPci;
-
-typedef struct ClpReqRspQueryPci {
- ClpReqQueryPci request;
- ClpRspQueryPci response;
-} QEMU_PACKED ClpReqRspQueryPci;
-
-typedef struct ClpReqRspQueryPciGrp {
- ClpReqQueryPciGrp request;
- ClpRspQueryPciGrp response;
-} QEMU_PACKED ClpReqRspQueryPciGrp;
-
-/* Load/Store status codes */
-#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
-#define ZPCI_PCI_ST_FUNC_IN_ERR 8
-#define ZPCI_PCI_ST_BLOCKED 12
-#define ZPCI_PCI_ST_INSUF_RES 16
-#define ZPCI_PCI_ST_INVAL_AS 20
-#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24
-#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28
-#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36
-#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
-#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
-
-/* Load/Store return codes */
-#define ZPCI_PCI_LS_OK 0
-#define ZPCI_PCI_LS_ERR 1
-#define ZPCI_PCI_LS_BUSY 2
-#define ZPCI_PCI_LS_INVAL_HANDLE 3
-
-/* Modify PCI status codes */
-#define ZPCI_MOD_ST_RES_NOT_AVAIL 4
-#define ZPCI_MOD_ST_INSUF_RES 16
-#define ZPCI_MOD_ST_SEQUENCE 24
-#define ZPCI_MOD_ST_DMAAS_INVAL 28
-#define ZPCI_MOD_ST_FRAME_INVAL 32
-#define ZPCI_MOD_ST_ERROR_RECOVER 40
-
-/* Modify PCI Function Controls */
-#define ZPCI_MOD_FC_REG_INT 2
-#define ZPCI_MOD_FC_DEREG_INT 3
-#define ZPCI_MOD_FC_REG_IOAT 4
-#define ZPCI_MOD_FC_DEREG_IOAT 5
-#define ZPCI_MOD_FC_REREG_IOAT 6
-#define ZPCI_MOD_FC_RESET_ERROR 7
-#define ZPCI_MOD_FC_RESET_BLOCK 9
-#define ZPCI_MOD_FC_SET_MEASURE 10
-
-/* Store PCI Function Controls status codes */
-#define ZPCI_STPCIFC_ST_PERM_ERROR 8
-#define ZPCI_STPCIFC_ST_INVAL_DMAAS 28
-#define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
-
-/* FIB function controls */
-#define ZPCI_FIB_FC_ENABLED 0x80
-#define ZPCI_FIB_FC_ERROR 0x40
-#define ZPCI_FIB_FC_LS_BLOCKED 0x20
-#define ZPCI_FIB_FC_DMAAS_REG 0x10
-
-/* FIB function controls */
-#define ZPCI_FIB_FC_ENABLED 0x80
-#define ZPCI_FIB_FC_ERROR 0x40
-#define ZPCI_FIB_FC_LS_BLOCKED 0x20
-#define ZPCI_FIB_FC_DMAAS_REG 0x10
-
-/* Function Information Block */
-typedef struct ZpciFib {
- uint8_t fmt; /* format */
- uint8_t reserved1[7];
- uint8_t fc; /* function controls */
- uint8_t reserved2;
- uint16_t reserved3;
- uint32_t reserved4;
- uint64_t pba; /* PCI base address */
- uint64_t pal; /* PCI address limit */
- uint64_t iota; /* I/O Translation Anchor */
-#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7)
-#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff)
-#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f)
-#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1)
-#define FIB_DATA_AISBO(x) ((x) & 0x3f)
- uint32_t data;
- uint32_t reserved5;
- uint64_t aibv; /* Adapter int bit vector address */
- uint64_t aisb; /* Adapter int summary bit address */
- uint64_t fmb_addr; /* Function measurement address and key */
- uint32_t reserved6;
- uint32_t gd;
-} QEMU_PACKED ZpciFib;
-
-int pci_dereg_irqs(S390PCIBusDevice *pbdev);
-void pci_dereg_ioat(S390PCIIOMMU *iommu);
-int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra);
-int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
-int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
-int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
-int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
- uint8_t ar, uintptr_t ra);
-int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
- uintptr_t ra);
-int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
- uintptr_t ra);
-void fmb_timer_free(S390PCIBusDevice *pbdev);
-
-#define ZPCI_IO_BAR_MIN 0
-#define ZPCI_IO_BAR_MAX 5
-#define ZPCI_CONFIG_BAR 15
-
-#endif
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
new file mode 100644
index 0000000000..d5c78063b5
--- /dev/null
+++ b/hw/s390x/s390-pci-vfio.c
@@ -0,0 +1,276 @@
+/*
+ * s390 vfio-pci interfaces
+ *
+ * Copyright 2020 IBM Corp.
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include <linux/vfio_zdev.h>
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-clp.h"
+#include "hw/s390x/s390-pci-vfio.h"
+#include "hw/vfio/pci.h"
+#include "hw/vfio/vfio-common.h"
+
+/*
+ * Get the current DMA available count from vfio. Returns true if vfio is
+ * limiting DMA requests, false otherwise. The current available count read
+ * from vfio is returned in avail.
+ */
+bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
+{
+ g_autofree struct vfio_iommu_type1_info *info;
+ uint32_t argsz;
+
+ assert(avail);
+
+ argsz = sizeof(struct vfio_iommu_type1_info);
+ info = g_malloc0(argsz);
+
+ /*
+ * If the specified argsz is not large enough to contain all capabilities
+ * it will be updated upon return from the ioctl. Retry until we have
+ * a big enough buffer to hold the entire capability chain.
+ */
+retry:
+ info->argsz = argsz;
+
+ if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) {
+ return false;
+ }
+
+ if (info->argsz > argsz) {
+ argsz = info->argsz;
+ info = g_realloc(info, argsz);
+ goto retry;
+ }
+
+ /* If the capability exists, update with the current value */
+ return vfio_get_info_dma_avail(info, avail);
+}
+
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
+ S390PCIBusDevice *pbdev)
+{
+ S390PCIDMACount *cnt;
+ uint32_t avail;
+ VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ int id;
+
+ assert(vpdev);
+
+ id = vpdev->vbasedev.group->container->fd;
+
+ if (!s390_pci_update_dma_avail(id, &avail)) {
+ return NULL;
+ }
+
+ QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
+ if (cnt->id == id) {
+ cnt->users++;
+ return cnt;
+ }
+ }
+
+ cnt = g_new0(S390PCIDMACount, 1);
+ cnt->id = id;
+ cnt->users = 1;
+ cnt->avail = avail;
+ QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
+ return cnt;
+}
+
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
+{
+ assert(cnt);
+
+ cnt->users--;
+ if (cnt->users == 0) {
+ QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
+ }
+}
+
+static void s390_pci_read_base(S390PCIBusDevice *pbdev,
+ struct vfio_device_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_device_info_cap_zpci_base *cap;
+ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
+
+ /* If capability not provided, just leave the defaults in place */
+ if (hdr == NULL) {
+ trace_s390_pci_clp_cap(vpci->vbasedev.name,
+ VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
+ return;
+ }
+ cap = (void *) hdr;
+
+ pbdev->zpci_fn.sdma = cap->start_dma;
+ pbdev->zpci_fn.edma = cap->end_dma;
+ pbdev->zpci_fn.pchid = cap->pchid;
+ pbdev->zpci_fn.vfn = cap->vfn;
+ pbdev->zpci_fn.pfgid = cap->gid;
+ /* The following values remain 0 until we support other FMB formats */
+ pbdev->zpci_fn.fmbl = 0;
+ pbdev->zpci_fn.pft = 0;
+}
+
+static void s390_pci_read_group(S390PCIBusDevice *pbdev,
+ struct vfio_device_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_device_info_cap_zpci_group *cap;
+ ClpRspQueryPciGrp *resgrp;
+ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
+
+ /* If capability not provided, just use the default group */
+ if (hdr == NULL) {
+ trace_s390_pci_clp_cap(vpci->vbasedev.name,
+ VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
+ pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
+ pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
+ return;
+ }
+ cap = (void *) hdr;
+
+ /* See if the PCI group is already defined, create if not */
+ pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid);
+
+ if (!pbdev->pci_group) {
+ pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid);
+
+ resgrp = &pbdev->pci_group->zpci_group;
+ if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
+ resgrp->fr = 1;
+ }
+ stq_p(&resgrp->dasm, cap->dasm);
+ stq_p(&resgrp->msia, cap->msi_addr);
+ stw_p(&resgrp->mui, cap->mui);
+ stw_p(&resgrp->i, cap->noi);
+ stw_p(&resgrp->maxstbl, cap->maxstbl);
+ stb_p(&resgrp->version, cap->version);
+ }
+}
+
+static void s390_pci_read_util(S390PCIBusDevice *pbdev,
+ struct vfio_device_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_device_info_cap_zpci_util *cap;
+ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
+
+ /* If capability not provided, just leave the defaults in place */
+ if (hdr == NULL) {
+ trace_s390_pci_clp_cap(vpci->vbasedev.name,
+ VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
+ return;
+ }
+ cap = (void *) hdr;
+
+ if (cap->size > CLP_UTIL_STR_LEN) {
+ trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size,
+ VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
+ return;
+ }
+
+ pbdev->zpci_fn.flags |= CLP_RSP_QPCI_MASK_UTIL;
+ memcpy(pbdev->zpci_fn.util_str, cap->util_str, CLP_UTIL_STR_LEN);
+}
+
+static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
+ struct vfio_device_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_device_info_cap_zpci_pfip *cap;
+ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
+
+ /* If capability not provided, just leave the defaults in place */
+ if (hdr == NULL) {
+ trace_s390_pci_clp_cap(vpci->vbasedev.name,
+ VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
+ return;
+ }
+ cap = (void *) hdr;
+
+ if (cap->size > CLP_PFIP_NR_SEGMENTS) {
+ trace_s390_pci_clp_cap_size(vpci->vbasedev.name, cap->size,
+ VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
+ return;
+ }
+
+ memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS);
+}
+
+/*
+ * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for
+ * capabilities that contain information about CLP features provided by the
+ * underlying host.
+ * On entry, defaults have already been placed into the guest CLP response
+ * buffers. On exit, defaults will have been overwritten for any CLP features
+ * found in the capability chain; defaults will remain for any CLP features not
+ * found in the chain.
+ */
+void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
+{
+ g_autofree struct vfio_device_info *info;
+ VFIOPCIDevice *vfio_pci;
+ uint32_t argsz;
+ int fd;
+
+ argsz = sizeof(*info);
+ info = g_malloc0(argsz);
+
+ vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ fd = vfio_pci->vbasedev.fd;
+
+ /*
+ * If the specified argsz is not large enough to contain all capabilities
+ * it will be updated upon return from the ioctl. Retry until we have
+ * a big enough buffer to hold the entire capability chain. On error,
+ * just exit and rely on CLP defaults.
+ */
+retry:
+ info->argsz = argsz;
+
+ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) {
+ trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name);
+ return;
+ }
+
+ if (info->argsz > argsz) {
+ argsz = info->argsz;
+ info = g_realloc(info, argsz);
+ goto retry;
+ }
+
+ /*
+ * Find the CLP features provided and fill in the guest CLP responses.
+ * Always call s390_pci_read_base first as information from this could
+ * determine which function group is used in s390_pci_read_group.
+ * For any feature not found, the default values will remain in the CLP
+ * response.
+ */
+ s390_pci_read_base(pbdev, info);
+ s390_pci_read_group(pbdev, info);
+ s390_pci_read_util(pbdev, info);
+ s390_pci_read_pfip(pbdev, info);
+
+ return;
+}
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 2e900335ea..22222c4fd5 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -28,7 +28,7 @@
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/qemu-print.h"
-#include "s390-pci-bus.h"
+#include "hw/s390x/s390-pci-bus.h"
#include "sysemu/reset.h"
#include "hw/s390x/storage-keys.h"
#include "hw/s390x/storage-attributes.h"
diff --git a/hw/s390x/trace-events b/hw/s390x/trace-events
index 0dc5b818c4..8156693749 100644
--- a/hw/s390x/trace-events
+++ b/hw/s390x/trace-events
@@ -14,3 +14,8 @@ css_do_sic(uint16_t mode, uint8_t isc) "CSS: set interruption mode 0x%x on isc 0
virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command 0x%x"
virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno 0x%04x (%s)"
virtio_ccw_set_ind(uint64_t ind_loc, uint8_t ind_old, uint8_t ind_new) "VIRTIO-CCW: indicator at %" PRIu64 ": 0x%x->0x%x"
+
+# s390-pci-vfio.c
+s390_pci_clp_cap(const char *id, uint32_t cap) "PCI: %s: missing expected CLP capability %u"
+s390_pci_clp_cap_size(const char *id, uint32_t size, uint32_t cap) "PCI: %s: bad size (%u) for CLP capability %u"
+s390_pci_clp_dev_info(const char *id) "PCI: %s: cannot read vfio device info"
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 13471ae294..e18ea2cf91 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -29,6 +29,7 @@
#include "hw/vfio/vfio.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
+#include "exec/ram_addr.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@@ -37,6 +38,7 @@
#include "sysemu/reset.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/migration.h"
VFIOGroupList vfio_group_list =
QLIST_HEAD_INITIALIZER(vfio_group_list);
@@ -203,7 +205,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
buf.qword = cpu_to_le64(data);
break;
default:
- hw_error("vfio: unsupported write size, %d bytes", size);
+ hw_error("vfio: unsupported write size, %u bytes", size);
break;
}
@@ -260,7 +262,7 @@ uint64_t vfio_region_read(void *opaque,
data = le64_to_cpu(buf.qword);
break;
default:
- hw_error("vfio: unsupported read size, %d bytes", size);
+ hw_error("vfio: unsupported read size, %u bytes", size);
break;
}
@@ -287,10 +289,146 @@ const MemoryRegionOps vfio_region_ops = {
};
/*
+ * Device state interfaces
+ */
+
+bool vfio_mig_active(void)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev;
+
+ if (QLIST_EMPTY(&vfio_group_list)) {
+ return false;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ if (vbasedev->migration_blocker) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev;
+ MigrationState *ms = migrate_get_current();
+
+ if (!migration_is_setup_or_active(ms->state)) {
+ return false;
+ }
+
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ VFIOMigration *migration = vbasedev->migration;
+
+ if (!migration) {
+ return false;
+ }
+
+ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) &&
+ !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev;
+ MigrationState *ms = migrate_get_current();
+
+ if (!migration_is_setup_or_active(ms->state)) {
+ return false;
+ }
+
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ VFIOMigration *migration = vbasedev->migration;
+
+ if (!migration) {
+ return false;
+ }
+
+ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) &&
+ (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static int vfio_dma_unmap_bitmap(VFIOContainer *container,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb)
+{
+ struct vfio_iommu_type1_dma_unmap *unmap;
+ struct vfio_bitmap *bitmap;
+ uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS;
+ int ret;
+
+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
+
+ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
+ unmap->iova = iova;
+ unmap->size = size;
+ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+ bitmap = (struct vfio_bitmap *)&unmap->data;
+
+ /*
+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
+ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to
+ * TARGET_PAGE_SIZE.
+ */
+
+ bitmap->pgsize = TARGET_PAGE_SIZE;
+ bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+
+ if (bitmap->size > container->max_dirty_bitmap_size) {
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
+ (uint64_t)bitmap->size);
+ ret = -E2BIG;
+ goto unmap_exit;
+ }
+
+ bitmap->data = g_try_malloc0(bitmap->size);
+ if (!bitmap->data) {
+ ret = -ENOMEM;
+ goto unmap_exit;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ if (!ret) {
+ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
+ iotlb->translated_addr, pages);
+ } else {
+ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
+ }
+
+ g_free(bitmap->data);
+unmap_exit:
+ g_free(unmap);
+ return ret;
+}
+
+/*
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
*/
static int vfio_dma_unmap(VFIOContainer *container,
- hwaddr iova, ram_addr_t size)
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb)
{
struct vfio_iommu_type1_dma_unmap unmap = {
.argsz = sizeof(unmap),
@@ -299,6 +437,11 @@ static int vfio_dma_unmap(VFIOContainer *container,
.size = size,
};
+ if (iotlb && container->dirty_pages_supported &&
+ vfio_devices_all_running_and_saving(container)) {
+ return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+ }
+
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
/*
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
@@ -346,7 +489,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
* the VGA ROM space.
*/
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
+ (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
return 0;
}
@@ -407,8 +550,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
}
/* Called with rcu_read_lock held. */
-static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
- bool *read_only)
+static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
+ ram_addr_t *ram_addr, bool *read_only)
{
MemoryRegion *mr;
hwaddr xlat;
@@ -439,8 +582,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
return false;
}
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
- *read_only = !writable || mr->readonly;
+ if (vaddr) {
+ *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ }
+
+ if (ram_addr) {
+ *ram_addr = memory_region_get_ram_addr(mr) + xlat;
+ }
+
+ if (read_only) {
+ *read_only = !writable || mr->readonly;
+ }
return true;
}
@@ -450,7 +602,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainer *container = giommu->container;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
- bool read_only;
void *vaddr;
int ret;
@@ -466,7 +617,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
rcu_read_lock();
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
- if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
+ bool read_only;
+
+ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
goto out;
}
/*
@@ -486,7 +639,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
iotlb->addr_mask + 1, vaddr, ret);
}
} else {
- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
+ ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
if (ret) {
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx") = %d (%m)",
@@ -789,7 +942,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
if (try_unmap) {
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
if (ret) {
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx") = %d (%m)",
@@ -812,9 +965,156 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
}
+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
+ uint64_t size, ram_addr_t ram_addr)
+{
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap_get *range;
+ uint64_t pages;
+ int ret;
+
+ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
+
+ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
+ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
+ range->iova = iova;
+ range->size = size;
+
+ /*
+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
+ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to
+ * TARGET_PAGE_SIZE.
+ */
+ range->bitmap.pgsize = TARGET_PAGE_SIZE;
+
+ pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS;
+ range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ range->bitmap.data = g_try_malloc0(range->bitmap.size);
+ if (!range->bitmap.data) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+ if (ret) {
+ error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
+ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
+ (uint64_t)range->size, errno);
+ goto err_out;
+ }
+
+ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
+ ram_addr, pages);
+
+ trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
+ range->bitmap.size, ram_addr);
+err_out:
+ g_free(range->bitmap.data);
+ g_free(dbitmap);
+
+ return ret;
+}
+
+typedef struct {
+ IOMMUNotifier n;
+ VFIOGuestIOMMU *giommu;
+} vfio_giommu_dirty_notifier;
+
+static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+ vfio_giommu_dirty_notifier *gdn = container_of(n,
+ vfio_giommu_dirty_notifier, n);
+ VFIOGuestIOMMU *giommu = gdn->giommu;
+ VFIOContainer *container = giommu->container;
+ hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ ram_addr_t translated_addr;
+
+ trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
+
+ if (iotlb->target_as != &address_space_memory) {
+ error_report("Wrong target AS \"%s\", only system memory is allowed",
+ iotlb->target_as->name ? iotlb->target_as->name : "none");
+ return;
+ }
+
+ rcu_read_lock();
+ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
+ int ret;
+
+ ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
+ translated_addr);
+ if (ret) {
+ error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%m)",
+ container, iova,
+ iotlb->addr_mask + 1, ret);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int vfio_sync_dirty_bitmap(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ ram_addr_t ram_addr;
+
+ if (memory_region_is_iommu(section->mr)) {
+ VFIOGuestIOMMU *giommu;
+
+ QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
+ if (MEMORY_REGION(giommu->iommu) == section->mr &&
+ giommu->n.start == section->offset_within_region) {
+ Int128 llend;
+ vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
+ int idx = memory_region_iommu_attrs_to_index(giommu->iommu,
+ MEMTXATTRS_UNSPECIFIED);
+
+ llend = int128_add(int128_make64(section->offset_within_region),
+ section->size);
+ llend = int128_sub(llend, int128_one());
+
+ iommu_notifier_init(&gdn.n,
+ vfio_iommu_map_dirty_notify,
+ IOMMU_NOTIFIER_MAP,
+ section->offset_within_region,
+ int128_get64(llend),
+ idx);
+ memory_region_iommu_replay(giommu->iommu, &gdn.n);
+ break;
+ }
+ }
+ return 0;
+ }
+
+ ram_addr = memory_region_get_ram_addr(section->mr) +
+ section->offset_within_region;
+
+ return vfio_get_dirty_bitmap(container,
+ TARGET_PAGE_ALIGN(section->offset_within_address_space),
+ int128_get64(section->size), ram_addr);
+}
+
+static void vfio_listerner_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+
+ if (vfio_listener_skipped_section(section) ||
+ !container->dirty_pages_supported) {
+ return;
+ }
+
+ if (vfio_devices_all_stopped_and_saving(container)) {
+ vfio_sync_dirty_bitmap(container, section);
+ }
+}
+
static const MemoryListener vfio_memory_listener = {
.region_add = vfio_listener_region_add,
.region_del = vfio_listener_region_del,
+ .log_sync = vfio_listerner_log_sync,
};
static void vfio_listener_release(VFIOContainer *container)
@@ -825,17 +1125,12 @@ static void vfio_listener_release(VFIOContainer *container)
}
}
-struct vfio_info_cap_header *
-vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
+static struct vfio_info_cap_header *
+vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
struct vfio_info_cap_header *hdr;
- void *ptr = info;
- if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
- return NULL;
- }
-
- for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+ for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
if (hdr->id == id) {
return hdr;
}
@@ -844,6 +1139,57 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
return NULL;
}
+struct vfio_info_cap_header *
+vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
+{
+ if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
+ return NULL;
+ }
+
+ return vfio_get_cap((void *)info, info->cap_offset, id);
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+ return NULL;
+ }
+
+ return vfio_get_cap((void *)info, info->cap_offset, id);
+}
+
+struct vfio_info_cap_header *
+vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
+{
+ if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
+ return NULL;
+ }
+
+ return vfio_get_cap((void *)info, info->cap_offset, id);
+}
+
+bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
+ unsigned int *avail)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_dma_avail *cap;
+
+ /* If the capability cannot be found, assume no DMA limiting */
+ hdr = vfio_get_iommu_type1_info_cap(info,
+ VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL);
+ if (hdr == NULL) {
+ return false;
+ }
+
+ if (avail != NULL) {
+ cap = (void *) hdr;
+ *avail = cap->avail;
+ }
+
+ return true;
+}
+
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
struct vfio_region_info *info)
{
@@ -924,6 +1270,18 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
return 0;
}
+static void vfio_subregion_unmap(VFIORegion *region, int index)
+{
+ trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
+ region->mmaps[index].offset,
+ region->mmaps[index].offset +
+ region->mmaps[index].size - 1);
+ memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
+ munmap(region->mmaps[index].mmap, region->mmaps[index].size);
+ object_unparent(OBJECT(&region->mmaps[index].mem));
+ region->mmaps[index].mmap = NULL;
+}
+
int vfio_region_mmap(VFIORegion *region)
{
int i, prot = 0;
@@ -954,10 +1312,7 @@ int vfio_region_mmap(VFIORegion *region)
region->mmaps[i].mmap = NULL;
for (i--; i >= 0; i--) {
- memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
- munmap(region->mmaps[i].mmap, region->mmaps[i].size);
- object_unparent(OBJECT(&region->mmaps[i].mem));
- region->mmaps[i].mmap = NULL;
+ vfio_subregion_unmap(region, i);
}
return ret;
@@ -982,6 +1337,21 @@ int vfio_region_mmap(VFIORegion *region)
return 0;
}
+void vfio_region_unmap(VFIORegion *region)
+{
+ int i;
+
+ if (!region->mem) {
+ return;
+ }
+
+ for (i = 0; i < region->nr_mmaps; i++) {
+ if (region->mmaps[i].mmap) {
+ vfio_subregion_unmap(region, i);
+ }
+ }
+}
+
void vfio_region_exit(VFIORegion *region)
{
int i;
@@ -1204,6 +1574,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
return 0;
}
+static int vfio_get_iommu_info(VFIOContainer *container,
+ struct vfio_iommu_type1_info **info)
+{
+
+ size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+ *info = g_new0(struct vfio_iommu_type1_info, 1);
+again:
+ (*info)->argsz = argsz;
+
+ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+ g_free(*info);
+ *info = NULL;
+ return -errno;
+ }
+
+ if (((*info)->argsz > argsz)) {
+ argsz = (*info)->argsz;
+ *info = g_realloc(*info, argsz);
+ goto again;
+ }
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+ struct vfio_info_cap_header *hdr;
+ void *ptr = info;
+
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+ return NULL;
+ }
+
+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+ if (hdr->id == id) {
+ return hdr;
+ }
+ }
+
+ return NULL;
+}
+
+static void vfio_get_iommu_info_migration(VFIOContainer *container,
+ struct vfio_iommu_type1_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_cap_migration *cap_mig;
+
+ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
+ if (!hdr) {
+ return;
+ }
+
+ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
+ header);
+
+ /*
+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
+ * TARGET_PAGE_SIZE to mark those dirty.
+ */
+ if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) {
+ container->dirty_pages_supported = true;
+ container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
+ container->dirty_pgsizes = cap_mig->pgsize_bitmap;
+ }
+}
+
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
Error **errp)
{
@@ -1273,6 +1712,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
container->space = space;
container->fd = fd;
container->error = NULL;
+ container->dirty_pages_supported = false;
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
@@ -1285,7 +1725,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
{
- struct vfio_iommu_type1_info info;
+ struct vfio_iommu_type1_info *info;
/*
* FIXME: This assumes that a Type1 IOMMU can map any 64-bit
@@ -1294,15 +1734,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
* existing Type1 IOMMUs generally support any IOVA we're
* going to actually try in practice.
*/
- info.argsz = sizeof(info);
- ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
- /* Ignore errors */
- if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
+ ret = vfio_get_iommu_info(container, &info);
+
+ if (ret || !(info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
/* Assume 4k IOVA page size */
- info.iova_pgsizes = 4096;
+ info->iova_pgsizes = 4096;
+ }
+ vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes);
+ container->pgsizes = info->iova_pgsizes;
+
+ if (!ret) {
+ vfio_get_iommu_info_migration(container, info);
}
- vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
- container->pgsizes = info.iova_pgsizes;
+ g_free(info);
break;
}
case VFIO_SPAPR_TCE_v2_IOMMU:
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index 37efa74018..da9af297a0 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -2,6 +2,7 @@ vfio_ss = ss.source_set()
vfio_ss.add(files(
'common.c',
'spapr.c',
+ 'migration.c',
))
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
new file mode 100644
index 0000000000..3ce285ea39
--- /dev/null
+++ b/hw/vfio/migration.c
@@ -0,0 +1,933 @@
+/*
+ * Migration support for VFIO devices
+ *
+ * Copyright NVIDIA, Inc. 2020
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "sysemu/runstate.h"
+#include "hw/vfio/vfio-common.h"
+#include "cpu.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "migration/qemu-file.h"
+#include "migration/register.h"
+#include "migration/blocker.h"
+#include "migration/misc.h"
+#include "qapi/error.h"
+#include "exec/ramlist.h"
+#include "exec/ram_addr.h"
+#include "pci.h"
+#include "trace.h"
+#include "hw/hw.h"
+
+/*
+ * Flags to be used as unique delimiters for VFIO devices in the migration
+ * stream. These flags are composed as:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10 => Magic ID, represents emulated (virtual) function IO
+ * 0x0000 => 16-bits reserved for flags
+ *
+ * The beginning of state information is marked by _DEV_CONFIG_STATE,
+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
+ * certain state information is marked by _END_OF_STATE.
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+
+static int64_t bytes_transferred;
+
+static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
+ off_t off, bool iswrite)
+{
+ int ret;
+
+ ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
+ pread(vbasedev->fd, val, count, off);
+ if (ret < count) {
+ error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
+ HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
+ vbasedev->name, off, strerror(errno));
+ return (ret < 0) ? ret : -EINVAL;
+ }
+ return 0;
+}
+
+static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
+ off_t off, bool iswrite)
+{
+ int ret, done = 0;
+ __u8 *tbuf = buf;
+
+ while (count) {
+ int bytes = 0;
+
+ if (count >= 8 && !(off % 8)) {
+ bytes = 8;
+ } else if (count >= 4 && !(off % 4)) {
+ bytes = 4;
+ } else if (count >= 2 && !(off % 2)) {
+ bytes = 2;
+ } else {
+ bytes = 1;
+ }
+
+ ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
+ if (ret) {
+ return ret;
+ }
+
+ count -= bytes;
+ done += bytes;
+ off += bytes;
+ tbuf += bytes;
+ }
+ return done;
+}
+
+#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false)
+#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true)
+
+#define VFIO_MIG_STRUCT_OFFSET(f) \
+ offsetof(struct vfio_device_migration_info, f)
+/*
+ * Change the device_state register for device @vbasedev. Bits set in @mask
+ * are preserved, bits set in @value are set, and bits not set in either @mask
+ * or @value are cleared in device_state. If the register cannot be accessed,
+ * the resulting state would be invalid, or the device enters an error state,
+ * an error is returned.
+ */
+
+static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
+ uint32_t value)
+{
+ VFIOMigration *migration = vbasedev->migration;
+ VFIORegion *region = &migration->region;
+ off_t dev_state_off = region->fd_offset +
+ VFIO_MIG_STRUCT_OFFSET(device_state);
+ uint32_t device_state;
+ int ret;
+
+ ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
+ dev_state_off);
+ if (ret < 0) {
+ return ret;
+ }
+
+ device_state = (device_state & mask) | value;
+
+ if (!VFIO_DEVICE_STATE_VALID(device_state)) {
+ return -EINVAL;
+ }
+
+ ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
+ dev_state_off);
+ if (ret < 0) {
+ int rret;
+
+ rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
+ dev_state_off);
+
+ if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
+ hw_error("%s: Device in error state 0x%x", vbasedev->name,
+ device_state);
+ return rret ? rret : -EIO;
+ }
+ return ret;
+ }
+
+ migration->device_state = device_state;
+ trace_vfio_migration_set_state(vbasedev->name, device_state);
+ return 0;
+}
+
+static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
+ uint64_t data_size, uint64_t *size)
+{
+ void *ptr = NULL;
+ uint64_t limit = 0;
+ int i;
+
+ if (!region->mmaps) {
+ if (size) {
+ *size = MIN(data_size, region->size - data_offset);
+ }
+ return ptr;
+ }
+
+ for (i = 0; i < region->nr_mmaps; i++) {
+ VFIOMmap *map = region->mmaps + i;
+
+ if ((data_offset >= map->offset) &&
+ (data_offset < map->offset + map->size)) {
+
+ /* check if data_offset is within sparse mmap areas */
+ ptr = map->mmap + data_offset - map->offset;
+ if (size) {
+ *size = MIN(data_size, map->offset + map->size - data_offset);
+ }
+ break;
+ } else if ((data_offset < map->offset) &&
+ (!limit || limit > map->offset)) {
+ /*
+ * data_offset is not within sparse mmap areas, find size of
+ * non-mapped area. Check through all list since region->mmaps list
+ * is not sorted.
+ */
+ limit = map->offset;
+ }
+ }
+
+ if (!ptr && size) {
+ *size = limit ? MIN(data_size, limit - data_offset) : data_size;
+ }
+ return ptr;
+}
+
+static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
+{
+ VFIOMigration *migration = vbasedev->migration;
+ VFIORegion *region = &migration->region;
+ uint64_t data_offset = 0, data_size = 0, sz;
+ int ret;
+
+ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
+ if (ret < 0) {
+ return ret;
+ }
+
+ trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
+ migration->pending_bytes);
+
+ qemu_put_be64(f, data_size);
+ sz = data_size;
+
+ while (sz) {
+ void *buf;
+ uint64_t sec_size;
+ bool buf_allocated = false;
+
+ buf = get_data_section_size(region, data_offset, sz, &sec_size);
+
+ if (!buf) {
+ buf = g_try_malloc(sec_size);
+ if (!buf) {
+ error_report("%s: Error allocating buffer ", __func__);
+ return -ENOMEM;
+ }
+ buf_allocated = true;
+
+ ret = vfio_mig_read(vbasedev, buf, sec_size,
+ region->fd_offset + data_offset);
+ if (ret < 0) {
+ g_free(buf);
+ return ret;
+ }
+ }
+
+ qemu_put_buffer(f, buf, sec_size);
+
+ if (buf_allocated) {
+ g_free(buf);
+ }
+ sz -= sec_size;
+ data_offset += sec_size;
+ }
+
+ ret = qemu_file_get_error(f);
+
+ if (!ret && size) {
+ *size = data_size;
+ }
+
+ bytes_transferred += data_size;
+ return ret;
+}
+
+static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
+ uint64_t data_size)
+{
+ VFIORegion *region = &vbasedev->migration->region;
+ uint64_t data_offset = 0, size, report_size;
+ int ret;
+
+ do {
+ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (data_offset + data_size > region->size) {
+ /*
+ * If data_size is greater than the data section of migration region
+ * then iterate the write buffer operation. This case can occur if
+ * size of migration region at destination is smaller than size of
+ * migration region at source.
+ */
+ report_size = size = region->size - data_offset;
+ data_size -= size;
+ } else {
+ report_size = size = data_size;
+ data_size = 0;
+ }
+
+ trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
+
+ while (size) {
+ void *buf;
+ uint64_t sec_size;
+ bool buf_alloc = false;
+
+ buf = get_data_section_size(region, data_offset, size, &sec_size);
+
+ if (!buf) {
+ buf = g_try_malloc(sec_size);
+ if (!buf) {
+ error_report("%s: Error allocating buffer ", __func__);
+ return -ENOMEM;
+ }
+ buf_alloc = true;
+ }
+
+ qemu_get_buffer(f, buf, sec_size);
+
+ if (buf_alloc) {
+ ret = vfio_mig_write(vbasedev, buf, sec_size,
+ region->fd_offset + data_offset);
+ g_free(buf);
+
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ size -= sec_size;
+ data_offset += sec_size;
+ }
+
+ ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
+ if (ret < 0) {
+ return ret;
+ }
+ } while (data_size);
+
+ return 0;
+}
+
+static int vfio_update_pending(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+ VFIORegion *region = &migration->region;
+ uint64_t pending_bytes = 0;
+ int ret;
+
+ ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
+ if (ret < 0) {
+ migration->pending_bytes = 0;
+ return ret;
+ }
+
+ migration->pending_bytes = pending_bytes;
+ trace_vfio_update_pending(vbasedev->name, pending_bytes);
+ return 0;
+}
+
+static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
+
+ if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
+ vbasedev->ops->vfio_save_config(vbasedev, f);
+ }
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+ trace_vfio_save_device_config_state(vbasedev->name);
+
+ return qemu_file_get_error(f);
+}
+
+static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ uint64_t data;
+
+ if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
+ int ret;
+
+ ret = vbasedev->ops->vfio_load_config(vbasedev, f);
+ if (ret) {
+ error_report("%s: Failed to load device config space",
+ vbasedev->name);
+ return ret;
+ }
+ }
+
+ data = qemu_get_be64(f);
+ if (data != VFIO_MIG_FLAG_END_OF_STATE) {
+ error_report("%s: Failed loading device config space, "
+ "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
+ return -EINVAL;
+ }
+
+ trace_vfio_load_device_config_state(vbasedev->name);
+ return qemu_file_get_error(f);
+}
+
+static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start)
+{
+ int ret;
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOContainer *container = vbasedev->group->container;
+ struct vfio_iommu_type1_dirty_bitmap dirty = {
+ .argsz = sizeof(dirty),
+ };
+
+ if (start) {
+ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) {
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+ if (ret) {
+ error_report("Failed to set dirty tracking flag 0x%x errno: %d",
+ dirty.flags, errno);
+ return -errno;
+ }
+ return ret;
+}
+
+static void vfio_migration_cleanup(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+
+ vfio_set_dirty_page_tracking(vbasedev, false);
+
+ if (migration->region.mmaps) {
+ vfio_region_unmap(&migration->region);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ int ret;
+
+ trace_vfio_save_setup(vbasedev->name);
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+ if (migration->region.mmaps) {
+ /*
+ * Calling vfio_region_mmap() from migration thread. Memory API called
+ * from this function require locking the iothread when called from
+ * outside the main loop thread.
+ */
+ qemu_mutex_lock_iothread();
+ ret = vfio_region_mmap(&migration->region);
+ qemu_mutex_unlock_iothread();
+ if (ret) {
+ error_report("%s: Failed to mmap VFIO migration region: %s",
+ vbasedev->name, strerror(-ret));
+ error_report("%s: Falling back to slow path", vbasedev->name);
+ }
+ }
+
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
+ VFIO_DEVICE_STATE_SAVING);
+ if (ret) {
+ error_report("%s: Failed to set state SAVING", vbasedev->name);
+ return ret;
+ }
+
+ ret = vfio_set_dirty_page_tracking(vbasedev, true);
+ if (ret) {
+ return ret;
+ }
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+ ret = qemu_file_get_error(f);
+ if (ret) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void vfio_save_cleanup(void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+
+ vfio_migration_cleanup(vbasedev);
+ trace_vfio_save_cleanup(vbasedev->name);
+}
+
+static void vfio_save_pending(QEMUFile *f, void *opaque,
+ uint64_t threshold_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ int ret;
+
+ ret = vfio_update_pending(vbasedev);
+ if (ret) {
+ return;
+ }
+
+ *res_precopy_only += migration->pending_bytes;
+
+ trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
+ *res_postcopy_only, *res_compatible);
+}
+
+static int vfio_save_iterate(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ uint64_t data_size;
+ int ret;
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
+
+ if (migration->pending_bytes == 0) {
+ ret = vfio_update_pending(vbasedev);
+ if (ret) {
+ return ret;
+ }
+
+ if (migration->pending_bytes == 0) {
+ qemu_put_be64(f, 0);
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+ /* indicates data finished, goto complete phase */
+ return 1;
+ }
+ }
+
+ ret = vfio_save_buffer(f, vbasedev, &data_size);
+ if (ret) {
+ error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
+ strerror(errno));
+ return ret;
+ }
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+ ret = qemu_file_get_error(f);
+ if (ret) {
+ return ret;
+ }
+
+ /*
+ * Reset pending_bytes as .save_live_pending is not called during savevm or
+ * snapshot case, in such case vfio_update_pending() at the start of this
+ * function updates pending_bytes.
+ */
+ migration->pending_bytes = 0;
+ trace_vfio_save_iterate(vbasedev->name, data_size);
+ return 0;
+}
+
+static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ uint64_t data_size;
+ int ret;
+
+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
+ VFIO_DEVICE_STATE_SAVING);
+ if (ret) {
+ error_report("%s: Failed to set state STOP and SAVING",
+ vbasedev->name);
+ return ret;
+ }
+
+ ret = vfio_save_device_config_state(f, opaque);
+ if (ret) {
+ return ret;
+ }
+
+ ret = vfio_update_pending(vbasedev);
+ if (ret) {
+ return ret;
+ }
+
+ while (migration->pending_bytes > 0) {
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
+ ret = vfio_save_buffer(f, vbasedev, &data_size);
+ if (ret < 0) {
+ error_report("%s: Failed to save buffer", vbasedev->name);
+ return ret;
+ }
+
+ if (data_size == 0) {
+ break;
+ }
+
+ ret = vfio_update_pending(vbasedev);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+ ret = qemu_file_get_error(f);
+ if (ret) {
+ return ret;
+ }
+
+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
+ if (ret) {
+ error_report("%s: Failed to set state STOPPED", vbasedev->name);
+ return ret;
+ }
+
+ trace_vfio_save_complete_precopy(vbasedev->name);
+ return ret;
+}
+
+static int vfio_load_setup(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ int ret = 0;
+
+ if (migration->region.mmaps) {
+ ret = vfio_region_mmap(&migration->region);
+ if (ret) {
+ error_report("%s: Failed to mmap VFIO migration region %d: %s",
+ vbasedev->name, migration->region.nr,
+ strerror(-ret));
+ error_report("%s: Falling back to slow path", vbasedev->name);
+ }
+ }
+
+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
+ VFIO_DEVICE_STATE_RESUMING);
+ if (ret) {
+ error_report("%s: Failed to set state RESUMING", vbasedev->name);
+ if (migration->region.mmaps) {
+ vfio_region_unmap(&migration->region);
+ }
+ }
+ return ret;
+}
+
+static int vfio_load_cleanup(void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+
+ vfio_migration_cleanup(vbasedev);
+ trace_vfio_load_cleanup(vbasedev->name);
+ return 0;
+}
+
+static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
+{
+ VFIODevice *vbasedev = opaque;
+ int ret = 0;
+ uint64_t data;
+
+ data = qemu_get_be64(f);
+ while (data != VFIO_MIG_FLAG_END_OF_STATE) {
+
+ trace_vfio_load_state(vbasedev->name, data);
+
+ switch (data) {
+ case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
+ {
+ ret = vfio_load_device_config_state(f, opaque);
+ if (ret) {
+ return ret;
+ }
+ break;
+ }
+ case VFIO_MIG_FLAG_DEV_SETUP_STATE:
+ {
+ data = qemu_get_be64(f);
+ if (data == VFIO_MIG_FLAG_END_OF_STATE) {
+ return ret;
+ } else {
+ error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
+ vbasedev->name, data);
+ return -EINVAL;
+ }
+ break;
+ }
+ case VFIO_MIG_FLAG_DEV_DATA_STATE:
+ {
+ uint64_t data_size = qemu_get_be64(f);
+
+ if (data_size) {
+ ret = vfio_load_buffer(f, vbasedev, data_size);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ break;
+ }
+ default:
+ error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
+ return -EINVAL;
+ }
+
+ data = qemu_get_be64(f);
+ ret = qemu_file_get_error(f);
+ if (ret) {
+ return ret;
+ }
+ }
+ return ret;
+}
+
+static SaveVMHandlers savevm_vfio_handlers = {
+ .save_setup = vfio_save_setup,
+ .save_cleanup = vfio_save_cleanup,
+ .save_live_pending = vfio_save_pending,
+ .save_live_iterate = vfio_save_iterate,
+ .save_live_complete_precopy = vfio_save_complete_precopy,
+ .load_setup = vfio_load_setup,
+ .load_cleanup = vfio_load_cleanup,
+ .load_state = vfio_load_state,
+};
+
+/* ---------------------------------------------------------------------- */
+
+static void vfio_vmstate_change(void *opaque, int running, RunState state)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ uint32_t value, mask;
+ int ret;
+
+ if (vbasedev->migration->vm_running == running) {
+ return;
+ }
+
+ if (running) {
+ /*
+ * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
+ * Transition from _SAVING to _RUNNING can happen if there is migration
+ * failure, in that case clear _SAVING bit.
+ * Transition from _RESUMING to _RUNNING occurs during resuming
+ * phase, in that case clear _RESUMING bit.
+ * In both the above cases, set _RUNNING bit.
+ */
+ mask = ~VFIO_DEVICE_STATE_MASK;
+ value = VFIO_DEVICE_STATE_RUNNING;
+ } else {
+ /*
+ * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
+ * _RUNNING bit
+ */
+ mask = ~VFIO_DEVICE_STATE_RUNNING;
+ value = 0;
+ }
+
+ ret = vfio_migration_set_state(vbasedev, mask, value);
+ if (ret) {
+ /*
+ * Migration should be aborted in this case, but vm_state_notify()
+ * currently does not support reporting failures.
+ */
+ error_report("%s: Failed to set device state 0x%x", vbasedev->name,
+ (migration->device_state & mask) | value);
+ qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+ }
+ vbasedev->migration->vm_running = running;
+ trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
+ (migration->device_state & mask) | value);
+}
+
+static void vfio_migration_state_notifier(Notifier *notifier, void *data)
+{
+ MigrationState *s = data;
+ VFIOMigration *migration = container_of(notifier, VFIOMigration,
+ migration_state);
+ VFIODevice *vbasedev = migration->vbasedev;
+ int ret;
+
+ trace_vfio_migration_state_notifier(vbasedev->name,
+ MigrationStatus_str(s->state));
+
+ switch (s->state) {
+ case MIGRATION_STATUS_CANCELLING:
+ case MIGRATION_STATUS_CANCELLED:
+ case MIGRATION_STATUS_FAILED:
+ bytes_transferred = 0;
+ ret = vfio_migration_set_state(vbasedev,
+ ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
+ VFIO_DEVICE_STATE_RUNNING);
+ if (ret) {
+ error_report("%s: Failed to set state RUNNING", vbasedev->name);
+ }
+ }
+}
+
+static void vfio_migration_exit(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+
+ vfio_region_exit(&migration->region);
+ vfio_region_finalize(&migration->region);
+ g_free(vbasedev->migration);
+ vbasedev->migration = NULL;
+}
+
+static int vfio_migration_init(VFIODevice *vbasedev,
+ struct vfio_region_info *info)
+{
+ int ret;
+ Object *obj;
+ VFIOMigration *migration;
+ char id[256] = "";
+ g_autofree char *path = NULL, *oid = NULL;
+
+ if (!vbasedev->ops->vfio_get_object) {
+ return -EINVAL;
+ }
+
+ obj = vbasedev->ops->vfio_get_object(vbasedev);
+ if (!obj) {
+ return -EINVAL;
+ }
+
+ vbasedev->migration = g_new0(VFIOMigration, 1);
+
+ ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
+ info->index, "migration");
+ if (ret) {
+ error_report("%s: Failed to setup VFIO migration region %d: %s",
+ vbasedev->name, info->index, strerror(-ret));
+ goto err;
+ }
+
+ if (!vbasedev->migration->region.size) {
+ error_report("%s: Invalid zero-sized VFIO migration region %d",
+ vbasedev->name, info->index);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ migration = vbasedev->migration;
+ migration->vbasedev = vbasedev;
+
+ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
+ if (oid) {
+ path = g_strdup_printf("%s/vfio", oid);
+ } else {
+ path = g_strdup("vfio");
+ }
+ strpadcpy(id, sizeof(id), path, '\0');
+
+ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
+ vbasedev);
+
+ migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
+ vbasedev);
+ migration->migration_state.notify = vfio_migration_state_notifier;
+ add_migration_state_change_notifier(&migration->migration_state);
+ return 0;
+
+err:
+ vfio_migration_exit(vbasedev);
+ return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int64_t vfio_mig_bytes_transferred(void)
+{
+ return bytes_transferred;
+}
+
+int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
+{
+ VFIOContainer *container = vbasedev->group->container;
+ struct vfio_region_info *info = NULL;
+ Error *local_err = NULL;
+ int ret = -ENOTSUP;
+
+ if (!container->dirty_pages_supported) {
+ goto add_blocker;
+ }
+
+ ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
+ VFIO_REGION_SUBTYPE_MIGRATION, &info);
+ if (ret) {
+ goto add_blocker;
+ }
+
+ ret = vfio_migration_init(vbasedev, info);
+ if (ret) {
+ goto add_blocker;
+ }
+
+ g_free(info);
+ trace_vfio_migration_probe(vbasedev->name, info->index);
+ return 0;
+
+add_blocker:
+ error_setg(&vbasedev->migration_blocker,
+ "VFIO device doesn't support migration");
+ g_free(info);
+
+ ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ error_free(vbasedev->migration_blocker);
+ vbasedev->migration_blocker = NULL;
+ }
+ return ret;
+}
+
+void vfio_migration_finalize(VFIODevice *vbasedev)
+{
+ if (vbasedev->migration) {
+ VFIOMigration *migration = vbasedev->migration;
+
+ remove_migration_state_change_notifier(&migration->migration_state);
+ qemu_del_vm_change_state_handler(migration->vm_state);
+ vfio_migration_exit(vbasedev);
+ }
+
+ if (vbasedev->migration_blocker) {
+ migrate_del_blocker(vbasedev->migration_blocker);
+ error_free(vbasedev->migration_blocker);
+ vbasedev->migration_blocker = NULL;
+ }
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0d83eb0e47..58c0ce8971 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -41,6 +41,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "migration/blocker.h"
+#include "migration/qemu-file.h"
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
@@ -2394,10 +2395,68 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
}
}
+static Object *vfio_pci_get_object(VFIODevice *vbasedev)
+{
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+
+ return OBJECT(vdev);
+}
+
+static bool vfio_msix_present(void *opaque, int version_id)
+{
+ PCIDevice *pdev = opaque;
+
+ return msix_present(pdev);
+}
+
+const VMStateDescription vmstate_vfio_pci_config = {
+ .name = "VFIOPCIDevice",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
+ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
+{
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+
+ vmstate_save_state(f, &vmstate_vfio_pci_config, vdev, NULL);
+}
+
+static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
+{
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ PCIDevice *pdev = &vdev->pdev;
+ int ret;
+
+ ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1);
+ if (ret) {
+ return ret;
+ }
+
+ vfio_pci_write_config(pdev, PCI_COMMAND,
+ pci_get_word(pdev->config + PCI_COMMAND), 2);
+
+ if (msi_enabled(pdev)) {
+ vfio_msi_enable(vdev);
+ } else if (msix_enabled(pdev)) {
+ vfio_msix_enable(vdev);
+ }
+
+ return ret;
+}
+
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
.vfio_eoi = vfio_intx_eoi,
+ .vfio_get_object = vfio_pci_get_object,
+ .vfio_save_config = vfio_pci_save_config,
+ .vfio_load_config = vfio_pci_load_config,
};
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
@@ -2732,17 +2791,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
return;
}
- if (!pdev->failover_pair_id) {
- error_setg(&vdev->migration_blocker,
- "VFIO device doesn't support migration");
- ret = migrate_add_blocker(vdev->migration_blocker, errp);
- if (ret) {
- error_free(vdev->migration_blocker);
- vdev->migration_blocker = NULL;
- return;
- }
- }
-
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
@@ -3010,6 +3058,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
}
+ if (!pdev->failover_pair_id) {
+ ret = vfio_migration_probe(&vdev->vbasedev, errp);
+ if (ret) {
+ error_report("%s: Migration disabled", vdev->vbasedev.name);
+ }
+ }
+
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
@@ -3024,11 +3079,6 @@ out_teardown:
vfio_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- if (vdev->migration_blocker) {
- migrate_del_blocker(vdev->migration_blocker);
- error_free(vdev->migration_blocker);
- vdev->migration_blocker = NULL;
- }
}
static void vfio_instance_finalize(Object *obj)
@@ -3040,10 +3090,6 @@ static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
- if (vdev->migration_blocker) {
- migrate_del_blocker(vdev->migration_blocker);
- error_free(vdev->migration_blocker);
- }
/*
* XXX Leaking igd_opregion is not an oversight, we can't remove the
* fw_cfg entry therefore leaking this allocation seems like the safest
@@ -3071,6 +3117,7 @@ static void vfio_exitfn(PCIDevice *pdev)
}
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
+ vfio_migration_finalize(&vdev->vbasedev);
}
static void vfio_pci_reset(DeviceState *dev)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index bce71a9ac9..1574ef983f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -172,7 +172,6 @@ struct VFIOPCIDevice {
bool no_vfio_ioeventfd;
bool enable_ramfb;
VFIODisplay *dpy;
- Error *migration_blocker;
Notifier irqchip_change_notifier;
};
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 869ed2c39d..cc3f66f7e4 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -166,7 +166,7 @@ static void vfio_intp_mmap_enable(void *opaque)
VFIOINTp *tmp;
VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
- qemu_mutex_lock(&vdev->intp_mutex);
+ QEMU_LOCK_GUARD(&vdev->intp_mutex);
QLIST_FOREACH(tmp, &vdev->intp_list, next) {
if (tmp->state == VFIO_IRQ_ACTIVE) {
trace_vfio_platform_intp_mmap_enable(tmp->pin);
@@ -174,12 +174,10 @@ static void vfio_intp_mmap_enable(void *opaque)
timer_mod(vdev->mmap_timer,
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
vdev->mmap_timeout);
- qemu_mutex_unlock(&vdev->intp_mutex);
return;
}
}
vfio_mmap_set_enabled(vdev, true);
- qemu_mutex_unlock(&vdev->intp_mutex);
}
/**
@@ -289,7 +287,7 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
VFIOPlatformDevice *vdev =
container_of(vbasedev, VFIOPlatformDevice, vbasedev);
- qemu_mutex_lock(&vdev->intp_mutex);
+ QEMU_LOCK_GUARD(&vdev->intp_mutex);
QLIST_FOREACH(intp, &vdev->intp_list, next) {
if (intp->state == VFIO_IRQ_ACTIVE) {
trace_vfio_platform_eoi(intp->pin,
@@ -314,7 +312,6 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
vfio_intp_inject_pending_lockheld(intp);
QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
}
- qemu_mutex_unlock(&vdev->intp_mutex);
}
/**
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 93a0bc2522..c0e75f24b7 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -113,6 +113,7 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg
vfio_region_exit(const char *name, int index) "Device %s, region %d"
vfio_region_finalize(const char *name, int index) "Device %s, region %d"
vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
+vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]"
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
@@ -144,3 +145,23 @@ vfio_display_edid_link_up(void) ""
vfio_display_edid_link_down(void) ""
vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
vfio_display_edid_write_error(void) ""
+
+# migration.c
+vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
+vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
+vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
+vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
+vfio_save_setup(const char *name) " (%s)"
+vfio_save_cleanup(const char *name) " (%s)"
+vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
+vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
+vfio_save_device_config_state(const char *name) " (%s)"
+vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
+vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
+vfio_save_complete_precopy(const char *name) " (%s)"
+vfio_load_device_config_state(const char *name) " (%s)"
+vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
+vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
+vfio_load_cleanup(const char *name) " (%s)"
+vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
+vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64