diff options
author | Alex Williamson <alex.williamson@redhat.com> | 2012-09-26 11:19:32 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-10-01 08:04:23 -0500 |
commit | 65501a745dbaf28284e099f724aa5fc478eae0e7 (patch) | |
tree | 0f167bc37b25e095ddfcd44ba13eb36bc4b0fa74 /hw/vfio_pci_int.h | |
parent | 883f0b85f0cbb8c31baeadfcbfd144a1ed29d935 (diff) |
vfio: vfio-pci device assignment driver
This adds the core of the QEMU VFIO-based PCI device assignment driver.
To make use of this driver, enable CONFIG_VFIO, CONFIG_VFIO_IOMMU_TYPE1,
and CONFIG_VFIO_PCI in your host Linux kernel config. Load the vfio-pci
module. To assign device 0000:05:00.0 to a guest, do the following:
for dev in $(ls /sys/bus/pci/devices/0000:05:00.0/iommu_group/devices); do
vendor=$(cat /sys/bus/pci/devices/$dev/vendor)
device=$(cat /sys/bus/pci/devices/$dev/device)
if [ -e /sys/bus/pci/devices/$dev/driver ]; then
echo $dev > /sys/bus/pci/devices/$dev/driver/unbind
fi
echo $vendor $device > /sys/bus/pci/drivers/vfio-pci/new_id
done
See Documentation/vfio.txt in the Linux kernel tree for further
description of IOMMU groups and VFIO.
Then launch qemu including the option:
-device vfio-pci,host=0000:05:00.0
Legacy PCI interrupts (INTx) currently makes use of a kludge where we
trap BAR accesses and assume the access is in response to an interrupt,
therefore de-asserting and unmasking the interrupt. It's not quite as
targetted as using the EOI for this, but it's self contained and seems
to work across all architectures. The side-effect is a significant
performance slow-down for device in INTx mode. Some devices, like
graphics cards, don't really use their interrupt, so this can be turned
off with the x-intx=off option, which disables INTx alltogether. This
should be considered an experimental option until we refine this code.
Both MSI and MSI-X are supported and avoid these issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Diffstat (limited to 'hw/vfio_pci_int.h')
-rw-r--r-- | hw/vfio_pci_int.h | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/hw/vfio_pci_int.h b/hw/vfio_pci_int.h new file mode 100644 index 0000000000..3812d8d7f1 --- /dev/null +++ b/hw/vfio_pci_int.h @@ -0,0 +1,114 @@ +/* + * vfio based device assignment support + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson <alex.williamson@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef HW_VFIO_PCI_INT_H +#define HW_VFIO_PCI_INT_H + +#include "qemu-common.h" +#include "qemu-queue.h" +#include "pci.h" +#include "event_notifier.h" + +typedef struct VFIOBAR { + off_t fd_offset; /* offset of BAR within device fd */ + int fd; /* device fd, allows us to pass VFIOBAR as opaque data */ + MemoryRegion mem; /* slow, read/write access */ + MemoryRegion mmap_mem; /* direct mapped access */ + void *mmap; + size_t size; + uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint8_t nr; /* cache the BAR number for debug */ +} VFIOBAR; + +typedef struct VFIOINTx { + bool pending; /* interrupt pending */ + bool kvm_accel; /* set when QEMU bypass through KVM enabled */ + uint8_t pin; /* which pin to pull for qemu_set_irq */ + EventNotifier interrupt; /* eventfd triggered on interrupt */ + EventNotifier unmask; /* eventfd for unmask on QEMU bypass */ + PCIINTxRoute route; /* routing info for QEMU bypass */ + bool disabled; + char *intx; +} VFIOINTx; + +struct VFIODevice; + +typedef struct VFIOMSIVector { + EventNotifier interrupt; /* eventfd triggered on interrupt */ + struct VFIODevice *vdev; /* back pointer to device */ + int virq; /* KVM irqchip route for QEMU bypass */ + bool use; +} VFIOMSIVector; + +enum { + VFIO_INT_NONE = 0, + VFIO_INT_INTx = 1, + VFIO_INT_MSI = 2, + VFIO_INT_MSIX = 3, +}; + +struct VFIOGroup; + +typedef struct VFIOContainer { + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + struct { + /* enable abstraction to support various iommu backends */ + union { + MemoryListener listener; /* Used by type1 iommu */ + }; + void (*release)(struct VFIOContainer *); + } iommu_data; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_ENTRY(VFIOContainer) next; +} VFIOContainer; + +/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ +typedef struct VFIOMSIXInfo { + uint8_t table_bar; + uint8_t pba_bar; + uint16_t entries; + uint32_t table_offset; + uint32_t pba_offset; + MemoryRegion mmap_mem; + void *mmap; +} VFIOMSIXInfo; + +typedef struct VFIODevice { + PCIDevice pdev; + int fd; + VFIOINTx intx; + unsigned int config_size; + off_t config_offset; /* Offset of config space region within device fd */ + unsigned int rom_size; + off_t rom_offset; /* Offset of ROM region within device fd */ + int msi_cap_size; + VFIOMSIVector *msi_vectors; + VFIOMSIXInfo *msix; + int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ + int interrupt; /* Current interrupt type */ + VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ + PCIHostDeviceAddress host; + QLIST_ENTRY(VFIODevice) next; + struct VFIOGroup *group; + bool reset_works; +} VFIODevice; + +typedef struct VFIOGroup { + int fd; + int groupid; + VFIOContainer *container; + QLIST_HEAD(, VFIODevice) device_list; + QLIST_ENTRY(VFIOGroup) next; + QLIST_ENTRY(VFIOGroup) container_next; +} VFIOGroup; + +#endif /* HW_VFIO_PCI_INT_H */ |