1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
|
/*
* vfio based device assignment support - PCI devices
*
* Copyright Red Hat, Inc. 2012-2015
*
* Authors:
* Alex Williamson <alex.williamson@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#ifndef HW_VFIO_VFIO_PCI_H
#define HW_VFIO_VFIO_PCI_H
#include "exec/memory.h"
#include "hw/pci/pci_device.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "qom/object.h"
#include "sysemu/kvm.h"
/*
 * Wildcard ID accepted by vfio_pci_is().  (~0) is an int with every bit set;
 * compared against that function's uint32_t arguments it converts (with the
 * usual 32-bit int) to 0xffffffff.
 */
#define PCI_ANY_ID (~0)
/*
 * Forward declaration: VFIOQuirk.reset and VFIOMSIVector.vdev refer to the
 * device structure before its full definition further down.
 */
struct VFIOPCIDevice;
/*
 * Bookkeeping for a single ioeventfd.  Instances are chained on
 * VFIOQuirk.ioeventfds through the 'next' link.
 *
 * NOTE(review): the field notes marked "presumably" are inferred from names
 * and types only; the code that fills them in is not part of this header.
 */
typedef struct VFIOIOEventFD {
QLIST_ENTRY(VFIOIOEventFD) next;
/* presumably the (mr, addr, size, data) tuple describes the matched access */
MemoryRegion *mr;
hwaddr addr;
unsigned size;
uint64_t data;
EventNotifier e;
/* presumably the VFIO region/offset the matched access is forwarded to */
VFIORegion *region;
hwaddr region_addr;
bool dynamic; /* Added runtime, removed on device reset */
/* NOTE(review): likely "serviced by vfio rather than QEMU" -- confirm */
bool vfio;
} VFIOIOEventFD;
/*
 * One device quirk.  Quirks are chained (via 'next') on the 'quirks' list of
 * a VFIOBAR or a VFIOVGARegion, and each quirk owns its own list of
 * VFIOIOEventFD entries.
 */
typedef struct VFIOQuirk {
QLIST_ENTRY(VFIOQuirk) next;
void *data; /* untyped, quirk-specific payload */
QLIST_HEAD(, VFIOIOEventFD) ioeventfds;
/*
 * presumably 'mem' points at an array of 'nr_mem' MemoryRegions, sized by
 * the argument of vfio_quirk_alloc(int nr_mem) -- confirm in the allocator
 */
int nr_mem;
MemoryRegion *mem;
/* Optional per-quirk reset hook; receives the device and the quirk itself */
void (*reset)(struct VFIOPCIDevice *vdev, struct VFIOQuirk *quirk);
} VFIOQuirk;
/*
 * State for one PCI BAR of an assigned device; VFIOPCIDevice holds an array
 * of these in 'bars' (ROM excluded).
 */
typedef struct VFIOBAR {
VFIORegion region;
MemoryRegion *mr;
size_t size;
uint8_t type;
bool ioport; /* presumably: BAR is an I/O port BAR -- confirm at the setter */
bool mem64; /* presumably: BAR is a 64-bit memory BAR -- confirm at the setter */
QLIST_HEAD(, VFIOQuirk) quirks; /* quirks attached to this BAR */
} VFIOBAR;
/*
 * One legacy VGA range of an assigned device; VFIOVGA embeds
 * QEMU_PCI_VGA_NUM_REGIONS of these.
 */
typedef struct VFIOVGARegion {
MemoryRegion mem;
off_t offset;
int nr;
QLIST_HEAD(, VFIOQuirk) quirks; /* quirks attached to this VGA range */
} VFIOVGARegion;
/*
 * VGA state of an assigned device; reached through VFIOPCIDevice.vga, whose
 * comment lists the ranges 0xa0000, 0x3b0 and 0x3c0.
 */
typedef struct VFIOVGA {
/* presumably the offset of the VGA region within 'fd' -- confirm at setup */
off_t fd_offset;
int fd;
VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS];
} VFIOVGA;
/*
 * Legacy INTx interrupt state; embedded in VFIOPCIDevice as 'intx'.
 * The per-field comments describe each member.
 */
typedef struct VFIOINTx {
bool pending; /* interrupt pending */
bool kvm_accel; /* set when QEMU bypass through KVM enabled */
uint8_t pin; /* which pin to pull for qemu_set_irq */
EventNotifier interrupt; /* eventfd triggered on interrupt */
EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
PCIINTxRoute route; /* routing info for QEMU bypass */
uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
} VFIOINTx;
/*
 * State for one MSI/MSI-X vector; VFIOPCIDevice.msi_vectors points at these.
 */
typedef struct VFIOMSIVector {
/*
 * Two interrupt paths are configured per vector. The first is only used
 * for interrupts injected via QEMU. This is typically the non-accel path,
 * but may also be used when we want QEMU to handle masking and pending
 * bits. The KVM path bypasses QEMU and is therefore higher performance,
 * but requires masking at the device. virq is used to track the MSI route
 * through KVM, thus kvm_interrupt is only available when virq is set to a
 * valid (>= 0) value.
 */
EventNotifier interrupt;
EventNotifier kvm_interrupt;
struct VFIOPCIDevice *vdev; /* back pointer to device */
int virq;
bool use; /* NOTE(review): presumably "vector is in use" -- confirm */
} VFIOMSIVector;
/*
 * Interrupt delivery modes.  Presumably these are the values stored in
 * VFIOPCIDevice.interrupt ("Current interrupt type") -- confirm at the
 * assignment sites.
 */
enum {
VFIO_INT_NONE = 0,
VFIO_INT_INTx = 1,
VFIO_INT_MSI = 2,
VFIO_INT_MSIX = 3,
};
/*
 * Cache of MSI-X setup; reached through VFIOPCIDevice.msix.
 *
 * NOTE(review): the field notes marked "presumably" follow the usual MSI-X
 * capability layout but are inferred from names only; the code that fills
 * this structure is not part of this header.
 */
typedef struct VFIOMSIXInfo {
uint8_t table_bar; /* presumably the BAR index holding the vector table */
uint8_t pba_bar; /* presumably the BAR index holding the pending bit array */
uint16_t entries; /* presumably the number of table entries */
uint32_t table_offset; /* presumably the table offset within table_bar */
uint32_t pba_offset; /* presumably the PBA offset within pba_bar */
unsigned long *pending; /* NOTE(review): looks like a bitmap -- confirm */
bool noresize;
} VFIOMSIXInfo;
/* Type name string passed to the object model for this device. */
#define TYPE_VFIO_PCI "vfio-pci"
/*
 * NOTE(review): presumably this macro from qom/object.h is what supplies the
 * VFIOPCIDevice typedef used without 'struct' further down, together with a
 * VFIO_PCI() cast helper -- confirm against the QOM documentation.
 */
OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
/*
 * Per-device state for a vfio-assigned PCI device.  It embeds the emulated
 * PCIDevice ('pdev') and the generic VFIODevice ('vbasedev'), followed by
 * interrupt, config space, ROM, BAR and VGA state and a set of option fields.
 */
struct VFIOPCIDevice {
PCIDevice pdev;
VFIODevice vbasedev;
VFIOINTx intx; /* legacy INTx state, see VFIOINTx */
unsigned int config_size;
uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */
off_t config_offset; /* Offset of config space region within device fd */
unsigned int rom_size;
off_t rom_offset; /* Offset of ROM region within device fd */
void *rom;
int msi_cap_size;
VFIOMSIVector *msi_vectors;
VFIOMSIXInfo *msix;
int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
int interrupt; /* Current interrupt type */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
void *igd_opregion;
PCIHostDeviceAddress host;
QemuUUID vf_token;
EventNotifier err_notifier;
EventNotifier req_notifier;
/*
 * Device-specific reset hook.
 * NOTE(review): presumably installed by vfio_setup_resetfn_quirk() -- confirm.
 */
int (*resetfn)(struct VFIOPCIDevice *);
/*
 * IDs are held as uint32_t; vfio_pci_is() compares vendor_id and device_id
 * against uint32_t arguments so that PCI_ANY_ID works as a wildcard.
 */
uint32_t vendor_id;
uint32_t device_id;
uint32_t sub_vendor_id;
uint32_t sub_device_id;
/* presumably a bitmask of the VFIO_FEATURE_ENABLE_* flags defined below */
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
#define VFIO_FEATURE_ENABLE_REQ_BIT 1
#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
#define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
#define VFIO_FEATURE_ENABLE_IGD_OPREGION \
(1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
OnOffAuto display;
uint32_t display_xres;
uint32_t display_yres;
int32_t bootindex;
uint32_t igd_gms;
OffAutoPCIBAR msix_relo;
uint8_t pm_cap;
uint8_t nv_gpudirect_clique;
/*
 * Boolean state and option flags.
 * NOTE(review): which of these are user-settable properties and which are
 * runtime state cannot be told from this header.
 */
bool pci_aer;
bool req_enabled;
bool has_flr;
bool has_pm_reset;
bool rom_read_failed;
bool no_kvm_intx;
bool no_kvm_msi;
bool no_kvm_msix;
bool no_geforce_quirks;
bool no_kvm_ioeventfd;
bool no_vfio_ioeventfd;
bool enable_ramfb;
OnOffAuto ramfb_migrate;
bool defer_kvm_irq_routing;
bool clear_parent_atomics_on_exit;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
};
/*
 * Report whether @vdev matches a vendor/device ID pair.
 *
 * Either ID may be PCI_ANY_ID, which acts as a wildcard for that field;
 * otherwise it must equal vdev->vendor_id / vdev->device_id respectively.
 * The parameters are uint32_t so that PCI_ANY_ID expands to a value that
 * cannot match real hardware.
 */
static inline bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
{
    /* Reject on the first field that is neither wildcarded nor equal. */
    if (vendor != PCI_ANY_ID && vendor != vdev->vendor_id) {
        return false;
    }

    if (device != PCI_ANY_ID && device != vdev->device_id) {
        return false;
    }

    return true;
}
/*
 * Report whether @vdev is a VGA display controller: reads the 16-bit word at
 * PCI_CLASS_DEVICE in the device's config space (vdev->pdev.config) and
 * compares it with PCI_CLASS_DISPLAY_VGA.
 */
static inline bool vfio_is_vga(VFIOPCIDevice *vdev)
{
    uint16_t dev_class = pci_get_word(vdev->pdev.config + PCI_CLASS_DEVICE);

    return dev_class == PCI_CLASS_DISPLAY_VGA;
}
/*
 * Function and data declarations shared by the vfio-pci sources; none of
 * them is defined in this header.
 *
 * NOTE(review): the grouping comments below are inferred from identifier
 * names and signatures only -- confirm against the implementing .c files.
 */

/* PCI config space accessors: (pdev, addr, len) for reads, plus val for writes. */
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
/* VGA accessors; 'opaque' is an untyped context pointer supplied by the caller. */
uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
/* Quirk handling: option ROM denylist, VGA and per-BAR (index 'nr') quirks. */
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
/* Failure is reported through the int return and errp; exact convention not visible here. */
int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
void vfio_quirk_reset(VFIOPCIDevice *vdev);
/* Returns a VFIOQuirk pointer; presumably sized for 'nr_mem' regions (cf. VFIOQuirk.nr_mem). */
VFIOQuirk *vfio_quirk_alloc(int nr_mem);
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
/* Property type object, defined in another translation unit. */
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
/* Reset helpers and host device lookup. */
void vfio_pci_pre_reset(VFIOPCIDevice *vdev);
void vfio_pci_post_reset(VFIOPCIDevice *vdev);
bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name);
/* 'info_p' is an out-parameter; NOTE(review): ownership of *info_p is not visible here. */
int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
struct vfio_pci_hot_reset_info **info_p);
/* VGA and IGD OpRegion setup; failure is reported through the int return and errp. */
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
struct vfio_region_info *info,
Error **errp);
/* Display support. */
void vfio_display_reset(VFIOPCIDevice *vdev);
int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
void vfio_display_finalize(VFIOPCIDevice *vdev);
/* Migration state description for the display, defined in another translation unit. */
extern const VMStateDescription vfio_display_vmstate;
#endif /* HW_VFIO_VFIO_PCI_H */
|