diff options
-rw-r--r-- | hw/vfio/platform.c | 331 | ||||
-rw-r--r-- | include/hw/vfio/vfio-platform.h | 31 | ||||
-rw-r--r-- | trace-events | 7 |
3 files changed, 368 insertions, 1 deletions
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 569a675cb2..35266a813c 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -22,10 +22,299 @@ #include "qemu/range.h" #include "sysemu/sysemu.h" #include "exec/memory.h" +#include "qemu/queue.h" #include "hw/sysbus.h" #include "trace.h" #include "hw/platform-bus.h" +/* + * Functions used whatever the injection method + */ + +/** + * vfio_init_intp - allocate, initialize the IRQ struct pointer + * and add it into the list of IRQs + * @vbasedev: the VFIO device handle + * @info: irq info struct retrieved from VFIO driver + */ +static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, + struct vfio_irq_info info) +{ + int ret; + VFIOPlatformDevice *vdev = + container_of(vbasedev, VFIOPlatformDevice, vbasedev); + SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev); + VFIOINTp *intp; + + intp = g_malloc0(sizeof(*intp)); + intp->vdev = vdev; + intp->pin = info.index; + intp->flags = info.flags; + intp->state = VFIO_IRQ_INACTIVE; + + sysbus_init_irq(sbdev, &intp->qemuirq); + + /* Get an eventfd for trigger */ + ret = event_notifier_init(&intp->interrupt, 0); + if (ret) { + g_free(intp); + error_report("vfio: Error: trigger event_notifier_init failed "); + return NULL; + } + + QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); + return intp; +} + +/** + * vfio_set_trigger_eventfd - set VFIO eventfd handling + * + * @intp: IRQ struct handle + * @handler: handler to be called on eventfd signaling + * + * Setup VFIO signaling and attach an optional user-side handler + * to the eventfd + */ +static int vfio_set_trigger_eventfd(VFIOINTp *intp, + eventfd_user_side_handler_t handler) +{ + VFIODevice *vbasedev = &intp->vdev->vbasedev; + struct vfio_irq_set *irq_set; + int argsz, ret; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = intp->pin; + irq_set->start = 0; + 
irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + *pfd = event_notifier_get_fd(&intp->interrupt); + qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp); + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + g_free(irq_set); + if (ret < 0) { + error_report("vfio: Failed to set trigger eventfd: %m"); + qemu_set_fd_handler(*pfd, NULL, NULL, NULL); + } + return ret; +} + +/* + * Functions only used when eventfds are handled on user-side + * ie. without irqfd + */ + +/** + * vfio_mmap_set_enabled - enable/disable the fast path mode + * @vdev: the VFIO platform device + * @enabled: the target mmap state + * + * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP); + * enabled = false ~ slow path = MMIO region is trapped and region callbacks + * are called; slow path enables to trap the device IRQ status register reset +*/ + +static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) +{ + int i; + + trace_vfio_platform_mmap_set_enabled(enabled); + + for (i = 0; i < vdev->vbasedev.num_regions; i++) { + VFIORegion *region = vdev->regions[i]; + + memory_region_set_enabled(&region->mmap_mem, enabled); + } +} + +/** + * vfio_intp_mmap_enable - timer function, restores the fast path + * if there is no more active IRQ + * @opaque: actually points to the VFIO platform device + * + * Called on mmap timer timeout, this function checks whether the + * IRQ is still active and if not, restores the fast path. + * by construction a single eventfd is handled at a time. + * if the IRQ is still active, the timer is re-programmed. 
+ */ +static void vfio_intp_mmap_enable(void *opaque) +{ + VFIOINTp *tmp; + VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque; + + qemu_mutex_lock(&vdev->intp_mutex); + QLIST_FOREACH(tmp, &vdev->intp_list, next) { + if (tmp->state == VFIO_IRQ_ACTIVE) { + trace_vfio_platform_intp_mmap_enable(tmp->pin); + /* re-program the timer to check active status later */ + timer_mod(vdev->mmap_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + vdev->mmap_timeout); + qemu_mutex_unlock(&vdev->intp_mutex); + return; + } + } + vfio_mmap_set_enabled(vdev, true); + qemu_mutex_unlock(&vdev->intp_mutex); +} + +/** + * vfio_intp_inject_pending_lockheld - Injects a pending IRQ + * @intp: the VFIOINTp handle + * + * The function is called on a previous IRQ completion, from + * vfio_platform_eoi, while the intp_mutex is locked. + * In that situation, the slow path is already set and + * the mmap timer was already programmed. + */ +static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp) +{ + trace_vfio_platform_intp_inject_pending_lockheld(intp->pin, + event_notifier_get_fd(&intp->interrupt)); + + intp->state = VFIO_IRQ_ACTIVE; + + /* trigger the virtual IRQ */ + qemu_set_irq(intp->qemuirq, 1); +} + +/** + * vfio_intp_interrupt - The user-side eventfd handler + * @intp: the VFIOINTp handle + * + * the function is entered in event handler context: + * the vIRQ is injected into the guest if there is no other active + * or pending IRQ. 
+ */ +static void vfio_intp_interrupt(VFIOINTp *intp) +{ + int ret; + VFIOINTp *tmp; + VFIOPlatformDevice *vdev = intp->vdev; + bool delay_handling = false; + + qemu_mutex_lock(&vdev->intp_mutex); + if (intp->state == VFIO_IRQ_INACTIVE) { + QLIST_FOREACH(tmp, &vdev->intp_list, next) { + if (tmp->state == VFIO_IRQ_ACTIVE || + tmp->state == VFIO_IRQ_PENDING) { + delay_handling = true; + break; + } + } + } + if (delay_handling) { + /* + * the new IRQ gets a pending status and is pushed in + * the pending queue + */ + intp->state = VFIO_IRQ_PENDING; + trace_vfio_intp_interrupt_set_pending(intp->pin); + QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue, + intp, pqnext); + ret = event_notifier_test_and_clear(&intp->interrupt); + qemu_mutex_unlock(&vdev->intp_mutex); + return; + } + + trace_vfio_platform_intp_interrupt(intp->pin, + event_notifier_get_fd(&intp->interrupt)); + + ret = event_notifier_test_and_clear(&intp->interrupt); + if (!ret) { + error_report("Error when clearing fd=%d (ret = %d)\n", + event_notifier_get_fd(&intp->interrupt), ret); + } + + intp->state = VFIO_IRQ_ACTIVE; + + /* sets slow path */ + vfio_mmap_set_enabled(vdev, false); + + /* trigger the virtual IRQ */ + qemu_set_irq(intp->qemuirq, 1); + + /* + * Schedule the mmap timer which will restore fastpath when no IRQ + * is active anymore + */ + if (vdev->mmap_timeout) { + timer_mod(vdev->mmap_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + vdev->mmap_timeout); + } + qemu_mutex_unlock(&vdev->intp_mutex); +} + +/** + * vfio_platform_eoi - IRQ completion routine + * @vbasedev: the VFIO device handle + * + * De-asserts the active virtual IRQ and unmasks the physical IRQ + * (effective for level sensitive IRQ auto-masked by the VFIO driver). + * Then it handles next pending IRQ if any. + * eoi function is called on the first access to any MMIO region + * after an IRQ was triggered, trapped since slow path was set. + * It is assumed this access corresponds to the IRQ status + * register reset. 
With such a mechanism, a single IRQ can be + * handled at a time since there is no way to know which IRQ + * was completed by the guest (we would need additional details + * about the IRQ status register mask). + */ +static void vfio_platform_eoi(VFIODevice *vbasedev) +{ + VFIOINTp *intp; + VFIOPlatformDevice *vdev = + container_of(vbasedev, VFIOPlatformDevice, vbasedev); + + qemu_mutex_lock(&vdev->intp_mutex); + QLIST_FOREACH(intp, &vdev->intp_list, next) { + if (intp->state == VFIO_IRQ_ACTIVE) { + trace_vfio_platform_eoi(intp->pin, + event_notifier_get_fd(&intp->interrupt)); + intp->state = VFIO_IRQ_INACTIVE; + + /* deassert the virtual IRQ */ + qemu_set_irq(intp->qemuirq, 0); + + if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) { + /* unmasks the physical level-sensitive IRQ */ + vfio_unmask_single_irqindex(vbasedev, intp->pin); + } + + /* a single IRQ can be active at a time */ + break; + } + } + /* in case there are pending IRQs, handle the first one */ + if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) { + intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue); + vfio_intp_inject_pending_lockheld(intp); + QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext); + } + qemu_mutex_unlock(&vdev->intp_mutex); +} + +/** + * vfio_start_eventfd_injection - starts the virtual IRQ injection using + * user-side handled eventfds + * @intp: the IRQ struct pointer + */ + +static int vfio_start_eventfd_injection(VFIOINTp *intp) +{ + int ret; + + ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt); + if (ret) { + error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m"); + } + return ret; +} + /* VFIO skeleton */ static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) @@ -41,12 +330,13 @@ static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev) /** * vfio_populate_device - Allocate and populate MMIO region - * structs according to driver returned information + * and IRQ structs according to driver returned information * @vbasedev: the VFIO device 
handle * */ static int vfio_populate_device(VFIODevice *vbasedev) { + VFIOINTp *intp, *tmp; int i, ret = -1; VFIOPlatformDevice *vdev = container_of(vbasedev, VFIOPlatformDevice, vbasedev); @@ -84,7 +374,38 @@ static int vfio_populate_device(VFIODevice *vbasedev) (unsigned long)ptr->fd_offset); } + vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + vfio_intp_mmap_enable, vdev); + + QSIMPLEQ_INIT(&vdev->pending_intp_queue); + + for (i = 0; i < vbasedev->num_irqs; i++) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + + irq.index = i; + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret) { + error_printf("vfio: error getting device %s irq info", + vbasedev->name); + goto irq_err; + } else { + trace_vfio_platform_populate_interrupts(irq.index, + irq.count, + irq.flags); + intp = vfio_init_intp(vbasedev, irq); + if (!intp) { + error_report("vfio: Error installing IRQ %d up", i); + goto irq_err; + } + } + } return 0; +irq_err: + timer_del(vdev->mmap_timer); + QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) { + QLIST_REMOVE(intp, next); + g_free(intp); + } reg_error: for (i = 0; i < vbasedev->num_regions; i++) { g_free(vdev->regions[i]); @@ -97,6 +418,7 @@ reg_error: static VFIODeviceOps vfio_platform_ops = { .vfio_compute_needs_reset = vfio_platform_compute_needs_reset, .vfio_hot_reset_multi = vfio_platform_hot_reset_multi, + .vfio_eoi = vfio_platform_eoi, }; /** @@ -228,6 +550,7 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); VFIODevice *vbasedev = &vdev->vbasedev; + VFIOINTp *intp; int i, ret; vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; @@ -246,6 +569,10 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) vfio_map_region(vdev, i); sysbus_init_mmio(sbdev, &vdev->regions[i]->mem); } + + QLIST_FOREACH(intp, &vdev->intp_list, next) { + vfio_start_eventfd_injection(intp); + } } static const 
VMStateDescription vfio_platform_vmstate = { @@ -256,6 +583,8 @@ static const VMStateDescription vfio_platform_vmstate = { static Property vfio_platform_dev_properties[] = { DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), + DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, + mmap_timeout, 1100), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h index 338f0c6163..26b2ad6f4e 100644 --- a/include/hw/vfio/vfio-platform.h +++ b/include/hw/vfio/vfio-platform.h @@ -18,14 +18,45 @@ #include "hw/sysbus.h" #include "hw/vfio/vfio-common.h" +#include "qemu/event_notifier.h" +#include "qemu/queue.h" +#include "hw/irq.h" #define TYPE_VFIO_PLATFORM "vfio-platform" +enum { + VFIO_IRQ_INACTIVE = 0, + VFIO_IRQ_PENDING = 1, + VFIO_IRQ_ACTIVE = 2, + /* VFIO_IRQ_ACTIVE_AND_PENDING cannot happen with VFIO */ +}; + +typedef struct VFIOINTp { + QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */ + QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */ + EventNotifier interrupt; /* eventfd triggered on interrupt */ + EventNotifier unmask; /* eventfd for unmask on QEMU bypass */ + qemu_irq qemuirq; + struct VFIOPlatformDevice *vdev; /* back pointer to device */ + int state; /* inactive, pending, active */ + uint8_t pin; /* index */ + uint32_t flags; /* IRQ info flags */ +} VFIOINTp; + +/* function type for user side eventfd handler */ +typedef void (*eventfd_user_side_handler_t)(VFIOINTp *intp); + typedef struct VFIOPlatformDevice { SysBusDevice sbdev; VFIODevice vbasedev; /* not a QOM object */ VFIORegion **regions; + QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQs */ + /* queue of pending IRQs */ + QSIMPLEQ_HEAD(pending_intp_queue, VFIOINTp) pending_intp_queue; char *compat; /* compatibility string */ + uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ + QEMUTimer *mmap_timer; /* allows fast-path 
resume after IRQ hit */ + QemuMutex intp_mutex; /* protect the intp_list IRQ state */ } VFIOPlatformDevice; typedef struct VFIOPlatformDeviceClass { diff --git a/trace-events b/trace-events index e1a34ec8ee..7af9f19a33 100644 --- a/trace-events +++ b/trace-events @@ -1568,6 +1568,13 @@ vfio_put_base_device(int fd) "close vdev->fd=%d" vfio_platform_populate_regions(int region_index, unsigned long flag, unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, size = 0x%lx, fd= %d, offset = 0x%lx" vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" +vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" +vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d" +vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path" +vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" +vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" +vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x" +vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING" #hw/acpi/memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 |