aboutsummaryrefslogtreecommitdiff
path: root/hw/i386/kvm/xen_evtchn.c
diff options
context:
space:
mode:
author David Woodhouse <dwmw@amazon.co.uk> 2022-12-15 20:35:24 +0000
committer David Woodhouse <dwmw@amazon.co.uk> 2023-03-01 09:06:44 +0000
commit ddf0fd9ae1fd1ff95489763b37a483adb3cd5907 (patch)
tree c3295981420b48bab9de241138cea4cad4735a0f /hw/i386/kvm/xen_evtchn.c
parent 507cb64d6e66d672bfddb275fe746241e0ed8db2 (diff)
hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback
The GSI callback (and later PCI_INTX) is a level triggered interrupt. It is asserted when an event channel is delivered to vCPU0, and is supposed to be cleared when the vcpu_info->evtchn_upcall_pending field for vCPU0 is cleared again. Thankfully, Xen does *not* assert the GSI if the guest sets its own evtchn_upcall_pending field; we only need to assert the GSI when we have delivered an event for ourselves. So that's the easy part, kind of. There's a slight complexity in that we need to hold the BQL before we can call qemu_set_irq(), and we definitely can't do that while holding our own port_lock (because we'll need to take that from the qemu-side functions that the PV backend drivers will call). So if we end up wanting to set the IRQ in a context where we *don't* already hold the BQL, defer to a BH. However, we *do* need to poll for the evtchn_upcall_pending flag being cleared. In an ideal world we would poll that when the EOI happens on the PIC/IOAPIC. That's how it works in the kernel with the VFIO eventfd pairs — one is used to trigger the interrupt, and the other works in the other direction to 'resample' on EOI, and trigger the first eventfd again if the line is still active. However, QEMU doesn't seem to do that. Even VFIO level interrupts seem to be supported by temporarily unmapping the device's BARs from the guest when an interrupt happens, then trapping *all* MMIO to the device and sending the 'resample' event on *every* MMIO access until the IRQ is cleared! Maybe in future we'll plumb the 'resample' concept through QEMU's irq framework but for now we'll do what Xen itself does: just check the flag on every vmexit if the upcall GSI is known to be asserted. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Paul Durrant <paul@xen.org>
Diffstat (limited to 'hw/i386/kvm/xen_evtchn.c')
-rw-r--r-- hw/i386/kvm/xen_evtchn.c 97
1 files changed, 97 insertions, 0 deletions
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index e937de7a93..6b0bdba65d 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -27,6 +27,8 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
+#include "hw/i386/x86.h"
+#include "hw/irq.h"
#include "xen_evtchn.h"
#include "xen_overlay.h"
@@ -100,9 +102,12 @@ struct XenEvtchnState {
uint64_t callback_param;
bool evtchn_in_kernel;
+ QEMUBH *gsi_bh;
+
QemuMutex port_lock;
uint32_t nr_ports;
XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
+ qemu_irq gsis[IOAPIC_NUM_PINS];
};
struct XenEvtchnState *xen_evtchn_singleton;
@@ -167,13 +172,42 @@ static const TypeInfo xen_evtchn_info = {
.class_init = xen_evtchn_class_init,
};
+static void gsi_assert_bh(void *opaque) /* BH handler registered in xen_evtchn_create(); opaque is unused */
+{
+ struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0); /* vcpu_info of vCPU 0 */
+ if (vi) { /* if NULL there is nothing to sample, so the level is left untouched */
+ xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending); /* level = pending flag normalised to 0/1 */
+ }
+}
+
void xen_evtchn_create(void)
{
XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
-1, NULL));
+ int i;
+
xen_evtchn_singleton = s;
qemu_mutex_init(&s->port_lock);
+ s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s); /* scheduled by xen_evtchn_set_callback_level() when the BQL is not held */
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) { /* register every gsis[] entry as a sysbus IRQ of this device */
+ sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
+ }
+}
+
+void xen_evtchn_connect_gsis(qemu_irq *system_gsis) /* connect this device's sysbus IRQ i to system_gsis[i] */
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ int i;
+
+ if (!s) { /* singleton not set (xen_evtchn_create() has not run): nothing to connect */
+ return;
+ }
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) { /* reads system_gsis[0..IOAPIC_NUM_PINS-1], so the caller must supply that many entries */
+ sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
+ }
}
static void xen_evtchn_register_types(void)
@@ -183,6 +217,64 @@ static void xen_evtchn_register_types(void)
type_init(xen_evtchn_register_types)
+void xen_evtchn_set_callback_level(int level) /* drive the configured callback GSI to 'level' (non-GSI callback types are ignored here) */
+{
+ XenEvtchnState *s = xen_evtchn_singleton;
+ uint32_t param;
+
+ if (!s) { /* singleton not set: nothing to do */
+ return;
+ }
+
+ /*
+ * We get to this function in a number of ways:
+ *
+ * • From I/O context, via PV backend drivers sending a notification to
+ * the guest.
+ *
+ * • From guest vCPU context, via loopback interdomain event channels
+ * (or theoretically even IPIs but guests don't use those with GSI
+ * delivery because that's pointless. We don't want a malicious guest
+ * to be able to trigger a deadlock though, so we can't rule it out.)
+ *
+ * • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
+ * configured.
+ *
+ * • From guest vCPU context in the KVM exit handler, if the upcall
+ * pending flag has been cleared and the GSI needs to be deasserted.
+ *
+ * • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
+ * been acked in the irqchip.
+ *
+ * Whichever context we come from, if we aren't already holding the BQL
+ * then we can't take it now, as we may already hold s->port_lock. So
+ * trigger the BH to set the IRQ for us instead of doing it immediately.
+ *
+ * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
+ * will deliberately take the BQL because they want the change to take
+ * effect immediately. That just leaves interdomain loopback as the case
+ * which uses the BH.
+ */
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_bh_schedule(s->gsi_bh); /* 'level' is dropped here: gsi_assert_bh() re-reads vCPU 0's pending flag instead */
+ return;
+ }
+
+ param = (uint32_t)s->callback_param; /* low 32 bits; used as the GSI number for the GSI callback type */
+
+ switch (s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) { /* callback type lives above CALLBACK_VIA_TYPE_SHIFT */
+ case HVM_PARAM_CALLBACK_TYPE_GSI:
+ if (param < IOAPIC_NUM_PINS) { /* a GSI outside gsis[] is silently ignored */
+ qemu_set_irq(s->gsis[param], level);
+ if (level) {
+ /* Ensure the vCPU polls for deassertion */
+ kvm_xen_set_callback_asserted();
+ }
+ }
+ break;
+ }
+}
+
int xen_evtchn_set_callback_param(uint64_t param)
{
XenEvtchnState *s = xen_evtchn_singleton;
@@ -209,6 +301,11 @@ int xen_evtchn_set_callback_param(uint64_t param)
}
break;
}
+
+ case HVM_PARAM_CALLBACK_TYPE_GSI:
+ ret = 0;
+ break;
+
default:
/* Xen doesn't return error even if you set something bogus */
ret = 0;