aboutsummaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorJeff Cody <jcody@redhat.com>2017-11-17 22:27:09 -0500
committerJeff Cody <jcody@redhat.com>2017-11-21 11:58:07 -0500
commit6133b39f3c36623425a6ede9e89d93175fde15cd (patch)
tree6ea888f4baa110f6e1dbf434e2ff25d275091a1f /util
parent4afeffc8572f40d8844b946a30c00b10da4442b1 (diff)
coroutine: abort if we try to schedule or enter a pending coroutine
The previous patch fixed a race condition, in which there were coroutines being executing doubly, or after coroutine deletion. We can detect common scenarios when this happens, and print an error message and abort before we corrupt memory / data, or segfault. This patch will abort if an attempt to enter a coroutine is made while it is currently pending execution, either in a specific AioContext bh, or pending execution via a timer. It will also abort if a coroutine is scheduled, before a prior scheduled run has occurred. We cannot rely on the existing co->caller check for recursive re-entry to catch this, as the coroutine may run and exit with COROUTINE_TERMINATE before the scheduled coroutine executes. (This is the scenario that was occurring and fixed in the previous patch). This patch also re-orders the Coroutine struct elements in an attempt to optimize caching. Signed-off-by: Jeff Cody <jcody@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'util')
-rw-r--r--util/async.c13
-rw-r--r--util/qemu-coroutine-sleep.c12
-rw-r--r--util/qemu-coroutine.c14
3 files changed, 39 insertions, 0 deletions
diff --git a/util/async.c b/util/async.c
index 0e1bd8780a..4dd9d95a9e 100644
--- a/util/async.c
+++ b/util/async.c
@@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque)
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
trace_aio_co_schedule_bh_cb(ctx, co);
aio_context_acquire(ctx);
+
+ /* Protected by write barrier in qemu_aio_coroutine_enter */
+ atomic_set(&co->scheduled, NULL);
qemu_coroutine_enter(co);
aio_context_release(ctx);
}
@@ -438,6 +441,16 @@ fail:
void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
trace_aio_co_schedule(ctx, co);
+ const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
+ __func__);
+
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
co, co_scheduled_next);
qemu_bh_schedule(ctx->co_schedule_bh);
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index 9c5655041b..254349cdbb 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
#include "qemu/timer.h"
#include "block/aio.h"
@@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque)
{
CoSleepCB *sleep_cb = opaque;
+ /* Write of schedule protected by barrier write in aio_co_schedule */
+ atomic_set(&sleep_cb->co->scheduled, NULL);
aio_co_wake(sleep_cb->co);
}
@@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
CoSleepCB sleep_cb = {
.co = qemu_coroutine_self(),
};
+
+ const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
+ __func__);
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
qemu_coroutine_yield();
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index d6095c1d5a..9eff7fd450 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
Coroutine *self = qemu_coroutine_self();
CoroutineAction ret;
+ /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
+ * callers outside of aio_co_wake() */
+ const char *scheduled = atomic_mb_read(&co->scheduled);
+
trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
+ /* if the Coroutine has already been scheduled, entering it again will
+ * cause us to enter it twice, potentially even after the coroutine has
+ * been deleted */
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
if (co->caller) {
fprintf(stderr, "Co-routine re-entered recursively\n");
abort();