diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2014-12-02 12:05:44 +0100 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2015-01-13 13:43:28 +0000 |
commit | d1d1b206b07977fe0e75b0254e5b50c215c97803 (patch) | |
tree | 2926e9c04304e3ef1d016a84152a3e27cd486fc1 /coroutine-ucontext.c | |
parent | bc521696607c5348fcd8a9e57b408d0ac0dbe2f8 (diff) |
coroutine-ucontext: use __thread
ELF thread local storage is about 10% faster on tests/test-coroutine's
perf/cost test. The timing on my machine is 190ns per iteration with
pthread TLS, 170 with ELF TLS.
Based on a patch by Kevin Wolf and Peter Lieven, but redone to follow
the model of coroutine-win32.c (including the important "noinline"
attribute!).
Platforms without thread-local storage (OpenBSD probably?) will need
a new-enough GCC for this to compile, in order to use the same emutls
support that Windows already relies on.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1417518350-6167-2-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'coroutine-ucontext.c')
-rw-r--r-- | coroutine-ucontext.c | 69 |
1 files changed, 19 insertions, 50 deletions
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c index 4bf2cde279..259fcb48a4 100644 --- a/coroutine-ucontext.c +++ b/coroutine-ucontext.c @@ -25,7 +25,6 @@ #include <stdlib.h> #include <setjmp.h> #include <stdint.h> -#include <pthread.h> #include <ucontext.h> #include "qemu-common.h" #include "block/coroutine_int.h" @@ -48,15 +47,8 @@ typedef struct { /** * Per-thread coroutine bookkeeping */ -typedef struct { - /** Currently executing coroutine */ - Coroutine *current; - - /** The default coroutine */ - CoroutineUContext leader; -} CoroutineThreadState; - -static pthread_key_t thread_state_key; +static __thread CoroutineUContext leader; +static __thread Coroutine *current; /* * va_args to makecontext() must be type 'int', so passing @@ -68,36 +60,6 @@ union cc_arg { int i[2]; }; -static CoroutineThreadState *coroutine_get_thread_state(void) -{ - CoroutineThreadState *s = pthread_getspecific(thread_state_key); - - if (!s) { - s = g_malloc0(sizeof(*s)); - s->current = &s->leader.base; - pthread_setspecific(thread_state_key, s); - } - return s; -} - -static void qemu_coroutine_thread_cleanup(void *opaque) -{ - CoroutineThreadState *s = opaque; - - g_free(s); -} - -static void __attribute__((constructor)) coroutine_init(void) -{ - int ret; - - ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); - if (ret != 0) { - fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); - abort(); - } -} - static void coroutine_trampoline(int i0, int i1) { union cc_arg arg; @@ -193,15 +155,23 @@ void qemu_coroutine_delete(Coroutine *co_) g_free(co); } -CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - CoroutineAction action) +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the sigsetjmp() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) { CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); - CoroutineThreadState *s = coroutine_get_thread_state(); int ret; - s->current = to_; + current = to_; ret = sigsetjmp(from->env, 0); if (ret == 0) { @@ -212,14 +182,13 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, Coroutine *qemu_coroutine_self(void) { - CoroutineThreadState *s = coroutine_get_thread_state(); - - return s->current; + if (!current) { + current = &leader.base; + } + return current; } bool qemu_in_coroutine(void) { - CoroutineThreadState *s = pthread_getspecific(thread_state_key); - - return s && s->current->caller; + return current && current->caller; } |