From d354c7eccf5466ec2715a03d3f33dbfd6680dcc5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Feb 2012 13:23:34 +0100 Subject: aio: add generic thread-pool facility Add a generic thread-pool. The code is roughly based on posix-aio-compat.c, with some changes, especially the following: - use QemuSemaphore instead of QemuCond; - separate the state of the thread from the return code of the worker function. The return code is totally opaque for the thread pool; - do not busy wait when doing cancellation. A more generic threadpool (but still specific to I/O so that in the future it can use special scheduling classes or PI mutexes) can have many uses: it allows more flexibility in raw-posix.c and can more easily be extended to Win32, and it will also be used to do an msync of the persistent bitmap. Signed-off-by: Paolo Bonzini --- Makefile.objs | 2 +- thread-pool.c | 282 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ thread-pool.h | 34 +++++++ trace-events | 5 ++ 4 files changed, 322 insertions(+), 1 deletion(-) create mode 100644 thread-pool.c create mode 100644 thread-pool.h diff --git a/Makefile.objs b/Makefile.objs index a8ade04c02..f8ae0316b8 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -43,7 +43,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o -block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o +block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o diff --git a/thread-pool.c b/thread-pool.c new file mode 100644 index 0000000000..80749b77e0 --- /dev/null +++ b/thread-pool.c @@ -0,0 +1,282 @@ +/* + * QEMU block layer thread pool + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 
2012
+ *
+ * Authors:
+ * Anthony Liguori
+ * Paolo Bonzini
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "qemu-thread.h"
+#include "osdep.h"
+#include "qemu-coroutine.h"
+#include "trace.h"
+#include "block_int.h"
+#include "event_notifier.h"
+#include "thread-pool.h"
+
+static void do_spawn_thread(void); /* forward decl: worker_thread calls it */
+
+typedef struct ThreadPoolElement ThreadPoolElement;
+
+enum ThreadState { /* progress of one ThreadPoolElement through the pool */
+    THREAD_QUEUED,   /* set by thread_pool_submit_aio when the request is queued */
+    THREAD_ACTIVE,   /* set by worker_thread just before it calls func */
+    THREAD_DONE,     /* set by worker_thread after func has returned */
+    THREAD_CANCELED, /* set by thread_pool_cancel when it dequeues the request */
+};
+
+struct ThreadPoolElement { /* one submitted request */
+    BlockDriverAIOCB common; /* first member: thread_pool_cancel casts the
+                              * BlockDriverAIOCB pointer back to this struct */
+    ThreadPoolFunc *func;    /* work function, called by worker_thread */
+    void *arg;               /* argument handed to func */
+    enum ThreadState state;  /* written with lock held by the workers and by
+                              * thread_pool_cancel */
+    int ret;                 /* func's return value, stored by worker_thread
+                              * together with THREAD_DONE */
+
+    /* Access to this list is protected by lock. */
+    QTAILQ_ENTRY(ThreadPoolElement) reqs;
+
+    /* Access to this list is protected by the global mutex. */
+    QLIST_ENTRY(ThreadPoolElement) all;
+};
+
+static EventNotifier notifier; /* set by worker_thread and thread_pool_cancel;
+                                * handled by event_notifier_ready */
+static QemuMutex lock;
+static QemuCond check_cancel;  /* thread_pool_cancel waits on it (with lock);
+                                * worker_thread broadcasts it */
+static QemuSemaphore sem;      /* posted once per submitted request; workers
+                                * and thread_pool_cancel take from it */
+static int max_threads = 64;   /* thread_pool_submit_aio spawns no new worker
+                                * once cur_threads reaches this value */
+static QEMUBH *new_thread_bh;  /* created in thread_pool_init with
+                                * spawn_thread_bh_fn as its callback */
+
+/* The following variables are protected by the global mutex. */
+static QLIST_HEAD(, ThreadPoolElement) head;
+
+/* The following variables are protected by lock.
*/
+static QTAILQ_HEAD(, ThreadPoolElement) request_list;
+static int cur_threads;
+static int idle_threads;
+static int new_threads;     /* backlog of threads we need to create */
+static int pending_threads; /* threads created but not running yet */
+static int pending_cancellations; /* whether we need a cond_broadcast */
+
+/*
+ * Body of every pool thread (started by do_spawn_thread).
+ *
+ * On entry the thread removes itself from pending_threads and, through
+ * do_spawn_thread(), starts the next thread of the backlog if there is one.
+ * It then loops: wait on sem for a request, run the request without holding
+ * lock, store the result and set the notifier.  The thread leaves the loop,
+ * and decrements cur_threads, when the semaphore wait fails while
+ * request_list is empty.
+ *
+ * The argument is unused; the return value is always NULL.
+ */
+static void *worker_thread(void *unused)
+{
+    qemu_mutex_lock(&lock);
+    pending_threads--;
+    do_spawn_thread();
+
+    while (1) {
+        ThreadPoolElement *req;
+        int ret;
+
+        /* We are counted in idle_threads only while blocked on the
+         * semaphore; thread_pool_submit_aio looks at idle_threads to decide
+         * whether another worker is needed.
+         *
+         * NOTE(review): -1 is presumably the timeout result and 10000 is
+         * presumably in milliseconds; confirm against qemu_sem_timedwait.
+         */
+        do {
+            idle_threads++;
+            qemu_mutex_unlock(&lock);
+            ret = qemu_sem_timedwait(&sem, 10000);
+            qemu_mutex_lock(&lock);
+            idle_threads--;
+        } while (ret == -1 && !QTAILQ_EMPTY(&request_list));
+        if (ret == -1) {
+            /* The wait failed and nothing is queued: let the thread exit. */
+            break;
+        }
+
+        req = QTAILQ_FIRST(&request_list);
+        QTAILQ_REMOVE(&request_list, req, reqs);
+        req->state = THREAD_ACTIVE;
+        qemu_mutex_unlock(&lock);
+
+        /* The work function runs without the lock. */
+        ret = req->func(req->arg);
+
+        qemu_mutex_lock(&lock);
+        req->state = THREAD_DONE;
+        req->ret = ret;
+        if (pending_cancellations) {
+            /* thread_pool_cancel may be waiting for this very request. */
+            qemu_cond_broadcast(&check_cancel);
+        }
+
+        event_notifier_set(&notifier);
+    }
+
+    cur_threads--;
+    qemu_mutex_unlock(&lock);
+    return NULL;
+}
+
+/*
+ * Start one detached worker thread if the backlog (new_threads) is not
+ * empty, moving it from the backlog to pending_threads.
+ */
+static void do_spawn_thread(void)
+{
+    QemuThread t;
+
+    /* Runs with lock taken. */
+    if (!new_threads) {
+        return;
+    }
+
+    new_threads--;
+    pending_threads++;
+
+    qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED);
+}
+
+/*
+ * Callback of new_thread_bh (see thread_pool_init): takes lock and starts
+ * one thread of the backlog.  The opaque argument is unused.
+ */
+static void spawn_thread_bh_fn(void *opaque)
+{
+    qemu_mutex_lock(&lock);
+    do_spawn_thread();
+    qemu_mutex_unlock(&lock);
+}
+
+/*
+ * Account for one more worker and add it to the creation backlog.
+ * Called by thread_pool_submit_aio with lock taken.
+ */
+static void spawn_thread(void)
+{
+    cur_threads++;
+    new_threads++;
+    /* If there are threads being created, they will spawn new workers, so
+     * we don't spend time creating many threads in a loop holding a mutex or
+     * starving the current vcpu.
+     *
+     * If no thread creation is pending, ask the main thread to create one,
+     * so we inherit the correct affinity instead of the vcpu affinity.
+     */
+    if (!pending_threads) {
+        qemu_bh_schedule(new_thread_bh);
+    }
+}
+
+/*
+ * Handler registered for the pool's event notifier in thread_pool_init.
+ *
+ * Walks the list of all requests and retires those that are THREAD_DONE or
+ * THREAD_CANCELED.  A completed request that has a callback gets it invoked
+ * with the work function's return value.  Every retired request is unlinked
+ * from the list and released with qemu_aio_release().
+ *
+ * Note that the parameter shadows the file-scope variable "notifier".
+ */
+static void event_notifier_ready(EventNotifier *notifier)
+{
+    ThreadPoolElement *elem, *next;
+
+    event_notifier_test_and_clear(notifier);
+restart:
+    QLIST_FOREACH_SAFE(elem, &head, all, next) {
+        if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
+            continue;
+        }
+        if (elem->state == THREAD_DONE) {
+            trace_thread_pool_complete(elem, elem->common.opaque, elem->ret);
+        }
+        if (elem->state == THREAD_DONE && elem->common.cb) {
+            int ret;
+
+            qemu_mutex_lock(&lock);
+            ret = elem->ret;
+            qemu_mutex_unlock(&lock);
+            QLIST_REMOVE(elem, all);
+            elem->common.cb(elem->common.opaque, ret);
+            qemu_aio_release(elem);
+            /* Rescan from the head instead of trusting the saved "next";
+             * presumably because the callback may have changed the list.
+             */
+            goto restart;
+        } else {
+            /* remove the request */
+            QLIST_REMOVE(elem, all);
+            qemu_aio_release(elem);
+        }
+    }
+}
+
+/*
+ * Second callback passed to qemu_aio_set_event_notifier in
+ * thread_pool_init.  Returns non-zero while at least one request is still
+ * on the list of all requests.  The parameter is unused.
+ */
+static int thread_pool_active(EventNotifier *notifier)
+{
+    return !QLIST_EMPTY(&head);
+}
+
+/*
+ * Cancel hook of thread_pool_cb_pool.
+ *
+ * If the request is still queued and a semaphore count can be taken
+ * without blocking, the request is removed from request_list and marked
+ * THREAD_CANCELED; event_notifier_ready then releases it without calling
+ * the callback.  Otherwise this function blocks on check_cancel until a
+ * worker has marked the request THREAD_DONE, and its callback, if any, is
+ * still invoked later by event_notifier_ready.
+ */
+static void thread_pool_cancel(BlockDriverAIOCB *acb)
+{
+    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+
+    trace_thread_pool_cancel(elem, elem->common.opaque);
+
+    qemu_mutex_lock(&lock);
+    if (elem->state == THREAD_QUEUED &&
+        /* No thread has yet started working on elem. we can try to "steal"
+         * the item from the worker if we can get a signal from the
+         * semaphore. Because this is non-blocking, we can do it with
+         * the lock taken and ensure that elem will remain THREAD_QUEUED.
+         */
+        qemu_sem_timedwait(&sem, 0) == 0) {
+        QTAILQ_REMOVE(&request_list, elem, reqs);
+        elem->state = THREAD_CANCELED;
+        event_notifier_set(&notifier);
+    } else {
+        /* Tell the workers that somebody wants a broadcast on completion. */
+        pending_cancellations++;
+        while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
+            qemu_cond_wait(&check_cancel, &lock);
+        }
+        pending_cancellations--;
+    }
+    qemu_mutex_unlock(&lock);
+}
+
+/* AIOCB pool description used by thread_pool_submit_aio. */
+static AIOPool thread_pool_cb_pool = {
+    .aiocb_size = sizeof(ThreadPoolElement),
+    .cancel = thread_pool_cancel,
+};
+
+/*
+ * Queue func(arg) for execution in a worker thread.
+ *
+ * func:   work function; its int result is passed to cb unchanged
+ * arg:    argument for func
+ * cb:     completion callback, invoked from event_notifier_ready; may be
+ *         NULL (thread_pool_submit does that), in which case the request
+ *         is simply released when done
+ * opaque: first argument for cb
+ *
+ * A new worker is requested when no worker is idle and fewer than
+ * max_threads exist.  Returns the AIOCB embedded in the new request.
+ *
+ * The request is linked into the list of all requests without taking
+ * lock; that list is documented as protected by the global mutex.
+ */
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    ThreadPoolElement *req;
+
+    req = qemu_aio_get(&thread_pool_cb_pool, NULL, cb, opaque);
+    req->func = func;
+    req->arg = arg;
+    req->state = THREAD_QUEUED;
+
+    QLIST_INSERT_HEAD(&head, req, all);
+
+    trace_thread_pool_submit(req, arg);
+
+    qemu_mutex_lock(&lock);
+    if (idle_threads == 0 && cur_threads < max_threads) {
+        spawn_thread();
+    }
+    QTAILQ_INSERT_TAIL(&request_list, req, reqs);
+    qemu_mutex_unlock(&lock);
+    /* One semaphore count per queued request. */
+    qemu_sem_post(&sem);
+    return &req->common;
+}
+
+/* State shared between thread_pool_submit_co and thread_pool_co_cb. */
+typedef struct ThreadPoolCo {
+    Coroutine *co; /* coroutine to re-enter on completion */
+    int ret;       /* result of the work function */
+} ThreadPoolCo;
+
+/*
+ * Completion callback used by thread_pool_submit_co: stores the result and
+ * re-enters the coroutine that submitted the request.
+ */
+static void thread_pool_co_cb(void *opaque, int ret)
+{
+    ThreadPoolCo *co = opaque;
+
+    co->ret = ret;
+    qemu_coroutine_enter(co->co, NULL);
+}
+
+/*
+ * Coroutine variant of thread_pool_submit_aio: submits func(arg), yields,
+ * and returns the value stored by thread_pool_co_cb, i.e. the result of
+ * func.  Must be called from a coroutine (asserted).
+ */
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
+{
+    ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
+    assert(qemu_in_coroutine());
+    thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
+    qemu_coroutine_yield();
+    return tpc.ret;
+}
+
+/* Fire-and-forget submission: no callback, the result of func is dropped. */
+void thread_pool_submit(ThreadPoolFunc *func, void *arg)
+{
+    thread_pool_submit_aio(func, arg, NULL, NULL);
+}
+
+/*
+ * One-time setup, registered through block_init() below: initializes the
+ * lists and synchronization objects, hooks the event notifier up to
+ * event_notifier_ready/thread_pool_active, and creates the bottom half
+ * that spawns threads.
+ */
+static void thread_pool_init(void)
+{
+    QLIST_INIT(&head);
+    event_notifier_init(&notifier, false);
+    qemu_mutex_init(&lock);
+    qemu_cond_init(&check_cancel);
+    qemu_sem_init(&sem, 0);
+    qemu_aio_set_event_notifier(&notifier, event_notifier_ready,
+                                thread_pool_active);
+
+    QTAILQ_INIT(&request_list);
+    new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
+}
+
+block_init(thread_pool_init)
diff --git a/thread-pool.h b/thread-pool.h
new file mode 100644
index 0000000000..378a4ac9f9
--- /dev/null
+++ b/thread-pool.h
@@ -0,0 +1,34 @@
+/*
+ * QEMU block layer thread pool
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Anthony Liguori
+ * Paolo Bonzini
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef QEMU_THREAD_POOL_H
+#define QEMU_THREAD_POOL_H 1
+
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "qemu-thread.h"
+#include "qemu-coroutine.h"
+#include "block_int.h"
+
+typedef int ThreadPoolFunc(void *opaque);
+
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+        BlockDriverCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+
+#endif
diff --git a/trace-events b/trace-events
index e2d4580d4c..58c18ebb6c 100644
--- a/trace-events
+++ b/trace-events
@@ -90,6 +90,11 @@ virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
 virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 
+# thread-pool.c
+thread_pool_submit(void *req, void *opaque) "req %p opaque %p"
+thread_pool_complete(void *req, void *opaque, int ret) "req %p opaque %p ret %d"
+thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
+
 # posix-aio-compat.c
 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
paio_complete(void *acb, void *opaque, int ret) "acb %p opaque %p ret %d" -- cgit v1.2.3