diff options
-rw-r--r-- | aio-posix.c | 184 | ||||
-rw-r--r-- | include/block/aio.h | 5 |
2 files changed, 188 insertions, 1 deletions
diff --git a/aio-posix.c b/aio-posix.c index 5bff3cd670..06148a9ba3 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -17,6 +17,9 @@ #include "block/block.h" #include "qemu/queue.h" #include "qemu/sockets.h" +#ifdef CONFIG_EPOLL +#include <sys/epoll.h> +#endif struct AioHandler { @@ -29,6 +32,162 @@ struct AioHandler QLIST_ENTRY(AioHandler) node; }; +#ifdef CONFIG_EPOLL + +/* The fd number threashold to switch to epoll */ +#define EPOLL_ENABLE_THRESHOLD 64 + +static void aio_epoll_disable(AioContext *ctx) +{ + ctx->epoll_available = false; + if (!ctx->epoll_enabled) { + return; + } + ctx->epoll_enabled = false; + close(ctx->epollfd); +} + +static inline int epoll_events_from_pfd(int pfd_events) +{ + return (pfd_events & G_IO_IN ? EPOLLIN : 0) | + (pfd_events & G_IO_OUT ? EPOLLOUT : 0) | + (pfd_events & G_IO_HUP ? EPOLLHUP : 0) | + (pfd_events & G_IO_ERR ? EPOLLERR : 0); +} + +static bool aio_epoll_try_enable(AioContext *ctx) +{ + AioHandler *node; + struct epoll_event event; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + int r; + if (node->deleted || !node->pfd.events) { + continue; + } + event.events = epoll_events_from_pfd(node->pfd.events); + event.data.ptr = node; + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); + if (r) { + return false; + } + } + ctx->epoll_enabled = true; + return true; +} + +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) +{ + struct epoll_event event; + int r; + + if (!ctx->epoll_enabled) { + return; + } + if (!node->pfd.events) { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } else { + event.data.ptr = node; + event.events = epoll_events_from_pfd(node->pfd.events); + if (is_new) { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } else { + r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); + } + } + } +} + +static int aio_epoll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + AioHandler *node; + int i, ret = 0; + struct epoll_event events[128]; + + assert(npfd == 1); + assert(pfds[0].fd == ctx->epollfd); + if (timeout > 0) { + ret = qemu_poll_ns(pfds, npfd, timeout); + } + if (timeout <= 0 || ret > 0) { + ret = epoll_wait(ctx->epollfd, events, + sizeof(events) / sizeof(events[0]), + timeout); + if (ret <= 0) { + goto out; + } + for (i = 0; i < ret; i++) { + int ev = events[i].events; + node = events[i].data.ptr; + node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) | + (ev & EPOLLOUT ? G_IO_OUT : 0) | + (ev & EPOLLHUP ? G_IO_HUP : 0) | + (ev & EPOLLERR ? G_IO_ERR : 0); + } + } +out: + return ret; +} + +static bool aio_epoll_enabled(AioContext *ctx) +{ + /* Fall back to ppoll when external clients are disabled. */ + return !aio_external_disabled(ctx) && ctx->epoll_enabled; +} + +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + if (!ctx->epoll_available) { + return false; + } + if (aio_epoll_enabled(ctx)) { + return true; + } + if (npfd >= EPOLL_ENABLE_THRESHOLD) { + if (aio_epoll_try_enable(ctx)) { + return true; + } else { + aio_epoll_disable(ctx); + } + } + return false; +} + +#else + +static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) +{ +} + +static int aio_epoll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + assert(false); +} + +static bool aio_epoll_enabled(AioContext *ctx) +{ + return false; +} + +static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds, + unsigned npfd, int64_t timeout) +{ + return false; +} + +#endif + static AioHandler *find_aio_handler(AioContext *ctx, int fd) { AioHandler *node; @@ -50,6 +209,7 @@ void aio_set_fd_handler(AioContext *ctx, void *opaque) { AioHandler *node; + bool is_new = false; node = find_aio_handler(ctx, fd); @@ -79,6 +239,7 @@ void aio_set_fd_handler(AioContext *ctx, QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); g_source_add_poll(&ctx->source, &node->pfd); + is_new = true; } /* Update handler with latest information */ node->io_read = io_read; @@ -90,6 +251,7 @@ void aio_set_fd_handler(AioContext *ctx, node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0); } + aio_epoll_update(ctx, node, is_new); aio_notify(ctx); } @@ -262,6 +424,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* fill pollfds */ QLIST_FOREACH(node, &ctx->aio_handlers, node) { if (!node->deleted && node->pfd.events + && !aio_epoll_enabled(ctx) && aio_node_check(ctx, node->is_external)) { add_pollfd(node); } @@ -273,7 +436,17 @@ bool aio_poll(AioContext *ctx, bool blocking) if (timeout) { aio_context_release(ctx); } - ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout); + if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { + AioHandler epoll_handler; + + epoll_handler.pfd.fd = ctx->epollfd; + epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR; + npfd = 0; + add_pollfd(&epoll_handler); + ret = aio_epoll(ctx, pollfds, npfd, timeout); + } else { + ret = qemu_poll_ns(pollfds, npfd, timeout); + } if (blocking) { atomic_sub(&ctx->notify_me, 2); } @@ -305,4 +478,13 @@ bool aio_poll(AioContext *ctx, bool blocking) void aio_context_setup(AioContext *ctx, Error **errp) { +#ifdef CONFIG_EPOLL + assert(!ctx->epollfd); + ctx->epollfd = epoll_create1(EPOLL_CLOEXEC); + if (ctx->epollfd == -1) { + ctx->epoll_available = false; + } else { + ctx->epoll_available = true; + } +#endif } diff --git a/include/block/aio.h b/include/block/aio.h index 9ffecf7977..e086e3b4ee 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -124,6 +124,11 @@ struct AioContext { QEMUTimerListGroup tlg; int external_disable_cnt; + + /* epoll(7) state used when built with CONFIG_EPOLL */ + int epollfd; + bool epoll_enabled; + bool epoll_available; }; /** |