aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-03-11 14:41:27 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-03-11 14:41:27 +0000
commit6e8a73e911f066527e775e04b98f31ebd19db600 (patch)
treee7e14f8a09d10c275e4d8164b8b3091fedcdbc46 /include
parentba29883206d92a29ad5a466e679ccfc2ee6132ef (diff)
parentd37d0e365afb6825a90d8356fc6adcc1f58f40f3 (diff)
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request # gpg: Signature made Wed 11 Mar 2020 12:40:36 GMT # gpg: using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full] # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" [full] # Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8 * remotes/stefanha/tags/block-pull-request: aio-posix: remove idle poll handlers to improve scalability aio-posix: support userspace polling of fd monitoring aio-posix: add io_uring fd monitoring implementation aio-posix: simplify FDMonOps->update() prototype aio-posix: extract ppoll(2) and epoll(7) fd monitoring aio-posix: move RCU_READ_LOCK() into run_poll_handlers() aio-posix: completely stop polling when disabled aio-posix: remove confusing QLIST_SAFE_REMOVE() qemu/queue.h: clear linked list pointers on remove Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include')
-rw-r--r--include/block/aio.h71
-rw-r--r--include/qemu/queue.h19
2 files changed, 84 insertions, 6 deletions
diff --git a/include/block/aio.h b/include/block/aio.h
index 9dd61cee7e..cb1989105a 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -14,6 +14,9 @@
#ifndef QEMU_AIO_H
#define QEMU_AIO_H
+#ifdef CONFIG_LINUX_IO_URING
+#include <liburing.h>
+#endif
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -52,6 +55,56 @@ struct ThreadPool;
struct LinuxAioState;
struct LuringState;
+/* Is polling disabled? */
+bool aio_poll_disabled(AioContext *ctx);
+
+/* Callbacks for file descriptor monitoring implementations */
+typedef struct {
+ /*
+ * update:
+ * @ctx: the AioContext
+ * @old_node: the existing handler or NULL if this file descriptor is being
+ * monitored for the first time
+ * @new_node: the new handler or NULL if this file descriptor is being
+ * removed
+ *
+ * Add/remove/modify a monitored file descriptor.
+ *
+ * Called with ctx->list_lock acquired.
+ */
+ void (*update)(AioContext *ctx, AioHandler *old_node, AioHandler *new_node);
+
+ /*
+ * wait:
+ * @ctx: the AioContext
+ * @ready_list: list for handlers that become ready
+ * @timeout: maximum duration to wait, in nanoseconds
+ *
+ * Wait for file descriptors to become ready and place them on ready_list.
+ *
+ * Called with ctx->list_lock incremented but not locked.
+ *
+ * Returns: number of ready file descriptors.
+ */
+ int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);
+
+ /*
+ * need_wait:
+ * @ctx: the AioContext
+ *
+ * Tell aio_poll() when to stop userspace polling early because ->wait()
+ * has fds ready.
+ *
+ * File descriptor monitoring implementations that cannot poll fd readiness
+ * from userspace should use aio_poll_disabled() here. This ensures that
+ * file descriptors are not starved by handlers that frequently make
+ * progress via userspace polling.
+ *
+ * Returns: true if ->wait() should be called, false otherwise.
+ */
+ bool (*need_wait)(AioContext *ctx);
+} FDMonOps;
+
/*
* Each aio_bh_poll() call carves off a slice of the BH list, so that newly
* scheduled BHs are not processed until the next aio_bh_poll() call. All
@@ -65,6 +118,8 @@ struct BHListSlice {
QSIMPLEQ_ENTRY(BHListSlice) next;
};
+typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
+
struct AioContext {
GSource source;
@@ -150,6 +205,10 @@ struct AioContext {
* locking.
*/
struct LuringState *linux_io_uring;
+
+ /* State for file descriptor monitoring using Linux io_uring */
+ struct io_uring fdmon_io_uring;
+ AioHandlerSList submit_list;
#endif
/* TimerLists for calling timers - one per clock type. Has its own
@@ -168,13 +227,21 @@ struct AioContext {
int64_t poll_grow; /* polling time growth factor */
int64_t poll_shrink; /* polling time shrink factor */
+ /*
+ * List of handlers participating in userspace polling. Protected by
+ * ctx->list_lock. Iterated and modified mostly by the event loop thread
+ * from aio_poll() with ctx->list_lock incremented. aio_set_fd_handler()
+ * only touches the list to delete nodes if ctx->list_lock's count is zero.
+ */
+ AioHandlerList poll_aio_handlers;
+
/* Are we in polling mode or monitoring file descriptors? */
bool poll_started;
/* epoll(7) state used when built with CONFIG_EPOLL */
int epollfd;
- bool epoll_enabled;
- bool epoll_available;
+
+ const FDMonOps *fdmon_ops;
};
/**
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 294db54eb1..456a5b01ee 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -142,6 +142,8 @@ struct { \
(elm)->field.le_next->field.le_prev = \
(elm)->field.le_prev; \
*(elm)->field.le_prev = (elm)->field.le_next; \
+ (elm)->field.le_next = NULL; \
+ (elm)->field.le_prev = NULL; \
} while (/*CONSTCOND*/0)
/*
@@ -225,12 +227,15 @@ struct { \
} while (/*CONSTCOND*/0)
#define QSLIST_REMOVE_HEAD(head, field) do { \
- (head)->slh_first = (head)->slh_first->field.sle_next; \
+ typeof((head)->slh_first) elm = (head)->slh_first; \
+ (head)->slh_first = elm->field.sle_next; \
+ elm->field.sle_next = NULL; \
} while (/*CONSTCOND*/0)
#define QSLIST_REMOVE_AFTER(slistelm, field) do { \
- (slistelm)->field.sle_next = \
- QSLIST_NEXT(QSLIST_NEXT((slistelm), field), field); \
+ typeof(slistelm) next = (slistelm)->field.sle_next; \
+ (slistelm)->field.sle_next = next->field.sle_next; \
+ next->field.sle_next = NULL; \
} while (/*CONSTCOND*/0)
#define QSLIST_REMOVE(head, elm, type, field) do { \
@@ -241,6 +246,7 @@ struct { \
while (curelm->field.sle_next != (elm)) \
curelm = curelm->field.sle_next; \
curelm->field.sle_next = curelm->field.sle_next->field.sle_next; \
+ (elm)->field.sle_next = NULL; \
} \
} while (/*CONSTCOND*/0)
@@ -304,8 +310,10 @@ struct { \
} while (/*CONSTCOND*/0)
#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \
- if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\
+ typeof((head)->sqh_first) elm = (head)->sqh_first; \
+ if (((head)->sqh_first = elm->field.sqe_next) == NULL) \
(head)->sqh_last = &(head)->sqh_first; \
+ elm->field.sqe_next = NULL; \
} while (/*CONSTCOND*/0)
#define QSIMPLEQ_SPLIT_AFTER(head, elm, field, removed) do { \
@@ -329,6 +337,7 @@ struct { \
if ((curelm->field.sqe_next = \
curelm->field.sqe_next->field.sqe_next) == NULL) \
(head)->sqh_last = &(curelm)->field.sqe_next; \
+ (elm)->field.sqe_next = NULL; \
} \
} while (/*CONSTCOND*/0)
@@ -446,6 +455,8 @@ union { \
(head)->tqh_circ.tql_prev = (elm)->field.tqe_circ.tql_prev; \
(elm)->field.tqe_circ.tql_prev->tql_next = (elm)->field.tqe_next; \
(elm)->field.tqe_circ.tql_prev = NULL; \
+ (elm)->field.tqe_circ.tql_next = NULL; \
+ (elm)->field.tqe_next = NULL; \
} while (/*CONSTCOND*/0)
/* remove @left, @right and all elements in between from @head */