aboutsummaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2020-02-18 18:27:08 +0000
committerStefan Hajnoczi <stefanha@redhat.com>2020-02-22 08:26:47 +0000
commitf25c0b547916962d0b1be260b5b643287bea0851 (patch)
tree15de99bb92f45f7ffd006085ccb8f5e6fcd40427 /util
parentc9b7d9ec21dfca716f0bb3b68dee75660d86629c (diff)
aio-posix: avoid reacquiring rcu_read_lock() when polling
The first rcu_read_lock/unlock() is expensive. Nested calls are cheap. This optimization increases IOPS from 73k to 162k with a Linux guest that has 2 virtio-blk,num-queues=1 and 99 virtio-blk,num-queues=32 devices. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-id: 20200218182708.914552-1-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'util')
-rw-r--r--util/aio-posix.c11
1 files changed, 11 insertions, 0 deletions
diff --git a/util/aio-posix.c b/util/aio-posix.c
index a4977f538e..f67f5b34e9 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -15,6 +15,7 @@
#include "qemu/osdep.h"
#include "block/block.h"
+#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
@@ -514,6 +515,16 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
bool progress = false;
AioHandler *node;
+ /*
+ * Optimization: ->io_poll() handlers often contain RCU read critical
+ * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
+ * -> rcu_read_lock() -> ... sequences with expensive memory
+ * synchronization primitives. Make the entire polling loop an RCU
+ * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
+ * are cheap.
+ */
+ RCU_READ_LOCK_GUARD();
+
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
if (!node->deleted && node->io_poll &&
aio_node_check(ctx, node->is_external) &&