aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--Makefile.target2
-rw-r--r--aio.c192
-rw-r--r--block-raw-posix.c109
-rw-r--r--block-raw-win32.c13
-rw-r--r--block.c2
-rw-r--r--block.h6
-rw-r--r--qemu-aio.h45
8 files changed, 285 insertions, 86 deletions
diff --git a/Makefile b/Makefile
index 7bb2bf2045..de6393e2f9 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ BLOCK_OBJS=cutils.o qemu-malloc.o
BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
BLOCK_OBJS+=block-qcow2.o block-parallels.o block-nbd.o
-BLOCK_OBJS+=nbd.o block.o
+BLOCK_OBJS+=nbd.o block.o aio.o
ifdef CONFIG_WIN32
BLOCK_OBJS += block-raw-win32.o
diff --git a/Makefile.target b/Makefile.target
index 88e877ff4b..4a490f4f85 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -474,7 +474,7 @@ endif #CONFIG_DARWIN_USER
ifndef CONFIG_USER_ONLY
OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
-OBJS+=fw_cfg.o
+OBJS+=fw_cfg.o aio.o
ifdef CONFIG_WIN32
OBJS+=block-raw-win32.o
else
diff --git a/aio.c b/aio.c
new file mode 100644
index 0000000000..687e4bef08
--- /dev/null
+++ b/aio.c
@@ -0,0 +1,192 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block.h"
+#include "sys-queue.h"
+#include "qemu_socket.h"
+
+typedef struct AioHandler AioHandler;
+
+/* The list of registered AIO handlers */
+static LIST_HEAD(, AioHandler) aio_handlers;
+
+/* This is a simple lock used to protect the aio_handlers list. Specifically,
+ * it's used to ensure that no callbacks are removed while we're walking and
+ * dispatching callbacks.
+ */
+static int walking_handlers;
+
+/* One registration in the aio_handlers list: a file descriptor together with
+ * the callbacks that qemu_aio_wait() and qemu_aio_flush() invoke for it.
+ */
+struct AioHandler
+{
+ /* File descriptor of this registration; the key compared by
+  * find_aio_handler(). */
+ int fd;
+ /* Called with opaque when select() reports fd readable; may be NULL. */
+ IOHandler *io_read;
+ /* Called with opaque when select() reports fd writable; may be NULL. */
+ IOHandler *io_write;
+ /* Called with opaque to ask whether requests are still pending. A zero
+  * return makes qemu_aio_wait() leave fd out of its select() sets. */
+ AioFlushHandler *io_flush;
+ /* Set instead of freeing the node when removal is requested while
+  * walking_handlers is non-zero; the dispatch loop in qemu_aio_wait()
+  * unlinks and frees nodes carrying this flag. */
+ int deleted;
+ /* Caller-supplied pointer handed to io_read, io_write and io_flush. */
+ void *opaque;
+ /* Linkage into the aio_handlers list. */
+ LIST_ENTRY(AioHandler) node;
+};
+
+/* Return the first entry of aio_handlers registered for @fd, or NULL when no
+ * entry matches. Entries flagged as deleted are not filtered out here.
+ */
+static AioHandler *find_aio_handler(int fd)
+{
+    AioHandler *cur = LIST_FIRST(&aio_handlers);
+
+    /* Advance until the list ends or the descriptor matches */
+    while (cur != NULL && cur->fd != fd) {
+        cur = LIST_NEXT(cur, node);
+    }
+
+    return cur;
+}
+
+/* Register, update or remove the AIO callbacks for @fd.
+ *
+ * Passing NULL for both @io_read and @io_write removes the registration;
+ * anything else creates the entry if needed and stores the new callbacks.
+ * In every case the same read/write callbacks are forwarded to
+ * qemu_set_fd_handler2().
+ *
+ * Returns 0 on success, or -ENOMEM when a new entry cannot be allocated.
+ */
+int qemu_aio_set_fd_handler(int fd,
+                            IOHandler *io_read,
+                            IOHandler *io_write,
+                            AioFlushHandler *io_flush,
+                            void *opaque)
+{
+    AioHandler *node;
+
+    node = find_aio_handler(fd);
+
+    /* Are we deleting the fd handler? */
+    if (!io_read && !io_write) {
+        if (node) {
+            /* If the lock is held, just mark the node as deleted */
+            if (walking_handlers)
+                node->deleted = 1;
+            else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                LIST_REMOVE(node, node);
+                qemu_free(node);
+            }
+        }
+    } else {
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = qemu_mallocz(sizeof(AioHandler));
+            if (node == NULL)
+                return -ENOMEM;
+            node->fd = fd;
+            LIST_INSERT_HEAD(&aio_handlers, node, node);
+        }
+        /* Update handler with latest information */
+        node->io_read = io_read;
+        node->io_write = io_write;
+        node->io_flush = io_flush;
+        node->opaque = opaque;
+        /* The entry may have been flagged for deferred removal earlier and
+         * not reaped yet.  It is in use again, so clear the flag; otherwise
+         * qemu_aio_wait() would skip it and then free it.
+         */
+        node->deleted = 0;
+    }
+
+    qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
+
+    return 0;
+}
+
+/* Keep calling qemu_aio_wait() until no registered handler reports pending
+ * work.  Each pass ORs together the io_flush results of all handlers, then
+ * runs one qemu_aio_wait(); the loop repeats while that combined result is
+ * positive.
+ */
+void qemu_aio_flush(void)
+{
+    AioHandler *node;
+    int ret;
+
+    do {
+        ret = 0;
+
+        LIST_FOREACH(node, &aio_handlers, node) {
+            /* io_flush is optional (qemu_aio_wait() also checks it before
+             * calling), so do not call through a NULL pointer here.
+             */
+            if (node->io_flush)
+                ret |= node->io_flush(node->opaque);
+        }
+
+        qemu_aio_wait();
+    } while (ret > 0);
+}
+
+/* Run pending bottom halves or, failing that, block in select() on the
+ * registered AIO descriptors and dispatch their read/write callbacks.
+ *
+ * Returns immediately when qemu_bh_poll() reports work done, or when no
+ * handler has anything to wait for.
+ */
+void qemu_aio_wait(void)
+{
+    int ret;
+
+    if (qemu_bh_poll())
+        return;
+
+    do {
+        AioHandler *node;
+        fd_set rdfds, wrfds;
+        int max_fd = -1;
+
+        walking_handlers = 1;
+
+        /* The sets are automatic variables: clear them before adding
+         * descriptors, otherwise select() would see indeterminate bits.
+         */
+        FD_ZERO(&rdfds);
+        FD_ZERO(&wrfds);
+
+        /* fill fd sets */
+        LIST_FOREACH(node, &aio_handlers, node) {
+            /* If there aren't pending AIO operations, don't invoke callbacks.
+             * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+             * wait indefinitely.
+             */
+            if (node->io_flush && node->io_flush(node->opaque) == 0)
+                continue;
+
+            if (!node->deleted && node->io_read) {
+                FD_SET(node->fd, &rdfds);
+                max_fd = MAX(max_fd, node->fd + 1);
+            }
+            if (!node->deleted && node->io_write) {
+                FD_SET(node->fd, &wrfds);
+                max_fd = MAX(max_fd, node->fd + 1);
+            }
+        }
+
+        walking_handlers = 0;
+
+        /* No AIO operations?  Get us out of here */
+        if (max_fd == -1)
+            break;
+
+        /* wait until next event */
+        ret = select(max_fd, &rdfds, &wrfds, NULL, NULL);
+        /* Note: `continue` jumps to the loop condition; ret is -1 there, so
+         * an interrupted select() ends the loop without dispatching.
+         */
+        if (ret == -1 && errno == EINTR)
+            continue;
+
+        /* if we have any readable or writable fds, dispatch event */
+        if (ret > 0) {
+            walking_handlers = 1;
+
+            /* we have to walk very carefully in case
+             * qemu_aio_set_fd_handler is called while we're walking */
+            node = LIST_FIRST(&aio_handlers);
+            while (node) {
+                AioHandler *tmp;
+
+                if (!node->deleted &&
+                    FD_ISSET(node->fd, &rdfds) &&
+                    node->io_read) {
+                    node->io_read(node->opaque);
+                }
+                if (!node->deleted &&
+                    FD_ISSET(node->fd, &wrfds) &&
+                    node->io_write) {
+                    node->io_write(node->opaque);
+                }
+
+                /* Fetch the successor before the current node may be freed */
+                tmp = node;
+                node = LIST_NEXT(node, node);
+
+                if (tmp->deleted) {
+                    LIST_REMOVE(tmp, node);
+                    qemu_free(tmp);
+                }
+            }
+
+            walking_handlers = 0;
+        }
+    } while (ret == 0);
+}
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 26819a4d8c..41f997686f 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -101,6 +101,8 @@ typedef struct BDRVRawState {
#endif
} BDRVRawState;
+static int posix_aio_init(void);
+
static int fd_open(BlockDriverState *bs);
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
@@ -108,6 +110,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ posix_aio_init();
+
s->lseek_err_cnt = 0;
open_flags = O_BINARY;
@@ -437,13 +441,15 @@ typedef struct RawAIOCB {
int ret;
} RawAIOCB;
-static int aio_sig_fd = -1;
-static int aio_sig_num = SIGUSR2;
-static RawAIOCB *first_aio; /* AIO issued */
-static int aio_initialized = 0;
+typedef struct PosixAioState
+{
+ int fd;
+ RawAIOCB *first_aio;
+} PosixAioState;
-static void qemu_aio_poll(void *opaque)
+static void posix_aio_read(void *opaque)
{
+ PosixAioState *s = opaque;
RawAIOCB *acb, **pacb;
int ret;
size_t offset;
@@ -457,7 +463,7 @@ static void qemu_aio_poll(void *opaque)
while (offset < 128) {
ssize_t len;
- len = read(aio_sig_fd, sig.buf + offset, 128 - offset);
+ len = read(s->fd, sig.buf + offset, 128 - offset);
if (len == -1 && errno == EINTR)
continue;
if (len == -1 && errno == EAGAIN) {
@@ -472,7 +478,7 @@ static void qemu_aio_poll(void *opaque)
}
for(;;) {
- pacb = &first_aio;
+ pacb = &s->first_aio;
for(;;) {
acb = *pacb;
if (!acb)
@@ -507,25 +513,37 @@ static void qemu_aio_poll(void *opaque)
the_end: ;
}
-void qemu_aio_init(void)
+static int posix_aio_flush(void *opaque)
{
- sigset_t mask;
+ PosixAioState *s = opaque;
+ return !!s->first_aio;
+}
- if (aio_initialized)
- return;
+static PosixAioState *posix_aio_state;
- aio_initialized = 1;
+static int posix_aio_init(void)
+{
+ sigset_t mask;
+ PosixAioState *s;
+
+ if (posix_aio_state)
+ return 0;
+
+ s = qemu_malloc(sizeof(PosixAioState));
+ if (s == NULL)
+ return -ENOMEM;
/* Make sure to block AIO signal */
sigemptyset(&mask);
- sigaddset(&mask, aio_sig_num);
+ sigaddset(&mask, SIGUSR2);
sigprocmask(SIG_BLOCK, &mask, NULL);
- aio_sig_fd = qemu_signalfd(&mask);
+ s->first_aio = NULL;
+ s->fd = qemu_signalfd(&mask);
- fcntl(aio_sig_fd, F_SETFL, O_NONBLOCK);
+ fcntl(s->fd, F_SETFL, O_NONBLOCK);
- qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s);
#if defined(__GLIBC__) && defined(__linux__)
{
@@ -539,39 +557,9 @@ void qemu_aio_init(void)
aio_init(&ai);
}
#endif
-}
-
-/* Wait for all IO requests to complete. */
-void qemu_aio_flush(void)
-{
- qemu_aio_poll(NULL);
- while (first_aio) {
- qemu_aio_wait();
- }
-}
-
-void qemu_aio_wait(void)
-{
- int ret;
-
- if (qemu_bh_poll())
- return;
-
- if (!first_aio)
- return;
-
- do {
- fd_set rdfds;
-
- FD_ZERO(&rdfds);
- FD_SET(aio_sig_fd, &rdfds);
+ posix_aio_state = s;
- ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL);
- if (ret == -1 && errno == EINTR)
- continue;
- } while (ret == 0);
-
- qemu_aio_poll(NULL);
+ return 0;
}
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
@@ -588,7 +576,7 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
if (!acb)
return NULL;
acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
+ acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
acb->aiocb.aio_buf = buf;
if (nb_sectors < 0)
@@ -596,8 +584,8 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
else
acb->aiocb.aio_nbytes = nb_sectors * 512;
acb->aiocb.aio_offset = sector_num * 512;
- acb->next = first_aio;
- first_aio = acb;
+ acb->next = posix_aio_state->first_aio;
+ posix_aio_state->first_aio = acb;
return acb;
}
@@ -688,7 +676,7 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
}
/* remove the callback from the queue */
- pacb = &first_aio;
+ pacb = &posix_aio_state->first_aio;
for(;;) {
if (*pacb == NULL) {
break;
@@ -701,21 +689,10 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
}
}
-# else /* CONFIG_AIO */
-
-void qemu_aio_init(void)
+#else /* CONFIG_AIO */
+/* Without CONFIG_AIO there is nothing to set up; report success. */
+static int posix_aio_init(void)
{
+    /* The function is declared int, so it must return a value rather than
+     * fall off the end. */
+    return 0;
}
-
-void qemu_aio_flush(void)
-{
-}
-
-void qemu_aio_wait(void)
-{
- qemu_bh_poll();
-}
-
#endif /* CONFIG_AIO */
static void raw_close(BlockDriverState *bs)
@@ -921,6 +898,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ posix_aio_init();
+
#ifdef CONFIG_COCOA
if (strstart(filename, "/dev/cdrom", NULL)) {
kern_return_t kernResult;
diff --git a/block-raw-win32.c b/block-raw-win32.c
index 71404acb58..fd4a9e3a4c 100644
--- a/block-raw-win32.c
+++ b/block-raw-win32.c
@@ -339,19 +339,6 @@ static int raw_create(const char *filename, int64_t total_size,
return 0;
}
-void qemu_aio_init(void)
-{
-}
-
-void qemu_aio_flush(void)
-{
-}
-
-void qemu_aio_wait(void)
-{
- qemu_bh_poll();
-}
-
BlockDriver bdrv_raw = {
"raw",
sizeof(BDRVRawState),
diff --git a/block.c b/block.c
index 15f807a0ca..27b39d65d8 100644
--- a/block.c
+++ b/block.c
@@ -1310,8 +1310,6 @@ void bdrv_init(void)
bdrv_register(&bdrv_qcow2);
bdrv_register(&bdrv_parallels);
bdrv_register(&bdrv_nbd);
-
- qemu_aio_init();
}
void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
diff --git a/block.h b/block.h
index d774a2e1f6..d0e9fd49e4 100644
--- a/block.h
+++ b/block.h
@@ -1,6 +1,8 @@
#ifndef BLOCK_H
#define BLOCK_H
+#include "qemu-aio.h"
+
/* block.c */
typedef struct BlockDriver BlockDriver;
@@ -87,10 +89,6 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
BlockDriverCompletionFunc *cb, void *opaque);
void bdrv_aio_cancel(BlockDriverAIOCB *acb);
-void qemu_aio_init(void);
-void qemu_aio_flush(void);
-void qemu_aio_wait(void);
-
int qemu_key_check(BlockDriverState *bs, const char *name);
/* Ensure contents are flushed to disk. */
diff --git a/qemu-aio.h b/qemu-aio.h
new file mode 100644
index 0000000000..79678293ef
--- /dev/null
+++ b/qemu-aio.h
@@ -0,0 +1,45 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu-common.h"
+#include "qemu-char.h"
+
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
+/* Flush any pending AIO operation. This function will block until all
+ * outstanding AIO operations have been completed or cancelled. */
+void qemu_aio_flush(void);
+
+/* Wait for a single AIO completion to occur. This function will wait until a
+ * single AIO operation has completed. It is intended to be used as a looping
+ * primitive when simulating synchronous IO based on asynchronous IO. */
+void qemu_aio_wait(void);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
+ * be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+int qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+
+#endif