From 9ef91a677110ec200d7b2904fc4bcae5a77329ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Aug 2009 16:58:19 +0200 Subject: raw-posix: refactor AIO support Currently the raw-posix.c code contains a lot of knowledge about the asynchronous I/O scheme that is mostly implemented in posix-aio-compat.c. All this code does not really belong here and is getting a bit in the way of implementing native AIO on Linux. So instead move all the guts of the AIO implementation into posix-aio-compat.c (which might need a better name, btw). There's now a very small interface between the AIO providers and raw-posix.c: - an init routine is called from raw_open_common to return an AIO context for this drive. An AIO implementation may either re-use one context for all drives, or use a different one for each as the Linux native AIO support will do. - a submit routine is called from the aio_readv/writev methods to submit an AIO request. There are no indirect calls involved in this interface as we need to decide which one to call manually. We will only call the Linux AIO native init function if we were requested to by vl.c, and we will only call the native submit function if we are asked to and the request is properly aligned. That's also the reason why the alignment check actually does the inverse move and now goes into raw-posix.c. The old posix-aio-compat.h header is removed now that most of its content is private to posix-aio-compat.c, and instead a new block/raw-posix-aio.h header is created containing only the tiny interface between raw-posix.c and the AIO implementation. 
Signed-off-by: Christoph Hellwig Signed-off-by: Anthony Liguori --- block/raw-posix-aio.h | 36 +++++++ block/raw-posix.c | 275 +++++++------------------------------------------- 2 files changed, 75 insertions(+), 236 deletions(-) create mode 100644 block/raw-posix-aio.h (limited to 'block') diff --git a/block/raw-posix-aio.h b/block/raw-posix-aio.h new file mode 100644 index 0000000000..6761cd39f3 --- /dev/null +++ b/block/raw-posix-aio.h @@ -0,0 +1,36 @@ +/* + * QEMU Posix block I/O backend AIO support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#ifndef QEMU_RAW_POSIX_AIO_H +#define QEMU_RAW_POSIX_AIO_H + +/* AIO request types */ +#define QEMU_AIO_READ 0x0001 +#define QEMU_AIO_WRITE 0x0002 +#define QEMU_AIO_IOCTL 0x0004 +#define QEMU_AIO_TYPE_MASK \ + (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL) + +/* AIO flags */ +#define QEMU_AIO_MISALIGNED 0x1000 + + +/* posix-aio-compat.c - thread pool based implementation */ +void *paio_init(void); +BlockDriverAIOCB *paio_submit(BlockDriverState *bs, void *aio_ctx, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type); +BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque); + +#endif /* QEMU_RAW_POSIX_AIO_H */ diff --git a/block/raw-posix.c b/block/raw-posix.c index ab43589402..ca9bc616a7 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -27,7 +27,7 @@ #include "qemu-log.h" #include "block_int.h" #include "module.h" -#include "posix-aio-compat.h" +#include "block/raw-posix-aio.h" #ifdef CONFIG_COCOA #include @@ -107,6 +107,7 @@ typedef struct BDRVRawState { int type; unsigned int lseek_err_cnt; int open_flags; + void *aio_ctx; #if defined(__linux__) /* linux floppy specific */ int64_t fd_open_time; @@ 
-117,8 +118,6 @@ typedef struct BDRVRawState { uint8_t* aligned_buf; } BDRVRawState; -static int posix_aio_init(void); - static int fd_open(BlockDriverState *bs); static int64_t raw_getlength(BlockDriverState *bs); @@ -132,8 +131,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, BDRVRawState *s = bs->opaque; int fd, ret; - posix_aio_init(); - s->lseek_err_cnt = 0; s->open_flags = open_flags | O_BINARY; @@ -165,12 +162,22 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, if ((bdrv_flags & BDRV_O_NOCACHE)) { s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); if (s->aligned_buf == NULL) { - ret = -errno; - close(fd); - return ret; + goto out_close; } } + + s->aio_ctx = paio_init(); + if (!s->aio_ctx) { + goto out_free_buf; + } + return 0; + +out_free_buf: + qemu_vfree(s->aligned_buf); +out_close: + close(fd); + return -errno; } static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -487,240 +494,58 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num, return ret; } -/***********************************************************/ -/* Unix AIO using POSIX AIO */ - -typedef struct RawAIOCB { - BlockDriverAIOCB common; - struct qemu_paiocb aiocb; - struct RawAIOCB *next; - int ret; -} RawAIOCB; - -typedef struct PosixAioState -{ - int rfd, wfd; - RawAIOCB *first_aio; -} PosixAioState; - -static void posix_aio_read(void *opaque) -{ - PosixAioState *s = opaque; - RawAIOCB *acb, **pacb; - int ret; - ssize_t len; - - /* read all bytes from signal pipe */ - for (;;) { - char bytes[16]; - - len = read(s->rfd, bytes, sizeof(bytes)); - if (len == -1 && errno == EINTR) - continue; /* try again */ - if (len == sizeof(bytes)) - continue; /* more to read */ - break; - } - - for(;;) { - pacb = &s->first_aio; - for(;;) { - acb = *pacb; - if (!acb) - goto the_end; - ret = qemu_paio_error(&acb->aiocb); - if (ret == ECANCELED) { - /* remove the request */ - *pacb = acb->next; - 
qemu_aio_release(acb); - } else if (ret != EINPROGRESS) { - /* end of aio */ - if (ret == 0) { - ret = qemu_paio_return(&acb->aiocb); - if (ret == acb->aiocb.aio_nbytes) - ret = 0; - else - ret = -EINVAL; - } else { - ret = -ret; - } - /* remove the request */ - *pacb = acb->next; - /* call the callback */ - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); - break; - } else { - pacb = &acb->next; - } - } - } - the_end: ; -} - -static int posix_aio_flush(void *opaque) -{ - PosixAioState *s = opaque; - return !!s->first_aio; -} - -static PosixAioState *posix_aio_state; - -static void aio_signal_handler(int signum) -{ - if (posix_aio_state) { - char byte = 0; - - write(posix_aio_state->wfd, &byte, sizeof(byte)); - } - - qemu_service_io(); -} - -static int posix_aio_init(void) +/* + * Check if all memory in this vector is sector aligned. + */ +static int qiov_is_aligned(QEMUIOVector *qiov) { - struct sigaction act; - PosixAioState *s; - int fds[2]; - struct qemu_paioinit ai; - - if (posix_aio_state) - return 0; - - s = qemu_malloc(sizeof(PosixAioState)); - - sigfillset(&act.sa_mask); - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ - act.sa_handler = aio_signal_handler; - sigaction(SIGUSR2, &act, NULL); - - s->first_aio = NULL; - if (pipe(fds) == -1) { - fprintf(stderr, "failed to create pipe\n"); - return -errno; - } - - s->rfd = fds[0]; - s->wfd = fds[1]; - - fcntl(s->rfd, F_SETFL, O_NONBLOCK); - fcntl(s->wfd, F_SETFL, O_NONBLOCK); - - qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); - - memset(&ai, 0, sizeof(ai)); - ai.aio_threads = 64; - ai.aio_num = 64; - qemu_paio_init(&ai); - - posix_aio_state = s; - - return 0; -} + int i; -static void raw_aio_remove(RawAIOCB *acb) -{ - RawAIOCB **pacb; - - /* remove the callback from the queue */ - pacb = &posix_aio_state->first_aio; - for(;;) { - if (*pacb == NULL) { - fprintf(stderr, "raw_aio_remove: aio request not found!\n"); - break; - } else if (*pacb == 
acb) { - *pacb = acb->next; - qemu_aio_release(acb); - break; + for (i = 0; i < qiov->niov; i++) { + if ((uintptr_t) qiov->iov[i].iov_base % 512) { + return 0; } - pacb = &(*pacb)->next; } -} - -static void raw_aio_cancel(BlockDriverAIOCB *blockacb) -{ - int ret; - RawAIOCB *acb = (RawAIOCB *)blockacb; - ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); - if (ret == QEMU_PAIO_NOTCANCELED) { - /* fail safe: if the aio could not be canceled, we wait for - it */ - while (qemu_paio_error(&acb->aiocb) == EINPROGRESS); - } - - raw_aio_remove(acb); + return 1; } -static AIOPool raw_aio_pool = { - .aiocb_size = sizeof(RawAIOCB), - .cancel = raw_aio_cancel, -}; - -static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) { BDRVRawState *s = bs->opaque; - RawAIOCB *acb; if (fd_open(bs) < 0) return NULL; - acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.ev_signo = SIGUSR2; - acb->aiocb.aio_iov = qiov->iov; - acb->aiocb.aio_niov = qiov->niov; - acb->aiocb.aio_nbytes = nb_sectors * 512; - acb->aiocb.aio_offset = sector_num * 512; - acb->aiocb.aio_flags = 0; - /* * If O_DIRECT is used the buffer needs to be aligned on a sector - * boundary. Tell the low level code to ensure that in case it's - * not done yet. + * boundary. Check if this is the case or telll the low-level + * driver that it needs to copy the buffer. 
*/ - if (s->aligned_buf) - acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED; + if (s->aligned_buf && !qiov_is_aligned(qiov)) { + type |= QEMU_AIO_MISALIGNED; + } - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - return acb; + return paio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, nb_sectors, + cb, opaque, type); } static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) { - RawAIOCB *acb; - - acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); - if (!acb) - return NULL; - if (qemu_paio_read(&acb->aiocb) < 0) { - raw_aio_remove(acb); - return NULL; - } - return &acb->common; + return raw_aio_submit(bs, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_READ); } static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) { - RawAIOCB *acb; - - acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); - if (!acb) - return NULL; - if (qemu_paio_write(&acb->aiocb) < 0) { - raw_aio_remove(acb); - return NULL; - } - return &acb->common; + return raw_aio_submit(bs, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_WRITE); } static void raw_close(BlockDriverState *bs) @@ -1085,30 +910,10 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - RawAIOCB *acb; if (fd_open(bs) < 0) return NULL; - - acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.ev_signo = SIGUSR2; - acb->aiocb.aio_offset = 0; - acb->aiocb.aio_flags = 0; - - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - - acb->aiocb.aio_ioctl_buf = buf; - acb->aiocb.aio_ioctl_cmd = req; - if (qemu_paio_ioctl(&acb->aiocb) < 0) { - raw_aio_remove(acb); - return NULL; - } 
- - return &acb->common; + return paio_ioctl(bs, s->fd, req, buf, cb, opaque); } #elif defined(__FreeBSD__) @@ -1189,8 +994,6 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags) BDRVRawState *s = bs->opaque; int ret; - posix_aio_init(); - s->type = FTYPE_FD; /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */ -- cgit v1.2.3