aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>2009-04-07 18:43:24 +0000
committeraliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>2009-04-07 18:43:24 +0000
commitf141eafe286c785f7e2c1e312a73f90d66bdfb90 (patch)
tree049474e891e06deec79bd3d58d49b49ba627276a
parentc87c0672936a5b825c0b95bb1d7f5d6dc92294e4 (diff)
push down vector linearization to posix-aio-compat.c (Christoph Hellwig)
Make all AIO requests vectored and defer linearization until the actual I/O thread. This prepares for using native preadv/pwritev. Also enables asynchronous direct I/O by handling that case in the I/O thread. Qcow and qcow2 propably want to be adopted to directly deal with multi-segment requests, but that can be implemented later. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
-rw-r--r--block-qcow.c84
-rw-r--r--block-qcow2.c93
-rw-r--r--block-raw-posix.c93
-rw-r--r--block.c196
-rw-r--r--block_int.h8
-rw-r--r--posix-aio-compat.c118
-rw-r--r--posix-aio-compat.h9
7 files changed, 317 insertions, 284 deletions
diff --git a/block-qcow.c b/block-qcow.c
index 2decd13e76..b60f4c1921 100644
--- a/block-qcow.c
+++ b/block-qcow.c
@@ -525,7 +525,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
typedef struct QCowAIOCB {
BlockDriverAIOCB common;
int64_t sector_num;
+ QEMUIOVector *qiov;
uint8_t *buf;
+ void *orig_buf;
int nb_sectors;
int n;
uint64_t cluster_offset;
@@ -543,12 +545,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
int index_in_cluster;
acb->hd_aiocb = NULL;
- if (ret < 0) {
- fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
redo:
/* post process the read buffer */
@@ -570,9 +568,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
/* prepare next AIO request */
@@ -592,7 +589,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
&acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->n);
@@ -601,14 +598,14 @@ static void qcow_aio_read_cb(void *opaque, int ret)
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(s, acb->cluster_offset) < 0)
- goto fail;
+ goto done;
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
goto redo;
} else {
if ((acb->cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
acb->hd_iov.iov_base = acb->buf;
acb->hd_iov.iov_len = acb->n * 512;
@@ -617,12 +614,22 @@ static void qcow_aio_read_cb(void *opaque, int ret)
(acb->cluster_offset >> 9) + index_in_cluster,
&acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+ }
+
+ return;
+
+done:
+ if (acb->qiov->niov > 1) {
+ qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+ qemu_vfree(acb->orig_buf);
}
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
QCowAIOCB *acb;
@@ -632,7 +639,11 @@ static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
return NULL;
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
- acb->buf = buf;
+ acb->qiov = qiov;
+ if (qiov->niov > 1)
+ acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+ else
+ acb->buf = qiov->iov->iov_base;
acb->nb_sectors = nb_sectors;
acb->n = 0;
acb->cluster_offset = 0;
@@ -652,12 +663,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->hd_aiocb = NULL;
- if (ret < 0) {
- fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
acb->nb_sectors -= acb->n;
acb->sector_num += acb->n;
@@ -665,9 +672,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -679,14 +685,14 @@ static void qcow_aio_write_cb(void *opaque, int ret)
index_in_cluster + acb->n);
if (!cluster_offset || (cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
if (s->crypt_method) {
if (!acb->cluster_data) {
acb->cluster_data = qemu_mallocz(s->cluster_size);
if (!acb->cluster_data) {
ret = -ENOMEM;
- goto fail;
+ goto done;
}
}
encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
@@ -704,11 +710,18 @@ static void qcow_aio_write_cb(void *opaque, int ret)
&acb->hd_qiov, acb->n,
qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+ return;
+
+done:
+ if (acb->qiov->niov > 1)
+ qemu_vfree(acb->orig_buf);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVQcowState *s = bs->opaque;
@@ -721,7 +734,12 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
return NULL;
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
- acb->buf = (uint8_t *)buf;
+ acb->qiov = qiov;
+ if (qiov->niov > 1) {
+ acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+ qemu_iovec_to_buffer(qiov, acb->buf);
+ } else
+ acb->buf = qiov->iov->iov_base;
acb->nb_sectors = nb_sectors;
acb->n = 0;
@@ -909,8 +927,8 @@ BlockDriver bdrv_qcow = {
.bdrv_is_allocated = qcow_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
- .bdrv_aio_read = qcow_aio_read,
- .bdrv_aio_write = qcow_aio_write,
+ .bdrv_aio_readv = qcow_aio_readv,
+ .bdrv_aio_writev = qcow_aio_writev,
.bdrv_aio_cancel = qcow_aio_cancel,
.aiocb_size = sizeof(QCowAIOCB),
.bdrv_write_compressed = qcow_write_compressed,
diff --git a/block-qcow2.c b/block-qcow2.c
index dd28c28e5f..3bd38b0d9d 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1264,7 +1264,9 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
typedef struct QCowAIOCB {
BlockDriverAIOCB common;
int64_t sector_num;
+ QEMUIOVector *qiov;
uint8_t *buf;
+ void *orig_buf;
int nb_sectors;
int n;
uint64_t cluster_offset;
@@ -1307,12 +1309,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
int index_in_cluster, n1;
acb->hd_aiocb = NULL;
- if (ret < 0) {
-fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
/* post process the read buffer */
if (!acb->cluster_offset) {
@@ -1333,9 +1331,8 @@ fail:
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
/* prepare next AIO request */
@@ -1356,32 +1353,32 @@ fail:
&acb->hd_qiov, acb->n,
qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
} else {
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
- goto fail;
+ goto done;
}
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->n);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
- goto fail;
+ goto done;
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(s, acb->cluster_offset) < 0)
- goto fail;
+ goto done;
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
- goto fail;
+ goto done;
} else {
if ((acb->cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
acb->hd_iov.iov_base = acb->buf;
@@ -1391,13 +1388,22 @@ fail:
(acb->cluster_offset >> 9) + index_in_cluster,
&acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+ }
+
+ return;
+done:
+ if (acb->qiov->niov > 1) {
+ qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+ qemu_vfree(acb->orig_buf);
}
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
QCowAIOCB *acb;
@@ -1406,7 +1412,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
return NULL;
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
- acb->buf = buf;
+ acb->qiov = qiov;
+ if (qiov->niov > 1) {
+ acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+ if (is_write)
+ qemu_iovec_to_buffer(qiov, acb->buf);
+ } else
+ acb->buf = qiov->iov->iov_base;
acb->nb_sectors = nb_sectors;
acb->n = 0;
acb->cluster_offset = 0;
@@ -1414,13 +1426,13 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
return acb;
}
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
QCowAIOCB *acb;
- acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+ acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
if (!acb)
return NULL;
@@ -1439,16 +1451,12 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->hd_aiocb = NULL;
- if (ret < 0) {
- fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
- goto fail;
+ goto done;
}
acb->nb_sectors -= acb->n;
@@ -1457,9 +1465,8 @@ static void qcow_aio_write_cb(void *opaque, int ret)
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1473,7 +1480,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
n_end, &acb->n, &acb->l2meta);
if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
if (s->crypt_method) {
if (!acb->cluster_data) {
@@ -1494,11 +1501,19 @@ static void qcow_aio_write_cb(void *opaque, int ret)
&acb->hd_qiov, acb->n,
qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+
+ return;
+
+done:
+ if (acb->qiov->niov > 1)
+ qemu_vfree(acb->orig_buf);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVQcowState *s = bs->opaque;
@@ -1506,7 +1521,7 @@ static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
s->cluster_cache_offset = -1; /* disable compressed cache */
- acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+ acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
if (!acb)
return NULL;
@@ -2771,8 +2786,8 @@ BlockDriver bdrv_qcow2 = {
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
- .bdrv_aio_read = qcow_aio_read,
- .bdrv_aio_write = qcow_aio_write,
+ .bdrv_aio_readv = qcow_aio_readv,
+ .bdrv_aio_writev = qcow_aio_writev,
.bdrv_aio_cancel = qcow_aio_cancel,
.aiocb_size = sizeof(QCowAIOCB),
.bdrv_write_compressed = qcow_write_compressed,
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 4da5ae4397..822839fb8f 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -599,8 +599,8 @@ static int posix_aio_init(void)
return 0;
}
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -614,24 +614,25 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
return NULL;
acb->aiocb.aio_fildes = s->fd;
acb->aiocb.ev_signo = SIGUSR2;
- acb->aiocb.aio_buf = buf;
- if (nb_sectors < 0)
- acb->aiocb.aio_nbytes = -nb_sectors;
- else
- acb->aiocb.aio_nbytes = nb_sectors * 512;
+ acb->aiocb.aio_iov = qiov->iov;
+ acb->aiocb.aio_niov = qiov->niov;
+ acb->aiocb.aio_nbytes = nb_sectors * 512;
acb->aiocb.aio_offset = sector_num * 512;
+ acb->aiocb.aio_flags = 0;
+
+ /*
+ * If O_DIRECT is used the buffer needs to be aligned on a sector
+ * boundary. Tell the low level code to ensure that in case it's
+ * not done yet.
+ */
+ if (s->aligned_buf)
+ acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
+
acb->next = posix_aio_state->first_aio;
posix_aio_state->first_aio = acb;
return acb;
}
-static void raw_aio_em_cb(void* opaque)
-{
- RawAIOCB *acb = opaque;
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_release(acb);
-}
-
static void raw_aio_remove(RawAIOCB *acb)
{
RawAIOCB **pacb;
@@ -651,28 +652,13 @@ static void raw_aio_remove(RawAIOCB *acb)
}
}
-static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
- /*
- * If O_DIRECT is used and the buffer is not aligned fall back
- * to synchronous IO.
- */
- BDRVRawState *s = bs->opaque;
-
- if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
- QEMUBH *bh;
- acb = qemu_aio_get(bs, cb, opaque);
- acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
- bh = qemu_bh_new(raw_aio_em_cb, acb);
- qemu_bh_schedule(bh);
- return &acb->common;
- }
-
- acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+ acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
if (!acb)
return NULL;
if (qemu_paio_read(&acb->aiocb) < 0) {
@@ -682,28 +668,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
return &acb->common;
}
-static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
- /*
- * If O_DIRECT is used and the buffer is not aligned fall back
- * to synchronous IO.
- */
- BDRVRawState *s = bs->opaque;
-
- if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
- QEMUBH *bh;
- acb = qemu_aio_get(bs, cb, opaque);
- acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
- bh = qemu_bh_new(raw_aio_em_cb, acb);
- qemu_bh_schedule(bh);
- return &acb->common;
- }
-
- acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+ acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
if (!acb)
return NULL;
if (qemu_paio_write(&acb->aiocb) < 0) {
@@ -887,8 +858,8 @@ BlockDriver bdrv_raw = {
.bdrv_flush = raw_flush,
#ifdef CONFIG_AIO
- .bdrv_aio_read = raw_aio_read,
- .bdrv_aio_write = raw_aio_write,
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
@@ -1215,12 +1186,24 @@ static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
{
+ BDRVRawState *s = bs->opaque;
RawAIOCB *acb;
- acb = raw_aio_setup(bs, 0, buf, 0, cb, opaque);
+ if (fd_open(bs) < 0)
+ return NULL;
+
+ acb = qemu_aio_get(bs, cb, opaque);
if (!acb)
return NULL;
+ acb->aiocb.aio_fildes = s->fd;
+ acb->aiocb.ev_signo = SIGUSR2;
+ acb->aiocb.aio_offset = 0;
+ acb->aiocb.aio_flags = 0;
+
+ acb->next = posix_aio_state->first_aio;
+ posix_aio_state->first_aio = acb;
+ acb->aiocb.aio_ioctl_buf = buf;
acb->aiocb.aio_ioctl_cmd = req;
if (qemu_paio_ioctl(&acb->aiocb) < 0) {
raw_aio_remove(acb);
@@ -1424,8 +1407,8 @@ BlockDriver bdrv_host_device = {
.bdrv_flush = raw_flush,
#ifdef CONFIG_AIO
- .bdrv_aio_read = raw_aio_read,
- .bdrv_aio_write = raw_aio_write,
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
diff --git a/block.c b/block.c
index 916f08e76f..8a78f14ed5 100644
--- a/block.c
+++ b/block.c
@@ -47,25 +47,21 @@
#define SECTOR_BITS 9
#define SECTOR_SIZE (1 << SECTOR_BITS)
-static AIOPool vectored_aio_pool;
-
typedef struct BlockDriverAIOCBSync {
BlockDriverAIOCB common;
QEMUBH *bh;
int ret;
+ /* vector translation state */
+ QEMUIOVector *qiov;
+ uint8_t *bounce;
+ int is_write;
} BlockDriverAIOCBSync;
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
@@ -144,10 +140,10 @@ void path_combine(char *dest, int dest_size,
static void bdrv_register(BlockDriver *bdrv)
{
- if (!bdrv->bdrv_aio_read) {
+ if (!bdrv->bdrv_aio_readv) {
/* add AIO emulation layer */
- bdrv->bdrv_aio_read = bdrv_aio_read_em;
- bdrv->bdrv_aio_write = bdrv_aio_write_em;
+ bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
} else if (!bdrv->bdrv_read) {
@@ -1295,91 +1291,10 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
/**************************************************************/
/* async I/Os */
-typedef struct VectorTranslationAIOCB {
- BlockDriverAIOCB common;
- QEMUIOVector *iov;
- uint8_t *bounce;
- int is_write;
- BlockDriverAIOCB *aiocb;
-} VectorTranslationAIOCB;
-
-static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
-{
- VectorTranslationAIOCB *acb
- = container_of(_acb, VectorTranslationAIOCB, common);
-
- bdrv_aio_cancel(acb->aiocb);
-}
-
-static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
-{
- VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
-
- if (!s->is_write) {
- qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
- }
- qemu_vfree(s->bounce);
- s->common.cb(s->common.opaque, ret);
- qemu_aio_release(s);
-}
-
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *iov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
- cb, opaque);
-
- s->iov = iov;
- s->bounce = qemu_memalign(512, nb_sectors * 512);
- s->is_write = is_write;
- if (is_write) {
- qemu_iovec_to_buffer(s->iov, s->bounce);
- s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
- bdrv_aio_rw_vector_cb, s);
- } else {
- s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
- bdrv_aio_rw_vector_cb, s);
- }
- if (!s->aiocb) {
- qemu_vfree(s->bounce);
- qemu_aio_release(s);
- return NULL;
- }
- return &s->common;
-}
-
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
+ QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- if (bdrv_check_request(bs, sector_num, nb_sectors))
- return NULL;
-
- return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
- cb, opaque, 0);
-}
-
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- if (bdrv_check_request(bs, sector_num, nb_sectors))
- return NULL;
-
- return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
- cb, opaque, 1);
-}
-
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
@@ -1388,7 +1303,8 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
- ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
+ ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
if (ret) {
/* Update stats even though technically transfer has not happened. */
@@ -1399,9 +1315,9 @@ static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
return ret;
}
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
{
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
@@ -1413,7 +1329,8 @@ static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
- ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
+ ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
if (ret) {
/* Update stats even though technically transfer has not happened. */
@@ -1436,42 +1353,62 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
static void bdrv_aio_bh_cb(void *opaque)
{
BlockDriverAIOCBSync *acb = opaque;
+
+ qemu_vfree(acb->bounce);
+
+ if (!acb->is_write)
+ qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
acb->common.cb(acb->common.opaque, acb->ret);
+
qemu_aio_release(acb);
}
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ int is_write)
+
{
BlockDriverAIOCBSync *acb;
- int ret;
acb = qemu_aio_get(bs, cb, opaque);
+ acb->is_write = is_write;
+ acb->qiov = qiov;
+ acb->bounce = qemu_memalign(512, qiov->size);
+
if (!acb->bh)
acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- acb->ret = ret;
+
+ if (is_write) {
+ qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+ acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+ } else {
+ acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+ }
+
qemu_bh_schedule(acb->bh);
+
return &acb->common;
}
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- BlockDriverAIOCBSync *acb;
- int ret;
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
- acb = qemu_aio_get(bs, cb, opaque);
- if (!acb->bh)
- acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
- ret = bdrv_write(bs, sector_num, buf, nb_sectors);
- acb->ret = ret;
- qemu_bh_schedule(acb->bh);
- return &acb->common;
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
+
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
@@ -1494,10 +1431,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
{
int async_ret;
BlockDriverAIOCB *acb;
+ struct iovec iov;
+ QEMUIOVector qiov;
async_ret = NOT_DONE;
- acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
- bdrv_rw_em_cb, &async_ret);
+ iov.iov_base = buf;
+ iov.iov_len = nb_sectors * 512;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
+ bdrv_rw_em_cb, &async_ret);
if (acb == NULL)
return -1;
@@ -1513,10 +1455,15 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
{
int async_ret;
BlockDriverAIOCB *acb;
+ struct iovec iov;
+ QEMUIOVector qiov;
async_ret = NOT_DONE;
- acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
- bdrv_rw_em_cb, &async_ret);
+ iov.iov_base = (void *)buf;
+ iov.iov_len = nb_sectors * 512;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
+ bdrv_rw_em_cb, &async_ret);
if (acb == NULL)
return -1;
while (async_ret == NOT_DONE) {
@@ -1527,9 +1474,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
void bdrv_init(void)
{
- aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
- bdrv_aio_cancel_vector);
-
bdrv_register(&bdrv_raw);
bdrv_register(&bdrv_host_device);
#ifndef _WIN32
diff --git a/block_int.h b/block_int.h
index dc4335982c..3e78997c71 100644
--- a/block_int.h
+++ b/block_int.h
@@ -54,11 +54,11 @@ struct BlockDriver {
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
int (*bdrv_make_empty)(BlockDriverState *bs);
/* aio */
- BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+ BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+ BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
int aiocb_size;
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 65c80ecc3e..0eb77a50d3 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -20,6 +20,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "osdep.h"
+#include "qemu-common.h"
#include "posix-aio-compat.h"
@@ -76,45 +77,110 @@ static void thread_create(pthread_t *thread, pthread_attr_t *attr,
if (ret) die2(ret, "pthread_create");
}
-static size_t handle_aiocb_readwrite(struct qemu_paiocb *aiocb)
+static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+{
+ int ret;
+
+ ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+ if (ret == -1)
+ return -errno;
+ return ret;
+}
+
+/*
+ * Check if we need to copy the data in the aiocb into a new
+ * properly aligned buffer.
+ */
+static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
+{
+ if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov; i++)
+ if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
+ return 1;
+ }
+
+ return 0;
+}
+
+static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
{
size_t offset = 0;
- ssize_t len;
+ size_t len;
while (offset < aiocb->aio_nbytes) {
- if (aiocb->aio_type == QEMU_PAIO_WRITE)
- len = pwrite(aiocb->aio_fildes,
- (const char *)aiocb->aio_buf + offset,
+ if (aiocb->aio_type == QEMU_PAIO_WRITE)
+ len = pwrite(aiocb->aio_fildes,
+ (const char *)buf + offset,
+ aiocb->aio_nbytes - offset,
+ aiocb->aio_offset + offset);
+ else
+ len = pread(aiocb->aio_fildes,
+ buf + offset,
aiocb->aio_nbytes - offset,
aiocb->aio_offset + offset);
- else
- len = pread(aiocb->aio_fildes,
- (char *)aiocb->aio_buf + offset,
- aiocb->aio_nbytes - offset,
- aiocb->aio_offset + offset);
-
- if (len == -1 && errno == EINTR)
- continue;
- else if (len == -1) {
- offset = -errno;
- break;
- } else if (len == 0)
- break;
- offset += len;
+ if (len == -1 && errno == EINTR)
+ continue;
+ else if (len == -1) {
+ offset = -errno;
+ break;
+ } else if (len == 0)
+ break;
+
+ offset += len;
}
return offset;
}
-static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
{
- int ret;
+ size_t nbytes;
+ char *buf;
+
+ if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) {
+ /*
+ * If there is just a single buffer, and it is properly aligned
+ * we can just use plain pread/pwrite without any problems.
+ */
+ return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+ }
- ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_buf);
- if (ret == -1)
- return -errno;
- return ret;
+ /*
+ * Ok, we have to do it the hard way, copy all segments into
+ * a single aligned buffer.
+ */
+ buf = qemu_memalign(512, aiocb->aio_nbytes);
+ if (aiocb->aio_type == QEMU_PAIO_WRITE) {
+ char *p = buf;
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov; ++i) {
+ memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+ p += aiocb->aio_iov[i].iov_len;
+ }
+ }
+
+ nbytes = handle_aiocb_rw_linear(aiocb, buf);
+ if (aiocb->aio_type != QEMU_PAIO_WRITE) {
+ char *p = buf;
+ size_t count = aiocb->aio_nbytes, copy;
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov && count; ++i) {
+ copy = count;
+ if (copy > aiocb->aio_iov[i].iov_len)
+ copy = aiocb->aio_iov[i].iov_len;
+ memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+ p += copy;
+ count -= copy;
+ }
+ }
+ qemu_vfree(buf);
+
+ return nbytes;
}
static void *aio_thread(void *unused)
@@ -157,7 +223,7 @@ static void *aio_thread(void *unused)
switch (aiocb->aio_type) {
case QEMU_PAIO_READ:
case QEMU_PAIO_WRITE:
- ret = handle_aiocb_readwrite(aiocb);
+ ret = handle_aiocb_rw(aiocb);
break;
case QEMU_PAIO_IOCTL:
ret = handle_aiocb_ioctl(aiocb);
diff --git a/posix-aio-compat.h b/posix-aio-compat.h
index a1cdfd7f1f..1c5dcbd920 100644
--- a/posix-aio-compat.h
+++ b/posix-aio-compat.h
@@ -27,11 +27,18 @@
struct qemu_paiocb
{
int aio_fildes;
- void *aio_buf;
+ union {
+ struct iovec *aio_iov;
+ void *aio_ioctl_buf;
+ };
+ int aio_niov;
size_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_PAIO_IOCTL */
int ev_signo;
off_t aio_offset;
+ unsigned aio_flags;
+/* 512 byte alignment required for buffer, offset and length */
+#define QEMU_AIO_SECTOR_ALIGNED 0x01
/* private */
TAILQ_ENTRY(qemu_paiocb) node;