Diffstat (limited to 'block/qed.c')
-rw-r--r--    block/qed.c    779
1 file changed, 302 insertions(+), 477 deletions(-)
diff --git a/block/qed.c b/block/qed.c
index 8d899fd479..385381a78a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -21,10 +21,6 @@
#include "qapi/qmp/qerror.h"
#include "sysemu/block-backend.h"
-static const AIOCBInfo qed_aiocb_info = {
- .aiocb_size = sizeof(QEDAIOCB),
-};
-
static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
const char *filename)
{
@@ -92,49 +88,13 @@ int qed_write_header_sync(BDRVQEDState *s)
return 0;
}
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- struct iovec iov;
- QEMUIOVector qiov;
- int nsectors;
- uint8_t *buf;
-} QEDWriteHeaderCB;
-
-static void qed_write_header_cb(void *opaque, int ret)
-{
- QEDWriteHeaderCB *write_header_cb = opaque;
-
- qemu_vfree(write_header_cb->buf);
- gencb_complete(write_header_cb, ret);
-}
-
-static void qed_write_header_read_cb(void *opaque, int ret)
-{
- QEDWriteHeaderCB *write_header_cb = opaque;
- BDRVQEDState *s = write_header_cb->s;
-
- if (ret) {
- qed_write_header_cb(write_header_cb, ret);
- return;
- }
-
- /* Update header */
- qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
-
- bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
- write_header_cb->nsectors, qed_write_header_cb,
- write_header_cb);
-}
-
/**
* Update header in-place (does not rewrite backing filename or other strings)
*
* This function only updates known header fields in-place and does not affect
* extra data after the QED header.
*/
-static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
- void *opaque)
+static int coroutine_fn qed_write_header(BDRVQEDState *s)
{
/* We must write full sectors for O_DIRECT but cannot necessarily generate
* the data following the header if an unrecognized compat feature is
@@ -144,18 +104,35 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE);
size_t len = nsectors * BDRV_SECTOR_SIZE;
- QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
- cb, opaque);
-
- write_header_cb->s = s;
- write_header_cb->nsectors = nsectors;
- write_header_cb->buf = qemu_blockalign(s->bs, len);
- write_header_cb->iov.iov_base = write_header_cb->buf;
- write_header_cb->iov.iov_len = len;
- qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
-
- bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
- qed_write_header_read_cb, write_header_cb);
+ uint8_t *buf;
+ struct iovec iov;
+ QEMUIOVector qiov;
+ int ret;
+
+ buf = qemu_blockalign(s->bs, len);
+ iov = (struct iovec) {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Update header */
+ qed_header_cpu_to_le(&s->header, (QEDHeader *) buf);
+
+ ret = bdrv_co_pwritev(s->bs->file, 0, qiov.size, &qiov, 0);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ qemu_vfree(buf);
+ return ret;
}
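/*
 * Illustrative sketch (not part of the patch): the conversion above replaces
 * a callback chain (allocate CB struct -> read -> write -> free) with
 * straight-line coroutine code. A caller in coroutine context can now handle
 * errors sequentially; example_set_feature() below is hypothetical:
 */
static int coroutine_fn example_set_feature(BDRVQEDState *s, uint64_t feature)
{
    int ret;

    s->header.features |= feature;
    ret = qed_write_header(s);          /* suspends only this coroutine */
    if (ret < 0) {
        s->header.features &= ~feature; /* roll back on failure */
    }
    return ret;
}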
static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
@@ -272,20 +249,6 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
return l2_table;
}
-static void qed_aio_next_io(QEDAIOCB *acb, int ret);
-
-static void qed_aio_start_io(QEDAIOCB *acb)
-{
- qed_aio_next_io(acb, 0);
-}
-
-static void qed_aio_next_io_cb(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
-
- qed_aio_next_io(acb, ret);
-}
-
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
assert(!s->allocating_write_reqs_plugged);
@@ -295,61 +258,47 @@ static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
{
- QEDAIOCB *acb;
-
assert(s->allocating_write_reqs_plugged);
s->allocating_write_reqs_plugged = false;
-
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_start_io(acb);
- }
+ qemu_co_enter_next(&s->allocating_write_reqs);
}
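/*
 * Illustrative sketch of the CoQueue discipline introduced above, assuming
 * QEMU's coroutine queue API (qemu_co_queue_wait() with a NULL mutex, as used
 * later in this patch). A hypothetical waiter parks itself while writes are
 * plugged; qed_unplug_allocating_write_reqs() then wakes exactly one waiter
 * via qemu_co_enter_next(), preserving the one-at-a-time ordering that the
 * old QSIMPLEQ code implemented by hand:
 */
static void coroutine_fn example_wait_until_unplugged(BDRVQEDState *s)
{
    while (s->allocating_write_reqs_plugged) {
        qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
    }
}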
-static void qed_finish_clear_need_check(void *opaque, int ret)
-{
- /* Do nothing */
-}
-
-static void qed_flush_after_clear_need_check(void *opaque, int ret)
+static void coroutine_fn qed_need_check_timer_entry(void *opaque)
{
BDRVQEDState *s = opaque;
+ int ret;
- bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+ /* The timer should only fire when allocating writes have drained */
+ assert(!s->allocating_acb);
- /* No need to wait until flush completes */
- qed_unplug_allocating_write_reqs(s);
-}
+ trace_qed_need_check_timer_cb(s);
-static void qed_clear_need_check(void *opaque, int ret)
-{
- BDRVQEDState *s = opaque;
+ qed_acquire(s);
+ qed_plug_allocating_write_reqs(s);
- if (ret) {
+ /* Ensure writes are on disk before clearing flag */
+ ret = bdrv_co_flush(s->bs->file->bs);
+ qed_release(s);
+ if (ret < 0) {
qed_unplug_allocating_write_reqs(s);
return;
}
s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header(s, qed_flush_after_clear_need_check, s);
+ ret = qed_write_header(s);
+ (void) ret;
+
+ qed_unplug_allocating_write_reqs(s);
+
+ ret = bdrv_co_flush(s->bs);
+ (void) ret;
}
static void qed_need_check_timer_cb(void *opaque)
{
- BDRVQEDState *s = opaque;
-
- /* The timer should only fire when allocating writes have drained */
- assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
-
- trace_qed_need_check_timer_cb(s);
-
- qed_acquire(s);
- qed_plug_allocating_write_reqs(s);
-
- /* Ensure writes are on disk before clearing flag */
- bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
- qed_release(s);
+ Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
+ qemu_coroutine_enter(co);
}
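/*
 * The timer callback above is the standard bridge from non-coroutine context
 * into coroutine_fn code. A minimal sketch of the same pattern, with
 * hypothetical names:
 */
static void coroutine_fn example_entry(void *opaque)
{
    /* may now call bdrv_co_flush(), qed_write_header(), ... */
}

static void example_timer_cb(void *opaque)
{
    Coroutine *co = qemu_coroutine_create(example_entry, opaque);
    qemu_coroutine_enter(co);   /* runs until the first yield */
}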
void qed_acquire(BDRVQEDState *s)
@@ -423,7 +372,7 @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
s->bs = bs;
- QSIMPLEQ_INIT(&s->allocating_write_reqs);
+ qemu_co_queue_init(&s->allocating_write_reqs);
ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
if (ret < 0) {
@@ -776,14 +725,14 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
.file = file,
};
QEDRequest request = { .l2_table = NULL };
+ uint64_t offset;
+ int ret;
- qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
+ ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
+ qed_is_allocated_cb(&cb, ret, offset, len);
- /* Now sleep if the callback wasn't invoked immediately */
- while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
+ /* The callback was invoked immediately */
+ assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
qed_unref_l2_cache_entry(request.l2_table);
@@ -792,7 +741,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
{
- return acb->common.bs->opaque;
+ return acb->bs->opaque;
}
/**
@@ -808,13 +757,13 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
* This function reads qiov->size bytes starting at pos from the backing file.
* If there is no backing file then zeroes are read.
*/
-static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
- QEMUIOVector *qiov,
- QEMUIOVector **backing_qiov,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
+ QEMUIOVector *qiov,
+ QEMUIOVector **backing_qiov)
{
uint64_t backing_length = 0;
size_t size;
+ int ret;
/* If there is a backing file, get its length. Treat the absence of a
* backing file like a zero length backing file.
@@ -822,8 +771,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
if (s->bs->backing) {
int64_t l = bdrv_getlength(s->bs->backing->bs);
if (l < 0) {
- cb(opaque, l);
- return;
+ return l;
}
backing_length = l;
}
@@ -836,8 +784,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
/* Complete now if there are no backing file sectors to read */
if (pos >= backing_length) {
- cb(opaque, 0);
- return;
+ return 0;
}
/* If the read straddles the end of the backing file, shorten it */
@@ -849,46 +796,11 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE,
- *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEMUIOVector qiov;
- QEMUIOVector *backing_qiov;
- struct iovec iov;
- uint64_t offset;
-} CopyFromBackingFileCB;
-
-static void qed_copy_from_backing_file_cb(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- qemu_vfree(copy_cb->iov.iov_base);
- gencb_complete(&copy_cb->gencb, ret);
-}
-
-static void qed_copy_from_backing_file_write(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- BDRVQEDState *s = copy_cb->s;
-
- if (copy_cb->backing_qiov) {
- qemu_iovec_destroy(copy_cb->backing_qiov);
- g_free(copy_cb->backing_qiov);
- copy_cb->backing_qiov = NULL;
- }
-
- if (ret) {
- qed_copy_from_backing_file_cb(copy_cb, ret);
- return;
+ ret = bdrv_co_preadv(s->bs->backing, pos, size, *backing_qiov, 0);
+ if (ret < 0) {
+ return ret;
}
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
- &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
- qed_copy_from_backing_file_cb, copy_cb);
+ return 0;
}
/**
@@ -898,32 +810,48 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
* @pos: Byte position in device
* @len: Number of bytes
* @offset: Byte offset in image file
- * @cb: Completion function
- * @opaque: User data for completion function
*/
-static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
- uint64_t len, uint64_t offset,
- BlockCompletionFunc *cb,
- void *opaque)
+static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
+ uint64_t pos, uint64_t len,
+ uint64_t offset)
{
- CopyFromBackingFileCB *copy_cb;
+ QEMUIOVector qiov;
+ QEMUIOVector *backing_qiov = NULL;
+ struct iovec iov;
+ int ret;
/* Skip copy entirely if there is no work to do */
if (len == 0) {
- cb(opaque, 0);
- return;
+ return 0;
}
- copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
- copy_cb->s = s;
- copy_cb->offset = offset;
- copy_cb->backing_qiov = NULL;
- copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
- copy_cb->iov.iov_len = len;
- qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
+ iov = (struct iovec) {
+ .iov_base = qemu_blockalign(s->bs, len),
+ .iov_len = len,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = qed_read_backing_file(s, pos, &qiov, &backing_qiov);
- qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
- qed_copy_from_backing_file_write, copy_cb);
+ if (backing_qiov) {
+ qemu_iovec_destroy(backing_qiov);
+ g_free(backing_qiov);
+ backing_qiov = NULL;
+ }
+
+ if (ret) {
+ goto out;
+ }
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
+ ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = 0;
+out:
+ qemu_vfree(iov.iov_base);
+ return ret;
}
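/*
 * Ownership note for the helper above: qed_read_backing_file() may hand back
 * a heap-allocated *backing_qiov that aliases the bounce buffer, so it is
 * destroyed and freed here before ret is even checked; the bounce buffer
 * itself is always released through the single out: path.
 */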
/**
@@ -938,8 +866,9 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
* The cluster offset may be an allocated byte offset in the image file, the
* zero cluster marker, or the unallocated cluster marker.
*/
-static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
- unsigned int n, uint64_t cluster)
+static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
+ int index, unsigned int n,
+ uint64_t cluster)
{
int i;
for (i = index; i < index + n; i++) {
@@ -951,27 +880,9 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
}
}
-static void qed_aio_complete_bh(void *opaque)
+static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- BlockCompletionFunc *cb = acb->common.cb;
- void *user_opaque = acb->common.opaque;
- int ret = acb->bh_ret;
-
- qemu_aio_unref(acb);
-
- /* Invoke callback */
- qed_acquire(s);
- cb(user_opaque, ret);
- qed_release(s);
-}
-
-static void qed_aio_complete(QEDAIOCB *acb, int ret)
-{
- BDRVQEDState *s = acb_to_s(acb);
-
- trace_qed_aio_complete(s, acb, ret);
/* Free resources */
qemu_iovec_destroy(&acb->cur_qiov);
@@ -983,22 +894,16 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
acb->qiov->iov[0].iov_base = NULL;
}
- /* Arrange for a bh to invoke the completion function */
- acb->bh_ret = ret;
- aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
- qed_aio_complete_bh, acb);
-
/* Start next allocating write request waiting behind this one. Note that
* requests enqueue themselves when they first hit an unallocated cluster
* but they wait until the entire request is finished before waking up the
* next request in the queue. This ensures that we don't cycle through
* requests multiple times but rather finish one at a time completely.
*/
- if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_start_io(acb);
+ if (acb == s->allocating_acb) {
+ s->allocating_acb = NULL;
+ if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
+ qemu_co_enter_next(&s->allocating_write_reqs);
} else if (s->header.features & QED_F_NEED_CHECK) {
qed_start_need_check_timer(s);
}
@@ -1006,15 +911,21 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
}
/**
- * Commit the current L2 table to the cache
+ * Update L1 table with new L2 table offset and write it out
*/
-static void qed_commit_l2_update(void *opaque, int ret)
+static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
CachedL2Table *l2_table = acb->request.l2_table;
uint64_t l2_offset = l2_table->offset;
+ int index, ret;
+
+ index = qed_l1_index(s, acb->cur_pos);
+ s->l1_table->offsets[index] = l2_table->offset;
+ ret = qed_write_l1_table(s, index, 1);
+
+ /* Commit the current L2 table to the cache */
qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
/* This is guaranteed to succeed because we just committed the entry to the
@@ -1023,41 +934,18 @@ static void qed_commit_l2_update(void *opaque, int ret)
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(acb->request.l2_table != NULL);
- qed_aio_next_io(acb, ret);
+ return ret;
}
-/**
- * Update L1 table with new L2 table offset and write it out
- */
-static void qed_aio_write_l1_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- int index;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- index = qed_l1_index(s, acb->cur_pos);
- s->l1_table->offsets[index] = acb->request.l2_table->offset;
-
- qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
-}
/**
* Update L2 table with new cluster offsets and write them out
*/
-static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
+static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
{
BDRVQEDState *s = acb_to_s(acb);
bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
- int index;
-
- if (ret) {
- goto err;
- }
+ int index, ret;
if (need_alloc) {
qed_unref_l2_cache_entry(acb->request.l2_table);
@@ -1070,115 +958,99 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
if (need_alloc) {
/* Write out the whole new L2 table */
- qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
- qed_aio_write_l1_update, acb);
+ ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true);
+ if (ret) {
+ return ret;
+ }
+ return qed_aio_write_l1_update(acb);
} else {
/* Write out only the updated part of the L2 table */
- qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
- qed_aio_next_io_cb, acb);
- }
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-static void qed_aio_write_l2_update_cb(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
-}
-
-/**
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use. A crash during an
- * allocating write could result in empty clusters in the image. If the write
- * only touched a subregion of the cluster, then backing image sectors have
- * been lost in the untouched region. The solution is to flush after writing a
- * new data cluster and before updating the L2 table.
- */
-static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
-
- if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
- qed_aio_complete(acb, -EIO);
+ ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters,
+ false);
+ if (ret) {
+ return ret;
+ }
}
+ return 0;
}
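/*
 * Note on need_alloc above: QED_CLUSTER_L1 means even the L2 table was
 * missing, so a brand-new table is written out in full and then linked by
 * qed_aio_write_l1_update(); for an existing table only the n updated
 * entries are rewritten.
 */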
/**
* Write data to the image file
*/
-static void qed_aio_write_main(void *opaque, int ret)
+static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
uint64_t offset = acb->cur_cluster +
qed_offset_into_cluster(s, acb->cur_pos);
- BlockCompletionFunc *next_fn;
+ int ret;
- trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
+ trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
+ BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+ ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
+ &acb->cur_qiov, 0);
+ if (ret < 0) {
+ return ret;
}
- if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
- next_fn = qed_aio_next_io_cb;
- } else {
+ if (acb->find_cluster_ret != QED_CLUSTER_FOUND) {
if (s->bs->backing) {
- next_fn = qed_aio_write_flush_before_l2_update;
- } else {
- next_fn = qed_aio_write_l2_update_cb;
+ /*
+ * Flush new data clusters before updating the L2 table
+ *
+ * This flush is necessary when a backing file is in use. A crash
+ * during an allocating write could result in empty clusters in the
+ * image. If the write only touched a subregion of the cluster,
+ * then backing image sectors have been lost in the untouched
+ * region. The solution is to flush after writing a new data
+ * cluster and before updating the L2 table.
+ */
+ ret = bdrv_co_flush(s->bs->file->bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ ret = qed_aio_write_l2_update(acb, acb->cur_cluster);
+ if (ret < 0) {
+ return ret;
}
}
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- next_fn, acb);
+ return 0;
}
/**
- * Populate back untouched region of new data cluster
+ * Populate untouched regions of new data cluster
*/
-static void qed_aio_write_postfill(void *opaque, int ret)
+static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = acb->cur_pos + acb->cur_qiov.size;
- uint64_t len =
- qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos) +
- acb->cur_qiov.size;
+ uint64_t start, len, offset;
+ int ret;
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
+ /* Populate front untouched region of new data cluster */
+ start = qed_start_of_cluster(s, acb->cur_pos);
+ len = qed_offset_into_cluster(s, acb->cur_pos);
+
+ trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
+ ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
+ if (ret < 0) {
+ return ret;
}
- trace_qed_aio_write_postfill(s, acb, start, len, offset);
- qed_copy_from_backing_file(s, start, len, offset,
- qed_aio_write_main, acb);
-}
+ /* Populate back untouched region of new data cluster */
+ start = acb->cur_pos + acb->cur_qiov.size;
+ len = qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
+ offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos) +
+ acb->cur_qiov.size;
-/**
- * Populate front untouched region of new data cluster
- */
-static void qed_aio_write_prefill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
- uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
+ trace_qed_aio_write_postfill(s, acb, start, len, offset);
+ ret = qed_copy_from_backing_file(s, start, len, offset);
+ if (ret < 0) {
+ return ret;
+ }
- trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
- qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
- qed_aio_write_postfill, acb);
+ return qed_aio_write_main(acb);
}
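/*
 * Worked example of the COW split above (illustrative numbers only): with a
 * 64 KiB (0x10000) cluster_size, a 4 KiB write at guest position
 * cur_pos = 0x13000 into a new cluster at image offset cur_cluster = 0x100000
 * decomposes as:
 *
 *   prefill:  start = qed_start_of_cluster(s, 0x13000)    = 0x10000
 *             len   = qed_offset_into_cluster(s, 0x13000) = 0x3000
 *             -> copied to image offset 0x100000
 *   data:     0x1000 bytes at image offset 0x100000 + 0x3000
 *   postfill: start = 0x13000 + 0x1000 = 0x14000
 *             len   = 0x20000 - 0x14000 = 0xc000
 *             -> copied to image offset 0x100000 + 0x4000
 *
 * 0x3000 + 0x1000 + 0xc000 = 0x10000: the new cluster is fully populated.
 */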
/**
@@ -1194,18 +1066,6 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
return !(s->header.features & QED_F_NEED_CHECK);
}
-static void qed_aio_write_zero_cluster(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- qed_aio_write_l2_update(acb, 0, 1);
-}
-
/**
* Write new data cluster
*
@@ -1214,23 +1074,24 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret)
*
* This path is taken when writing to previously unallocated clusters.
*/
-static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
- BlockCompletionFunc *cb;
+ int ret;
/* Cancel timer when the first allocating request comes in */
- if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+ if (s->allocating_acb == NULL) {
qed_cancel_need_check_timer(s);
}
/* Freeze this request if another allocating write is in progress */
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
- }
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
- s->allocating_write_reqs_plugged) {
- return; /* wait for existing request to finish */
+ if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
+ if (s->allocating_acb != NULL) {
+ qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+ assert(s->allocating_acb == NULL);
+ }
+ s->allocating_acb = acb;
+ return -EAGAIN; /* start over with looking up table entries */
}
acb->cur_nclusters = qed_bytes_to_clusters(s,
@@ -1240,22 +1101,29 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
if (acb->flags & QED_AIOCB_ZERO) {
/* Skip ahead if the clusters are already zero */
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
- qed_aio_start_io(acb);
- return;
+ return 0;
}
-
- cb = qed_aio_write_zero_cluster;
} else {
- cb = qed_aio_write_prefill;
acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
}
if (qed_should_set_need_check(s)) {
s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, cb, acb);
+ ret = qed_write_header(s);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (acb->flags & QED_AIOCB_ZERO) {
+ ret = qed_aio_write_l2_update(acb, 1);
} else {
- cb(acb, 0);
+ ret = qed_aio_write_cow(acb);
+ }
+ if (ret < 0) {
+ return ret;
}
+ return 0;
}
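/*
 * Serialization note for the wait above: only one allocating write runs at a
 * time. A second request parks on allocating_write_reqs; once woken it
 * installs itself as s->allocating_acb and returns -EAGAIN so the caller
 * redoes the cluster lookup, because the previous writer may have allocated
 * the very clusters this request was about to allocate.
 */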
/**
@@ -1267,17 +1135,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
*
* This path is taken when writing to already allocated clusters.
*/
-static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
+static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
+ size_t len)
{
/* Allocate buffer for zero writes */
if (acb->flags & QED_AIOCB_ZERO) {
struct iovec *iov = acb->qiov->iov;
if (!iov->iov_base) {
- iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
+ iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
if (iov->iov_base == NULL) {
- qed_aio_complete(acb, -ENOMEM);
- return;
+ return -ENOMEM;
}
memset(iov->iov_base, 0, iov->iov_len);
}
@@ -1288,22 +1156,19 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
/* Do the actual write */
- qed_aio_write_main(acb, 0);
+ return qed_aio_write_main(acb);
}
/**
* Write data cluster
*
* @opaque: Write request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
* @offset: Cluster offset in bytes
* @len: Length in bytes
- *
- * Callback from qed_find_cluster().
*/
-static void qed_aio_write_data(void *opaque, int ret,
- uint64_t offset, size_t len)
+static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
{
QEDAIOCB *acb = opaque;
@@ -1313,18 +1178,15 @@ static void qed_aio_write_data(void *opaque, int ret,
switch (ret) {
case QED_CLUSTER_FOUND:
- qed_aio_write_inplace(acb, offset, len);
- break;
+ return qed_aio_write_inplace(acb, offset, len);
case QED_CLUSTER_L2:
case QED_CLUSTER_L1:
case QED_CLUSTER_ZERO:
- qed_aio_write_alloc(acb, len);
- break;
+ return qed_aio_write_alloc(acb, len);
default:
- qed_aio_complete(acb, ret);
- break;
+ g_assert_not_reached();
}
}
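/*
 * The default case above is unreachable by construction: negative returns
 * from qed_find_cluster() are now filtered in the qed_aio_next_io() loop
 * before dispatching here, so ret can only be a QED_CLUSTER_* code.
 */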
@@ -1332,166 +1194,139 @@ static void qed_aio_write_data(void *opaque, int ret,
* Read data cluster
*
* @opaque: Read request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
* @offset: Cluster offset in bytes
* @len: Length in bytes
- *
- * Callback from qed_find_cluster().
*/
-static void qed_aio_read_data(void *opaque, int ret,
- uint64_t offset, size_t len)
+static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- BlockDriverState *bs = acb->common.bs;
+ BlockDriverState *bs = acb->bs;
/* Adjust offset into cluster */
offset += qed_offset_into_cluster(s, acb->cur_pos);
trace_qed_aio_read_data(s, acb, ret, offset, len);
- if (ret < 0) {
- goto err;
- }
-
qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
/* Handle zero cluster and backing file reads */
if (ret == QED_CLUSTER_ZERO) {
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
- qed_aio_start_io(acb);
- return;
+ return 0;
} else if (ret != QED_CLUSTER_FOUND) {
- qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- &acb->backing_qiov, qed_aio_next_io_cb, acb);
- return;
+ return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+ &acb->backing_qiov);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- qed_aio_next_io_cb, acb);
- return;
-
-err:
- qed_aio_complete(acb, ret);
+ ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+ &acb->cur_qiov, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ return 0;
}
/**
* Begin next I/O or complete the request
*/
-static void qed_aio_next_io(QEDAIOCB *acb, int ret)
+static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
- QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
- qed_aio_write_data : qed_aio_read_data;
+ uint64_t offset;
+ size_t len;
+ int ret;
- trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
+ while (1) {
+ trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);
- if (acb->backing_qiov) {
- qemu_iovec_destroy(acb->backing_qiov);
- g_free(acb->backing_qiov);
- acb->backing_qiov = NULL;
- }
+ if (acb->backing_qiov) {
+ qemu_iovec_destroy(acb->backing_qiov);
+ g_free(acb->backing_qiov);
+ acb->backing_qiov = NULL;
+ }
- /* Handle I/O error */
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
+ acb->qiov_offset += acb->cur_qiov.size;
+ acb->cur_pos += acb->cur_qiov.size;
+ qemu_iovec_reset(&acb->cur_qiov);
- acb->qiov_offset += acb->cur_qiov.size;
- acb->cur_pos += acb->cur_qiov.size;
- qemu_iovec_reset(&acb->cur_qiov);
+ /* Complete request */
+ if (acb->cur_pos >= acb->end_pos) {
+ ret = 0;
+ break;
+ }
- /* Complete request */
- if (acb->cur_pos >= acb->end_pos) {
- qed_aio_complete(acb, 0);
- return;
+ /* Find next cluster and start I/O */
+ len = acb->end_pos - acb->cur_pos;
+ ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset);
+ if (ret < 0) {
+ break;
+ }
+
+ if (acb->flags & QED_AIOCB_WRITE) {
+ ret = qed_aio_write_data(acb, ret, offset, len);
+ } else {
+ ret = qed_aio_read_data(acb, ret, offset, len);
+ }
+
+ if (ret < 0 && ret != -EAGAIN) {
+ break;
+ }
}
- /* Find next cluster and start I/O */
- qed_find_cluster(s, &acb->request,
- acb->cur_pos, acb->end_pos - acb->cur_pos,
- io_fn, acb);
+ trace_qed_aio_complete(s, acb, ret);
+ qed_aio_complete(acb);
+ return ret;
}
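/*
 * Loop contract above: each iteration consumes cur_qiov, then either
 * completes (cur_pos >= end_pos), dispatches the next cluster, or retries
 * when the dispatch returned -EAGAIN (an allocating write that slept in
 * qed_aio_write_alloc() and must redo its table lookup). Any other negative
 * value exits the loop and completes the request with that error.
 */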
-static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque, int flags)
-{
- QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
-
- trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
- opaque, flags);
+static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ int flags)
+{
+ QEDAIOCB acb = {
+ .bs = bs,
+ .cur_pos = (uint64_t) sector_num * BDRV_SECTOR_SIZE,
+ .end_pos = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE,
+ .qiov = qiov,
+ .flags = flags,
+ };
+ qemu_iovec_init(&acb.cur_qiov, qiov->niov);
- acb->flags = flags;
- acb->qiov = qiov;
- acb->qiov_offset = 0;
- acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
- acb->backing_qiov = NULL;
- acb->request.l2_table = NULL;
- qemu_iovec_init(&acb->cur_qiov, qiov->niov);
+ trace_qed_aio_setup(bs->opaque, &acb, sector_num, nb_sectors, NULL, flags);
/* Start request */
- qed_aio_start_io(acb);
- return &acb->common;
+ return qed_aio_next_io(&acb);
}
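/*
 * Lifetime note: QEDAIOCB now lives on the coroutine stack instead of being
 * allocated with qemu_aio_get(), which is why qed_aiocb_info and the
 * completion BH were removed earlier in this patch. cur_pos/end_pos are byte
 * positions derived from the sector arguments (one sector =
 * BDRV_SECTOR_SIZE = 512 bytes).
 */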
-static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+ return qed_co_request(bs, sector_num, qiov, nb_sectors, 0);
}
-static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
- opaque, QED_AIOCB_WRITE);
-}
-
-typedef struct {
- Coroutine *co;
- int ret;
- bool done;
-} QEDWriteZeroesCB;
-
-static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
+static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
{
- QEDWriteZeroesCB *cb = opaque;
-
- cb->done = true;
- cb->ret = ret;
- if (cb->co) {
- aio_co_wake(cb->co);
- }
+ return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
}
static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset,
- int count,
+ int bytes,
BdrvRequestFlags flags)
{
- BlockAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
- QEDWriteZeroesCB cb = { .done = false };
QEMUIOVector qiov;
struct iovec iov;
/* Fall back if the request is not aligned */
if (qed_offset_into_cluster(s, offset) ||
- qed_offset_into_cluster(s, count)) {
+ qed_offset_into_cluster(s, bytes)) {
return -ENOTSUP;
}
@@ -1499,22 +1334,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
* then it will be allocated during request processing.
*/
iov.iov_base = NULL;
- iov.iov_len = count;
+ iov.iov_len = bytes;
qemu_iovec_init_external(&qiov, &iov, 1);
- blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov,
- count >> BDRV_SECTOR_BITS,
- qed_co_pwrite_zeroes_cb, &cb,
- QED_AIOCB_WRITE | QED_AIOCB_ZERO);
- if (!blockacb) {
- return -EIO;
- }
- if (!cb.done) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
- assert(cb.done);
- return cb.ret;
+ return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov,
+ bytes >> BDRV_SECTOR_BITS,
+ QED_AIOCB_WRITE | QED_AIOCB_ZERO);
}
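/*
 * Note that the qiov above deliberately carries iov_base == NULL: a zero
 * write needs no payload unless it lands on an allocated cluster, in which
 * case qed_aio_write_inplace() (earlier in this patch) lazily allocates and
 * memsets a bounce buffer. Requests not aligned to cluster boundaries return
 * -ENOTSUP so the generic block layer falls back to plain writes.
 */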
static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
@@ -1710,8 +1535,8 @@ static BlockDriver bdrv_qed = {
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
- .bdrv_aio_readv = bdrv_qed_aio_readv,
- .bdrv_aio_writev = bdrv_qed_aio_writev,
+ .bdrv_co_readv = bdrv_qed_co_readv,
+ .bdrv_co_writev = bdrv_qed_co_writev,
.bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,