Diffstat (limited to 'block')

 block/curl.c           |  20
 block/qcow.c           | 380
 block/qcow2-cluster.c  |   6
 block/qcow2-refcount.c |  22
 block/qcow2-snapshot.c |  15
 block/qcow2.c          | 418
 block/qcow2.h          |   2
 block/sheepdog.c       | 150
 8 files changed, 424 insertions(+), 589 deletions(-)
diff --git a/block/curl.c b/block/curl.c
index 5c157bc609..f3f61cc8a1 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -229,6 +229,23 @@ static void curl_multi_do(void *arg)
{
CURLState *state = NULL;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char**)&state);
+
+ /* ACBs for successful messages get completed in curl_read_cb */
+ if (msg->data.result != CURLE_OK) {
+ int i;
+ for (i = 0; i < CURL_NUM_ACB; i++) {
+ CURLAIOCB *acb = state->acb[i];
+
+ if (acb == NULL) {
+ continue;
+ }
+
+ acb->common.cb(acb->common.opaque, -EIO);
+ qemu_aio_release(acb);
+ state->acb[i] = NULL;
+ }
+ }
+
curl_clean_state(state);
break;
}
@@ -277,7 +294,8 @@ static CURLState *curl_init_state(BDRVCURLState *s)
curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
-
+ curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
+
#ifdef DEBUG_VERBOSE
curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
#endif
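
For reference: CURLOPT_FAILONERROR makes libcurl report HTTP responses with a status of 400 or above as failed transfers (CURLE_HTTP_RETURNED_ERROR), which is what lets the curl_multi_do() hunk above see msg->data.result != CURLE_OK and complete the pending ACBs with -EIO instead of handing an HTML error page to the guest. A minimal standalone sketch of the option, using the easy interface rather than the driver's multi interface; the URL is a placeholder:

    /* Build: cc failonerror.c -lcurl */
    #include <curl/curl.h>
    #include <stdio.h>

    int main(void)
    {
        CURL *curl = curl_easy_init();
        CURLcode res;

        if (!curl) {
            return 1;
        }
        curl_easy_setopt(curl, CURLOPT_URL, "http://example.com/missing.img");
        /* Without this, an HTTP 404 still counts as a successful
         * transfer and the error page body reaches the read callback. */
        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
        res = curl_easy_perform(curl);
        if (res != CURLE_OK) {
            fprintf(stderr, "transfer failed: %s\n", curl_easy_strerror(res));
        }
        curl_easy_cleanup(curl);
        return res == CURLE_OK ? 0 : 1;
    }
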
diff --git a/block/qcow.c b/block/qcow.c
index e155d3c002..c8bfecc1cb 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -159,6 +159,8 @@ static int qcow_open(BlockDriverState *bs, int flags)
goto fail;
bs->backing_file[len] = '\0';
}
+
+ qemu_co_mutex_init(&s->lock);
return 0;
fail:
@@ -190,24 +192,6 @@ static int qcow_set_key(BlockDriverState *bs, const char *key)
return -1;
if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
return -1;
-#if 0
- /* test */
- {
- uint8_t in[16];
- uint8_t out[16];
- uint8_t tmp[16];
- for(i=0;i<16;i++)
- in[i] = i;
- AES_encrypt(in, tmp, &s->aes_encrypt_key);
- AES_decrypt(tmp, out, &s->aes_decrypt_key);
- for(i = 0; i < 16; i++)
- printf(" %02x", tmp[i]);
- printf("\n");
- for(i = 0; i < 16; i++)
- printf(" %02x", out[i]);
- printf("\n");
- }
-#endif
return 0;
}
@@ -441,296 +425,178 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
return 0;
}
-#if 0
-
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
- int ret, index_in_cluster, n;
+ int index_in_cluster;
+ int ret = 0, n;
uint64_t cluster_offset;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
- while (nb_sectors > 0) {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ while (nb_sectors != 0) {
+ /* prepare next request */
+ cluster_offset = get_cluster_offset(bs, sector_num << 9,
+ 0, 0, 0, 0);
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors)
+ if (n > nb_sectors) {
n = nb_sectors;
+ }
+
if (!cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
- if (ret < 0)
- return -1;
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
} else {
+ /* Note: in this case, no need to wait */
memset(buf, 0, 512 * n);
}
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- if (decompress_cluster(bs, cluster_offset) < 0)
- return -1;
- memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
+ /* add AIO support for compressed blocks ? */
+ if (decompress_cluster(bs, cluster_offset) < 0) {
+ goto fail;
+ }
+ memcpy(buf,
+ s->cluster_cache + index_in_cluster * 512, 512 * n);
} else {
- ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
- if (ret != n * 512)
- return -1;
+ if ((cluster_offset & 511) != 0) {
+ goto fail;
+ }
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
if (s->crypt_method) {
- encrypt_sectors(s, sector_num, buf, buf, n, 0,
+ encrypt_sectors(s, sector_num, buf, buf,
+ n, 0,
&s->aes_decrypt_key);
}
}
+ ret = 0;
+
nb_sectors -= n;
sector_num += n;
buf += n * 512;
}
- return 0;
-}
-#endif
-typedef struct QCowAIOCB {
- BlockDriverAIOCB common;
- int64_t sector_num;
- QEMUIOVector *qiov;
- uint8_t *buf;
- void *orig_buf;
- int nb_sectors;
- int n;
- uint64_t cluster_offset;
- uint8_t *cluster_data;
- struct iovec hd_iov;
- bool is_write;
- QEMUBH *bh;
- QEMUIOVector hd_qiov;
- BlockDriverAIOCB *hd_aiocb;
-} QCowAIOCB;
-
-static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
- if (acb->hd_aiocb)
- bdrv_aio_cancel(acb->hd_aiocb);
- qemu_aio_release(acb);
-}
-
-static AIOPool qcow_aio_pool = {
- .aiocb_size = sizeof(QCowAIOCB),
- .cancel = qcow_aio_cancel,
-};
-
-static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- int is_write)
-{
- QCowAIOCB *acb;
-
- acb = qemu_aio_get(&qcow_aio_pool, bs, NULL, NULL);
- if (!acb)
- return NULL;
- acb->hd_aiocb = NULL;
- acb->sector_num = sector_num;
- acb->qiov = qiov;
- acb->is_write = is_write;
+done:
+ qemu_co_mutex_unlock(&s->lock);
if (qiov->niov > 1) {
- acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
- if (is_write)
- qemu_iovec_to_buffer(qiov, acb->buf);
- } else {
- acb->buf = (uint8_t *)qiov->iov->iov_base;
+ qemu_iovec_from_buffer(qiov, orig_buf, qiov->size);
+ qemu_vfree(orig_buf);
}
- acb->nb_sectors = nb_sectors;
- acb->n = 0;
- acb->cluster_offset = 0;
- return acb;
+
+ return ret;
+
+fail:
+ ret = -EIO;
+ goto done;
}
-static int qcow_aio_read_cb(void *opaque)
+static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
- QCowAIOCB *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
- int ret;
+ uint64_t cluster_offset;
+ const uint8_t *src_buf;
+ int ret = 0, n;
+ uint8_t *cluster_data = NULL;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
- acb->hd_aiocb = NULL;
+ s->cluster_cache_offset = -1; /* disable compressed cache */
- redo:
- /* post process the read buffer */
- if (!acb->cluster_offset) {
- /* nothing to do */
- } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* nothing to do */
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ qemu_iovec_to_buffer(qiov, buf);
} else {
- if (s->crypt_method) {
- encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
- acb->n, 0,
- &s->aes_decrypt_key);
- }
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
}
- acb->nb_sectors -= acb->n;
- acb->sector_num += acb->n;
- acb->buf += acb->n * 512;
+ qemu_co_mutex_lock(&s->lock);
- if (acb->nb_sectors == 0) {
- /* request completed */
- return 0;
- }
+ while (nb_sectors != 0) {
- /* prepare next AIO request */
- acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9,
- 0, 0, 0, 0);
- index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
- acb->n = s->cluster_sectors - index_in_cluster;
- if (acb->n > acb->nb_sectors)
- acb->n = acb->nb_sectors;
-
- if (!acb->cluster_offset) {
- if (bs->backing_hd) {
- /* read from the base image */
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->n * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
- acb->n, &acb->hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- return -EIO;
- }
- } else {
- /* Note: in this case, no need to wait */
- memset(acb->buf, 0, 512 * acb->n);
- goto redo;
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
}
- } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* add AIO support for compressed blocks ? */
- if (decompress_cluster(bs, acb->cluster_offset) < 0) {
- return -EIO;
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+ index_in_cluster,
+ index_in_cluster + n);
+ if (!cluster_offset || (cluster_offset & 511) != 0) {
+ ret = -EIO;
+ break;
}
- memcpy(acb->buf,
- s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
- goto redo;
- } else {
- if ((acb->cluster_offset & 511) != 0) {
- return -EIO;
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = g_malloc0(s->cluster_size);
+ }
+ encrypt_sectors(s, sector_num, cluster_data, buf,
+ n, 1, &s->aes_encrypt_key);
+ src_buf = cluster_data;
+ } else {
+ src_buf = buf;
}
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->n * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+
+ hd_iov.iov_base = (void *)src_buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (acb->cluster_offset >> 9) + index_in_cluster,
- acb->n, &acb->hd_qiov);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
- return ret;
+ break;
}
- }
-
- return 1;
-}
-
-static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- QCowAIOCB *acb;
- int ret;
-
- acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
-
- qemu_co_mutex_lock(&s->lock);
- do {
- ret = qcow_aio_read_cb(acb);
- } while (ret > 0);
- qemu_co_mutex_unlock(&s->lock);
+ ret = 0;
- if (acb->qiov->niov > 1) {
- qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
- qemu_vfree(acb->orig_buf);
- }
- qemu_aio_release(acb);
-
- return ret;
-}
-
-static int qcow_aio_write_cb(void *opaque)
-{
- QCowAIOCB *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- uint64_t cluster_offset;
- const uint8_t *src_buf;
- int ret;
-
- acb->hd_aiocb = NULL;
-
- acb->nb_sectors -= acb->n;
- acb->sector_num += acb->n;
- acb->buf += acb->n * 512;
-
- if (acb->nb_sectors == 0) {
- /* request completed */
- return 0;
- }
-
- index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
- acb->n = s->cluster_sectors - index_in_cluster;
- if (acb->n > acb->nb_sectors)
- acb->n = acb->nb_sectors;
- cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0,
- index_in_cluster,
- index_in_cluster + acb->n);
- if (!cluster_offset || (cluster_offset & 511) != 0) {
- return -EIO;
- }
- if (s->crypt_method) {
- if (!acb->cluster_data) {
- acb->cluster_data = g_malloc0(s->cluster_size);
- }
- encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
- acb->n, 1, &s->aes_encrypt_key);
- src_buf = acb->cluster_data;
- } else {
- src_buf = acb->buf;
- }
-
- acb->hd_iov.iov_base = (void *)src_buf;
- acb->hd_iov.iov_len = acb->n * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- acb->n, &acb->hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- return ret;
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
}
- return 1;
-}
-
-static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- QCowAIOCB *acb;
- int ret;
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
-
- qemu_co_mutex_lock(&s->lock);
- do {
- ret = qcow_aio_write_cb(acb);
- } while (ret > 0);
qemu_co_mutex_unlock(&s->lock);
- if (acb->qiov->niov > 1) {
- qemu_vfree(acb->orig_buf);
+ if (qiov->niov > 1) {
+ qemu_vfree(orig_buf);
}
- qemu_aio_release(acb);
+ g_free(cluster_data);
return ret;
}
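
A note on the bounce buffer in the converted qcow_co_readv/writev above: when the caller's QEMUIOVector has more than one element, the request is linearized into a single allocation (qemu_blockalign / qemu_iovec_to_buffer) so the per-cluster loop can work on contiguous 512-byte runs, then scattered back afterwards (qemu_iovec_from_buffer). A standalone sketch of that gather/scatter pattern, with plain POSIX iovec and malloc standing in for QEMU's helpers; iov_gather/iov_scatter are illustrative names, not QEMU API:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/uio.h>

    static uint8_t *iov_gather(const struct iovec *iov, int niov, size_t *total)
    {
        size_t size = 0, off = 0;
        uint8_t *buf;
        int i;

        for (i = 0; i < niov; i++) {
            size += iov[i].iov_len;
        }
        buf = malloc(size);
        if (!buf) {
            return NULL;
        }
        for (i = 0; i < niov; i++) {
            memcpy(buf + off, iov[i].iov_base, iov[i].iov_len);
            off += iov[i].iov_len;
        }
        *total = size;
        return buf;
    }

    static void iov_scatter(const struct iovec *iov, int niov, const uint8_t *buf)
    {
        size_t off = 0;
        int i;

        for (i = 0; i < niov; i++) {
            memcpy(iov[i].iov_base, buf + off, iov[i].iov_len);
            off += iov[i].iov_len;
        }
    }

    int main(void)
    {
        char a[] = "abcd", b[] = "efgh", out[8];
        struct iovec src[2] = {
            { .iov_base = a, .iov_len = 4 },
            { .iov_base = b, .iov_len = 4 },
        };
        struct iovec dst[2] = {
            { .iov_base = out,     .iov_len = 5 },
            { .iov_base = out + 5, .iov_len = 3 },
        };
        size_t total;
        uint8_t *bounce = iov_gather(src, 2, &total);

        if (!bounce) {
            return 1;
        }
        /* ...a per-cluster loop would operate on bounce[0..total) here... */
        iov_scatter(dst, 2, bounce);
        free(bounce);
        return 0;
    }
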
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 9269ddaefd..e06be64876 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -53,7 +53,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
}
#ifdef DEBUG_ALLOC2
- printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
+ fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
@@ -381,10 +381,10 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
* For a given offset of the disk image, find the cluster offset in
* qcow2 file. The offset is stored in *cluster_offset.
*
- * on entry, *num is the number of contiguous clusters we'd like to
+ * on entry, *num is the number of contiguous sectors we'd like to
* access following offset.
*
- * on exit, *num is the number of contiguous clusters we can read.
+ * on exit, *num is the number of contiguous sectors we can read.
*
* Return 0, if the offset is found
* Return -errno, otherwise.
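
The corrected comment above pins down the in/out contract of *num: sectors requested on entry, sectors actually covered by the returned mapping on exit. A toy standalone illustration of that contract (get_mapping is an illustrative stand-in, not qcow2_get_cluster_offset itself):

    #include <stdint.h>
    #include <stdio.h>

    /* toy mapping: 128-sector clusters, identity-mapped */
    static int get_mapping(int64_t sector, int *num, uint64_t *host_off)
    {
        int in_cluster = sector & 127;
        int run = 128 - in_cluster;   /* sectors left in this cluster */

        if (*num > run) {
            *num = run;               /* clamp to the contiguous run */
        }
        *host_off = (uint64_t)sector * 512;
        return 0;
    }

    int main(void)
    {
        int num = 300;                /* sectors we'd like to access */
        uint64_t off;

        get_mapping(1000, &num, &off);
        printf("can access %d contiguous sectors at 0x%llx\n",
               num, (unsigned long long)off);
        return 0;
    }
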
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2a915be57a..9605367777 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -422,7 +422,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int ret;
#ifdef DEBUG_ALLOC2
- printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
+ fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
offset, length, addend);
#endif
if (length < 0) {
@@ -556,7 +556,7 @@ retry:
}
}
#ifdef DEBUG_ALLOC2
- printf("alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
+ fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
size,
(s->free_cluster_index - nb_clusters) << s->cluster_bits);
#endif
@@ -680,24 +680,6 @@ void qcow2_free_any_clusters(BlockDriverState *bs,
-void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
- int64_t size)
-{
- int refcount;
- int64_t start, last, cluster_offset;
- uint16_t *p;
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + size - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size) {
- p = &s->refcount_block[cluster_offset >> s->cluster_bits];
- refcount = be16_to_cpu(*p);
- refcount++;
- *p = cpu_to_be16(refcount);
- }
-}
-
/* update the refcounts of snapshots and the copied flag */
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
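
The removed qcow2_create_refcount_update() walked every cluster a range touches and bumped its 16-bit big-endian refcount in place via be16_to_cpu/cpu_to_be16. A tiny standalone sketch of that endian round-trip, with POSIX htons/ntohs standing in for QEMU's byte-order helpers:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t refcount_be = htons(41);       /* on-disk representation */
        uint16_t refcount = ntohs(refcount_be); /* to host order */

        refcount++;                             /* one more reference */
        refcount_be = htons(refcount);          /* back to disk order */
        printf("refcount is now %u\n", (unsigned)ntohs(refcount_be));
        return 0;
    }
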
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 3bd2a30d35..3e6bf8b6f3 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -303,7 +303,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (qcow2_write_snapshots(bs) < 0)
goto fail;
#ifdef DEBUG_ALLOC
- qcow2_check_refcounts(bs);
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result);
+ }
#endif
return 0;
fail:
@@ -353,7 +356,10 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
#ifdef DEBUG_ALLOC
- qcow2_check_refcounts(bs);
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result);
+ }
#endif
return 0;
fail:
@@ -390,7 +396,10 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
return ret;
}
#ifdef DEBUG_ALLOC
- qcow2_check_refcounts(bs);
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result);
+ }
#endif
return 0;
}
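
All three DEBUG_ALLOC hunks above adapt to the new qcow2_check_refcounts() signature, which fills a caller-zeroed BdrvCheckResult instead of reporting only through its return value. A generic sketch of that out-parameter pattern; CheckResult and check_refcounts() here are illustrative stand-ins, not QEMU's definitions:

    #include <stdio.h>

    typedef struct CheckResult {
        int corruptions;
        int leaks;
    } CheckResult;

    static int check_refcounts(CheckResult *res)
    {
        /* a real checker would walk refcount tables and count problems */
        res->leaks += 1;
        return 0;
    }

    int main(void)
    {
        CheckResult result = {0};   /* caller zeroes, callee accumulates */

        if (check_refcounts(&result) == 0) {
            printf("%d corruptions, %d leaks\n",
                   result.corruptions, result.leaks);
        }
        return 0;
    }
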
diff --git a/block/qcow2.c b/block/qcow2.c
index bfff6cd963..b725d68b1d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -87,6 +87,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
while (offset < end_offset) {
#ifdef DEBUG_EXT
+ BDRVQcowState *s = bs->opaque;
/* Sanity check */
if (offset > s->cluster_size)
printf("qcow2_read_extension: suspicious offset %lu\n", offset);
@@ -280,7 +281,10 @@ static int qcow2_open(BlockDriverState *bs, int flags)
qemu_co_mutex_init(&s->lock);
#ifdef DEBUG_ALLOC
- qcow2_check_refcounts(bs);
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result);
+ }
#endif
return ret;
@@ -372,201 +376,127 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
return n1;
}
-typedef struct QCowAIOCB {
- BlockDriverAIOCB common;
- int64_t sector_num;
- QEMUIOVector *qiov;
- int remaining_sectors;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t bytes_done;
- uint64_t cluster_offset;
- uint8_t *cluster_data;
- bool is_write;
- QEMUIOVector hd_qiov;
- QEMUBH *bh;
- QCowL2Meta l2meta;
- QLIST_ENTRY(QCowAIOCB) next_depend;
-} QCowAIOCB;
-
-static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
- qemu_aio_release(acb);
-}
-
-static AIOPool qcow2_aio_pool = {
- .aiocb_size = sizeof(QCowAIOCB),
- .cancel = qcow2_aio_cancel,
-};
-
-/*
- * Returns 0 when the request is completed successfully, 1 when there is still
- * a part left to do and -errno in error cases.
- */
-static int qcow2_aio_read_cb(QCowAIOCB *acb)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
{
- BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n1;
int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset = 0;
+ uint64_t bytes_done = 0;
+ QEMUIOVector hd_qiov;
+ uint8_t *cluster_data = NULL;
- /* post process the read buffer */
- if (!acb->cluster_offset) {
- /* nothing to do */
- } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* nothing to do */
- } else {
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
- acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key);
- qemu_iovec_reset(&acb->hd_qiov);
- qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
- acb->cur_nr_sectors * 512);
- qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data,
- 512 * acb->cur_nr_sectors);
- }
- }
+ qemu_iovec_init(&hd_qiov, qiov->niov);
- acb->remaining_sectors -= acb->cur_nr_sectors;
- acb->sector_num += acb->cur_nr_sectors;
- acb->bytes_done += acb->cur_nr_sectors * 512;
+ qemu_co_mutex_lock(&s->lock);
- if (acb->remaining_sectors == 0) {
- /* request completed */
- return 0;
- }
+ while (remaining_sectors != 0) {
- /* prepare next AIO request */
- acb->cur_nr_sectors = acb->remaining_sectors;
- if (s->crypt_method) {
- acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- }
+ /* prepare next request */
+ cur_nr_sectors = remaining_sectors;
+ if (s->crypt_method) {
+ cur_nr_sectors = MIN(cur_nr_sectors,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ }
- ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
- &acb->cur_nr_sectors, &acb->cluster_offset);
- if (ret < 0) {
- return ret;
- }
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9,
+ &cur_nr_sectors, &cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
- index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-
- qemu_iovec_reset(&acb->hd_qiov);
- qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
- acb->cur_nr_sectors * 512);
-
- if (!acb->cluster_offset) {
-
- if (bs->backing_hd) {
- /* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov,
- acb->sector_num, acb->cur_nr_sectors);
- if (n1 > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
- n1, &acb->hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- return ret;
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ if (!cluster_offset) {
+
+ if (bs->backing_hd) {
+ /* read from the base image */
+ n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+ sector_num, cur_nr_sectors);
+ if (n1 > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n1, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
}
+ } else {
+ /* Note: in this case, no need to wait */
+ qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors);
+ }
+ } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ /* add AIO support for compressed blocks ? */
+ ret = qcow2_decompress_cluster(bs, cluster_offset);
+ if (ret < 0) {
+ goto fail;
}
- return 1;
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
- return 1;
- }
- } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* add AIO support for compressed blocks ? */
- ret = qcow2_decompress_cluster(bs, acb->cluster_offset);
- if (ret < 0) {
- return ret;
- }
- qemu_iovec_from_buffer(&acb->hd_qiov,
- s->cluster_cache + index_in_cluster * 512,
- 512 * acb->cur_nr_sectors);
+ qemu_iovec_from_buffer(&hd_qiov,
+ s->cluster_cache + index_in_cluster * 512,
+ 512 * cur_nr_sectors);
+ } else {
+ if ((cluster_offset & 511) != 0) {
+ ret = -EIO;
+ goto fail;
+ }
- return 1;
- } else {
- if ((acb->cluster_offset & 511) != 0) {
- return -EIO;
- }
+ if (s->crypt_method) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ */
+ if (!cluster_data) {
+ cluster_data =
+ g_malloc0(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
- if (s->crypt_method) {
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!acb->cluster_data) {
- acb->cluster_data =
- g_malloc0(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ assert(cur_nr_sectors <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ 512 * cur_nr_sectors);
}
- assert(acb->cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- qemu_iovec_reset(&acb->hd_qiov);
- qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
- 512 * acb->cur_nr_sectors);
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (s->crypt_method) {
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+ qemu_iovec_from_buffer(&hd_qiov, cluster_data,
+ 512 * cur_nr_sectors);
+ }
}
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (acb->cluster_offset >> 9) + index_in_cluster,
- acb->cur_nr_sectors, &acb->hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- return ret;
- }
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
}
+ ret = 0;
- return 1;
-}
-
-static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque, int is_write)
-{
- QCowAIOCB *acb;
-
- acb = qemu_aio_get(&qcow2_aio_pool, bs, cb, opaque);
- if (!acb)
- return NULL;
- acb->sector_num = sector_num;
- acb->qiov = qiov;
- acb->is_write = is_write;
-
- qemu_iovec_init(&acb->hd_qiov, qiov->niov);
-
- acb->bytes_done = 0;
- acb->remaining_sectors = nb_sectors;
- acb->cur_nr_sectors = 0;
- acb->cluster_offset = 0;
- acb->l2meta.nb_clusters = 0;
- qemu_co_queue_init(&acb->l2meta.dependent_requests);
- return acb;
-}
-
-static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- QCowAIOCB *acb;
- int ret;
-
- acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0);
-
- qemu_co_mutex_lock(&s->lock);
- do {
- ret = qcow2_aio_read_cb(acb);
- } while (ret > 0);
+fail:
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_destroy(&acb->hd_qiov);
- qemu_aio_release(acb);
+ qemu_iovec_destroy(&hd_qiov);
+ g_free(cluster_data);
return ret;
}
@@ -586,104 +516,100 @@ static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
}
}
-/*
- * Returns 0 when the request is completed successfully, 1 when there is still
- * a part left to do and -errno in error cases.
- */
-static int qcow2_aio_write_cb(QCowAIOCB *acb)
+static int qcow2_co_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ int remaining_sectors,
+ QEMUIOVector *qiov)
{
- BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
int n_end;
int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ QCowL2Meta l2meta;
+ uint64_t cluster_offset;
+ QEMUIOVector hd_qiov;
+ uint64_t bytes_done = 0;
+ uint8_t *cluster_data = NULL;
- ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
+ l2meta.nb_clusters = 0;
+ qemu_co_queue_init(&l2meta.dependent_requests);
- run_dependent_requests(s, &acb->l2meta);
+ qemu_iovec_init(&hd_qiov, qiov->niov);
- if (ret < 0) {
- return ret;
- }
+ s->cluster_cache_offset = -1; /* disable compressed cache */
- acb->remaining_sectors -= acb->cur_nr_sectors;
- acb->sector_num += acb->cur_nr_sectors;
- acb->bytes_done += acb->cur_nr_sectors * 512;
+ qemu_co_mutex_lock(&s->lock);
- if (acb->remaining_sectors == 0) {
- /* request completed */
- return 0;
- }
+ while (remaining_sectors != 0) {
- index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
- n_end = index_in_cluster + acb->remaining_sectors;
- if (s->crypt_method &&
- n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
- n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n_end = index_in_cluster + remaining_sectors;
+ if (s->crypt_method &&
+ n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
+ n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+ }
- ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
- index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
- if (ret < 0) {
- return ret;
- }
+ ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
+ index_in_cluster, n_end, &cur_nr_sectors, &l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
- acb->cluster_offset = acb->l2meta.cluster_offset;
- assert((acb->cluster_offset & 511) == 0);
+ cluster_offset = l2meta.cluster_offset;
+ assert((cluster_offset & 511) == 0);
- qemu_iovec_reset(&acb->hd_qiov);
- qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
- acb->cur_nr_sectors * 512);
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
- if (s->crypt_method) {
- if (!acb->cluster_data) {
- acb->cluster_data = g_malloc0(QCOW_MAX_CRYPT_CLUSTERS *
- s->cluster_size);
- }
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = g_malloc0(QCOW_MAX_CRYPT_CLUSTERS *
+ s->cluster_size);
+ }
- assert(acb->hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buffer(&acb->hd_qiov, acb->cluster_data);
+ assert(hd_qiov.size <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_iovec_to_buffer(&hd_qiov, cluster_data);
- qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
- acb->cluster_data, acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
- qemu_iovec_reset(&acb->hd_qiov);
- qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
- acb->cur_nr_sectors * 512);
- }
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ cur_nr_sectors * 512);
+ }
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
- (acb->cluster_offset >> 9) + index_in_cluster,
- acb->cur_nr_sectors, &acb->hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- return ret;
- }
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
- return 1;
-}
+ ret = qcow2_alloc_cluster_link_l2(bs, &l2meta);
-static int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- QCowAIOCB *acb;
- int ret;
+ run_dependent_requests(s, &l2meta);
- acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1);
- s->cluster_cache_offset = -1; /* disable compressed cache */
+ if (ret < 0) {
+ goto fail;
+ }
- qemu_co_mutex_lock(&s->lock);
- do {
- ret = qcow2_aio_write_cb(acb);
- } while (ret > 0);
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+ ret = 0;
+
+fail:
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_destroy(&acb->hd_qiov);
- qemu_aio_release(acb);
+ qemu_iovec_destroy(&hd_qiov);
+ g_free(cluster_data);
return ret;
}
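
Both converted loops (qcow and qcow2) share the same per-iteration cluster arithmetic: with cluster_sectors a power of two, sector_num & (cluster_sectors - 1) is the sector's offset inside its cluster, and cluster_sectors - index_in_cluster caps the run at the cluster boundary. A worked standalone sketch with example values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int cluster_sectors = 128;   /* 64 KiB clusters, 512-byte sectors */
        int64_t sector_num = 1000;   /* current guest sector */
        int remaining_sectors = 300; /* what the request still wants */

        while (remaining_sectors != 0) {
            int index_in_cluster = sector_num & (cluster_sectors - 1);
            int n = cluster_sectors - index_in_cluster;

            if (n > remaining_sectors) {
                n = remaining_sectors;
            }
            printf("sector %5ld: %3d sectors, offset %3d in cluster\n",
                   (long)sector_num, n, index_in_cluster);
            sector_num += n;
            remaining_sectors -= n;
        }
        return 0;
    }
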
diff --git a/block/qcow2.h b/block/qcow2.h
index de23abe1a4..c8ca3bc574 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -189,8 +189,6 @@ void qcow2_free_clusters(BlockDriverState *bs,
void qcow2_free_any_clusters(BlockDriverState *bs,
uint64_t cluster_offset, int nb_clusters);
-void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
- int64_t size);
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 57b6e1aad7..c1f6e07ec1 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -274,7 +274,7 @@ struct SheepdogAIOCB {
int ret;
enum AIOCBState aiocb_type;
- QEMUBH *bh;
+ Coroutine *coroutine;
void (*aio_done_func)(SheepdogAIOCB *);
int canceled;
@@ -295,6 +295,10 @@ typedef struct BDRVSheepdogState {
char *port;
int fd;
+ CoMutex lock;
+ Coroutine *co_send;
+ Coroutine *co_recv;
+
uint32_t aioreq_seq_num;
QLIST_HEAD(outstanding_aio_head, AIOReq) outstanding_aio_head;
} BDRVSheepdogState;
@@ -346,19 +350,16 @@ static const char * sd_strerror(int err)
/*
* Sheepdog I/O handling:
*
- * 1. In the sd_aio_readv/writev, read/write requests are added to the
- * QEMU Bottom Halves.
- *
- * 2. In sd_readv_writev_bh_cb, the callbacks of BHs, we send the I/O
- * requests to the server and link the requests to the
- * outstanding_list in the BDRVSheepdogState. we exits the
- * function without waiting for receiving the response.
+ * 1. In sd_co_rw_vector, we send the I/O requests to the server and
+ * link the requests to the outstanding_list in the
+ * BDRVSheepdogState. The function exits without waiting for the
+ * response.
*
- * 3. We receive the response in aio_read_response, the fd handler to
+ * 2. We receive the response in aio_read_response, the fd handler to
* the sheepdog connection. If metadata update is needed, we send
* the write request to the vdi object in sd_write_done, the write
- * completion function. The AIOCB callback is not called until all
- * the requests belonging to the AIOCB are finished.
+ * completion function. We switch back to sd_co_readv/writev after
+ * all the requests belonging to the AIOCB are finished.
*/
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
@@ -398,7 +399,7 @@ static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
static void sd_finish_aiocb(SheepdogAIOCB *acb)
{
if (!acb->canceled) {
- acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_coroutine_enter(acb->coroutine, NULL);
}
qemu_aio_release(acb);
}
@@ -411,7 +412,8 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
* Sheepdog cannot cancel the requests which are already sent to
* the servers, so we just complete the request with -EIO here.
*/
- acb->common.cb(acb->common.opaque, -EIO);
+ acb->ret = -EIO;
+ qemu_coroutine_enter(acb->coroutine, NULL);
acb->canceled = 1;
}
@@ -435,24 +437,12 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
acb->aio_done_func = NULL;
acb->canceled = 0;
- acb->bh = NULL;
+ acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
QLIST_INIT(&acb->aioreq_head);
return acb;
}
-static int sd_schedule_bh(QEMUBHFunc *cb, SheepdogAIOCB *acb)
-{
- if (acb->bh) {
- error_report("bug: %d %d", acb->aiocb_type, acb->aiocb_type);
- return -EIO;
- }
-
- acb->bh = qemu_bh_new(cb, acb);
- qemu_bh_schedule(acb->bh);
- return 0;
-}
-
#ifdef _WIN32
struct msghdr {
@@ -635,7 +625,13 @@ static int do_readv_writev(int sockfd, struct iovec *iov, int len,
again:
ret = do_send_recv(sockfd, iov, len, iov_offset, write);
if (ret < 0) {
- if (errno == EINTR || errno == EAGAIN) {
+ if (errno == EINTR) {
+ goto again;
+ }
+ if (errno == EAGAIN) {
+ if (qemu_in_coroutine()) {
+ qemu_coroutine_yield();
+ }
goto again;
}
error_report("failed to recv a rsp, %s", strerror(errno));
@@ -793,14 +789,14 @@ static void aio_read_response(void *opaque)
unsigned long idx;
if (QLIST_EMPTY(&s->outstanding_aio_head)) {
- return;
+ goto out;
}
/* read a header */
ret = do_read(fd, &rsp, sizeof(rsp));
if (ret) {
error_report("failed to get the header, %s", strerror(errno));
- return;
+ goto out;
}
/* find the right aio_req from the outstanding_aio list */
@@ -811,7 +807,7 @@ static void aio_read_response(void *opaque)
}
if (!aio_req) {
error_report("cannot find aio_req %x", rsp.id);
- return;
+ goto out;
}
acb = aio_req->aiocb;
@@ -847,7 +843,7 @@ static void aio_read_response(void *opaque)
aio_req->iov_offset);
if (ret) {
error_report("failed to get the data, %s", strerror(errno));
- return;
+ goto out;
}
break;
}
@@ -861,10 +857,30 @@ static void aio_read_response(void *opaque)
if (!rest) {
/*
* We've finished all requests which belong to the AIOCB, so
- * we can call the callback now.
+ * we can switch back to sd_co_readv/writev now.
*/
acb->aio_done_func(acb);
}
+out:
+ s->co_recv = NULL;
+}
+
+static void co_read_response(void *opaque)
+{
+ BDRVSheepdogState *s = opaque;
+
+ if (!s->co_recv) {
+ s->co_recv = qemu_coroutine_create(aio_read_response);
+ }
+
+ qemu_coroutine_enter(s->co_recv, opaque);
+}
+
+static void co_write_request(void *opaque)
+{
+ BDRVSheepdogState *s = opaque;
+
+ qemu_coroutine_enter(s->co_send, NULL);
}
static int aio_flush_request(void *opaque)
@@ -924,7 +940,7 @@ static int get_sheep_fd(BDRVSheepdogState *s)
return -1;
}
- qemu_aio_set_fd_handler(fd, aio_read_response, NULL, aio_flush_request,
+ qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request,
NULL, s);
return fd;
}
@@ -1091,6 +1107,10 @@ static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.id = aio_req->id;
+ qemu_co_mutex_lock(&s->lock);
+ s->co_send = qemu_coroutine_self();
+ qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request,
+ aio_flush_request, NULL, s);
set_cork(s->fd, 1);
/* send a header */
@@ -1109,6 +1129,9 @@ static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
set_cork(s->fd, 0);
+ qemu_aio_set_fd_handler(s->fd, co_read_response, NULL,
+ aio_flush_request, NULL, s);
+ qemu_co_mutex_unlock(&s->lock);
return 0;
}
@@ -1225,6 +1248,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE;
strncpy(s->name, vdi, sizeof(s->name));
+ qemu_co_mutex_init(&s->lock);
g_free(buf);
return 0;
out:
@@ -1491,7 +1515,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
/*
* This function is called after writing data objects. If we need to
* update metadata, this sends a write request to the vdi object.
- * Otherwise, this calls the AIOCB callback.
+ * Otherwise, this switches back to sd_co_readv/writev.
*/
static void sd_write_done(SheepdogAIOCB *acb)
{
@@ -1587,8 +1611,11 @@ out:
* waiting the response. The responses are received in the
* `aio_read_response' function which is called from the main loop as
* a fd handler.
+ *
+ * Returns 1 when we need to wait for a response, 0 when no request
+ * was sent, and -errno in error cases.
*/
-static void sd_readv_writev_bh_cb(void *p)
+static int sd_co_rw_vector(void *p)
{
SheepdogAIOCB *acb = p;
int ret = 0;
@@ -1600,9 +1627,6 @@ static void sd_readv_writev_bh_cb(void *p)
SheepdogInode *inode = &s->inode;
AIOReq *aio_req;
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
-
if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) {
/*
* In the case we open the snapshot VDI, Sheepdog creates the
@@ -1684,42 +1708,47 @@ static void sd_readv_writev_bh_cb(void *p)
}
out:
if (QLIST_EMPTY(&acb->aioreq_head)) {
- sd_finish_aiocb(acb);
+ return acb->ret;
}
+ return 1;
}
-static BlockDriverAIOCB *sd_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
+static int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
+ int ret;
if (bs->growable && sector_num + nb_sectors > bs->total_sectors) {
/* TODO: shouldn't block here */
if (sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE) < 0) {
- return NULL;
+ return -EIO;
}
bs->total_sectors = sector_num + nb_sectors;
}
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, cb, opaque);
+ acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL);
acb->aio_done_func = sd_write_done;
acb->aiocb_type = AIOCB_WRITE_UDATA;
- sd_schedule_bh(sd_readv_writev_bh_cb, acb);
- return &acb->common;
+ ret = sd_co_rw_vector(acb);
+ if (ret <= 0) {
+ qemu_aio_release(acb);
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+
+ return acb->ret;
}
-static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
+static int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
- int i;
+ int i, ret;
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, cb, opaque);
+ acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL);
acb->aiocb_type = AIOCB_READ_UDATA;
acb->aio_done_func = sd_finish_aiocb;
@@ -1731,8 +1760,15 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs, int64_t sector_num,
memset(qiov->iov[i].iov_base, 0, qiov->iov[i].iov_len);
}
- sd_schedule_bh(sd_readv_writev_bh_cb, acb);
- return &acb->common;
+ ret = sd_co_rw_vector(acb);
+ if (ret <= 0) {
+ qemu_aio_release(acb);
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+
+ return acb->ret;
}
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
@@ -2062,8 +2098,8 @@ BlockDriver bdrv_sheepdog = {
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
- .bdrv_aio_readv = sd_aio_readv,
- .bdrv_aio_writev = sd_aio_writev,
+ .bdrv_co_readv = sd_co_readv,
+ .bdrv_co_writev = sd_co_writev,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
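
The sheepdog conversion rests on one enter/yield pairing: sd_co_readv/writev queue their requests and yield, and the fd handler re-enters the coroutine once the response has been read (a dedicated co_recv coroutine wraps aio_read_response so its blocking reads can themselves yield on EAGAIN). A minimal sketch of that pairing on POSIX ucontext, independent of QEMU's coroutine layer; coro_enter/coro_yield mirror qemu_coroutine_enter/qemu_coroutine_yield in shape only:

    /* Build: cc coro.c (glibc; ucontext is deprecated on some platforms
     * and may need -D_XOPEN_SOURCE=700). */
    #include <stdio.h>
    #include <stdlib.h>
    #include <ucontext.h>

    static ucontext_t main_ctx, co_ctx;

    static void coro_yield(void)   /* back to the "event loop" */
    {
        swapcontext(&co_ctx, &main_ctx);
    }

    static void coro_enter(void)   /* (re)enter the request coroutine */
    {
        swapcontext(&main_ctx, &co_ctx);
    }

    static void request(void)
    {
        puts("send request, then yield until the response arrives");
        coro_yield();
        puts("response arrived, complete the request");
    }

    int main(void)
    {
        char *stack = malloc(64 * 1024);

        getcontext(&co_ctx);
        co_ctx.uc_stack.ss_sp = stack;
        co_ctx.uc_stack.ss_size = 64 * 1024;
        co_ctx.uc_link = &main_ctx;
        makecontext(&co_ctx, request, 0);

        coro_enter();              /* sd_co_writev: issue and yield */
        puts("event loop: reply fd became readable");
        coro_enter();              /* aio_read_response: re-enter   */
        free(stack);
        return 0;
    }
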