aboutsummaryrefslogtreecommitdiff
path: root/block/qed.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/qed.c')
-rw-r--r--block/qed.c218
1 files changed, 136 insertions, 82 deletions
diff --git a/block/qed.c b/block/qed.c
index 86cad2188c..dc54bf4a93 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -93,6 +93,8 @@ int qed_write_header_sync(BDRVQEDState *s)
*
* This function only updates known header fields in-place and does not affect
* extra data after the QED header.
+ *
+ * No new allocating reqs can start while this function runs.
*/
static int coroutine_fn qed_write_header(BDRVQEDState *s)
{
@@ -109,6 +111,8 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
QEMUIOVector qiov;
int ret;
+ assert(s->allocating_acb || s->allocating_write_reqs_plugged);
+
buf = qemu_blockalign(s->bs, len);
iov = (struct iovec) {
.iov_base = buf,
@@ -219,6 +223,8 @@ static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
* This function only produces the offset where the new clusters should be
* written. It updates BDRVQEDState but does not make any changes to the image
* file.
+ *
+ * Called with table_lock held.
*/
static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
{
@@ -236,6 +242,8 @@ QEDTable *qed_alloc_table(BDRVQEDState *s)
/**
* Allocate a new zeroed L2 table
+ *
+ * Called with table_lock held.
*/
static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
{
@@ -249,19 +257,32 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
return l2_table;
}
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
+ qemu_co_mutex_lock(&s->table_lock);
+
+ /* No reentrancy is allowed. */
assert(!s->allocating_write_reqs_plugged);
+ if (s->allocating_acb != NULL) {
+ /* Another allocating write came concurrently. This cannot happen
+ * from bdrv_qed_co_drain, but it can happen when the timer runs.
+ */
+ qemu_co_mutex_unlock(&s->table_lock);
+ return false;
+ }
s->allocating_write_reqs_plugged = true;
+ qemu_co_mutex_unlock(&s->table_lock);
+ return true;
}
static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
{
+ qemu_co_mutex_lock(&s->table_lock);
assert(s->allocating_write_reqs_plugged);
-
s->allocating_write_reqs_plugged = false;
- qemu_co_enter_next(&s->allocating_write_reqs);
+ qemu_co_queue_next(&s->allocating_write_reqs);
+ qemu_co_mutex_unlock(&s->table_lock);
}
static void coroutine_fn qed_need_check_timer_entry(void *opaque)
@@ -269,17 +290,14 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
BDRVQEDState *s = opaque;
int ret;
- /* The timer should only fire when allocating writes have drained */
- assert(!s->allocating_acb);
-
trace_qed_need_check_timer_cb(s);
- qed_acquire(s);
- qed_plug_allocating_write_reqs(s);
+ if (!qed_plug_allocating_write_reqs(s)) {
+ return;
+ }
/* Ensure writes are on disk before clearing flag */
ret = bdrv_co_flush(s->bs->file->bs);
- qed_release(s);
if (ret < 0) {
qed_unplug_allocating_write_reqs(s);
return;
@@ -301,16 +319,6 @@ static void qed_need_check_timer_cb(void *opaque)
qemu_coroutine_enter(co);
}
-void qed_acquire(BDRVQEDState *s)
-{
- aio_context_acquire(bdrv_get_aio_context(s->bs));
-}
-
-void qed_release(BDRVQEDState *s)
-{
- aio_context_release(bdrv_get_aio_context(s->bs));
-}
-
static void qed_start_need_check_timer(BDRVQEDState *s)
{
trace_qed_start_need_check_timer(s);
@@ -350,7 +358,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
-static void bdrv_qed_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_qed_co_drain(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -359,10 +367,20 @@ static void bdrv_qed_drain(BlockDriverState *bs)
*/
if (s->need_check_timer && timer_pending(s->need_check_timer)) {
qed_cancel_need_check_timer(s);
- qed_need_check_timer_cb(s);
+ qed_need_check_timer_entry(s);
}
}
+static void bdrv_qed_init_state(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ memset(s, 0, sizeof(BDRVQEDState));
+ s->bs = bs;
+ qemu_co_mutex_init(&s->table_lock);
+ qemu_co_queue_init(&s->allocating_write_reqs);
+}
+
static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -371,9 +389,6 @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
int64_t file_size;
int ret;
- s->bs = bs;
- qemu_co_queue_init(&s->allocating_write_reqs);
-
ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
if (ret < 0) {
return ret;
@@ -507,6 +522,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
+ bdrv_qed_init_state(bs);
return bdrv_qed_do_open(bs, options, flags, errp);
}
@@ -681,6 +697,7 @@ typedef struct {
BlockDriverState **file;
} QEDIsAllocatedCB;
+/* Called with table_lock held. */
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDIsAllocatedCB *cb = opaque;
@@ -728,6 +745,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
uint64_t offset;
int ret;
+ qemu_co_mutex_lock(&s->table_lock);
ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
qed_is_allocated_cb(&cb, ret, offset, len);
@@ -735,6 +753,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
qed_unref_l2_cache_entry(request.l2_table);
+ qemu_co_mutex_unlock(&s->table_lock);
return cb.status;
}
@@ -865,6 +884,8 @@ out:
*
* The cluster offset may be an allocated byte offset in the image file, the
* zero cluster marker, or the unallocated cluster marker.
+ *
+ * Called with table_lock held.
*/
static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
int index, unsigned int n,
@@ -880,6 +901,7 @@ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
}
}
+/* Called with table_lock held. */
static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
@@ -903,7 +925,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
if (acb == s->allocating_acb) {
s->allocating_acb = NULL;
if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
- qemu_co_enter_next(&s->allocating_write_reqs);
+ qemu_co_queue_next(&s->allocating_write_reqs);
} else if (s->header.features & QED_F_NEED_CHECK) {
qed_start_need_check_timer(s);
}
@@ -912,6 +934,8 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
/**
* Update L1 table with new L2 table offset and write it out
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
{
@@ -940,6 +964,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
/**
* Update L2 table with new cluster offsets and write them out
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
{
@@ -976,50 +1002,26 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
/**
* Write data to the image file
+ *
+ * Called with table_lock *not* held.
*/
static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
uint64_t offset = acb->cur_cluster +
qed_offset_into_cluster(s, acb->cur_pos);
- int ret;
trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);
BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
- &acb->cur_qiov, 0);
- if (ret < 0) {
- return ret;
- }
-
- if (acb->find_cluster_ret != QED_CLUSTER_FOUND) {
- if (s->bs->backing) {
- /*
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use. A crash
- * during an allocating write could result in empty clusters in the
- * image. If the write only touched a subregion of the cluster,
- * then backing image sectors have been lost in the untouched
- * region. The solution is to flush after writing a new data
- * cluster and before updating the L2 table.
- */
- ret = bdrv_co_flush(s->bs->file->bs);
- if (ret < 0) {
- return ret;
- }
- }
- ret = qed_aio_write_l2_update(acb, acb->cur_cluster);
- if (ret < 0) {
- return ret;
- }
- }
- return 0;
+ return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
+ &acb->cur_qiov, 0);
}
/**
* Populate untouched regions of new data cluster
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
{
@@ -1027,6 +1029,8 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
uint64_t start, len, offset;
int ret;
+ qemu_co_mutex_unlock(&s->table_lock);
+
/* Populate front untouched region of new data cluster */
start = qed_start_of_cluster(s, acb->cur_pos);
len = qed_offset_into_cluster(s, acb->cur_pos);
@@ -1034,7 +1038,7 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
if (ret < 0) {
- return ret;
+ goto out;
}
/* Populate back untouched region of new data cluster */
@@ -1047,10 +1051,31 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
trace_qed_aio_write_postfill(s, acb, start, len, offset);
ret = qed_copy_from_backing_file(s, start, len, offset);
if (ret < 0) {
- return ret;
+ goto out;
}
- return qed_aio_write_main(acb);
+ ret = qed_aio_write_main(acb);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (s->bs->backing) {
+ /*
+ * Flush new data clusters before updating the L2 table
+ *
+ * This flush is necessary when a backing file is in use. A crash
+ * during an allocating write could result in empty clusters in the
+ * image. If the write only touched a subregion of the cluster,
+ * then backing image sectors have been lost in the untouched
+ * region. The solution is to flush after writing a new data
+ * cluster and before updating the L2 table.
+ */
+ ret = bdrv_co_flush(s->bs->file->bs);
+ }
+
+out:
+ qemu_co_mutex_lock(&s->table_lock);
+ return ret;
}
/**
@@ -1073,6 +1098,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
* @len: Length in bytes
*
* This path is taken when writing to previously unallocated clusters.
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
@@ -1087,7 +1114,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
/* Freeze this request if another allocating write is in progress */
if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
if (s->allocating_acb != NULL) {
- qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+ qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock);
assert(s->allocating_acb == NULL);
}
s->allocating_acb = acb;
@@ -1103,6 +1130,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
return 0;
}
+ acb->cur_cluster = 1;
} else {
acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
}
@@ -1115,15 +1143,14 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
}
}
- if (acb->flags & QED_AIOCB_ZERO) {
- ret = qed_aio_write_l2_update(acb, 1);
- } else {
+ if (!(acb->flags & QED_AIOCB_ZERO)) {
ret = qed_aio_write_cow(acb);
+ if (ret < 0) {
+ return ret;
+ }
}
- if (ret < 0) {
- return ret;
- }
- return 0;
+
+ return qed_aio_write_l2_update(acb, acb->cur_cluster);
}
/**
@@ -1134,10 +1161,17 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
* @len: Length in bytes
*
* This path is taken when writing to already allocated clusters.
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
size_t len)
{
+ BDRVQEDState *s = acb_to_s(acb);
+ int r;
+
+ qemu_co_mutex_unlock(&s->table_lock);
+
/* Allocate buffer for zero writes */
if (acb->flags & QED_AIOCB_ZERO) {
struct iovec *iov = acb->qiov->iov;
@@ -1145,7 +1179,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
if (!iov->iov_base) {
iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
if (iov->iov_base == NULL) {
- return -ENOMEM;
+ r = -ENOMEM;
+ goto out;
}
memset(iov->iov_base, 0, iov->iov_len);
}
@@ -1155,8 +1190,11 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
acb->cur_cluster = offset;
qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
- /* Do the actual write */
- return qed_aio_write_main(acb);
+ /* Do the actual write. */
+ r = qed_aio_write_main(acb);
+out:
+ qemu_co_mutex_lock(&s->table_lock);
+ return r;
}
/**
@@ -1166,6 +1204,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
* @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
* @offset: Cluster offset in bytes
* @len: Length in bytes
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
uint64_t offset, size_t len)
@@ -1197,6 +1237,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
* @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
* @offset: Cluster offset in bytes
* @len: Length in bytes
+ *
+ * Called with table_lock held.
*/
static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
uint64_t offset, size_t len)
@@ -1204,6 +1246,9 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
BlockDriverState *bs = acb->bs;
+ int r;
+
+ qemu_co_mutex_unlock(&s->table_lock);
/* Adjust offset into cluster */
offset += qed_offset_into_cluster(s, acb->cur_pos);
@@ -1212,22 +1257,23 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
- /* Handle zero cluster and backing file reads */
+ /* Handle zero cluster and backing file reads, otherwise read
+ * data cluster directly.
+ */
if (ret == QED_CLUSTER_ZERO) {
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
- return 0;
+ r = 0;
} else if (ret != QED_CLUSTER_FOUND) {
- return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- &acb->backing_qiov);
+ r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+ &acb->backing_qiov);
+ } else {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+ &acb->cur_qiov, 0);
}
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
- &acb->cur_qiov, 0);
- if (ret < 0) {
- return ret;
- }
- return 0;
+ qemu_co_mutex_lock(&s->table_lock);
+ return r;
}
/**
@@ -1240,6 +1286,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
size_t len;
int ret;
+ qemu_co_mutex_lock(&s->table_lock);
while (1) {
trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);
@@ -1279,6 +1326,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
trace_qed_aio_complete(s, acb, ret);
qed_aio_complete(acb);
+ qemu_co_mutex_unlock(&s->table_lock);
return ret;
}
@@ -1474,8 +1522,14 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
- memset(s, 0, sizeof(BDRVQEDState));
+ bdrv_qed_init_state(bs);
+ if (qemu_in_coroutine()) {
+ qemu_co_mutex_lock(&s->table_lock);
+ }
ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+ if (qemu_in_coroutine()) {
+ qemu_co_mutex_unlock(&s->table_lock);
+ }
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "Could not reopen qed layer: ");
@@ -1554,7 +1608,7 @@ static BlockDriver bdrv_qed = {
.bdrv_check = bdrv_qed_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
- .bdrv_drain = bdrv_qed_drain,
+ .bdrv_co_drain = bdrv_qed_co_drain,
};
static void bdrv_qed_init(void)