diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2017-06-29 15:27:47 +0200 |
---|---|---|
committer | Fam Zheng <famz@redhat.com> | 2017-07-17 11:34:11 +0800 |
commit | 1f01e50b8330c24714ddca5841fdbb703076b121 (patch) | |
tree | ca4573cd50c640d12842de5f0ba8d8814318d67a /block/qed.c | |
parent | 61c7887e0f3dcfa9c4ccdfe43374243a4a5c0e8d (diff) |
qed: protect table cache with CoMutex
This makes the driver thread-safe. The CoMutex is dropped temporarily
while accessing the data clusters or the backing file.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-10-pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Diffstat (limited to 'block/qed.c')
-rw-r--r-- | block/qed.c | 138 |
1 files changed, 96 insertions, 42 deletions
diff --git a/block/qed.c b/block/qed.c index adcc5c967f..dc54bf4a93 100644 --- a/block/qed.c +++ b/block/qed.c @@ -93,6 +93,8 @@ int qed_write_header_sync(BDRVQEDState *s) * * This function only updates known header fields in-place and does not affect * extra data after the QED header. + * + * No new allocating reqs can start while this function runs. */ static int coroutine_fn qed_write_header(BDRVQEDState *s) { @@ -109,6 +111,8 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s) QEMUIOVector qiov; int ret; + assert(s->allocating_acb || s->allocating_write_reqs_plugged); + buf = qemu_blockalign(s->bs, len); iov = (struct iovec) { .iov_base = buf, @@ -219,6 +223,8 @@ static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n, * This function only produces the offset where the new clusters should be * written. It updates BDRVQEDState but does not make any changes to the image * file. + * + * Called with table_lock held. */ static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n) { @@ -236,6 +242,8 @@ QEDTable *qed_alloc_table(BDRVQEDState *s) /** * Allocate a new zeroed L2 table + * + * Called with table_lock held. */ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) { @@ -249,19 +257,32 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) return l2_table; } -static void qed_plug_allocating_write_reqs(BDRVQEDState *s) +static bool qed_plug_allocating_write_reqs(BDRVQEDState *s) { + qemu_co_mutex_lock(&s->table_lock); + + /* No reentrancy is allowed. */ assert(!s->allocating_write_reqs_plugged); + if (s->allocating_acb != NULL) { + /* Another allocating write came concurrently. This cannot happen + * from bdrv_qed_co_drain, but it can happen when the timer runs. + */ + qemu_co_mutex_unlock(&s->table_lock); + return false; + } s->allocating_write_reqs_plugged = true; + qemu_co_mutex_unlock(&s->table_lock); + return true; } static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) { + qemu_co_mutex_lock(&s->table_lock); assert(s->allocating_write_reqs_plugged); - s->allocating_write_reqs_plugged = false; - qemu_co_enter_next(&s->allocating_write_reqs); + qemu_co_queue_next(&s->allocating_write_reqs); + qemu_co_mutex_unlock(&s->table_lock); } static void coroutine_fn qed_need_check_timer_entry(void *opaque) @@ -269,17 +290,14 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) BDRVQEDState *s = opaque; int ret; - /* The timer should only fire when allocating writes have drained */ - assert(!s->allocating_acb); - trace_qed_need_check_timer_cb(s); - qed_acquire(s); - qed_plug_allocating_write_reqs(s); + if (!qed_plug_allocating_write_reqs(s)) { + return; + } /* Ensure writes are on disk before clearing flag */ ret = bdrv_co_flush(s->bs->file->bs); - qed_release(s); if (ret < 0) { qed_unplug_allocating_write_reqs(s); return; @@ -301,16 +319,6 @@ static void qed_need_check_timer_cb(void *opaque) qemu_coroutine_enter(co); } -void qed_acquire(BDRVQEDState *s) -{ - aio_context_acquire(bdrv_get_aio_context(s->bs)); -} - -void qed_release(BDRVQEDState *s) -{ - aio_context_release(bdrv_get_aio_context(s->bs)); -} - static void qed_start_need_check_timer(BDRVQEDState *s) { trace_qed_start_need_check_timer(s); @@ -369,6 +377,7 @@ static void bdrv_qed_init_state(BlockDriverState *bs) memset(s, 0, sizeof(BDRVQEDState)); s->bs = bs; + qemu_co_mutex_init(&s->table_lock); qemu_co_queue_init(&s->allocating_write_reqs); } @@ -688,6 +697,7 @@ typedef struct { BlockDriverState **file; } QEDIsAllocatedCB; +/* Called with table_lock held. */ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) { QEDIsAllocatedCB *cb = opaque; @@ -735,6 +745,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, uint64_t offset; int ret; + qemu_co_mutex_lock(&s->table_lock); ret = qed_find_cluster(s, &request, cb.pos, &len, &offset); qed_is_allocated_cb(&cb, ret, offset, len); @@ -742,6 +753,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, assert(cb.status != BDRV_BLOCK_OFFSET_MASK); qed_unref_l2_cache_entry(request.l2_table); + qemu_co_mutex_unlock(&s->table_lock); return cb.status; } @@ -872,6 +884,8 @@ out: * * The cluster offset may be an allocated byte offset in the image file, the * zero cluster marker, or the unallocated cluster marker. + * + * Called with table_lock held. */ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, unsigned int n, @@ -887,6 +901,7 @@ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table, } } +/* Called with table_lock held. */ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb) { BDRVQEDState *s = acb_to_s(acb); @@ -910,7 +925,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb) if (acb == s->allocating_acb) { s->allocating_acb = NULL; if (!qemu_co_queue_empty(&s->allocating_write_reqs)) { - qemu_co_enter_next(&s->allocating_write_reqs); + qemu_co_queue_next(&s->allocating_write_reqs); } else if (s->header.features & QED_F_NEED_CHECK) { qed_start_need_check_timer(s); } @@ -919,6 +934,8 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb) /** * Update L1 table with new L2 table offset and write it out + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb) { @@ -947,6 +964,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb) /** * Update L2 table with new cluster offsets and write them out + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) { @@ -983,6 +1002,8 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) /** * Write data to the image file + * + * Called with table_lock *not* held. */ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) { @@ -999,6 +1020,8 @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) /** * Populate untouched regions of new data cluster + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) { @@ -1006,6 +1029,8 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) uint64_t start, len, offset; int ret; + qemu_co_mutex_unlock(&s->table_lock); + /* Populate front untouched region of new data cluster */ start = qed_start_of_cluster(s, acb->cur_pos); len = qed_offset_into_cluster(s, acb->cur_pos); @@ -1013,7 +1038,7 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster); if (ret < 0) { - return ret; + goto out; } /* Populate back untouched region of new data cluster */ @@ -1026,12 +1051,12 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) trace_qed_aio_write_postfill(s, acb, start, len, offset); ret = qed_copy_from_backing_file(s, start, len, offset); if (ret < 0) { - return ret; + goto out; } ret = qed_aio_write_main(acb); if (ret < 0) { - return ret; + goto out; } if (s->bs->backing) { @@ -1046,12 +1071,11 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) * cluster and before updating the L2 table. */ ret = bdrv_co_flush(s->bs->file->bs); - if (ret < 0) { - return ret; - } } - return 0; +out: + qemu_co_mutex_lock(&s->table_lock); + return ret; } /** @@ -1074,6 +1098,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s) * @len: Length in bytes * * This path is taken when writing to previously unallocated clusters. + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len) { @@ -1088,7 +1114,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len) /* Freeze this request if another allocating write is in progress */ if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) { if (s->allocating_acb != NULL) { - qemu_co_queue_wait(&s->allocating_write_reqs, NULL); + qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock); assert(s->allocating_acb == NULL); } s->allocating_acb = acb; @@ -1135,10 +1161,17 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len) * @len: Length in bytes * * This path is taken when writing to already allocated clusters. + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) { + BDRVQEDState *s = acb_to_s(acb); + int r; + + qemu_co_mutex_unlock(&s->table_lock); + /* Allocate buffer for zero writes */ if (acb->flags & QED_AIOCB_ZERO) { struct iovec *iov = acb->qiov->iov; @@ -1146,7 +1179,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, if (!iov->iov_base) { iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len); if (iov->iov_base == NULL) { - return -ENOMEM; + r = -ENOMEM; + goto out; } memset(iov->iov_base, 0, iov->iov_len); } @@ -1156,8 +1190,11 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, acb->cur_cluster = offset; qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); - /* Do the actual write */ - return qed_aio_write_main(acb); + /* Do the actual write. */ + r = qed_aio_write_main(acb); +out: + qemu_co_mutex_lock(&s->table_lock); + return r; } /** @@ -1167,6 +1204,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1 * @offset: Cluster offset in bytes * @len: Length in bytes + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_write_data(void *opaque, int ret, uint64_t offset, size_t len) @@ -1198,6 +1237,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret, * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1 * @offset: Cluster offset in bytes * @len: Length in bytes + * + * Called with table_lock held. */ static int coroutine_fn qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len) @@ -1205,6 +1246,9 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret, QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); BlockDriverState *bs = acb->bs; + int r; + + qemu_co_mutex_unlock(&s->table_lock); /* Adjust offset into cluster */ offset += qed_offset_into_cluster(s, acb->cur_pos); @@ -1213,22 +1257,23 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret, qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); - /* Handle zero cluster and backing file reads */ + /* Handle zero cluster and backing file reads, otherwise read + * data cluster directly. + */ if (ret == QED_CLUSTER_ZERO) { qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); - return 0; + r = 0; } else if (ret != QED_CLUSTER_FOUND) { - return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, - &acb->backing_qiov); + r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, + &acb->backing_qiov); + } else { + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size, + &acb->cur_qiov, 0); } - BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size, - &acb->cur_qiov, 0); - if (ret < 0) { - return ret; - } - return 0; + qemu_co_mutex_lock(&s->table_lock); + return r; } /** @@ -1241,6 +1286,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb) size_t len; int ret; + qemu_co_mutex_lock(&s->table_lock); while (1) { trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size); @@ -1280,6 +1326,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb) trace_qed_aio_complete(s, acb, ret); qed_aio_complete(acb); + qemu_co_mutex_unlock(&s->table_lock); return ret; } @@ -1469,13 +1516,20 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs, static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp) { + BDRVQEDState *s = bs->opaque; Error *local_err = NULL; int ret; bdrv_qed_close(bs); bdrv_qed_init_state(bs); + if (qemu_in_coroutine()) { + qemu_co_mutex_lock(&s->table_lock); + } ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err); + if (qemu_in_coroutine()) { + qemu_co_mutex_unlock(&s->table_lock); + } if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "Could not reopen qed layer: "); |