qcow2: avoid reentrant bdrv_read() in copy_sectors()

A BlockDriverState should not issue requests on itself through the public block layer interface. Nested, or reentrant, requests are problematic because they do I/O throttling and request tracking twice.

Features like block layer copy-on-read use request tracking to avoid race conditions between concurrent requests. The reentrant request will have to "wait" for its parent request to complete. But the parent is waiting for the reentrant request to make progress, so we have reached a deadlock.

The solution is for block drivers to avoid the public block layer interfaces for reentrant requests. Instead, they should call their own internal functions if they wish to perform reentrant requests.

This is also a good opportunity to make copy_sectors() a true coroutine_fn. That means calling bdrv_co_writev() instead of bdrv_write(). Behavior is unchanged, but we're being explicit that this executes in coroutine context.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
author: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> 2011-11-30 12:23:41 +0000
committer: Kevin Wolf <kwolf@redhat.com> 2011-12-05 14:49:47 +0100
commit: aef4acb6616ab7fb5c105660aa8a2cee4e250e75 (patch)
tree: 53fe5915dd55fd1e4197f462c153b6c551417af4 /block
parent: 1b9f1491f82de0c8f9b09d1a06ac33304449a634 (diff)
1 files changed, 19 insertions, 8 deletions
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0e337075b3..07a2e936fd 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -289,12 +289,15 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
     }
 }
 
-static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
-                        uint64_t cluster_offset, int n_start, int n_end)
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+                                     uint64_t start_sect,
+                                     uint64_t cluster_offset,
+                                     int n_start, int n_end)
 {
     BDRVQcowState *s = bs->opaque;
+    QEMUIOVector qiov;
+    struct iovec iov;
     int n, ret;
-    void *buf;
 
     /*
      * If this is the last cluster and it is only partially used, we must only
@@ -310,29 +313,37 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
         return 0;
     }
 
-    buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
+    iov.iov_len = n * BDRV_SECTOR_SIZE;
+    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-    ret = bdrv_read(bs, start_sect + n_start, buf, n);
+
+    /* Call .bdrv_co_readv() directly instead of using the public block-layer
+     * interface.  This avoids double I/O throttling and request tracking,
+     * which can lead to deadlock when block layer copy-on-read is enabled.
+     */
+    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
     if (ret < 0) {
         goto out;
     }
 
     if (s->crypt_method) {
         qcow2_encrypt_sectors(s, start_sect + n_start,
-                        buf, buf, n, 1,
+                        iov.iov_base, iov.iov_base, n, 1,
                         &s->aes_encrypt_key);
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, buf, n);
+    ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
     if (ret < 0) {
         goto out;
     }
 
     ret = 0;
 out:
-    qemu_vfree(buf);
+    qemu_vfree(iov.iov_base);
     return ret;
 }
author	Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>	2011-11-30 12:23:41 +0000
committer	Kevin Wolf <kwolf@redhat.com>	2011-12-05 14:49:47 +0100
commit	aef4acb6616ab7fb5c105660aa8a2cee4e250e75 (patch)
tree	53fe5915dd55fd1e4197f462c153b6c551417af4 /block
parent	1b9f1491f82de0c8f9b09d1a06ac33304449a634 (diff)