diff options
-rw-r--r-- | block/commit.c | 2 | ||||
-rw-r--r-- | block/io.c | 21 | ||||
-rw-r--r-- | block/mirror.c | 2 | ||||
-rw-r--r-- | block/qcow2.c | 3 | ||||
-rw-r--r-- | block/rbd.c | 42 | ||||
-rw-r--r-- | block/replication.c | 2 | ||||
-rw-r--r-- | block/stream.c | 56 | ||||
-rw-r--r-- | include/block/block.h | 3 | ||||
-rw-r--r-- | qemu-img.c | 2 | ||||
-rw-r--r-- | tests/qemu-iotests/245 | 4 |
10 files changed, 91 insertions, 46 deletions
diff --git a/block/commit.c b/block/commit.c index 212c6f639e..ca7e408b26 100644 --- a/block/commit.c +++ b/block/commit.c @@ -174,7 +174,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) break; } /* Copy if allocated above the base */ - ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), + ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), false, offset, COMMIT_BUFFER_SIZE, &n); copy = (ret == 1); trace_commit_one_iteration(s, offset, n, ret); diff --git a/block/io.c b/block/io.c index 9ba1bada36..24a18759fd 100644 --- a/block/io.c +++ b/block/io.c @@ -2295,10 +2295,11 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, /* * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * - * Return true if (a prefix of) the given range is allocated in any image - * between BASE and TOP (inclusive). BASE can be NULL to check if the given - * offset is allocated in any image of the chain. Return false otherwise, - * or negative errno on failure. + * Return 1 if (a prefix of) the given range is allocated in any image + * between BASE and TOP (BASE is only included if include_base is set). + * BASE can be NULL to check if the given offset is allocated in any + * image of the chain. Return 0 otherwise, or negative errno on + * failure. * * 'pnum' is set to the number of bytes (including and immediately * following the specified offset) that are known to be in the same @@ -2310,17 +2311,21 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, */ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t offset, int64_t bytes, int64_t *pnum) + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum) { BlockDriverState *intermediate; int ret; int64_t n = bytes; + assert(base || !include_base); + intermediate = top; - while (intermediate && intermediate != base) { + while (include_base || intermediate != base) { int64_t pnum_inter; int64_t size_inter; + assert(intermediate); ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter); if (ret < 0) { return ret; @@ -2339,6 +2344,10 @@ int bdrv_is_allocated_above(BlockDriverState *top, n = pnum_inter; } + if (intermediate == base) { + break; + } + intermediate = backing_bs(intermediate); } diff --git a/block/mirror.c b/block/mirror.c index d17be4cdbc..2fcec70e35 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -808,7 +808,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } - ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count); + ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count); if (ret < 0) { return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index 9396d490d5..2a59eb27fe 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2148,7 +2148,8 @@ static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes) { int64_t nr; return !bytes || - (!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes); + (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) && + nr == bytes); } static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) diff --git a/block/rbd.c b/block/rbd.c index f2ac2c06f4..59757b3120 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -103,6 +103,7 @@ typedef struct BDRVRBDState { rbd_image_t image; char *image_name; char *snap; + uint64_t image_size; } BDRVRBDState; static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, @@ -778,6 +779,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, goto failed_open; } + r = rbd_get_size(s->image, &s->image_size); + if (r < 0) { + error_setg_errno(errp, -r, "error getting image size from %s", + s->image_name); + rbd_close(s->image); + goto failed_open; + } + /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { @@ -834,6 +843,22 @@ static void qemu_rbd_close(BlockDriverState *bs) rados_shutdown(s->cluster); } +/* Resize the RBD image and update the 'image_size' with the current size */ +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) +{ + BDRVRBDState *s = bs->opaque; + int r; + + r = rbd_resize(s->image, size); + if (r < 0) { + return r; + } + + s->image_size = size; + + return 0; +} + static const AIOCBInfo rbd_aiocb_info = { .aiocb_size = sizeof(RBDAIOCB), }; @@ -935,13 +960,25 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, } switch (cmd) { - case RBD_AIO_WRITE: + case RBD_AIO_WRITE: { + /* + * RBD APIs don't allow us to write more than actual size, so in order + * to support growing images, we resize the image before write + * operations that exceed the current size. + */ + if (off + size > s->image_size) { + r = qemu_rbd_resize(bs, off + size); + if (r < 0) { + goto failed_completion; + } + } #ifdef LIBRBD_SUPPORTS_IOVEC r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); #else r = rbd_aio_write(s->image, off, size, rcb->buf, c); #endif break; + } case RBD_AIO_READ: #ifdef LIBRBD_SUPPORTS_IOVEC r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); @@ -1052,7 +1089,6 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, PreallocMode prealloc, Error **errp) { - BDRVRBDState *s = bs->opaque; int r; if (prealloc != PREALLOC_MODE_OFF) { @@ -1061,7 +1097,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, return -ENOTSUP; } - r = rbd_resize(s->image, offset); + r = qemu_rbd_resize(bs, offset); if (r < 0) { error_setg_errno(errp, -r, "Failed to resize file"); return r; diff --git a/block/replication.c b/block/replication.c index b41bc507c0..23b2993d74 100644 --- a/block/replication.c +++ b/block/replication.c @@ -275,7 +275,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, while (remaining_sectors > 0) { int64_t count; - ret = bdrv_is_allocated_above(top->bs, base->bs, + ret = bdrv_is_allocated_above(top->bs, base->bs, false, sector_num * BDRV_SECTOR_SIZE, remaining_sectors * BDRV_SECTOR_SIZE, &count); diff --git a/block/stream.c b/block/stream.c index 1a906fd860..cd5e2ba9b0 100644 --- a/block/stream.c +++ b/block/stream.c @@ -31,7 +31,7 @@ enum { typedef struct StreamBlockJob { BlockJob common; - BlockDriverState *base; + BlockDriverState *bottom; BlockdevOnError on_error; char *backing_file_str; bool bs_read_only; @@ -54,7 +54,7 @@ static void stream_abort(Job *job) if (s->chain_frozen) { BlockJob *bjob = &s->common; - bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base); + bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->bottom); } } @@ -63,11 +63,11 @@ static int stream_prepare(Job *job) StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockJob *bjob = &s->common; BlockDriverState *bs = blk_bs(bjob->blk); - BlockDriverState *base = s->base; + BlockDriverState *base = backing_bs(s->bottom); Error *local_err = NULL; int ret = 0; - bdrv_unfreeze_backing_chain(bs, base); + bdrv_unfreeze_backing_chain(bs, s->bottom); s->chain_frozen = false; if (bs->backing) { @@ -110,7 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockBackend *blk = s->common.blk; BlockDriverState *bs = blk_bs(blk); - BlockDriverState *base = s->base; + bool enable_cor = !backing_bs(s->bottom); int64_t len; int64_t offset = 0; uint64_t delay_ns = 0; @@ -119,14 +119,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp) int64_t n = 0; /* bytes */ void *buf; - if (!bs->backing) { - goto out; + if (bs == s->bottom) { + /* Nothing to stream */ + return 0; } len = bdrv_getlength(bs); if (len < 0) { - ret = len; - goto out; + return len; } job_progress_set_remaining(&s->common.job, len); @@ -137,7 +137,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) * backing chain since the copy-on-read operation does not take base into * account. */ - if (!base) { + if (enable_cor) { bdrv_enable_copy_on_read(bs); } @@ -160,9 +160,8 @@ static int coroutine_fn stream_run(Job *job, Error **errp) } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ - ret = bdrv_is_allocated_above(backing_bs(bs), base, + ret = bdrv_is_allocated_above(backing_bs(bs), s->bottom, true, offset, n, &n); - /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { n = len - offset; @@ -199,18 +198,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp) } } - if (!base) { + if (enable_cor) { bdrv_disable_copy_on_read(bs); } - /* Do not remove the backing file if an error was there but ignored. */ - ret = error; - qemu_vfree(buf); -out: - /* Modify backing chain and close BDSes in main loop */ - return ret; + /* Do not remove the backing file if an error was there but ignored. */ + return error; } static const BlockJobDriver stream_job_driver = { @@ -235,8 +230,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, StreamBlockJob *s; BlockDriverState *iter; bool bs_read_only; + int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; + BlockDriverState *bottom = bdrv_find_overlay(bs, base); - if (bdrv_freeze_backing_chain(bs, base, errp) < 0) { + if (bdrv_freeze_backing_chain(bs, bottom, errp) < 0) { return; } @@ -253,10 +250,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, * already have our own plans. Also don't allow resize as the image size is * queried only at the job start and then cached. */ s = block_job_create(job_id, &stream_job_driver, NULL, bs, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_GRAPH_MOD, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE, + basic_flags | BLK_PERM_GRAPH_MOD, + basic_flags | BLK_PERM_WRITE, speed, creation_flags, NULL, NULL, errp); if (!s) { goto fail; @@ -264,15 +259,18 @@ void stream_start(const char *job_id, BlockDriverState *bs, /* Block all intermediate nodes between bs and base, because they will * disappear from the chain after this operation. The streaming job reads - * every block only once, assuming that it doesn't change, so block writes - * and resizes. */ + * every block only once, assuming that it doesn't change, so forbid writes + * and resizes. Reassign the base node pointer because the backing BS of the + * bottom node might change after the call to bdrv_reopen_set_read_only() + * due to parallel block jobs running. + */ + base = backing_bs(bottom); for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, - &error_abort); + basic_flags, &error_abort); } - s->base = base; + s->bottom = bottom; s->backing_file_str = g_strdup(backing_file_str); s->bs_read_only = bs_read_only; s->chain_frozen = true; diff --git a/include/block/block.h b/include/block/block.h index f9415ed740..734c9d2f76 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -449,7 +449,8 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t offset, int64_t bytes, int64_t *pnum); + bool include_base, int64_t offset, int64_t bytes, + int64_t *pnum); bool bdrv_is_read_only(BlockDriverState *bs); int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, diff --git a/qemu-img.c b/qemu-img.c index 158b3a505f..79983772de 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3518,7 +3518,7 @@ static int img_rebase(int argc, char **argv) * to take action */ ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs, - offset, n, &n); + false, offset, n, &n); if (ret < 0) { error_report("error while reading image metadata: %s", strerror(-ret)); diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 index 349b94aace..bc1ceb9792 100644 --- a/tests/qemu-iotests/245 +++ b/tests/qemu-iotests/245 @@ -866,9 +866,9 @@ class TestBlockdevReopen(iotests.QMPTestCase): auto_finalize = False) self.assert_qmp(result, 'return', {}) - # We can't remove hd2 while the stream job is ongoing + # We can remove hd2 while the stream job is ongoing opts['backing']['backing'] = None - self.reopen(opts, {}, "Cannot change 'backing' link from 'hd1' to 'hd2'") + self.reopen(opts, {}) # We can't remove hd1 while the stream job is ongoing opts['backing'] = None |