diff options
author | Alberto Garcia <berto@igalia.com> | 2020-11-13 17:52:31 +0100 |
---|---|---|
committer | Max Reitz <mreitz@redhat.com> | 2020-12-18 12:35:55 +0100 |
commit | ef9bba1484bb8fb5fda53a7bf90bf5e1a8e6a9f6 (patch) | |
tree | 0a0950911a5ce75c4861b92502c7233715cdab6f /block/quorum.c | |
parent | cff6d3ca43cdc8da0104204a52b0e4bd644e16e1 (diff) |
quorum: Implement bdrv_co_block_status()
The quorum driver does not implement bdrv_co_block_status() and
because of that it always reports that it contains data, even if all
its children are known to be empty.
One consequence of this is that if we, for example, create a quorum
with a size of 10GB and mirror it to a new image, the operation will
write 10GB of actual zeroes to the destination image, wasting a lot of
time and disk space.
Since a quorum has an arbitrary number of children of potentially
different formats there is no way to report all possible allocation
status flags in a way that makes sense, so this implementation only
reports when a given region is known to contain zeroes
(BDRV_BLOCK_ZERO) or not (BDRV_BLOCK_DATA).
If all children agree that a region contains zeroes then we can return
BDRV_BLOCK_ZERO using the smallest size reported by the children
(because all agree that a region of at least that size contains
zeroes).
If at least one child disagrees we have to return BDRV_BLOCK_DATA.
In this case we use the largest of the sizes reported by the children
that didn't return BDRV_BLOCK_ZERO (because we know that there won't
be an agreement for at least that size).
Signed-off-by: Alberto Garcia <berto@igalia.com>
Tested-by: Tao Xu <tao3.xu@intel.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <db83149afcf0f793effc8878089d29af4c46ffe1.1605286097.git.berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Diffstat (limited to 'block/quorum.c')
-rw-r--r-- | block/quorum.c | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/block/quorum.c b/block/quorum.c index 4b08a199b7..ae62b206c9 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -18,6 +18,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "block/block_int.h" +#include "block/coroutines.h" #include "block/qdict.h" #include "qapi/error.h" #include "qapi/qapi-events-block.h" @@ -1179,6 +1180,56 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, | DEFAULT_PERM_UNCHANGED; } +/* + * Each one of the children can report different status flags even + * when they contain the same data, so what this function does is + * return BDRV_BLOCK_ZERO if *all* children agree that a certain + * region contains zeroes, and BDRV_BLOCK_DATA otherwise. + */ +static int coroutine_fn quorum_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t count, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVQuorumState *s = bs->opaque; + int i, ret; + int64_t pnum_zero = count; + int64_t pnum_data = 0; + + for (i = 0; i < s->num_children; i++) { + int64_t bytes; + ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, + want_zero, offset, count, + &bytes, NULL, NULL, NULL); + if (ret < 0) { + quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, + s->children[i]->bs->node_name, ret); + pnum_data = count; + break; + } + /* + * Even if all children agree about whether there are zeroes + * or not at @offset they might disagree on the size, so use + * the smallest when reporting BDRV_BLOCK_ZERO and the largest + * when reporting BDRV_BLOCK_DATA. 
+ */ + if (ret & BDRV_BLOCK_ZERO) { + pnum_zero = MIN(pnum_zero, bytes); + } else { + pnum_data = MAX(pnum_data, bytes); + } + } + + if (pnum_data) { + *pnum = pnum_data; + return BDRV_BLOCK_DATA; + } else { + *pnum = pnum_zero; + return BDRV_BLOCK_ZERO; + } +} + static const char *const quorum_strong_runtime_opts[] = { QUORUM_OPT_VOTE_THRESHOLD, QUORUM_OPT_BLKVERIFY, @@ -1197,6 +1248,7 @@ static BlockDriver bdrv_quorum = { .bdrv_close = quorum_close, .bdrv_gather_child_options = quorum_gather_child_options, .bdrv_dirname = quorum_dirname, + .bdrv_co_block_status = quorum_co_block_status, .bdrv_co_flush_to_disk = quorum_co_flush, |