diff options
author | Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 2020-12-04 01:27:13 +0300 |
---|---|---|
committer | Kevin Wolf <kwolf@redhat.com> | 2020-12-11 17:52:40 +0100 |
commit | 8b1170012b1de6649c66ac1887f4df7e312abf3b (patch) | |
tree | 4fd21be993596923ccfd84500d6b37cadb0e3d98 /block | |
parent | f4dad307ef844af097377c77dfb8049cc4b0b8d3 (diff) |
block: introduce BDRV_MAX_LENGTH
We are going to modify block layer to work with 64bit requests. And
first step is moving to int64_t type for both offset and bytes
arguments in all block request related functions.
It's mostly safe (when widening signed or unsigned int to int64_t), but
switching from uint64_t is questionable.
So, let's first establish the set of requests we want to work with.
First signed int64_t should be enough, as off_t is signed anyway. Then,
obviously offset + bytes should not overflow.
And most interesting: (offset + bytes) being aligned up should not
overflow as well. Aligned to what alignment? First thing that comes in
mind is bs->bl.request_alignment, as we align up request to this
alignment. But there is another thing: look at
bdrv_mark_request_serialising(). It aligns request up to some given
alignment. And this parameter may be bdrv_get_cluster_size(), which is
often a lot greater than bs->bl.request_alignment.
Note also, that bdrv_mark_request_serialising() uses signed int64_t for
calculations. So, actually, we already depend on some restrictions.
Happily, bdrv_get_cluster_size() returns int and
bs->bl.request_alignment has 32bit unsigned type, but defined to be a
power of 2 less than INT_MAX. So, we may establish, that INT_MAX is
absolute maximum for any kind of alignment that may occur with the
request.
Note, that bdrv_get_cluster_size() is not documented to return power
of 2, still bdrv_mark_request_serialising() behaves like it is.
Also, backup uses bdi.cluster_size and is not prepared to it not being
power of 2.
So, let's establish that Qemu supports only power-of-2 clusters and
alignments.
So, alignment can't be greater than 2^30.
Finally to be safe with calculations, to not calculate different
maximums for different nodes (depending on cluster size and
request_alignment), let's simply set QEMU_ALIGN_DOWN(INT64_MAX, 2^30)
as absolute maximum bytes length for Qemu. Actually, it's not much less
than INT64_MAX.
OK, then, let's apply it to block/io.
Let's consider all block/io entry points of offset/bytes:
4 bytes/offset interface functions: bdrv_co_preadv_part(),
bdrv_co_pwritev_part(), bdrv_co_copy_range_internal() and
bdrv_co_pdiscard() and we check them all with bdrv_check_request().
We also have one entry point with only offset: bdrv_co_truncate().
Check the offset.
And one public structure: BdrvTrackedRequest. Happily, it has only
three external users:
file-posix.c: adopted by this patch
write-threshold.c: only read fields
test-write-threshold.c: sets obviously small constant values
Better is to make the structure private and add corresponding
interfaces.. Still it's not obvious what kind of interface is needed
for file-posix.c. Let's keep it public but add corresponding
assertions.
After this patch we'll convert functions in block/io.c to int64_t bytes
and offset parameters. We can assume that offset/bytes pair always
satisfy new restrictions, and make
corresponding assertions where needed. If we reach some offset/bytes
point in block/io.c missing bdrv_check_request() it is considered a
bug. As well, if block/io.c modifies a offset/bytes request, expanding
it more then aligning up to request_alignment, it's a bug too.
For all io requests except for discard we keep for now old restriction
of 32bit request length.
iotest 206 output error message changed, as now test disk size is
larger than new limit. Add one more test case with new maximum disk
size to cover too-big-L1 case.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20201203222713.13507-5-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/file-posix.c | 6 | ||||
-rw-r--r-- | block/io.c | 51 |
2 files changed, 45 insertions, 12 deletions
diff --git a/block/file-posix.c b/block/file-posix.c index 9bee3d88d0..83e2cc5530 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2926,7 +2926,6 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, #ifdef CONFIG_FALLOCATE if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { BdrvTrackedRequest *req; - uint64_t end; /* * This is a workaround for a bug in the Linux XFS driver, @@ -2950,8 +2949,9 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, assert(req->offset <= offset); assert(req->offset + req->bytes >= offset + bytes); - end = INT64_MAX & -(uint64_t)bs->bl.request_alignment; - req->bytes = end - req->offset; + req->bytes = BDRV_MAX_LENGTH - req->offset; + + assert(bdrv_check_request(req->offset, req->bytes) == 0); bdrv_mark_request_serialising(req, bs->bl.request_alignment); } diff --git a/block/io.c b/block/io.c index ef75a5abb4..6343d85476 100644 --- a/block/io.c +++ b/block/io.c @@ -176,6 +176,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) /* Then let the driver override it */ if (drv->bdrv_refresh_limits) { drv->bdrv_refresh_limits(bs, errp); + if (*errp) { + return; + } + } + + if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) { + error_setg(errp, "Driver requires too large request alignment"); } } @@ -884,13 +891,31 @@ static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self return waited; } -static int bdrv_check_byte_request(int64_t offset, size_t size) +int bdrv_check_request(int64_t offset, int64_t bytes) { - if (size > BDRV_REQUEST_MAX_BYTES) { + if (offset < 0 || bytes < 0) { return -EIO; } - if (offset < 0) { + if (bytes > BDRV_MAX_LENGTH) { + return -EIO; + } + + if (offset > BDRV_MAX_LENGTH - bytes) { + return -EIO; + } + + return 0; +} + +static int bdrv_check_request32(int64_t offset, int64_t bytes) +{ + int ret = bdrv_check_request(offset, bytes); + if (ret < 0) { + return ret; + } + + if (bytes > BDRV_REQUEST_MAX_BYTES) { return -EIO; } @@ -1641,7 +1666,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, return -ENOMEDIUM; } - ret = bdrv_check_byte_request(offset, bytes); + ret = bdrv_check_request32(offset, bytes); if (ret < 0) { return ret; } @@ -2057,7 +2082,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, return -ENOMEDIUM; } - ret = bdrv_check_byte_request(offset, bytes); + ret = bdrv_check_request32(offset, bytes); if (ret < 0) { return ret; } @@ -2787,8 +2812,9 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, return -EPERM; } - if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) { - return -EIO; + ret = bdrv_check_request(offset, bytes); + if (ret < 0) { + return ret; } /* Do nothing if disabled. */ @@ -3047,7 +3073,7 @@ static int coroutine_fn bdrv_co_copy_range_internal( if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) { return -ENOMEDIUM; } - ret = bdrv_check_byte_request(dst_offset, bytes); + ret = bdrv_check_request32(dst_offset, bytes); if (ret) { return ret; } @@ -3058,7 +3084,7 @@ static int coroutine_fn bdrv_co_copy_range_internal( if (!src || !src->bs || !bdrv_is_inserted(src->bs)) { return -ENOMEDIUM; } - ret = bdrv_check_byte_request(src_offset, bytes); + ret = bdrv_check_request32(src_offset, bytes); if (ret) { return ret; } @@ -3188,6 +3214,13 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, return -EINVAL; } + ret = bdrv_check_request(offset, 0); + if (ret < 0) { + error_setg(errp, "Required too big image size, it must be not greater " + "than %" PRId64, BDRV_MAX_LENGTH); + return ret; + } + old_size = bdrv_getlength(bs); if (old_size < 0) { error_setg_errno(errp, -old_size, "Failed to get old image size"); |