diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-02-06 18:06:07 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-02-06 18:06:07 +0000 |
commit | 3d815ac82b0a3e816fd7a6d2561a73e780c3b685 (patch) | |
tree | bb5af8266e302cbe460b3e5de2cb261a877a6678 | |
parent | a2f2d288b5a06e6c680c387c9980d91363f59c61 (diff) | |
parent | 728dacbda817b2ca259e9d337fab06bcf14e94a6 (diff) |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block patches for 2.3
# gpg: Signature made Fri 06 Feb 2015 17:14:10 GMT using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
* remotes/kevin/tags/for-upstream: (47 commits)
block/raw-posix.c: Fix raw_getlength() on Mac OS X block devices
block: Eliminate silly QERR_ macros used for encryption keys
block: New bdrv_add_key(), convert monitor to use it
blockdev: Eliminate silly QERR_BLOCK_JOB_NOT_ACTIVE macro
blockdev: Give find_block_job() an Error ** parameter
qcow2: Rewrite qcow2_alloc_bytes()
block: Give always priority to unused entries in the qcow2 L2 cache
nbd: fix max_discard/max_transfer_length
block: introduce BDRV_REQUEST_MAX_SECTORS
nbd: Improve error messages
iotests: Fix 104 for NBD
iotests: Fix 100 for nbd
iotests: Fix 083
block: fix off-by-one error in qcow and qcow2
qemu-iotests: add 116 invalid QED input file tests
qed: check for header size overflow
block/dmg: improve zeroes handling
block/dmg: support bzip2 block entry types
block/dmg: factor out block type check
block/dmg: use SectorNumber from BLKX header
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
55 files changed, 1602 insertions, 462 deletions
@@ -2647,7 +2647,7 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, { int64_t len; - if (size > INT_MAX) { + if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { return -EIO; } @@ -2671,7 +2671,7 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return -EIO; } @@ -2758,7 +2758,7 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, .iov_len = nb_sectors * BDRV_SECTOR_SIZE, }; - if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return -EINVAL; } @@ -2826,13 +2826,10 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) } for (;;) { - nb_sectors = target_sectors - sector_num; + nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); if (nb_sectors <= 0) { return 0; } - if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { - nb_sectors = INT_MAX / BDRV_SECTOR_SIZE; - } ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); if (ret < 0) { error_report("error getting block status at sector %" PRId64 ": %s", @@ -3167,7 +3164,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, BdrvRequestFlags flags) { - if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) { + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return -EINVAL; } @@ -3192,10 +3189,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, BDRV_REQ_COPY_ON_READ); } -/* if no limit is specified in the BlockLimits use a default - * of 32768 512-byte sectors (16 MiB) per request. - */ -#define MAX_WRITE_ZEROES_DEFAULT 32768 +#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) @@ -3205,8 +3199,8 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, struct iovec iov = {0}; int ret = 0; - int max_write_zeroes = bs->bl.max_write_zeroes ? - bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT; + int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes, + BDRV_REQUEST_MAX_SECTORS); while (nb_sectors > 0 && !ret) { int num = nb_sectors; @@ -3242,7 +3236,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, if (ret == -ENOTSUP) { /* Fall back to bounce buffer if write zeroes is unsupported */ int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, - MAX_WRITE_ZEROES_DEFAULT); + MAX_WRITE_ZEROES_BOUNCE_BUFFER); num = MIN(num, max_xfer_len); iov.iov_len = num * BDRV_SECTOR_SIZE; if (iov.iov_base == NULL) { @@ -3461,7 +3455,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, BdrvRequestFlags flags) { - if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) { + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return -EINVAL; } @@ -3719,6 +3713,36 @@ int bdrv_set_key(BlockDriverState *bs, const char *key) return ret; } +/* + * Provide an encryption key for @bs. + * If @key is non-null: + * If @bs is not encrypted, fail. + * Else if the key is invalid, fail. + * Else set @bs's key to @key, replacing the existing key, if any. + * If @key is null: + * If @bs is encrypted and still lacks a key, fail. + * Else do nothing. + * On failure, store an error object through @errp if non-null. + */ +void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) +{ + if (key) { + if (!bdrv_is_encrypted(bs)) { + error_setg(errp, "Device '%s' is not encrypted", + bdrv_get_device_name(bs)); + } else if (bdrv_set_key(bs, key) < 0) { + error_set(errp, QERR_INVALID_PASSWORD); + } + } else { + if (bdrv_key_required(bs)) { + error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, + "'%s' (%s) is encrypted", + bdrv_get_device_name(bs), + bdrv_get_encrypted_filename(bs)); + } + } +} + const char *bdrv_get_format_name(BlockDriverState *bs) { return bs->drv ? bs->drv->format_name : NULL; @@ -4562,6 +4586,8 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, } } + block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1); + return outidx + 1; } @@ -5097,11 +5123,6 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); } -/* if no limit is specified in the BlockLimits use a default - * of 32768 512-byte sectors (16 MiB) per request. - */ -#define MAX_DISCARD_DEFAULT 32768 - int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { @@ -5126,7 +5147,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return 0; } - max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT; + max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); while (nb_sectors > 0) { int ret; int num = nb_sectors; diff --git a/block/Makefile.objs b/block/Makefile.objs index 04b0e43eb1..db2933e469 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -20,6 +20,7 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o block-obj-y += accounting.o +block-obj-y += write-threshold.o common-obj-y += stream.o common-obj-y += commit.o @@ -36,5 +37,6 @@ gluster.o-libs := $(GLUSTERFS_LIBS) ssh.o-cflags := $(LIBSSH2_CFLAGS) ssh.o-libs := $(LIBSSH2_LIBS) archipelago.o-libs := $(ARCHIPELAGO_LIBS) +dmg.o-libs := $(BZIP2_LIBS) qcow.o-libs := -lz linux-aio.o-libs := -laio diff --git a/block/accounting.c b/block/accounting.c index 18102f015d..01d594ffdc 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -54,3 +54,10 @@ void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num, stats->wr_highest_sector = sector_num + nb_sectors - 1; } } + +void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, + int num_requests) +{ + assert(type < BLOCK_MAX_IOTYPE); + stats->merged[type] += num_requests; +} diff --git a/block/block-backend.c b/block/block-backend.c index d00c129f15..c28e2402c7 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -580,6 +580,11 @@ int blk_get_flags(BlockBackend *blk) return bdrv_get_flags(blk->bs); } +int blk_get_max_transfer_length(BlockBackend *blk) +{ + return blk->bs->bl.max_transfer_length; +} + void blk_set_guest_block_size(BlockBackend *blk, int align) { bdrv_set_guest_block_size(blk->bs, align); diff --git a/block/dmg.c b/block/dmg.c index e455886d2b..825c49d59a 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -26,6 +26,10 @@ #include "qemu/bswap.h" #include "qemu/module.h" #include <zlib.h> +#ifdef CONFIG_BZIP2 +#include <bzlib.h> +#endif +#include <glib.h> enum { /* Limit chunk sizes to prevent unreasonable amounts of memory being used @@ -55,6 +59,9 @@ typedef struct BDRVDMGState { uint8_t *compressed_chunk; uint8_t *uncompressed_chunk; z_stream zstream; +#ifdef CONFIG_BZIP2 + bz_stream bzstream; +#endif } BDRVDMGState; static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename) @@ -100,6 +107,16 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result) return 0; } +static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset) +{ + return be64_to_cpu(*(uint64_t *)&buffer[offset]); +} + +static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset) +{ + return be32_to_cpu(*(uint32_t *)&buffer[offset]); +} + /* Increase max chunk sizes, if necessary. This function is used to calculate * the buffer sizes needed for compressed/uncompressed chunk I/O. */ @@ -112,6 +129,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk, switch (s->types[chunk]) { case 0x80000005: /* zlib compressed */ + case 0x80000006: /* bzip2 compressed */ compressed_size = s->lengths[chunk]; uncompressed_sectors = s->sectorcounts[chunk]; break; @@ -119,7 +137,9 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk, uncompressed_sectors = (s->lengths[chunk] + 511) / 512; break; case 2: /* zero */ - uncompressed_sectors = s->sectorcounts[chunk]; + /* as the all-zeroes block may be large, it is treated specially: the + * sector is not copied from a large buffer, a simple memset is used + * instead. Therefore uncompressed_sectors does not need to be set. */ break; } @@ -131,163 +151,372 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk, } } +static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp) +{ + int64_t length; + int64_t offset = 0; + uint8_t buffer[515]; + int i, ret; + + /* bdrv_getlength returns a multiple of block size (512), rounded up. Since + * dmg images can have odd sizes, try to look for the "koly" magic which + * marks the begin of the UDIF trailer (512 bytes). This magic can be found + * in the last 511 bytes of the second-last sector or the first 4 bytes of + * the last sector (search space: 515 bytes) */ + length = bdrv_getlength(file_bs); + if (length < 0) { + error_setg_errno(errp, -length, + "Failed to get file size while reading UDIF trailer"); + return length; + } else if (length < 512) { + error_setg(errp, "dmg file must be at least 512 bytes long"); + return -EINVAL; + } + if (length > 511 + 512) { + offset = length - 511 - 512; + } + length = length < 515 ? length : 515; + ret = bdrv_pread(file_bs, offset, buffer, length); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed while reading UDIF trailer"); + return ret; + } + for (i = 0; i < length - 3; i++) { + if (buffer[i] == 'k' && buffer[i+1] == 'o' && + buffer[i+2] == 'l' && buffer[i+3] == 'y') { + return offset + i; + } + } + error_setg(errp, "Could not locate UDIF trailer in dmg file"); + return -EINVAL; +} + +/* used when building the sector table */ +typedef struct DmgHeaderState { + /* used internally by dmg_read_mish_block to remember offsets of blocks + * across calls */ + uint64_t data_fork_offset; + /* exported for dmg_open */ + uint32_t max_compressed_size; + uint32_t max_sectors_per_chunk; +} DmgHeaderState; + +static bool dmg_is_known_block_type(uint32_t entry_type) +{ + switch (entry_type) { + case 0x00000001: /* uncompressed */ + case 0x00000002: /* zeroes */ + case 0x80000005: /* zlib */ +#ifdef CONFIG_BZIP2 + case 0x80000006: /* bzip2 */ +#endif + return true; + default: + return false; + } +} + +static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds, + uint8_t *buffer, uint32_t count) +{ + uint32_t type, i; + int ret; + size_t new_size; + uint32_t chunk_count; + int64_t offset = 0; + uint64_t data_offset; + uint64_t in_offset = ds->data_fork_offset; + uint64_t out_offset; + + type = buff_read_uint32(buffer, offset); + /* skip data that is not a valid MISH block (invalid magic or too small) */ + if (type != 0x6d697368 || count < 244) { + /* assume success for now */ + return 0; + } + + /* chunk offsets are relative to this sector number */ + out_offset = buff_read_uint64(buffer, offset + 8); + + /* location in data fork for (compressed) blob (in bytes) */ + data_offset = buff_read_uint64(buffer, offset + 0x18); + in_offset += data_offset; + + /* move to begin of chunk entries */ + offset += 204; + + chunk_count = (count - 204) / 40; + new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count); + s->types = g_realloc(s->types, new_size / 2); + s->offsets = g_realloc(s->offsets, new_size); + s->lengths = g_realloc(s->lengths, new_size); + s->sectors = g_realloc(s->sectors, new_size); + s->sectorcounts = g_realloc(s->sectorcounts, new_size); + + for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) { + s->types[i] = buff_read_uint32(buffer, offset); + if (!dmg_is_known_block_type(s->types[i])) { + chunk_count--; + i--; + offset += 40; + continue; + } + + /* sector number */ + s->sectors[i] = buff_read_uint64(buffer, offset + 8); + s->sectors[i] += out_offset; + + /* sector count */ + s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10); + + /* all-zeroes sector (type 2) does not need to be "uncompressed" and can + * therefore be unbounded. */ + if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) { + error_report("sector count %" PRIu64 " for chunk %" PRIu32 + " is larger than max (%u)", + s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX); + ret = -EINVAL; + goto fail; + } + + /* offset in (compressed) data fork */ + s->offsets[i] = buff_read_uint64(buffer, offset + 0x18); + s->offsets[i] += in_offset; + + /* length in (compressed) data fork */ + s->lengths[i] = buff_read_uint64(buffer, offset + 0x20); + + if (s->lengths[i] > DMG_LENGTHS_MAX) { + error_report("length %" PRIu64 " for chunk %" PRIu32 + " is larger than max (%u)", + s->lengths[i], i, DMG_LENGTHS_MAX); + ret = -EINVAL; + goto fail; + } + + update_max_chunk_size(s, i, &ds->max_compressed_size, + &ds->max_sectors_per_chunk); + offset += 40; + } + s->n_chunks += chunk_count; + return 0; + +fail: + return ret; +} + +static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds, + uint64_t info_begin, uint64_t info_length) +{ + BDRVDMGState *s = bs->opaque; + int ret; + uint32_t count, rsrc_data_offset; + uint8_t *buffer = NULL; + uint64_t info_end; + uint64_t offset; + + /* read offset from begin of resource fork (info_begin) to resource data */ + ret = read_uint32(bs, info_begin, &rsrc_data_offset); + if (ret < 0) { + goto fail; + } else if (rsrc_data_offset > info_length) { + ret = -EINVAL; + goto fail; + } + + /* read length of resource data */ + ret = read_uint32(bs, info_begin + 8, &count); + if (ret < 0) { + goto fail; + } else if (count == 0 || rsrc_data_offset + count > info_length) { + ret = -EINVAL; + goto fail; + } + + /* begin of resource data (consisting of one or more resources) */ + offset = info_begin + rsrc_data_offset; + + /* end of resource data (there is possibly a following resource map + * which will be ignored). */ + info_end = offset + count; + + /* read offsets (mish blocks) from one or more resources in resource data */ + while (offset < info_end) { + /* size of following resource */ + ret = read_uint32(bs, offset, &count); + if (ret < 0) { + goto fail; + } else if (count == 0 || count > info_end - offset) { + ret = -EINVAL; + goto fail; + } + offset += 4; + + buffer = g_realloc(buffer, count); + ret = bdrv_pread(bs->file, offset, buffer, count); + if (ret < 0) { + goto fail; + } + + ret = dmg_read_mish_block(s, ds, buffer, count); + if (ret < 0) { + goto fail; + } + /* advance offset by size of resource */ + offset += count; + } + ret = 0; + +fail: + g_free(buffer); + return ret; +} + +static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds, + uint64_t info_begin, uint64_t info_length) +{ + BDRVDMGState *s = bs->opaque; + int ret; + uint8_t *buffer = NULL; + char *data_begin, *data_end; + + /* Have at least some length to avoid NULL for g_malloc. Attempt to set a + * safe upper cap on the data length. A test sample had a XML length of + * about 1 MiB. */ + if (info_length == 0 || info_length > 16 * 1024 * 1024) { + ret = -EINVAL; + goto fail; + } + + buffer = g_malloc(info_length + 1); + buffer[info_length] = '\0'; + ret = bdrv_pread(bs->file, info_begin, buffer, info_length); + if (ret != info_length) { + ret = -EINVAL; + goto fail; + } + + /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64 + * decode. The actual data element has 431 (0x1af) bytes which includes tabs + * and line feeds. */ + data_end = (char *)buffer; + while ((data_begin = strstr(data_end, "<data>")) != NULL) { + guchar *mish; + gsize out_len = 0; + + data_begin += 6; + data_end = strstr(data_begin, "</data>"); + /* malformed XML? */ + if (data_end == NULL) { + ret = -EINVAL; + goto fail; + } + *data_end++ = '\0'; + mish = g_base64_decode(data_begin, &out_len); + ret = dmg_read_mish_block(s, ds, mish, (uint32_t)out_len); + g_free(mish); + if (ret < 0) { + goto fail; + } + } + ret = 0; + +fail: + g_free(buffer); + return ret; +} + static int dmg_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVDMGState *s = bs->opaque; - uint64_t info_begin, info_end, last_in_offset, last_out_offset; - uint32_t count, tmp; - uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i; + DmgHeaderState ds; + uint64_t rsrc_fork_offset, rsrc_fork_length; + uint64_t plist_xml_offset, plist_xml_length; int64_t offset; int ret; bs->read_only = 1; s->n_chunks = 0; s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; + /* used by dmg_read_mish_block to keep track of the current I/O position */ + ds.data_fork_offset = 0; + ds.max_compressed_size = 1; + ds.max_sectors_per_chunk = 1; - /* read offset of info blocks */ - offset = bdrv_getlength(bs->file); + /* locate the UDIF trailer */ + offset = dmg_find_koly_offset(bs->file, errp); if (offset < 0) { ret = offset; goto fail; } - offset -= 0x1d8; - ret = read_uint64(bs, offset, &info_begin); + /* offset of data fork (DataForkOffset) */ + ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset); if (ret < 0) { goto fail; - } else if (info_begin == 0) { + } else if (ds.data_fork_offset > offset) { ret = -EINVAL; goto fail; } - ret = read_uint32(bs, info_begin, &tmp); + /* offset of resource fork (RsrcForkOffset) */ + ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset); if (ret < 0) { goto fail; - } else if (tmp != 0x100) { + } + ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length); + if (ret < 0) { + goto fail; + } + if (rsrc_fork_offset >= offset || + rsrc_fork_length > offset - rsrc_fork_offset) { ret = -EINVAL; goto fail; } - - ret = read_uint32(bs, info_begin + 4, &count); + /* offset of property list (XMLOffset) */ + ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset); if (ret < 0) { goto fail; - } else if (count == 0) { + } + ret = read_uint64(bs, offset + 0xe0, &plist_xml_length); + if (ret < 0) { + goto fail; + } + if (plist_xml_offset >= offset || + plist_xml_length > offset - plist_xml_offset) { ret = -EINVAL; goto fail; } - info_end = info_begin + count; - - offset = info_begin + 0x100; - - /* read offsets */ - last_in_offset = last_out_offset = 0; - while (offset < info_end) { - uint32_t type; - - ret = read_uint32(bs, offset, &count); + ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors); + if (ret < 0) { + goto fail; + } + if (bs->total_sectors < 0) { + ret = -EINVAL; + goto fail; + } + if (rsrc_fork_length != 0) { + ret = dmg_read_resource_fork(bs, &ds, + rsrc_fork_offset, rsrc_fork_length); if (ret < 0) { goto fail; - } else if (count == 0) { - ret = -EINVAL; - goto fail; } - offset += 4; - - ret = read_uint32(bs, offset, &type); + } else if (plist_xml_length != 0) { + ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length); if (ret < 0) { goto fail; } - - if (type == 0x6d697368 && count >= 244) { - size_t new_size; - uint32_t chunk_count; - - offset += 4; - offset += 200; - - chunk_count = (count - 204) / 40; - new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count); - s->types = g_realloc(s->types, new_size / 2); - s->offsets = g_realloc(s->offsets, new_size); - s->lengths = g_realloc(s->lengths, new_size); - s->sectors = g_realloc(s->sectors, new_size); - s->sectorcounts = g_realloc(s->sectorcounts, new_size); - - for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) { - ret = read_uint32(bs, offset, &s->types[i]); - if (ret < 0) { - goto fail; - } - offset += 4; - if (s->types[i] != 0x80000005 && s->types[i] != 1 && - s->types[i] != 2) { - if (s->types[i] == 0xffffffff && i > 0) { - last_in_offset = s->offsets[i - 1] + s->lengths[i - 1]; - last_out_offset = s->sectors[i - 1] + - s->sectorcounts[i - 1]; - } - chunk_count--; - i--; - offset += 36; - continue; - } - offset += 4; - - ret = read_uint64(bs, offset, &s->sectors[i]); - if (ret < 0) { - goto fail; - } - s->sectors[i] += last_out_offset; - offset += 8; - - ret = read_uint64(bs, offset, &s->sectorcounts[i]); - if (ret < 0) { - goto fail; - } - offset += 8; - - if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) { - error_report("sector count %" PRIu64 " for chunk %" PRIu32 - " is larger than max (%u)", - s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX); - ret = -EINVAL; - goto fail; - } - - ret = read_uint64(bs, offset, &s->offsets[i]); - if (ret < 0) { - goto fail; - } - s->offsets[i] += last_in_offset; - offset += 8; - - ret = read_uint64(bs, offset, &s->lengths[i]); - if (ret < 0) { - goto fail; - } - offset += 8; - - if (s->lengths[i] > DMG_LENGTHS_MAX) { - error_report("length %" PRIu64 " for chunk %" PRIu32 - " is larger than max (%u)", - s->lengths[i], i, DMG_LENGTHS_MAX); - ret = -EINVAL; - goto fail; - } - - update_max_chunk_size(s, i, &max_compressed_size, - &max_sectors_per_chunk); - } - s->n_chunks += chunk_count; - } + } else { + ret = -EINVAL; + goto fail; } /* initialize zlib engine */ s->compressed_chunk = qemu_try_blockalign(bs->file, - max_compressed_size + 1); + ds.max_compressed_size + 1); s->uncompressed_chunk = qemu_try_blockalign(bs->file, - 512 * max_sectors_per_chunk); + 512 * ds.max_sectors_per_chunk); if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) { ret = -ENOMEM; goto fail; @@ -349,13 +578,16 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) { int ret; uint32_t chunk = search_chunk(s, sector_num); +#ifdef CONFIG_BZIP2 + uint64_t total_out; +#endif if (chunk >= s->n_chunks) { return -1; } s->current_chunk = s->n_chunks; - switch (s->types[chunk]) { + switch (s->types[chunk]) { /* block entry type */ case 0x80000005: { /* zlib compressed */ /* we need to buffer, because only the chunk as whole can be * inflated. */ @@ -379,6 +611,34 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) return -1; } break; } +#ifdef CONFIG_BZIP2 + case 0x80000006: /* bzip2 compressed */ + /* we need to buffer, because only the chunk as whole can be + * inflated. */ + ret = bdrv_pread(bs->file, s->offsets[chunk], + s->compressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) { + return -1; + } + + ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0); + if (ret != BZ_OK) { + return -1; + } + s->bzstream.next_in = (char *)s->compressed_chunk; + s->bzstream.avail_in = (unsigned int) s->lengths[chunk]; + s->bzstream.next_out = (char *)s->uncompressed_chunk; + s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk]; + ret = BZ2_bzDecompress(&s->bzstream); + total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) + + s->bzstream.total_out_lo32; + BZ2_bzDecompressEnd(&s->bzstream); + if (ret != BZ_STREAM_END || + total_out != 512 * s->sectorcounts[chunk]) { + return -1; + } + break; +#endif /* CONFIG_BZIP2 */ case 1: /* copy */ ret = bdrv_pread(bs->file, s->offsets[chunk], s->uncompressed_chunk, s->lengths[chunk]); @@ -387,7 +647,8 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) } break; case 2: /* zero */ - memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]); + /* see dmg_read, it is treated specially. No buffer needs to be + * pre-filled, the zeroes can be set directly. */ break; } s->current_chunk = chunk; @@ -406,6 +667,13 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num, if (dmg_read_chunk(bs, sector_num + i) != 0) { return -1; } + /* Special case: current chunk is all zeroes. Do not perform a memcpy as + * s->uncompressed_chunk may be too small to cover the large all-zeroes + * section. dmg_read_chunk is called to find s->current_chunk */ + if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */ + memset(buf + i * 512, 0, 512); + continue; + } sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk]; memcpy(buf + i * 512, s->uncompressed_chunk + sector_offset_in_chunk * 512, 512); diff --git a/block/nbd-client.c b/block/nbd-client.c index 6e1c97cad0..28bfb62bed 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -373,7 +373,7 @@ void nbd_client_session_close(NbdClientSession *client) } int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, - int sock, const char *export) + int sock, const char *export, Error **errp) { int ret; @@ -382,7 +382,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, qemu_set_block(sock); ret = nbd_receive_negotiate(sock, export, &client->nbdflags, &client->size, - &client->blocksize); + &client->blocksize, errp); if (ret < 0) { logout("Failed to negotiate with the NBD server\n"); closesocket(sock); diff --git a/block/nbd-client.h b/block/nbd-client.h index cd478f3a98..cfeecc2775 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -36,7 +36,7 @@ typedef struct NbdClientSession { } NbdClientSession; int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, - int sock, const char *export_name); + int sock, const char *export_name, Error **errp); void nbd_client_session_close(NbdClientSession *client); int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num, diff --git a/block/nbd.c b/block/nbd.c index 04cc845076..b05d1d0407 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -271,7 +271,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, } /* NBD handshake */ - result = nbd_client_session_init(&s->client, bs, sock, export); + result = nbd_client_session_init(&s->client, bs, sock, export, errp); g_free(export); return result; } @@ -301,6 +301,12 @@ static int nbd_co_flush(BlockDriverState *bs) return nbd_client_session_co_flush(&s->client); } +static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) +{ + bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS; + bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS; +} + static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { @@ -396,6 +402,7 @@ static BlockDriver bdrv_nbd = { .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, + .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_detach_aio_context, .bdrv_attach_aio_context = nbd_attach_aio_context, @@ -413,6 +420,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, + .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_detach_aio_context, .bdrv_attach_aio_context = nbd_attach_aio_context, @@ -430,6 +438,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, + .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_detach_aio_context, .bdrv_attach_aio_context = nbd_attach_aio_context, diff --git a/block/qapi.c b/block/qapi.c index 75c388e90b..1808e67336 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -24,6 +24,7 @@ #include "block/qapi.h" #include "block/block_int.h" +#include "block/write-threshold.h" #include "qmp-commands.h" #include "qapi-visit.h" #include "qapi/qmp-output-visitor.h" @@ -89,6 +90,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs) info->iops_size = cfg.op_size; } + info->write_threshold = bdrv_write_threshold_get(bs); + return info; } @@ -335,6 +338,8 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE]; s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ]; s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE]; + s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ]; + s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE]; s->stats->wr_highest_offset = bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE; s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH]; diff --git a/block/qcow.c b/block/qcow.c index ccbe9e0d2c..055896910e 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -215,7 +215,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, /* read the backing file name */ if (header.backing_file_offset != 0) { len = header.backing_file_size; - if (len > 1023 || len > sizeof(bs->backing_file)) { + if (len > 1023 || len >= sizeof(bs->backing_file)) { error_setg(errp, "Backing file name too long"); ret = -EINVAL; goto fail; diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 904f6b1f44..b1155492a3 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -253,7 +253,9 @@ static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) /* Give newer hits priority */ /* TODO Check how to optimize the replacement strategy */ - c->entries[i].cache_hits /= 2; + if (c->entries[i].cache_hits > 1) { + c->entries[i].cache_hits /= 2; + } } if (min_index == -1) { diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 9afdb40b40..9b80ca79ea 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -759,54 +759,54 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) { BDRVQcowState *s = bs->opaque; - int64_t offset, cluster_offset; - int free_in_cluster; + int64_t offset; + size_t free_in_cluster; + int ret; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES); assert(size > 0 && size <= s->cluster_size); - if (s->free_byte_offset == 0) { - offset = qcow2_alloc_clusters(bs, s->cluster_size); - if (offset < 0) { - return offset; + assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset)); + + offset = s->free_byte_offset; + + if (offset) { + int refcount = qcow2_get_refcount(bs, offset >> s->cluster_bits); + if (refcount < 0) { + return refcount; } - s->free_byte_offset = offset; - } - redo: - free_in_cluster = s->cluster_size - - offset_into_cluster(s, s->free_byte_offset); - if (size <= free_in_cluster) { - /* enough space in current cluster */ - offset = s->free_byte_offset; - s->free_byte_offset += size; - free_in_cluster -= size; - if (free_in_cluster == 0) - s->free_byte_offset = 0; - if (offset_into_cluster(s, offset) != 0) - qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1, - QCOW2_DISCARD_NEVER); - } else { - offset = qcow2_alloc_clusters(bs, s->cluster_size); - if (offset < 0) { - return offset; + + if (refcount == 0xffff) { + offset = 0; } - cluster_offset = start_of_cluster(s, s->free_byte_offset); - if ((cluster_offset + s->cluster_size) == offset) { - /* we are lucky: contiguous data */ - offset = s->free_byte_offset; - qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1, - QCOW2_DISCARD_NEVER); - s->free_byte_offset += size; - } else { - s->free_byte_offset = offset; - goto redo; + } + + free_in_cluster = s->cluster_size - offset_into_cluster(s, offset); + if (!offset || free_in_cluster < size) { + int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size); + if (new_cluster < 0) { + return new_cluster; + } + + if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) { + offset = new_cluster; } } - /* The cluster refcount was incremented, either by qcow2_alloc_clusters() - * or explicitly by qcow2_update_cluster_refcount(). Refcount blocks must - * be flushed before the caller's L2 table updates. - */ + assert(offset); + ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER); + if (ret < 0) { + return ret; + } + + /* The cluster refcount was incremented; refcount blocks must be flushed + * before the caller's L2 table updates. */ qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); + + s->free_byte_offset = offset + size; + if (!offset_into_cluster(s, s->free_byte_offset)) { + s->free_byte_offset = 0; + } + return offset; } diff --git a/block/qcow2.c b/block/qcow2.c index dbaf016bc7..7e614d76a4 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -869,7 +869,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (header.backing_file_offset != 0) { len = header.backing_file_size; if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || - len > sizeof(bs->backing_file)) { + len >= sizeof(bs->backing_file)) { error_setg(errp, "Backing file name too long"); ret = -EINVAL; goto fail; diff --git a/block/qed.c b/block/qed.c index 80f18d82e2..892b13c806 100644 --- a/block/qed.c +++ b/block/qed.c @@ -440,6 +440,11 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, s->l2_mask = s->table_nelems - 1; s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1; + /* Header size calculation must not overflow uint32_t */ + if (s->header.header_size > UINT32_MAX / s->header.cluster_size) { + return -EINVAL; + } + if ((s->header.features & QED_F_BACKING_FILE)) { if ((uint64_t)s->header.backing_filename_offset + s->header.backing_filename_size > diff --git a/block/qed.h b/block/qed.h index d3934a05cd..615e676fc8 100644 --- a/block/qed.h +++ b/block/qed.h @@ -133,7 +133,6 @@ typedef struct QEDAIOCB { int bh_ret; /* final return status for completion bh */ QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */ int flags; /* QED_AIOCB_* bits ORed together */ - bool *finished; /* signal for cancel completion */ uint64_t end_pos; /* request end on block device, in bytes */ /* User scatter-gather list */ diff --git a/block/raw-posix.c b/block/raw-posix.c index e51293a455..e474c17974 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -60,7 +60,7 @@ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ #endif #endif -#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE) #include <linux/falloc.h> #endif #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) @@ -147,6 +147,7 @@ typedef struct BDRVRawState { bool has_discard:1; bool has_write_zeroes:1; bool discard_zeroes:1; + bool has_fallocate; bool needs_alignment; } BDRVRawState; @@ -452,6 +453,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } if (S_ISREG(st.st_mode)) { s->discard_zeroes = true; + s->has_fallocate = true; } if (S_ISBLK(st.st_mode)) { #ifdef BLKDISCARDZEROES @@ -893,40 +895,108 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) } #endif -static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) +static int translate_err(int err) { - int ret = -EOPNOTSUPP; + if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || + err == -ENOTTY) { + err = -ENOTSUP; + } + return err; +} + +#ifdef CONFIG_FALLOCATE +static int do_fallocate(int fd, int mode, off_t offset, off_t len) +{ + do { + if (fallocate(fd, mode, offset, len) == 0) { + return 0; + } + } while (errno == EINTR); + return translate_err(-errno); +} +#endif + +static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) +{ + int ret = -ENOTSUP; BDRVRawState *s = aiocb->bs->opaque; - if (s->has_write_zeroes == 0) { + if (!s->has_write_zeroes) { return -ENOTSUP; } - if (aiocb->aio_type & QEMU_AIO_BLKDEV) { #ifdef BLKZEROOUT - do { - uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; - if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { - return 0; - } - } while (errno == EINTR); + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { + return 0; + } + } while (errno == EINTR); - ret = -errno; + ret = translate_err(-errno); #endif - } else { + + if (ret == -ENOTSUP) { + s->has_write_zeroes = false; + } + return ret; +} + +static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) +{ + BDRVRawState *s = aiocb->bs->opaque; + + if (aiocb->aio_type & QEMU_AIO_BLKDEV) { + return handle_aiocb_write_zeroes_block(aiocb); + } + #ifdef CONFIG_XFS - if (s->is_xfs) { - return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes); + if (s->is_xfs) { + return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes); + } +#endif + +#ifdef CONFIG_FALLOCATE_ZERO_RANGE + if (s->has_write_zeroes) { + int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, + aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0 || ret != -ENOTSUP) { + return ret; } + s->has_write_zeroes = false; + } #endif + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + if (s->has_discard && s->has_fallocate) { + int ret = do_fallocate(s->fd, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0) { + ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0 || ret != -ENOTSUP) { + return ret; + } + s->has_fallocate = false; + } else if (ret != -ENOTSUP) { + return ret; + } else { + s->has_discard = false; + } } +#endif - if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP || - ret == -ENOTTY) { - s->has_write_zeroes = false; - ret = -ENOTSUP; +#ifdef CONFIG_FALLOCATE + if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) { + int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes); + if (ret == 0 || ret != -ENOTSUP) { + return ret; + } + s->has_fallocate = false; } - return ret; +#endif + + return -ENOTSUP; } static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) @@ -957,21 +1027,14 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) #endif #ifdef CONFIG_FALLOCATE_PUNCH_HOLE - do { - if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - aiocb->aio_offset, aiocb->aio_nbytes) == 0) { - return 0; - } - } while (errno == EINTR); - - ret = -errno; + ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); #endif } - if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP || - ret == -ENOTTY) { + ret = translate_err(ret); + if (ret == -ENOTSUP) { s->has_discard = false; - ret = -ENOTSUP; } return ret; } @@ -1312,7 +1375,20 @@ again: if (size == 0) #endif #if defined(__APPLE__) && defined(__MACH__) - size = LLONG_MAX; + { + uint64_t sectors = 0; + uint32_t sector_size = 0; + + if (ioctl(fd, DKIOCGETBLOCKCOUNT, §ors) == 0 + && ioctl(fd, DKIOCGETBLOCKSIZE, §or_size) == 0) { + size = sectors * sector_size; + } else { + size = lseek(fd, 0LL, SEEK_END); + if (size < 0) { + return -errno; + } + } + } #else size = lseek(fd, 0LL, SEEK_END); if (size < 0) { diff --git a/block/write-threshold.c b/block/write-threshold.c new file mode 100644 index 0000000000..c2cd517716 --- /dev/null +++ b/block/write-threshold.c @@ -0,0 +1,125 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani <fromani@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include "block/block_int.h" +#include "block/coroutine.h" +#include "block/write-threshold.h" +#include "qemu/notify.h" +#include "qapi-event.h" +#include "qmp-commands.h" + + +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) +{ + return bs->write_threshold_offset; +} + +bool bdrv_write_threshold_is_set(const BlockDriverState *bs) +{ + return bs->write_threshold_offset > 0; +} + +static void write_threshold_disable(BlockDriverState *bs) +{ + if (bdrv_write_threshold_is_set(bs)) { + notifier_with_return_remove(&bs->write_threshold_notifier); + bs->write_threshold_offset = 0; + } +} + +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (req->offset > bs->write_threshold_offset) { + return (req->offset - bs->write_threshold_offset) + req->bytes; + } + if ((req->offset + req->bytes) > bs->write_threshold_offset) { + return (req->offset + req->bytes) - bs->write_threshold_offset; + } + } + return 0; +} + +static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, + void *opaque) +{ + BdrvTrackedRequest *req = opaque; + BlockDriverState *bs = req->bs; + uint64_t amount = 0; + + amount = bdrv_write_threshold_exceeded(bs, req); + if (amount > 0) { + qapi_event_send_block_write_threshold( + bs->node_name, + amount, + bs->write_threshold_offset, + &error_abort); + + /* autodisable to avoid flooding the monitor */ + write_threshold_disable(bs); + } + + return 0; /* should always let other notifiers run */ +} + +static void write_threshold_register_notifier(BlockDriverState *bs) +{ + bs->write_threshold_notifier.notify = before_write_notify; + notifier_with_return_list_add(&bs->before_write_notifiers, + &bs->write_threshold_notifier); +} + +static void write_threshold_update(BlockDriverState *bs, + int64_t threshold_bytes) +{ + bs->write_threshold_offset = threshold_bytes; +} + +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (threshold_bytes > 0) { + write_threshold_update(bs, threshold_bytes); + } else { + write_threshold_disable(bs); + } + } else { + if (threshold_bytes > 0) { + /* avoid multiple registration */ + write_threshold_register_notifier(bs); + write_threshold_update(bs, threshold_bytes); + } + /* discard bogus disable request */ + } +} + +void qmp_block_set_write_threshold(const char *node_name, + uint64_t threshold_bytes, + Error **errp) +{ + BlockDriverState *bs; + AioContext *aio_context; + + bs = bdrv_find_node(node_name); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, node_name); + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + bdrv_write_threshold_set(bs, threshold_bytes); + + aio_context_release(aio_context); +} diff --git a/blockdev.c b/blockdev.c index d59efd3f15..7d34960b96 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1793,7 +1793,6 @@ void qmp_block_passwd(bool has_device, const char *device, Error *local_err = NULL; BlockDriverState *bs; AioContext *aio_context; - int err; bs = bdrv_lookup_bs(has_device ? device : NULL, has_node_name ? node_name : NULL, @@ -1806,16 +1805,8 @@ void qmp_block_passwd(bool has_device, const char *device, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - err = bdrv_set_key(bs, password); - if (err == -EINVAL) { - error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs)); - goto out; - } else if (err < 0) { - error_set(errp, QERR_INVALID_PASSWORD); - goto out; - } + bdrv_add_key(bs, password, errp); -out: aio_context_release(aio_context); } @@ -1833,18 +1824,7 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename, return; } - if (bdrv_key_required(bs)) { - if (password) { - if (bdrv_set_key(bs, password) < 0) { - error_set(errp, QERR_INVALID_PASSWORD); - } - } else { - error_set(errp, QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs), - bdrv_get_encrypted_filename(bs)); - } - } else if (password) { - error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs)); - } + bdrv_add_key(bs, password, errp); } void qmp_change_blockdev(const char *device, const char *filename, @@ -2653,7 +2633,8 @@ out: } /* Get the block job for a given device name and acquire its AioContext */ -static BlockJob *find_block_job(const char *device, AioContext **aio_context) +static BlockJob *find_block_job(const char *device, AioContext **aio_context, + Error **errp) { BlockDriverState *bs; @@ -2673,6 +2654,8 @@ static BlockJob *find_block_job(const char *device, AioContext **aio_context) return bs->job; notfound: + error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, + "No active block job on device '%s'", device); *aio_context = NULL; return NULL; } @@ -2680,10 +2663,9 @@ notfound: void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) { AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context); + BlockJob *job = find_block_job(device, &aio_context, errp); if (!job) { - error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } @@ -2695,10 +2677,9 @@ void qmp_block_job_cancel(const char *device, bool has_force, bool force, Error **errp) { AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context); + BlockJob *job = find_block_job(device, &aio_context, errp); if (!job) { - error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } @@ -2721,10 +2702,9 @@ out: void qmp_block_job_pause(const char *device, Error **errp) { AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context); + BlockJob *job = find_block_job(device, &aio_context, errp); if (!job) { - error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } @@ -2736,10 +2716,9 @@ void qmp_block_job_pause(const char *device, Error **errp) void qmp_block_job_resume(const char *device, Error **errp) { AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context); + BlockJob *job = find_block_job(device, &aio_context, errp); if (!job) { - error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } @@ -2751,10 +2730,9 @@ void qmp_block_job_resume(const char *device, Error **errp) void qmp_block_job_complete(const char *device, Error **errp) { AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context); + BlockJob *job = find_block_job(device, &aio_context, errp); if (!job) { - error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } @@ -313,6 +313,7 @@ glx="" zlib="yes" lzo="" snappy="" +bzip2="" guest_agent="" guest_agent_with_vss="no" vss_win32_sdk="" @@ -1060,6 +1061,10 @@ for opt do ;; --enable-snappy) snappy="yes" ;; + --disable-bzip2) bzip2="no" + ;; + --enable-bzip2) bzip2="yes" + ;; --enable-guest-agent) guest_agent="yes" ;; --disable-guest-agent) guest_agent="no" @@ -1374,6 +1379,8 @@ Advanced options (experts only): --enable-usb-redir enable usb network redirection support --enable-lzo enable the support of lzo compression library --enable-snappy enable the support of snappy compression library + --enable-bzip2 enable the support of bzip2 compression library (for + reading bzip2-compressed dmg images) --disable-guest-agent disable building of the QEMU Guest Agent --enable-guest-agent enable building of the QEMU Guest Agent --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent @@ -1820,6 +1827,24 @@ EOF fi ########################################## +# bzip2 check + +if test "$bzip2" != "no" ; then + cat > $TMPC << EOF +#include <bzlib.h> +int main(void) { BZ2_bzlibVersion(); return 0; } +EOF + if compile_prog "" "-lbz2" ; then + bzip2="yes" + else + if test "$bzip2" = "yes"; then + feature_not_found "libbzip2" "Install libbzip2 devel" + fi + bzip2="no" + fi +fi + +########################################## # libseccomp check if test "$seccomp" != "no" ; then @@ -3335,6 +3360,22 @@ if compile_prog "" "" ; then fallocate_punch_hole=yes fi +# check that fallocate supports range zeroing inside the file +fallocate_zero_range=no +cat > $TMPC << EOF +#include <fcntl.h> +#include <linux/falloc.h> + +int main(void) +{ + fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + fallocate_zero_range=yes +fi + # check for posix_fallocate posix_fallocate=no cat > $TMPC << EOF @@ -4369,6 +4410,7 @@ echo "vhdx $vhdx" echo "Quorum $quorum" echo "lzo support $lzo" echo "snappy support $snappy" +echo "bzip2 support $bzip2" echo "NUMA host support $numa" if test "$sdl_too_old" = "yes"; then @@ -4567,6 +4609,9 @@ fi if test "$fallocate_punch_hole" = "yes" ; then echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak fi +if test "$fallocate_zero_range" = "yes" ; then + echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak +fi if test "$posix_fallocate" = "yes" ; then echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak fi @@ -4724,6 +4769,11 @@ if test "$snappy" = "yes" ; then echo "CONFIG_SNAPPY=y" >> $config_host_mak fi +if test "$bzip2" = "yes" ; then + echo "CONFIG_BZIP2=y" >> $config_host_mak + echo "BZIP2_LIBS=-lbz2" >> $config_host_mak +fi + if test "$libiscsi" = "yes" ; then echo "CONFIG_LIBISCSI=m" >> $config_host_mak echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak @@ -474,6 +474,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict) " wr_total_time_ns=%" PRId64 " rd_total_time_ns=%" PRId64 " flush_total_time_ns=%" PRId64 + " rd_merged=%" PRId64 + " wr_merged=%" PRId64 "\n", stats->value->stats->rd_bytes, stats->value->stats->wr_bytes, @@ -482,7 +484,9 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict) stats->value->stats->flush_operations, stats->value->stats->wr_total_time_ns, stats->value->stats->rd_total_time_ns, - stats->value->stats->flush_total_time_ns); + stats->value->stats->flush_total_time_ns, + stats->value->stats->rd_merged, + stats->value->stats->wr_merged); } qapi_free_BlockStatsList(stats_list); diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 39c5d7103c..be957d1117 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -96,9 +96,7 @@ static void handle_notify(EventNotifier *e) event_notifier_test_and_clear(&s->host_notifier); blk_io_plug(s->conf->conf.blk); for (;;) { - MultiReqBuffer mrb = { - .num_writes = 0, - }; + MultiReqBuffer mrb = {}; int ret; /* Disable guest->host notifies to avoid unnecessary vmexits */ @@ -120,7 +118,9 @@ static void handle_notify(EventNotifier *e) virtio_blk_handle_request(req, &mrb); } - virtio_submit_multiwrite(s->conf->conf.blk, &mrb); + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->conf->conf.blk, &mrb); + } if (likely(ret == -EAGAIN)) { /* vring emptied */ /* Re-enable guest->host notifies and stop processing the vring. diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 4032fcae27..1a8a176dcc 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -34,6 +34,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) req->dev = s; req->qiov.size = 0; req->next = NULL; + req->mr_next = NULL; return req; } @@ -84,20 +85,32 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, static void virtio_blk_rw_complete(void *opaque, int ret) { - VirtIOBlockReq *req = opaque; + VirtIOBlockReq *next = opaque; - trace_virtio_blk_rw_complete(req, ret); + while (next) { + VirtIOBlockReq *req = next; + next = req->mr_next; + trace_virtio_blk_rw_complete(req, ret); - if (ret) { - int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); - bool is_read = !(p & VIRTIO_BLK_T_OUT); - if (virtio_blk_handle_rw_error(req, -ret, is_read)) - return; - } + if (req->qiov.nalloc != -1) { + /* If nalloc is != 1 req->qiov is a local copy of the original + * external iovec. It was allocated in submit_merged_requests + * to be able to merge requests. */ + qemu_iovec_destroy(&req->qiov); + } - virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); - block_acct_done(blk_get_stats(req->dev->blk), &req->acct); - virtio_blk_free_request(req); + if (ret) { + int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); + bool is_read = !(p & VIRTIO_BLK_T_OUT); + if (virtio_blk_handle_rw_error(req, -ret, is_read)) { + continue; + } + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(req->dev->blk), &req->acct); + virtio_blk_free_request(req); + } } static void virtio_blk_flush_complete(void *opaque, int ret) @@ -291,24 +304,127 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) } } -void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb) +static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, + int start, int num_reqs, int niov) { - int i, ret; + QEMUIOVector *qiov = &mrb->reqs[start]->qiov; + int64_t sector_num = mrb->reqs[start]->sector_num; + int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE; + bool is_write = mrb->is_write; + + if (num_reqs > 1) { + int i; + struct iovec *tmp_iov = qiov->iov; + int tmp_niov = qiov->niov; + + /* mrb->reqs[start]->qiov was initialized from external so we can't + * modifiy it here. We need to initialize it locally and then add the + * external iovecs. */ + qemu_iovec_init(qiov, niov); + + for (i = 0; i < tmp_niov; i++) { + qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len); + } - if (!mrb->num_writes) { + for (i = start + 1; i < start + num_reqs; i++) { + qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0, + mrb->reqs[i]->qiov.size); + mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; + nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE; + } + assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE); + + trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num, + nb_sectors, is_write); + block_acct_merge_done(blk_get_stats(blk), + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ, + num_reqs - 1); + } + + if (is_write) { + blk_aio_writev(blk, sector_num, qiov, nb_sectors, + virtio_blk_rw_complete, mrb->reqs[start]); + } else { + blk_aio_readv(blk, sector_num, qiov, nb_sectors, + virtio_blk_rw_complete, mrb->reqs[start]); + } +} + +static int multireq_compare(const void *a, const void *b) +{ + const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a, + *req2 = *(VirtIOBlockReq **)b; + + /* + * Note that we can't simply subtract sector_num1 from sector_num2 + * here as that could overflow the return value. + */ + if (req1->sector_num > req2->sector_num) { + return 1; + } else if (req1->sector_num < req2->sector_num) { + return -1; + } else { + return 0; + } +} + +void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) +{ + int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; + int max_xfer_len = 0; + int64_t sector_num = 0; + + if (mrb->num_reqs == 1) { + submit_requests(blk, mrb, 0, 1, -1); + mrb->num_reqs = 0; return; } - ret = blk_aio_multiwrite(blk, mrb->blkreq, mrb->num_writes); - if (ret != 0) { - for (i = 0; i < mrb->num_writes; i++) { - if (mrb->blkreq[i].error) { - virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO); + max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk); + max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS); + + qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs), + &multireq_compare); + + for (i = 0; i < mrb->num_reqs; i++) { + VirtIOBlockReq *req = mrb->reqs[i]; + if (num_reqs > 0) { + bool merge = true; + + /* merge would exceed maximum number of IOVs */ + if (niov + req->qiov.niov > IOV_MAX) { + merge = false; + } + + /* merge would exceed maximum transfer length of backend device */ + if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) { + merge = false; + } + + /* requests are not sequential */ + if (sector_num + nb_sectors != req->sector_num) { + merge = false; + } + + if (!merge) { + submit_requests(blk, mrb, start, num_reqs, niov); + num_reqs = 0; } } + + if (num_reqs == 0) { + sector_num = req->sector_num; + nb_sectors = niov = 0; + start = i; + } + + nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE; + niov += req->qiov.niov; + num_reqs++; } - mrb->num_writes = 0; + submit_requests(blk, mrb, start, num_reqs, niov); + mrb->num_reqs = 0; } static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) @@ -319,7 +435,9 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) /* * Make sure all outstanding writes are posted to the backing device. */ - virtio_submit_multiwrite(req->dev->blk, mrb); + if (mrb->is_write && mrb->num_reqs > 0) { + virtio_blk_submit_multireq(req->dev->blk, mrb); + } blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req); } @@ -329,6 +447,9 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; uint64_t total_sectors; + if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return false; + } if (sector & dev->sector_mask) { return false; } @@ -342,60 +463,6 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, return true; } -static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb) -{ - BlockRequest *blkreq; - uint64_t sector; - - sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); - - trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512); - - if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) { - virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - virtio_blk_free_request(req); - return; - } - - block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size, - BLOCK_ACCT_WRITE); - - if (mrb->num_writes == 32) { - virtio_submit_multiwrite(req->dev->blk, mrb); - } - - blkreq = &mrb->blkreq[mrb->num_writes]; - blkreq->sector = sector; - blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE; - blkreq->qiov = &req->qiov; - blkreq->cb = virtio_blk_rw_complete; - blkreq->opaque = req; - blkreq->error = 0; - - mrb->num_writes++; -} - -static void virtio_blk_handle_read(VirtIOBlockReq *req) -{ - uint64_t sector; - - sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector); - - trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512); - - if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) { - virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - virtio_blk_free_request(req); - return; - } - - block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size, - BLOCK_ACCT_READ); - blk_aio_readv(req->dev->blk, sector, &req->qiov, - req->qiov.size / BDRV_SECTOR_SIZE, - virtio_blk_rw_complete, req); -} - void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) { uint32_t type; @@ -430,11 +497,58 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); - if (type & VIRTIO_BLK_T_FLUSH) { + /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER + * is an optional flag. Altough a guest should not send this flag if + * not negotiated we ignored it in the past. So keep ignoring it. */ + switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { + case VIRTIO_BLK_T_IN: + { + bool is_write = type & VIRTIO_BLK_T_OUT; + req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev), + &req->out.sector); + + if (is_write) { + qemu_iovec_init_external(&req->qiov, iov, out_num); + trace_virtio_blk_handle_write(req, req->sector_num, + req->qiov.size / BDRV_SECTOR_SIZE); + } else { + qemu_iovec_init_external(&req->qiov, in_iov, in_num); + trace_virtio_blk_handle_read(req, req->sector_num, + req->qiov.size / BDRV_SECTOR_SIZE); + } + + if (!virtio_blk_sect_range_ok(req->dev, req->sector_num, + req->qiov.size)) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + virtio_blk_free_request(req); + return; + } + + block_acct_start(blk_get_stats(req->dev->blk), + &req->acct, req->qiov.size, + is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + + /* merge would exceed maximum number of requests or IO direction + * changes */ + if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS || + is_write != mrb->is_write || + !req->dev->conf.request_merging)) { + virtio_blk_submit_multireq(req->dev->blk, mrb); + } + + assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS); + mrb->reqs[mrb->num_reqs++] = req; + mrb->is_write = is_write; + break; + } + case VIRTIO_BLK_T_FLUSH: virtio_blk_handle_flush(req, mrb); - } else if (type & VIRTIO_BLK_T_SCSI_CMD) { + break; + case VIRTIO_BLK_T_SCSI_CMD: virtio_blk_handle_scsi(req); - } else if (type & VIRTIO_BLK_T_GET_ID) { + break; + case VIRTIO_BLK_T_GET_ID: + { VirtIOBlock *s = req->dev; /* @@ -448,14 +562,9 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) iov_from_buf(in_iov, in_num, 0, serial, size); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); virtio_blk_free_request(req); - } else if (type & VIRTIO_BLK_T_OUT) { - qemu_iovec_init_external(&req->qiov, iov, out_num); - virtio_blk_handle_write(req, mrb); - } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) { - /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */ - qemu_iovec_init_external(&req->qiov, in_iov, in_num); - virtio_blk_handle_read(req); - } else { + break; + } + default: virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); virtio_blk_free_request(req); } @@ -465,9 +574,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBlock *s = VIRTIO_BLK(vdev); VirtIOBlockReq *req; - MultiReqBuffer mrb = { - .num_writes = 0, - }; + MultiReqBuffer mrb = {}; /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * dataplane here instead of waiting for .set_status(). @@ -481,7 +588,9 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_handle_request(req, &mrb); } - virtio_submit_multiwrite(s->blk, &mrb); + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } /* * FIXME: Want to check for completions before returning to guest mode, @@ -494,9 +603,7 @@ static void virtio_blk_dma_restart_bh(void *opaque) { VirtIOBlock *s = opaque; VirtIOBlockReq *req = s->rq; - MultiReqBuffer mrb = { - .num_writes = 0, - }; + MultiReqBuffer mrb = {}; qemu_bh_delete(s->bh); s->bh = NULL; @@ -509,7 +616,9 @@ static void virtio_blk_dma_restart_bh(void *opaque) req = next; } - virtio_submit_multiwrite(s->blk, &mrb); + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } } static void virtio_blk_dma_restart_cb(void *opaque, int running, @@ -842,6 +951,8 @@ static Property virtio_blk_properties[] = { #ifdef __linux__ DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true), #endif + DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, + true), DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index a71e6e014f..1bf8b34528 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -394,6 +394,23 @@ static void ide_atapi_cmd_read(IDEState *s, int lba, int nb_sectors, } } + +/* Called by *_restart_bh when the transfer function points + * to ide_atapi_cmd + */ +void ide_atapi_dma_restart(IDEState *s) +{ + /* + * I'm not sure we have enough stored to restart the command + * safely, so give the guest an error it should recover from. + * I'm assuming most guests will try to recover from something + * listed as a medium error on a CD; it seems to work on Linux. + * This would be more of a problem if we did any other type of + * DMA operation. + */ + ide_atapi_cmd_error(s, MEDIUM_ERROR, ASC_NO_SEEK_COMPLETE); +} + static inline uint8_t ide_atapi_set_profile(uint8_t *buf, uint8_t *index, uint16_t profile) { diff --git a/hw/ide/core.c b/hw/ide/core.c index d4af5e2eb1..ac3f015a8d 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2417,6 +2417,7 @@ static int ide_drive_pio_post_load(void *opaque, int version_id) s->end_transfer_func = transfer_end_table[s->end_transfer_fn_idx]; s->data_ptr = s->io_buffer + s->cur_io_buffer_offset; s->data_end = s->data_ptr + s->cur_io_buffer_len; + s->atapi_dma = s->feature & 1; /* as per cmd_packet */ return 0; } diff --git a/hw/ide/internal.h b/hw/ide/internal.h index c998003bf3..ee9a57f039 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -289,6 +289,7 @@ typedef struct IDEDMAOps IDEDMAOps; #define ATAPI_INT_REASON_TAG 0xf8 /* same constants as bochs */ +#define ASC_NO_SEEK_COMPLETE 0x02 #define ASC_ILLEGAL_OPCODE 0x20 #define ASC_LOGICAL_BLOCK_OOR 0x21 #define ASC_INV_FIELD_IN_CMD_PACKET 0x24 @@ -530,6 +531,7 @@ void ide_dma_error(IDEState *s); void ide_atapi_cmd_ok(IDEState *s); void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc); +void ide_atapi_dma_restart(IDEState *s); void ide_atapi_io_error(IDEState *s, int ret); void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val); diff --git a/hw/ide/pci.c b/hw/ide/pci.c index bee5ad39fe..e3f2054a90 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -235,6 +235,17 @@ static void bmdma_restart_bh(void *opaque) } } else if (error_status & IDE_RETRY_FLUSH) { ide_flush_cache(bmdma_active_if(bm)); + } else { + IDEState *s = bmdma_active_if(bm); + + /* + * We've not got any bits to tell us about ATAPI - but + * we do have the end_transfer_func that tells us what + * we're trying to do. + */ + if (s->end_transfer_func == ide_atapi_cmd) { + ide_atapi_dma_restart(s); + } } } diff --git a/include/block/accounting.h b/include/block/accounting.h index 50b42b3808..4c406cff7a 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -39,6 +39,7 @@ typedef struct BlockAcctStats { uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; + uint64_t merged[BLOCK_MAX_IOTYPE]; uint64_t wr_highest_sector; } BlockAcctStats; @@ -53,5 +54,7 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num, unsigned int nb_sectors); +void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, + int num_requests); #endif diff --git a/include/block/block.h b/include/block/block.h index 3082d2b80e..321295e5f7 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -83,6 +83,9 @@ typedef enum { #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) +#define BDRV_REQUEST_MAX_SECTORS MIN(SIZE_MAX >> BDRV_SECTOR_BITS, \ + INT_MAX >> BDRV_SECTOR_BITS) + /* * Allocation status flags * BDRV_BLOCK_DATA: data is read from bs->file or another file @@ -378,6 +381,7 @@ BlockDriverState *bdrv_next(BlockDriverState *bs); int bdrv_is_encrypted(BlockDriverState *bs); int bdrv_key_required(BlockDriverState *bs); int bdrv_set_key(BlockDriverState *bs, const char *key); +void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp); int bdrv_query_missing_keys(void); void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void *opaque); diff --git a/include/block/block_int.h b/include/block/block_int.h index e264be97b2..7ad19503df 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -412,6 +412,10 @@ struct BlockDriverState { /* The error object in use for blocking operations on backing_hd */ Error *backing_blocker; + + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + NotifierWithReturn write_threshold_notifier; }; diff --git a/include/block/nbd.h b/include/block/nbd.h index 348302c90b..b75959556c 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -75,7 +75,7 @@ enum { ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, - off_t *size, size_t *blocksize); + off_t *size, size_t *blocksize, Error **errp); int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize); ssize_t nbd_send_request(int csock, struct nbd_request *request); ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply); diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h new file mode 100644 index 0000000000..f1b899cd5f --- /dev/null +++ b/include/block/write-threshold.h @@ -0,0 +1,64 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani <fromani@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#ifndef BLOCK_WRITE_THRESHOLD_H +#define BLOCK_WRITE_THRESHOLD_H + +#include <stdint.h> + +#include "qemu/typedefs.h" +#include "qemu-common.h" + +/* + * bdrv_write_threshold_set: + * + * Set the write threshold for block devices, in bytes. + * Notify when a write exceeds the threshold, meaning the device + * is becoming full, so it can be transparently resized. + * To be used with thin-provisioned block devices. + * + * Use threshold_bytes == 0 to disable. + */ +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); + +/* + * bdrv_write_threshold_get + * + * Get the configured write threshold, in bytes. + * Zero means no threshold configured. + */ +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_is_set + * + * Tell if a write threshold is set for a given BDS. + */ +bool bdrv_write_threshold_is_set(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_exceeded + * + * Return the extent of a write request that exceeded the threshold, + * or zero if the request is below the threshold. + * Return zero also if the threshold was not set. + * + * NOTE: here we assume the following holds for each request this code + * deals with: + * + * assert((req->offset + req->bytes) <= UINT64_MAX) + * + * Please not there is *not* an actual C assert(). + */ +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req); + +#endif diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 4652b70b5d..fc7d311a43 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -113,6 +113,7 @@ struct VirtIOBlkConf uint32_t scsi; uint32_t config_wce; uint32_t data_plane; + uint32_t request_merging; }; struct VirtIOBlockDataPlane; @@ -134,27 +135,32 @@ typedef struct VirtIOBlock { struct VirtIOBlockDataPlane *dataplane; } VirtIOBlock; -typedef struct MultiReqBuffer { - BlockRequest blkreq[32]; - unsigned int num_writes; -} MultiReqBuffer; - typedef struct VirtIOBlockReq { + int64_t sector_num; VirtIOBlock *dev; VirtQueueElement elem; struct virtio_blk_inhdr *in; struct virtio_blk_outhdr out; QEMUIOVector qiov; struct VirtIOBlockReq *next; + struct VirtIOBlockReq *mr_next; BlockAcctCookie acct; } VirtIOBlockReq; +#define VIRTIO_BLK_MAX_MERGE_REQS 32 + +typedef struct MultiReqBuffer { + VirtIOBlockReq *reqs[VIRTIO_BLK_MAX_MERGE_REQS]; + unsigned int num_reqs; + bool is_write; +} MultiReqBuffer; + VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s); void virtio_blk_free_request(VirtIOBlockReq *req); void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb); -void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb); +void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb); #endif diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h index eeaf0cb981..986260f466 100644 --- a/include/qapi/qmp/qerror.h +++ b/include/qapi/qmp/qerror.h @@ -37,9 +37,6 @@ void qerror_report_err(Error *err); #define QERR_BASE_NOT_FOUND \ ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found" -#define QERR_BLOCK_JOB_NOT_ACTIVE \ - ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'" - #define QERR_BLOCK_JOB_NOT_READY \ ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed" @@ -52,9 +49,6 @@ void qerror_report_err(Error *err); #define QERR_BUS_NOT_FOUND \ ERROR_CLASS_GENERIC_ERROR, "Bus '%s' not found" -#define QERR_DEVICE_ENCRYPTED \ - ERROR_CLASS_DEVICE_ENCRYPTED, "'%s' (%s) is encrypted" - #define QERR_DEVICE_HAS_NO_MEDIUM \ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has no medium" @@ -70,9 +64,6 @@ void qerror_report_err(Error *err); #define QERR_DEVICE_NO_HOTPLUG \ ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging" -#define QERR_DEVICE_NOT_ENCRYPTED \ - ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not encrypted" - #define QERR_DEVICE_NOT_FOUND \ ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found" diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 8871a021d0..aab12b982c 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -127,6 +127,7 @@ int blk_is_inserted(BlockBackend *blk); void blk_lock_medium(BlockBackend *blk, bool locked); void blk_eject(BlockBackend *blk, bool eject_flag); int blk_get_flags(BlockBackend *blk); +int blk_get_max_transfer_length(BlockBackend *blk); void blk_set_guest_block_size(BlockBackend *blk, int align); void *blk_blockalign(BlockBackend *blk, size_t size); bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); @@ -5368,9 +5368,12 @@ static void bdrv_password_cb(void *opaque, const char *password, Monitor *mon = opaque; BlockDriverState *bs = readline_opaque; int ret = 0; + Error *local_err = NULL; - if (bdrv_set_key(bs, password) != 0) { - monitor_printf(mon, "invalid password\n"); + bdrv_add_key(bs, password, &local_err); + if (local_err) { + monitor_printf(mon, "%s\n", error_get_pretty(local_err)); + error_free(local_err); ret = -EPERM; } if (mon->password_completion_cb) @@ -5388,17 +5391,20 @@ int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs, BlockCompletionFunc *completion_cb, void *opaque) { + Error *local_err = NULL; int err; - if (!bdrv_key_required(bs)) { + bdrv_add_key(bs, NULL, &local_err); + if (!local_err) { if (completion_cb) completion_cb(opaque, 0); return 0; } + /* Need a key for @bs */ + if (monitor_ctrl_mode(mon)) { - qerror_report(QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs), - bdrv_get_encrypted_filename(bs)); + qerror_report_err(local_err); return -1; } @@ -494,7 +494,7 @@ fail: } int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, - off_t *size, size_t *blocksize) + off_t *size, size_t *blocksize, Error **errp) { char buf[256]; uint64_t magic, s; @@ -506,13 +506,13 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, rc = -EINVAL; if (read_sync(csock, buf, 8) != 8) { - LOG("read failed"); + error_setg(errp, "Failed to read data"); goto fail; } buf[8] = '\0'; if (strlen(buf) == 0) { - LOG("server connection closed"); + error_setg(errp, "Server connection closed unexpectedly"); goto fail; } @@ -527,12 +527,12 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, qemu_isprint(buf[7]) ? buf[7] : '.'); if (memcmp(buf, "NBDMAGIC", 8) != 0) { - LOG("Invalid magic received"); + error_setg(errp, "Invalid magic received"); goto fail; } if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { - LOG("read failed"); + error_setg(errp, "Failed to read magic"); goto fail; } magic = be64_to_cpu(magic); @@ -545,52 +545,60 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, TRACE("Checking magic (opts_magic)"); if (magic != NBD_OPTS_MAGIC) { - LOG("Bad magic received"); + if (magic == NBD_CLIENT_MAGIC) { + error_setg(errp, "Server does not support export names"); + } else { + error_setg(errp, "Bad magic received"); + } goto fail; } if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { - LOG("flags read failed"); + error_setg(errp, "Failed to read server flags"); goto fail; } *flags = be16_to_cpu(tmp) << 16; /* reserved for future use */ if (write_sync(csock, &reserved, sizeof(reserved)) != sizeof(reserved)) { - LOG("write failed (reserved)"); + error_setg(errp, "Failed to read reserved field"); goto fail; } /* write the export name */ magic = cpu_to_be64(magic); if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { - LOG("write failed (magic)"); + error_setg(errp, "Failed to send export name magic"); goto fail; } opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { - LOG("write failed (opt)"); + error_setg(errp, "Failed to send export name option number"); goto fail; } namesize = cpu_to_be32(strlen(name)); if (write_sync(csock, &namesize, sizeof(namesize)) != sizeof(namesize)) { - LOG("write failed (namesize)"); + error_setg(errp, "Failed to send export name length"); goto fail; } if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) { - LOG("write failed (name)"); + error_setg(errp, "Failed to send export name"); goto fail; } } else { TRACE("Checking magic (cli_magic)"); if (magic != NBD_CLIENT_MAGIC) { - LOG("Bad magic received"); + if (magic == NBD_OPTS_MAGIC) { + error_setg(errp, "Server requires an export name"); + } else { + error_setg(errp, "Bad magic received"); + } goto fail; } } if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) { - LOG("read failed"); + error_setg(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); @@ -599,19 +607,19 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, if (!name) { if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) { - LOG("read failed (flags)"); + error_setg(errp, "Failed to read export flags"); goto fail; } *flags = be32_to_cpup(flags); } else { if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { - LOG("read failed (tmp)"); + error_setg(errp, "Failed to read export flags"); goto fail; } *flags |= be32_to_cpu(tmp); } if (read_sync(csock, &buf, 124) != 124) { - LOG("read failed (buf)"); + error_setg(errp, "Failed to read reserved block"); goto fail; } rc = 0; diff --git a/qapi/block-core.json b/qapi/block-core.json index 80984d1660..a3fdaf02ba 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -257,6 +257,9 @@ # # @cache: the cache mode used for the block device (since: 2.3) # +# @write_threshold: configured write threshold for the device. +# 0 if disabled. (Since 2.3) +# # Since: 0.14.0 # ## @@ -271,7 +274,8 @@ '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int', 'cache': 'BlockdevCacheInfo' } } + '*iops_size': 'int', 'cache': 'BlockdevCacheInfo', + 'write_threshold': 'int' } } ## # @BlockDeviceIoStatus: @@ -407,13 +411,20 @@ # growable sparse files (like qcow2) that are used on top # of a physical device. # +# @rd_merged: Number of read requests that have been merged into another +# request (Since 2.3). +# +# @wr_merged: Number of write requests that have been merged into another +# request (Since 2.3). +# # Since: 0.14.0 ## { 'type': 'BlockDeviceStats', 'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int', 'wr_operations': 'int', 'flush_operations': 'int', 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int', - 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } } + 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int', + 'rd_merged': 'int', 'wr_merged': 'int' } } ## # @BlockStats: @@ -1910,3 +1921,48 @@ ## { 'enum': 'PreallocMode', 'data': [ 'off', 'metadata', 'falloc', 'full' ] } + +## +# @BLOCK_WRITE_THRESHOLD +# +# Emitted when writes on block device reaches or exceeds the +# configured write threshold. For thin-provisioned devices, this +# means the device should be extended to avoid pausing for +# disk exhaustion. +# The event is one shot. Once triggered, it needs to be +# re-registered with another block-set-threshold command. +# +# @node-name: graph node name on which the threshold was exceeded. +# +# @amount-exceeded: amount of data which exceeded the threshold, in bytes. +# +# @write-threshold: last configured threshold, in bytes. +# +# Since: 2.3 +## +{ 'event': 'BLOCK_WRITE_THRESHOLD', + 'data': { 'node-name': 'str', + 'amount-exceeded': 'uint64', + 'write-threshold': 'uint64' } } + +## +# @block-set-write-threshold +# +# Change the write threshold for a block drive. An event will be delivered +# if a write to this block drive crosses the configured threshold. +# This is useful to transparently resize thin-provisioned drives without +# the guest OS noticing. +# +# @node-name: graph node name on which the threshold must be set. +# +# @write-threshold: configured threshold for the block device, bytes. +# Use 0 to disable the threshold. +# +# Returns: Nothing on success +# If @node name is not found on the block device graph, +# DeviceNotFound +# +# Since: 2.3 +## +{ 'command': 'block-set-write-threshold', + 'data': { 'node-name': 'str', 'write-threshold': 'uint64' } } diff --git a/qemu-img.c b/qemu-img.c index 4e9a7f5741..e148af8a3e 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -35,7 +35,7 @@ #include "block/qapi.h" #include <getopt.h> -#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION \ +#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \ ", Copyright (c) 2004-2008 Fabrice Bellard\n" typedef struct img_cmd_t { diff --git a/qemu-nbd.c b/qemu-nbd.c index d222512412..4d8df08385 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -284,6 +284,7 @@ static void *nbd_client_thread(void *arg) int fd, sock; int ret; pthread_t show_parts_thread; + Error *local_error = NULL; sock = unix_socket_outgoing(sockpath); if (sock < 0) { @@ -291,8 +292,12 @@ static void *nbd_client_thread(void *arg) } ret = nbd_receive_negotiate(sock, NULL, &nbdflags, - &size, &blocksize); + &size, &blocksize, &local_error); if (ret < 0) { + if (local_error) { + fprintf(stderr, "%s\n", error_get_pretty(local_error)); + error_free(local_error); + } goto out_socket; } diff --git a/qmp-commands.hx b/qmp-commands.hx index c5f16dd922..a85d8479e3 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2146,6 +2146,8 @@ Each json-object contain the following: - "iops_size": I/O size when limiting by iops (json-int) - "detect_zeroes": detect and optimize zero writing (json-string) - Possible values: "off", "on", "unmap" + - "write_threshold": write offset threshold in bytes, a event will be + emitted if crossed. Zero if disabled (json-int) - "image": the detail of the image, it is a json-object containing the following: - "filename": image file name (json-string) @@ -2223,6 +2225,7 @@ Example: "iops_wr_max": 0, "iops_size": 0, "detect_zeroes": "on", + "write_threshold": 0, "image":{ "filename":"disks/test.qcow2", "format":"qcow2", @@ -2303,6 +2306,10 @@ Each json-object contain the following: - "flush_total_time_ns": total time spend on cache flushes in nano-seconds (json-int) - "wr_highest_offset": Highest offset of a sector written since the BlockDriverState has been opened (json-int) + - "rd_merged": number of read requests that have been merged into + another request (json-int) + - "wr_merged": number of write requests that have been merged into + another request (json-int) - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted @@ -2326,6 +2333,8 @@ Example: "rd_total_times_ns":3465673657 "flush_total_times_ns":49653 "flush_operations":61, + "rd_merged":0, + "wr_merged":0 } }, "stats":{ @@ -2337,7 +2346,9 @@ Example: "flush_operations":51, "wr_total_times_ns":313253456 "rd_total_times_ns":3465673657 - "flush_total_times_ns":49653 + "flush_total_times_ns":49653, + "rd_merged":0, + "wr_merged":0 } }, { @@ -2351,7 +2362,9 @@ Example: "flush_operations":0, "wr_total_times_ns":0 "rd_total_times_ns":0 - "flush_total_times_ns":0 + "flush_total_times_ns":0, + "rd_merged":0, + "wr_merged":0 } }, { @@ -2365,7 +2378,9 @@ Example: "flush_operations":0, "wr_total_times_ns":0 "rd_total_times_ns":0 - "flush_total_times_ns":0 + "flush_total_times_ns":0, + "rd_merged":0, + "wr_merged":0 } }, { @@ -2379,7 +2394,9 @@ Example: "flush_operations":0, "wr_total_times_ns":0 "rd_total_times_ns":0 - "flush_total_times_ns":0 + "flush_total_times_ns":0, + "rd_merged":0, + "wr_merged":0 } } ] @@ -3671,6 +3688,7 @@ Example: "iops_rd_max": 0, "iops_wr_max": 0, "iops_size": 0, + "write_threshold": 0, "image":{ "filename":"disks/test.qcow2", "format":"qcow2", @@ -3907,3 +3925,31 @@ Move mouse pointer to absolute coordinates (20000, 400). <- { "return": {} } EQMP + + { + .name = "block-set-write-threshold", + .args_type = "node-name:s,write-threshold:l", + .mhandler.cmd_new = qmp_marshal_input_block_set_write_threshold, + }, + +SQMP +block-set-write-threshold +------------ + +Change the write threshold for a block drive. The threshold is an offset, +thus must be non-negative. Default is no write threshold. +Setting the threshold to zero disables it. + +Arguments: + +- "node-name": the node name in the block driver state graph (json-string) +- "write-threshold": the write threshold in bytes (json-int) + +Example: + +-> { "execute": "block-set-write-threshold", + "arguments": { "node-name": "mydev", + "write-threshold": 17179869184 } } +<- { "return": {} } + +EQMP @@ -154,6 +154,7 @@ SpiceInfo *qmp_query_spice(Error **errp) void qmp_cont(Error **errp) { + Error *local_err = NULL; BlockDriverState *bs; if (runstate_needs_reset()) { @@ -167,10 +168,9 @@ void qmp_cont(Error **errp) bdrv_iostatus_reset(bs); } for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) { - if (bdrv_key_required(bs)) { - error_set(errp, QERR_DEVICE_ENCRYPTED, - bdrv_get_device_name(bs), - bdrv_get_encrypted_filename(bs)); + bdrv_add_key(bs, NULL, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } } diff --git a/tests/Makefile b/tests/Makefile index 5caccf765a..d5df16882d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -68,6 +68,8 @@ check-unit-y += tests/check-qom-interface$(EXESUF) gcov-files-check-qom-interface-y = qom/object.c check-unit-y += tests/test-qemu-opts$(EXESUF) gcov-files-test-qemu-opts-y = qom/test-qemu-opts.c +check-unit-y += tests/test-write-threshold$(EXESUF) +gcov-files-test-write-threshold-y = block/write-threshold.c check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -360,6 +362,7 @@ tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a +tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(block-obj-y) libqemuutil.a libqemustub.a ifeq ($(CONFIG_POSIX),y) LIBS += -lutil diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 13ff3cd7aa..00b3eaefc7 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -43,6 +43,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -218,6 +219,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -423,6 +425,7 @@ Testing: "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -607,6 +610,7 @@ Testing: "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -717,6 +721,7 @@ Testing: "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083 index 991a9d91db..1b2d3f1156 100755 --- a/tests/qemu-iotests/083 +++ b/tests/qemu-iotests/083 @@ -56,7 +56,8 @@ filter_nbd() { # # Filter out the TCP port number since this changes between runs. sed -e 's#^.*nbd\.c:.*##g' \ - -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g' + -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g' \ + -e 's#\(exportname=foo\|PORT\): Failed to .*$#\1#' } check_disconnect() { diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out index 85ee8d6dd7..8c1441bf4f 100644 --- a/tests/qemu-iotests/083.out +++ b/tests/qemu-iotests/083.out @@ -1,163 +1,138 @@ QA output created by 083 === Check disconnect before neg1 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect after neg1 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect 8 neg1 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect 16 neg1 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect before export === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect after export === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect 4 export === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect 12 export === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect 16 export === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect before neg2 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect after neg2 === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect 8 neg2 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect 10 neg2 === - -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo no file open, try 'help open' === Check disconnect before request === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect after request === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect before reply === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect after reply === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect 4 reply === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect 8 reply === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect before data === -qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error === Check disconnect after data === -read failed: Input/output error +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) === Check disconnect before neg-classic === - -qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT no file open, try 'help open' === Check disconnect 8 neg-classic === - -qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT no file open, try 'help open' === Check disconnect 16 neg-classic === - -qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT no file open, try 'help open' === Check disconnect 24 neg-classic === - -qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT no file open, try 'help open' === Check disconnect 28 neg-classic === - -qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument +qemu-io: can't open device nbd:127.0.0.1:PORT no file open, try 'help open' === Check disconnect after neg-classic === -qemu-io: can't open device nbd:127.0.0.1:PORT: Could not read image for determining its format: Input/output error -no file open, try 'help open' +read failed: Input/output error *** done diff --git a/tests/qemu-iotests/100 b/tests/qemu-iotests/100 index 9124aba760..7c1b235b51 100755 --- a/tests/qemu-iotests/100 +++ b/tests/qemu-iotests/100 @@ -55,6 +55,8 @@ echo "== verify pattern ==" $QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io +_cleanup_test_img + echo echo "== Sequential requests ==" _make_test_img $size @@ -66,6 +68,8 @@ $QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0xce 4k 4k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0 8k 4k" "$TEST_IMG" | _filter_qemu_io +_cleanup_test_img + echo echo "== Superset overlapping requests ==" _make_test_img $size @@ -79,6 +83,8 @@ $QEMU_IO -c "read -P 0xcd 0 1k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0xcd 3k 1k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io +_cleanup_test_img + echo echo "== Subset overlapping requests ==" _make_test_img $size @@ -92,6 +98,8 @@ $QEMU_IO -c "read -P 0xce 0 1k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0xce 3k 1k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io +_cleanup_test_img + echo echo "== Head overlapping requests ==" _make_test_img $size @@ -104,6 +112,8 @@ echo "== verify pattern ==" $QEMU_IO -c "read -P 0xce 2k 2k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io +_cleanup_test_img + echo echo "== Tail overlapping requests ==" _make_test_img $size @@ -116,6 +126,8 @@ echo "== verify pattern ==" $QEMU_IO -c "read -P 0xce 0k 2k" "$TEST_IMG" | _filter_qemu_io $QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io +_cleanup_test_img + echo echo "== Disjoint requests ==" _make_test_img $size diff --git a/tests/qemu-iotests/104 b/tests/qemu-iotests/104 index b471aa5bb1..f32752bb6f 100755 --- a/tests/qemu-iotests/104 +++ b/tests/qemu-iotests/104 @@ -28,11 +28,7 @@ here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! -_cleanup() -{ - _cleanup_test_img -} -trap "_cleanup; exit \$status" 0 1 2 3 15 +trap "exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common.rc @@ -47,8 +43,9 @@ echo image_sizes="1024 1234" for s in $image_sizes; do - _make_test_img $s | _filter_img_create + _make_test_img $s _img_info | _filter_img_info + _cleanup_test_img done # success, all done diff --git a/tests/qemu-iotests/116 b/tests/qemu-iotests/116 new file mode 100755 index 0000000000..713ed484ba --- /dev/null +++ b/tests/qemu-iotests/116 @@ -0,0 +1,96 @@ +#!/bin/bash +# +# Test error code paths for invalid QED images +# +# The aim of this test is to exercise the error paths in qed_open() to ensure +# there are no crashes with invalid input files. +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=stefanha@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qed +_supported_proto generic +_supported_os Linux + + +size=128M + +echo +echo "== truncated header cluster ==" +_make_test_img $size +truncate -s 512 "$TEST_IMG" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +echo +echo "== invalid header magic ==" +_make_test_img $size +poke_file "$TEST_IMG" "0" "QEDX" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +echo +echo "== invalid cluster size ==" +_make_test_img $size +poke_file "$TEST_IMG" "4" "\xff\xff\xff\xff" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +echo +echo "== invalid table size ==" +_make_test_img $size +poke_file "$TEST_IMG" "8" "\xff\xff\xff\xff" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +echo +echo "== invalid header size ==" +_make_test_img $size +poke_file "$TEST_IMG" "12" "\xff\xff\xff\xff" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +echo +echo "== invalid L1 table offset ==" +_make_test_img $size +poke_file "$TEST_IMG" "40" "\xff\xff\xff\xff\xff\xff\xff\xff" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +echo +echo "== invalid image size ==" +_make_test_img $size +poke_file "$TEST_IMG" "48" "\xff\xff\xff\xff\xff\xff\xff\xff" +$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/116.out b/tests/qemu-iotests/116.out new file mode 100644 index 0000000000..b679ceea63 --- /dev/null +++ b/tests/qemu-iotests/116.out @@ -0,0 +1,37 @@ +QA output created by 116 + +== truncated header cluster == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument +no file open, try 'help open' + +== invalid header magic == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Image not in QED format +no file open, try 'help open' + +== invalid cluster size == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument +no file open, try 'help open' + +== invalid table size == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument +no file open, try 'help open' + +== invalid header size == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument +no file open, try 'help open' + +== invalid L1 table offset == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument +no file open, try 'help open' + +== invalid image size == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument +no file open, try 'help open' +*** done diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index b73c70be95..06e1bb045f 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -200,6 +200,7 @@ _filter_img_info() sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ + -e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \ -e "/encrypted: yes/d" \ -e "/cluster_size: [0-9]\\+/d" \ -e "/table_size: [0-9]\\+/d" \ diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index aa093d9d84..22d3514041 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -153,7 +153,7 @@ _make_test_img() # Start an NBD server on the image file, which is what we'll be talking to if [ $IMGPROTO = "nbd" ]; then - eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 $TEST_IMG_FILE &" + eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE &" QEMU_NBD_PID=$! sleep 1 # FIXME: qemu-nbd needs to be listening before we continue fi diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index f8bf354156..4b2b93bc19 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -116,3 +116,4 @@ 111 rw auto quick 113 rw auto quick 114 rw auto quick +116 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 87002e0e2c..241b5ee9dd 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -288,7 +288,7 @@ def main(supported_fmts=[], supported_oses=['linux']): if supported_fmts and (imgfmt not in supported_fmts): notrun('not suitable for this image format: %s' % imgfmt) - if sys.platform not in supported_oses: + if True not in [sys.platform.startswith(x) for x in supported_oses]: notrun('not suitable for this OS: %s' % sys.platform) # We need to filter out the time taken from the output so that qemu-iotest diff --git a/tests/test-write-threshold.c b/tests/test-write-threshold.c new file mode 100644 index 0000000000..faffa7b855 --- /dev/null +++ b/tests/test-write-threshold.c @@ -0,0 +1,119 @@ +/* + * Test block device write threshold + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include <glib.h> +#include <stdint.h> +#include "block/block_int.h" +#include "block/write-threshold.h" + + +static void test_threshold_not_set_on_init(void) +{ + uint64_t res; + BlockDriverState bs; + memset(&bs, 0, sizeof(bs)); + + g_assert(!bdrv_write_threshold_is_set(&bs)); + + res = bdrv_write_threshold_get(&bs); + g_assert_cmpint(res, ==, 0); +} + +static void test_threshold_set_get(void) +{ + uint64_t threshold = 4 * 1024 * 1024; + uint64_t res; + BlockDriverState bs; + memset(&bs, 0, sizeof(bs)); + + bdrv_write_threshold_set(&bs, threshold); + + g_assert(bdrv_write_threshold_is_set(&bs)); + + res = bdrv_write_threshold_get(&bs); + g_assert_cmpint(res, ==, threshold); +} + +static void test_threshold_multi_set_get(void) +{ + uint64_t threshold1 = 4 * 1024 * 1024; + uint64_t threshold2 = 15 * 1024 * 1024; + uint64_t res; + BlockDriverState bs; + memset(&bs, 0, sizeof(bs)); + + bdrv_write_threshold_set(&bs, threshold1); + bdrv_write_threshold_set(&bs, threshold2); + res = bdrv_write_threshold_get(&bs); + g_assert_cmpint(res, ==, threshold2); +} + +static void test_threshold_not_trigger(void) +{ + uint64_t amount = 0; + uint64_t threshold = 4 * 1024 * 1024; + BlockDriverState bs; + BdrvTrackedRequest req; + + memset(&bs, 0, sizeof(bs)); + memset(&req, 0, sizeof(req)); + req.offset = 1024; + req.bytes = 1024; + + bdrv_write_threshold_set(&bs, threshold); + amount = bdrv_write_threshold_exceeded(&bs, &req); + g_assert_cmpuint(amount, ==, 0); +} + + +static void test_threshold_trigger(void) +{ + uint64_t amount = 0; + uint64_t threshold = 4 * 1024 * 1024; + BlockDriverState bs; + BdrvTrackedRequest req; + + memset(&bs, 0, sizeof(bs)); + memset(&req, 0, sizeof(req)); + req.offset = (4 * 1024 * 1024) - 1024; + req.bytes = 2 * 1024; + + bdrv_write_threshold_set(&bs, threshold); + amount = bdrv_write_threshold_exceeded(&bs, &req); + g_assert_cmpuint(amount, >=, 1024); +} + +typedef struct TestStruct { + const char *name; + void (*func)(void); +} TestStruct; + + +int main(int argc, char **argv) +{ + size_t i; + TestStruct tests[] = { + { "/write-threshold/not-set-on-init", + test_threshold_not_set_on_init }, + { "/write-threshold/set-get", + test_threshold_set_get }, + { "/write-threshold/multi-set-get", + test_threshold_multi_set_get }, + { "/write-threshold/not-trigger", + test_threshold_not_trigger }, + { "/write-threshold/trigger", + test_threshold_trigger }, + { NULL, NULL } + }; + + g_test_init(&argc, &argv, NULL); + for (i = 0; tests[i].name != NULL; i++) { + g_test_add_func(tests[i].name, tests[i].func); + } + return g_test_run(); +} diff --git a/trace-events b/trace-events index 907da7ed8a..57f357f7f7 100644 --- a/trace-events +++ b/trace-events @@ -116,6 +116,7 @@ virtio_blk_req_complete(void *req, int status) "req %p status %d" virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" +virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t sector, size_t nsectors, bool is_write) "mrb %p start %d num_reqs %d sector %"PRIu64" nsectors %zu is_write %d" # hw/block/dataplane/virtio-blk.c virtio_blk_data_plane_start(void *s) "dataplane %p" |