aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c65
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/accounting.c7
-rw-r--r--block/block-backend.c5
-rw-r--r--block/dmg.c502
-rw-r--r--block/nbd-client.c4
-rw-r--r--block/nbd-client.h2
-rw-r--r--block/nbd.c11
-rw-r--r--block/qapi.c5
-rw-r--r--block/qcow.c2
-rw-r--r--block/qcow2-cache.c4
-rw-r--r--block/qcow2-refcount.c78
-rw-r--r--block/qcow2.c2
-rw-r--r--block/qed.c5
-rw-r--r--block/qed.h1
-rw-r--r--block/raw-posix.c140
-rw-r--r--block/write-threshold.c125
-rw-r--r--blockdev.c44
-rwxr-xr-xconfigure50
-rw-r--r--hmp.c6
-rw-r--r--hw/block/dataplane/virtio-blk.c8
-rw-r--r--hw/block/virtio-blk.c299
-rw-r--r--hw/ide/atapi.c17
-rw-r--r--hw/ide/core.c1
-rw-r--r--hw/ide/internal.h2
-rw-r--r--hw/ide/pci.c11
-rw-r--r--include/block/accounting.h3
-rw-r--r--include/block/block.h4
-rw-r--r--include/block/block_int.h4
-rw-r--r--include/block/nbd.h2
-rw-r--r--include/block/write-threshold.h64
-rw-r--r--include/hw/virtio/virtio-blk.h18
-rw-r--r--include/qapi/qmp/qerror.h9
-rw-r--r--include/sysemu/block-backend.h1
-rw-r--r--monitor.c16
-rw-r--r--nbd.c42
-rw-r--r--qapi/block-core.json60
-rw-r--r--qemu-img.c2
-rw-r--r--qemu-nbd.c7
-rw-r--r--qmp-commands.hx54
-rw-r--r--qmp.c8
-rw-r--r--tests/Makefile3
-rw-r--r--tests/qemu-iotests/067.out5
-rwxr-xr-xtests/qemu-iotests/0833
-rw-r--r--tests/qemu-iotests/083.out81
-rwxr-xr-xtests/qemu-iotests/10012
-rwxr-xr-xtests/qemu-iotests/1049
-rwxr-xr-xtests/qemu-iotests/11696
-rw-r--r--tests/qemu-iotests/116.out37
-rw-r--r--tests/qemu-iotests/common.filter1
-rw-r--r--tests/qemu-iotests/common.rc2
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/qemu-iotests/iotests.py2
-rw-r--r--tests/test-write-threshold.c119
-rw-r--r--trace-events1
55 files changed, 1602 insertions, 462 deletions
diff --git a/block.c b/block.c
index d45e4ddf31..210fd5f997 100644
--- a/block.c
+++ b/block.c
@@ -2647,7 +2647,7 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
{
int64_t len;
- if (size > INT_MAX) {
+ if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
return -EIO;
}
@@ -2671,7 +2671,7 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
- if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
return -EIO;
}
@@ -2758,7 +2758,7 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
};
- if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
return -EINVAL;
}
@@ -2826,13 +2826,10 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
}
for (;;) {
- nb_sectors = target_sectors - sector_num;
+ nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
if (nb_sectors <= 0) {
return 0;
}
- if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
- nb_sectors = INT_MAX / BDRV_SECTOR_SIZE;
- }
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
if (ret < 0) {
error_report("error getting block status at sector %" PRId64 ": %s",
@@ -3167,7 +3164,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
return -EINVAL;
}
@@ -3192,10 +3189,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
BDRV_REQ_COPY_ON_READ);
}
-/* if no limit is specified in the BlockLimits use a default
- * of 32768 512-byte sectors (16 MiB) per request.
- */
-#define MAX_WRITE_ZEROES_DEFAULT 32768
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
@@ -3205,8 +3199,8 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
struct iovec iov = {0};
int ret = 0;
- int max_write_zeroes = bs->bl.max_write_zeroes ?
- bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
+ int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
+ BDRV_REQUEST_MAX_SECTORS);
while (nb_sectors > 0 && !ret) {
int num = nb_sectors;
@@ -3242,7 +3236,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
if (ret == -ENOTSUP) {
/* Fall back to bounce buffer if write zeroes is unsupported */
int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
- MAX_WRITE_ZEROES_DEFAULT);
+ MAX_WRITE_ZEROES_BOUNCE_BUFFER);
num = MIN(num, max_xfer_len);
iov.iov_len = num * BDRV_SECTOR_SIZE;
if (iov.iov_base == NULL) {
@@ -3461,7 +3455,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
return -EINVAL;
}
@@ -3719,6 +3713,36 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
return ret;
}
+/*
+ * Provide an encryption key for @bs.
+ * If @key is non-null:
+ * If @bs is not encrypted, fail.
+ * Else if the key is invalid, fail.
+ * Else set @bs's key to @key, replacing the existing key, if any.
+ * If @key is null:
+ * If @bs is encrypted and still lacks a key, fail.
+ * Else do nothing.
+ * On failure, store an error object through @errp if non-null.
+ */
+void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
+{
+ if (key) {
+ if (!bdrv_is_encrypted(bs)) {
+ error_setg(errp, "Device '%s' is not encrypted",
+ bdrv_get_device_name(bs));
+ } else if (bdrv_set_key(bs, key) < 0) {
+ error_set(errp, QERR_INVALID_PASSWORD);
+ }
+ } else {
+ if (bdrv_key_required(bs)) {
+ error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
+ "'%s' (%s) is encrypted",
+ bdrv_get_device_name(bs),
+ bdrv_get_encrypted_filename(bs));
+ }
+ }
+}
+
const char *bdrv_get_format_name(BlockDriverState *bs)
{
return bs->drv ? bs->drv->format_name : NULL;
@@ -4562,6 +4586,8 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
}
}
+ block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+
return outidx + 1;
}
@@ -5097,11 +5123,6 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
-/* if no limit is specified in the BlockLimits use a default
- * of 32768 512-byte sectors (16 MiB) per request.
- */
-#define MAX_DISCARD_DEFAULT 32768
-
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
@@ -5126,7 +5147,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
- max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
+ max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
while (nb_sectors > 0) {
int ret;
int num = nb_sectors;
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 04b0e43eb1..db2933e469 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -20,6 +20,7 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
block-obj-y += accounting.o
+block-obj-y += write-threshold.o
common-obj-y += stream.o
common-obj-y += commit.o
@@ -36,5 +37,6 @@ gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
archipelago.o-libs := $(ARCHIPELAGO_LIBS)
+dmg.o-libs := $(BZIP2_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
diff --git a/block/accounting.c b/block/accounting.c
index 18102f015d..01d594ffdc 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -54,3 +54,10 @@ void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
stats->wr_highest_sector = sector_num + nb_sectors - 1;
}
}
+
+void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
+ int num_requests)
+{
+ assert(type < BLOCK_MAX_IOTYPE);
+ stats->merged[type] += num_requests;
+}
diff --git a/block/block-backend.c b/block/block-backend.c
index d00c129f15..c28e2402c7 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -580,6 +580,11 @@ int blk_get_flags(BlockBackend *blk)
return bdrv_get_flags(blk->bs);
}
+int blk_get_max_transfer_length(BlockBackend *blk)
+{
+ return blk->bs->bl.max_transfer_length;
+}
+
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
bdrv_set_guest_block_size(blk->bs, align);
diff --git a/block/dmg.c b/block/dmg.c
index e455886d2b..825c49d59a 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -26,6 +26,10 @@
#include "qemu/bswap.h"
#include "qemu/module.h"
#include <zlib.h>
+#ifdef CONFIG_BZIP2
+#include <bzlib.h>
+#endif
+#include <glib.h>
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -55,6 +59,9 @@ typedef struct BDRVDMGState {
uint8_t *compressed_chunk;
uint8_t *uncompressed_chunk;
z_stream zstream;
+#ifdef CONFIG_BZIP2
+ bz_stream bzstream;
+#endif
} BDRVDMGState;
static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -100,6 +107,16 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
return 0;
}
+static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset)
+{
+ return be64_to_cpu(*(uint64_t *)&buffer[offset]);
+}
+
+static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset)
+{
+ return be32_to_cpu(*(uint32_t *)&buffer[offset]);
+}
+
/* Increase max chunk sizes, if necessary. This function is used to calculate
* the buffer sizes needed for compressed/uncompressed chunk I/O.
*/
@@ -112,6 +129,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
switch (s->types[chunk]) {
case 0x80000005: /* zlib compressed */
+ case 0x80000006: /* bzip2 compressed */
compressed_size = s->lengths[chunk];
uncompressed_sectors = s->sectorcounts[chunk];
break;
@@ -119,7 +137,9 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
break;
case 2: /* zero */
- uncompressed_sectors = s->sectorcounts[chunk];
+ /* as the all-zeroes block may be large, it is treated specially: the
+ * sector is not copied from a large buffer, a simple memset is used
+ * instead. Therefore uncompressed_sectors does not need to be set. */
break;
}
@@ -131,163 +151,372 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
}
}
+static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
+{
+ int64_t length;
+ int64_t offset = 0;
+ uint8_t buffer[515];
+ int i, ret;
+
+ /* bdrv_getlength returns a multiple of block size (512), rounded up. Since
+ * dmg images can have odd sizes, try to look for the "koly" magic which
+ * marks the begin of the UDIF trailer (512 bytes). This magic can be found
+ * in the last 511 bytes of the second-last sector or the first 4 bytes of
+ * the last sector (search space: 515 bytes) */
+ length = bdrv_getlength(file_bs);
+ if (length < 0) {
+ error_setg_errno(errp, -length,
+ "Failed to get file size while reading UDIF trailer");
+ return length;
+ } else if (length < 512) {
+ error_setg(errp, "dmg file must be at least 512 bytes long");
+ return -EINVAL;
+ }
+ if (length > 511 + 512) {
+ offset = length - 511 - 512;
+ }
+ length = length < 515 ? length : 515;
+ ret = bdrv_pread(file_bs, offset, buffer, length);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
+ return ret;
+ }
+ for (i = 0; i < length - 3; i++) {
+ if (buffer[i] == 'k' && buffer[i+1] == 'o' &&
+ buffer[i+2] == 'l' && buffer[i+3] == 'y') {
+ return offset + i;
+ }
+ }
+ error_setg(errp, "Could not locate UDIF trailer in dmg file");
+ return -EINVAL;
+}
+
+/* used when building the sector table */
+typedef struct DmgHeaderState {
+ /* used internally by dmg_read_mish_block to remember offsets of blocks
+ * across calls */
+ uint64_t data_fork_offset;
+ /* exported for dmg_open */
+ uint32_t max_compressed_size;
+ uint32_t max_sectors_per_chunk;
+} DmgHeaderState;
+
+static bool dmg_is_known_block_type(uint32_t entry_type)
+{
+ switch (entry_type) {
+ case 0x00000001: /* uncompressed */
+ case 0x00000002: /* zeroes */
+ case 0x80000005: /* zlib */
+#ifdef CONFIG_BZIP2
+ case 0x80000006: /* bzip2 */
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
+ uint8_t *buffer, uint32_t count)
+{
+ uint32_t type, i;
+ int ret;
+ size_t new_size;
+ uint32_t chunk_count;
+ int64_t offset = 0;
+ uint64_t data_offset;
+ uint64_t in_offset = ds->data_fork_offset;
+ uint64_t out_offset;
+
+ type = buff_read_uint32(buffer, offset);
+ /* skip data that is not a valid MISH block (invalid magic or too small) */
+ if (type != 0x6d697368 || count < 244) {
+ /* assume success for now */
+ return 0;
+ }
+
+ /* chunk offsets are relative to this sector number */
+ out_offset = buff_read_uint64(buffer, offset + 8);
+
+ /* location in data fork for (compressed) blob (in bytes) */
+ data_offset = buff_read_uint64(buffer, offset + 0x18);
+ in_offset += data_offset;
+
+ /* move to begin of chunk entries */
+ offset += 204;
+
+ chunk_count = (count - 204) / 40;
+ new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
+ s->types = g_realloc(s->types, new_size / 2);
+ s->offsets = g_realloc(s->offsets, new_size);
+ s->lengths = g_realloc(s->lengths, new_size);
+ s->sectors = g_realloc(s->sectors, new_size);
+ s->sectorcounts = g_realloc(s->sectorcounts, new_size);
+
+ for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
+ s->types[i] = buff_read_uint32(buffer, offset);
+ if (!dmg_is_known_block_type(s->types[i])) {
+ chunk_count--;
+ i--;
+ offset += 40;
+ continue;
+ }
+
+ /* sector number */
+ s->sectors[i] = buff_read_uint64(buffer, offset + 8);
+ s->sectors[i] += out_offset;
+
+ /* sector count */
+ s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10);
+
+ /* all-zeroes sector (type 2) does not need to be "uncompressed" and can
+ * therefore be unbounded. */
+ if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
+ error_report("sector count %" PRIu64 " for chunk %" PRIu32
+ " is larger than max (%u)",
+ s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* offset in (compressed) data fork */
+ s->offsets[i] = buff_read_uint64(buffer, offset + 0x18);
+ s->offsets[i] += in_offset;
+
+ /* length in (compressed) data fork */
+ s->lengths[i] = buff_read_uint64(buffer, offset + 0x20);
+
+ if (s->lengths[i] > DMG_LENGTHS_MAX) {
+ error_report("length %" PRIu64 " for chunk %" PRIu32
+ " is larger than max (%u)",
+ s->lengths[i], i, DMG_LENGTHS_MAX);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ update_max_chunk_size(s, i, &ds->max_compressed_size,
+ &ds->max_sectors_per_chunk);
+ offset += 40;
+ }
+ s->n_chunks += chunk_count;
+ return 0;
+
+fail:
+ return ret;
+}
+
+static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
+ uint64_t info_begin, uint64_t info_length)
+{
+ BDRVDMGState *s = bs->opaque;
+ int ret;
+ uint32_t count, rsrc_data_offset;
+ uint8_t *buffer = NULL;
+ uint64_t info_end;
+ uint64_t offset;
+
+ /* read offset from begin of resource fork (info_begin) to resource data */
+ ret = read_uint32(bs, info_begin, &rsrc_data_offset);
+ if (ret < 0) {
+ goto fail;
+ } else if (rsrc_data_offset > info_length) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* read length of resource data */
+ ret = read_uint32(bs, info_begin + 8, &count);
+ if (ret < 0) {
+ goto fail;
+ } else if (count == 0 || rsrc_data_offset + count > info_length) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* begin of resource data (consisting of one or more resources) */
+ offset = info_begin + rsrc_data_offset;
+
+ /* end of resource data (there is possibly a following resource map
+ * which will be ignored). */
+ info_end = offset + count;
+
+ /* read offsets (mish blocks) from one or more resources in resource data */
+ while (offset < info_end) {
+ /* size of following resource */
+ ret = read_uint32(bs, offset, &count);
+ if (ret < 0) {
+ goto fail;
+ } else if (count == 0 || count > info_end - offset) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ offset += 4;
+
+ buffer = g_realloc(buffer, count);
+ ret = bdrv_pread(bs->file, offset, buffer, count);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = dmg_read_mish_block(s, ds, buffer, count);
+ if (ret < 0) {
+ goto fail;
+ }
+ /* advance offset by size of resource */
+ offset += count;
+ }
+ ret = 0;
+
+fail:
+ g_free(buffer);
+ return ret;
+}
+
+static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
+ uint64_t info_begin, uint64_t info_length)
+{
+ BDRVDMGState *s = bs->opaque;
+ int ret;
+ uint8_t *buffer = NULL;
+ char *data_begin, *data_end;
+
+ /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
+ * safe upper cap on the data length. A test sample had a XML length of
+ * about 1 MiB. */
+ if (info_length == 0 || info_length > 16 * 1024 * 1024) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ buffer = g_malloc(info_length + 1);
+ buffer[info_length] = '\0';
+ ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
+ if (ret != info_length) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
+ * decode. The actual data element has 431 (0x1af) bytes which includes tabs
+ * and line feeds. */
+ data_end = (char *)buffer;
+ while ((data_begin = strstr(data_end, "<data>")) != NULL) {
+ guchar *mish;
+ gsize out_len = 0;
+
+ data_begin += 6;
+ data_end = strstr(data_begin, "</data>");
+ /* malformed XML? */
+ if (data_end == NULL) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ *data_end++ = '\0';
+ mish = g_base64_decode(data_begin, &out_len);
+ ret = dmg_read_mish_block(s, ds, mish, (uint32_t)out_len);
+ g_free(mish);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ ret = 0;
+
+fail:
+ g_free(buffer);
+ return ret;
+}
+
static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVDMGState *s = bs->opaque;
- uint64_t info_begin, info_end, last_in_offset, last_out_offset;
- uint32_t count, tmp;
- uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
+ DmgHeaderState ds;
+ uint64_t rsrc_fork_offset, rsrc_fork_length;
+ uint64_t plist_xml_offset, plist_xml_length;
int64_t offset;
int ret;
bs->read_only = 1;
s->n_chunks = 0;
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
+ /* used by dmg_read_mish_block to keep track of the current I/O position */
+ ds.data_fork_offset = 0;
+ ds.max_compressed_size = 1;
+ ds.max_sectors_per_chunk = 1;
- /* read offset of info blocks */
- offset = bdrv_getlength(bs->file);
+ /* locate the UDIF trailer */
+ offset = dmg_find_koly_offset(bs->file, errp);
if (offset < 0) {
ret = offset;
goto fail;
}
- offset -= 0x1d8;
- ret = read_uint64(bs, offset, &info_begin);
+ /* offset of data fork (DataForkOffset) */
+ ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset);
if (ret < 0) {
goto fail;
- } else if (info_begin == 0) {
+ } else if (ds.data_fork_offset > offset) {
ret = -EINVAL;
goto fail;
}
- ret = read_uint32(bs, info_begin, &tmp);
+ /* offset of resource fork (RsrcForkOffset) */
+ ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset);
if (ret < 0) {
goto fail;
- } else if (tmp != 0x100) {
+ }
+ ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (rsrc_fork_offset >= offset ||
+ rsrc_fork_length > offset - rsrc_fork_offset) {
ret = -EINVAL;
goto fail;
}
-
- ret = read_uint32(bs, info_begin + 4, &count);
+ /* offset of property list (XMLOffset) */
+ ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
if (ret < 0) {
goto fail;
- } else if (count == 0) {
+ }
+ ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (plist_xml_offset >= offset ||
+ plist_xml_length > offset - plist_xml_offset) {
ret = -EINVAL;
goto fail;
}
- info_end = info_begin + count;
-
- offset = info_begin + 0x100;
-
- /* read offsets */
- last_in_offset = last_out_offset = 0;
- while (offset < info_end) {
- uint32_t type;
-
- ret = read_uint32(bs, offset, &count);
+ ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (bs->total_sectors < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (rsrc_fork_length != 0) {
+ ret = dmg_read_resource_fork(bs, &ds,
+ rsrc_fork_offset, rsrc_fork_length);
if (ret < 0) {
goto fail;
- } else if (count == 0) {
- ret = -EINVAL;
- goto fail;
}
- offset += 4;
-
- ret = read_uint32(bs, offset, &type);
+ } else if (plist_xml_length != 0) {
+ ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
if (ret < 0) {
goto fail;
}
-
- if (type == 0x6d697368 && count >= 244) {
- size_t new_size;
- uint32_t chunk_count;
-
- offset += 4;
- offset += 200;
-
- chunk_count = (count - 204) / 40;
- new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
- s->types = g_realloc(s->types, new_size / 2);
- s->offsets = g_realloc(s->offsets, new_size);
- s->lengths = g_realloc(s->lengths, new_size);
- s->sectors = g_realloc(s->sectors, new_size);
- s->sectorcounts = g_realloc(s->sectorcounts, new_size);
-
- for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
- ret = read_uint32(bs, offset, &s->types[i]);
- if (ret < 0) {
- goto fail;
- }
- offset += 4;
- if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
- s->types[i] != 2) {
- if (s->types[i] == 0xffffffff && i > 0) {
- last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
- last_out_offset = s->sectors[i - 1] +
- s->sectorcounts[i - 1];
- }
- chunk_count--;
- i--;
- offset += 36;
- continue;
- }
- offset += 4;
-
- ret = read_uint64(bs, offset, &s->sectors[i]);
- if (ret < 0) {
- goto fail;
- }
- s->sectors[i] += last_out_offset;
- offset += 8;
-
- ret = read_uint64(bs, offset, &s->sectorcounts[i]);
- if (ret < 0) {
- goto fail;
- }
- offset += 8;
-
- if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
- error_report("sector count %" PRIu64 " for chunk %" PRIu32
- " is larger than max (%u)",
- s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
- ret = -EINVAL;
- goto fail;
- }
-
- ret = read_uint64(bs, offset, &s->offsets[i]);
- if (ret < 0) {
- goto fail;
- }
- s->offsets[i] += last_in_offset;
- offset += 8;
-
- ret = read_uint64(bs, offset, &s->lengths[i]);
- if (ret < 0) {
- goto fail;
- }
- offset += 8;
-
- if (s->lengths[i] > DMG_LENGTHS_MAX) {
- error_report("length %" PRIu64 " for chunk %" PRIu32
- " is larger than max (%u)",
- s->lengths[i], i, DMG_LENGTHS_MAX);
- ret = -EINVAL;
- goto fail;
- }
-
- update_max_chunk_size(s, i, &max_compressed_size,
- &max_sectors_per_chunk);
- }
- s->n_chunks += chunk_count;
- }
+ } else {
+ ret = -EINVAL;
+ goto fail;
}
/* initialize zlib engine */
s->compressed_chunk = qemu_try_blockalign(bs->file,
- max_compressed_size + 1);
+ ds.max_compressed_size + 1);
s->uncompressed_chunk = qemu_try_blockalign(bs->file,
- 512 * max_sectors_per_chunk);
+ 512 * ds.max_sectors_per_chunk);
if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
ret = -ENOMEM;
goto fail;
@@ -349,13 +578,16 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
int ret;
uint32_t chunk = search_chunk(s, sector_num);
+#ifdef CONFIG_BZIP2
+ uint64_t total_out;
+#endif
if (chunk >= s->n_chunks) {
return -1;
}
s->current_chunk = s->n_chunks;
- switch (s->types[chunk]) {
+ switch (s->types[chunk]) { /* block entry type */
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
@@ -379,6 +611,34 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
return -1;
}
break; }
+#ifdef CONFIG_BZIP2
+ case 0x80000006: /* bzip2 compressed */
+ /* we need to buffer, because only the chunk as whole can be
+ * inflated. */
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
+ s->compressed_chunk, s->lengths[chunk]);
+ if (ret != s->lengths[chunk]) {
+ return -1;
+ }
+
+ ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
+ if (ret != BZ_OK) {
+ return -1;
+ }
+ s->bzstream.next_in = (char *)s->compressed_chunk;
+ s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
+ s->bzstream.next_out = (char *)s->uncompressed_chunk;
+ s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
+ ret = BZ2_bzDecompress(&s->bzstream);
+ total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
+ s->bzstream.total_out_lo32;
+ BZ2_bzDecompressEnd(&s->bzstream);
+ if (ret != BZ_STREAM_END ||
+ total_out != 512 * s->sectorcounts[chunk]) {
+ return -1;
+ }
+ break;
+#endif /* CONFIG_BZIP2 */
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
@@ -387,7 +647,8 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
}
break;
case 2: /* zero */
- memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
+ /* see dmg_read, it is treated specially. No buffer needs to be
+ * pre-filled, the zeroes can be set directly. */
break;
}
s->current_chunk = chunk;
@@ -406,6 +667,13 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
if (dmg_read_chunk(bs, sector_num + i) != 0) {
return -1;
}
+ /* Special case: current chunk is all zeroes. Do not perform a memcpy as
+ * s->uncompressed_chunk may be too small to cover the large all-zeroes
+ * section. dmg_read_chunk is called to find s->current_chunk */
+ if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
+ memset(buf + i * 512, 0, 512);
+ continue;
+ }
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
memcpy(buf + i * 512,
s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 6e1c97cad0..28bfb62bed 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -373,7 +373,7 @@ void nbd_client_session_close(NbdClientSession *client)
}
int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
- int sock, const char *export)
+ int sock, const char *export, Error **errp)
{
int ret;
@@ -382,7 +382,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
qemu_set_block(sock);
ret = nbd_receive_negotiate(sock, export,
&client->nbdflags, &client->size,
- &client->blocksize);
+ &client->blocksize, errp);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
closesocket(sock);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index cd478f3a98..cfeecc2775 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -36,7 +36,7 @@ typedef struct NbdClientSession {
} NbdClientSession;
int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
- int sock, const char *export_name);
+ int sock, const char *export_name, Error **errp);
void nbd_client_session_close(NbdClientSession *client);
int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
diff --git a/block/nbd.c b/block/nbd.c
index 04cc845076..b05d1d0407 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -271,7 +271,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
}
/* NBD handshake */
- result = nbd_client_session_init(&s->client, bs, sock, export);
+ result = nbd_client_session_init(&s->client, bs, sock, export, errp);
g_free(export);
return result;
}
@@ -301,6 +301,12 @@ static int nbd_co_flush(BlockDriverState *bs)
return nbd_client_session_co_flush(&s->client);
}
+static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
+ bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
+}
+
static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
@@ -396,6 +402,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
+ .bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
@@ -413,6 +420,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
+ .bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
@@ -430,6 +438,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
+ .bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
diff --git a/block/qapi.c b/block/qapi.c
index 75c388e90b..1808e67336 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -24,6 +24,7 @@
#include "block/qapi.h"
#include "block/block_int.h"
+#include "block/write-threshold.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
@@ -89,6 +90,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
info->iops_size = cfg.op_size;
}
+ info->write_threshold = bdrv_write_threshold_get(bs);
+
return info;
}
@@ -335,6 +338,8 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
+ s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
+ s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
s->stats->wr_highest_offset =
bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
diff --git a/block/qcow.c b/block/qcow.c
index ccbe9e0d2c..055896910e 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -215,7 +215,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
/* read the backing file name */
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
- if (len > 1023 || len > sizeof(bs->backing_file)) {
+ if (len > 1023 || len >= sizeof(bs->backing_file)) {
error_setg(errp, "Backing file name too long");
ret = -EINVAL;
goto fail;
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 904f6b1f44..b1155492a3 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -253,7 +253,9 @@ static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
/* Give newer hits priority */
/* TODO Check how to optimize the replacement strategy */
- c->entries[i].cache_hits /= 2;
+ if (c->entries[i].cache_hits > 1) {
+ c->entries[i].cache_hits /= 2;
+ }
}
if (min_index == -1) {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9afdb40b40..9b80ca79ea 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -759,54 +759,54 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
BDRVQcowState *s = bs->opaque;
- int64_t offset, cluster_offset;
- int free_in_cluster;
+ int64_t offset;
+ size_t free_in_cluster;
+ int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
assert(size > 0 && size <= s->cluster_size);
- if (s->free_byte_offset == 0) {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
+ assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
+
+ offset = s->free_byte_offset;
+
+ if (offset) {
+ int refcount = qcow2_get_refcount(bs, offset >> s->cluster_bits);
+ if (refcount < 0) {
+ return refcount;
}
- s->free_byte_offset = offset;
- }
- redo:
- free_in_cluster = s->cluster_size -
- offset_into_cluster(s, s->free_byte_offset);
- if (size <= free_in_cluster) {
- /* enough space in current cluster */
- offset = s->free_byte_offset;
- s->free_byte_offset += size;
- free_in_cluster -= size;
- if (free_in_cluster == 0)
- s->free_byte_offset = 0;
- if (offset_into_cluster(s, offset) != 0)
- qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- } else {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
+
+ if (refcount == 0xffff) {
+ offset = 0;
}
- cluster_offset = start_of_cluster(s, s->free_byte_offset);
- if ((cluster_offset + s->cluster_size) == offset) {
- /* we are lucky: contiguous data */
- offset = s->free_byte_offset;
- qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- s->free_byte_offset += size;
- } else {
- s->free_byte_offset = offset;
- goto redo;
+ }
+
+ free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
+ if (!offset || free_in_cluster < size) {
+ int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
+ if (new_cluster < 0) {
+ return new_cluster;
+ }
+
+ if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
+ offset = new_cluster;
}
}
- /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by qcow2_update_cluster_refcount(). Refcount blocks must
- * be flushed before the caller's L2 table updates.
- */
+ assert(offset);
+ ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* The cluster refcount was incremented; refcount blocks must be flushed
+ * before the caller's L2 table updates. */
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+
+ s->free_byte_offset = offset + size;
+ if (!offset_into_cluster(s, s->free_byte_offset)) {
+ s->free_byte_offset = 0;
+ }
+
return offset;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index dbaf016bc7..7e614d76a4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -869,7 +869,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
- len > sizeof(bs->backing_file)) {
+ len >= sizeof(bs->backing_file)) {
error_setg(errp, "Backing file name too long");
ret = -EINVAL;
goto fail;
diff --git a/block/qed.c b/block/qed.c
index 80f18d82e2..892b13c806 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -440,6 +440,11 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
s->l2_mask = s->table_nelems - 1;
s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+ /* Header size calculation must not overflow uint32_t */
+ if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
+ return -EINVAL;
+ }
+
if ((s->header.features & QED_F_BACKING_FILE)) {
if ((uint64_t)s->header.backing_filename_offset +
s->header.backing_filename_size >
diff --git a/block/qed.h b/block/qed.h
index d3934a05cd..615e676fc8 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -133,7 +133,6 @@ typedef struct QEDAIOCB {
int bh_ret; /* final return status for completion bh */
QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
int flags; /* QED_AIOCB_* bits ORed together */
- bool *finished; /* signal for cancel completion */
uint64_t end_pos; /* request end on block device, in bytes */
/* User scatter-gather list */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index e51293a455..e474c17974 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -60,7 +60,7 @@
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#endif
#endif
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE)
#include <linux/falloc.h>
#endif
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -147,6 +147,7 @@ typedef struct BDRVRawState {
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
+ bool has_fallocate;
bool needs_alignment;
} BDRVRawState;
@@ -452,6 +453,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
if (S_ISREG(st.st_mode)) {
s->discard_zeroes = true;
+ s->has_fallocate = true;
}
if (S_ISBLK(st.st_mode)) {
#ifdef BLKDISCARDZEROES
@@ -893,40 +895,108 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
}
#endif
-static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+static int translate_err(int err)
{
- int ret = -EOPNOTSUPP;
+ if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
+ err == -ENOTTY) {
+ err = -ENOTSUP;
+ }
+ return err;
+}
+
+#ifdef CONFIG_FALLOCATE
+static int do_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+ do {
+ if (fallocate(fd, mode, offset, len) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
+ return translate_err(-errno);
+}
+#endif
+
+static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
+{
+ int ret = -ENOTSUP;
BDRVRawState *s = aiocb->bs->opaque;
- if (s->has_write_zeroes == 0) {
+ if (!s->has_write_zeroes) {
return -ENOTSUP;
}
- if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKZEROOUT
- do {
- uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
- if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
- return 0;
- }
- } while (errno == EINTR);
+ do {
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
- ret = -errno;
+ ret = translate_err(-errno);
#endif
- } else {
+
+ if (ret == -ENOTSUP) {
+ s->has_write_zeroes = false;
+ }
+ return ret;
+}
+
+static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+{
+ BDRVRawState *s = aiocb->bs->opaque;
+
+ if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+ return handle_aiocb_write_zeroes_block(aiocb);
+ }
+
#ifdef CONFIG_XFS
- if (s->is_xfs) {
- return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
+ if (s->is_xfs) {
+ return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
+ }
+#endif
+
+#ifdef CONFIG_FALLOCATE_ZERO_RANGE
+ if (s->has_write_zeroes) {
+ int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
+ aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0 || ret != -ENOTSUP) {
+ return ret;
}
+ s->has_write_zeroes = false;
+ }
#endif
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ if (s->has_discard && s->has_fallocate) {
+ int ret = do_fallocate(s->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0) {
+ ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0 || ret != -ENOTSUP) {
+ return ret;
+ }
+ s->has_fallocate = false;
+ } else if (ret != -ENOTSUP) {
+ return ret;
+ } else {
+ s->has_discard = false;
+ }
}
+#endif
- if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
- ret == -ENOTTY) {
- s->has_write_zeroes = false;
- ret = -ENOTSUP;
+#ifdef CONFIG_FALLOCATE
+ if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) {
+ int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0 || ret != -ENOTSUP) {
+ return ret;
+ }
+ s->has_fallocate = false;
}
- return ret;
+#endif
+
+ return -ENOTSUP;
}
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
@@ -957,21 +1027,14 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
#endif
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
- do {
- if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
- return 0;
- }
- } while (errno == EINTR);
-
- ret = -errno;
+ ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ aiocb->aio_offset, aiocb->aio_nbytes);
#endif
}
- if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
- ret == -ENOTTY) {
+ ret = translate_err(ret);
+ if (ret == -ENOTSUP) {
s->has_discard = false;
- ret = -ENOTSUP;
}
return ret;
}
@@ -1312,7 +1375,20 @@ again:
if (size == 0)
#endif
#if defined(__APPLE__) && defined(__MACH__)
- size = LLONG_MAX;
+ {
+ uint64_t sectors = 0;
+ uint32_t sector_size = 0;
+
+ if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0
+ && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) {
+ size = sectors * sector_size;
+ } else {
+ size = lseek(fd, 0LL, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ }
+ }
#else
size = lseek(fd, 0LL, SEEK_END);
if (size < 0) {
diff --git a/block/write-threshold.c b/block/write-threshold.c
new file mode 100644
index 0000000000..c2cd517716
--- /dev/null
+++ b/block/write-threshold.c
@@ -0,0 +1,125 @@
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Francesco Romani <fromani@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+#include "block/coroutine.h"
+#include "block/write-threshold.h"
+#include "qemu/notify.h"
+#include "qapi-event.h"
+#include "qmp-commands.h"
+
+
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset;
+}
+
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset > 0;
+}
+
+static void write_threshold_disable(BlockDriverState *bs)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ notifier_with_return_remove(&bs->write_threshold_notifier);
+ bs->write_threshold_offset = 0;
+ }
+}
+
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ if (req->offset > bs->write_threshold_offset) {
+ return (req->offset - bs->write_threshold_offset) + req->bytes;
+ }
+ if ((req->offset + req->bytes) > bs->write_threshold_offset) {
+ return (req->offset + req->bytes) - bs->write_threshold_offset;
+ }
+ }
+ return 0;
+}
+
+static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
+ void *opaque)
+{
+ BdrvTrackedRequest *req = opaque;
+ BlockDriverState *bs = req->bs;
+ uint64_t amount = 0;
+
+ amount = bdrv_write_threshold_exceeded(bs, req);
+ if (amount > 0) {
+ qapi_event_send_block_write_threshold(
+ bs->node_name,
+ amount,
+ bs->write_threshold_offset,
+ &error_abort);
+
+ /* autodisable to avoid flooding the monitor */
+ write_threshold_disable(bs);
+ }
+
+ return 0; /* should always let other notifiers run */
+}
+
+static void write_threshold_register_notifier(BlockDriverState *bs)
+{
+ bs->write_threshold_notifier.notify = before_write_notify;
+ notifier_with_return_list_add(&bs->before_write_notifiers,
+ &bs->write_threshold_notifier);
+}
+
+static void write_threshold_update(BlockDriverState *bs,
+ int64_t threshold_bytes)
+{
+ bs->write_threshold_offset = threshold_bytes;
+}
+
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ if (threshold_bytes > 0) {
+ write_threshold_update(bs, threshold_bytes);
+ } else {
+ write_threshold_disable(bs);
+ }
+ } else {
+ if (threshold_bytes > 0) {
+ /* avoid multiple registration */
+ write_threshold_register_notifier(bs);
+ write_threshold_update(bs, threshold_bytes);
+ }
+ /* discard bogus disable request */
+ }
+}
+
+void qmp_block_set_write_threshold(const char *node_name,
+ uint64_t threshold_bytes,
+ Error **errp)
+{
+ BlockDriverState *bs;
+ AioContext *aio_context;
+
+ bs = bdrv_find_node(node_name);
+ if (!bs) {
+ error_set(errp, QERR_DEVICE_NOT_FOUND, node_name);
+ return;
+ }
+
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_write_threshold_set(bs, threshold_bytes);
+
+ aio_context_release(aio_context);
+}
diff --git a/blockdev.c b/blockdev.c
index d59efd3f15..7d34960b96 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1793,7 +1793,6 @@ void qmp_block_passwd(bool has_device, const char *device,
Error *local_err = NULL;
BlockDriverState *bs;
AioContext *aio_context;
- int err;
bs = bdrv_lookup_bs(has_device ? device : NULL,
has_node_name ? node_name : NULL,
@@ -1806,16 +1805,8 @@ void qmp_block_passwd(bool has_device, const char *device,
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- err = bdrv_set_key(bs, password);
- if (err == -EINVAL) {
- error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
- goto out;
- } else if (err < 0) {
- error_set(errp, QERR_INVALID_PASSWORD);
- goto out;
- }
+ bdrv_add_key(bs, password, errp);
-out:
aio_context_release(aio_context);
}
@@ -1833,18 +1824,7 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
return;
}
- if (bdrv_key_required(bs)) {
- if (password) {
- if (bdrv_set_key(bs, password) < 0) {
- error_set(errp, QERR_INVALID_PASSWORD);
- }
- } else {
- error_set(errp, QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs),
- bdrv_get_encrypted_filename(bs));
- }
- } else if (password) {
- error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
- }
+ bdrv_add_key(bs, password, errp);
}
void qmp_change_blockdev(const char *device, const char *filename,
@@ -2653,7 +2633,8 @@ out:
}
/* Get the block job for a given device name and acquire its AioContext */
-static BlockJob *find_block_job(const char *device, AioContext **aio_context)
+static BlockJob *find_block_job(const char *device, AioContext **aio_context,
+ Error **errp)
{
BlockDriverState *bs;
@@ -2673,6 +2654,8 @@ static BlockJob *find_block_job(const char *device, AioContext **aio_context)
return bs->job;
notfound:
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
+ "No active block job on device '%s'", device);
*aio_context = NULL;
return NULL;
}
@@ -2680,10 +2663,9 @@ notfound:
void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2695,10 +2677,9 @@ void qmp_block_job_cancel(const char *device,
bool has_force, bool force, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2721,10 +2702,9 @@ out:
void qmp_block_job_pause(const char *device, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2736,10 +2716,9 @@ void qmp_block_job_pause(const char *device, Error **errp)
void qmp_block_job_resume(const char *device, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2751,10 +2730,9 @@ void qmp_block_job_resume(const char *device, Error **errp)
void qmp_block_job_complete(const char *device, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
diff --git a/configure b/configure
index f185dd0838..7ba4bcb866 100755
--- a/configure
+++ b/configure
@@ -313,6 +313,7 @@ glx=""
zlib="yes"
lzo=""
snappy=""
+bzip2=""
guest_agent=""
guest_agent_with_vss="no"
vss_win32_sdk=""
@@ -1060,6 +1061,10 @@ for opt do
;;
--enable-snappy) snappy="yes"
;;
+ --disable-bzip2) bzip2="no"
+ ;;
+ --enable-bzip2) bzip2="yes"
+ ;;
--enable-guest-agent) guest_agent="yes"
;;
--disable-guest-agent) guest_agent="no"
@@ -1374,6 +1379,8 @@ Advanced options (experts only):
--enable-usb-redir enable usb network redirection support
--enable-lzo enable the support of lzo compression library
--enable-snappy enable the support of snappy compression library
+ --enable-bzip2 enable the support of bzip2 compression library (for
+ reading bzip2-compressed dmg images)
--disable-guest-agent disable building of the QEMU Guest Agent
--enable-guest-agent enable building of the QEMU Guest Agent
--with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent
@@ -1820,6 +1827,24 @@ EOF
fi
##########################################
+# bzip2 check
+
+if test "$bzip2" != "no" ; then
+ cat > $TMPC << EOF
+#include <bzlib.h>
+int main(void) { BZ2_bzlibVersion(); return 0; }
+EOF
+ if compile_prog "" "-lbz2" ; then
+ bzip2="yes"
+ else
+ if test "$bzip2" = "yes"; then
+ feature_not_found "libbzip2" "Install libbzip2 devel"
+ fi
+ bzip2="no"
+ fi
+fi
+
+##########################################
# libseccomp check
if test "$seccomp" != "no" ; then
@@ -3335,6 +3360,22 @@ if compile_prog "" "" ; then
fallocate_punch_hole=yes
fi
+# check that fallocate supports range zeroing inside the file
+fallocate_zero_range=no
+cat > $TMPC << EOF
+#include <fcntl.h>
+#include <linux/falloc.h>
+
+int main(void)
+{
+ fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0);
+ return 0;
+}
+EOF
+if compile_prog "" "" ; then
+ fallocate_zero_range=yes
+fi
+
# check for posix_fallocate
posix_fallocate=no
cat > $TMPC << EOF
@@ -4369,6 +4410,7 @@ echo "vhdx $vhdx"
echo "Quorum $quorum"
echo "lzo support $lzo"
echo "snappy support $snappy"
+echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
if test "$sdl_too_old" = "yes"; then
@@ -4567,6 +4609,9 @@ fi
if test "$fallocate_punch_hole" = "yes" ; then
echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
fi
+if test "$fallocate_zero_range" = "yes" ; then
+ echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak
+fi
if test "$posix_fallocate" = "yes" ; then
echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak
fi
@@ -4724,6 +4769,11 @@ if test "$snappy" = "yes" ; then
echo "CONFIG_SNAPPY=y" >> $config_host_mak
fi
+if test "$bzip2" = "yes" ; then
+ echo "CONFIG_BZIP2=y" >> $config_host_mak
+ echo "BZIP2_LIBS=-lbz2" >> $config_host_mak
+fi
+
if test "$libiscsi" = "yes" ; then
echo "CONFIG_LIBISCSI=m" >> $config_host_mak
echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
diff --git a/hmp.c b/hmp.c
index a42c5c07be..b47f331f4d 100644
--- a/hmp.c
+++ b/hmp.c
@@ -474,6 +474,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
" wr_total_time_ns=%" PRId64
" rd_total_time_ns=%" PRId64
" flush_total_time_ns=%" PRId64
+ " rd_merged=%" PRId64
+ " wr_merged=%" PRId64
"\n",
stats->value->stats->rd_bytes,
stats->value->stats->wr_bytes,
@@ -482,7 +484,9 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
stats->value->stats->flush_operations,
stats->value->stats->wr_total_time_ns,
stats->value->stats->rd_total_time_ns,
- stats->value->stats->flush_total_time_ns);
+ stats->value->stats->flush_total_time_ns,
+ stats->value->stats->rd_merged,
+ stats->value->stats->wr_merged);
}
qapi_free_BlockStatsList(stats_list);
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 39c5d7103c..be957d1117 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -96,9 +96,7 @@ static void handle_notify(EventNotifier *e)
event_notifier_test_and_clear(&s->host_notifier);
blk_io_plug(s->conf->conf.blk);
for (;;) {
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
int ret;
/* Disable guest->host notifies to avoid unnecessary vmexits */
@@ -120,7 +118,9 @@ static void handle_notify(EventNotifier *e)
virtio_blk_handle_request(req, &mrb);
}
- virtio_submit_multiwrite(s->conf->conf.blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->conf->conf.blk, &mrb);
+ }
if (likely(ret == -EAGAIN)) { /* vring emptied */
/* Re-enable guest->host notifies and stop processing the vring.
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 4032fcae27..1a8a176dcc 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -34,6 +34,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
req->dev = s;
req->qiov.size = 0;
req->next = NULL;
+ req->mr_next = NULL;
return req;
}
@@ -84,20 +85,32 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
static void virtio_blk_rw_complete(void *opaque, int ret)
{
- VirtIOBlockReq *req = opaque;
+ VirtIOBlockReq *next = opaque;
- trace_virtio_blk_rw_complete(req, ret);
+ while (next) {
+ VirtIOBlockReq *req = next;
+ next = req->mr_next;
+ trace_virtio_blk_rw_complete(req, ret);
- if (ret) {
- int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
- bool is_read = !(p & VIRTIO_BLK_T_OUT);
- if (virtio_blk_handle_rw_error(req, -ret, is_read))
- return;
- }
+ if (req->qiov.nalloc != -1) {
+ /* If nalloc is != 1 req->qiov is a local copy of the original
+ * external iovec. It was allocated in submit_merged_requests
+ * to be able to merge requests. */
+ qemu_iovec_destroy(&req->qiov);
+ }
- virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
- virtio_blk_free_request(req);
+ if (ret) {
+ int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
+ bool is_read = !(p & VIRTIO_BLK_T_OUT);
+ if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
+ continue;
+ }
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
+ virtio_blk_free_request(req);
+ }
}
static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -291,24 +304,127 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
}
}
-void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb)
+static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+ int start, int num_reqs, int niov)
{
- int i, ret;
+ QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+ int64_t sector_num = mrb->reqs[start]->sector_num;
+ int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+ bool is_write = mrb->is_write;
+
+ if (num_reqs > 1) {
+ int i;
+ struct iovec *tmp_iov = qiov->iov;
+ int tmp_niov = qiov->niov;
+
+ /* mrb->reqs[start]->qiov was initialized from external so we can't
+ * modifiy it here. We need to initialize it locally and then add the
+ * external iovecs. */
+ qemu_iovec_init(qiov, niov);
+
+ for (i = 0; i < tmp_niov; i++) {
+ qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
+ }
- if (!mrb->num_writes) {
+ for (i = start + 1; i < start + num_reqs; i++) {
+ qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
+ mrb->reqs[i]->qiov.size);
+ mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+ nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
+ }
+ assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);
+
+ trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num,
+ nb_sectors, is_write);
+ block_acct_merge_done(blk_get_stats(blk),
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
+ num_reqs - 1);
+ }
+
+ if (is_write) {
+ blk_aio_writev(blk, sector_num, qiov, nb_sectors,
+ virtio_blk_rw_complete, mrb->reqs[start]);
+ } else {
+ blk_aio_readv(blk, sector_num, qiov, nb_sectors,
+ virtio_blk_rw_complete, mrb->reqs[start]);
+ }
+}
+
+static int multireq_compare(const void *a, const void *b)
+{
+ const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
+ *req2 = *(VirtIOBlockReq **)b;
+
+ /*
+ * Note that we can't simply subtract sector_num1 from sector_num2
+ * here as that could overflow the return value.
+ */
+ if (req1->sector_num > req2->sector_num) {
+ return 1;
+ } else if (req1->sector_num < req2->sector_num) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+{
+ int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
+ int max_xfer_len = 0;
+ int64_t sector_num = 0;
+
+ if (mrb->num_reqs == 1) {
+ submit_requests(blk, mrb, 0, 1, -1);
+ mrb->num_reqs = 0;
return;
}
- ret = blk_aio_multiwrite(blk, mrb->blkreq, mrb->num_writes);
- if (ret != 0) {
- for (i = 0; i < mrb->num_writes; i++) {
- if (mrb->blkreq[i].error) {
- virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
+ max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk);
+ max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS);
+
+ qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
+ &multireq_compare);
+
+ for (i = 0; i < mrb->num_reqs; i++) {
+ VirtIOBlockReq *req = mrb->reqs[i];
+ if (num_reqs > 0) {
+ bool merge = true;
+
+ /* merge would exceed maximum number of IOVs */
+ if (niov + req->qiov.niov > IOV_MAX) {
+ merge = false;
+ }
+
+ /* merge would exceed maximum transfer length of backend device */
+ if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) {
+ merge = false;
+ }
+
+ /* requests are not sequential */
+ if (sector_num + nb_sectors != req->sector_num) {
+ merge = false;
+ }
+
+ if (!merge) {
+ submit_requests(blk, mrb, start, num_reqs, niov);
+ num_reqs = 0;
}
}
+
+ if (num_reqs == 0) {
+ sector_num = req->sector_num;
+ nb_sectors = niov = 0;
+ start = i;
+ }
+
+ nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
+ niov += req->qiov.niov;
+ num_reqs++;
}
- mrb->num_writes = 0;
+ submit_requests(blk, mrb, start, num_reqs, niov);
+ mrb->num_reqs = 0;
}
static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
@@ -319,7 +435,9 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
/*
* Make sure all outstanding writes are posted to the backing device.
*/
- virtio_submit_multiwrite(req->dev->blk, mrb);
+ if (mrb->is_write && mrb->num_reqs > 0) {
+ virtio_blk_submit_multireq(req->dev->blk, mrb);
+ }
blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req);
}
@@ -329,6 +447,9 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
uint64_t total_sectors;
+ if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return false;
+ }
if (sector & dev->sector_mask) {
return false;
}
@@ -342,60 +463,6 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
return true;
}
-static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
-{
- BlockRequest *blkreq;
- uint64_t sector;
-
- sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
- trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
-
- if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- virtio_blk_free_request(req);
- return;
- }
-
- block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
- BLOCK_ACCT_WRITE);
-
- if (mrb->num_writes == 32) {
- virtio_submit_multiwrite(req->dev->blk, mrb);
- }
-
- blkreq = &mrb->blkreq[mrb->num_writes];
- blkreq->sector = sector;
- blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
- blkreq->qiov = &req->qiov;
- blkreq->cb = virtio_blk_rw_complete;
- blkreq->opaque = req;
- blkreq->error = 0;
-
- mrb->num_writes++;
-}
-
-static void virtio_blk_handle_read(VirtIOBlockReq *req)
-{
- uint64_t sector;
-
- sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
- trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
-
- if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- virtio_blk_free_request(req);
- return;
- }
-
- block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
- BLOCK_ACCT_READ);
- blk_aio_readv(req->dev->blk, sector, &req->qiov,
- req->qiov.size / BDRV_SECTOR_SIZE,
- virtio_blk_rw_complete, req);
-}
-
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -430,11 +497,58 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
- if (type & VIRTIO_BLK_T_FLUSH) {
+ /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
+ * is an optional flag. Altough a guest should not send this flag if
+ * not negotiated we ignored it in the past. So keep ignoring it. */
+ switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
+ case VIRTIO_BLK_T_IN:
+ {
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev),
+ &req->out.sector);
+
+ if (is_write) {
+ qemu_iovec_init_external(&req->qiov, iov, out_num);
+ trace_virtio_blk_handle_write(req, req->sector_num,
+ req->qiov.size / BDRV_SECTOR_SIZE);
+ } else {
+ qemu_iovec_init_external(&req->qiov, in_iov, in_num);
+ trace_virtio_blk_handle_read(req, req->sector_num,
+ req->qiov.size / BDRV_SECTOR_SIZE);
+ }
+
+ if (!virtio_blk_sect_range_ok(req->dev, req->sector_num,
+ req->qiov.size)) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ virtio_blk_free_request(req);
+ return;
+ }
+
+ block_acct_start(blk_get_stats(req->dev->blk),
+ &req->acct, req->qiov.size,
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+ /* merge would exceed maximum number of requests or IO direction
+ * changes */
+ if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
+ is_write != mrb->is_write ||
+ !req->dev->conf.request_merging)) {
+ virtio_blk_submit_multireq(req->dev->blk, mrb);
+ }
+
+ assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
+ mrb->reqs[mrb->num_reqs++] = req;
+ mrb->is_write = is_write;
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH:
virtio_blk_handle_flush(req, mrb);
- } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
+ break;
+ case VIRTIO_BLK_T_SCSI_CMD:
virtio_blk_handle_scsi(req);
- } else if (type & VIRTIO_BLK_T_GET_ID) {
+ break;
+ case VIRTIO_BLK_T_GET_ID:
+ {
VirtIOBlock *s = req->dev;
/*
@@ -448,14 +562,9 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
iov_from_buf(in_iov, in_num, 0, serial, size);
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
virtio_blk_free_request(req);
- } else if (type & VIRTIO_BLK_T_OUT) {
- qemu_iovec_init_external(&req->qiov, iov, out_num);
- virtio_blk_handle_write(req, mrb);
- } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
- /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
- qemu_iovec_init_external(&req->qiov, in_iov, in_num);
- virtio_blk_handle_read(req);
- } else {
+ break;
+ }
+ default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);
}
@@ -465,9 +574,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
VirtIOBlockReq *req;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
/* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
* dataplane here instead of waiting for .set_status().
@@ -481,7 +588,9 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
virtio_blk_handle_request(req, &mrb);
}
- virtio_submit_multiwrite(s->blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
/*
* FIXME: Want to check for completions before returning to guest mode,
@@ -494,9 +603,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
{
VirtIOBlock *s = opaque;
VirtIOBlockReq *req = s->rq;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
qemu_bh_delete(s->bh);
s->bh = NULL;
@@ -509,7 +616,9 @@ static void virtio_blk_dma_restart_bh(void *opaque)
req = next;
}
- virtio_submit_multiwrite(s->blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
}
static void virtio_blk_dma_restart_cb(void *opaque, int running,
@@ -842,6 +951,8 @@ static Property virtio_blk_properties[] = {
#ifdef __linux__
DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
#endif
+ DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
+ true),
DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index a71e6e014f..1bf8b34528 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -394,6 +394,23 @@ static void ide_atapi_cmd_read(IDEState *s, int lba, int nb_sectors,
}
}
+
+/* Called by *_restart_bh when the transfer function points
+ * to ide_atapi_cmd
+ */
+void ide_atapi_dma_restart(IDEState *s)
+{
+ /*
+ * I'm not sure we have enough stored to restart the command
+ * safely, so give the guest an error it should recover from.
+ * I'm assuming most guests will try to recover from something
+ * listed as a medium error on a CD; it seems to work on Linux.
+ * This would be more of a problem if we did any other type of
+ * DMA operation.
+ */
+ ide_atapi_cmd_error(s, MEDIUM_ERROR, ASC_NO_SEEK_COMPLETE);
+}
+
static inline uint8_t ide_atapi_set_profile(uint8_t *buf, uint8_t *index,
uint16_t profile)
{
diff --git a/hw/ide/core.c b/hw/ide/core.c
index d4af5e2eb1..ac3f015a8d 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2417,6 +2417,7 @@ static int ide_drive_pio_post_load(void *opaque, int version_id)
s->end_transfer_func = transfer_end_table[s->end_transfer_fn_idx];
s->data_ptr = s->io_buffer + s->cur_io_buffer_offset;
s->data_end = s->data_ptr + s->cur_io_buffer_len;
+ s->atapi_dma = s->feature & 1; /* as per cmd_packet */
return 0;
}
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index c998003bf3..ee9a57f039 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -289,6 +289,7 @@ typedef struct IDEDMAOps IDEDMAOps;
#define ATAPI_INT_REASON_TAG 0xf8
/* same constants as bochs */
+#define ASC_NO_SEEK_COMPLETE 0x02
#define ASC_ILLEGAL_OPCODE 0x20
#define ASC_LOGICAL_BLOCK_OOR 0x21
#define ASC_INV_FIELD_IN_CMD_PACKET 0x24
@@ -530,6 +531,7 @@ void ide_dma_error(IDEState *s);
void ide_atapi_cmd_ok(IDEState *s);
void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc);
+void ide_atapi_dma_restart(IDEState *s);
void ide_atapi_io_error(IDEState *s, int ret);
void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val);
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index bee5ad39fe..e3f2054a90 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -235,6 +235,17 @@ static void bmdma_restart_bh(void *opaque)
}
} else if (error_status & IDE_RETRY_FLUSH) {
ide_flush_cache(bmdma_active_if(bm));
+ } else {
+ IDEState *s = bmdma_active_if(bm);
+
+ /*
+ * We've not got any bits to tell us about ATAPI - but
+ * we do have the end_transfer_func that tells us what
+ * we're trying to do.
+ */
+ if (s->end_transfer_func == ide_atapi_cmd) {
+ ide_atapi_dma_restart(s);
+ }
}
}
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 50b42b3808..4c406cff7a 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -39,6 +39,7 @@ typedef struct BlockAcctStats {
uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
uint64_t nr_ops[BLOCK_MAX_IOTYPE];
uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
+ uint64_t merged[BLOCK_MAX_IOTYPE];
uint64_t wr_highest_sector;
} BlockAcctStats;
@@ -53,5 +54,7 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
unsigned int nb_sectors);
+void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
+ int num_requests);
#endif
diff --git a/include/block/block.h b/include/block/block.h
index 3082d2b80e..321295e5f7 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -83,6 +83,9 @@ typedef enum {
#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+#define BDRV_REQUEST_MAX_SECTORS MIN(SIZE_MAX >> BDRV_SECTOR_BITS, \
+ INT_MAX >> BDRV_SECTOR_BITS)
+
/*
* Allocation status flags
* BDRV_BLOCK_DATA: data is read from bs->file or another file
@@ -378,6 +381,7 @@ BlockDriverState *bdrv_next(BlockDriverState *bs);
int bdrv_is_encrypted(BlockDriverState *bs);
int bdrv_key_required(BlockDriverState *bs);
int bdrv_set_key(BlockDriverState *bs, const char *key);
+void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp);
int bdrv_query_missing_keys(void);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
void *opaque);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e264be97b2..7ad19503df 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -412,6 +412,10 @@ struct BlockDriverState {
/* The error object in use for blocking operations on backing_hd */
Error *backing_blocker;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+ NotifierWithReturn write_threshold_notifier;
};
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 348302c90b..b75959556c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -75,7 +75,7 @@ enum {
ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read);
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
- off_t *size, size_t *blocksize);
+ off_t *size, size_t *blocksize, Error **errp);
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize);
ssize_t nbd_send_request(int csock, struct nbd_request *request);
ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply);
diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h
new file mode 100644
index 0000000000..f1b899cd5f
--- /dev/null
+++ b/include/block/write-threshold.h
@@ -0,0 +1,64 @@
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Francesco Romani <fromani@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef BLOCK_WRITE_THRESHOLD_H
+#define BLOCK_WRITE_THRESHOLD_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+
+/*
+ * bdrv_write_threshold_set:
+ *
+ * Set the write threshold for block devices, in bytes.
+ * Notify when a write exceeds the threshold, meaning the device
+ * is becoming full, so it can be transparently resized.
+ * To be used with thin-provisioned block devices.
+ *
+ * Use threshold_bytes == 0 to disable.
+ */
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
+
+/*
+ * bdrv_write_threshold_get
+ *
+ * Get the configured write threshold, in bytes.
+ * Zero means no threshold configured.
+ */
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_is_set
+ *
+ * Tell if a write threshold is set for a given BDS.
+ */
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_exceeded
+ *
+ * Return the extent of a write request that exceeded the threshold,
+ * or zero if the request is below the threshold.
+ * Return zero also if the threshold was not set.
+ *
+ * NOTE: here we assume the following holds for each request this code
+ * deals with:
+ *
+ * assert((req->offset + req->bytes) <= UINT64_MAX)
+ *
+ * Please not there is *not* an actual C assert().
+ */
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req);
+
+#endif
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 4652b70b5d..fc7d311a43 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -113,6 +113,7 @@ struct VirtIOBlkConf
uint32_t scsi;
uint32_t config_wce;
uint32_t data_plane;
+ uint32_t request_merging;
};
struct VirtIOBlockDataPlane;
@@ -134,27 +135,32 @@ typedef struct VirtIOBlock {
struct VirtIOBlockDataPlane *dataplane;
} VirtIOBlock;
-typedef struct MultiReqBuffer {
- BlockRequest blkreq[32];
- unsigned int num_writes;
-} MultiReqBuffer;
-
typedef struct VirtIOBlockReq {
+ int64_t sector_num;
VirtIOBlock *dev;
VirtQueueElement elem;
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out;
QEMUIOVector qiov;
struct VirtIOBlockReq *next;
+ struct VirtIOBlockReq *mr_next;
BlockAcctCookie acct;
} VirtIOBlockReq;
+#define VIRTIO_BLK_MAX_MERGE_REQS 32
+
+typedef struct MultiReqBuffer {
+ VirtIOBlockReq *reqs[VIRTIO_BLK_MAX_MERGE_REQS];
+ unsigned int num_reqs;
+ bool is_write;
+} MultiReqBuffer;
+
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
void virtio_blk_free_request(VirtIOBlockReq *req);
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb);
-void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb);
+void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb);
#endif
diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h
index eeaf0cb981..986260f466 100644
--- a/include/qapi/qmp/qerror.h
+++ b/include/qapi/qmp/qerror.h
@@ -37,9 +37,6 @@ void qerror_report_err(Error *err);
#define QERR_BASE_NOT_FOUND \
ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
-#define QERR_BLOCK_JOB_NOT_ACTIVE \
- ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
-
#define QERR_BLOCK_JOB_NOT_READY \
ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed"
@@ -52,9 +49,6 @@ void qerror_report_err(Error *err);
#define QERR_BUS_NOT_FOUND \
ERROR_CLASS_GENERIC_ERROR, "Bus '%s' not found"
-#define QERR_DEVICE_ENCRYPTED \
- ERROR_CLASS_DEVICE_ENCRYPTED, "'%s' (%s) is encrypted"
-
#define QERR_DEVICE_HAS_NO_MEDIUM \
ERROR_CLASS_GENERIC_ERROR, "Device '%s' has no medium"
@@ -70,9 +64,6 @@ void qerror_report_err(Error *err);
#define QERR_DEVICE_NO_HOTPLUG \
ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging"
-#define QERR_DEVICE_NOT_ENCRYPTED \
- ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not encrypted"
-
#define QERR_DEVICE_NOT_FOUND \
ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found"
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8871a021d0..aab12b982c 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -127,6 +127,7 @@ int blk_is_inserted(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int blk_get_flags(BlockBackend *blk);
+int blk_get_max_transfer_length(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
void *blk_blockalign(BlockBackend *blk, size_t size);
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
diff --git a/monitor.c b/monitor.c
index 5a24311844..c3cc060b45 100644
--- a/monitor.c
+++ b/monitor.c
@@ -5368,9 +5368,12 @@ static void bdrv_password_cb(void *opaque, const char *password,
Monitor *mon = opaque;
BlockDriverState *bs = readline_opaque;
int ret = 0;
+ Error *local_err = NULL;
- if (bdrv_set_key(bs, password) != 0) {
- monitor_printf(mon, "invalid password\n");
+ bdrv_add_key(bs, password, &local_err);
+ if (local_err) {
+ monitor_printf(mon, "%s\n", error_get_pretty(local_err));
+ error_free(local_err);
ret = -EPERM;
}
if (mon->password_completion_cb)
@@ -5388,17 +5391,20 @@ int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
BlockCompletionFunc *completion_cb,
void *opaque)
{
+ Error *local_err = NULL;
int err;
- if (!bdrv_key_required(bs)) {
+ bdrv_add_key(bs, NULL, &local_err);
+ if (!local_err) {
if (completion_cb)
completion_cb(opaque, 0);
return 0;
}
+ /* Need a key for @bs */
+
if (monitor_ctrl_mode(mon)) {
- qerror_report(QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs),
- bdrv_get_encrypted_filename(bs));
+ qerror_report_err(local_err);
return -1;
}
diff --git a/nbd.c b/nbd.c
index 53cf82be0b..e56afbc162 100644
--- a/nbd.c
+++ b/nbd.c
@@ -494,7 +494,7 @@ fail:
}
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
- off_t *size, size_t *blocksize)
+ off_t *size, size_t *blocksize, Error **errp)
{
char buf[256];
uint64_t magic, s;
@@ -506,13 +506,13 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
rc = -EINVAL;
if (read_sync(csock, buf, 8) != 8) {
- LOG("read failed");
+ error_setg(errp, "Failed to read data");
goto fail;
}
buf[8] = '\0';
if (strlen(buf) == 0) {
- LOG("server connection closed");
+ error_setg(errp, "Server connection closed unexpectedly");
goto fail;
}
@@ -527,12 +527,12 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
qemu_isprint(buf[7]) ? buf[7] : '.');
if (memcmp(buf, "NBDMAGIC", 8) != 0) {
- LOG("Invalid magic received");
+ error_setg(errp, "Invalid magic received");
goto fail;
}
if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
- LOG("read failed");
+ error_setg(errp, "Failed to read magic");
goto fail;
}
magic = be64_to_cpu(magic);
@@ -545,52 +545,60 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
TRACE("Checking magic (opts_magic)");
if (magic != NBD_OPTS_MAGIC) {
- LOG("Bad magic received");
+ if (magic == NBD_CLIENT_MAGIC) {
+ error_setg(errp, "Server does not support export names");
+ } else {
+ error_setg(errp, "Bad magic received");
+ }
goto fail;
}
if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
- LOG("flags read failed");
+ error_setg(errp, "Failed to read server flags");
goto fail;
}
*flags = be16_to_cpu(tmp) << 16;
/* reserved for future use */
if (write_sync(csock, &reserved, sizeof(reserved)) !=
sizeof(reserved)) {
- LOG("write failed (reserved)");
+ error_setg(errp, "Failed to read reserved field");
goto fail;
}
/* write the export name */
magic = cpu_to_be64(magic);
if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
- LOG("write failed (magic)");
+ error_setg(errp, "Failed to send export name magic");
goto fail;
}
opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
- LOG("write failed (opt)");
+ error_setg(errp, "Failed to send export name option number");
goto fail;
}
namesize = cpu_to_be32(strlen(name));
if (write_sync(csock, &namesize, sizeof(namesize)) !=
sizeof(namesize)) {
- LOG("write failed (namesize)");
+ error_setg(errp, "Failed to send export name length");
goto fail;
}
if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
- LOG("write failed (name)");
+ error_setg(errp, "Failed to send export name");
goto fail;
}
} else {
TRACE("Checking magic (cli_magic)");
if (magic != NBD_CLIENT_MAGIC) {
- LOG("Bad magic received");
+ if (magic == NBD_OPTS_MAGIC) {
+ error_setg(errp, "Server requires an export name");
+ } else {
+ error_setg(errp, "Bad magic received");
+ }
goto fail;
}
}
if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
- LOG("read failed");
+ error_setg(errp, "Failed to read export length");
goto fail;
}
*size = be64_to_cpu(s);
@@ -599,19 +607,19 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
if (!name) {
if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
- LOG("read failed (flags)");
+ error_setg(errp, "Failed to read export flags");
goto fail;
}
*flags = be32_to_cpup(flags);
} else {
if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
- LOG("read failed (tmp)");
+ error_setg(errp, "Failed to read export flags");
goto fail;
}
*flags |= be32_to_cpu(tmp);
}
if (read_sync(csock, &buf, 124) != 124) {
- LOG("read failed (buf)");
+ error_setg(errp, "Failed to read reserved block");
goto fail;
}
rc = 0;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 80984d1660..a3fdaf02ba 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -257,6 +257,9 @@
#
# @cache: the cache mode used for the block device (since: 2.3)
#
+# @write_threshold: configured write threshold for the device.
+# 0 if disabled. (Since 2.3)
+#
# Since: 0.14.0
#
##
@@ -271,7 +274,8 @@
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
- '*iops_size': 'int', 'cache': 'BlockdevCacheInfo' } }
+ '*iops_size': 'int', 'cache': 'BlockdevCacheInfo',
+ 'write_threshold': 'int' } }
##
# @BlockDeviceIoStatus:
@@ -407,13 +411,20 @@
# growable sparse files (like qcow2) that are used on top
# of a physical device.
#
+# @rd_merged: Number of read requests that have been merged into another
+# request (Since 2.3).
+#
+# @wr_merged: Number of write requests that have been merged into another
+# request (Since 2.3).
+#
# Since: 0.14.0
##
{ 'type': 'BlockDeviceStats',
'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
'wr_operations': 'int', 'flush_operations': 'int',
'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
- 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
+ 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int',
+ 'rd_merged': 'int', 'wr_merged': 'int' } }
##
# @BlockStats:
@@ -1910,3 +1921,48 @@
##
{ 'enum': 'PreallocMode',
'data': [ 'off', 'metadata', 'falloc', 'full' ] }
+
+##
+# @BLOCK_WRITE_THRESHOLD
+#
+# Emitted when writes on block device reaches or exceeds the
+# configured write threshold. For thin-provisioned devices, this
+# means the device should be extended to avoid pausing for
+# disk exhaustion.
+# The event is one shot. Once triggered, it needs to be
+# re-registered with another block-set-threshold command.
+#
+# @node-name: graph node name on which the threshold was exceeded.
+#
+# @amount-exceeded: amount of data which exceeded the threshold, in bytes.
+#
+# @write-threshold: last configured threshold, in bytes.
+#
+# Since: 2.3
+##
+{ 'event': 'BLOCK_WRITE_THRESHOLD',
+ 'data': { 'node-name': 'str',
+ 'amount-exceeded': 'uint64',
+ 'write-threshold': 'uint64' } }
+
+##
+# @block-set-write-threshold
+#
+# Change the write threshold for a block drive. An event will be delivered
+# if a write to this block drive crosses the configured threshold.
+# This is useful to transparently resize thin-provisioned drives without
+# the guest OS noticing.
+#
+# @node-name: graph node name on which the threshold must be set.
+#
+# @write-threshold: configured threshold for the block device, bytes.
+# Use 0 to disable the threshold.
+#
+# Returns: Nothing on success
+# If @node name is not found on the block device graph,
+# DeviceNotFound
+#
+# Since: 2.3
+##
+{ 'command': 'block-set-write-threshold',
+ 'data': { 'node-name': 'str', 'write-threshold': 'uint64' } }
diff --git a/qemu-img.c b/qemu-img.c
index 4e9a7f5741..e148af8a3e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -35,7 +35,7 @@
#include "block/qapi.h"
#include <getopt.h>
-#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION \
+#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \
", Copyright (c) 2004-2008 Fabrice Bellard\n"
typedef struct img_cmd_t {
diff --git a/qemu-nbd.c b/qemu-nbd.c
index d222512412..4d8df08385 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -284,6 +284,7 @@ static void *nbd_client_thread(void *arg)
int fd, sock;
int ret;
pthread_t show_parts_thread;
+ Error *local_error = NULL;
sock = unix_socket_outgoing(sockpath);
if (sock < 0) {
@@ -291,8 +292,12 @@ static void *nbd_client_thread(void *arg)
}
ret = nbd_receive_negotiate(sock, NULL, &nbdflags,
- &size, &blocksize);
+ &size, &blocksize, &local_error);
if (ret < 0) {
+ if (local_error) {
+ fprintf(stderr, "%s\n", error_get_pretty(local_error));
+ error_free(local_error);
+ }
goto out_socket;
}
diff --git a/qmp-commands.hx b/qmp-commands.hx
index c5f16dd922..a85d8479e3 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2146,6 +2146,8 @@ Each json-object contain the following:
- "iops_size": I/O size when limiting by iops (json-int)
- "detect_zeroes": detect and optimize zero writing (json-string)
- Possible values: "off", "on", "unmap"
+ - "write_threshold": write offset threshold in bytes, a event will be
+ emitted if crossed. Zero if disabled (json-int)
- "image": the detail of the image, it is a json-object containing
the following:
- "filename": image file name (json-string)
@@ -2223,6 +2225,7 @@ Example:
"iops_wr_max": 0,
"iops_size": 0,
"detect_zeroes": "on",
+ "write_threshold": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
@@ -2303,6 +2306,10 @@ Each json-object contain the following:
- "flush_total_time_ns": total time spend on cache flushes in nano-seconds (json-int)
- "wr_highest_offset": Highest offset of a sector written since the
BlockDriverState has been opened (json-int)
+ - "rd_merged": number of read requests that have been merged into
+ another request (json-int)
+ - "wr_merged": number of write requests that have been merged into
+ another request (json-int)
- "parent": Contains recursively the statistics of the underlying
protocol (e.g. the host file for a qcow2 image). If there is
no underlying protocol, this field is omitted
@@ -2326,6 +2333,8 @@ Example:
"rd_total_times_ns":3465673657
"flush_total_times_ns":49653
"flush_operations":61,
+ "rd_merged":0,
+ "wr_merged":0
}
},
"stats":{
@@ -2337,7 +2346,9 @@ Example:
"flush_operations":51,
"wr_total_times_ns":313253456
"rd_total_times_ns":3465673657
- "flush_total_times_ns":49653
+ "flush_total_times_ns":49653,
+ "rd_merged":0,
+ "wr_merged":0
}
},
{
@@ -2351,7 +2362,9 @@ Example:
"flush_operations":0,
"wr_total_times_ns":0
"rd_total_times_ns":0
- "flush_total_times_ns":0
+ "flush_total_times_ns":0,
+ "rd_merged":0,
+ "wr_merged":0
}
},
{
@@ -2365,7 +2378,9 @@ Example:
"flush_operations":0,
"wr_total_times_ns":0
"rd_total_times_ns":0
- "flush_total_times_ns":0
+ "flush_total_times_ns":0,
+ "rd_merged":0,
+ "wr_merged":0
}
},
{
@@ -2379,7 +2394,9 @@ Example:
"flush_operations":0,
"wr_total_times_ns":0
"rd_total_times_ns":0
- "flush_total_times_ns":0
+ "flush_total_times_ns":0,
+ "rd_merged":0,
+ "wr_merged":0
}
}
]
@@ -3671,6 +3688,7 @@ Example:
"iops_rd_max": 0,
"iops_wr_max": 0,
"iops_size": 0,
+ "write_threshold": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
@@ -3907,3 +3925,31 @@ Move mouse pointer to absolute coordinates (20000, 400).
<- { "return": {} }
EQMP
+
+ {
+ .name = "block-set-write-threshold",
+ .args_type = "node-name:s,write-threshold:l",
+ .mhandler.cmd_new = qmp_marshal_input_block_set_write_threshold,
+ },
+
+SQMP
+block-set-write-threshold
+------------
+
+Change the write threshold for a block drive. The threshold is an offset,
+thus must be non-negative. Default is no write threshold.
+Setting the threshold to zero disables it.
+
+Arguments:
+
+- "node-name": the node name in the block driver state graph (json-string)
+- "write-threshold": the write threshold in bytes (json-int)
+
+Example:
+
+-> { "execute": "block-set-write-threshold",
+ "arguments": { "node-name": "mydev",
+ "write-threshold": 17179869184 } }
+<- { "return": {} }
+
+EQMP
diff --git a/qmp.c b/qmp.c
index 7f2d25a492..20a9e9739f 100644
--- a/qmp.c
+++ b/qmp.c
@@ -154,6 +154,7 @@ SpiceInfo *qmp_query_spice(Error **errp)
void qmp_cont(Error **errp)
{
+ Error *local_err = NULL;
BlockDriverState *bs;
if (runstate_needs_reset()) {
@@ -167,10 +168,9 @@ void qmp_cont(Error **errp)
bdrv_iostatus_reset(bs);
}
for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
- if (bdrv_key_required(bs)) {
- error_set(errp, QERR_DEVICE_ENCRYPTED,
- bdrv_get_device_name(bs),
- bdrv_get_encrypted_filename(bs));
+ bdrv_add_key(bs, NULL, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
return;
}
}
diff --git a/tests/Makefile b/tests/Makefile
index 5caccf765a..d5df16882d 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -68,6 +68,8 @@ check-unit-y += tests/check-qom-interface$(EXESUF)
gcov-files-check-qom-interface-y = qom/object.c
check-unit-y += tests/test-qemu-opts$(EXESUF)
gcov-files-test-qemu-opts-y = qom/test-qemu-opts.c
+check-unit-y += tests/test-write-threshold$(EXESUF)
+gcov-files-test-write-threshold-y = block/write-threshold.c
check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
@@ -360,6 +362,7 @@ tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
+tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(block-obj-y) libqemuutil.a libqemustub.a
ifeq ($(CONFIG_POSIX),y)
LIBS += -lutil
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index 13ff3cd7aa..00b3eaefc7 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -43,6 +43,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -218,6 +219,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -423,6 +425,7 @@ Testing:
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -607,6 +610,7 @@ Testing:
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
@@ -717,6 +721,7 @@ Testing:
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083
index 991a9d91db..1b2d3f1156 100755
--- a/tests/qemu-iotests/083
+++ b/tests/qemu-iotests/083
@@ -56,7 +56,8 @@ filter_nbd() {
#
# Filter out the TCP port number since this changes between runs.
sed -e 's#^.*nbd\.c:.*##g' \
- -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g'
+ -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g' \
+ -e 's#\(exportname=foo\|PORT\): Failed to .*$#\1#'
}
check_disconnect() {
diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out
index 85ee8d6dd7..8c1441bf4f 100644
--- a/tests/qemu-iotests/083.out
+++ b/tests/qemu-iotests/083.out
@@ -1,163 +1,138 @@
QA output created by 083
=== Check disconnect before neg1 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect after neg1 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect 8 neg1 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect 16 neg1 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect before export ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect after export ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect 4 export ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect 12 export ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect 16 export ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect before neg2 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect after neg2 ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect 8 neg2 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect 10 neg2 ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo
no file open, try 'help open'
=== Check disconnect before request ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect after request ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect before reply ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect after reply ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect 4 reply ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect 8 reply ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect before data ===
-qemu-io: can't open device nbd:127.0.0.1:PORT:exportname=foo: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
=== Check disconnect after data ===
-read failed: Input/output error
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Check disconnect before neg-classic ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT
no file open, try 'help open'
=== Check disconnect 8 neg-classic ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT
no file open, try 'help open'
=== Check disconnect 16 neg-classic ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT
no file open, try 'help open'
=== Check disconnect 24 neg-classic ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT
no file open, try 'help open'
=== Check disconnect 28 neg-classic ===
-
-qemu-io: can't open device nbd:127.0.0.1:PORT: Could not open image: Invalid argument
+qemu-io: can't open device nbd:127.0.0.1:PORT
no file open, try 'help open'
=== Check disconnect after neg-classic ===
-qemu-io: can't open device nbd:127.0.0.1:PORT: Could not read image for determining its format: Input/output error
-no file open, try 'help open'
+read failed: Input/output error
*** done
diff --git a/tests/qemu-iotests/100 b/tests/qemu-iotests/100
index 9124aba760..7c1b235b51 100755
--- a/tests/qemu-iotests/100
+++ b/tests/qemu-iotests/100
@@ -55,6 +55,8 @@ echo "== verify pattern =="
$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+_cleanup_test_img
+
echo
echo "== Sequential requests =="
_make_test_img $size
@@ -66,6 +68,8 @@ $QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0xce 4k 4k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 8k 4k" "$TEST_IMG" | _filter_qemu_io
+_cleanup_test_img
+
echo
echo "== Superset overlapping requests =="
_make_test_img $size
@@ -79,6 +83,8 @@ $QEMU_IO -c "read -P 0xcd 0 1k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0xcd 3k 1k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+_cleanup_test_img
+
echo
echo "== Subset overlapping requests =="
_make_test_img $size
@@ -92,6 +98,8 @@ $QEMU_IO -c "read -P 0xce 0 1k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0xce 3k 1k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+_cleanup_test_img
+
echo
echo "== Head overlapping requests =="
_make_test_img $size
@@ -104,6 +112,8 @@ echo "== verify pattern =="
$QEMU_IO -c "read -P 0xce 2k 2k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+_cleanup_test_img
+
echo
echo "== Tail overlapping requests =="
_make_test_img $size
@@ -116,6 +126,8 @@ echo "== verify pattern =="
$QEMU_IO -c "read -P 0xce 0k 2k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+_cleanup_test_img
+
echo
echo "== Disjoint requests =="
_make_test_img $size
diff --git a/tests/qemu-iotests/104 b/tests/qemu-iotests/104
index b471aa5bb1..f32752bb6f 100755
--- a/tests/qemu-iotests/104
+++ b/tests/qemu-iotests/104
@@ -28,11 +28,7 @@ here=`pwd`
tmp=/tmp/$$
status=1 # failure is the default!
-_cleanup()
-{
- _cleanup_test_img
-}
-trap "_cleanup; exit \$status" 0 1 2 3 15
+trap "exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
@@ -47,8 +43,9 @@ echo
image_sizes="1024 1234"
for s in $image_sizes; do
- _make_test_img $s | _filter_img_create
+ _make_test_img $s
_img_info | _filter_img_info
+ _cleanup_test_img
done
# success, all done
diff --git a/tests/qemu-iotests/116 b/tests/qemu-iotests/116
new file mode 100755
index 0000000000..713ed484ba
--- /dev/null
+++ b/tests/qemu-iotests/116
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Test error code paths for invalid QED images
+#
+# The aim of this test is to exercise the error paths in qed_open() to ensure
+# there are no crashes with invalid input files.
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qed
+_supported_proto generic
+_supported_os Linux
+
+
+size=128M
+
+echo
+echo "== truncated header cluster =="
+_make_test_img $size
+truncate -s 512 "$TEST_IMG"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== invalid header magic =="
+_make_test_img $size
+poke_file "$TEST_IMG" "0" "QEDX"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== invalid cluster size =="
+_make_test_img $size
+poke_file "$TEST_IMG" "4" "\xff\xff\xff\xff"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== invalid table size =="
+_make_test_img $size
+poke_file "$TEST_IMG" "8" "\xff\xff\xff\xff"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== invalid header size =="
+_make_test_img $size
+poke_file "$TEST_IMG" "12" "\xff\xff\xff\xff"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== invalid L1 table offset =="
+_make_test_img $size
+poke_file "$TEST_IMG" "40" "\xff\xff\xff\xff\xff\xff\xff\xff"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== invalid image size =="
+_make_test_img $size
+poke_file "$TEST_IMG" "48" "\xff\xff\xff\xff\xff\xff\xff\xff"
+$QEMU_IO -f "$IMGFMT" -c "read 0 $size" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/116.out b/tests/qemu-iotests/116.out
new file mode 100644
index 0000000000..b679ceea63
--- /dev/null
+++ b/tests/qemu-iotests/116.out
@@ -0,0 +1,37 @@
+QA output created by 116
+
+== truncated header cluster ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument
+no file open, try 'help open'
+
+== invalid header magic ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Image not in QED format
+no file open, try 'help open'
+
+== invalid cluster size ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument
+no file open, try 'help open'
+
+== invalid table size ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument
+no file open, try 'help open'
+
+== invalid header size ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument
+no file open, try 'help open'
+
+== invalid L1 table offset ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument
+no file open, try 'help open'
+
+== invalid image size ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+qemu-io: can't open device TEST_DIR/t.qed: Could not open 'TEST_DIR/t.qed': Invalid argument
+no file open, try 'help open'
+*** done
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index b73c70be95..06e1bb045f 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -200,6 +200,7 @@ _filter_img_info()
sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
-e "s#$IMGFMT#IMGFMT#g" \
+ -e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \
-e "/encrypted: yes/d" \
-e "/cluster_size: [0-9]\\+/d" \
-e "/table_size: [0-9]\\+/d" \
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index aa093d9d84..22d3514041 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -153,7 +153,7 @@ _make_test_img()
# Start an NBD server on the image file, which is what we'll be talking to
if [ $IMGPROTO = "nbd" ]; then
- eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 $TEST_IMG_FILE &"
+ eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE &"
QEMU_NBD_PID=$!
sleep 1 # FIXME: qemu-nbd needs to be listening before we continue
fi
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index f8bf354156..4b2b93bc19 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -116,3 +116,4 @@
111 rw auto quick
113 rw auto quick
114 rw auto quick
+116 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 87002e0e2c..241b5ee9dd 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -288,7 +288,7 @@ def main(supported_fmts=[], supported_oses=['linux']):
if supported_fmts and (imgfmt not in supported_fmts):
notrun('not suitable for this image format: %s' % imgfmt)
- if sys.platform not in supported_oses:
+ if True not in [sys.platform.startswith(x) for x in supported_oses]:
notrun('not suitable for this OS: %s' % sys.platform)
# We need to filter out the time taken from the output so that qemu-iotest
diff --git a/tests/test-write-threshold.c b/tests/test-write-threshold.c
new file mode 100644
index 0000000000..faffa7b855
--- /dev/null
+++ b/tests/test-write-threshold.c
@@ -0,0 +1,119 @@
+/*
+ * Test block device write threshold
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <glib.h>
+#include <stdint.h>
+#include "block/block_int.h"
+#include "block/write-threshold.h"
+
+
+static void test_threshold_not_set_on_init(void)
+{
+ uint64_t res;
+ BlockDriverState bs;
+ memset(&bs, 0, sizeof(bs));
+
+ g_assert(!bdrv_write_threshold_is_set(&bs));
+
+ res = bdrv_write_threshold_get(&bs);
+ g_assert_cmpint(res, ==, 0);
+}
+
+static void test_threshold_set_get(void)
+{
+ uint64_t threshold = 4 * 1024 * 1024;
+ uint64_t res;
+ BlockDriverState bs;
+ memset(&bs, 0, sizeof(bs));
+
+ bdrv_write_threshold_set(&bs, threshold);
+
+ g_assert(bdrv_write_threshold_is_set(&bs));
+
+ res = bdrv_write_threshold_get(&bs);
+ g_assert_cmpint(res, ==, threshold);
+}
+
+static void test_threshold_multi_set_get(void)
+{
+ uint64_t threshold1 = 4 * 1024 * 1024;
+ uint64_t threshold2 = 15 * 1024 * 1024;
+ uint64_t res;
+ BlockDriverState bs;
+ memset(&bs, 0, sizeof(bs));
+
+ bdrv_write_threshold_set(&bs, threshold1);
+ bdrv_write_threshold_set(&bs, threshold2);
+ res = bdrv_write_threshold_get(&bs);
+ g_assert_cmpint(res, ==, threshold2);
+}
+
+static void test_threshold_not_trigger(void)
+{
+ uint64_t amount = 0;
+ uint64_t threshold = 4 * 1024 * 1024;
+ BlockDriverState bs;
+ BdrvTrackedRequest req;
+
+ memset(&bs, 0, sizeof(bs));
+ memset(&req, 0, sizeof(req));
+ req.offset = 1024;
+ req.bytes = 1024;
+
+ bdrv_write_threshold_set(&bs, threshold);
+ amount = bdrv_write_threshold_exceeded(&bs, &req);
+ g_assert_cmpuint(amount, ==, 0);
+}
+
+
+static void test_threshold_trigger(void)
+{
+ uint64_t amount = 0;
+ uint64_t threshold = 4 * 1024 * 1024;
+ BlockDriverState bs;
+ BdrvTrackedRequest req;
+
+ memset(&bs, 0, sizeof(bs));
+ memset(&req, 0, sizeof(req));
+ req.offset = (4 * 1024 * 1024) - 1024;
+ req.bytes = 2 * 1024;
+
+ bdrv_write_threshold_set(&bs, threshold);
+ amount = bdrv_write_threshold_exceeded(&bs, &req);
+ g_assert_cmpuint(amount, >=, 1024);
+}
+
+typedef struct TestStruct {
+ const char *name;
+ void (*func)(void);
+} TestStruct;
+
+
+int main(int argc, char **argv)
+{
+ size_t i;
+ TestStruct tests[] = {
+ { "/write-threshold/not-set-on-init",
+ test_threshold_not_set_on_init },
+ { "/write-threshold/set-get",
+ test_threshold_set_get },
+ { "/write-threshold/multi-set-get",
+ test_threshold_multi_set_get },
+ { "/write-threshold/not-trigger",
+ test_threshold_not_trigger },
+ { "/write-threshold/trigger",
+ test_threshold_trigger },
+ { NULL, NULL }
+ };
+
+ g_test_init(&argc, &argv, NULL);
+ for (i = 0; tests[i].name != NULL; i++) {
+ g_test_add_func(tests[i].name, tests[i].func);
+ }
+ return g_test_run();
+}
diff --git a/trace-events b/trace-events
index 907da7ed8a..57f357f7f7 100644
--- a/trace-events
+++ b/trace-events
@@ -116,6 +116,7 @@ virtio_blk_req_complete(void *req, int status) "req %p status %d"
virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
+virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t sector, size_t nsectors, bool is_write) "mrb %p start %d num_reqs %d sector %"PRIu64" nsectors %zu is_write %d"
# hw/block/dataplane/virtio-blk.c
virtio_blk_data_plane_start(void *s) "dataplane %p"