aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rw-r--r--audio/ossaudio.c1
-rw-r--r--block.c32
-rw-r--r--block/commit.c11
-rw-r--r--block/io.c306
-rw-r--r--block/linux-aio.c88
-rw-r--r--block/mirror.c55
-rw-r--r--block/null.c20
-rw-r--r--block/qcow.c14
-rw-r--r--block/qcow2-cache.c5
-rw-r--r--block/qcow2-cluster.c147
-rw-r--r--block/qcow2.c239
-rw-r--r--block/qcow2.h18
-rw-r--r--block/raw-aio.h3
-rw-r--r--block/raw-posix.c62
-rw-r--r--block/rbd.c38
-rw-r--r--block/sheepdog.c13
-rw-r--r--blockdev.c42
-rw-r--r--blockjob.c6
-rw-r--r--bsd-user/elfload.c1
-rw-r--r--bsd-user/main.c1
-rw-r--r--bsd-user/mmap.c1
-rw-r--r--bsd-user/syscall.c1
-rwxr-xr-xconfigure57
-rw-r--r--contrib/ivshmem-server/ivshmem-server.c1
-rw-r--r--exec.c1
-rw-r--r--hmp.c9
-rw-r--r--hw/block/m25p80.c2
-rw-r--r--hw/block/xen_disk.c1
-rw-r--r--hw/char/xen_console.c1
-rw-r--r--hw/display/xenfb.c1
-rw-r--r--hw/i386/kvm/pci-assign.c1
-rw-r--r--hw/i386/pc_piix.c16
-rw-r--r--hw/i386/pc_q35.c13
-rw-r--r--hw/misc/ivshmem.c2
-rw-r--r--hw/misc/pc-testdev.c3
-rw-r--r--hw/net/net_tx_pkt.c1
-rw-r--r--hw/net/net_tx_pkt.h1
-rw-r--r--hw/net/xen_nic.c1
-rw-r--r--hw/scsi/esp.c22
-rw-r--r--hw/scsi/scsi-disk.c8
-rw-r--r--hw/usb/xen-usb.c1
-rw-r--r--hw/vfio/common.c1
-rw-r--r--hw/vfio/pci.c1
-rw-r--r--hw/virtio/virtio-balloon.c4
-rw-r--r--hw/xen/xen_backend.c1
-rw-r--r--hw/xen/xen_pt_msi.c1
-rw-r--r--include/block/block.h15
-rw-r--r--include/block/block_int.h31
-rw-r--r--include/block/nbd.h19
-rw-r--r--include/hw/i386/pc.h9
-rw-r--r--include/hw/scsi/esp.h3
-rw-r--r--include/migration/migration.h28
-rw-r--r--include/qemu/osdep.h2
-rw-r--r--include/qemu/qdist.h1
-rw-r--r--include/qemu/qht.h1
-rw-r--r--include/sysemu/os-posix.h1
-rw-r--r--kvm-all.c9
-rw-r--r--linux-user/elfload.c1
-rw-r--r--linux-user/flatload.c1
-rw-r--r--linux-user/main.c1
-rw-r--r--linux-user/mmap.c1
-rw-r--r--linux-user/strace.c1
-rw-r--r--linux-user/syscall.c1
-rw-r--r--migration/exec.c4
-rw-r--r--migration/fd.c4
-rw-r--r--migration/migration.c91
-rw-r--r--migration/postcopy-ram.c1
-rw-r--r--migration/ram.c5
-rw-r--r--migration/rdma.c4
-rw-r--r--migration/socket.c6
-rw-r--r--migration/tls.c18
-rw-r--r--nbd/client.c98
-rw-r--r--nbd/server.c191
-rw-r--r--net/netmap.c1
-rw-r--r--os-posix.c1
-rw-r--r--qapi-schema.json16
-rw-r--r--qemu-char.c1
-rw-r--r--qemu-img.c2
-rw-r--r--qemu-nbd.c4
-rw-r--r--qemu-options.hx2
-rwxr-xr-xscripts/clean-includes3
-rw-r--r--target-arm/kvm.c1
-rw-r--r--target-arm/kvm32.c1
-rw-r--r--target-arm/kvm64.c1
-rw-r--r--target-i386/cpu.c276
-rw-r--r--target-i386/cpu.h8
-rw-r--r--target-i386/kvm.c10
-rw-r--r--target-mips/kvm.c1
-rw-r--r--target-ppc/kvm.c1
-rw-r--r--target-s390x/kvm.c1
-rw-r--r--tests/Makefile.include2
-rw-r--r--tests/e1000e-test.c1
-rw-r--r--tests/i440fx-test.c1
-rw-r--r--tests/ivshmem-test.c1
-rw-r--r--tests/libqtest.c2
-rw-r--r--tests/postcopy-test.c453
-rw-r--r--tests/qemu-iotests/087.out12
-rwxr-xr-xtests/qemu-iotests/0952
-rwxr-xr-xtests/qemu-iotests/155261
-rw-r--r--tests/qemu-iotests/155.out5
-rwxr-xr-xtests/qemu-iotests/156174
-rw-r--r--tests/qemu-iotests/156.out48
-rw-r--r--tests/qemu-iotests/README3
-rw-r--r--tests/qemu-iotests/group2
-rw-r--r--tests/qht-bench.c1
-rw-r--r--tests/test-qdist.c1
-rw-r--r--tests/test-qht-par.c1
-rw-r--r--tests/test-qht.c1
-rw-r--r--tests/vhost-user-bridge.c1
-rw-r--r--tests/vhost-user-test.c1
-rw-r--r--trace-events8
-rw-r--r--translate-all.c2
-rw-r--r--util/cutils.c8
-rw-r--r--util/hbitmap.c3
-rw-r--r--util/memfd.c2
-rw-r--r--util/mmap-alloc.c1
-rw-r--r--util/osdep.c4
-rw-r--r--util/oslib-posix.c1
-rw-r--r--util/qdist.c1
-rw-r--r--util/qht.c1
-rw-r--r--vl.c47
-rw-r--r--xen-hvm.c1
-rw-r--r--xen-mapcache.c1
124 files changed, 2218 insertions, 970 deletions
diff --git a/Makefile b/Makefile
index ed4032a373..53e4119c47 100644
--- a/Makefile
+++ b/Makefile
@@ -498,12 +498,12 @@ test speed: all
.PHONY: ctags
ctags:
- rm -f $@
+ rm -f tags
find "$(SRC_PATH)" -name '*.[hc]' -exec ctags --append {} +
.PHONY: TAGS
TAGS:
- rm -f $@
+ rm -f TAGS
find "$(SRC_PATH)" -name '*.[hc]' -exec etags --append {} +
cscope:
diff --git a/audio/ossaudio.c b/audio/ossaudio.c
index a0d9cda1ec..0edd7ea5fe 100644
--- a/audio/ossaudio.c
+++ b/audio/ossaudio.c
@@ -22,7 +22,6 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/soundcard.h>
#include "qemu-common.h"
diff --git a/block.c b/block.c
index f54bc25e8a..b331eb9d38 100644
--- a/block.c
+++ b/block.c
@@ -684,6 +684,10 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
/* For temporary files, unconditional cache=unsafe is fine */
qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
+
+ /* aio=native doesn't work for cache.direct=off, so disable it for the
+ * temporary snapshot */
+ *child_flags &= ~BDRV_O_NATIVE_AIO;
}
/*
@@ -937,8 +941,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
goto fail_opts;
}
- bs->request_alignment = 512;
- bs->zero_beyond_eof = true;
+ bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
@@ -2192,7 +2195,6 @@ static void bdrv_close(BlockDriverState *bs)
bs->encrypted = 0;
bs->valid_key = 0;
bs->sg = 0;
- bs->zero_beyond_eof = false;
QDECREF(bs->options);
QDECREF(bs->explicit_options);
bs->options = NULL;
@@ -2224,9 +2226,23 @@ void bdrv_close_all(void)
static void change_parent_backing_link(BlockDriverState *from,
BlockDriverState *to)
{
- BdrvChild *c, *next;
+ BdrvChild *c, *next, *to_c;
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ if (c->role == &child_backing) {
+ /* @from is generally not allowed to be a backing file, except for
+ * when @to is the overlay. In that case, @from may not be replaced
+ * by @to as @to's backing node. */
+ QLIST_FOREACH(to_c, &to->children, next) {
+ if (to_c == c) {
+ break;
+ }
+ }
+ if (to_c) {
+ continue;
+ }
+ }
+
assert(c->role != &child_backing);
bdrv_ref(to);
bdrv_replace_child(c, to);
@@ -2275,14 +2291,6 @@ void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
change_parent_backing_link(old, new);
- /* Change backing files if a previously independent node is added to the
- * chain. For active commit, we replace top by its own (indirect) backing
- * file and don't do anything here so we don't build a loop. */
- if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
- bdrv_set_backing_hd(new, backing_bs(old));
- bdrv_set_backing_hd(old, NULL);
- }
-
bdrv_unref(old);
}
diff --git a/block/commit.c b/block/commit.c
index 8a00e1146c..444333ba65 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -236,6 +236,11 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
+ s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
+ if (!s) {
+ return;
+ }
+
orig_base_flags = bdrv_get_flags(base);
orig_overlay_flags = bdrv_get_flags(overlay_bs);
@@ -252,16 +257,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
bdrv_reopen_multiple(reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
+ block_job_unref(&s->common);
return;
}
}
- s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
- if (!s) {
- return;
- }
-
s->base = blk_new();
blk_insert_bs(s->base, base);
diff --git a/block/io.c b/block/io.c
index fb99a7151c..ebdb9d834c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -289,15 +289,21 @@ void bdrv_drain_all(void)
bool busy = true;
BlockDriverState *bs;
BdrvNextIterator it;
+ BlockJob *job = NULL;
GSList *aio_ctxs = NULL, *ctx;
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_pause(job);
+ aio_context_release(aio_context);
+ }
+
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_pause(bs->job);
- }
bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
@@ -340,12 +346,18 @@ void bdrv_drain_all(void)
aio_context_acquire(aio_context);
bdrv_io_unplugged_end(bs);
bdrv_parent_drained_end(bs);
- if (bs->job) {
- block_job_resume(bs->job);
- }
aio_context_release(aio_context);
}
g_slist_free(aio_ctxs);
+
+ job = NULL;
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_resume(job);
+ aio_context_release(aio_context);
+ }
}
/**
@@ -404,12 +416,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
}
/**
- * Round a region to cluster boundaries
+ * Round a region to cluster boundaries (sector-based)
*/
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
@@ -424,6 +436,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
}
}
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_offset = offset;
+ *cluster_bytes = bytes;
+ } else {
+ int64_t c = bdi.cluster_size;
+ *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+ *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ }
+}
+
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
@@ -680,6 +712,18 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
}
}
+int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+ int ret;
+
+ ret = bdrv_prwv_co(bs, offset, qiov, false, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return qiov->size;
+}
+
int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
{
QEMUIOVector qiov;
@@ -687,19 +731,13 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
.iov_base = (void *)buf,
.iov_len = bytes,
};
- int ret;
if (bytes < 0) {
return -EINVAL;
}
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
- if (ret < 0) {
- return ret;
- }
-
- return bytes;
+ return bdrv_preadv(bs, offset, &qiov);
}
int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
@@ -776,6 +814,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
int64_t sector_num;
unsigned int nb_sectors;
+ assert(!(flags & ~BDRV_REQ_MASK));
+
if (drv->bdrv_co_preadv) {
return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
}
@@ -815,6 +855,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
unsigned int nb_sectors;
int ret;
+ assert(!(flags & ~BDRV_REQ_MASK));
+
if (drv->bdrv_co_pwritev) {
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
flags & bs->supported_write_flags);
@@ -861,7 +903,7 @@ emulate_flags:
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
@@ -873,21 +915,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
struct iovec iov;
QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
+ int64_t cluster_offset;
+ unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
+ trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+ cluster_offset, cluster_bytes);
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_len = cluster_bytes;
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
if (bounce_buffer == NULL) {
ret = -ENOMEM;
@@ -896,8 +937,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
&bounce_qiov, 0);
if (ret < 0) {
goto err;
@@ -905,16 +945,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_pwrite_zeroes(bs,
- cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
- 0);
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
*/
- ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
- cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
&bounce_qiov, 0);
}
@@ -926,9 +962,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
goto err;
}
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
+ skip_bytes = offset - cluster_offset;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
err:
qemu_vfree(bounce_buffer);
@@ -944,15 +979,15 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
+ int64_t total_bytes, max_bytes;
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ assert(!(flags & ~BDRV_REQ_MASK));
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -969,59 +1004,50 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ unsigned int nb_sectors = end_sector - start_sector;
int pnum;
- ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
goto out;
}
}
/* Forward the request to the BlockDriver */
- if (!bs->zero_beyond_eof) {
- ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
- } else {
- /* Read zeros after EOF */
- int64_t total_sectors, max_nb_sectors;
-
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- ret = total_sectors;
- goto out;
- }
+ total_bytes = bdrv_getlength(bs);
+ if (total_bytes < 0) {
+ ret = total_bytes;
+ goto out;
+ }
- max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
- align >> BDRV_SECTOR_BITS);
- if (nb_sectors < max_nb_sectors) {
- ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
- } else if (max_nb_sectors > 0) {
- QEMUIOVector local_qiov;
+ max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+ if (bytes < max_bytes) {
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
+ } else if (max_bytes > 0) {
+ QEMUIOVector local_qiov;
- qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, 0,
- max_nb_sectors * BDRV_SECTOR_SIZE);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes);
- ret = bdrv_driver_preadv(bs, offset,
- max_nb_sectors * BDRV_SECTOR_SIZE,
- &local_qiov, 0);
+ ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0);
- qemu_iovec_destroy(&local_qiov);
- } else {
- ret = 0;
- }
+ qemu_iovec_destroy(&local_qiov);
+ } else {
+ ret = 0;
+ }
- /* Reading beyond end of file is supposed to produce zeroes */
- if (ret == 0 && total_sectors < sector_num + nb_sectors) {
- uint64_t offset = MAX(0, total_sectors - sector_num);
- uint64_t bytes = (sector_num + nb_sectors - offset) *
- BDRV_SECTOR_SIZE;
- qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
- }
+ /* Reading beyond end of file is supposed to produce zeroes */
+ if (ret == 0 && total_bytes < offset + bytes) {
+ uint64_t zero_offset = MAX(0, total_bytes - offset);
+ uint64_t zero_bytes = offset + bytes - zero_offset;
+ qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes);
}
out:
@@ -1038,8 +1064,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -1235,13 +1260,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
bool waited;
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ assert(!(flags & ~BDRV_REQ_MASK));
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
@@ -1263,22 +1287,21 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
- ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, flags);
+ ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
} else {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
if (ret >= 0) {
- bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+ bs->total_sectors = MAX(bs->total_sectors, end_sector);
}
return ret;
@@ -1293,7 +1316,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
@@ -1380,8 +1403,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
BdrvRequestFlags flags)
{
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -1824,6 +1846,62 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
+typedef struct BdrvVmstateCo {
+ BlockDriverState *bs;
+ QEMUIOVector *qiov;
+ int64_t pos;
+ bool is_read;
+ int ret;
+} BdrvVmstateCo;
+
+static int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ } else if (drv->bdrv_load_vmstate) {
+ return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
+ : drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (bs->file) {
+ return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+ }
+
+ return -ENOTSUP;
+}
+
+static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
+{
+ BdrvVmstateCo *co = opaque;
+ co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
+}
+
+static inline int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
+{
+ if (qemu_in_coroutine()) {
+ return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+ } else {
+ BdrvVmstateCo data = {
+ .bs = bs,
+ .qiov = qiov,
+ .pos = pos,
+ .is_read = is_read,
+ .ret = -EINPROGRESS,
+ };
+ Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry);
+
+ qemu_coroutine_enter(co, &data);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(bs), true);
+ }
+ return data.ret;
+ }
+}
+
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
@@ -1832,37 +1910,45 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
.iov_base = (void *) buf,
.iov_len = size,
};
+ int ret;
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_writev_vmstate(bs, &qiov, pos);
+
+ ret = bdrv_writev_vmstate(bs, &qiov, pos);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return size;
}
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- BlockDriver *drv = bs->drv;
+ return bdrv_rw_vmstate(bs, qiov, pos, false);
+}
- if (!drv) {
- return -ENOMEDIUM;
- } else if (drv->bdrv_save_vmstate) {
- return drv->bdrv_save_vmstate(bs, qiov, pos);
- } else if (bs->file) {
- return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ int ret;
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ ret = bdrv_readv_vmstate(bs, &qiov, pos);
+ if (ret < 0) {
+ return ret;
}
- return -ENOTSUP;
+ return size;
}
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (drv->bdrv_load_vmstate)
- return drv->bdrv_load_vmstate(bs, buf, pos, size);
- if (bs->file)
- return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
- return -ENOTSUP;
+ return bdrv_rw_vmstate(bs, qiov, pos, true);
}
/**************************************************************/
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 90ec98ee23..e468960146 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -11,8 +11,10 @@
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
+#include "block/block.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
#include <libaio.h>
@@ -30,6 +32,7 @@
struct qemu_laiocb {
BlockAIOCB common;
+ Coroutine *co;
LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
@@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
}
}
}
- laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_unref(laiocb);
+ laiocb->ret = ret;
+ if (laiocb->co) {
+ qemu_coroutine_enter(laiocb->co, NULL);
+ } else {
+ laiocb->common.cb(laiocb->common.opaque, ret);
+ qemu_aio_unref(laiocb);
+ }
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -141,6 +149,8 @@ static void qemu_laio_completion_bh(void *opaque)
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
+
+ qemu_bh_cancel(s->completion_bh);
}
static void qemu_laio_completion_cb(EventNotifier *e)
@@ -148,7 +158,7 @@ static void qemu_laio_completion_cb(EventNotifier *e)
LinuxAioState *s = container_of(e, LinuxAioState, e);
if (event_notifier_test_and_clear(&s->e)) {
- qemu_bh_schedule(s->completion_bh);
+ qemu_laio_completion_bh(s);
}
}
@@ -230,22 +240,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
}
}
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+ int type)
{
- struct qemu_laiocb *laiocb;
- struct iocb *iocbs;
- off_t offset = sector_num * 512;
-
- laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
- laiocb->nbytes = nb_sectors * 512;
- laiocb->ctx = s;
- laiocb->ret = -EINPROGRESS;
- laiocb->is_read = (type == QEMU_AIO_READ);
- laiocb->qiov = qiov;
-
- iocbs = &laiocb->iocb;
+ LinuxAioState *s = laiocb->ctx;
+ struct iocb *iocbs = &laiocb->iocb;
+ QEMUIOVector *qiov = laiocb->qiov;
switch (type) {
case QEMU_AIO_WRITE:
@@ -258,7 +258,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type);
- goto out_free_aiocb;
+ return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@@ -268,11 +268,53 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
ioq_submit(s);
}
- return &laiocb->common;
-out_free_aiocb:
- qemu_aio_unref(laiocb);
- return NULL;
+ return 0;
+}
+
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ uint64_t offset, QEMUIOVector *qiov, int type)
+{
+ int ret;
+ struct qemu_laiocb laiocb = {
+ .co = qemu_coroutine_self(),
+ .nbytes = qiov->size,
+ .ctx = s,
+ .is_read = (type == QEMU_AIO_READ),
+ .qiov = qiov,
+ };
+
+ ret = laio_do_submit(fd, &laiocb, offset, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+ return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque, int type)
+{
+ struct qemu_laiocb *laiocb;
+ off_t offset = sector_num * BDRV_SECTOR_SIZE;
+ int ret;
+
+ laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+ laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+ laiocb->ctx = s;
+ laiocb->ret = -EINPROGRESS;
+ laiocb->is_read = (type == QEMU_AIO_READ);
+ laiocb->qiov = qiov;
+
+ ret = laio_do_submit(fd, laiocb, offset, type);
+ if (ret < 0) {
+ qemu_aio_unref(laiocb);
+ return NULL;
+ }
+
+ return &laiocb->common;
}
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
diff --git a/block/mirror.c b/block/mirror.c
index 80fd3c7469..075384a9cf 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -44,6 +44,7 @@ typedef struct MirrorBlockJob {
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
bool is_none_mode;
+ BlockMirrorBackingMode backing_mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
@@ -157,8 +158,7 @@ static void mirror_read_complete(void *opaque, int ret)
return;
}
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
- op->nb_sectors * BDRV_SECTOR_SIZE,
- mirror_write_complete, op);
+ 0, mirror_write_complete, op);
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -186,8 +186,9 @@ static int mirror_cow_align(MirrorBlockJob *s,
need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
s->cow_bitmap);
if (need_cow) {
- bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
- &align_sector_num, &align_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
+ *nb_sectors, &align_sector_num,
+ &align_nb_sectors);
}
if (align_nb_sectors > max_sectors) {
@@ -274,8 +275,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
- blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov,
- nb_sectors * BDRV_SECTOR_SIZE,
+ blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0,
mirror_read_complete, op);
return ret;
}
@@ -386,8 +386,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_sector_num;
int target_nb_sectors;
- bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
- &target_sector_num, &target_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
+ io_sectors, &target_sector_num,
+ &target_nb_sectors);
if (target_sector_num == sector_num &&
target_nb_sectors == io_sectors) {
mirror_method = ret & BDRV_BLOCK_ZERO ?
@@ -742,20 +743,26 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- Error *local_err = NULL;
- int ret;
+ BlockDriverState *src, *target;
+
+ src = blk_bs(job->blk);
+ target = blk_bs(s->target);
- ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing",
- &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
- return;
- }
if (!s->synced) {
error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
return;
}
+ if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
+ int ret;
+
+ assert(!target->backing);
+ ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+ if (ret < 0) {
+ return;
+ }
+ }
+
/* check the target bs is not blocked and block all operations on it */
if (s->replaces) {
AioContext *replace_aio_context;
@@ -777,6 +784,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
+ if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+ BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ if (backing_bs(target) != backing) {
+ bdrv_set_backing_hd(target, backing);
+ }
+ }
+
s->should_complete = true;
block_job_enter(&s->common);
}
@@ -799,6 +813,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
const char *replaces,
int64_t speed, uint32_t granularity,
int64_t buf_size,
+ BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
@@ -836,6 +851,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
s->is_none_mode = is_none_mode;
+ s->backing_mode = backing_mode;
s->base = base;
s->granularity = granularity;
s->buf_size = ROUND_UP(buf_size, granularity);
@@ -859,7 +875,8 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
BlockCompletionFunc *cb,
@@ -875,7 +892,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
mirror_start_job(bs, target, replaces,
- speed, granularity, buf_size,
+ speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
@@ -922,7 +939,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
}
}
- mirror_start_job(bs, base, NULL, speed, 0, 0,
+ mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, false, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (local_err) {
diff --git a/block/null.c b/block/null.c
index 396500babd..b511010ba5 100644
--- a/block/null.c
+++ b/block/null.c
@@ -12,6 +12,8 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
#include "block/block_int.h"
#define NULL_OPT_LATENCY "latency-ns"
@@ -223,6 +225,20 @@ static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
}
}
+static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+ QINCREF(opts);
+ qdict_del(opts, "filename");
+
+ if (!qdict_size(opts)) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s://",
+ bs->drv->format_name);
+ }
+
+ qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
+ bs->full_open_options = opts;
+}
+
static BlockDriver bdrv_null_co = {
.format_name = "null-co",
.protocol_name = "null-co",
@@ -238,6 +254,8 @@ static BlockDriver bdrv_null_co = {
.bdrv_reopen_prepare = null_reopen_prepare,
.bdrv_co_get_block_status = null_co_get_block_status,
+
+ .bdrv_refresh_filename = null_refresh_filename,
};
static BlockDriver bdrv_null_aio = {
@@ -255,6 +273,8 @@ static BlockDriver bdrv_null_aio = {
.bdrv_reopen_prepare = null_reopen_prepare,
.bdrv_co_get_block_status = null_co_get_block_status,
+
+ .bdrv_refresh_filename = null_refresh_filename,
};
static void bdrv_null_init(void)
diff --git a/block/qcow.c b/block/qcow.c
index c5cf813469..312af52816 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -162,10 +162,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
if (s->crypt_method_header) {
if (bdrv_uses_whitelist() &&
s->crypt_method_header == QCOW_CRYPT_AES) {
- error_report("qcow built-in AES encryption is deprecated");
- error_printf("Support for it will be removed in a future release.\n"
- "You can use 'qemu-img convert' to switch to an\n"
- "unencrypted qcow image, or a LUKS raw image.\n");
+ error_setg(errp,
+ "Use of AES-CBC encrypted qcow images is no longer "
+ "supported in system emulators");
+ error_append_hint(errp,
+ "You can use 'qemu-img convert' to convert your "
+ "image to an alternative supported format, such "
+ "as unencrypted qcow, or raw with the LUKS "
+ "format instead.\n");
+ ret = -ENOSYS;
+ goto fail;
}
bs->encrypted = 1;
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 208a060421..580631c3d8 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -24,11 +24,6 @@
/* Needed for CONFIG_MADVISE */
#include "qemu/osdep.h"
-
-#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
-#include <sys/mman.h>
-#endif
-
#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b04bfafd65..893ddf6798 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -390,22 +390,18 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
return 0;
}
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
- uint64_t start_sect,
- uint64_t cluster_offset,
- int n_start, int n_end)
+static int coroutine_fn do_perform_cow(BlockDriverState *bs,
+ uint64_t src_cluster_offset,
+ uint64_t cluster_offset,
+ int offset_in_cluster,
+ int bytes)
{
BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
- int n, ret;
-
- n = n_end - n_start;
- if (n <= 0) {
- return 0;
- }
+ int ret;
- iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_len = bytes;
iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
if (iov.iov_base == NULL) {
return -ENOMEM;
@@ -424,17 +420,21 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
*/
- ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+ ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
+ bytes, &qiov, 0);
if (ret < 0) {
goto out;
}
if (bs->encrypted) {
Error *err = NULL;
+ int64_t sector = (cluster_offset + offset_in_cluster)
+ >> BDRV_SECTOR_BITS;
assert(s->cipher);
- if (qcow2_encrypt_sectors(s, start_sect + n_start,
- iov.iov_base, iov.iov_base, n,
- true, &err) < 0) {
+ assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
+ assert((bytes & ~BDRV_SECTOR_MASK) == 0);
+ if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
+ bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
ret = -EIO;
error_free(err);
goto out;
@@ -442,14 +442,14 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
+ cluster_offset + offset_in_cluster, bytes);
if (ret < 0) {
goto out;
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
- &qiov);
+ ret = bdrv_co_pwritev(bs->file->bs, cluster_offset + offset_in_cluster,
+ bytes, &qiov, 0);
if (ret < 0) {
goto out;
}
@@ -464,47 +464,44 @@ out:
/*
* get_cluster_offset
*
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
+ * For a given offset of the virtual disk, find the cluster type and offset in
+ * the qcow2 file. The offset is stored in *cluster_offset.
*
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
+ * On entry, *bytes is the maximum number of contiguous bytes starting at
+ * offset that we are interested in.
*
- * on exit, *num is the number of contiguous sectors we can read.
+ * On exit, *bytes is the number of bytes starting at offset that have the same
+ * cluster type and (if applicable) are stored contiguously in the image file.
+ * Compressed clusters are always returned one by one.
*
* Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
* cases.
*/
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset)
+ unsigned int *bytes, uint64_t *cluster_offset)
{
BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
- unsigned int index_in_cluster, nb_clusters;
- uint64_t nb_available, nb_needed;
+ unsigned int offset_in_cluster, nb_clusters;
+ uint64_t bytes_available, bytes_needed;
int ret;
- index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
- nb_needed = *num + index_in_cluster;
+ offset_in_cluster = offset_into_cluster(s, offset);
+ bytes_needed = (uint64_t) *bytes + offset_in_cluster;
l1_bits = s->l2_bits + s->cluster_bits;
- /* compute how many bytes there are between the offset and
- * the end of the l1 entry
- */
-
- nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
+ /* compute how many bytes there are between the start of the cluster
+ * containing offset and the end of the l1 entry */
+ bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
+ + offset_in_cluster;
- /* compute the number of available sectors */
-
- nb_available = (nb_available >> 9) + index_in_cluster;
-
- if (nb_needed > nb_available) {
- nb_needed = nb_available;
+ if (bytes_needed > bytes_available) {
+ bytes_needed = bytes_available;
}
- assert(nb_needed <= INT_MAX);
+ assert(bytes_needed <= INT_MAX);
*cluster_offset = 0;
@@ -542,7 +539,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
/* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
- nb_clusters = size_to_clusters(s, nb_needed << 9);
+ nb_clusters = size_to_clusters(s, bytes_needed);
ret = qcow2_get_cluster_type(*cluster_offset);
switch (ret) {
@@ -589,13 +586,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- nb_available = (c * s->cluster_sectors);
+ bytes_available = (c * s->cluster_size);
out:
- if (nb_available > nb_needed)
- nb_available = nb_needed;
+ if (bytes_available > bytes_needed) {
+ bytes_available = bytes_needed;
+ }
- *num = nb_available - index_in_cluster;
+ *bytes = bytes_available - offset_in_cluster;
return ret;
@@ -741,14 +739,12 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
BDRVQcow2State *s = bs->opaque;
int ret;
- if (r->nb_sectors == 0) {
+ if (r->nb_bytes == 0) {
return 0;
}
qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
- r->offset / BDRV_SECTOR_SIZE,
- r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+ ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
@@ -810,13 +806,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
assert(l2_index + m->nb_clusters <= s->l2_size);
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
- * each write allocates separate cluster and writes data concurrently.
- * The first one to complete updates l2 table with pointer to its
- * cluster the second one has to do RMW (which is done above by
- * copy_sectors()), update l2 table with its cluster pointer and free
- * old cluster. This is what this loop does */
- if(l2_table[l2_index + i] != 0)
+ * each write allocates separate cluster and writes data concurrently.
+ * The first one to complete updates l2 table with pointer to its
+ * cluster the second one has to do RMW (which is done above by
+ * perform_cow()), update l2 table with its cluster pointer and free
+ * old cluster. This is what this loop does */
+ if (l2_table[l2_index + i] != 0) {
old_cluster[j++] = l2_table[l2_index + i];
+ }
l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
@@ -1198,25 +1195,20 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
/*
* Save info needed for meta data update.
*
- * requested_sectors: Number of sectors from the start of the first
+ * requested_bytes: Number of bytes from the start of the first
* newly allocated cluster to the end of the (possibly shortened
* before) write request.
*
- * avail_sectors: Number of sectors from the start of the first
+ * avail_bytes: Number of bytes from the start of the first
* newly allocated to the end of the last newly allocated cluster.
*
- * nb_sectors: The number of sectors from the start of the first
+ * nb_bytes: The number of bytes from the start of the first
* newly allocated cluster to the end of the area that the write
* request actually writes to (excluding COW at the end)
*/
- int requested_sectors =
- (*bytes + offset_into_cluster(s, guest_offset))
- >> BDRV_SECTOR_BITS;
- int avail_sectors = nb_clusters
- << (s->cluster_bits - BDRV_SECTOR_BITS);
- int alloc_n_start = offset_into_cluster(s, guest_offset)
- >> BDRV_SECTOR_BITS;
- int nb_sectors = MIN(requested_sectors, avail_sectors);
+ uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
+ int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits);
+ int nb_bytes = MIN(requested_bytes, avail_bytes);
QCowL2Meta *old_m = *m;
*m = g_malloc0(sizeof(**m));
@@ -1227,23 +1219,21 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
.alloc_offset = alloc_cluster_offset,
.offset = start_of_cluster(s, guest_offset),
.nb_clusters = nb_clusters,
- .nb_available = nb_sectors,
.cow_start = {
.offset = 0,
- .nb_sectors = alloc_n_start,
+ .nb_bytes = offset_into_cluster(s, guest_offset),
},
.cow_end = {
- .offset = nb_sectors * BDRV_SECTOR_SIZE,
- .nb_sectors = avail_sectors - nb_sectors,
+ .offset = nb_bytes,
+ .nb_bytes = avail_bytes - nb_bytes,
},
};
qemu_co_queue_init(&(*m)->dependent_requests);
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
*host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
- *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- - offset_into_cluster(s, guest_offset));
+ *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
assert(*bytes != 0);
return 1;
@@ -1275,7 +1265,8 @@ fail:
* Return 0 on success and -errno in error cases
*/
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *host_offset, QCowL2Meta **m)
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m)
{
BDRVQcow2State *s = bs->opaque;
uint64_t start, remaining;
@@ -1283,13 +1274,11 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
uint64_t cur_bytes;
int ret;
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
-
- assert((offset & ~BDRV_SECTOR_MASK) == 0);
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);
again:
start = offset;
- remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
+ remaining = *bytes;
cluster_offset = 0;
*host_offset = 0;
cur_bytes = 0;
@@ -1375,8 +1364,8 @@ again:
}
}
- *num -= remaining >> BDRV_SECTOR_BITS;
- assert(*num > 0);
+ *bytes -= remaining;
+ assert(*bytes > 0);
assert(*host_offset != 0);
return 0;
diff --git a/block/qcow2.c b/block/qcow2.c
index 6f5fb810e4..4718f8250e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -968,13 +968,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->crypt_method_header) {
if (bdrv_uses_whitelist() &&
s->crypt_method_header == QCOW_CRYPT_AES) {
- error_report("qcow2 built-in AES encryption is deprecated");
- error_printf("Support for it will be removed in a future release.\n"
- "You can use 'qemu-img convert' to switch to an\n"
- "unencrypted qcow2 image, or a LUKS raw image.\n");
+ error_setg(errp,
+ "Use of AES-CBC encrypted qcow2 images is no longer "
+ "supported in system emulators");
+ error_append_hint(errp,
+ "You can use 'qemu-img convert' to convert your "
+ "image to an alternative supported format, such "
+ "as unencrypted qcow2, or raw with the LUKS "
+ "format instead.\n");
+ ret = -ENOSYS;
+ goto fail;
}
bs->encrypted = 1;
+
+ /* Encryption works on a sector granularity */
+ bs->request_alignment = BDRV_SECTOR_SIZE;
}
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1331,16 +1340,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
+ unsigned int bytes;
int64_t status = 0;
- *pnum = nb_sectors;
+ bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+ &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
+ *pnum = bytes >> BDRV_SECTOR_BITS;
+
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
!s->cipher) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1358,28 +1371,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
/* handle reading after the end of the backing file */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
+ int64_t offset, int bytes)
{
+ uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
int n1;
- if ((sector_num + nb_sectors) <= bs->total_sectors)
- return nb_sectors;
- if (sector_num >= bs->total_sectors)
+
+ if ((offset + bytes) <= bs_size) {
+ return bytes;
+ }
+
+ if (offset >= bs_size) {
n1 = 0;
- else
- n1 = bs->total_sectors - sector_num;
+ } else {
+ n1 = bs_size - offset;
+ }
- qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+ qemu_iovec_memset(qiov, n1, 0, bytes - n1);
return n1;
}
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
BDRVQcow2State *s = bs->opaque;
- int index_in_cluster, n1;
+ int offset_in_cluster, n1;
int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
@@ -1389,26 +1408,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_co_mutex_lock(&s->lock);
- while (remaining_sectors != 0) {
+ while (bytes != 0) {
/* prepare next request */
- cur_nr_sectors = remaining_sectors;
+ cur_bytes = MIN(bytes, INT_MAX);
if (s->cipher) {
- cur_nr_sectors = MIN(cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
- ret = qcow2_get_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
if (ret < 0) {
goto fail;
}
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ offset_in_cluster = offset_into_cluster(s, offset);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
@@ -1416,18 +1433,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
if (bs->backing) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
- sector_num, cur_nr_sectors);
+ offset, cur_bytes);
if (n1 > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, hd_qiov.niov);
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
- n1 * BDRV_SECTOR_SIZE);
+ qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing->bs, sector_num,
- n1, &local_qiov);
+ ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
+ &local_qiov, 0);
qemu_co_mutex_lock(&s->lock);
qemu_iovec_destroy(&local_qiov);
@@ -1438,12 +1454,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
} else {
/* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
}
break;
case QCOW2_CLUSTER_ZERO:
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
break;
case QCOW2_CLUSTER_COMPRESSED:
@@ -1454,8 +1470,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + index_in_cluster * 512,
- 512 * cur_nr_sectors);
+ s->cluster_cache + offset_in_cluster,
+ cur_bytes);
break;
case QCOW2_CLUSTER_NORMAL:
@@ -1482,34 +1498,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
}
- assert(cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- 512 * cur_nr_sectors);
+ qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file->bs,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
+ ret = bdrv_co_preadv(bs->file->bs,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
if (bs->encrypted) {
assert(s->cipher);
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Error *err = NULL;
- if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, false,
- &err) < 0) {
+ if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+ cluster_data, cluster_data,
+ cur_bytes >> BDRV_SECTOR_BITS,
+ false, &err) < 0) {
error_free(err);
ret = -EIO;
goto fail;
}
- qemu_iovec_from_buf(qiov, bytes_done,
- cluster_data, 512 * cur_nr_sectors);
+ qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
}
break;
@@ -1519,9 +1535,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
goto fail;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
}
ret = 0;
@@ -1534,23 +1550,21 @@ fail:
return ret;
}
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
BDRVQcow2State *s = bs->opaque;
- int index_in_cluster;
+ int offset_in_cluster;
int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ unsigned int cur_bytes; /* number of sectors in current iteration */
uint64_t cluster_offset;
QEMUIOVector hd_qiov;
uint64_t bytes_done = 0;
uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
- trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
- remaining_sectors);
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
qemu_iovec_init(&hd_qiov, qiov->niov);
@@ -1558,22 +1572,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
qemu_co_mutex_lock(&s->lock);
- while (remaining_sectors != 0) {
+ while (bytes != 0) {
l2meta = NULL;
trace_qcow2_writev_start_part(qemu_coroutine_self());
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- cur_nr_sectors = remaining_sectors;
- if (bs->encrypted &&
- cur_nr_sectors >
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
- cur_nr_sectors =
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
+ offset_in_cluster = offset_into_cluster(s, offset);
+ cur_bytes = MIN(bytes, INT_MAX);
+ if (bs->encrypted) {
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+ - offset_in_cluster);
}
- ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset, &l2meta);
+ ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
+ &cluster_offset, &l2meta);
if (ret < 0) {
goto fail;
}
@@ -1581,8 +1594,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
assert((cluster_offset & 511) == 0);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
if (bs->encrypted) {
Error *err = NULL;
@@ -1601,8 +1613,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
- if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors,
+ if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+ cluster_data, cluster_data,
+ cur_bytes >>BDRV_SECTOR_BITS,
true, &err) < 0) {
error_free(err);
ret = -EIO;
@@ -1610,13 +1623,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
}
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- cur_nr_sectors * 512);
+ qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
- cur_nr_sectors * BDRV_SECTOR_SIZE);
+ cluster_offset + offset_in_cluster, cur_bytes);
if (ret < 0) {
goto fail;
}
@@ -1624,10 +1635,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
- (cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file->bs,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
+ cluster_offset + offset_in_cluster);
+ ret = bdrv_co_pwritev(bs->file->bs,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
@@ -1653,10 +1664,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
l2meta = next;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
@@ -1998,19 +2009,19 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
static int preallocate(BlockDriverState *bs)
{
- uint64_t nb_sectors;
+ uint64_t bytes;
uint64_t offset;
uint64_t host_offset = 0;
- int num;
+ unsigned int cur_bytes;
int ret;
QCowL2Meta *meta;
- nb_sectors = bdrv_nb_sectors(bs);
+ bytes = bdrv_getlength(bs);
offset = 0;
- while (nb_sectors) {
- num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
- ret = qcow2_alloc_cluster_offset(bs, offset, &num,
+ while (bytes) {
+ cur_bytes = MIN(bytes, INT_MAX);
+ ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&host_offset, &meta);
if (ret < 0) {
return ret;
@@ -2036,8 +2047,8 @@ static int preallocate(BlockDriverState *bs)
/* TODO Preallocate data if requested */
- nb_sectors -= num;
- offset += num << BDRV_SECTOR_BITS;
+ bytes -= cur_bytes;
+ offset += cur_bytes;
}
/*
@@ -2046,11 +2057,9 @@ static int preallocate(BlockDriverState *bs)
* EOF). Extend the image to the last allocated sector.
*/
if (host_offset != 0) {
- uint8_t buf[BDRV_SECTOR_SIZE];
- memset(buf, 0, BDRV_SECTOR_SIZE);
- ret = bdrv_write(bs->file->bs,
- (host_offset >> BDRV_SECTOR_BITS) + num - 1,
- buf, 1);
+ uint8_t data = 0;
+ ret = bdrv_pwrite(bs->file->bs, (host_offset + cur_bytes) - 1,
+ &data, 1);
if (ret < 0) {
return ret;
}
@@ -2435,7 +2444,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
if (head || tail) {
int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
uint64_t off;
- int nr;
+ unsigned int nr;
assert(head + count <= s->cluster_size);
@@ -2452,7 +2461,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
/* We can have new write after previous check */
offset = cl_start << BDRV_SECTOR_BITS;
count = s->cluster_size;
- nr = s->cluster_sectors;
+ nr = s->cluster_size;
ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
qemu_co_mutex_unlock(&s->lock);
@@ -2900,36 +2909,20 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
BDRVQcow2State *s = bs->opaque;
- int64_t total_sectors = bs->total_sectors;
- bool zero_beyond_eof = bs->zero_beyond_eof;
- int ret;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- bs->zero_beyond_eof = false;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
- bs->zero_beyond_eof = zero_beyond_eof;
-
- /* bdrv_co_do_writev will have increased the total_sectors value to include
- * the VM state - the VM state is however not an actual part of the block
- * device, therefore, we need to restore the old value. */
- bs->total_sectors = total_sectors;
-
- return ret;
+ return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0);
}
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
+static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
{
BDRVQcow2State *s = bs->opaque;
- bool zero_beyond_eof = bs->zero_beyond_eof;
- int ret;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- bs->zero_beyond_eof = false;
- ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
- bs->zero_beyond_eof = zero_beyond_eof;
-
- return ret;
+ return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0);
}
/*
@@ -3368,8 +3361,8 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_get_block_status = qcow2_co_get_block_status,
.bdrv_set_key = qcow2_set_key,
- .bdrv_co_readv = qcow2_co_readv,
- .bdrv_co_writev = qcow2_co_writev,
+ .bdrv_co_preadv = qcow2_co_preadv,
+ .bdrv_co_pwritev = qcow2_co_pwritev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
diff --git a/block/qcow2.h b/block/qcow2.h
index 7db9795d44..b36a7bf8ad 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -302,8 +302,8 @@ typedef struct Qcow2COWRegion {
*/
uint64_t offset;
- /** Number of sectors to copy */
- int nb_sectors;
+ /** Number of bytes to copy */
+ int nb_bytes;
} Qcow2COWRegion;
/**
@@ -318,12 +318,6 @@ typedef struct QCowL2Meta
/** Host offset of the first newly allocated cluster */
uint64_t alloc_offset;
- /**
- * Number of sectors from the start of the first allocated cluster to
- * the end of the (possibly shortened) request
- */
- int nb_available;
-
/** Number of newly allocated clusters */
int nb_clusters;
@@ -471,8 +465,7 @@ static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
{
- return m->offset + m->cow_end.offset
- + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
+ return m->offset + m->cow_end.offset + m->cow_end.nb_bytes;
}
static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
@@ -544,9 +537,10 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
int nb_sectors, bool enc, Error **errp);
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset);
+ unsigned int *bytes, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *host_offset, QCowL2Meta **m);
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 714714e016..a4cdbbf1b7 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -15,6 +15,7 @@
#ifndef QEMU_RAW_AIO_H
#define QEMU_RAW_AIO_H
+#include "qemu/coroutine.h"
#include "qemu/iov.h"
/* AIO request types */
@@ -38,6 +39,8 @@
typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s);
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ uint64_t offset, QEMUIOVector *qiov, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index ce2e20f203..aacf13203f 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0)
- return NULL;
+ return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
@@ -1345,14 +1344,28 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, cb, opaque, type);
+ assert(qiov->size == bytes);
+ return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
#endif
}
}
- return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
- cb, opaque, type);
+ return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
+}
+
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
+{
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
+{
+ assert(flags == 0);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
@@ -1375,22 +1388,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
#endif
}
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
-}
-
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
@@ -1957,8 +1954,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2405,8 +2402,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2535,8 +2532,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
@@ -2670,8 +2668,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
diff --git a/block/rbd.c b/block/rbd.c
index 5bc5b32530..5226b6fef8 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -290,7 +290,8 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
if (only_read_conf_file) {
ret = rados_conf_read_file(cluster, value);
if (ret < 0) {
- error_setg(errp, "error reading conf file %s", value);
+ error_setg_errno(errp, -ret, "error reading conf file %s",
+ value);
break;
}
}
@@ -299,7 +300,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
} else if (!only_read_conf_file) {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
- error_setg(errp, "invalid conf option %s", name);
+ error_setg_errno(errp, -ret, "invalid conf option %s", name);
ret = -EINVAL;
break;
}
@@ -354,9 +355,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
- if (rados_create(&cluster, clientname) < 0) {
- error_setg(errp, "error initializing");
- return -EIO;
+ ret = rados_create(&cluster, clientname);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error initializing");
+ return ret;
}
if (strstr(conf, "conf=") == NULL) {
@@ -381,21 +383,27 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
return -EIO;
}
- if (rados_connect(cluster) < 0) {
- error_setg(errp, "error connecting");
+ ret = rados_connect(cluster);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error connecting");
rados_shutdown(cluster);
- return -EIO;
+ return ret;
}
- if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
- error_setg(errp, "error opening pool %s", pool);
+ ret = rados_ioctx_create(cluster, pool, &io_ctx);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error opening pool %s", pool);
rados_shutdown(cluster);
- return -EIO;
+ return ret;
}
ret = rbd_create(io_ctx, name, bytes, &obj_order);
rados_ioctx_destroy(io_ctx);
rados_shutdown(cluster);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error rbd create");
+ return ret;
+ }
return ret;
}
@@ -500,7 +508,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
r = rados_create(&s->cluster, clientname);
if (r < 0) {
- error_setg(errp, "error initializing");
+ error_setg_errno(errp, -r, "error initializing");
goto failed_opts;
}
@@ -546,19 +554,19 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
r = rados_connect(s->cluster);
if (r < 0) {
- error_setg(errp, "error connecting");
+ error_setg_errno(errp, -r, "error connecting");
goto failed_shutdown;
}
r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
if (r < 0) {
- error_setg(errp, "error opening pool %s", pool);
+ error_setg_errno(errp, -r, "error opening pool %s", pool);
goto failed_shutdown;
}
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
if (r < 0) {
- error_setg(errp, "error reading header from %s", s->name);
+ error_setg_errno(errp, -r, "error reading header from %s", s->name);
goto failed_open;
}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 23fbace1f9..ef5d044ab9 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -2784,12 +2784,19 @@ static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
return ret;
}
-static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
- int64_t pos, int size)
+static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
{
BDRVSheepdogState *s = bs->opaque;
+ void *buf;
+ int ret;
- return do_load_save_vmstate(s, data, pos, size, 1);
+ buf = qemu_blockalign(bs, qiov->size);
+ ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1);
+ qemu_iovec_from_buf(qiov, 0, buf, qiov->size);
+ qemu_vfree(buf);
+
+ return ret;
}
diff --git a/blockdev.c b/blockdev.c
index 7fd515a4fa..c9a0068cd6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2544,6 +2544,7 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
BlockBackend *blk;
BlockDriverState *medium_bs = NULL;
int bdrv_flags;
+ int rc;
QDict *options = NULL;
Error *err = NULL;
@@ -2598,11 +2599,13 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
goto fail;
}
- qmp_blockdev_open_tray(device, false, false, &err);
- if (err) {
+ rc = do_open_tray(device, false, &err);
+ if (rc && rc != -ENOSYS) {
error_propagate(errp, err);
goto fail;
}
+ error_free(err);
+ err = NULL;
qmp_x_blockdev_remove_medium(device, &err);
if (err) {
@@ -3423,6 +3426,7 @@ static void blockdev_mirror_common(BlockDriverState *bs,
BlockDriverState *target,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
+ BlockMirrorBackingMode backing_mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
@@ -3480,7 +3484,7 @@ static void blockdev_mirror_common(BlockDriverState *bs,
*/
mirror_start(bs, target,
has_replaces ? replaces : NULL,
- speed, granularity, buf_size, sync,
+ speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap,
block_job_cb, bs, errp);
}
@@ -3503,6 +3507,7 @@ void qmp_drive_mirror(const char *device, const char *target,
BlockBackend *blk;
BlockDriverState *source, *target_bs;
AioContext *aio_context;
+ BlockMirrorBackingMode backing_mode;
Error *local_err = NULL;
QDict *options = NULL;
int flags;
@@ -3576,6 +3581,12 @@ void qmp_drive_mirror(const char *device, const char *target,
}
}
+ if (mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
+ backing_mode = MIRROR_SOURCE_BACKING_CHAIN;
+ } else {
+ backing_mode = MIRROR_OPEN_BACKING_CHAIN;
+ }
+
if ((sync == MIRROR_SYNC_MODE_FULL || !source)
&& mode != NEW_IMAGE_MODE_EXISTING)
{
@@ -3624,7 +3635,7 @@ void qmp_drive_mirror(const char *device, const char *target,
bdrv_set_aio_context(target_bs, aio_context);
blockdev_mirror_common(bs, target_bs,
- has_replaces, replaces, sync,
+ has_replaces, replaces, sync, backing_mode,
has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
@@ -3656,6 +3667,7 @@ void qmp_blockdev_mirror(const char *device, const char *target,
BlockBackend *blk;
BlockDriverState *target_bs;
AioContext *aio_context;
+ BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
Error *local_err = NULL;
blk = blk_by_name(device);
@@ -3681,7 +3693,7 @@ void qmp_blockdev_mirror(const char *device, const char *target,
bdrv_set_aio_context(target_bs, aio_context);
blockdev_mirror_common(bs, target_bs,
- has_replaces, replaces, sync,
+ has_replaces, replaces, sync, backing_mode,
has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
@@ -4154,22 +4166,18 @@ void qmp_x_blockdev_change(const char *parent, bool has_child,
BlockJobInfoList *qmp_query_block_jobs(Error **errp)
{
BlockJobInfoList *head = NULL, **p_next = &head;
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockJob *job;
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
+ for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+ BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+ AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
-
- if (bs->job) {
- BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
- elem->value = block_job_query(bs->job);
- *p_next = elem;
- p_next = &elem->next;
- }
-
+ elem->value = block_job_query(job);
aio_context_release(aio_context);
+
+ *p_next = elem;
+ p_next = &elem->next;
}
return head;
diff --git a/blockjob.c b/blockjob.c
index c095cc57cb..01b896b7e7 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -361,10 +361,12 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
}
job->busy = false;
+ if (!block_job_is_paused(job)) {
+ co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
+ }
+ /* The job can be paused while sleeping, so check this again */
if (block_job_is_paused(job)) {
qemu_coroutine_yield();
- } else {
- co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
}
job->busy = true;
}
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 898ee05472..41a1309296 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -1,7 +1,6 @@
/* This is the Linux kernel elf-loading code, ported into user space */
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "qemu.h"
#include "disas/disas.h"
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 9f592be96f..abe9a26f9b 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -18,7 +18,6 @@
*/
#include "qemu/osdep.h"
#include <machine/trap.h>
-#include <sys/mman.h>
#include "qemu.h"
#include "qemu/path.h"
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 6ab5334702..610f91b285 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -17,7 +17,6 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "qemu.h"
#include "qemu-common.h"
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index 47cf865a32..a9fe8693c1 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -19,7 +19,6 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/path.h"
-#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/param.h>
#include <sys/sysctl.h>
diff --git a/configure b/configure
index 8c2f90b312..10cb212359 100755
--- a/configure
+++ b/configure
@@ -270,7 +270,6 @@ aix="no"
blobs="yes"
pkgversion=""
pie=""
-zero_malloc=""
qom_cast_debug="yes"
trace_backends="log"
trace_file="trace"
@@ -1389,11 +1388,9 @@ fi
# Consult white-list to determine whether to enable werror
# by default. Only enable by default for git builds
-z_version=$(cut -f3 -d. $source_path/VERSION)
-
if test -z "$werror" ; then
if test -d "$source_path/.git" -a \
- "$linux" = "yes" ; then
+ \( "$linux" = "yes" -o "$mingw32" = "yes" \) ; then
werror="yes"
else
werror="no"
@@ -1782,13 +1779,20 @@ fi
# avx2 optimization requirement check
cat > $TMPC << EOF
-static void bar(void) {}
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#include <cpuid.h>
+#include <immintrin.h>
+
+static int bar(void *a) {
+ return _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(__m256i *)a, (__m256i){0}));
+}
static void *bar_ifunc(void) {return (void*) bar;}
-static void foo(void) __attribute__((ifunc("bar_ifunc")));
-int main(void) { foo(); return 0; }
+int foo(void *a) __attribute__((ifunc("bar_ifunc")));
+int main(int argc, char *argv[]) { return foo(argv[0]);}
EOF
-if compile_prog "-mavx2" "" ; then
- if readelf --syms $TMPE |grep "IFUNC.*foo" >/dev/null 2>&1; then
+if compile_object "" ; then
+ if readelf --syms $TMPO |grep "IFUNC.*foo" >/dev/null 2>&1; then
avx2_opt="yes"
fi
fi
@@ -4178,24 +4182,6 @@ if compile_prog "" "" ; then
fi
##########################################
-# check if we have usable SIGEV_THREAD_ID
-
-sigev_thread_id=no
-cat > $TMPC << EOF
-#include <signal.h>
-int main(void) {
- struct sigevent ev;
- ev.sigev_notify = SIGEV_THREAD_ID;
- ev._sigev_un._tid = 0;
- asm volatile("" : : "g"(&ev));
- return 0;
-}
-EOF
-if compile_prog "" "" ; then
- sigev_thread_id=yes
-fi
-
-##########################################
# check if trace backend exists
$python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends > /dev/null 2> /dev/null
@@ -4574,16 +4560,6 @@ if test "$libnfs" != "no" ; then
fi
fi
-# Disable zero malloc errors for official releases unless explicitly told to
-# enable/disable
-if test -z "$zero_malloc" ; then
- if test "$z_version" = "50" ; then
- zero_malloc="no"
- else
- zero_malloc="yes"
- fi
-fi
-
# Now we've finished running tests it's OK to add -Werror to the compiler flags
if test "$werror" = "yes"; then
QEMU_CFLAGS="-Werror $QEMU_CFLAGS"
@@ -4862,7 +4838,6 @@ echo "preadv support $preadv"
echo "fdatasync $fdatasync"
echo "madvise $madvise"
echo "posix_madvise $posix_madvise"
-echo "sigev_thread_id $sigev_thread_id"
echo "uuid support $uuid"
echo "libcap-ng support $cap_ng"
echo "vhost-net support $vhost_net"
@@ -5277,9 +5252,6 @@ fi
if test "$posix_madvise" = "yes" ; then
echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
fi
-if test "$sigev_thread_id" = "yes" ; then
- echo "CONFIG_SIGEV_THREAD_ID=y" >> $config_host_mak
-fi
if test "$spice" = "yes" ; then
echo "CONFIG_SPICE=y" >> $config_host_mak
@@ -5342,9 +5314,6 @@ if [ "$bsd" = "yes" ] ; then
echo "CONFIG_BSD=y" >> $config_host_mak
fi
-if test "$zero_malloc" = "yes" ; then
- echo "CONFIG_ZERO_MALLOC=y" >> $config_host_mak
-fi
if test "$localtime_r" = "yes" ; then
echo "CONFIG_LOCALTIME_R=y" >> $config_host_mak
fi
diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
index bf4ee0b2e2..e2f295bd43 100644
--- a/contrib/ivshmem-server/ivshmem-server.c
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -10,7 +10,6 @@
#include "qemu/host-utils.h"
#include "qemu/sockets.h"
-#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
diff --git a/exec.c b/exec.c
index 4f3818c561..0122ef76de 100644
--- a/exec.c
+++ b/exec.c
@@ -19,7 +19,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#ifndef _WIN32
-#include <sys/mman.h>
#endif
#include "qemu/cutils.h"
diff --git a/hmp.c b/hmp.c
index a4b1d3d220..30897af6ae 100644
--- a/hmp.c
+++ b/hmp.c
@@ -217,6 +217,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
info->ram->dirty_pages_rate);
}
+ if (info->ram->postcopy_requests) {
+ monitor_printf(mon, "postcopy request count: %" PRIu64 "\n",
+ info->ram->postcopy_requests);
+ }
}
if (info->has_disk) {
@@ -1951,7 +1955,12 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
blk = blk_by_name(device);
if (blk) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(aio_context);
+
qemuio_command(blk, command);
+
+ aio_context_release(aio_context);
} else {
error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 4c856f5278..51d8596056 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -900,7 +900,7 @@ static int m25p80_init(SSISlave *ss)
s->storage = blk_blockalign(s->blk, s->size);
/* FIXME: Move to late init */
- if (blk_pread(s->blk, 0, s->storage, s->size)) {
+ if (blk_pread(s->blk, 0, s->storage, s->size) != s->size) {
fprintf(stderr, "Failed to initialize SPI flash!\n");
return 1;
}
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index cf57814fb6..90aca73121 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -21,7 +21,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <sys/uio.h>
#include "hw/hw.h"
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index cbf1dccbb1..83108b0bdb 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -22,7 +22,6 @@
#include "qemu/osdep.h"
#include <sys/select.h>
#include <termios.h>
-#include <sys/mman.h>
#include "hw/hw.h"
#include "sysemu/char.h"
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 570b0977c3..46b7d5eded 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -25,7 +25,6 @@
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "hw/hw.h"
#include "ui/console.h"
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index bceed091e8..f9c901471d 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -22,7 +22,6 @@
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include <sys/mman.h>
#include "hw/hw.h"
#include "hw/i386/pc.h"
#include "qemu/error-report.h"
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 82c7c0a5fa..53bc968bd0 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -432,13 +432,25 @@ static void pc_i440fx_machine_options(MachineClass *m)
m->default_display = "std";
}
-static void pc_i440fx_2_6_machine_options(MachineClass *m)
+static void pc_i440fx_2_7_machine_options(MachineClass *m)
{
pc_i440fx_machine_options(m);
m->alias = "pc";
m->is_default = 1;
}
+DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL,
+ pc_i440fx_2_7_machine_options);
+
+
+static void pc_i440fx_2_6_machine_options(MachineClass *m)
+{
+ pc_i440fx_2_7_machine_options(m);
+ m->is_default = 0;
+ m->alias = NULL;
+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_6);
+}
+
DEFINE_I440FX_MACHINE(v2_6, "pc-i440fx-2.6", NULL,
pc_i440fx_2_6_machine_options);
@@ -447,8 +459,6 @@ static void pc_i440fx_2_5_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_i440fx_2_6_machine_options(m);
- m->alias = NULL;
- m->is_default = 0;
pcmc->save_tsc_khz = false;
m->legacy_fw_cfg_order = 1;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_5);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 31a6a59383..e4b541f7b2 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -283,12 +283,22 @@ static void pc_q35_machine_options(MachineClass *m)
m->no_floppy = 1;
}
-static void pc_q35_2_6_machine_options(MachineClass *m)
+static void pc_q35_2_7_machine_options(MachineClass *m)
{
pc_q35_machine_options(m);
m->alias = "q35";
}
+DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL,
+ pc_q35_2_7_machine_options);
+
+static void pc_q35_2_6_machine_options(MachineClass *m)
+{
+ pc_q35_2_7_machine_options(m);
+ m->alias = NULL;
+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_6);
+}
+
DEFINE_Q35_MACHINE(v2_6, "pc-q35-2.6", NULL,
pc_q35_2_6_machine_options);
@@ -296,7 +306,6 @@ static void pc_q35_2_5_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_2_6_machine_options(m);
- m->alias = NULL;
pcmc->save_tsc_khz = false;
m->legacy_fw_cfg_order = 1;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_5);
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 90be9f7617..c4dde3a52e 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -36,8 +36,6 @@
#include "hw/misc/ivshmem.h"
-#include <sys/mman.h>
-
#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM 0x1110
diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c
index 086893dcca..b81d820084 100644
--- a/hw/misc/pc-testdev.c
+++ b/hw/misc/pc-testdev.c
@@ -36,9 +36,6 @@
*/
#include "qemu/osdep.h"
-#if defined(CONFIG_POSIX)
-#include <sys/mman.h>
-#endif
#include "hw/hw.h"
#include "hw/qdev.h"
#include "hw/isa/isa.h"
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index e4478bead8..efd43b47b8 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -15,6 +15,7 @@
*
*/
+#include "qemu/osdep.h"
#include "net_tx_pkt.h"
#include "net/eth.h"
#include "net/checksum.h"
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index 07b9a2098b..212ecc62fc 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -18,7 +18,6 @@
#ifndef NET_TX_PKT_H
#define NET_TX_PKT_H
-#include "qemu/osdep.h"
#include "net/eth.h"
#include "exec/hwaddr.h"
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 7281730d94..0b4ddae48c 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -22,7 +22,6 @@
#include "qemu/osdep.h"
#include <sys/socket.h>
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <sys/wait.h>
#include "hw/hw.h"
diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index 3adb685177..baa0a2cfdf 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -98,6 +98,9 @@ static uint32_t get_cmd(ESPState *s, uint8_t *buf, uint8_t buflen)
s->dma_memory_read(s->dma_opaque, buf, dmalen);
} else {
dmalen = s->ti_size;
+ if (dmalen > TI_BUFSZ) {
+ return 0;
+ }
memcpy(buf, s->ti_buf, dmalen);
buf[0] = buf[2] >> 5;
}
@@ -219,7 +222,7 @@ static void write_response(ESPState *s)
} else {
s->ti_size = 2;
s->ti_rptr = 0;
- s->ti_wptr = 0;
+ s->ti_wptr = 2;
s->rregs[ESP_RFLAGS] = 2;
}
esp_raise_irq(s);
@@ -242,15 +245,12 @@ static void esp_do_dma(ESPState *s)
uint32_t len;
int to_device;
- to_device = (s->ti_size < 0);
len = s->dma_left;
if (s->do_cmd) {
trace_esp_do_dma(s->cmdlen, len);
+ assert (s->cmdlen <= sizeof(s->cmdbuf) &&
+ len <= sizeof(s->cmdbuf) - s->cmdlen);
s->dma_memory_read(s->dma_opaque, &s->cmdbuf[s->cmdlen], len);
- s->ti_size = 0;
- s->cmdlen = 0;
- s->do_cmd = 0;
- do_cmd(s, s->cmdbuf);
return;
}
if (s->async_len == 0) {
@@ -260,6 +260,7 @@ static void esp_do_dma(ESPState *s)
if (len > s->async_len) {
len = s->async_len;
}
+ to_device = (s->ti_size < 0);
if (to_device) {
s->dma_memory_read(s->dma_opaque, s->async_buf, len);
} else {
@@ -315,6 +316,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len)
{
ESPState *s = req->hba_private;
+ assert(!s->do_cmd);
trace_esp_transfer_data(s->dma_left, s->ti_size);
s->async_len = len;
s->async_buf = scsi_req_get_buf(req);
@@ -345,7 +347,7 @@ static void handle_ti(ESPState *s)
s->dma_counter = dmalen;
if (s->do_cmd)
- minlen = (dmalen < 32) ? dmalen : 32;
+ minlen = (dmalen < ESP_CMDBUF_SZ) ? dmalen : ESP_CMDBUF_SZ;
else if (s->ti_size < 0)
minlen = (dmalen < -s->ti_size) ? dmalen : -s->ti_size;
else
@@ -355,13 +357,13 @@ static void handle_ti(ESPState *s)
s->dma_left = minlen;
s->rregs[ESP_RSTAT] &= ~STAT_TC;
esp_do_dma(s);
- } else if (s->do_cmd) {
+ }
+ if (s->do_cmd) {
trace_esp_handle_ti_cmd(s->cmdlen);
s->ti_size = 0;
s->cmdlen = 0;
s->do_cmd = 0;
do_cmd(s, s->cmdbuf);
- return;
}
}
@@ -449,7 +451,7 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
break;
case ESP_FIFO:
if (s->do_cmd) {
- if (s->cmdlen < TI_BUFSZ) {
+ if (s->cmdlen < ESP_CMDBUF_SZ) {
s->cmdbuf[s->cmdlen++] = val & 0xff;
} else {
trace_esp_error_fifo_overrun();
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 188196990e..36f8a85a70 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2060,13 +2060,13 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
}
break;
case MODE_SELECT:
- DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer);
+ DPRINTF("Mode Select(6) (len %lu)\n", (unsigned long)r->req.cmd.xfer);
break;
case MODE_SELECT_10:
- DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer);
+ DPRINTF("Mode Select(10) (len %lu)\n", (unsigned long)r->req.cmd.xfer);
break;
case UNMAP:
- DPRINTF("Unmap (len %lu)\n", (long)r->req.cmd.xfer);
+ DPRINTF("Unmap (len %lu)\n", (unsigned long)r->req.cmd.xfer);
break;
case VERIFY_10:
case VERIFY_12:
@@ -2080,7 +2080,7 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
case WRITE_SAME_16:
DPRINTF("WRITE SAME %d (len %lu)\n",
req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16,
- (long)r->req.cmd.xfer);
+ (unsigned long)r->req.cmd.xfer);
break;
default:
DPRINTF("Unknown SCSI command (%2.2x=%s)\n", buf[0],
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 8fa47edd9a..0fd34c62c4 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -21,7 +21,6 @@
#include "qemu/osdep.h"
#include <libusb.h>
-#include <sys/mman.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index e51ed3a348..5ff5e9220a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -20,7 +20,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/vfio.h>
#include "hw/vfio/vfio-common.h"
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index deab0c601a..53b87b76ea 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -21,7 +21,6 @@
#include "qemu/osdep.h"
#include <linux/vfio.h>
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 8c15e09470..557d3f9e0c 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -27,10 +27,6 @@
#include "qapi-event.h"
#include "trace.h"
-#if defined(__linux__)
-#include <sys/mman.h>
-#endif
-
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c
index c63f9df38b..e7ce724567 100644
--- a/hw/xen/xen_backend.c
+++ b/hw/xen/xen_backend.c
@@ -23,7 +23,6 @@
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include <sys/signal.h>
#include "hw/hw.h"
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index 9a16f2bff1..62add0639f 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -10,7 +10,6 @@
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "hw/xen/xen_backend.h"
#include "xen_pt.h"
diff --git a/include/block/block.h b/include/block/block.h
index 54cca28bac..733a8ec2ec 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -65,6 +65,9 @@ typedef enum {
BDRV_REQ_MAY_UNMAP = 0x4,
BDRV_REQ_NO_SERIALISING = 0x8,
BDRV_REQ_FUA = 0x10,
+
+ /* Mask of valid flags */
+ BDRV_REQ_MASK = 0x1f,
} BdrvRequestFlags;
typedef struct BlockSizes {
@@ -232,6 +235,7 @@ int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
+int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
const void *buf, int count);
int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
@@ -401,10 +405,14 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors);
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes);
const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
void bdrv_get_backing_filename(BlockDriverState *bs,
@@ -423,6 +431,7 @@ void path_combine(char *dest, int dest_size,
const char *base_path,
const char *filename);
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8a4963c4fe..688c6be009 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -224,10 +224,12 @@ struct BlockDriver {
int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
- int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos);
- int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
+ int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+ int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
int (*bdrv_change_backing_file)(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt);
@@ -449,9 +451,6 @@ struct BlockDriverState {
/* I/O Limits */
BlockLimits bl;
- /* Whether produces zeros when read beyond eof */
- bool zero_beyond_eof;
-
/* Alignment requirement for offset/length of I/O requests */
unsigned int request_alignment;
/* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
@@ -510,6 +509,20 @@ struct BlockBackendRootState {
BlockdevDetectZeroesOptions detect_zeroes;
};
+typedef enum BlockMirrorBackingMode {
+ /* Reuse the existing backing chain from the source for the target.
+ * - sync=full: Set backing BDS to NULL.
+ * - sync=top: Use source's backing BDS.
+ * - sync=none: Use source as the backing BDS. */
+ MIRROR_SOURCE_BACKING_CHAIN,
+
+ /* Open the target's backing chain completely anew */
+ MIRROR_OPEN_BACKING_CHAIN,
+
+ /* Do not change the target's backing BDS after job completion */
+ MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
static inline BlockDriverState *backing_bs(BlockDriverState *bs)
{
return bs->backing ? bs->backing->bs : NULL;
@@ -672,6 +685,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
* @granularity: The chosen granularity for the dirty bitmap.
* @buf_size: The amount of data that can be in flight at one time.
* @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
* @unmap: Whether to unmap target where source sectors only contain zeroes.
@@ -687,7 +701,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
BlockCompletionFunc *cb,
diff --git a/include/block/nbd.h b/include/block/nbd.h
index b86a976984..df1f804338 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -25,19 +25,20 @@
#include "io/channel-socket.h"
#include "crypto/tlscreds.h"
+/* Note: these are _NOT_ the same as the network representation of an NBD
+ * request and reply!
+ */
struct nbd_request {
- uint32_t magic;
- uint32_t type;
uint64_t handle;
uint64_t from;
uint32_t len;
-} QEMU_PACKED;
+ uint32_t type;
+};
struct nbd_reply {
- uint32_t magic;
- uint32_t error;
uint64_t handle;
-} QEMU_PACKED;
+ uint32_t error;
+};
#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */
@@ -76,6 +77,12 @@ enum {
/* Maximum size of a single READ/WRITE data buffer */
#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
+/* Maximum size of an export name. The NBD spec requires 256 and
+ * suggests that servers support up to 4096, but we stick to only the
+ * required size so that we can stack-allocate the names, and because
+ * going larger would require an audit of more code to make sure we
+ * aren't overflowing some other buffer. */
+#define NBD_MAX_NAME_SIZE 256
ssize_t nbd_wr_syncv(QIOChannel *ioc,
struct iovec *iov,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9ca23098bd..49566c89d4 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -356,7 +356,16 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
+#define PC_COMPAT_2_6 \
+ HW_COMPAT_2_6 \
+ {\
+ .driver = TYPE_X86_CPU,\
+ .property = "cpuid-0xb",\
+ .value = "off",\
+ },
+
#define PC_COMPAT_2_5 \
+ PC_COMPAT_2_6 \
HW_COMPAT_2_5
/* Helper for setting model-id for CPU models that changed model-id
diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h
index 6c795276c9..d2c48869e1 100644
--- a/include/hw/scsi/esp.h
+++ b/include/hw/scsi/esp.h
@@ -14,6 +14,7 @@ void esp_init(hwaddr espaddr, int it_shift,
#define ESP_REGS 16
#define TI_BUFSZ 16
+#define ESP_CMDBUF_SZ 32
typedef struct ESPState ESPState;
@@ -31,7 +32,7 @@ struct ESPState {
SCSIBus bus;
SCSIDevice *current_dev;
SCSIRequest *current_req;
- uint8_t cmdbuf[TI_BUFSZ];
+ uint8_t cmdbuf[ESP_CMDBUF_SZ];
uint32_t cmdlen;
uint32_t do_cmd;
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 13b12b7e87..3c96623d3d 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -160,6 +160,8 @@ struct MigrationState
int64_t xbzrle_cache_size;
int64_t setup_time;
int64_t dirty_sync_count;
+ /* Count of requests incoming from destination */
+ int64_t postcopy_requests;
/* Flag set once the migration has been asked to enter postcopy */
bool start_postcopy;
@@ -181,25 +183,25 @@ struct MigrationState
void migrate_set_state(int *state, int old_state, int new_state);
-void process_incoming_migration(QEMUFile *f);
+void migration_fd_process_incoming(QEMUFile *f);
void qemu_start_incoming_migration(const char *uri, Error **errp);
-void migration_set_incoming_channel(MigrationState *s,
- QIOChannel *ioc);
+void migration_channel_process_incoming(MigrationState *s,
+ QIOChannel *ioc);
-void migration_tls_set_incoming_channel(MigrationState *s,
- QIOChannel *ioc,
- Error **errp);
+void migration_tls_channel_process_incoming(MigrationState *s,
+ QIOChannel *ioc,
+ Error **errp);
-void migration_set_outgoing_channel(MigrationState *s,
- QIOChannel *ioc,
- const char *hostname);
+void migration_channel_connect(MigrationState *s,
+ QIOChannel *ioc,
+ const char *hostname);
-void migration_tls_set_outgoing_channel(MigrationState *s,
- QIOChannel *ioc,
- const char *hostname,
- Error **errp);
+void migration_tls_channel_connect(MigrationState *s,
+ QIOChannel *ioc,
+ const char *hostname,
+ Error **errp);
uint64_t migrate_max_downtime(void);
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 693769403f..e63da2831a 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -197,8 +197,6 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#if defined(CONFIG_MADVISE)
-#include <sys/mman.h>
-
#define QEMU_MADV_WILLNEED MADV_WILLNEED
#define QEMU_MADV_DONTNEED MADV_DONTNEED
#ifdef MADV_DONTFORK
diff --git a/include/qemu/qdist.h b/include/qemu/qdist.h
index f30050c2d1..54ece760d6 100644
--- a/include/qemu/qdist.h
+++ b/include/qemu/qdist.h
@@ -7,7 +7,6 @@
#ifndef QEMU_QDIST_H
#define QEMU_QDIST_H
-#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/bitops.h"
diff --git a/include/qemu/qht.h b/include/qemu/qht.h
index aec60aa534..70bfc68b8d 100644
--- a/include/qemu/qht.h
+++ b/include/qemu/qht.h
@@ -7,7 +7,6 @@
#ifndef QEMU_QHT_H
#define QEMU_QHT_H
-#include "qemu/osdep.h"
#include "qemu/seqlock.h"
#include "qemu/thread.h"
#include "qemu/qdist.h"
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 07e3e5ae9b..9c7dfdfbec 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -26,6 +26,7 @@
#ifndef QEMU_OS_POSIX_H
#define QEMU_OS_POSIX_H
+#include <sys/mman.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
diff --git a/kvm-all.c b/kvm-all.c
index fbd2d93188..a88f917fda 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -15,7 +15,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/kvm.h>
@@ -1520,10 +1519,16 @@ static int kvm_max_vcpus(KVMState *s)
return (ret) ? ret : kvm_recommended_vcpus(s);
}
+static int kvm_max_vcpu_id(KVMState *s)
+{
+ int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID);
+ return (ret) ? ret : kvm_max_vcpus(s);
+}
+
bool kvm_vcpu_id_is_valid(int vcpu_id)
{
KVMState *s = KVM_STATE(current_machine->accelerator);
- return vcpu_id >= 0 && vcpu_id < kvm_max_vcpus(s);
+ return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
}
static int kvm_init(MachineState *ms)
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index bb2558f284..f807baf389 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2,7 +2,6 @@
#include "qemu/osdep.h"
#include <sys/param.h>
-#include <sys/mman.h>
#include <sys/resource.h>
#include "qemu.h"
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index f9139c399a..48ad1c5e9e 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -34,7 +34,6 @@
/****************************************************************************/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "qemu.h"
#include "flat.h"
diff --git a/linux-user/main.c b/linux-user/main.c
index f8a8764ae9..b9a4e0ea45 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -18,7 +18,6 @@
*/
#include "qemu/osdep.h"
#include "qemu-version.h"
-#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/resource.h>
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 3519147bce..c4371d943a 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -17,7 +17,6 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include <linux/mman.h>
#include <linux/unistd.h>
diff --git a/linux-user/strace.c b/linux-user/strace.c
index c5980a128c..4046b81705 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -5,7 +5,6 @@
#include <sys/shm.h>
#include <sys/select.h>
#include <sys/mount.h>
-#include <sys/mman.h>
#include <sched.h>
#include "qemu.h"
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 71ccbd9c5e..1c17b741c2 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -32,7 +32,6 @@
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/resource.h>
-#include <sys/mman.h>
#include <sys/swap.h>
#include <linux/capability.h>
#include <sched.h>
diff --git a/migration/exec.c b/migration/exec.c
index 1515cc3319..2af63cced6 100644
--- a/migration/exec.c
+++ b/migration/exec.c
@@ -38,7 +38,7 @@ void exec_start_outgoing_migration(MigrationState *s, const char *command, Error
return;
}
- migration_set_outgoing_channel(s, ioc, NULL);
+ migration_channel_connect(s, ioc, NULL);
object_unref(OBJECT(ioc));
}
@@ -46,7 +46,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc,
GIOCondition condition,
gpointer opaque)
{
- migration_set_incoming_channel(migrate_get_current(), ioc);
+ migration_channel_process_incoming(migrate_get_current(), ioc);
object_unref(OBJECT(ioc));
return FALSE; /* unregister */
}
diff --git a/migration/fd.c b/migration/fd.c
index fc5c9eee02..84a10fd68f 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -38,7 +38,7 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **
return;
}
- migration_set_outgoing_channel(s, ioc, NULL);
+ migration_channel_connect(s, ioc, NULL);
object_unref(OBJECT(ioc));
}
@@ -46,7 +46,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
GIOCondition condition,
gpointer opaque)
{
- migration_set_incoming_channel(migrate_get_current(), ioc);
+ migration_channel_process_incoming(migrate_get_current(), ioc);
object_unref(OBJECT(ioc));
return FALSE; /* unregister */
}
diff --git a/migration/migration.c b/migration/migration.c
index 7ecbadee6f..20f88757d8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -416,7 +416,7 @@ static void process_incoming_migration_co(void *opaque)
qemu_bh_schedule(mis->bh);
}
-void process_incoming_migration(QEMUFile *f)
+void migration_fd_process_incoming(QEMUFile *f)
{
Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
@@ -426,8 +426,8 @@ void process_incoming_migration(QEMUFile *f)
}
-void migration_set_incoming_channel(MigrationState *s,
- QIOChannel *ioc)
+void migration_channel_process_incoming(MigrationState *s,
+ QIOChannel *ioc)
{
trace_migration_set_incoming_channel(
ioc, object_get_typename(OBJECT(ioc)));
@@ -436,20 +436,20 @@ void migration_set_incoming_channel(MigrationState *s,
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
Error *local_err = NULL;
- migration_tls_set_incoming_channel(s, ioc, &local_err);
+ migration_tls_channel_process_incoming(s, ioc, &local_err);
if (local_err) {
error_report_err(local_err);
}
} else {
QEMUFile *f = qemu_fopen_channel_input(ioc);
- process_incoming_migration(f);
+ migration_fd_process_incoming(f);
}
}
-void migration_set_outgoing_channel(MigrationState *s,
- QIOChannel *ioc,
- const char *hostname)
+void migration_channel_connect(MigrationState *s,
+ QIOChannel *ioc,
+ const char *hostname)
{
trace_migration_set_outgoing_channel(
ioc, object_get_typename(OBJECT(ioc)), hostname);
@@ -458,7 +458,7 @@ void migration_set_outgoing_channel(MigrationState *s,
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
Error *local_err = NULL;
- migration_tls_set_outgoing_channel(s, ioc, hostname, &local_err);
+ migration_tls_channel_connect(s, ioc, hostname, &local_err);
if (local_err) {
migrate_fd_error(s, local_err);
error_free(local_err);
@@ -602,6 +602,26 @@ static void get_xbzrle_cache_stats(MigrationInfo *info)
}
}
+static void populate_ram_info(MigrationInfo *info, MigrationState *s)
+{
+ info->has_ram = true;
+ info->ram = g_malloc0(sizeof(*info->ram));
+ info->ram->transferred = ram_bytes_transferred();
+ info->ram->total = ram_bytes_total();
+ info->ram->duplicate = dup_mig_pages_transferred();
+ info->ram->skipped = skipped_mig_pages_transferred();
+ info->ram->normal = norm_mig_pages_transferred();
+ info->ram->normal_bytes = norm_mig_bytes_transferred();
+ info->ram->mbps = s->mbps;
+ info->ram->dirty_sync_count = s->dirty_sync_count;
+ info->ram->postcopy_requests = s->postcopy_requests;
+
+ if (s->state != MIGRATION_STATUS_COMPLETED) {
+ info->ram->remaining = ram_bytes_remaining();
+ info->ram->dirty_pages_rate = s->dirty_pages_rate;
+ }
+}
+
MigrationInfo *qmp_query_migrate(Error **errp)
{
MigrationInfo *info = g_malloc0(sizeof(*info));
@@ -626,18 +646,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
info->has_setup_time = true;
info->setup_time = s->setup_time;
- info->has_ram = true;
- info->ram = g_malloc0(sizeof(*info->ram));
- info->ram->transferred = ram_bytes_transferred();
- info->ram->remaining = ram_bytes_remaining();
- info->ram->total = ram_bytes_total();
- info->ram->duplicate = dup_mig_pages_transferred();
- info->ram->skipped = skipped_mig_pages_transferred();
- info->ram->normal = norm_mig_pages_transferred();
- info->ram->normal_bytes = norm_mig_bytes_transferred();
- info->ram->dirty_pages_rate = s->dirty_pages_rate;
- info->ram->mbps = s->mbps;
- info->ram->dirty_sync_count = s->dirty_sync_count;
+ populate_ram_info(info, s);
if (blk_mig_active()) {
info->has_disk = true;
@@ -665,18 +674,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
info->has_setup_time = true;
info->setup_time = s->setup_time;
- info->has_ram = true;
- info->ram = g_malloc0(sizeof(*info->ram));
- info->ram->transferred = ram_bytes_transferred();
- info->ram->remaining = ram_bytes_remaining();
- info->ram->total = ram_bytes_total();
- info->ram->duplicate = dup_mig_pages_transferred();
- info->ram->skipped = skipped_mig_pages_transferred();
- info->ram->normal = norm_mig_pages_transferred();
- info->ram->normal_bytes = norm_mig_bytes_transferred();
- info->ram->dirty_pages_rate = s->dirty_pages_rate;
- info->ram->mbps = s->mbps;
- info->ram->dirty_sync_count = s->dirty_sync_count;
+ populate_ram_info(info, s);
if (blk_mig_active()) {
info->has_disk = true;
@@ -699,17 +697,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
info->has_setup_time = true;
info->setup_time = s->setup_time;
- info->has_ram = true;
- info->ram = g_malloc0(sizeof(*info->ram));
- info->ram->transferred = ram_bytes_transferred();
- info->ram->remaining = 0;
- info->ram->total = ram_bytes_total();
- info->ram->duplicate = dup_mig_pages_transferred();
- info->ram->skipped = skipped_mig_pages_transferred();
- info->ram->normal = norm_mig_pages_transferred();
- info->ram->normal_bytes = norm_mig_bytes_transferred();
- info->ram->mbps = s->mbps;
- info->ram->dirty_sync_count = s->dirty_sync_count;
+ populate_ram_info(info, s);
break;
case MIGRATION_STATUS_FAILED:
info->has_status = true;
@@ -732,6 +720,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
{
MigrationState *s = migrate_get_current();
MigrationCapabilityStatusList *cap;
+ bool old_postcopy_cap = migrate_postcopy_ram();
if (migration_is_setup_or_active(s->state)) {
error_setg(errp, QERR_MIGRATION_ACTIVE);
@@ -754,6 +743,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
false;
}
+ /* This check is reasonably expensive, so only when it's being
+ * set the first time, also it's only the destination that needs
+ * special support.
+ */
+ if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
+ !postcopy_ram_supported_by_host()) {
+ /* postcopy_ram_supported_by_host will have emitted a more
+ * detailed message
+ */
+ error_report("Postcopy is not supported");
+ s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
+ false;
+ }
}
}
@@ -1004,6 +1006,7 @@ MigrationState *migrate_init(const MigrationParams *params)
s->dirty_sync_count = 0;
s->start_postcopy = false;
s->postcopy_after_devices = false;
+ s->postcopy_requests = 0;
s->migration_thread_running = false;
s->last_req_rb = NULL;
error_free(s->error);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 47250b675d..abe8c60a90 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -51,7 +51,6 @@ struct PostcopyDiscardState {
#if defined(__linux__)
#include <poll.h>
-#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <asm/types.h> /* for __u64 */
diff --git a/migration/ram.c b/migration/ram.c
index 844ea4694f..42fb8ac6d6 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1169,6 +1169,7 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
{
RAMBlock *ramblock;
+ ms->postcopy_requests++;
rcu_read_lock();
if (!rbname) {
/* Reuse last RAMBlock */
@@ -1557,7 +1558,9 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms,
} else {
discard_length = zero - one;
}
- postcopy_discard_send_range(ms, pds, one, discard_length);
+ if (discard_length) {
+ postcopy_discard_send_range(ms, pds, one, discard_length);
+ }
current = one + discard_length;
} else {
current = one;
diff --git a/migration/rdma.c b/migration/rdma.c
index 51bafc702b..5110ec828d 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -1511,7 +1511,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
while (1) {
/*
- * Coroutine doesn't start until process_incoming_migration()
+ * Coroutine doesn't start until migration_fd_process_incoming()
* so don't yield unless we know we're running inside of a coroutine.
*/
if (rdma->migration_started_on_destination) {
@@ -3620,7 +3620,7 @@ static void rdma_accept_incoming_migration(void *opaque)
}
rdma->migration_started_on_destination = 1;
- process_incoming_migration(f);
+ migration_fd_process_incoming(f);
}
void rdma_start_incoming_migration(const char *host_port, Error **errp)
diff --git a/migration/socket.c b/migration/socket.c
index 977a8d3c1d..5c0a38f7b9 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -83,7 +83,7 @@ static void socket_outgoing_migration(Object *src,
migrate_fd_error(data->s, err);
} else {
trace_migration_socket_outgoing_connected(data->hostname);
- migration_set_outgoing_channel(data->s, sioc, data->hostname);
+ migration_channel_connect(data->s, sioc, data->hostname);
}
object_unref(src);
}
@@ -140,8 +140,8 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc,
trace_migration_socket_incoming_accepted();
- migration_set_incoming_channel(migrate_get_current(),
- QIO_CHANNEL(sioc));
+ migration_channel_process_incoming(migrate_get_current(),
+ QIO_CHANNEL(sioc));
object_unref(OBJECT(sioc));
out:
diff --git a/migration/tls.c b/migration/tls.c
index 75f959ff9c..12c053d15f 100644
--- a/migration/tls.c
+++ b/migration/tls.c
@@ -72,14 +72,14 @@ static void migration_tls_incoming_handshake(Object *src,
error_report("%s", error_get_pretty(err));
} else {
trace_migration_tls_incoming_handshake_complete();
- migration_set_incoming_channel(migrate_get_current(), ioc);
+ migration_channel_process_incoming(migrate_get_current(), ioc);
}
object_unref(OBJECT(ioc));
}
-void migration_tls_set_incoming_channel(MigrationState *s,
- QIOChannel *ioc,
- Error **errp)
+void migration_tls_channel_process_incoming(MigrationState *s,
+ QIOChannel *ioc,
+ Error **errp)
{
QCryptoTLSCreds *creds;
QIOChannelTLS *tioc;
@@ -119,16 +119,16 @@ static void migration_tls_outgoing_handshake(Object *src,
migrate_fd_error(s, err);
} else {
trace_migration_tls_outgoing_handshake_complete();
- migration_set_outgoing_channel(s, ioc, NULL);
+ migration_channel_connect(s, ioc, NULL);
}
object_unref(OBJECT(ioc));
}
-void migration_tls_set_outgoing_channel(MigrationState *s,
- QIOChannel *ioc,
- const char *hostname,
- Error **errp)
+void migration_tls_channel_connect(MigrationState *s,
+ QIOChannel *ioc,
+ const char *hostname,
+ Error **errp)
{
QCryptoTLSCreds *creds;
QIOChannelTLS *tioc;
diff --git a/nbd/client.c b/nbd/client.c
index 31b88f3a31..287487c6c2 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -33,8 +33,10 @@ static int nbd_errno_to_system_errno(int err)
return ENOMEM;
case NBD_ENOSPC:
return ENOSPC;
- case NBD_EINVAL:
default:
+ TRACE("Squashing unexpected error %d to EINVAL", err);
+ /* fallthrough */
+ case NBD_EINVAL:
return EINVAL;
}
}
@@ -109,25 +111,27 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
switch (type) {
case NBD_REP_ERR_UNSUP:
- TRACE("server doesn't understand request %d, attempting fallback",
- opt);
+ TRACE("server doesn't understand request %" PRIx32
+ ", attempting fallback", opt);
result = 0;
goto cleanup;
case NBD_REP_ERR_POLICY:
- error_setg(errp, "Denied by server for option %x", opt);
+ error_setg(errp, "Denied by server for option %" PRIx32, opt);
break;
case NBD_REP_ERR_INVALID:
- error_setg(errp, "Invalid data length for option %x", opt);
+ error_setg(errp, "Invalid data length for option %" PRIx32, opt);
break;
case NBD_REP_ERR_TLS_REQD:
- error_setg(errp, "TLS negotiation required before option %x", opt);
+ error_setg(errp, "TLS negotiation required before option %" PRIx32,
+ opt);
break;
default:
- error_setg(errp, "Unknown error code when asking for option %x", opt);
+ error_setg(errp, "Unknown error code when asking for option %" PRIx32,
+ opt);
break;
}
@@ -165,7 +169,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
}
opt = be32_to_cpu(opt);
if (opt != NBD_OPT_LIST) {
- error_setg(errp, "Unexpected option type %x expected %x",
+ error_setg(errp, "Unexpected option type %" PRIx32 " expected %x",
opt, NBD_OPT_LIST);
return -1;
}
@@ -206,8 +210,8 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
error_setg(errp, "incorrect option name length");
return -1;
}
- if (namelen > 255) {
- error_setg(errp, "export name length too long %d", namelen);
+ if (namelen > NBD_MAX_NAME_SIZE) {
+ error_setg(errp, "export name length too long %" PRIu32, namelen);
return -1;
}
@@ -234,7 +238,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
g_free(buf);
}
} else {
- error_setg(errp, "Unexpected reply type %x expected %x",
+ error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
type, NBD_REP_SERVER);
return -1;
}
@@ -349,7 +353,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
}
opt = be32_to_cpu(opt);
if (opt != NBD_OPT_STARTTLS) {
- error_setg(errp, "Unexpected option type %x expected %x",
+ error_setg(errp, "Unexpected option type %" PRIx32 " expected %x",
opt, NBD_OPT_STARTTLS);
return NULL;
}
@@ -361,7 +365,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
}
type = be32_to_cpu(type);
if (type != NBD_REP_ACK) {
- error_setg(errp, "Server rejected request to start TLS %x",
+ error_setg(errp, "Server rejected request to start TLS %" PRIx32,
type);
return NULL;
}
@@ -373,7 +377,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
}
length = be32_to_cpu(length);
if (length != 0) {
- error_setg(errp, "Start TLS response was not zero %x",
+ error_setg(errp, "Start TLS response was not zero %" PRIu32,
length);
return NULL;
}
@@ -384,7 +388,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
return NULL;
}
data.loop = g_main_loop_new(g_main_context_default(), FALSE);
- TRACE("Starting TLS hanshake");
+ TRACE("Starting TLS handshake");
qio_channel_tls_handshake(tioc,
nbd_tls_handshake,
&data,
@@ -474,7 +478,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
}
globalflags = be16_to_cpu(globalflags);
*flags = globalflags << 16;
- TRACE("Global flags are %x", globalflags);
+ TRACE("Global flags are %" PRIx32, globalflags);
if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
fixedNewStyle = true;
TRACE("Server supports fixed new style");
@@ -550,7 +554,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
}
exportflags = be16_to_cpu(exportflags);
*flags |= exportflags;
- TRACE("Export flags are %x", exportflags);
+ TRACE("Export flags are %" PRIx16, exportflags);
} else if (magic == NBD_CLIENT_MAGIC) {
if (name) {
error_setg(errp, "Server does not support export names");
@@ -572,7 +576,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
error_setg(errp, "Failed to read export flags");
goto fail;
}
- *flags = be32_to_cpup(flags);
+ *flags = be32_to_cpu(*flags);
} else {
error_setg(errp, "Bad magic received");
goto fail;
@@ -591,9 +595,15 @@ fail:
#ifdef __linux__
int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size)
{
+ unsigned long sectors = size / BDRV_SECTOR_SIZE;
+ if (size / BDRV_SECTOR_SIZE != sectors) {
+ LOG("Export size %lld too large for 32-bit kernel", (long long) size);
+ return -E2BIG;
+ }
+
TRACE("Setting NBD socket");
- if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) {
+ if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
int serrno = errno;
LOG("Failed to set NBD socket");
return -serrno;
@@ -601,21 +611,25 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size)
TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
- if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) {
+ if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
int serrno = errno;
LOG("Failed setting NBD block size");
return -serrno;
}
- TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE));
+ TRACE("Setting size to %lu block(s)", sectors);
+ if (size % BDRV_SECTOR_SIZE) {
+ TRACE("Ignoring trailing %d bytes of export",
+ (int) (size % BDRV_SECTOR_SIZE));
+ }
- if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) {
+ if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
int serrno = errno;
LOG("Failed setting size (in blocks)");
return -serrno;
}
- if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) {
+ if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) flags) < 0) {
if (errno == ENOTTY) {
int read_only = (flags & NBD_FLAG_READ_ONLY) != 0;
TRACE("Setting readonly attribute");
@@ -665,6 +679,15 @@ int nbd_client(int fd)
errno = serrno;
return ret;
}
+
+int nbd_disconnect(int fd)
+{
+ ioctl(fd, NBD_CLEAR_QUE);
+ ioctl(fd, NBD_DISCONNECT);
+ ioctl(fd, NBD_CLEAR_SOCK);
+ return 0;
+}
+
#else
int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size)
{
@@ -675,6 +698,10 @@ int nbd_client(int fd)
{
return -ENOTSUP;
}
+int nbd_disconnect(int fd)
+{
+ return -ENOTSUP;
+}
#endif
ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
@@ -683,14 +710,15 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
ssize_t ret;
TRACE("Sending request to server: "
- "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
+ "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64
+ ", .type=%" PRIu16 " }",
request->from, request->len, request->handle, request->type);
- cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
- cpu_to_be32w((uint32_t*)(buf + 4), request->type);
- cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
- cpu_to_be64w((uint64_t*)(buf + 16), request->from);
- cpu_to_be32w((uint32_t*)(buf + 24), request->len);
+ stl_be_p(buf, NBD_REQUEST_MAGIC);
+ stl_be_p(buf + 4, request->type);
+ stq_be_p(buf + 8, request->handle);
+ stq_be_p(buf + 16, request->from);
+ stl_be_p(buf + 24, request->len);
ret = write_sync(ioc, buf, sizeof(buf));
if (ret < 0) {
@@ -726,18 +754,18 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
[ 7 .. 15] handle
*/
- magic = be32_to_cpup((uint32_t*)buf);
- reply->error = be32_to_cpup((uint32_t*)(buf + 4));
- reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
+ magic = ldl_be_p(buf);
+ reply->error = ldl_be_p(buf + 4);
+ reply->handle = ldq_be_p(buf + 8);
reply->error = nbd_errno_to_system_errno(reply->error);
- TRACE("Got reply: "
- "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
+ TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32
+ ", handle = %" PRIu64" }",
magic, reply->error, reply->handle);
if (magic != NBD_REPLY_MAGIC) {
- LOG("invalid magic (got 0x%x)", magic);
+ LOG("invalid magic (got 0x%" PRIx32 ")", magic);
return -EINVAL;
}
return 0;
diff --git a/nbd/server.c b/nbd/server.c
index b2cfeb9843..a677e266ff 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -52,6 +52,7 @@ struct NBDRequest {
QSIMPLEQ_ENTRY(NBDRequest) entry;
NBDClient *client;
uint8_t *data;
+ bool complete;
};
struct NBDExport {
@@ -196,7 +197,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
uint64_t magic;
uint32_t len;
- TRACE("Reply opt=%x type=%x", type, opt);
+ TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt);
magic = cpu_to_be64(NBD_REP_MAGIC);
if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
@@ -226,7 +227,7 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
uint64_t magic, name_len;
uint32_t opt, type, len;
- TRACE("Advertizing export name '%s'", exp->name ? exp->name : "");
+ TRACE("Advertising export name '%s'", exp->name ? exp->name : "");
name_len = strlen(exp->name);
magic = cpu_to_be64(NBD_REP_MAGIC);
if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
@@ -285,13 +286,13 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
{
int rc = -EINVAL;
- char name[256];
+ char name[NBD_MAX_NAME_SIZE + 1];
/* Client sends:
[20 .. xx] export name (length bytes)
*/
TRACE("Checking length");
- if (length > 255) {
+ if (length >= sizeof(name)) {
LOG("Bad length received");
goto fail;
}
@@ -334,7 +335,10 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
return NULL;
}
- nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS);
+ if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+ NBD_OPT_STARTTLS) < 0) {
+ return NULL;
+ }
tioc = qio_channel_tls_new_server(ioc,
client->tlscreds,
@@ -392,12 +396,12 @@ static int nbd_negotiate_options(NBDClient *client)
TRACE("Checking client flags");
be32_to_cpus(&flags);
if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
- TRACE("Support supports fixed newstyle handshake");
+ TRACE("Client supports fixed newstyle handshake");
fixedNewstyle = true;
flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
}
if (flags != 0) {
- TRACE("Unknown client flags 0x%x received", flags);
+ TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
return -EIO;
}
@@ -431,12 +435,12 @@ static int nbd_negotiate_options(NBDClient *client)
}
length = be32_to_cpu(length);
- TRACE("Checking option 0x%x", clientflags);
+ TRACE("Checking option 0x%" PRIx32, clientflags);
if (client->tlscreds &&
client->ioc == (QIOChannel *)client->sioc) {
QIOChannel *tioc;
if (!fixedNewstyle) {
- TRACE("Unsupported option 0x%x", clientflags);
+ TRACE("Unsupported option 0x%" PRIx32, clientflags);
return -EINVAL;
}
switch (clientflags) {
@@ -455,12 +459,16 @@ static int nbd_negotiate_options(NBDClient *client)
return -EINVAL;
default:
- TRACE("Option 0x%x not permitted before TLS", clientflags);
+ TRACE("Option 0x%" PRIx32 " not permitted before TLS",
+ clientflags);
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
- nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
- clientflags);
+ ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
+ clientflags);
+ if (ret < 0) {
+ return ret;
+ }
break;
}
} else if (fixedNewstyle) {
@@ -484,21 +492,29 @@ static int nbd_negotiate_options(NBDClient *client)
}
if (client->tlscreds) {
TRACE("TLS already enabled");
- nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_INVALID,
- clientflags);
+ ret = nbd_negotiate_send_rep(client->ioc,
+ NBD_REP_ERR_INVALID,
+ clientflags);
} else {
TRACE("TLS not configured");
- nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_POLICY,
- clientflags);
+ ret = nbd_negotiate_send_rep(client->ioc,
+ NBD_REP_ERR_POLICY,
+ clientflags);
+ }
+ if (ret < 0) {
+ return ret;
}
break;
default:
- TRACE("Unsupported option 0x%x", clientflags);
+ TRACE("Unsupported option 0x%" PRIx32, clientflags);
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
- nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
- clientflags);
+ ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
+ clientflags);
+ if (ret < 0) {
+ return ret;
+ }
break;
}
} else {
@@ -511,7 +527,7 @@ static int nbd_negotiate_options(NBDClient *client)
return nbd_negotiate_handle_export_name(client, length);
default:
- TRACE("Unsupported option 0x%x", clientflags);
+ TRACE("Unsupported option 0x%" PRIx32, clientflags);
return -EINVAL;
}
}
@@ -560,6 +576,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
oldStyle = client->exp != NULL && !client->tlscreds;
if (oldStyle) {
assert ((client->exp->nbdflags & ~65535) == 0);
+ TRACE("advertising size %" PRIu64 " and flags %x",
+ client->exp->size, client->exp->nbdflags | myflags);
stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
stq_be_p(buf + 16, client->exp->size);
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
@@ -589,6 +607,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
}
assert ((client->exp->nbdflags & ~65535) == 0);
+ TRACE("advertising size %" PRIu64 " and flags %x",
+ client->exp->size, client->exp->nbdflags | myflags);
stq_be_p(buf + 18, client->exp->size);
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) !=
@@ -604,24 +624,6 @@ fail:
return rc;
}
-#ifdef __linux__
-
-int nbd_disconnect(int fd)
-{
- ioctl(fd, NBD_CLEAR_QUE);
- ioctl(fd, NBD_DISCONNECT);
- ioctl(fd, NBD_CLEAR_SOCK);
- return 0;
-}
-
-#else
-
-int nbd_disconnect(int fd)
-{
- return -ENOTSUP;
-}
-#endif
-
static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
@@ -646,18 +648,18 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
[24 .. 27] len
*/
- magic = be32_to_cpup((uint32_t*)buf);
- request->type = be32_to_cpup((uint32_t*)(buf + 4));
- request->handle = be64_to_cpup((uint64_t*)(buf + 8));
- request->from = be64_to_cpup((uint64_t*)(buf + 16));
- request->len = be32_to_cpup((uint32_t*)(buf + 24));
+ magic = ldl_be_p(buf);
+ request->type = ldl_be_p(buf + 4);
+ request->handle = ldq_be_p(buf + 8);
+ request->from = ldq_be_p(buf + 16);
+ request->len = ldl_be_p(buf + 24);
- TRACE("Got request: "
- "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
+ TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32
+ ", from = %" PRIu64 " , len = %" PRIu32 " }",
magic, request->type, request->from, request->len);
if (magic != NBD_REQUEST_MAGIC) {
- LOG("invalid magic (got 0x%x)", magic);
+ LOG("invalid magic (got 0x%" PRIx32 ")", magic);
return -EINVAL;
}
return 0;
@@ -670,7 +672,8 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply)
reply->error = system_errno_to_nbd_errno(reply->error);
- TRACE("Sending response to client: { .error = %d, handle = %" PRIu64 " }",
+ TRACE("Sending response to client: { .error = %" PRId32
+ ", handle = %" PRIu64 " }",
reply->error, reply->handle);
/* Reply
@@ -969,7 +972,13 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
return rc;
}
-static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
+/* Collect a client request. Return 0 if request looks valid, -EAGAIN
+ * to keep trying the collection, -EIO to drop connection right away,
+ * and any other negative value to report an error to the client
+ * (although the caller may still need to disconnect after reporting
+ * the error). */
+static ssize_t nbd_co_receive_request(NBDRequest *req,
+ struct nbd_request *request)
{
NBDClient *client = req->client;
uint32_t command;
@@ -987,19 +996,34 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
goto out;
}
+ TRACE("Decoding type");
+
+ command = request->type & NBD_CMD_MASK_COMMAND;
+ if (command != NBD_CMD_WRITE) {
+ /* No payload, we are ready to read the next request. */
+ req->complete = true;
+ }
+
+ if (command == NBD_CMD_DISC) {
+ /* Special case: we're going to disconnect without a reply,
+ * whether or not flags, from, or len are bogus */
+ TRACE("Request type is DISCONNECT");
+ rc = -EIO;
+ goto out;
+ }
+
+ /* Check for sanity in the parameters, part 1. Defer as many
+ * checks as possible until after reading any NBD_CMD_WRITE
+ * payload, so we can try and keep the connection alive. */
if ((request->from + request->len) < request->from) {
- LOG("integer overflow detected! "
- "you're probably being attacked");
+ LOG("integer overflow detected, you're probably being attacked");
rc = -EINVAL;
goto out;
}
- TRACE("Decoding type");
-
- command = request->type & NBD_CMD_MASK_COMMAND;
if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
if (request->len > NBD_MAX_BUFFER_SIZE) {
- LOG("len (%u) is larger than max len (%u)",
+ LOG("len (%" PRIu32" ) is larger than max len (%u)",
request->len, NBD_MAX_BUFFER_SIZE);
rc = -EINVAL;
goto out;
@@ -1012,14 +1036,30 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
}
}
if (command == NBD_CMD_WRITE) {
- TRACE("Reading %u byte(s)", request->len);
+ TRACE("Reading %" PRIu32 " byte(s)", request->len);
if (read_sync(client->ioc, req->data, request->len) != request->len) {
LOG("reading from socket failed");
rc = -EIO;
goto out;
}
+ req->complete = true;
+ }
+
+ /* Sanity checks, part 2. */
+ if (request->from + request->len > client->exp->size) {
+ LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
+ ", Size: %" PRIu64, request->from, request->len,
+ (uint64_t)client->exp->size);
+ rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
+ goto out;
}
+ if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) {
+ LOG("unsupported flags (got 0x%x)",
+ request->type & ~NBD_CMD_MASK_COMMAND);
+ return -EINVAL;
+ }
+
rc = 0;
out:
@@ -1038,6 +1078,7 @@ static void nbd_trip(void *opaque)
struct nbd_reply reply;
ssize_t ret;
uint32_t command;
+ int flags;
TRACE("Reading request.");
if (client->closing) {
@@ -1061,14 +1102,6 @@ static void nbd_trip(void *opaque)
goto error_reply;
}
command = request.type & NBD_CMD_MASK_COMMAND;
- if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) {
- LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
- ", Offset: %" PRIu64 "\n",
- request.from, request.len,
- (uint64_t)exp->size, (uint64_t)exp->dev_offset);
- LOG("requested operation past EOF--bad client?");
- goto invalid_request;
- }
if (client->closing) {
/*
@@ -1099,7 +1132,7 @@ static void nbd_trip(void *opaque)
goto error_reply;
}
- TRACE("Read %u byte(s)", request.len);
+ TRACE("Read %" PRIu32" byte(s)", request.len);
if (nbd_co_send_reply(req, &reply, request.len) < 0)
goto out;
break;
@@ -1114,31 +1147,27 @@ static void nbd_trip(void *opaque)
TRACE("Writing to device");
+ flags = 0;
+ if (request.type & NBD_CMD_FLAG_FUA) {
+ flags |= BDRV_REQ_FUA;
+ }
ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
- req->data, request.len, 0);
+ req->data, request.len, flags);
if (ret < 0) {
LOG("writing to file failed");
reply.error = -ret;
goto error_reply;
}
- if (request.type & NBD_CMD_FLAG_FUA) {
- ret = blk_co_flush(exp->blk);
- if (ret < 0) {
- LOG("flush failed");
- reply.error = -ret;
- goto error_reply;
- }
- }
-
if (nbd_co_send_reply(req, &reply, 0) < 0) {
goto out;
}
break;
+
case NBD_CMD_DISC:
- TRACE("Request type is DISCONNECT");
- errno = 0;
- goto out;
+ /* unreachable, thanks to special case in nbd_co_receive_request() */
+ abort();
+
case NBD_CMD_FLUSH:
TRACE("Request type is FLUSH");
@@ -1173,11 +1202,13 @@ static void nbd_trip(void *opaque)
}
break;
default:
- LOG("invalid request type (%u) received", request.type);
- invalid_request:
+ LOG("invalid request type (%" PRIu32 ") received", request.type);
reply.error = EINVAL;
error_reply:
- if (nbd_co_send_reply(req, &reply, 0) < 0) {
+ /* We must disconnect after NBD_CMD_WRITE if we did not
+ * read the payload.
+ */
+ if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
goto out;
}
break;
diff --git a/net/netmap.c b/net/netmap.c
index 6cc0db5ee1..64967b947e 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -26,7 +26,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <net/if.h>
-#include <sys/mman.h>
#define NETMAP_WITH_LIBS
#include <net/netmap.h>
#include <net/netmap_user.h>
diff --git a/os-posix.c b/os-posix.c
index 107fde38bf..3755265582 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -26,7 +26,6 @@
#include "qemu/osdep.h"
#include <sys/wait.h>
/*needed for MAP_POPULATE before including qemu-options.h */
-#include <sys/mman.h>
#include <pwd.h>
#include <grp.h>
#include <libgen.h>
diff --git a/qapi-schema.json b/qapi-schema.json
index 48c3a6f5cd..40b1db4271 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -382,13 +382,17 @@
#
# @dirty-sync-count: number of times that dirty ram was synchronized (since 2.1)
#
+# @postcopy-requests: The number of page requests received from the destination
+# (since 2.7)
+#
# Since: 0.14.0
##
{ 'struct': 'MigrationStats',
'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
'duplicate': 'int', 'skipped': 'int', 'normal': 'int',
'normal-bytes': 'int', 'dirty-pages-rate' : 'int',
- 'mbps' : 'number', 'dirty-sync-count' : 'int' } }
+ 'mbps' : 'number', 'dirty-sync-count' : 'int',
+ 'postcopy-requests' : 'int' } }
##
# @XBZRLECacheStats
@@ -486,7 +490,7 @@
#
# @error-desc: #optional the human readable error description string, when
# @status is 'failed'. Clients should not attempt to parse the
-# error strings. (Since 2.6)
+# error strings. (Since 2.7)
#
# Since: 0.14.0
##
@@ -631,7 +635,7 @@
# migration URI does not already include a hostname. For
# example if using fd: or exec: based migration, the
# hostname must be provided so that the server's x509
-# certificate identity canbe validated. (Since 2.7)
+# certificate identity can be validated. (Since 2.7)
#
# Since: 2.4
##
@@ -672,7 +676,7 @@
# migration URI does not already include a hostname. For
# example if using fd: or exec: based migration, the
# hostname must be provided so that the server's x509
-# certificate identity canbe validated. (Since 2.7)
+# certificate identity can be validated. (Since 2.7)
#
# Since: 2.4
##
@@ -708,14 +712,14 @@
# be for a 'client' endpoint, while for the incoming side the
# credentials must be for a 'server' endpoint. Setting this
# will enable TLS for all migrations. The default is unset,
-# resulting in unsecured migration at the QEMU level. (Since 2.6)
+# resulting in unsecured migration at the QEMU level. (Since 2.7)
#
# @tls-hostname: hostname of the target host for the migration. This is
# required when using x509 based TLS credentials and the
# migration URI does not already include a hostname. For
# example if using fd: or exec: based migration, the
# hostname must be provided so that the server's x509
-# certificate identity canbe validated. (Since 2.6)
+# certificate identity can be validated. (Since 2.7)
#
# Since: 2.4
##
diff --git a/qemu-char.c b/qemu-char.c
index e3127772c4..84f49acbac 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -47,7 +47,6 @@
#include <sys/times.h>
#include <sys/wait.h>
#include <termios.h>
-#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
diff --git a/qemu-img.c b/qemu-img.c
index 251386b49d..14e2661a5c 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3570,7 +3570,7 @@ static int img_bench(int argc, char **argv)
BlockBackend *blk = NULL;
BenchData data = {};
int flags = 0;
- bool writethrough;
+ bool writethrough = false;
struct timeval t1, t2;
int i;
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 6554f0ab65..9519db324b 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -154,8 +154,8 @@ static void read_partition(uint8_t *p, struct partition_record *r)
r->end_cylinder = p[7] | ((p[6] << 2) & 0x300);
r->end_sector = p[6] & 0x3f;
- r->start_sector_abs = le32_to_cpup((uint32_t *)(p + 8));
- r->nb_sectors_abs = le32_to_cpup((uint32_t *)(p + 12));
+ r->start_sector_abs = ldl_le_p(p + 8);
+ r->nb_sectors_abs = ldl_le_p(p + 12);
}
static int find_partition(BlockBackend *blk, int partition,
diff --git a/qemu-options.hx b/qemu-options.hx
index 0e42ba55be..17f15ad1fe 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3214,6 +3214,8 @@ STEXI
@item -L @var{path}
@findex -L
Set the directory for the BIOS, VGA BIOS and keymaps.
+
+To list all the data directories, use @code{-L help}.
ETEXI
DEF("bios", HAS_ARG, QEMU_OPTION_bios, \
diff --git a/scripts/clean-includes b/scripts/clean-includes
index 37b73b5433..4412a5590a 100755
--- a/scripts/clean-includes
+++ b/scripts/clean-includes
@@ -105,6 +105,8 @@ for f in "$@"; do
*include/qemu/osdep.h | \
*include/qemu/compiler.h | \
*include/glib-compat.h | \
+ *include/sysemu/os-posix.h | \
+ *include/sysemu/os-win32.h | \
*include/standard-headers/ )
# Removing include lines from osdep.h itself would be counterproductive.
echo "SKIPPING $f (special case header)"
@@ -145,6 +147,7 @@ for f in "$@"; do
<stdlib.h> <stdio.h> <string.h> <strings.h> <inttypes.h>
<limits.h> <unistd.h> <time.h> <ctype.h> <errno.h> <fcntl.h>
<sys/stat.h> <sys/time.h> <assert.h> <signal.h> <glib.h>
+ <sys/stat.h> <sys/time.h> <assert.h> <signal.h> <glib.h> <sys/mman.h>
"sysemu/os-posix.h, sysemu/os-win32.h "glib-compat.h"
"qemu/typedefs.h"
))' "$f"
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 83da447cb7..5c2bd7a10b 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -10,7 +10,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/kvm.h>
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index c35c676e14..069da0c5fd 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -10,7 +10,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/kvm.h>
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 2d6a310ebb..5faa76c57e 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -11,7 +11,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <sys/ptrace.h>
#include <linux/elf.h>
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 895a386d3b..3665fecb5f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -41,6 +41,7 @@
#include "sysemu/sysemu.h"
#include "hw/qdev-properties.h"
+#include "hw/i386/topology.h"
#ifndef CONFIG_USER_ONLY
#include "exec/address-spaces.h"
#include "hw/hw.h"
@@ -677,6 +678,14 @@ static ObjectClass *x86_cpu_class_by_name(const char *cpu_model)
return oc;
}
+static char *x86_cpu_class_get_model_name(X86CPUClass *cc)
+{
+ const char *class_name = object_class_get_name(OBJECT_CLASS(cc));
+ assert(g_str_has_suffix(class_name, X86_CPU_TYPE_SUFFIX));
+ return g_strndup(class_name,
+ strlen(class_name) - strlen(X86_CPU_TYPE_SUFFIX));
+}
+
struct X86CPUDefinition {
const char *name;
uint32_t level;
@@ -1239,6 +1248,51 @@ static X86CPUDefinition builtin_x86_defs[] = {
.model_id = "Intel Core Processor (Broadwell)",
},
{
+ .name = "Skylake-Client",
+ .level = 0xd,
+ .vendor = CPUID_VENDOR_INTEL,
+ .family = 6,
+ .model = 94,
+ .stepping = 3,
+ .features[FEAT_1_EDX] =
+ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
+ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 |
+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 |
+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE |
+ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
+ CPUID_EXT2_SYSCALL,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
+ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
+ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID |
+ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX,
+ /* Missing: XSAVES (not supported by some Linux versions,
+ * including v4.1 to v4.6).
+ * KVM doesn't yet expose any XSAVES state save component,
+ * and the only one defined in Skylake (processor tracing)
+ * probably will block migration anyway.
+ */
+ .features[FEAT_XSAVE] =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
+ .xlevel = 0x80000008,
+ .model_id = "Intel Core Processor (Skylake)",
+ },
+ {
.name = "Opteron_G1",
.level = 5,
.vendor = CPUID_VENDOR_AMD,
@@ -1501,16 +1555,17 @@ static void host_x86_cpu_initfn(Object *obj)
CPUX86State *env = &cpu->env;
KVMState *s = kvm_state;
- assert(kvm_enabled());
-
/* We can't fill the features array here because we don't know yet if
* "migratable" is true or false.
*/
cpu->host_features = true;
- env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
- env->cpuid_xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX);
- env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);
+ /* If KVM is disabled, x86_cpu_realizefn() will report an error later */
+ if (kvm_enabled()) {
+ env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
+ env->cpuid_xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX);
+ env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);
+ }
object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort);
}
@@ -1894,6 +1949,14 @@ static inline void feat2prop(char *s)
}
}
+/* Compatibily hack to maintain legacy +-feat semantic,
+ * where +-feat overwrites any feature set by
+ * feat=on|feat even if the later is parsed after +-feat
+ * (i.e. "-x2apic,x2apic=on" will result in x2apic disabled)
+ */
+static FeatureWordArray plus_features = { 0 };
+static FeatureWordArray minus_features = { 0 };
+
/* Parse "+feature,-feature,feature=foo" CPU feature string
*/
static void x86_cpu_parse_featurestr(CPUState *cs, char *features,
@@ -1901,97 +1964,61 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char *features,
{
X86CPU *cpu = X86_CPU(cs);
char *featurestr; /* Single 'key=value" string being parsed */
- FeatureWord w;
- /* Features to be added */
- FeatureWordArray plus_features = { 0 };
- /* Features to be removed */
- FeatureWordArray minus_features = { 0 };
- uint32_t numvalue;
- CPUX86State *env = &cpu->env;
Error *local_err = NULL;
- featurestr = features ? strtok(features, ",") : NULL;
+ if (!features) {
+ return;
+ }
- while (featurestr) {
- char *val;
+ for (featurestr = strtok(features, ",");
+ featurestr && !local_err;
+ featurestr = strtok(NULL, ",")) {
+ const char *name;
+ const char *val = NULL;
+ char *eq = NULL;
+
+ /* Compatibility syntax: */
if (featurestr[0] == '+') {
add_flagname_to_bitmaps(featurestr + 1, plus_features, &local_err);
+ continue;
} else if (featurestr[0] == '-') {
add_flagname_to_bitmaps(featurestr + 1, minus_features, &local_err);
- } else if ((val = strchr(featurestr, '='))) {
- *val = 0; val++;
- feat2prop(featurestr);
- if (!strcmp(featurestr, "xlevel")) {
- char *err;
- char num[32];
-
- numvalue = strtoul(val, &err, 0);
- if (!*val || *err) {
- error_setg(errp, "bad numerical value %s", val);
- return;
- }
- if (numvalue < 0x80000000) {
- error_report("xlevel value shall always be >= 0x80000000"
- ", fixup will be removed in future versions");
- numvalue += 0x80000000;
- }
- snprintf(num, sizeof(num), "%" PRIu32, numvalue);
- object_property_parse(OBJECT(cpu), num, featurestr, &local_err);
- } else if (!strcmp(featurestr, "tsc-freq")) {
- int64_t tsc_freq;
- char *err;
- char num[32];
-
- tsc_freq = qemu_strtosz_suffix_unit(val, &err,
- QEMU_STRTOSZ_DEFSUFFIX_B, 1000);
- if (tsc_freq < 0 || *err) {
- error_setg(errp, "bad numerical value %s", val);
- return;
- }
- snprintf(num, sizeof(num), "%" PRId64, tsc_freq);
- object_property_parse(OBJECT(cpu), num, "tsc-frequency",
- &local_err);
- } else if (!strcmp(featurestr, "hv-spinlocks")) {
- char *err;
- const int min = 0xFFF;
- char num[32];
- numvalue = strtoul(val, &err, 0);
- if (!*val || *err) {
- error_setg(errp, "bad numerical value %s", val);
- return;
- }
- if (numvalue < min) {
- error_report("hv-spinlocks value shall always be >= 0x%x"
- ", fixup will be removed in future versions",
- min);
- numvalue = min;
- }
- snprintf(num, sizeof(num), "%" PRId32, numvalue);
- object_property_parse(OBJECT(cpu), num, featurestr, &local_err);
- } else {
- object_property_parse(OBJECT(cpu), val, featurestr, &local_err);
- }
- } else {
- feat2prop(featurestr);
- object_property_parse(OBJECT(cpu), "on", featurestr, &local_err);
+ continue;
}
- if (local_err) {
- error_propagate(errp, local_err);
- return;
+
+ eq = strchr(featurestr, '=');
+ if (eq) {
+ *eq++ = 0;
+ val = eq;
+ } else {
+ val = "on";
}
- featurestr = strtok(NULL, ",");
- }
- if (cpu->host_features) {
- for (w = 0; w < FEATURE_WORDS; w++) {
- env->features[w] =
- x86_cpu_get_supported_feature_word(w, cpu->migratable);
+ feat2prop(featurestr);
+ name = featurestr;
+
+ /* Special case: */
+ if (!strcmp(name, "tsc-freq")) {
+ int64_t tsc_freq;
+ char *err;
+ char num[32];
+
+ tsc_freq = qemu_strtosz_suffix_unit(val, &err,
+ QEMU_STRTOSZ_DEFSUFFIX_B, 1000);
+ if (tsc_freq < 0 || *err) {
+ error_setg(errp, "bad numerical value %s", val);
+ return;
+ }
+ snprintf(num, sizeof(num), "%" PRId64, tsc_freq);
+ val = num;
+ name = "tsc-frequency";
}
+
+ object_property_parse(OBJECT(cpu), val, name, &local_err);
}
- for (w = 0; w < FEATURE_WORDS; w++) {
- env->features[w] |= plus_features[w];
- env->features[w] &= ~minus_features[w];
+ if (local_err) {
+ error_propagate(errp, local_err);
}
}
@@ -2175,7 +2202,6 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
X86CPU *cpu_x86_create(const char *cpu_model, Error **errp)
{
X86CPU *cpu = NULL;
- X86CPUClass *xcc;
ObjectClass *oc;
gchar **model_pieces;
char *name, *features;
@@ -2194,12 +2220,6 @@ X86CPU *cpu_x86_create(const char *cpu_model, Error **errp)
error_setg(&error, "Unable to find CPU definition: %s", name);
goto out;
}
- xcc = X86_CPU_CLASS(oc);
-
- if (xcc->kvm_required && !kvm_enabled()) {
- error_setg(&error, "CPU model '%s' requires KVM", name);
- goto out;
- }
cpu = X86_CPU(object_new(object_class_get_name(oc)));
@@ -2222,25 +2242,7 @@ out:
X86CPU *cpu_x86_init(const char *cpu_model)
{
- Error *error = NULL;
- X86CPU *cpu;
-
- cpu = cpu_x86_create(cpu_model, &error);
- if (error) {
- goto out;
- }
-
- object_property_set_bool(OBJECT(cpu), true, "realized", &error);
-
-out:
- if (error) {
- error_report_err(error);
- if (cpu != NULL) {
- object_unref(OBJECT(cpu));
- cpu = NULL;
- }
- }
- return cpu;
+ return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model));
}
static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data)
@@ -2447,6 +2449,36 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = 0;
}
break;
+ case 0xB:
+ /* Extended Topology Enumeration Leaf */
+ if (!cpu->enable_cpuid_0xb) {
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+
+ *ecx = count & 0xff;
+ *edx = cpu->apic_id;
+
+ switch (count) {
+ case 0:
+ *eax = apicid_core_offset(smp_cores, smp_threads);
+ *ebx = smp_threads;
+ *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
+ break;
+ case 1:
+ *eax = apicid_pkg_offset(smp_cores, smp_threads);
+ *ebx = smp_cores * smp_threads;
+ *ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
+ break;
+ default:
+ *eax = 0;
+ *ebx = 0;
+ *ecx |= CPUID_TOPOLOGY_LEVEL_INVALID;
+ }
+
+ assert(!(*eax & ~0x1f));
+ *ebx &= 0xffff; /* The count doesn't need to be reliable. */
+ break;
case 0xD: {
KVMState *s = cs->kvm_state;
uint64_t ena_mask;
@@ -2874,12 +2906,37 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
CPUX86State *env = &cpu->env;
Error *local_err = NULL;
static bool ht_warned;
+ FeatureWord w;
+
+ if (xcc->kvm_required && !kvm_enabled()) {
+ char *name = x86_cpu_class_get_model_name(xcc);
+ error_setg(&local_err, "CPU model '%s' requires KVM", name);
+ g_free(name);
+ goto out;
+ }
if (cpu->apic_id < 0) {
error_setg(errp, "apic-id property was not initialized properly");
return;
}
+ /*TODO: cpu->host_features incorrectly overwrites features
+ * set using "feat=on|off". Once we fix this, we can convert
+ * plus_features & minus_features to global properties
+ * inside x86_cpu_parse_featurestr() too.
+ */
+ if (cpu->host_features) {
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ env->features[w] =
+ x86_cpu_get_supported_feature_word(w, cpu->migratable);
+ }
+ }
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ cpu->env.features[w] |= plus_features[w];
+ cpu->env.features[w] &= ~minus_features[w];
+ }
+
if (env->features[FEAT_7_0_EBX] && env->cpuid_level < 7) {
env->cpuid_level = 7;
}
@@ -3206,6 +3263,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0),
DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, 0),
DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id),
+ DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 0426459bba..d9ab884c2b 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -636,6 +636,11 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_MWAIT_IBE (1U << 1) /* Interrupts can exit capability */
#define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */
+/* CPUID[0xB].ECX level types */
+#define CPUID_TOPOLOGY_LEVEL_INVALID (0U << 8)
+#define CPUID_TOPOLOGY_LEVEL_SMT (1U << 8)
+#define CPUID_TOPOLOGY_LEVEL_CORE (2U << 8)
+
#ifndef HYPERV_SPINLOCK_NEVER_RETRY
#define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF
#endif
@@ -1173,6 +1178,9 @@ struct X86CPU {
*/
bool enable_pmu;
+ /* Compatibility bits for old machine types: */
+ bool enable_cpuid_0xb;
+
/* in order to simplify APIC support, we leave this pointer to the
user */
struct DeviceState *apic_state;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index abf50e6632..ff92b1d118 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -15,7 +15,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <sys/utsname.h>
#include <linux/kvm.h>
@@ -107,6 +106,8 @@ static int has_xsave;
static int has_xcrs;
static int has_pit_state2;
+static struct kvm_cpuid2 *cpuid_cache;
+
int kvm_has_pit_state2(void)
{
return has_pit_state2;
@@ -200,9 +201,14 @@ static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
{
struct kvm_cpuid2 *cpuid;
int max = 1;
+
+ if (cpuid_cache != NULL) {
+ return cpuid_cache;
+ }
while ((cpuid = try_get_cpuid(s, max)) == NULL) {
max *= 2;
}
+ cpuid_cache = cpuid;
return cpuid;
}
@@ -320,8 +326,6 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
}
- g_free(cpuid);
-
/* fallback for older kernels */
if ((function == KVM_CPUID_FEATURES) && !found) {
ret = get_para_features(s);
diff --git a/target-mips/kvm.c b/target-mips/kvm.c
index a854e4de59..f3f832d498 100644
--- a/target-mips/kvm.c
+++ b/target-mips/kvm.c
@@ -11,7 +11,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/kvm.h>
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 6c153611c0..16208649c5 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -17,7 +17,6 @@
#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <sys/vfs.h>
#include <linux/kvm.h>
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index f108cd3875..45e94ca48a 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -23,7 +23,6 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/kvm.h>
#include <asm/ptrace.h>
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 4b4bfa5930..bf620b8dd4 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -234,6 +234,7 @@ endif
check-qtest-i386-y += tests/test-netfilter$(EXESUF)
check-qtest-i386-y += tests/test-filter-mirror$(EXESUF)
check-qtest-i386-y += tests/test-filter-redirector$(EXESUF)
+check-qtest-i386-y += tests/postcopy-test$(EXESUF)
check-qtest-x86_64-y += $(check-qtest-i386-y)
gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -599,6 +600,7 @@ tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y)
tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
+tests/postcopy-test$(EXESUF): tests/postcopy-test.o
tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c
index dbf4859f88..d497b0857c 100644
--- a/tests/e1000e-test.c
+++ b/tests/e1000e-test.c
@@ -25,7 +25,6 @@
#include "qemu/osdep.h"
-#include <glib.h>
#include "libqtest.h"
#include "qemu-common.h"
#include "libqos/pci-pc.h"
diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index bff999cf12..c1d9b3eb9e 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -13,7 +13,6 @@
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "libqtest.h"
#include "libqos/pci.h"
diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
index 010860a5b7..0957ee7555 100644
--- a/tests/ivshmem-test.c
+++ b/tests/ivshmem-test.c
@@ -10,7 +10,6 @@
#include "qemu/osdep.h"
#include <glib/gstdio.h>
-#include <sys/mman.h>
#include "contrib/ivshmem-server/ivshmem-server.h"
#include "libqos/pci-pc.h"
#include "libqtest.h"
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 5c82348265..eb00f1392b 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -26,7 +26,7 @@
#include "qapi/qmp/qjson.h"
#define MAX_IRQ 256
-#define SOCKET_TIMEOUT 5
+#define SOCKET_TIMEOUT 50
QTestState *global_qtest;
diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
new file mode 100644
index 0000000000..35d5180173
--- /dev/null
+++ b/tests/postcopy-test.c
@@ -0,0 +1,453 @@
+/*
+ * QTest testcase for postcopy
+ *
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates
+ * based on the vhost-user-test.c that is:
+ * Copyright (c) 2014 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "libqtest.h"
+#include "qemu/option.h"
+#include "qemu/range.h"
+#include "sysemu/char.h"
+#include "sysemu/sysemu.h"
+
+#include <qemu/sockets.h>
+
+const unsigned start_address = 1024 * 1024;
+const unsigned end_address = 100 * 1024 * 1024;
+bool got_stop;
+
+#if defined(__linux__)
+#include <sys/syscall.h>
+#include <sys/vfs.h>
+#endif
+
+#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+
+static bool ufd_version_check(void)
+{
+ struct uffdio_api api_struct;
+ uint64_t ioctl_mask;
+
+ int ufd = ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+
+ if (ufd == -1) {
+ g_test_message("Skipping test: userfaultfd not available");
+ return false;
+ }
+
+ api_struct.api = UFFD_API;
+ api_struct.features = 0;
+ if (ioctl(ufd, UFFDIO_API, &api_struct)) {
+ g_test_message("Skipping test: UFFDIO_API failed");
+ return false;
+ }
+
+ ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
+ (__u64)1 << _UFFDIO_UNREGISTER;
+ if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
+ g_test_message("Skipping test: Missing userfault feature");
+ return false;
+ }
+
+ return true;
+}
+
+#else
+static bool ufd_version_check(void)
+{
+ g_test_message("Skipping test: Userfault not available (builtdtime)");
+ return false;
+}
+
+#endif
+
+static const char *tmpfs;
+
+/* A simple PC boot sector that modifies memory (1-100MB) quickly
+ * outputing a 'B' every so often if it's still running.
+ */
+unsigned char bootsect[] = {
+ 0xfa, 0x0f, 0x01, 0x16, 0x74, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00,
+ 0x0f, 0x22, 0xc0, 0x66, 0xea, 0x20, 0x7c, 0x00, 0x00, 0x08, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x92, 0x0c, 0x02,
+ 0xe6, 0x92, 0xb8, 0x10, 0x00, 0x00, 0x00, 0x8e, 0xd8, 0x66, 0xb8, 0x41,
+ 0x00, 0x66, 0xba, 0xf8, 0x03, 0xee, 0xb3, 0x00, 0xb8, 0x00, 0x00, 0x10,
+ 0x00, 0xfe, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x40,
+ 0x06, 0x7c, 0xf2, 0xfe, 0xc3, 0x75, 0xe9, 0x66, 0xb8, 0x42, 0x00, 0x66,
+ 0xba, 0xf8, 0x03, 0xee, 0xeb, 0xde, 0x66, 0x90, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00, 0x27, 0x00, 0x5c, 0x7c,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xaa
+};
+
+/*
+ * Wait for some output in the serial output file,
+ * we get an 'A' followed by an endless string of 'B's
+ * but on the destination we won't have the A.
+ */
+static void wait_for_serial(const char *side)
+{
+ char *serialpath = g_strdup_printf("%s/%s", tmpfs, side);
+ FILE *serialfile = fopen(serialpath, "r");
+
+ do {
+ int readvalue = fgetc(serialfile);
+
+ switch (readvalue) {
+ case 'A':
+ /* Fine */
+ break;
+
+ case 'B':
+ /* It's alive! */
+ fclose(serialfile);
+ g_free(serialpath);
+ return;
+
+ case EOF:
+ fseek(serialfile, 0, SEEK_SET);
+ usleep(1000);
+ break;
+
+ default:
+ fprintf(stderr, "Unexpected %d on %s serial\n", readvalue, side);
+ g_assert_not_reached();
+ }
+ } while (true);
+}
+
+/*
+ * Events can get in the way of responses we are actually waiting for.
+ */
+static QDict *return_or_event(QDict *response)
+{
+ const char *event_string;
+ if (!qdict_haskey(response, "event")) {
+ return response;
+ }
+
+ /* OK, it was an event */
+ event_string = qdict_get_str(response, "event");
+ if (!strcmp(event_string, "STOP")) {
+ got_stop = true;
+ }
+ QDECREF(response);
+ return return_or_event(qtest_qmp_receive(global_qtest));
+}
+
+
+/*
+ * It's tricky to use qemu's migration event capability with qtest,
+ * events suddenly appearing confuse the qmp()/hmp() responses.
+ * so wait for a couple of passes to have happened before
+ * going postcopy.
+ */
+
+static uint64_t get_migration_pass(void)
+{
+ QDict *rsp, *rsp_return, *rsp_ram;
+ uint64_t result;
+
+ rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }"));
+ rsp_return = qdict_get_qdict(rsp, "return");
+ if (!qdict_haskey(rsp_return, "ram")) {
+ /* Still in setup */
+ result = 0;
+ } else {
+ rsp_ram = qdict_get_qdict(rsp_return, "ram");
+ result = qdict_get_try_int(rsp_ram, "dirty-sync-count", 0);
+ QDECREF(rsp);
+ }
+ return result;
+}
+
+static void wait_for_migration_complete(void)
+{
+ QDict *rsp, *rsp_return;
+ bool completed;
+
+ do {
+ const char *status;
+
+ rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }"));
+ rsp_return = qdict_get_qdict(rsp, "return");
+ status = qdict_get_str(rsp_return, "status");
+ completed = strcmp(status, "completed") == 0;
+ g_assert_cmpstr(status, !=, "failed");
+ QDECREF(rsp);
+ usleep(1000 * 100);
+ } while (!completed);
+}
+
+static void wait_for_migration_pass(void)
+{
+ uint64_t initial_pass = get_migration_pass();
+ uint64_t pass;
+
+ /* Wait for the 1st sync */
+ do {
+ initial_pass = get_migration_pass();
+ if (got_stop || initial_pass) {
+ break;
+ }
+ usleep(1000 * 100);
+ } while (true);
+
+ do {
+ usleep(1000 * 100);
+ pass = get_migration_pass();
+ } while (pass == initial_pass && !got_stop);
+}
+
+static void check_guests_ram(void)
+{
+ /* Our ASM test will have been incrementing one byte from each page from
+ * 1MB to <100MB in order.
+ * This gives us a constraint that any page's byte should be equal or less
+ * than the previous pages byte (mod 256); and they should all be equal
+ * except for one transition at the point where we meet the incrementer.
+ * (We're running this with the guest stopped).
+ */
+ unsigned address;
+ uint8_t first_byte;
+ uint8_t last_byte;
+ bool hit_edge = false;
+ bool bad = false;
+
+ qtest_memread(global_qtest, start_address, &first_byte, 1);
+ last_byte = first_byte;
+
+ for (address = start_address + 4096; address < end_address; address += 4096)
+ {
+ uint8_t b;
+ qtest_memread(global_qtest, address, &b, 1);
+ if (b != last_byte) {
+ if (((b + 1) % 256) == last_byte && !hit_edge) {
+ /* This is OK, the guest stopped at the point of
+ * incrementing the previous page but didn't get
+ * to us yet.
+ */
+ hit_edge = true;
+ } else {
+ fprintf(stderr, "Memory content inconsistency at %x"
+ " first_byte = %x last_byte = %x current = %x"
+ " hit_edge = %x\n",
+ address, first_byte, last_byte, b, hit_edge);
+ bad = true;
+ }
+ }
+ last_byte = b;
+ }
+ g_assert_false(bad);
+}
+
+static void cleanup(const char *filename)
+{
+ char *path = g_strdup_printf("%s/%s", tmpfs, filename);
+
+ unlink(path);
+}
+
+static void test_migrate(void)
+{
+ char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+ QTestState *global = global_qtest, *from, *to;
+ unsigned char dest_byte_a, dest_byte_b, dest_byte_c, dest_byte_d;
+ gchar *cmd;
+ QDict *rsp;
+
+ char *bootpath = g_strdup_printf("%s/bootsect", tmpfs);
+ FILE *bootfile = fopen(bootpath, "wb");
+
+ got_stop = false;
+ g_assert_cmpint(fwrite(bootsect, 512, 1, bootfile), ==, 1);
+ fclose(bootfile);
+
+ cmd = g_strdup_printf("-machine accel=kvm:tcg -m 150M"
+ " -name pcsource,debug-threads=on"
+ " -serial file:%s/src_serial"
+ " -drive file=%s,format=raw",
+ tmpfs, bootpath);
+ from = qtest_start(cmd);
+ g_free(cmd);
+
+ cmd = g_strdup_printf("-machine accel=kvm:tcg -m 150M"
+ " -name pcdest,debug-threads=on"
+ " -serial file:%s/dest_serial"
+ " -drive file=%s,format=raw"
+ " -incoming %s",
+ tmpfs, bootpath, uri);
+ to = qtest_init(cmd);
+ g_free(cmd);
+
+ global_qtest = from;
+ rsp = qmp("{ 'execute': 'migrate-set-capabilities',"
+ "'arguments': { "
+ "'capabilities': [ {"
+ "'capability': 'postcopy-ram',"
+ "'state': true } ] } }");
+ g_assert(qdict_haskey(rsp, "return"));
+ QDECREF(rsp);
+
+ global_qtest = to;
+ rsp = qmp("{ 'execute': 'migrate-set-capabilities',"
+ "'arguments': { "
+ "'capabilities': [ {"
+ "'capability': 'postcopy-ram',"
+ "'state': true } ] } }");
+ g_assert(qdict_haskey(rsp, "return"));
+ QDECREF(rsp);
+
+ /* We want to pick a speed slow enough that the test completes
+ * quickly, but that it doesn't complete precopy even on a slow
+ * machine, so also set the downtime.
+ */
+ global_qtest = from;
+ rsp = qmp("{ 'execute': 'migrate_set_speed',"
+ "'arguments': { 'value': 100000000 } }");
+ g_assert(qdict_haskey(rsp, "return"));
+ QDECREF(rsp);
+
+ /* 1ms downtime - it should never finish precopy */
+ rsp = qmp("{ 'execute': 'migrate_set_downtime',"
+ "'arguments': { 'value': 0.001 } }");
+ g_assert(qdict_haskey(rsp, "return"));
+ QDECREF(rsp);
+
+
+ /* Wait for the first serial output from the source */
+ wait_for_serial("src_serial");
+
+ cmd = g_strdup_printf("{ 'execute': 'migrate',"
+ "'arguments': { 'uri': '%s' } }",
+ uri);
+ rsp = qmp(cmd);
+ g_free(cmd);
+ g_assert(qdict_haskey(rsp, "return"));
+ QDECREF(rsp);
+
+ wait_for_migration_pass();
+
+ rsp = return_or_event(qmp("{ 'execute': 'migrate-start-postcopy' }"));
+ g_assert(qdict_haskey(rsp, "return"));
+ QDECREF(rsp);
+
+ if (!got_stop) {
+ qmp_eventwait("STOP");
+ }
+
+ global_qtest = to;
+ qmp_eventwait("RESUME");
+
+ wait_for_serial("dest_serial");
+ global_qtest = from;
+ wait_for_migration_complete();
+
+ qtest_quit(from);
+
+ global_qtest = to;
+
+ qtest_memread(to, start_address, &dest_byte_a, 1);
+
+ /* Destination still running, wait for a byte to change */
+ do {
+ qtest_memread(to, start_address, &dest_byte_b, 1);
+ usleep(10 * 1000);
+ } while (dest_byte_a == dest_byte_b);
+
+ qmp("{ 'execute' : 'stop'}");
+ /* With it stopped, check nothing changes */
+ qtest_memread(to, start_address, &dest_byte_c, 1);
+ sleep(1);
+ qtest_memread(to, start_address, &dest_byte_d, 1);
+ g_assert_cmpint(dest_byte_c, ==, dest_byte_d);
+
+ check_guests_ram();
+
+ qtest_quit(to);
+ g_free(uri);
+
+ global_qtest = global;
+
+ cleanup("bootsect");
+ cleanup("migsocket");
+ cleanup("src_serial");
+ cleanup("dest_serial");
+}
+
+int main(int argc, char **argv)
+{
+ char template[] = "/tmp/postcopy-test-XXXXXX";
+ int ret;
+
+ g_test_init(&argc, &argv, NULL);
+
+ if (!ufd_version_check()) {
+ return 0;
+ }
+
+ tmpfs = mkdtemp(template);
+ if (!tmpfs) {
+ g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno));
+ }
+ g_assert(tmpfs);
+
+ module_call_init(MODULE_INIT_QOM);
+
+ qtest_add_func("/postcopy", test_migrate);
+
+ ret = g_test_run();
+
+ g_assert_cmpint(ret, ==, 0);
+
+ ret = rmdir(tmpfs);
+ if (ret != 0) {
+ g_test_message("unable to rmdir: path (%s): %s\n",
+ tmpfs, strerror(errno));
+ }
+
+ return ret;
+}
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index 055c553cdb..a95c4b0be8 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -42,22 +42,14 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on
Testing: -S
QMP_VERSION
{"return": {}}
-IMGFMT built-in AES encryption is deprecated
-Support for it will be removed in a future release.
-You can use 'qemu-img convert' to switch to an
-unencrypted IMGFMT image, or a LUKS raw image.
-{"error": {"class": "GenericError", "desc": "blockdev-add doesn't support encrypted devices"}}
+{"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"}
Testing:
QMP_VERSION
{"return": {}}
-IMGFMT built-in AES encryption is deprecated
-Support for it will be removed in a future release.
-You can use 'qemu-img convert' to switch to an
-unencrypted IMGFMT image, or a LUKS raw image.
-{"error": {"class": "GenericError", "desc": "Guest must be stopped for opening of encrypted image"}}
+{"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"}
diff --git a/tests/qemu-iotests/095 b/tests/qemu-iotests/095
index dad04b9ac9..030adb22e1 100755
--- a/tests/qemu-iotests/095
+++ b/tests/qemu-iotests/095
@@ -74,6 +74,8 @@ _send_qemu_cmd $h "{ 'execute': 'block-commit',
'arguments': { 'device': 'test',
'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED"
+_cleanup_qemu
+
echo
echo "=== Base image info after commit and resize ==="
TEST_IMG="${TEST_IMG}.base" _img_info | _filter_img_info
diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155
new file mode 100755
index 0000000000..4057b5e2aa
--- /dev/null
+++ b/tests/qemu-iotests/155
@@ -0,0 +1,261 @@
+#!/usr/bin/env python
+#
+# Test whether the backing BDSs are correct after completion of a
+# mirror block job; in "existing" modes (drive-mirror with
+# mode=existing and blockdev-mirror) the backing chain should not be
+# overridden.
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+back0_img = os.path.join(iotests.test_dir, 'back0.' + iotests.imgfmt)
+back1_img = os.path.join(iotests.test_dir, 'back1.' + iotests.imgfmt)
+back2_img = os.path.join(iotests.test_dir, 'back2.' + iotests.imgfmt)
+source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
+target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
+
+
+# Class variables for controlling its behavior:
+#
+# existing: If True, explicitly create the target image and blockdev-add it
+# target_backing: If existing is True: Use this filename as the backing file
+# of the target image
+# (None: no backing file)
+# target_blockdev_backing: If existing is True: Pass this dict as "backing"
+# for the blockdev-add command
+# (None: do not pass "backing")
+# target_real_backing: If existing is True: The real filename of the backing
+# image during runtime, only makes sense if
+# target_blockdev_backing is not None
+# (None: same as target_backing)
+
+class BaseClass(iotests.QMPTestCase):
+ target_blockdev_backing = None
+ target_real_backing = None
+
+ def setUp(self):
+ qemu_img('create', '-f', iotests.imgfmt, back0_img, '1M')
+ qemu_img('create', '-f', iotests.imgfmt, '-b', back0_img, back1_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-b', back1_img, back2_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-b', back2_img, source_img)
+
+ self.vm = iotests.VM()
+ self.vm.add_drive(None, '', 'none')
+ self.vm.launch()
+
+ # Add the BDS via blockdev-add so it stays around after the mirror block
+ # job has been completed
+ result = self.vm.qmp('blockdev-add',
+ options={'node-name': 'source',
+ 'driver': iotests.imgfmt,
+ 'file': {'driver': 'file',
+ 'filename': source_img}})
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('x-blockdev-insert-medium',
+ device='drive0', node_name='source')
+ self.assert_qmp(result, 'return', {})
+
+ self.assertIntactSourceBackingChain()
+
+ if self.existing:
+ if self.target_backing:
+ qemu_img('create', '-f', iotests.imgfmt,
+ '-b', self.target_backing, target_img, '1M')
+ else:
+ qemu_img('create', '-f', iotests.imgfmt, target_img, '1M')
+
+ if self.cmd == 'blockdev-mirror':
+ options = { 'node-name': 'target',
+ 'driver': iotests.imgfmt,
+ 'file': { 'driver': 'file',
+ 'filename': target_img } }
+ if self.target_blockdev_backing:
+ options['backing'] = self.target_blockdev_backing
+
+ result = self.vm.qmp('blockdev-add', options=options)
+ self.assert_qmp(result, 'return', {})
+
+ def tearDown(self):
+ self.vm.shutdown()
+ os.remove(source_img)
+ os.remove(back2_img)
+ os.remove(back1_img)
+ os.remove(back0_img)
+ try:
+ os.remove(target_img)
+ except OSError:
+ pass
+
+ def findBlockNode(self, node_name, id=None):
+ if id:
+ result = self.vm.qmp('query-block')
+ for device in result['return']:
+ if device['device'] == id:
+ if node_name:
+ self.assert_qmp(device, 'inserted/node-name', node_name)
+ return device['inserted']
+ else:
+ result = self.vm.qmp('query-named-block-nodes')
+ for node in result['return']:
+ if node['node-name'] == node_name:
+ return node
+
+ self.fail('Cannot find node %s/%s' % (id, node_name))
+
+ def assertIntactSourceBackingChain(self):
+ node = self.findBlockNode('source')
+
+ self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename',
+ source_img)
+ self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename',
+ back2_img)
+ self.assert_qmp(node, 'image' + '/backing-image' * 2 + '/filename',
+ back1_img)
+ self.assert_qmp(node, 'image' + '/backing-image' * 3 + '/filename',
+ back0_img)
+ self.assert_qmp_absent(node, 'image' + '/backing-image' * 4)
+
+ def assertCorrectBackingImage(self, node, default_image):
+ if self.existing:
+ if self.target_real_backing:
+ image = self.target_real_backing
+ else:
+ image = self.target_backing
+ else:
+ image = default_image
+
+ if image:
+ self.assert_qmp(node, 'image/backing-image/filename', image)
+ else:
+ self.assert_qmp_absent(node, 'image/backing-image')
+
+
+# Class variables for controlling its behavior:
+#
+# cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror
+
+class MirrorBaseClass(BaseClass):
+ def runMirror(self, sync):
+ if self.cmd == 'blockdev-mirror':
+ result = self.vm.qmp(self.cmd, device='drive0', sync=sync,
+ target='target')
+ else:
+ if self.existing:
+ mode = 'existing'
+ else:
+ mode = 'absolute-paths'
+ result = self.vm.qmp(self.cmd, device='drive0', sync=sync,
+ target=target_img, format=iotests.imgfmt,
+ mode=mode, node_name='target')
+
+ self.assert_qmp(result, 'return', {})
+
+ self.vm.event_wait('BLOCK_JOB_READY')
+
+ result = self.vm.qmp('block-job-complete', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ self.vm.event_wait('BLOCK_JOB_COMPLETED')
+
+ def testFull(self):
+ self.runMirror('full')
+
+ node = self.findBlockNode('target', 'drive0')
+ self.assertCorrectBackingImage(node, None)
+ self.assertIntactSourceBackingChain()
+
+ def testTop(self):
+ self.runMirror('top')
+
+ node = self.findBlockNode('target', 'drive0')
+ self.assertCorrectBackingImage(node, back2_img)
+ self.assertIntactSourceBackingChain()
+
+ def testNone(self):
+ self.runMirror('none')
+
+ node = self.findBlockNode('target', 'drive0')
+ self.assertCorrectBackingImage(node, source_img)
+ self.assertIntactSourceBackingChain()
+
+
+class TestDriveMirrorAbsolutePaths(MirrorBaseClass):
+ cmd = 'drive-mirror'
+ existing = False
+
+class TestDriveMirrorExistingNoBacking(MirrorBaseClass):
+ cmd = 'drive-mirror'
+ existing = True
+ target_backing = None
+
+class TestDriveMirrorExistingBacking(MirrorBaseClass):
+ cmd = 'drive-mirror'
+ existing = True
+ target_backing = 'null-co://'
+
+class TestBlockdevMirrorNoBacking(MirrorBaseClass):
+ cmd = 'blockdev-mirror'
+ existing = True
+ target_backing = None
+
+class TestBlockdevMirrorBacking(MirrorBaseClass):
+ cmd = 'blockdev-mirror'
+ existing = True
+ target_backing = 'null-co://'
+
+class TestBlockdevMirrorForcedBacking(MirrorBaseClass):
+ cmd = 'blockdev-mirror'
+ existing = True
+ target_backing = None
+ target_blockdev_backing = { 'driver': 'null-co' }
+ target_real_backing = 'null-co://'
+
+
+class TestCommit(BaseClass):
+ existing = False
+
+ def testCommit(self):
+ result = self.vm.qmp('block-commit', device='drive0', base=back1_img)
+ self.assert_qmp(result, 'return', {})
+
+ self.vm.event_wait('BLOCK_JOB_READY')
+
+ result = self.vm.qmp('block-job-complete', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ self.vm.event_wait('BLOCK_JOB_COMPLETED')
+
+ node = self.findBlockNode(None, 'drive0')
+ self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename',
+ back1_img)
+ self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename',
+ back0_img)
+ self.assert_qmp_absent(node, 'image' + '/backing-image' * 2 +
+ '/filename')
+
+ self.assertIntactSourceBackingChain()
+
+
+BaseClass = None
+MirrorBaseClass = None
+
+if __name__ == '__main__':
+ iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out
new file mode 100644
index 0000000000..4176bb9402
--- /dev/null
+++ b/tests/qemu-iotests/155.out
@@ -0,0 +1,5 @@
+...................
+----------------------------------------------------------------------
+Ran 19 tests
+
+OK
diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156
new file mode 100755
index 0000000000..cc95ff1f98
--- /dev/null
+++ b/tests/qemu-iotests/156
@@ -0,0 +1,174 @@
+#!/bin/bash
+#
+# Tests oVirt-like storage migration:
+# - Create snapshot
+# - Create target image with (not yet existing) target backing chain
+# (i.e. just write the name of a soon-to-be-copied-over backing file into it)
+# - drive-mirror the snapshot to the target with mode=existing and sync=top
+# - In the meantime, copy the original source files to the destination via
+# conventional means (i.e. outside of qemu)
+# - Complete the drive-mirror job
+# - Delete all source images
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1 # failure is the default!
+
+_cleanup()
+{
+ rm -f "$TEST_IMG{,.target}{,.backing,.overlay}"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2 qed
+_supported_proto generic
+_supported_os Linux
+
+# Create source disk
+TEST_IMG="$TEST_IMG.backing" _make_test_img 1M
+_make_test_img -b "$TEST_IMG.backing" 1M
+
+$QEMU_IO -c 'write -P 1 0 256k' "$TEST_IMG.backing" | _filter_qemu_io
+$QEMU_IO -c 'write -P 2 64k 192k' "$TEST_IMG" | _filter_qemu_io
+
+_launch_qemu -drive if=none,id=source,file="$TEST_IMG"
+
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'qmp_capabilities' }" \
+ 'return'
+
+# Create snapshot
+TEST_IMG="$TEST_IMG.overlay" _make_test_img -b "$TEST_IMG" 1M
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'blockdev-snapshot-sync',
+ 'arguments': { 'device': 'source',
+ 'snapshot-file': '$TEST_IMG.overlay',
+ 'format': '$IMGFMT',
+ 'mode': 'existing' } }" \
+ 'return'
+
+# Write something to the snapshot
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'human-monitor-command',
+ 'arguments': { 'command-line':
+ 'qemu-io source \"write -P 3 128k 128k\"' } }" \
+ 'return'
+
+# Create target image
+TEST_IMG="$TEST_IMG.target.overlay" _make_test_img -b "$TEST_IMG.target" 1M
+
+# Mirror snapshot
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'drive-mirror',
+ 'arguments': { 'device': 'source',
+ 'target': '$TEST_IMG.target.overlay',
+ 'mode': 'existing',
+ 'sync': 'top' } }" \
+ 'return'
+
+# Wait for convergence
+_send_qemu_cmd $QEMU_HANDLE \
+ '' \
+ 'BLOCK_JOB_READY'
+
+# Write some more
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'human-monitor-command',
+ 'arguments': { 'command-line':
+ 'qemu-io source \"write -P 4 192k 64k\"' } }" \
+ 'return'
+
+# Copy source backing chain to the target before completing the job
+cp "$TEST_IMG.backing" "$TEST_IMG.target.backing"
+cp "$TEST_IMG" "$TEST_IMG.target"
+$QEMU_IMG rebase -u -b "$TEST_IMG.target.backing" "$TEST_IMG.target"
+
+# Complete block job
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'block-job-complete',
+ 'arguments': { 'device': 'source' } }" \
+ ''
+
+_send_qemu_cmd $QEMU_HANDLE \
+ '' \
+ 'BLOCK_JOB_COMPLETED'
+
+# Remove the source images
+rm -f "$TEST_IMG{,.backing,.overlay}"
+
+echo
+
+# Check online disk contents
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'human-monitor-command',
+ 'arguments': { 'command-line':
+ 'qemu-io source \"read -P 1 0k 64k\"' } }" \
+ 'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'human-monitor-command',
+ 'arguments': { 'command-line':
+ 'qemu-io source \"read -P 2 64k 64k\"' } }" \
+ 'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'human-monitor-command',
+ 'arguments': { 'command-line':
+ 'qemu-io source \"read -P 3 128k 64k\"' } }" \
+ 'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'human-monitor-command',
+ 'arguments': { 'command-line':
+ 'qemu-io source \"read -P 4 192k 64k\"' } }" \
+ 'return'
+
+echo
+
+_send_qemu_cmd $QEMU_HANDLE \
+ "{ 'execute': 'quit' }" \
+ 'return'
+
+wait=1 _cleanup_qemu
+
+echo
+
+# Check offline disk contents
+$QEMU_IO -c 'read -P 1 0k 64k' \
+ -c 'read -P 2 64k 64k' \
+ -c 'read -P 3 128k 64k' \
+ -c 'read -P 4 192k 64k' \
+ "$TEST_IMG.target.overlay" | _filter_qemu_io
+
+echo
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out
new file mode 100644
index 0000000000..3af82ae540
--- /dev/null
+++ b/tests/qemu-iotests/156.out
@@ -0,0 +1,48 @@
+QA output created by 156
+Formatting 'TEST_DIR/t.IMGFMT.backing', fmt=IMGFMT size=1048576
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.backing
+wrote 262144/262144 bytes at offset 0
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 196608/196608 bytes at offset 65536
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": {}}
+Formatting 'TEST_DIR/t.IMGFMT.overlay', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT
+{"return": {}}
+wrote 131072/131072 bytes at offset 131072
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+Formatting 'TEST_DIR/t.IMGFMT.target.overlay', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.target
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "source", "len": 131072, "offset": 131072, "speed": 0, "type": "mirror"}}
+wrote 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "source", "len": 196608, "offset": 196608, "speed": 0, "type": "mirror"}}
+
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"}
+
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+*** done
diff --git a/tests/qemu-iotests/README b/tests/qemu-iotests/README
index 4ccfdd1cc0..6079b401ae 100644
--- a/tests/qemu-iotests/README
+++ b/tests/qemu-iotests/README
@@ -17,4 +17,5 @@ additional options to test further image formats or I/O methods.
* Feedback and patches
Please send improvements to the test suite, general feedback or just
-reports of failing tests cases to qemu-devel@savannah.nongnu.org.
+reports of failing tests cases to qemu-devel@nongnu.org with a CC:
+to qemu-block@nongnu.org.
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index ab1d76efdf..1c6fcb6018 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -154,3 +154,5 @@
150 rw auto quick
152 rw auto quick
154 rw auto backing quick
+155 rw auto
+156 rw auto quick
diff --git a/tests/qht-bench.c b/tests/qht-bench.c
index ad8efbca95..76360a0cf5 100644
--- a/tests/qht-bench.c
+++ b/tests/qht-bench.c
@@ -5,7 +5,6 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
-#include <glib.h>
#include "qemu/processor.h"
#include "qemu/atomic.h"
#include "qemu/qht.h"
diff --git a/tests/test-qdist.c b/tests/test-qdist.c
index a67f26057e..0298986ac9 100644
--- a/tests/test-qdist.c
+++ b/tests/test-qdist.c
@@ -5,7 +5,6 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
-#include <glib.h>
#include "qemu/qdist.h"
#include <math.h>
diff --git a/tests/test-qht-par.c b/tests/test-qht-par.c
index f09e004ec6..d8a83caf5c 100644
--- a/tests/test-qht-par.c
+++ b/tests/test-qht-par.c
@@ -5,7 +5,6 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
-#include <glib.h>
#define TEST_QHT_STRING "tests/qht-bench 1>/dev/null 2>&1 -R -S0.1 -D10000 -N1 "
diff --git a/tests/test-qht.c b/tests/test-qht.c
index c8eb9305ed..f1d628371d 100644
--- a/tests/test-qht.c
+++ b/tests/test-qht.c
@@ -5,7 +5,6 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
-#include <glib.h>
#include "qemu/qht.h"
#define N 5000
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 36b3cd8b8c..45fa2b6148 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -33,7 +33,6 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/unistd.h>
-#include <sys/mman.h>
#include <sys/eventfd.h>
#include <arpa/inet.h>
#include <netdb.h>
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 6e3a4c0d57..8b2164b99d 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -17,7 +17,6 @@
#include "sysemu/sysemu.h"
#include <linux/vhost.h>
-#include <sys/mman.h>
#include <sys/vfs.h>
#include <qemu/sockets.h>
diff --git a/trace-events b/trace-events
index 2f14205de0..104b64fae1 100644
--- a/trace-events
+++ b/trace-events
@@ -73,7 +73,7 @@ bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
-bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
# block/stream.c
stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
@@ -606,16 +606,16 @@ qemu_system_shutdown_request(void) ""
qemu_system_powerdown_request(void) ""
# block/qcow2.c
-qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d"
+qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
qcow2_writev_start_part(void *co) "co %p"
-qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
+qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
# block/qcow2-cluster.c
-qcow2_alloc_clusters_offset(void *co, uint64_t offset, int num) "co %p offset %" PRIx64 " num %d"
+qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
diff --git a/translate-all.c b/translate-all.c
index e8b88b4485..3f402dfe04 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -18,8 +18,6 @@
*/
#ifdef _WIN32
#include <windows.h>
-#else
-#include <sys/mman.h>
#endif
#include "qemu/osdep.h"
diff --git a/util/cutils.c b/util/cutils.c
index 43d1afbbec..5830a688dc 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -256,13 +256,7 @@ static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len)
return i * sizeof(VECTYPE);
}
-/*
- * GCC before version 4.9 has a bug which will cause the target
- * attribute work incorrectly and failed to compile in some case,
- * restrict the gcc version to 4.9+ to prevent the failure.
- */
-
-#if defined CONFIG_AVX2_OPT && QEMU_GNUC_PREREQ(4, 9)
+#if defined CONFIG_AVX2_OPT
#pragma GCC push_options
#pragma GCC target("avx2")
#include <cpuid.h>
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 7121b11c01..99fd2ba37b 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -269,6 +269,7 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
start >>= hb->granularity;
last >>= hb->granularity;
count = last - start + 1;
+ assert(last < hb->size);
hb->count += count - hb_count_between(hb, start, last);
hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
@@ -348,6 +349,7 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
start >>= hb->granularity;
last >>= hb->granularity;
+ assert(last < hb->size);
hb->count -= hb_count_between(hb, start, last);
hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
@@ -371,6 +373,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item)
/* Compute position and bit in the last layer. */
uint64_t pos = item >> hb->granularity;
unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+ assert(pos < hb->size);
return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
}
diff --git a/util/memfd.c b/util/memfd.c
index b374238a59..4571d1aba8 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -29,8 +29,6 @@
#include <glib/gprintf.h>
-#include <sys/mman.h>
-
#include "qemu/memfd.h"
#ifdef CONFIG_MEMFD
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 0b4cc7f7f1..629d97a362 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -11,7 +11,6 @@
*/
#include "qemu/osdep.h"
#include <qemu/mmap-alloc.h>
-#include <sys/mman.h>
#define HUGETLBFS_MAGIC 0x958458f6
diff --git a/util/osdep.c b/util/osdep.c
index 9a7a439e13..ff004e8074 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -25,10 +25,6 @@
/* Needed early for CONFIG_BSD etc. */
-#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
-#include <sys/mman.h>
-#endif
-
#ifdef CONFIG_SOLARIS
#include <sys/statvfs.h>
/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 4adde93ac1..e2e1d4d39f 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -36,7 +36,6 @@
#include "trace.h"
#include "qapi/error.h"
#include "qemu/sockets.h"
-#include <sys/mman.h>
#include <libgen.h>
#include <sys/signal.h>
#include "qemu/cutils.h"
diff --git a/util/qdist.c b/util/qdist.c
index 4ea2e34fc2..56f573837d 100644
--- a/util/qdist.c
+++ b/util/qdist.c
@@ -6,6 +6,7 @@
* License: GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "qemu/qdist.h"
#include <math.h>
diff --git a/util/qht.c b/util/qht.c
index 6f749098f4..40d6e218f7 100644
--- a/util/qht.c
+++ b/util/qht.c
@@ -65,6 +65,7 @@
* + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
* https://lwn.net/Articles/612021/
*/
+#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
diff --git a/vl.c b/vl.c
index 45eff5661b..0736d8430d 100644
--- a/vl.c
+++ b/vl.c
@@ -154,7 +154,7 @@ CharDriverState *sclp_hds[MAX_SCLP_CONSOLES];
int win2k_install_hack = 0;
int singlestep = 0;
int smp_cpus = 1;
-int max_cpus = 0;
+int max_cpus = 1;
int smp_cores = 1;
int smp_threads = 1;
int acpi_enabled = 1;
@@ -1218,7 +1218,6 @@ static QemuOptsList qemu_smp_opts = {
static void smp_parse(QemuOpts *opts)
{
if (opts) {
-
unsigned cpus = qemu_opt_get_number(opts, "cpus", 0);
unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
unsigned cores = qemu_opt_get_number(opts, "cores", 0);
@@ -1246,6 +1245,17 @@ static void smp_parse(QemuOpts *opts)
}
max_cpus = qemu_opt_get_number(opts, "maxcpus", cpus);
+
+ if (max_cpus > MAX_CPUMASK_BITS) {
+ error_report("unsupported number of maxcpus");
+ exit(1);
+ }
+
+ if (max_cpus < cpus) {
+ error_report("maxcpus must be equal to or greater than smp");
+ exit(1);
+ }
+
if (sockets * cores * threads > max_cpus) {
error_report("cpu topology: "
"sockets (%u) * cores (%u) * threads (%u) > "
@@ -1255,25 +1265,11 @@ static void smp_parse(QemuOpts *opts)
}
smp_cpus = cpus;
- smp_cores = cores > 0 ? cores : 1;
- smp_threads = threads > 0 ? threads : 1;
-
- }
-
- if (max_cpus == 0) {
- max_cpus = smp_cpus;
- }
-
- if (max_cpus > MAX_CPUMASK_BITS) {
- error_report("unsupported number of maxcpus");
- exit(1);
- }
- if (max_cpus < smp_cpus) {
- error_report("maxcpus must be equal to or greater than smp");
- exit(1);
+ smp_cores = cores;
+ smp_threads = threads;
}
- if (smp_cpus > 1 || smp_cores > 1 || smp_threads > 1) {
+ if (smp_cpus > 1) {
Error *blocker = NULL;
error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
replay_add_blocker(blocker);
@@ -2968,6 +2964,7 @@ int main(int argc, char **argv, char **envp)
FILE *vmstate_dump_file = NULL;
Error *main_loop_err = NULL;
Error *err = NULL;
+ bool list_data_dirs = false;
qemu_init_cpu_loop();
qemu_mutex_lock_iothread();
@@ -3354,7 +3351,9 @@ int main(int argc, char **argv, char **envp)
add_device_config(DEV_GDB, optarg);
break;
case QEMU_OPTION_L:
- if (data_dir_idx < ARRAY_SIZE(data_dir)) {
+ if (is_help_option(optarg)) {
+ list_data_dirs = true;
+ } else if (data_dir_idx < ARRAY_SIZE(data_dir)) {
data_dir[data_dir_idx++] = optarg;
}
break;
@@ -4086,6 +4085,14 @@ int main(int argc, char **argv, char **envp)
data_dir[data_dir_idx++] = CONFIG_QEMU_DATADIR;
}
+ /* -L help lists the data directories and exits. */
+ if (list_data_dirs) {
+ for (i = 0; i < data_dir_idx; i++) {
+ printf("%s\n", data_dir[i]);
+ }
+ exit(0);
+ }
+
smp_parse(qemu_opts_find(qemu_find_opts("smp-opts"), NULL));
machine_class->max_cpus = machine_class->max_cpus ?: 1; /* Default to UP */
diff --git a/xen-hvm.c b/xen-hvm.c
index a0da8d7d91..98ea44fdf3 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -9,7 +9,6 @@
*/
#include "qemu/osdep.h"
-#include <sys/mman.h>
#include "cpu.h"
#include "hw/pci/pci.h"
diff --git a/xen-mapcache.c b/xen-mapcache.c
index 49f394a777..8f3a592013 100644
--- a/xen-mapcache.c
+++ b/xen-mapcache.c
@@ -17,7 +17,6 @@
#include "qemu/bitmap.h"
#include <xen/hvm/params.h>
-#include <sys/mman.h>
#include "sysemu/xen-mapcache.h"
#include "trace.h"