diff options
124 files changed, 2218 insertions, 970 deletions
@@ -498,12 +498,12 @@ test speed: all .PHONY: ctags ctags: - rm -f $@ + rm -f tags find "$(SRC_PATH)" -name '*.[hc]' -exec ctags --append {} + .PHONY: TAGS TAGS: - rm -f $@ + rm -f TAGS find "$(SRC_PATH)" -name '*.[hc]' -exec etags --append {} + cscope: diff --git a/audio/ossaudio.c b/audio/ossaudio.c index a0d9cda1ec..0edd7ea5fe 100644 --- a/audio/ossaudio.c +++ b/audio/ossaudio.c @@ -22,7 +22,6 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" -#include <sys/mman.h> #include <sys/ioctl.h> #include <sys/soundcard.h> #include "qemu-common.h" @@ -684,6 +684,10 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, /* For temporary files, unconditional cache=unsafe is fine */ qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); + + /* aio=native doesn't work for cache.direct=off, so disable it for the + * temporary snapshot */ + *child_flags &= ~BDRV_O_NATIVE_AIO; } /* @@ -937,8 +941,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, goto fail_opts; } - bs->request_alignment = 512; - bs->zero_beyond_eof = true; + bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512; bs->read_only = !(bs->open_flags & BDRV_O_RDWR); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { @@ -2192,7 +2195,6 @@ static void bdrv_close(BlockDriverState *bs) bs->encrypted = 0; bs->valid_key = 0; bs->sg = 0; - bs->zero_beyond_eof = false; QDECREF(bs->options); QDECREF(bs->explicit_options); bs->options = NULL; @@ -2224,9 +2226,23 @@ void bdrv_close_all(void) static void change_parent_backing_link(BlockDriverState *from, BlockDriverState *to) { - BdrvChild *c, *next; + BdrvChild *c, *next, *to_c; QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (c->role == &child_backing) { + /* @from is generally not allowed to be a backing file, except for + * when @to is the overlay. In that case, @from may not be replaced + * by @to as @to's backing node. */ + QLIST_FOREACH(to_c, &to->children, next) { + if (to_c == c) { + break; + } + } + if (to_c) { + continue; + } + } + assert(c->role != &child_backing); bdrv_ref(to); bdrv_replace_child(c, to); @@ -2275,14 +2291,6 @@ void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new) change_parent_backing_link(old, new); - /* Change backing files if a previously independent node is added to the - * chain. For active commit, we replace top by its own (indirect) backing - * file and don't do anything here so we don't build a loop. */ - if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) { - bdrv_set_backing_hd(new, backing_bs(old)); - bdrv_set_backing_hd(old, NULL); - } - bdrv_unref(old); } diff --git a/block/commit.c b/block/commit.c index 8a00e1146c..444333ba65 100644 --- a/block/commit.c +++ b/block/commit.c @@ -236,6 +236,11 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, return; } + s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + orig_base_flags = bdrv_get_flags(base); orig_overlay_flags = bdrv_get_flags(overlay_bs); @@ -252,16 +257,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, bdrv_reopen_multiple(reopen_queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); + block_job_unref(&s->common); return; } } - s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp); - if (!s) { - return; - } - s->base = blk_new(); blk_insert_bs(s->base, base); diff --git a/block/io.c b/block/io.c index fb99a7151c..ebdb9d834c 100644 --- a/block/io.c +++ b/block/io.c @@ -289,15 +289,21 @@ void bdrv_drain_all(void) bool busy = true; BlockDriverState *bs; BdrvNextIterator it; + BlockJob *job = NULL; GSList *aio_ctxs = NULL, *ctx; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_pause(job); + aio_context_release(aio_context); + } + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (bs->job) { - block_job_pause(bs->job); - } bdrv_parent_drained_begin(bs); bdrv_io_unplugged_begin(bs); bdrv_drain_recurse(bs); @@ -340,12 +346,18 @@ void bdrv_drain_all(void) aio_context_acquire(aio_context); bdrv_io_unplugged_end(bs); bdrv_parent_drained_end(bs); - if (bs->job) { - block_job_resume(bs->job); - } aio_context_release(aio_context); } g_slist_free(aio_ctxs); + + job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_resume(job); + aio_context_release(aio_context); + } } /** @@ -404,12 +416,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) } /** - * Round a region to cluster boundaries + * Round a region to cluster boundaries (sector-based) */ -void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) +void bdrv_round_sectors_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors) { BlockDriverInfo bdi; @@ -424,6 +436,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs, } } +/** + * Round a region to cluster boundaries + */ +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, unsigned int bytes, + int64_t *cluster_offset, + unsigned int *cluster_bytes) +{ + BlockDriverInfo bdi; + + if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { + *cluster_offset = offset; + *cluster_bytes = bytes; + } else { + int64_t c = bdi.cluster_size; + *cluster_offset = QEMU_ALIGN_DOWN(offset, c); + *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c); + } +} + static int bdrv_get_cluster_size(BlockDriverState *bs) { BlockDriverInfo bdi; @@ -680,6 +712,18 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) } } +int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) +{ + int ret; + + ret = bdrv_prwv_co(bs, offset, qiov, false, 0); + if (ret < 0) { + return ret; + } + + return qiov->size; +} + int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov; @@ -687,19 +731,13 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) .iov_base = (void *)buf, .iov_len = bytes, }; - int ret; if (bytes < 0) { return -EINVAL; } qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); - if (ret < 0) { - return ret; - } - - return bytes; + return bdrv_preadv(bs, offset, &qiov); } int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) @@ -776,6 +814,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, int64_t sector_num; unsigned int nb_sectors; + assert(!(flags & ~BDRV_REQ_MASK)); + if (drv->bdrv_co_preadv) { return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); } @@ -815,6 +855,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, unsigned int nb_sectors; int ret; + assert(!(flags & ~BDRV_REQ_MASK)); + if (drv->bdrv_co_pwritev) { ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags & bs->supported_write_flags); @@ -861,7 +903,7 @@ emulate_flags: } static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up @@ -873,21 +915,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, BlockDriver *drv = bs->drv; struct iovec iov; QEMUIOVector bounce_qiov; - int64_t cluster_sector_num; - int cluster_nb_sectors; + int64_t cluster_offset; + unsigned int cluster_bytes; size_t skip_bytes; int ret; /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ - bdrv_round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); - trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, - cluster_sector_num, cluster_nb_sectors); + trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, + cluster_offset, cluster_bytes); - iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; + iov.iov_len = cluster_bytes; iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); if (bounce_buffer == NULL) { ret = -ENOMEM; @@ -896,8 +937,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, + ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes, &bounce_qiov, 0); if (ret < 0) { goto err; @@ -905,16 +945,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv->bdrv_co_pwrite_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { - ret = bdrv_co_do_pwrite_zeroes(bs, - cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, - 0); + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0); } else { /* This does not change the data on the disk, it is not necessary * to flush even in cache=writethrough mode. */ - ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, + ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes, &bounce_qiov, 0); } @@ -926,9 +962,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, goto err; } - skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); + skip_bytes = offset - cluster_offset; + qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes); err: qemu_vfree(bounce_buffer); @@ -944,15 +979,15 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + int64_t total_bytes, max_bytes; int ret; - int64_t sector_num = offset >> BDRV_SECTOR_BITS; - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(is_power_of_2(align)); + assert((offset & (align - 1)) == 0); + assert((bytes & (align - 1)) == 0); assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); + assert(!(flags & ~BDRV_REQ_MASK)); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -969,59 +1004,50 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (flags & BDRV_REQ_COPY_ON_READ) { + int64_t start_sector = offset >> BDRV_SECTOR_BITS; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + unsigned int nb_sectors = end_sector - start_sector; int pnum; - ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); + ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum); if (ret < 0) { goto out; } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); + ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); goto out; } } /* Forward the request to the BlockDriver */ - if (!bs->zero_beyond_eof) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else { - /* Read zeros after EOF */ - int64_t total_sectors, max_nb_sectors; - - total_sectors = bdrv_nb_sectors(bs); - if (total_sectors < 0) { - ret = total_sectors; - goto out; - } + total_bytes = bdrv_getlength(bs); + if (total_bytes < 0) { + ret = total_bytes; + goto out; + } - max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), - align >> BDRV_SECTOR_BITS); - if (nb_sectors < max_nb_sectors) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else if (max_nb_sectors > 0) { - QEMUIOVector local_qiov; + max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); + if (bytes < max_bytes) { + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); + } else if (max_bytes > 0) { + QEMUIOVector local_qiov; - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, 0, - max_nb_sectors * BDRV_SECTOR_SIZE); + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes); - ret = bdrv_driver_preadv(bs, offset, - max_nb_sectors * BDRV_SECTOR_SIZE, - &local_qiov, 0); + ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0); - qemu_iovec_destroy(&local_qiov); - } else { - ret = 0; - } + qemu_iovec_destroy(&local_qiov); + } else { + ret = 0; + } - /* Reading beyond end of file is supposed to produce zeroes */ - if (ret == 0 && total_sectors < sector_num + nb_sectors) { - uint64_t offset = MAX(0, total_sectors - sector_num); - uint64_t bytes = (sector_num + nb_sectors - offset) * - BDRV_SECTOR_SIZE; - qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); - } + /* Reading beyond end of file is supposed to produce zeroes */ + if (ret == 0 && total_bytes < offset + bytes) { + uint64_t zero_offset = MAX(0, total_bytes - offset); + uint64_t zero_bytes = offset + bytes - zero_offset; + qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes); } out: @@ -1038,8 +1064,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs, BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint64_t align = bs->request_alignment; uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1235,13 +1260,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, bool waited; int ret; - int64_t sector_num = offset >> BDRV_SECTOR_BITS; - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int64_t start_sector = offset >> BDRV_SECTOR_BITS; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); + assert(!(flags & ~BDRV_REQ_MASK)); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); @@ -1263,22 +1287,21 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); - ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, flags); + ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); } else { bdrv_debug_event(bs, BLKDBG_PWRITEV); ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); } bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); - bdrv_set_dirty(bs, sector_num, nb_sectors); + bdrv_set_dirty(bs, start_sector, end_sector - start_sector); if (bs->wr_highest_offset < offset + bytes) { bs->wr_highest_offset = offset + bytes; } if (ret >= 0) { - bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); + bs->total_sectors = MAX(bs->total_sectors, end_sector); } return ret; @@ -1293,7 +1316,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; - uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint64_t align = bs->request_alignment; unsigned int head_padding_bytes, tail_padding_bytes; int ret = 0; @@ -1380,8 +1403,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs, BdrvRequestFlags flags) { BdrvTrackedRequest req; - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint64_t align = bs->request_alignment; uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1824,6 +1846,62 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } +typedef struct BdrvVmstateCo { + BlockDriverState *bs; + QEMUIOVector *qiov; + int64_t pos; + bool is_read; + int ret; +} BdrvVmstateCo; + +static int coroutine_fn +bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, + bool is_read) +{ + BlockDriver *drv = bs->drv; + + if (!drv) { + return -ENOMEDIUM; + } else if (drv->bdrv_load_vmstate) { + return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos) + : drv->bdrv_save_vmstate(bs, qiov, pos); + } else if (bs->file) { + return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read); + } + + return -ENOTSUP; +} + +static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) +{ + BdrvVmstateCo *co = opaque; + co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); +} + +static inline int +bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, + bool is_read) +{ + if (qemu_in_coroutine()) { + return bdrv_co_rw_vmstate(bs, qiov, pos, is_read); + } else { + BdrvVmstateCo data = { + .bs = bs, + .qiov = qiov, + .pos = pos, + .is_read = is_read, + .ret = -EINPROGRESS, + }; + Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry); + + qemu_coroutine_enter(co, &data); + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } + return data.ret; + } +} + int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { @@ -1832,37 +1910,45 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, .iov_base = (void *) buf, .iov_len = size, }; + int ret; qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_writev_vmstate(bs, &qiov, pos); + + ret = bdrv_writev_vmstate(bs, &qiov, pos); + if (ret < 0) { + return ret; + } + + return size; } int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BlockDriver *drv = bs->drv; + return bdrv_rw_vmstate(bs, qiov, pos, false); +} - if (!drv) { - return -ENOMEDIUM; - } else if (drv->bdrv_save_vmstate) { - return drv->bdrv_save_vmstate(bs, qiov, pos); - } else if (bs->file) { - return bdrv_writev_vmstate(bs->file->bs, qiov, pos); +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size) +{ + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = buf, + .iov_len = size, + }; + int ret; + + qemu_iovec_init_external(&qiov, &iov, 1); + ret = bdrv_readv_vmstate(bs, &qiov, pos); + if (ret < 0) { + return ret; } - return -ENOTSUP; + return size; } -int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size) +int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BlockDriver *drv = bs->drv; - if (!drv) - return -ENOMEDIUM; - if (drv->bdrv_load_vmstate) - return drv->bdrv_load_vmstate(bs, buf, pos, size); - if (bs->file) - return bdrv_load_vmstate(bs->file->bs, buf, pos, size); - return -ENOTSUP; + return bdrv_rw_vmstate(bs, qiov, pos, true); } /**************************************************************/ diff --git a/block/linux-aio.c b/block/linux-aio.c index 90ec98ee23..e468960146 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -11,8 +11,10 @@ #include "qemu-common.h" #include "block/aio.h" #include "qemu/queue.h" +#include "block/block.h" #include "block/raw-aio.h" #include "qemu/event_notifier.h" +#include "qemu/coroutine.h" #include <libaio.h> @@ -30,6 +32,7 @@ struct qemu_laiocb { BlockAIOCB common; + Coroutine *co; LinuxAioState *ctx; struct iocb iocb; ssize_t ret; @@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) } } } - laiocb->common.cb(laiocb->common.opaque, ret); - qemu_aio_unref(laiocb); + laiocb->ret = ret; + if (laiocb->co) { + qemu_coroutine_enter(laiocb->co, NULL); + } else { + laiocb->common.cb(laiocb->common.opaque, ret); + qemu_aio_unref(laiocb); + } } /* The completion BH fetches completed I/O requests and invokes their @@ -141,6 +149,8 @@ static void qemu_laio_completion_bh(void *opaque) if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { ioq_submit(s); } + + qemu_bh_cancel(s->completion_bh); } static void qemu_laio_completion_cb(EventNotifier *e) @@ -148,7 +158,7 @@ static void qemu_laio_completion_cb(EventNotifier *e) LinuxAioState *s = container_of(e, LinuxAioState, e); if (event_notifier_test_and_clear(&s->e)) { - qemu_bh_schedule(s->completion_bh); + qemu_laio_completion_bh(s); } } @@ -230,22 +240,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) } } -BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) +static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + int type) { - struct qemu_laiocb *laiocb; - struct iocb *iocbs; - off_t offset = sector_num * 512; - - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); - laiocb->nbytes = nb_sectors * 512; - laiocb->ctx = s; - laiocb->ret = -EINPROGRESS; - laiocb->is_read = (type == QEMU_AIO_READ); - laiocb->qiov = qiov; - - iocbs = &laiocb->iocb; + LinuxAioState *s = laiocb->ctx; + struct iocb *iocbs = &laiocb->iocb; + QEMUIOVector *qiov = laiocb->qiov; switch (type) { case QEMU_AIO_WRITE: @@ -258,7 +258,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, default: fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", __func__, type); - goto out_free_aiocb; + return -EIO; } io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); @@ -268,11 +268,53 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) { ioq_submit(s); } - return &laiocb->common; -out_free_aiocb: - qemu_aio_unref(laiocb); - return NULL; + return 0; +} + +int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + uint64_t offset, QEMUIOVector *qiov, int type) +{ + int ret; + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), + .nbytes = qiov->size, + .ctx = s, + .is_read = (type == QEMU_AIO_READ), + .qiov = qiov, + }; + + ret = laio_do_submit(fd, &laiocb, offset, type); + if (ret < 0) { + return ret; + } + + qemu_coroutine_yield(); + return laiocb.ret; +} + +BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque, int type) +{ + struct qemu_laiocb *laiocb; + off_t offset = sector_num * BDRV_SECTOR_SIZE; + int ret; + + laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); + laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE; + laiocb->ctx = s; + laiocb->ret = -EINPROGRESS; + laiocb->is_read = (type == QEMU_AIO_READ); + laiocb->qiov = qiov; + + ret = laio_do_submit(fd, laiocb, offset, type); + if (ret < 0) { + qemu_aio_unref(laiocb); + return NULL; + } + + return &laiocb->common; } void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) diff --git a/block/mirror.c b/block/mirror.c index 80fd3c7469..075384a9cf 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -44,6 +44,7 @@ typedef struct MirrorBlockJob { /* Used to block operations on the drive-mirror-replace target */ Error *replace_blocker; bool is_none_mode; + BlockMirrorBackingMode backing_mode; BlockdevOnError on_source_error, on_target_error; bool synced; bool should_complete; @@ -157,8 +158,7 @@ static void mirror_read_complete(void *opaque, int ret) return; } blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov, - op->nb_sectors * BDRV_SECTOR_SIZE, - mirror_write_complete, op); + 0, mirror_write_complete, op); } static inline void mirror_clip_sectors(MirrorBlockJob *s, @@ -186,8 +186,9 @@ static int mirror_cow_align(MirrorBlockJob *s, need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors, s->cow_bitmap); if (need_cow) { - bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors, - &align_sector_num, &align_nb_sectors); + bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num, + *nb_sectors, &align_sector_num, + &align_nb_sectors); } if (align_nb_sectors > max_sectors) { @@ -274,8 +275,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, s->sectors_in_flight += nb_sectors; trace_mirror_one_iteration(s, sector_num, nb_sectors); - blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, - nb_sectors * BDRV_SECTOR_SIZE, + blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0, mirror_read_complete, op); return ret; } @@ -386,8 +386,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_sector_num; int target_nb_sectors; - bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors, - &target_sector_num, &target_nb_sectors); + bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num, + io_sectors, &target_sector_num, + &target_nb_sectors); if (target_sector_num == sector_num && target_nb_sectors == io_sectors) { mirror_method = ret & BDRV_BLOCK_ZERO ? @@ -742,20 +743,26 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - Error *local_err = NULL; - int ret; + BlockDriverState *src, *target; + + src = blk_bs(job->blk); + target = blk_bs(s->target); - ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing", - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return; - } if (!s->synced) { error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id); return; } + if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { + int ret; + + assert(!target->backing); + ret = bdrv_open_backing_file(target, NULL, "backing", errp); + if (ret < 0) { + return; + } + } + /* check the target bs is not blocked and block all operations on it */ if (s->replaces) { AioContext *replace_aio_context; @@ -777,6 +784,13 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target) != backing) { + bdrv_set_backing_hd(target, backing); + } + } + s->should_complete = true; block_job_enter(&s->common); } @@ -799,6 +813,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, + BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, @@ -836,6 +851,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->on_source_error = on_source_error; s->on_target_error = on_target_error; s->is_none_mode = is_none_mode; + s->backing_mode = backing_mode; s->base = base; s->granularity = granularity; s->buf_size = ROUND_UP(buf_size, granularity); @@ -859,7 +875,8 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, void mirror_start(BlockDriverState *bs, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockdevOnError on_source_error, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, @@ -875,7 +892,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; mirror_start_job(bs, target, replaces, - speed, granularity, buf_size, + speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, cb, opaque, errp, &mirror_job_driver, is_none_mode, base); } @@ -922,7 +939,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, } } - mirror_start_job(bs, base, NULL, speed, 0, 0, + mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, false, cb, opaque, &local_err, &commit_active_job_driver, false, base); if (local_err) { diff --git a/block/null.c b/block/null.c index 396500babd..b511010ba5 100644 --- a/block/null.c +++ b/block/null.c @@ -12,6 +12,8 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" #include "block/block_int.h" #define NULL_OPT_LATENCY "latency-ns" @@ -223,6 +225,20 @@ static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs, } } +static void null_refresh_filename(BlockDriverState *bs, QDict *opts) +{ + QINCREF(opts); + qdict_del(opts, "filename"); + + if (!qdict_size(opts)) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s://", + bs->drv->format_name); + } + + qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name)); + bs->full_open_options = opts; +} + static BlockDriver bdrv_null_co = { .format_name = "null-co", .protocol_name = "null-co", @@ -238,6 +254,8 @@ static BlockDriver bdrv_null_co = { .bdrv_reopen_prepare = null_reopen_prepare, .bdrv_co_get_block_status = null_co_get_block_status, + + .bdrv_refresh_filename = null_refresh_filename, }; static BlockDriver bdrv_null_aio = { @@ -255,6 +273,8 @@ static BlockDriver bdrv_null_aio = { .bdrv_reopen_prepare = null_reopen_prepare, .bdrv_co_get_block_status = null_co_get_block_status, + + .bdrv_refresh_filename = null_refresh_filename, }; static void bdrv_null_init(void) diff --git a/block/qcow.c b/block/qcow.c index c5cf813469..312af52816 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -162,10 +162,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, if (s->crypt_method_header) { if (bdrv_uses_whitelist() && s->crypt_method_header == QCOW_CRYPT_AES) { - error_report("qcow built-in AES encryption is deprecated"); - error_printf("Support for it will be removed in a future release.\n" - "You can use 'qemu-img convert' to switch to an\n" - "unencrypted qcow image, or a LUKS raw image.\n"); + error_setg(errp, + "Use of AES-CBC encrypted qcow images is no longer " + "supported in system emulators"); + error_append_hint(errp, + "You can use 'qemu-img convert' to convert your " + "image to an alternative supported format, such " + "as unencrypted qcow, or raw with the LUKS " + "format instead.\n"); + ret = -ENOSYS; + goto fail; } bs->encrypted = 1; diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 208a060421..580631c3d8 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -24,11 +24,6 @@ /* Needed for CONFIG_MADVISE */ #include "qemu/osdep.h" - -#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) -#include <sys/mman.h> -#endif - #include "block/block_int.h" #include "qemu-common.h" #include "qcow2.h" diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index b04bfafd65..893ddf6798 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -390,22 +390,18 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, return 0; } -static int coroutine_fn copy_sectors(BlockDriverState *bs, - uint64_t start_sect, - uint64_t cluster_offset, - int n_start, int n_end) +static int coroutine_fn do_perform_cow(BlockDriverState *bs, + uint64_t src_cluster_offset, + uint64_t cluster_offset, + int offset_in_cluster, + int bytes) { BDRVQcow2State *s = bs->opaque; QEMUIOVector qiov; struct iovec iov; - int n, ret; - - n = n_end - n_start; - if (n <= 0) { - return 0; - } + int ret; - iov.iov_len = n * BDRV_SECTOR_SIZE; + iov.iov_len = bytes; iov.iov_base = qemu_try_blockalign(bs, iov.iov_len); if (iov.iov_base == NULL) { return -ENOMEM; @@ -424,17 +420,21 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, * interface. This avoids double I/O throttling and request tracking, * which can lead to deadlock when block layer copy-on-read is enabled. */ - ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov); + ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster, + bytes, &qiov, 0); if (ret < 0) { goto out; } if (bs->encrypted) { Error *err = NULL; + int64_t sector = (cluster_offset + offset_in_cluster) + >> BDRV_SECTOR_BITS; assert(s->cipher); - if (qcow2_encrypt_sectors(s, start_sect + n_start, - iov.iov_base, iov.iov_base, n, - true, &err) < 0) { + assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); + assert((bytes & ~BDRV_SECTOR_MASK) == 0); + if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base, + bytes >> BDRV_SECTOR_BITS, true, &err) < 0) { ret = -EIO; error_free(err); goto out; @@ -442,14 +442,14 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, } ret = qcow2_pre_write_overlap_check(bs, 0, - cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE); + cluster_offset + offset_in_cluster, bytes); if (ret < 0) { goto out; } BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); - ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n, - &qiov); + ret = bdrv_co_pwritev(bs->file->bs, cluster_offset + offset_in_cluster, + bytes, &qiov, 0); if (ret < 0) { goto out; } @@ -464,47 +464,44 @@ out: /* * get_cluster_offset * - * For a given offset of the disk image, find the cluster offset in - * qcow2 file. The offset is stored in *cluster_offset. + * For a given offset of the virtual disk, find the cluster type and offset in + * the qcow2 file. The offset is stored in *cluster_offset. * - * on entry, *num is the number of contiguous sectors we'd like to - * access following offset. + * On entry, *bytes is the maximum number of contiguous bytes starting at + * offset that we are interested in. * - * on exit, *num is the number of contiguous sectors we can read. + * On exit, *bytes is the number of bytes starting at offset that have the same + * cluster type and (if applicable) are stored contiguously in the image file. + * Compressed clusters are always returned one by one. * * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error * cases. */ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, - int *num, uint64_t *cluster_offset) + unsigned int *bytes, uint64_t *cluster_offset) { BDRVQcow2State *s = bs->opaque; unsigned int l2_index; uint64_t l1_index, l2_offset, *l2_table; int l1_bits, c; - unsigned int index_in_cluster, nb_clusters; - uint64_t nb_available, nb_needed; + unsigned int offset_in_cluster, nb_clusters; + uint64_t bytes_available, bytes_needed; int ret; - index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1); - nb_needed = *num + index_in_cluster; + offset_in_cluster = offset_into_cluster(s, offset); + bytes_needed = (uint64_t) *bytes + offset_in_cluster; l1_bits = s->l2_bits + s->cluster_bits; - /* compute how many bytes there are between the offset and - * the end of the l1 entry - */ - - nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1)); + /* compute how many bytes there are between the start of the cluster + * containing offset and the end of the l1 entry */ + bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1)) + + offset_in_cluster; - /* compute the number of available sectors */ - - nb_available = (nb_available >> 9) + index_in_cluster; - - if (nb_needed > nb_available) { - nb_needed = nb_available; + if (bytes_needed > bytes_available) { + bytes_needed = bytes_available; } - assert(nb_needed <= INT_MAX); + assert(bytes_needed <= INT_MAX); *cluster_offset = 0; @@ -542,7 +539,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, *cluster_offset = be64_to_cpu(l2_table[l2_index]); /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */ - nb_clusters = size_to_clusters(s, nb_needed << 9); + nb_clusters = size_to_clusters(s, bytes_needed); ret = qcow2_get_cluster_type(*cluster_offset); switch (ret) { @@ -589,13 +586,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - nb_available = (c * s->cluster_sectors); + bytes_available = (c * s->cluster_size); out: - if (nb_available > nb_needed) - nb_available = nb_needed; + if (bytes_available > bytes_needed) { + bytes_available = bytes_needed; + } - *num = nb_available - index_in_cluster; + *bytes = bytes_available - offset_in_cluster; return ret; @@ -741,14 +739,12 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) BDRVQcow2State *s = bs->opaque; int ret; - if (r->nb_sectors == 0) { + if (r->nb_bytes == 0) { return 0; } qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, - r->offset / BDRV_SECTOR_SIZE, - r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); + ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes); qemu_co_mutex_lock(&s->lock); if (ret < 0) { @@ -810,13 +806,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) assert(l2_index + m->nb_clusters <= s->l2_size); for (i = 0; i < m->nb_clusters; i++) { /* if two concurrent writes happen to the same unallocated cluster - * each write allocates separate cluster and writes data concurrently. - * The first one to complete updates l2 table with pointer to its - * cluster the second one has to do RMW (which is done above by - * copy_sectors()), update l2 table with its cluster pointer and free - * old cluster. This is what this loop does */ - if(l2_table[l2_index + i] != 0) + * each write allocates separate cluster and writes data concurrently. + * The first one to complete updates l2 table with pointer to its + * cluster the second one has to do RMW (which is done above by + * perform_cow()), update l2 table with its cluster pointer and free + * old cluster. This is what this loop does */ + if (l2_table[l2_index + i] != 0) { old_cluster[j++] = l2_table[l2_index + i]; + } l2_table[l2_index + i] = cpu_to_be64((cluster_offset + (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); @@ -1198,25 +1195,20 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, /* * Save info needed for meta data update. * - * requested_sectors: Number of sectors from the start of the first + * requested_bytes: Number of bytes from the start of the first * newly allocated cluster to the end of the (possibly shortened * before) write request. * - * avail_sectors: Number of sectors from the start of the first + * avail_bytes: Number of bytes from the start of the first * newly allocated to the end of the last newly allocated cluster. * - * nb_sectors: The number of sectors from the start of the first + * nb_bytes: The number of bytes from the start of the first * newly allocated cluster to the end of the area that the write * request actually writes to (excluding COW at the end) */ - int requested_sectors = - (*bytes + offset_into_cluster(s, guest_offset)) - >> BDRV_SECTOR_BITS; - int avail_sectors = nb_clusters - << (s->cluster_bits - BDRV_SECTOR_BITS); - int alloc_n_start = offset_into_cluster(s, guest_offset) - >> BDRV_SECTOR_BITS; - int nb_sectors = MIN(requested_sectors, avail_sectors); + uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset); + int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits); + int nb_bytes = MIN(requested_bytes, avail_bytes); QCowL2Meta *old_m = *m; *m = g_malloc0(sizeof(**m)); @@ -1227,23 +1219,21 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, .alloc_offset = alloc_cluster_offset, .offset = start_of_cluster(s, guest_offset), .nb_clusters = nb_clusters, - .nb_available = nb_sectors, .cow_start = { .offset = 0, - .nb_sectors = alloc_n_start, + .nb_bytes = offset_into_cluster(s, guest_offset), }, .cow_end = { - .offset = nb_sectors * BDRV_SECTOR_SIZE, - .nb_sectors = avail_sectors - nb_sectors, + .offset = nb_bytes, + .nb_bytes = avail_bytes - nb_bytes, }, }; qemu_co_queue_init(&(*m)->dependent_requests); QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); - *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) - - offset_into_cluster(s, guest_offset)); + *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset)); assert(*bytes != 0); return 1; @@ -1275,7 +1265,8 @@ fail: * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int *num, uint64_t *host_offset, QCowL2Meta **m) + unsigned int *bytes, uint64_t *host_offset, + QCowL2Meta **m) { BDRVQcow2State *s = bs->opaque; uint64_t start, remaining; @@ -1283,13 +1274,11 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, uint64_t cur_bytes; int ret; - trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num); - - assert((offset & ~BDRV_SECTOR_MASK) == 0); + trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes); again: start = offset; - remaining = (uint64_t)*num << BDRV_SECTOR_BITS; + remaining = *bytes; cluster_offset = 0; *host_offset = 0; cur_bytes = 0; @@ -1375,8 +1364,8 @@ again: } } - *num -= remaining >> BDRV_SECTOR_BITS; - assert(*num > 0); + *bytes -= remaining; + assert(*bytes > 0); assert(*host_offset != 0); return 0; diff --git a/block/qcow2.c b/block/qcow2.c index 6f5fb810e4..4718f8250e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -968,13 +968,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (s->crypt_method_header) { if (bdrv_uses_whitelist() && s->crypt_method_header == QCOW_CRYPT_AES) { - error_report("qcow2 built-in AES encryption is deprecated"); - error_printf("Support for it will be removed in a future release.\n" - "You can use 'qemu-img convert' to switch to an\n" - "unencrypted qcow2 image, or a LUKS raw image.\n"); + error_setg(errp, + "Use of AES-CBC encrypted qcow2 images is no longer " + "supported in system emulators"); + error_append_hint(errp, + "You can use 'qemu-img convert' to convert your " + "image to an alternative supported format, such " + "as unencrypted qcow2, or raw with the LUKS " + "format instead.\n"); + ret = -ENOSYS; + goto fail; } bs->encrypted = 1; + + /* Encryption works on a sector granularity */ + bs->request_alignment = BDRV_SECTOR_SIZE; } s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ @@ -1331,16 +1340,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; int index_in_cluster, ret; + unsigned int bytes; int64_t status = 0; - *pnum = nb_sectors; + bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE); qemu_co_mutex_lock(&s->lock); - ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); + ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes, + &cluster_offset); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { return ret; } + *pnum = bytes >> BDRV_SECTOR_BITS; + if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && !s->cipher) { index_in_cluster = sector_num & (s->cluster_sectors - 1); @@ -1358,28 +1371,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, /* handle reading after the end of the backing file */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t sector_num, int nb_sectors) + int64_t offset, int bytes) { + uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; int n1; - if ((sector_num + nb_sectors) <= bs->total_sectors) - return nb_sectors; - if (sector_num >= bs->total_sectors) + + if ((offset + bytes) <= bs_size) { + return bytes; + } + + if (offset >= bs_size) { n1 = 0; - else - n1 = bs->total_sectors - sector_num; + } else { + n1 = bs_size - offset; + } - qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); + qemu_iovec_memset(qiov, n1, 0, bytes - n1); return n1; } -static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BDRVQcow2State *s = bs->opaque; - int index_in_cluster, n1; + int offset_in_cluster, n1; int ret; - int cur_nr_sectors; /* number of sectors in current iteration */ + unsigned int cur_bytes; /* number of bytes in current iteration */ uint64_t cluster_offset = 0; uint64_t bytes_done = 0; QEMUIOVector hd_qiov; @@ -1389,26 +1408,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_co_mutex_lock(&s->lock); - while (remaining_sectors != 0) { + while (bytes != 0) { /* prepare next request */ - cur_nr_sectors = remaining_sectors; + cur_bytes = MIN(bytes, INT_MAX); if (s->cipher) { - cur_nr_sectors = MIN(cur_nr_sectors, - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); + cur_bytes = MIN(cur_bytes, + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); } - ret = qcow2_get_cluster_offset(bs, sector_num << 9, - &cur_nr_sectors, &cluster_offset); + ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); if (ret < 0) { goto fail; } - index_in_cluster = sector_num & (s->cluster_sectors - 1); + offset_in_cluster = offset_into_cluster(s, offset); qemu_iovec_reset(&hd_qiov); - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, - cur_nr_sectors * 512); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); switch (ret) { case QCOW2_CLUSTER_UNALLOCATED: @@ -1416,18 +1433,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, if (bs->backing) { /* read from the base image */ n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, - sector_num, cur_nr_sectors); + offset, cur_bytes); if (n1 > 0) { QEMUIOVector local_qiov; qemu_iovec_init(&local_qiov, hd_qiov.niov); - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, - n1 * BDRV_SECTOR_SIZE); + qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->backing->bs, sector_num, - n1, &local_qiov); + ret = bdrv_co_preadv(bs->backing->bs, offset, n1, + &local_qiov, 0); qemu_co_mutex_lock(&s->lock); qemu_iovec_destroy(&local_qiov); @@ -1438,12 +1454,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } } else { /* Note: in this case, no need to wait */ - qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); } break; case QCOW2_CLUSTER_ZERO: - qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); break; case QCOW2_CLUSTER_COMPRESSED: @@ -1454,8 +1470,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } qemu_iovec_from_buf(&hd_qiov, 0, - s->cluster_cache + index_in_cluster * 512, - 512 * cur_nr_sectors); + s->cluster_cache + offset_in_cluster, + cur_bytes); break; case QCOW2_CLUSTER_NORMAL: @@ -1482,34 +1498,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } } - assert(cur_nr_sectors <= - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); + assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cluster_data, - 512 * cur_nr_sectors); + qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file->bs, - (cluster_offset >> 9) + index_in_cluster, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_preadv(bs->file->bs, + cluster_offset + offset_in_cluster, + cur_bytes, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; } if (bs->encrypted) { assert(s->cipher); + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); Error *err = NULL; - if (qcow2_encrypt_sectors(s, sector_num, cluster_data, - cluster_data, cur_nr_sectors, false, - &err) < 0) { + if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS, + cluster_data, cluster_data, + cur_bytes >> BDRV_SECTOR_BITS, + false, &err) < 0) { error_free(err); ret = -EIO; goto fail; } - qemu_iovec_from_buf(qiov, bytes_done, - cluster_data, 512 * cur_nr_sectors); + qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); } break; @@ -1519,9 +1535,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, goto fail; } - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + bytes -= cur_bytes; + offset += cur_bytes; + bytes_done += cur_bytes; } ret = 0; @@ -1534,23 +1550,21 @@ fail: return ret; } -static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, - int64_t sector_num, - int remaining_sectors, - QEMUIOVector *qiov) +static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BDRVQcow2State *s = bs->opaque; - int index_in_cluster; + int offset_in_cluster; int ret; - int cur_nr_sectors; /* number of sectors in current iteration */ + unsigned int cur_bytes; /* number of sectors in current iteration */ uint64_t cluster_offset; QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; QCowL2Meta *l2meta = NULL; - trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, - remaining_sectors); + trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -1558,22 +1572,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, qemu_co_mutex_lock(&s->lock); - while (remaining_sectors != 0) { + while (bytes != 0) { l2meta = NULL; trace_qcow2_writev_start_part(qemu_coroutine_self()); - index_in_cluster = sector_num & (s->cluster_sectors - 1); - cur_nr_sectors = remaining_sectors; - if (bs->encrypted && - cur_nr_sectors > - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) { - cur_nr_sectors = - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster; + offset_in_cluster = offset_into_cluster(s, offset); + cur_bytes = MIN(bytes, INT_MAX); + if (bs->encrypted) { + cur_bytes = MIN(cur_bytes, + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + - offset_in_cluster); } - ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - &cur_nr_sectors, &cluster_offset, &l2meta); + ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, + &cluster_offset, &l2meta); if (ret < 0) { goto fail; } @@ -1581,8 +1594,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, - cur_nr_sectors * 512); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); if (bs->encrypted) { Error *err = NULL; @@ -1601,8 +1613,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); - if (qcow2_encrypt_sectors(s, sector_num, cluster_data, - cluster_data, cur_nr_sectors, + if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS, + cluster_data, cluster_data, + cur_bytes >>BDRV_SECTOR_BITS, true, &err) < 0) { error_free(err); ret = -EIO; @@ -1610,13 +1623,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cluster_data, - cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); } ret = qcow2_pre_write_overlap_check(bs, 0, - cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE, - cur_nr_sectors * BDRV_SECTOR_SIZE); + cluster_offset + offset_in_cluster, cur_bytes); if (ret < 0) { goto fail; } @@ -1624,10 +1635,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), - (cluster_offset >> 9) + index_in_cluster); - ret = bdrv_co_writev(bs->file->bs, - (cluster_offset >> 9) + index_in_cluster, - cur_nr_sectors, &hd_qiov); + cluster_offset + offset_in_cluster); + ret = bdrv_co_pwritev(bs->file->bs, + cluster_offset + offset_in_cluster, + cur_bytes, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; @@ -1653,10 +1664,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, l2meta = next; } - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; - trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); + bytes -= cur_bytes; + offset += cur_bytes; + bytes_done += cur_bytes; + trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); } ret = 0; @@ -1998,19 +2009,19 @@ static int qcow2_change_backing_file(BlockDriverState *bs, static int preallocate(BlockDriverState *bs) { - uint64_t nb_sectors; + uint64_t bytes; uint64_t offset; uint64_t host_offset = 0; - int num; + unsigned int cur_bytes; int ret; QCowL2Meta *meta; - nb_sectors = bdrv_nb_sectors(bs); + bytes = bdrv_getlength(bs); offset = 0; - while (nb_sectors) { - num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS); - ret = qcow2_alloc_cluster_offset(bs, offset, &num, + while (bytes) { + cur_bytes = MIN(bytes, INT_MAX); + ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, &host_offset, &meta); if (ret < 0) { return ret; @@ -2036,8 +2047,8 @@ static int preallocate(BlockDriverState *bs) /* TODO Preallocate data if requested */ - nb_sectors -= num; - offset += num << BDRV_SECTOR_BITS; + bytes -= cur_bytes; + offset += cur_bytes; } /* @@ -2046,11 +2057,9 @@ static int preallocate(BlockDriverState *bs) * EOF). Extend the image to the last allocated sector. */ if (host_offset != 0) { - uint8_t buf[BDRV_SECTOR_SIZE]; - memset(buf, 0, BDRV_SECTOR_SIZE); - ret = bdrv_write(bs->file->bs, - (host_offset >> BDRV_SECTOR_BITS) + num - 1, - buf, 1); + uint8_t data = 0; + ret = bdrv_pwrite(bs->file->bs, (host_offset + cur_bytes) - 1, + &data, 1); if (ret < 0) { return ret; } @@ -2435,7 +2444,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, if (head || tail) { int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS; uint64_t off; - int nr; + unsigned int nr; assert(head + count <= s->cluster_size); @@ -2452,7 +2461,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, /* We can have new write after previous check */ offset = cl_start << BDRV_SECTOR_BITS; count = s->cluster_size; - nr = s->cluster_sectors; + nr = s->cluster_size; ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) { qemu_co_mutex_unlock(&s->lock); @@ -2900,36 +2909,20 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BDRVQcow2State *s = bs->opaque; - int64_t total_sectors = bs->total_sectors; - bool zero_beyond_eof = bs->zero_beyond_eof; - int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); - bs->zero_beyond_eof = false; - ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); - bs->zero_beyond_eof = zero_beyond_eof; - - /* bdrv_co_do_writev will have increased the total_sectors value to include - * the VM state - the VM state is however not an actual part of the block - * device, therefore, we need to restore the old value. */ - bs->total_sectors = total_sectors; - - return ret; + return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, + qiov->size, qiov, 0); } -static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size) +static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) { BDRVQcow2State *s = bs->opaque; - bool zero_beyond_eof = bs->zero_beyond_eof; - int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); - bs->zero_beyond_eof = false; - ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); - bs->zero_beyond_eof = zero_beyond_eof; - - return ret; + return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, + qiov->size, qiov, 0); } /* @@ -3368,8 +3361,8 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_get_block_status = qcow2_co_get_block_status, .bdrv_set_key = qcow2_set_key, - .bdrv_co_readv = qcow2_co_readv, - .bdrv_co_writev = qcow2_co_writev, + .bdrv_co_preadv = qcow2_co_preadv, + .bdrv_co_pwritev = qcow2_co_pwritev, .bdrv_co_flush_to_os = qcow2_co_flush_to_os, .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, diff --git a/block/qcow2.h b/block/qcow2.h index 7db9795d44..b36a7bf8ad 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -302,8 +302,8 @@ typedef struct Qcow2COWRegion { */ uint64_t offset; - /** Number of sectors to copy */ - int nb_sectors; + /** Number of bytes to copy */ + int nb_bytes; } Qcow2COWRegion; /** @@ -318,12 +318,6 @@ typedef struct QCowL2Meta /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; - /** - * Number of sectors from the start of the first allocated cluster to - * the end of the (possibly shortened) request - */ - int nb_available; - /** Number of newly allocated clusters */ int nb_clusters; @@ -471,8 +465,7 @@ static inline uint64_t l2meta_cow_start(QCowL2Meta *m) static inline uint64_t l2meta_cow_end(QCowL2Meta *m) { - return m->offset + m->cow_end.offset - + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS); + return m->offset + m->cow_end.offset + m->cow_end.nb_bytes; } static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) @@ -544,9 +537,10 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, int nb_sectors, bool enc, Error **errp); int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, - int *num, uint64_t *cluster_offset); + unsigned int *bytes, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int *num, uint64_t *host_offset, QCowL2Meta **m); + unsigned int *bytes, uint64_t *host_offset, + QCowL2Meta **m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size); diff --git a/block/raw-aio.h b/block/raw-aio.h index 714714e016..a4cdbbf1b7 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -15,6 +15,7 @@ #ifndef QEMU_RAW_AIO_H #define QEMU_RAW_AIO_H +#include "qemu/coroutine.h" #include "qemu/iov.h" /* AIO request types */ @@ -38,6 +39,8 @@ typedef struct LinuxAioState LinuxAioState; LinuxAioState *laio_init(void); void laio_cleanup(LinuxAioState *s); +int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + uint64_t offset, QEMUIOVector *qiov, int type); BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque, int type); diff --git a/block/raw-posix.c b/block/raw-posix.c index ce2e20f203..aacf13203f 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } -static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) +static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int type) { BDRVRawState *s = bs->opaque; if (fd_open(bs) < 0) - return NULL; + return -EIO; /* * Check if the underlying device requires requests to be aligned, @@ -1345,14 +1344,28 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, - nb_sectors, cb, opaque, type); + assert(qiov->size == bytes); + return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type); #endif } } - return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors, - cb, opaque, type); + return paio_submit_co(bs, s->fd, offset, qiov, bytes, type); +} + +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ); +} + +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) +{ + assert(flags == 0); + return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); } static void raw_aio_plug(BlockDriverState *bs) @@ -1375,22 +1388,6 @@ static void raw_aio_unplug(BlockDriverState *bs) #endif } -static BlockAIOCB *raw_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_READ); -} - -static BlockAIOCB *raw_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_WRITE); -} - static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { @@ -1957,8 +1954,8 @@ BlockDriver bdrv_file = { .bdrv_co_get_block_status = raw_co_get_block_status, .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = raw_aio_discard, .bdrv_refresh_limits = raw_refresh_limits, @@ -2405,8 +2402,8 @@ static BlockDriver bdrv_host_device = { .create_opts = &raw_create_opts, .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = hdev_aio_discard, .bdrv_refresh_limits = raw_refresh_limits, @@ -2535,8 +2532,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, @@ -2670,8 +2668,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, diff --git a/block/rbd.c b/block/rbd.c index 5bc5b32530..5226b6fef8 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -290,7 +290,8 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, if (only_read_conf_file) { ret = rados_conf_read_file(cluster, value); if (ret < 0) { - error_setg(errp, "error reading conf file %s", value); + error_setg_errno(errp, -ret, "error reading conf file %s", + value); break; } } @@ -299,7 +300,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, } else if (!only_read_conf_file) { ret = rados_conf_set(cluster, name, value); if (ret < 0) { - error_setg(errp, "invalid conf option %s", name); + error_setg_errno(errp, -ret, "invalid conf option %s", name); ret = -EINVAL; break; } @@ -354,9 +355,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) } clientname = qemu_rbd_parse_clientname(conf, clientname_buf); - if (rados_create(&cluster, clientname) < 0) { - error_setg(errp, "error initializing"); - return -EIO; + ret = rados_create(&cluster, clientname); + if (ret < 0) { + error_setg_errno(errp, -ret, "error initializing"); + return ret; } if (strstr(conf, "conf=") == NULL) { @@ -381,21 +383,27 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) return -EIO; } - if (rados_connect(cluster) < 0) { - error_setg(errp, "error connecting"); + ret = rados_connect(cluster); + if (ret < 0) { + error_setg_errno(errp, -ret, "error connecting"); rados_shutdown(cluster); - return -EIO; + return ret; } - if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) { - error_setg(errp, "error opening pool %s", pool); + ret = rados_ioctx_create(cluster, pool, &io_ctx); + if (ret < 0) { + error_setg_errno(errp, -ret, "error opening pool %s", pool); rados_shutdown(cluster); - return -EIO; + return ret; } ret = rbd_create(io_ctx, name, bytes, &obj_order); rados_ioctx_destroy(io_ctx); rados_shutdown(cluster); + if (ret < 0) { + error_setg_errno(errp, -ret, "error rbd create"); + return ret; + } return ret; } @@ -500,7 +508,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, clientname = qemu_rbd_parse_clientname(conf, clientname_buf); r = rados_create(&s->cluster, clientname); if (r < 0) { - error_setg(errp, "error initializing"); + error_setg_errno(errp, -r, "error initializing"); goto failed_opts; } @@ -546,19 +554,19 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, r = rados_connect(s->cluster); if (r < 0) { - error_setg(errp, "error connecting"); + error_setg_errno(errp, -r, "error connecting"); goto failed_shutdown; } r = rados_ioctx_create(s->cluster, pool, &s->io_ctx); if (r < 0) { - error_setg(errp, "error opening pool %s", pool); + error_setg_errno(errp, -r, "error opening pool %s", pool); goto failed_shutdown; } r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); if (r < 0) { - error_setg(errp, "error reading header from %s", s->name); + error_setg_errno(errp, -r, "error reading header from %s", s->name); goto failed_open; } diff --git a/block/sheepdog.c b/block/sheepdog.c index 23fbace1f9..ef5d044ab9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2784,12 +2784,19 @@ static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, return ret; } -static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data, - int64_t pos, int size) +static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) { BDRVSheepdogState *s = bs->opaque; + void *buf; + int ret; - return do_load_save_vmstate(s, data, pos, size, 1); + buf = qemu_blockalign(bs, qiov->size); + ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1); + qemu_iovec_from_buf(qiov, 0, buf, qiov->size); + qemu_vfree(buf); + + return ret; } diff --git a/blockdev.c b/blockdev.c index 7fd515a4fa..c9a0068cd6 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2544,6 +2544,7 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, BlockBackend *blk; BlockDriverState *medium_bs = NULL; int bdrv_flags; + int rc; QDict *options = NULL; Error *err = NULL; @@ -2598,11 +2599,13 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, goto fail; } - qmp_blockdev_open_tray(device, false, false, &err); - if (err) { + rc = do_open_tray(device, false, &err); + if (rc && rc != -ENOSYS) { error_propagate(errp, err); goto fail; } + error_free(err); + err = NULL; qmp_x_blockdev_remove_medium(device, &err); if (err) { @@ -3423,6 +3426,7 @@ static void blockdev_mirror_common(BlockDriverState *bs, BlockDriverState *target, bool has_replaces, const char *replaces, enum MirrorSyncMode sync, + BlockMirrorBackingMode backing_mode, bool has_speed, int64_t speed, bool has_granularity, uint32_t granularity, bool has_buf_size, int64_t buf_size, @@ -3480,7 +3484,7 @@ static void blockdev_mirror_common(BlockDriverState *bs, */ mirror_start(bs, target, has_replaces ? replaces : NULL, - speed, granularity, buf_size, sync, + speed, granularity, buf_size, sync, backing_mode, on_source_error, on_target_error, unmap, block_job_cb, bs, errp); } @@ -3503,6 +3507,7 @@ void qmp_drive_mirror(const char *device, const char *target, BlockBackend *blk; BlockDriverState *source, *target_bs; AioContext *aio_context; + BlockMirrorBackingMode backing_mode; Error *local_err = NULL; QDict *options = NULL; int flags; @@ -3576,6 +3581,12 @@ void qmp_drive_mirror(const char *device, const char *target, } } + if (mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) { + backing_mode = MIRROR_SOURCE_BACKING_CHAIN; + } else { + backing_mode = MIRROR_OPEN_BACKING_CHAIN; + } + if ((sync == MIRROR_SYNC_MODE_FULL || !source) && mode != NEW_IMAGE_MODE_EXISTING) { @@ -3624,7 +3635,7 @@ void qmp_drive_mirror(const char *device, const char *target, bdrv_set_aio_context(target_bs, aio_context); blockdev_mirror_common(bs, target_bs, - has_replaces, replaces, sync, + has_replaces, replaces, sync, backing_mode, has_speed, speed, has_granularity, granularity, has_buf_size, buf_size, @@ -3656,6 +3667,7 @@ void qmp_blockdev_mirror(const char *device, const char *target, BlockBackend *blk; BlockDriverState *target_bs; AioContext *aio_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; Error *local_err = NULL; blk = blk_by_name(device); @@ -3681,7 +3693,7 @@ void qmp_blockdev_mirror(const char *device, const char *target, bdrv_set_aio_context(target_bs, aio_context); blockdev_mirror_common(bs, target_bs, - has_replaces, replaces, sync, + has_replaces, replaces, sync, backing_mode, has_speed, speed, has_granularity, granularity, has_buf_size, buf_size, @@ -4154,22 +4166,18 @@ void qmp_x_blockdev_change(const char *parent, bool has_child, BlockJobInfoList *qmp_query_block_jobs(Error **errp) { BlockJobInfoList *head = NULL, **p_next = &head; - BlockDriverState *bs; - BdrvNextIterator it; + BlockJob *job; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - AioContext *aio_context = bdrv_get_aio_context(bs); + for (job = block_job_next(NULL); job; job = block_job_next(job)) { + BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); + AioContext *aio_context = blk_get_aio_context(job->blk); aio_context_acquire(aio_context); - - if (bs->job) { - BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); - elem->value = block_job_query(bs->job); - *p_next = elem; - p_next = &elem->next; - } - + elem->value = block_job_query(job); aio_context_release(aio_context); + + *p_next = elem; + p_next = &elem->next; } return head; diff --git a/blockjob.c b/blockjob.c index c095cc57cb..01b896b7e7 100644 --- a/blockjob.c +++ b/blockjob.c @@ -361,10 +361,12 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) } job->busy = false; + if (!block_job_is_paused(job)) { + co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); + } + /* The job can be paused while sleeping, so check this again */ if (block_job_is_paused(job)) { qemu_coroutine_yield(); - } else { - co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); } job->busy = true; } diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index 898ee05472..41a1309296 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -1,7 +1,6 @@ /* This is the Linux kernel elf-loading code, ported into user space */ #include "qemu/osdep.h" -#include <sys/mman.h> #include "qemu.h" #include "disas/disas.h" diff --git a/bsd-user/main.c b/bsd-user/main.c index 9f592be96f..abe9a26f9b 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" #include <machine/trap.h> -#include <sys/mman.h> #include "qemu.h" #include "qemu/path.h" diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index 6ab5334702..610f91b285 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -17,7 +17,6 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" -#include <sys/mman.h> #include "qemu.h" #include "qemu-common.h" diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c index 47cf865a32..a9fe8693c1 100644 --- a/bsd-user/syscall.c +++ b/bsd-user/syscall.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" #include "qemu/path.h" -#include <sys/mman.h> #include <sys/syscall.h> #include <sys/param.h> #include <sys/sysctl.h> @@ -270,7 +270,6 @@ aix="no" blobs="yes" pkgversion="" pie="" -zero_malloc="" qom_cast_debug="yes" trace_backends="log" trace_file="trace" @@ -1389,11 +1388,9 @@ fi # Consult white-list to determine whether to enable werror # by default. Only enable by default for git builds -z_version=$(cut -f3 -d. $source_path/VERSION) - if test -z "$werror" ; then if test -d "$source_path/.git" -a \ - "$linux" = "yes" ; then + \( "$linux" = "yes" -o "$mingw32" = "yes" \) ; then werror="yes" else werror="no" @@ -1782,13 +1779,20 @@ fi # avx2 optimization requirement check cat > $TMPC << EOF -static void bar(void) {} +#pragma GCC push_options +#pragma GCC target("avx2") +#include <cpuid.h> +#include <immintrin.h> + +static int bar(void *a) { + return _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(__m256i *)a, (__m256i){0})); +} static void *bar_ifunc(void) {return (void*) bar;} -static void foo(void) __attribute__((ifunc("bar_ifunc"))); -int main(void) { foo(); return 0; } +int foo(void *a) __attribute__((ifunc("bar_ifunc"))); +int main(int argc, char *argv[]) { return foo(argv[0]);} EOF -if compile_prog "-mavx2" "" ; then - if readelf --syms $TMPE |grep "IFUNC.*foo" >/dev/null 2>&1; then +if compile_object "" ; then + if readelf --syms $TMPO |grep "IFUNC.*foo" >/dev/null 2>&1; then avx2_opt="yes" fi fi @@ -4178,24 +4182,6 @@ if compile_prog "" "" ; then fi ########################################## -# check if we have usable SIGEV_THREAD_ID - -sigev_thread_id=no -cat > $TMPC << EOF -#include <signal.h> -int main(void) { - struct sigevent ev; - ev.sigev_notify = SIGEV_THREAD_ID; - ev._sigev_un._tid = 0; - asm volatile("" : : "g"(&ev)); - return 0; -} -EOF -if compile_prog "" "" ; then - sigev_thread_id=yes -fi - -########################################## # check if trace backend exists $python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends > /dev/null 2> /dev/null @@ -4574,16 +4560,6 @@ if test "$libnfs" != "no" ; then fi fi -# Disable zero malloc errors for official releases unless explicitly told to -# enable/disable -if test -z "$zero_malloc" ; then - if test "$z_version" = "50" ; then - zero_malloc="no" - else - zero_malloc="yes" - fi -fi - # Now we've finished running tests it's OK to add -Werror to the compiler flags if test "$werror" = "yes"; then QEMU_CFLAGS="-Werror $QEMU_CFLAGS" @@ -4862,7 +4838,6 @@ echo "preadv support $preadv" echo "fdatasync $fdatasync" echo "madvise $madvise" echo "posix_madvise $posix_madvise" -echo "sigev_thread_id $sigev_thread_id" echo "uuid support $uuid" echo "libcap-ng support $cap_ng" echo "vhost-net support $vhost_net" @@ -5277,9 +5252,6 @@ fi if test "$posix_madvise" = "yes" ; then echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak fi -if test "$sigev_thread_id" = "yes" ; then - echo "CONFIG_SIGEV_THREAD_ID=y" >> $config_host_mak -fi if test "$spice" = "yes" ; then echo "CONFIG_SPICE=y" >> $config_host_mak @@ -5342,9 +5314,6 @@ if [ "$bsd" = "yes" ] ; then echo "CONFIG_BSD=y" >> $config_host_mak fi -if test "$zero_malloc" = "yes" ; then - echo "CONFIG_ZERO_MALLOC=y" >> $config_host_mak -fi if test "$localtime_r" = "yes" ; then echo "CONFIG_LOCALTIME_R=y" >> $config_host_mak fi diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c index bf4ee0b2e2..e2f295bd43 100644 --- a/contrib/ivshmem-server/ivshmem-server.c +++ b/contrib/ivshmem-server/ivshmem-server.c @@ -10,7 +10,6 @@ #include "qemu/host-utils.h" #include "qemu/sockets.h" -#include <sys/mman.h> #include <sys/socket.h> #include <sys/un.h> @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #ifndef _WIN32 -#include <sys/mman.h> #endif #include "qemu/cutils.h" @@ -217,6 +217,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n", info->ram->dirty_pages_rate); } + if (info->ram->postcopy_requests) { + monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", + info->ram->postcopy_requests); + } } if (info->has_disk) { @@ -1951,7 +1955,12 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) blk = blk_by_name(device); if (blk) { + AioContext *aio_context = blk_get_aio_context(blk); + aio_context_acquire(aio_context); + qemuio_command(blk, command); + + aio_context_release(aio_context); } else { error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", device); diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 4c856f5278..51d8596056 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -900,7 +900,7 @@ static int m25p80_init(SSISlave *ss) s->storage = blk_blockalign(s->blk, s->size); /* FIXME: Move to late init */ - if (blk_pread(s->blk, 0, s->storage, s->size)) { + if (blk_pread(s->blk, 0, s->storage, s->size) != s->size) { fprintf(stderr, "Failed to initialize SPI flash!\n"); return 1; } diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index cf57814fb6..90aca73121 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <sys/uio.h> #include "hw/hw.h" diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index cbf1dccbb1..83108b0bdb 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -22,7 +22,6 @@ #include "qemu/osdep.h" #include <sys/select.h> #include <termios.h> -#include <sys/mman.h> #include "hw/hw.h" #include "sysemu/char.h" diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 570b0977c3..46b7d5eded 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -25,7 +25,6 @@ */ #include "qemu/osdep.h" -#include <sys/mman.h> #include "hw/hw.h" #include "ui/console.h" diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index bceed091e8..f9c901471d 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -22,7 +22,6 @@ */ #include "qemu/osdep.h" #include "qapi/error.h" -#include <sys/mman.h> #include "hw/hw.h" #include "hw/i386/pc.h" #include "qemu/error-report.h" diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 82c7c0a5fa..53bc968bd0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -432,13 +432,25 @@ static void pc_i440fx_machine_options(MachineClass *m) m->default_display = "std"; } -static void pc_i440fx_2_6_machine_options(MachineClass *m) +static void pc_i440fx_2_7_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL, + pc_i440fx_2_7_machine_options); + + +static void pc_i440fx_2_6_machine_options(MachineClass *m) +{ + pc_i440fx_2_7_machine_options(m); + m->is_default = 0; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); +} + DEFINE_I440FX_MACHINE(v2_6, "pc-i440fx-2.6", NULL, pc_i440fx_2_6_machine_options); @@ -447,8 +459,6 @@ static void pc_i440fx_2_5_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_6_machine_options(m); - m->alias = NULL; - m->is_default = 0; pcmc->save_tsc_khz = false; m->legacy_fw_cfg_order = 1; SET_MACHINE_COMPAT(m, PC_COMPAT_2_5); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 31a6a59383..e4b541f7b2 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -283,12 +283,22 @@ static void pc_q35_machine_options(MachineClass *m) m->no_floppy = 1; } -static void pc_q35_2_6_machine_options(MachineClass *m) +static void pc_q35_2_7_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL, + pc_q35_2_7_machine_options); + +static void pc_q35_2_6_machine_options(MachineClass *m) +{ + pc_q35_2_7_machine_options(m); + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); +} + DEFINE_Q35_MACHINE(v2_6, "pc-q35-2.6", NULL, pc_q35_2_6_machine_options); @@ -296,7 +306,6 @@ static void pc_q35_2_5_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_6_machine_options(m); - m->alias = NULL; pcmc->save_tsc_khz = false; m->legacy_fw_cfg_order = 1; SET_MACHINE_COMPAT(m, PC_COMPAT_2_5); diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 90be9f7617..c4dde3a52e 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -36,8 +36,6 @@ #include "hw/misc/ivshmem.h" -#include <sys/mman.h> - #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET #define PCI_DEVICE_ID_IVSHMEM 0x1110 diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c index 086893dcca..b81d820084 100644 --- a/hw/misc/pc-testdev.c +++ b/hw/misc/pc-testdev.c @@ -36,9 +36,6 @@ */ #include "qemu/osdep.h" -#if defined(CONFIG_POSIX) -#include <sys/mman.h> -#endif #include "hw/hw.h" #include "hw/qdev.h" #include "hw/isa/isa.h" diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index e4478bead8..efd43b47b8 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -15,6 +15,7 @@ * */ +#include "qemu/osdep.h" #include "net_tx_pkt.h" #include "net/eth.h" #include "net/checksum.h" diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h index 07b9a2098b..212ecc62fc 100644 --- a/hw/net/net_tx_pkt.h +++ b/hw/net/net_tx_pkt.h @@ -18,7 +18,6 @@ #ifndef NET_TX_PKT_H #define NET_TX_PKT_H -#include "qemu/osdep.h" #include "net/eth.h" #include "exec/hwaddr.h" diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 7281730d94..0b4ddae48c 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -22,7 +22,6 @@ #include "qemu/osdep.h" #include <sys/socket.h> #include <sys/ioctl.h> -#include <sys/mman.h> #include <sys/wait.h> #include "hw/hw.h" diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index 3adb685177..baa0a2cfdf 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -98,6 +98,9 @@ static uint32_t get_cmd(ESPState *s, uint8_t *buf, uint8_t buflen) s->dma_memory_read(s->dma_opaque, buf, dmalen); } else { dmalen = s->ti_size; + if (dmalen > TI_BUFSZ) { + return 0; + } memcpy(buf, s->ti_buf, dmalen); buf[0] = buf[2] >> 5; } @@ -219,7 +222,7 @@ static void write_response(ESPState *s) } else { s->ti_size = 2; s->ti_rptr = 0; - s->ti_wptr = 0; + s->ti_wptr = 2; s->rregs[ESP_RFLAGS] = 2; } esp_raise_irq(s); @@ -242,15 +245,12 @@ static void esp_do_dma(ESPState *s) uint32_t len; int to_device; - to_device = (s->ti_size < 0); len = s->dma_left; if (s->do_cmd) { trace_esp_do_dma(s->cmdlen, len); + assert (s->cmdlen <= sizeof(s->cmdbuf) && + len <= sizeof(s->cmdbuf) - s->cmdlen); s->dma_memory_read(s->dma_opaque, &s->cmdbuf[s->cmdlen], len); - s->ti_size = 0; - s->cmdlen = 0; - s->do_cmd = 0; - do_cmd(s, s->cmdbuf); return; } if (s->async_len == 0) { @@ -260,6 +260,7 @@ static void esp_do_dma(ESPState *s) if (len > s->async_len) { len = s->async_len; } + to_device = (s->ti_size < 0); if (to_device) { s->dma_memory_read(s->dma_opaque, s->async_buf, len); } else { @@ -315,6 +316,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) { ESPState *s = req->hba_private; + assert(!s->do_cmd); trace_esp_transfer_data(s->dma_left, s->ti_size); s->async_len = len; s->async_buf = scsi_req_get_buf(req); @@ -345,7 +347,7 @@ static void handle_ti(ESPState *s) s->dma_counter = dmalen; if (s->do_cmd) - minlen = (dmalen < 32) ? dmalen : 32; + minlen = (dmalen < ESP_CMDBUF_SZ) ? dmalen : ESP_CMDBUF_SZ; else if (s->ti_size < 0) minlen = (dmalen < -s->ti_size) ? dmalen : -s->ti_size; else @@ -355,13 +357,13 @@ static void handle_ti(ESPState *s) s->dma_left = minlen; s->rregs[ESP_RSTAT] &= ~STAT_TC; esp_do_dma(s); - } else if (s->do_cmd) { + } + if (s->do_cmd) { trace_esp_handle_ti_cmd(s->cmdlen); s->ti_size = 0; s->cmdlen = 0; s->do_cmd = 0; do_cmd(s, s->cmdbuf); - return; } } @@ -449,7 +451,7 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) break; case ESP_FIFO: if (s->do_cmd) { - if (s->cmdlen < TI_BUFSZ) { + if (s->cmdlen < ESP_CMDBUF_SZ) { s->cmdbuf[s->cmdlen++] = val & 0xff; } else { trace_esp_error_fifo_overrun(); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 188196990e..36f8a85a70 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2060,13 +2060,13 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) } break; case MODE_SELECT: - DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer); + DPRINTF("Mode Select(6) (len %lu)\n", (unsigned long)r->req.cmd.xfer); break; case MODE_SELECT_10: - DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer); + DPRINTF("Mode Select(10) (len %lu)\n", (unsigned long)r->req.cmd.xfer); break; case UNMAP: - DPRINTF("Unmap (len %lu)\n", (long)r->req.cmd.xfer); + DPRINTF("Unmap (len %lu)\n", (unsigned long)r->req.cmd.xfer); break; case VERIFY_10: case VERIFY_12: @@ -2080,7 +2080,7 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) case WRITE_SAME_16: DPRINTF("WRITE SAME %d (len %lu)\n", req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, - (long)r->req.cmd.xfer); + (unsigned long)r->req.cmd.xfer); break; default: DPRINTF("Unknown SCSI command (%2.2x=%s)\n", buf[0], diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 8fa47edd9a..0fd34c62c4 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include <libusb.h> -#include <sys/mman.h> #include "qemu-common.h" #include "qemu/config-file.h" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index e51ed3a348..5ff5e9220a 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index deab0c601a..53b87b76ea 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include <linux/vfio.h> #include <sys/ioctl.h> -#include <sys/mman.h> #include "hw/pci/msi.h" #include "hw/pci/msix.h" diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 8c15e09470..557d3f9e0c 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -27,10 +27,6 @@ #include "qapi-event.h" #include "trace.h" -#if defined(__linux__) -#include <sys/mman.h> -#endif - #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c index c63f9df38b..e7ce724567 100644 --- a/hw/xen/xen_backend.c +++ b/hw/xen/xen_backend.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include <sys/mman.h> #include <sys/signal.h> #include "hw/hw.h" diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index 9a16f2bff1..62add0639f 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -10,7 +10,6 @@ */ #include "qemu/osdep.h" -#include <sys/mman.h> #include "hw/xen/xen_backend.h" #include "xen_pt.h" diff --git a/include/block/block.h b/include/block/block.h index 54cca28bac..733a8ec2ec 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -65,6 +65,9 @@ typedef enum { BDRV_REQ_MAY_UNMAP = 0x4, BDRV_REQ_NO_SERIALISING = 0x8, BDRV_REQ_FUA = 0x10, + + /* Mask of valid flags */ + BDRV_REQ_MASK = 0x1f, } BdrvRequestFlags; typedef struct BlockSizes { @@ -232,6 +235,7 @@ int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); +int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov); int bdrv_pwrite(BlockDriverState *bs, int64_t offset, const void *buf, int count); int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov); @@ -401,10 +405,14 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); +void bdrv_round_sectors_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors); void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors); + int64_t offset, unsigned int bytes, + int64_t *cluster_offset, + unsigned int *cluster_bytes); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, @@ -423,6 +431,7 @@ void path_combine(char *dest, int dest_size, const char *base_path, const char *filename); +int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size); diff --git a/include/block/block_int.h b/include/block/block_int.h index 8a4963c4fe..688c6be009 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -224,10 +224,12 @@ struct BlockDriver { int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs); - int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos); - int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size); + int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); int (*bdrv_change_backing_file)(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); @@ -449,9 +451,6 @@ struct BlockDriverState { /* I/O Limits */ BlockLimits bl; - /* Whether produces zeros when read beyond eof */ - bool zero_beyond_eof; - /* Alignment requirement for offset/length of I/O requests */ unsigned int request_alignment; /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */ @@ -510,6 +509,20 @@ struct BlockBackendRootState { BlockdevDetectZeroesOptions detect_zeroes; }; +typedef enum BlockMirrorBackingMode { + /* Reuse the existing backing chain from the source for the target. + * - sync=full: Set backing BDS to NULL. + * - sync=top: Use source's backing BDS. + * - sync=none: Use source as the backing BDS. */ + MIRROR_SOURCE_BACKING_CHAIN, + + /* Open the target's backing chain completely anew */ + MIRROR_OPEN_BACKING_CHAIN, + + /* Do not change the target's backing BDS after job completion */ + MIRROR_LEAVE_BACKING_CHAIN, +} BlockMirrorBackingMode; + static inline BlockDriverState *backing_bs(BlockDriverState *bs) { return bs->backing ? bs->backing->bs : NULL; @@ -672,6 +685,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * @granularity: The chosen granularity for the dirty bitmap. * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. + * @backing_mode: How to establish the target's backing chain after completion. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. @@ -687,7 +701,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, void mirror_start(BlockDriverState *bs, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockdevOnError on_source_error, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, diff --git a/include/block/nbd.h b/include/block/nbd.h index b86a976984..df1f804338 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -25,19 +25,20 @@ #include "io/channel-socket.h" #include "crypto/tlscreds.h" +/* Note: these are _NOT_ the same as the network representation of an NBD + * request and reply! + */ struct nbd_request { - uint32_t magic; - uint32_t type; uint64_t handle; uint64_t from; uint32_t len; -} QEMU_PACKED; + uint32_t type; +}; struct nbd_reply { - uint32_t magic; - uint32_t error; uint64_t handle; -} QEMU_PACKED; + uint32_t error; +}; #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ #define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ @@ -76,6 +77,12 @@ enum { /* Maximum size of a single READ/WRITE data buffer */ #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024) +/* Maximum size of an export name. The NBD spec requires 256 and + * suggests that servers support up to 4096, but we stick to only the + * required size so that we can stack-allocate the names, and because + * going larger would require an audit of more code to make sure we + * aren't overflowing some other buffer. */ +#define NBD_MAX_NAME_SIZE 256 ssize_t nbd_wr_syncv(QIOChannel *ioc, struct iovec *iov, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9ca23098bd..49566c89d4 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -356,7 +356,16 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_2_6 \ + HW_COMPAT_2_6 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "cpuid-0xb",\ + .value = "off",\ + }, + #define PC_COMPAT_2_5 \ + PC_COMPAT_2_6 \ HW_COMPAT_2_5 /* Helper for setting model-id for CPU models that changed model-id diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h index 6c795276c9..d2c48869e1 100644 --- a/include/hw/scsi/esp.h +++ b/include/hw/scsi/esp.h @@ -14,6 +14,7 @@ void esp_init(hwaddr espaddr, int it_shift, #define ESP_REGS 16 #define TI_BUFSZ 16 +#define ESP_CMDBUF_SZ 32 typedef struct ESPState ESPState; @@ -31,7 +32,7 @@ struct ESPState { SCSIBus bus; SCSIDevice *current_dev; SCSIRequest *current_req; - uint8_t cmdbuf[TI_BUFSZ]; + uint8_t cmdbuf[ESP_CMDBUF_SZ]; uint32_t cmdlen; uint32_t do_cmd; diff --git a/include/migration/migration.h b/include/migration/migration.h index 13b12b7e87..3c96623d3d 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -160,6 +160,8 @@ struct MigrationState int64_t xbzrle_cache_size; int64_t setup_time; int64_t dirty_sync_count; + /* Count of requests incoming from destination */ + int64_t postcopy_requests; /* Flag set once the migration has been asked to enter postcopy */ bool start_postcopy; @@ -181,25 +183,25 @@ struct MigrationState void migrate_set_state(int *state, int old_state, int new_state); -void process_incoming_migration(QEMUFile *f); +void migration_fd_process_incoming(QEMUFile *f); void qemu_start_incoming_migration(const char *uri, Error **errp); -void migration_set_incoming_channel(MigrationState *s, - QIOChannel *ioc); +void migration_channel_process_incoming(MigrationState *s, + QIOChannel *ioc); -void migration_tls_set_incoming_channel(MigrationState *s, - QIOChannel *ioc, - Error **errp); +void migration_tls_channel_process_incoming(MigrationState *s, + QIOChannel *ioc, + Error **errp); -void migration_set_outgoing_channel(MigrationState *s, - QIOChannel *ioc, - const char *hostname); +void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname); -void migration_tls_set_outgoing_channel(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error **errp); +void migration_tls_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp); uint64_t migrate_max_downtime(void); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 693769403f..e63da2831a 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -197,8 +197,6 @@ void qemu_anon_ram_free(void *ptr, size_t size); #if defined(CONFIG_MADVISE) -#include <sys/mman.h> - #define QEMU_MADV_WILLNEED MADV_WILLNEED #define QEMU_MADV_DONTNEED MADV_DONTNEED #ifdef MADV_DONTFORK diff --git a/include/qemu/qdist.h b/include/qemu/qdist.h index f30050c2d1..54ece760d6 100644 --- a/include/qemu/qdist.h +++ b/include/qemu/qdist.h @@ -7,7 +7,6 @@ #ifndef QEMU_QDIST_H #define QEMU_QDIST_H -#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/bitops.h" diff --git a/include/qemu/qht.h b/include/qemu/qht.h index aec60aa534..70bfc68b8d 100644 --- a/include/qemu/qht.h +++ b/include/qemu/qht.h @@ -7,7 +7,6 @@ #ifndef QEMU_QHT_H #define QEMU_QHT_H -#include "qemu/osdep.h" #include "qemu/seqlock.h" #include "qemu/thread.h" #include "qemu/qdist.h" diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 07e3e5ae9b..9c7dfdfbec 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -26,6 +26,7 @@ #ifndef QEMU_OS_POSIX_H #define QEMU_OS_POSIX_H +#include <sys/mman.h> #include <sys/socket.h> #include <netinet/in.h> #include <netinet/tcp.h> @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/kvm.h> @@ -1520,10 +1519,16 @@ static int kvm_max_vcpus(KVMState *s) return (ret) ? ret : kvm_recommended_vcpus(s); } +static int kvm_max_vcpu_id(KVMState *s) +{ + int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID); + return (ret) ? ret : kvm_max_vcpus(s); +} + bool kvm_vcpu_id_is_valid(int vcpu_id) { KVMState *s = KVM_STATE(current_machine->accelerator); - return vcpu_id >= 0 && vcpu_id < kvm_max_vcpus(s); + return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s); } static int kvm_init(MachineState *ms) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index bb2558f284..f807baf389 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2,7 +2,6 @@ #include "qemu/osdep.h" #include <sys/param.h> -#include <sys/mman.h> #include <sys/resource.h> #include "qemu.h" diff --git a/linux-user/flatload.c b/linux-user/flatload.c index f9139c399a..48ad1c5e9e 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -34,7 +34,6 @@ /****************************************************************************/ #include "qemu/osdep.h" -#include <sys/mman.h> #include "qemu.h" #include "flat.h" diff --git a/linux-user/main.c b/linux-user/main.c index f8a8764ae9..b9a4e0ea45 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" #include "qemu-version.h" -#include <sys/mman.h> #include <sys/syscall.h> #include <sys/resource.h> diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 3519147bce..c4371d943a 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -17,7 +17,6 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" -#include <sys/mman.h> #include <linux/mman.h> #include <linux/unistd.h> diff --git a/linux-user/strace.c b/linux-user/strace.c index c5980a128c..4046b81705 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -5,7 +5,6 @@ #include <sys/shm.h> #include <sys/select.h> #include <sys/mount.h> -#include <sys/mman.h> #include <sched.h> #include "qemu.h" diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 71ccbd9c5e..1c17b741c2 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -32,7 +32,6 @@ #include <sys/personality.h> #include <sys/prctl.h> #include <sys/resource.h> -#include <sys/mman.h> #include <sys/swap.h> #include <linux/capability.h> #include <sched.h> diff --git a/migration/exec.c b/migration/exec.c index 1515cc3319..2af63cced6 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -38,7 +38,7 @@ void exec_start_outgoing_migration(MigrationState *s, const char *command, Error return; } - migration_set_outgoing_channel(s, ioc, NULL); + migration_channel_connect(s, ioc, NULL); object_unref(OBJECT(ioc)); } @@ -46,7 +46,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc, GIOCondition condition, gpointer opaque) { - migration_set_incoming_channel(migrate_get_current(), ioc); + migration_channel_process_incoming(migrate_get_current(), ioc); object_unref(OBJECT(ioc)); return FALSE; /* unregister */ } diff --git a/migration/fd.c b/migration/fd.c index fc5c9eee02..84a10fd68f 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -38,7 +38,7 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error ** return; } - migration_set_outgoing_channel(s, ioc, NULL); + migration_channel_connect(s, ioc, NULL); object_unref(OBJECT(ioc)); } @@ -46,7 +46,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc, GIOCondition condition, gpointer opaque) { - migration_set_incoming_channel(migrate_get_current(), ioc); + migration_channel_process_incoming(migrate_get_current(), ioc); object_unref(OBJECT(ioc)); return FALSE; /* unregister */ } diff --git a/migration/migration.c b/migration/migration.c index 7ecbadee6f..20f88757d8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -416,7 +416,7 @@ static void process_incoming_migration_co(void *opaque) qemu_bh_schedule(mis->bh); } -void process_incoming_migration(QEMUFile *f) +void migration_fd_process_incoming(QEMUFile *f) { Coroutine *co = qemu_coroutine_create(process_incoming_migration_co); @@ -426,8 +426,8 @@ void process_incoming_migration(QEMUFile *f) } -void migration_set_incoming_channel(MigrationState *s, - QIOChannel *ioc) +void migration_channel_process_incoming(MigrationState *s, + QIOChannel *ioc) { trace_migration_set_incoming_channel( ioc, object_get_typename(OBJECT(ioc))); @@ -436,20 +436,20 @@ void migration_set_incoming_channel(MigrationState *s, !object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { Error *local_err = NULL; - migration_tls_set_incoming_channel(s, ioc, &local_err); + migration_tls_channel_process_incoming(s, ioc, &local_err); if (local_err) { error_report_err(local_err); } } else { QEMUFile *f = qemu_fopen_channel_input(ioc); - process_incoming_migration(f); + migration_fd_process_incoming(f); } } -void migration_set_outgoing_channel(MigrationState *s, - QIOChannel *ioc, - const char *hostname) +void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname) { trace_migration_set_outgoing_channel( ioc, object_get_typename(OBJECT(ioc)), hostname); @@ -458,7 +458,7 @@ void migration_set_outgoing_channel(MigrationState *s, !object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { Error *local_err = NULL; - migration_tls_set_outgoing_channel(s, ioc, hostname, &local_err); + migration_tls_channel_connect(s, ioc, hostname, &local_err); if (local_err) { migrate_fd_error(s, local_err); error_free(local_err); @@ -602,6 +602,26 @@ static void get_xbzrle_cache_stats(MigrationInfo *info) } } +static void populate_ram_info(MigrationInfo *info, MigrationState *s) +{ + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_bytes_transferred(); + info->ram->total = ram_bytes_total(); + info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->skipped = skipped_mig_pages_transferred(); + info->ram->normal = norm_mig_pages_transferred(); + info->ram->normal_bytes = norm_mig_bytes_transferred(); + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = s->dirty_sync_count; + info->ram->postcopy_requests = s->postcopy_requests; + + if (s->state != MIGRATION_STATUS_COMPLETED) { + info->ram->remaining = ram_bytes_remaining(); + info->ram->dirty_pages_rate = s->dirty_pages_rate; + } +} + MigrationInfo *qmp_query_migrate(Error **errp) { MigrationInfo *info = g_malloc0(sizeof(*info)); @@ -626,18 +646,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->has_setup_time = true; info->setup_time = s->setup_time; - info->has_ram = true; - info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = ram_bytes_transferred(); - info->ram->remaining = ram_bytes_remaining(); - info->ram->total = ram_bytes_total(); - info->ram->duplicate = dup_mig_pages_transferred(); - info->ram->skipped = skipped_mig_pages_transferred(); - info->ram->normal = norm_mig_pages_transferred(); - info->ram->normal_bytes = norm_mig_bytes_transferred(); - info->ram->dirty_pages_rate = s->dirty_pages_rate; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = s->dirty_sync_count; + populate_ram_info(info, s); if (blk_mig_active()) { info->has_disk = true; @@ -665,18 +674,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->has_setup_time = true; info->setup_time = s->setup_time; - info->has_ram = true; - info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = ram_bytes_transferred(); - info->ram->remaining = ram_bytes_remaining(); - info->ram->total = ram_bytes_total(); - info->ram->duplicate = dup_mig_pages_transferred(); - info->ram->skipped = skipped_mig_pages_transferred(); - info->ram->normal = norm_mig_pages_transferred(); - info->ram->normal_bytes = norm_mig_bytes_transferred(); - info->ram->dirty_pages_rate = s->dirty_pages_rate; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = s->dirty_sync_count; + populate_ram_info(info, s); if (blk_mig_active()) { info->has_disk = true; @@ -699,17 +697,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->has_setup_time = true; info->setup_time = s->setup_time; - info->has_ram = true; - info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = ram_bytes_transferred(); - info->ram->remaining = 0; - info->ram->total = ram_bytes_total(); - info->ram->duplicate = dup_mig_pages_transferred(); - info->ram->skipped = skipped_mig_pages_transferred(); - info->ram->normal = norm_mig_pages_transferred(); - info->ram->normal_bytes = norm_mig_bytes_transferred(); - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = s->dirty_sync_count; + populate_ram_info(info, s); break; case MIGRATION_STATUS_FAILED: info->has_status = true; @@ -732,6 +720,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, { MigrationState *s = migrate_get_current(); MigrationCapabilityStatusList *cap; + bool old_postcopy_cap = migrate_postcopy_ram(); if (migration_is_setup_or_active(s->state)) { error_setg(errp, QERR_MIGRATION_ACTIVE); @@ -754,6 +743,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] = false; } + /* This check is reasonably expensive, so only when it's being + * set the first time, also it's only the destination that needs + * special support. + */ + if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && + !postcopy_ram_supported_by_host()) { + /* postcopy_ram_supported_by_host will have emitted a more + * detailed message + */ + error_report("Postcopy is not supported"); + s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] = + false; + } } } @@ -1004,6 +1006,7 @@ MigrationState *migrate_init(const MigrationParams *params) s->dirty_sync_count = 0; s->start_postcopy = false; s->postcopy_after_devices = false; + s->postcopy_requests = 0; s->migration_thread_running = false; s->last_req_rb = NULL; error_free(s->error); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 47250b675d..abe8c60a90 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -51,7 +51,6 @@ struct PostcopyDiscardState { #if defined(__linux__) #include <poll.h> -#include <sys/mman.h> #include <sys/ioctl.h> #include <sys/syscall.h> #include <asm/types.h> /* for __u64 */ diff --git a/migration/ram.c b/migration/ram.c index 844ea4694f..42fb8ac6d6 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1169,6 +1169,7 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname, { RAMBlock *ramblock; + ms->postcopy_requests++; rcu_read_lock(); if (!rbname) { /* Reuse last RAMBlock */ @@ -1557,7 +1558,9 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, } else { discard_length = zero - one; } - postcopy_discard_send_range(ms, pds, one, discard_length); + if (discard_length) { + postcopy_discard_send_range(ms, pds, one, discard_length); + } current = one + discard_length; } else { current = one; diff --git a/migration/rdma.c b/migration/rdma.c index 51bafc702b..5110ec828d 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -1511,7 +1511,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, while (1) { /* - * Coroutine doesn't start until process_incoming_migration() + * Coroutine doesn't start until migration_fd_process_incoming() * so don't yield unless we know we're running inside of a coroutine. */ if (rdma->migration_started_on_destination) { @@ -3620,7 +3620,7 @@ static void rdma_accept_incoming_migration(void *opaque) } rdma->migration_started_on_destination = 1; - process_incoming_migration(f); + migration_fd_process_incoming(f); } void rdma_start_incoming_migration(const char *host_port, Error **errp) diff --git a/migration/socket.c b/migration/socket.c index 977a8d3c1d..5c0a38f7b9 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -83,7 +83,7 @@ static void socket_outgoing_migration(Object *src, migrate_fd_error(data->s, err); } else { trace_migration_socket_outgoing_connected(data->hostname); - migration_set_outgoing_channel(data->s, sioc, data->hostname); + migration_channel_connect(data->s, sioc, data->hostname); } object_unref(src); } @@ -140,8 +140,8 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc, trace_migration_socket_incoming_accepted(); - migration_set_incoming_channel(migrate_get_current(), - QIO_CHANNEL(sioc)); + migration_channel_process_incoming(migrate_get_current(), + QIO_CHANNEL(sioc)); object_unref(OBJECT(sioc)); out: diff --git a/migration/tls.c b/migration/tls.c index 75f959ff9c..12c053d15f 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -72,14 +72,14 @@ static void migration_tls_incoming_handshake(Object *src, error_report("%s", error_get_pretty(err)); } else { trace_migration_tls_incoming_handshake_complete(); - migration_set_incoming_channel(migrate_get_current(), ioc); + migration_channel_process_incoming(migrate_get_current(), ioc); } object_unref(OBJECT(ioc)); } -void migration_tls_set_incoming_channel(MigrationState *s, - QIOChannel *ioc, - Error **errp) +void migration_tls_channel_process_incoming(MigrationState *s, + QIOChannel *ioc, + Error **errp) { QCryptoTLSCreds *creds; QIOChannelTLS *tioc; @@ -119,16 +119,16 @@ static void migration_tls_outgoing_handshake(Object *src, migrate_fd_error(s, err); } else { trace_migration_tls_outgoing_handshake_complete(); - migration_set_outgoing_channel(s, ioc, NULL); + migration_channel_connect(s, ioc, NULL); } object_unref(OBJECT(ioc)); } -void migration_tls_set_outgoing_channel(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error **errp) +void migration_tls_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp) { QCryptoTLSCreds *creds; QIOChannelTLS *tioc; diff --git a/nbd/client.c b/nbd/client.c index 31b88f3a31..287487c6c2 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -33,8 +33,10 @@ static int nbd_errno_to_system_errno(int err) return ENOMEM; case NBD_ENOSPC: return ENOSPC; - case NBD_EINVAL: default: + TRACE("Squashing unexpected error %d to EINVAL", err); + /* fallthrough */ + case NBD_EINVAL: return EINVAL; } } @@ -109,25 +111,27 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, switch (type) { case NBD_REP_ERR_UNSUP: - TRACE("server doesn't understand request %d, attempting fallback", - opt); + TRACE("server doesn't understand request %" PRIx32 + ", attempting fallback", opt); result = 0; goto cleanup; case NBD_REP_ERR_POLICY: - error_setg(errp, "Denied by server for option %x", opt); + error_setg(errp, "Denied by server for option %" PRIx32, opt); break; case NBD_REP_ERR_INVALID: - error_setg(errp, "Invalid data length for option %x", opt); + error_setg(errp, "Invalid data length for option %" PRIx32, opt); break; case NBD_REP_ERR_TLS_REQD: - error_setg(errp, "TLS negotiation required before option %x", opt); + error_setg(errp, "TLS negotiation required before option %" PRIx32, + opt); break; default: - error_setg(errp, "Unknown error code when asking for option %x", opt); + error_setg(errp, "Unknown error code when asking for option %" PRIx32, + opt); break; } @@ -165,7 +169,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) } opt = be32_to_cpu(opt); if (opt != NBD_OPT_LIST) { - error_setg(errp, "Unexpected option type %x expected %x", + error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", opt, NBD_OPT_LIST); return -1; } @@ -206,8 +210,8 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) error_setg(errp, "incorrect option name length"); return -1; } - if (namelen > 255) { - error_setg(errp, "export name length too long %d", namelen); + if (namelen > NBD_MAX_NAME_SIZE) { + error_setg(errp, "export name length too long %" PRIu32, namelen); return -1; } @@ -234,7 +238,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) g_free(buf); } } else { - error_setg(errp, "Unexpected reply type %x expected %x", + error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", type, NBD_REP_SERVER); return -1; } @@ -349,7 +353,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, } opt = be32_to_cpu(opt); if (opt != NBD_OPT_STARTTLS) { - error_setg(errp, "Unexpected option type %x expected %x", + error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", opt, NBD_OPT_STARTTLS); return NULL; } @@ -361,7 +365,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, } type = be32_to_cpu(type); if (type != NBD_REP_ACK) { - error_setg(errp, "Server rejected request to start TLS %x", + error_setg(errp, "Server rejected request to start TLS %" PRIx32, type); return NULL; } @@ -373,7 +377,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, } length = be32_to_cpu(length); if (length != 0) { - error_setg(errp, "Start TLS response was not zero %x", + error_setg(errp, "Start TLS response was not zero %" PRIu32, length); return NULL; } @@ -384,7 +388,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, return NULL; } data.loop = g_main_loop_new(g_main_context_default(), FALSE); - TRACE("Starting TLS hanshake"); + TRACE("Starting TLS handshake"); qio_channel_tls_handshake(tioc, nbd_tls_handshake, &data, @@ -474,7 +478,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, } globalflags = be16_to_cpu(globalflags); *flags = globalflags << 16; - TRACE("Global flags are %x", globalflags); + TRACE("Global flags are %" PRIx32, globalflags); if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) { fixedNewStyle = true; TRACE("Server supports fixed new style"); @@ -550,7 +554,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, } exportflags = be16_to_cpu(exportflags); *flags |= exportflags; - TRACE("Export flags are %x", exportflags); + TRACE("Export flags are %" PRIx16, exportflags); } else if (magic == NBD_CLIENT_MAGIC) { if (name) { error_setg(errp, "Server does not support export names"); @@ -572,7 +576,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, error_setg(errp, "Failed to read export flags"); goto fail; } - *flags = be32_to_cpup(flags); + *flags = be32_to_cpu(*flags); } else { error_setg(errp, "Bad magic received"); goto fail; @@ -591,9 +595,15 @@ fail: #ifdef __linux__ int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size) { + unsigned long sectors = size / BDRV_SECTOR_SIZE; + if (size / BDRV_SECTOR_SIZE != sectors) { + LOG("Export size %lld too large for 32-bit kernel", (long long) size); + return -E2BIG; + } + TRACE("Setting NBD socket"); - if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) { + if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) { int serrno = errno; LOG("Failed to set NBD socket"); return -serrno; @@ -601,21 +611,25 @@ int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size) TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE); - if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) { + if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) { int serrno = errno; LOG("Failed setting NBD block size"); return -serrno; } - TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE)); + TRACE("Setting size to %lu block(s)", sectors); + if (size % BDRV_SECTOR_SIZE) { + TRACE("Ignoring trailing %d bytes of export", + (int) (size % BDRV_SECTOR_SIZE)); + } - if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) { + if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) { int serrno = errno; LOG("Failed setting size (in blocks)"); return -serrno; } - if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) { + if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) flags) < 0) { if (errno == ENOTTY) { int read_only = (flags & NBD_FLAG_READ_ONLY) != 0; TRACE("Setting readonly attribute"); @@ -665,6 +679,15 @@ int nbd_client(int fd) errno = serrno; return ret; } + +int nbd_disconnect(int fd) +{ + ioctl(fd, NBD_CLEAR_QUE); + ioctl(fd, NBD_DISCONNECT); + ioctl(fd, NBD_CLEAR_SOCK); + return 0; +} + #else int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size) { @@ -675,6 +698,10 @@ int nbd_client(int fd) { return -ENOTSUP; } +int nbd_disconnect(int fd) +{ + return -ENOTSUP; +} #endif ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) @@ -683,14 +710,15 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) ssize_t ret; TRACE("Sending request to server: " - "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}", + "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 + ", .type=%" PRIu16 " }", request->from, request->len, request->handle, request->type); - cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); - cpu_to_be32w((uint32_t*)(buf + 4), request->type); - cpu_to_be64w((uint64_t*)(buf + 8), request->handle); - cpu_to_be64w((uint64_t*)(buf + 16), request->from); - cpu_to_be32w((uint32_t*)(buf + 24), request->len); + stl_be_p(buf, NBD_REQUEST_MAGIC); + stl_be_p(buf + 4, request->type); + stq_be_p(buf + 8, request->handle); + stq_be_p(buf + 16, request->from); + stl_be_p(buf + 24, request->len); ret = write_sync(ioc, buf, sizeof(buf)); if (ret < 0) { @@ -726,18 +754,18 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) [ 7 .. 15] handle */ - magic = be32_to_cpup((uint32_t*)buf); - reply->error = be32_to_cpup((uint32_t*)(buf + 4)); - reply->handle = be64_to_cpup((uint64_t*)(buf + 8)); + magic = ldl_be_p(buf); + reply->error = ldl_be_p(buf + 4); + reply->handle = ldq_be_p(buf + 8); reply->error = nbd_errno_to_system_errno(reply->error); - TRACE("Got reply: " - "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }", + TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 + ", handle = %" PRIu64" }", magic, reply->error, reply->handle); if (magic != NBD_REPLY_MAGIC) { - LOG("invalid magic (got 0x%x)", magic); + LOG("invalid magic (got 0x%" PRIx32 ")", magic); return -EINVAL; } return 0; diff --git a/nbd/server.c b/nbd/server.c index b2cfeb9843..a677e266ff 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -52,6 +52,7 @@ struct NBDRequest { QSIMPLEQ_ENTRY(NBDRequest) entry; NBDClient *client; uint8_t *data; + bool complete; }; struct NBDExport { @@ -196,7 +197,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) uint64_t magic; uint32_t len; - TRACE("Reply opt=%x type=%x", type, opt); + TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt); magic = cpu_to_be64(NBD_REP_MAGIC); if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { @@ -226,7 +227,7 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) uint64_t magic, name_len; uint32_t opt, type, len; - TRACE("Advertizing export name '%s'", exp->name ? exp->name : ""); + TRACE("Advertising export name '%s'", exp->name ? exp->name : ""); name_len = strlen(exp->name); magic = cpu_to_be64(NBD_REP_MAGIC); if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { @@ -285,13 +286,13 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) { int rc = -EINVAL; - char name[256]; + char name[NBD_MAX_NAME_SIZE + 1]; /* Client sends: [20 .. xx] export name (length bytes) */ TRACE("Checking length"); - if (length > 255) { + if (length >= sizeof(name)) { LOG("Bad length received"); goto fail; } @@ -334,7 +335,10 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, return NULL; } - nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS); + if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, + NBD_OPT_STARTTLS) < 0) { + return NULL; + } tioc = qio_channel_tls_new_server(ioc, client->tlscreds, @@ -392,12 +396,12 @@ static int nbd_negotiate_options(NBDClient *client) TRACE("Checking client flags"); be32_to_cpus(&flags); if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { - TRACE("Support supports fixed newstyle handshake"); + TRACE("Client supports fixed newstyle handshake"); fixedNewstyle = true; flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; } if (flags != 0) { - TRACE("Unknown client flags 0x%x received", flags); + TRACE("Unknown client flags 0x%" PRIx32 " received", flags); return -EIO; } @@ -431,12 +435,12 @@ static int nbd_negotiate_options(NBDClient *client) } length = be32_to_cpu(length); - TRACE("Checking option 0x%x", clientflags); + TRACE("Checking option 0x%" PRIx32, clientflags); if (client->tlscreds && client->ioc == (QIOChannel *)client->sioc) { QIOChannel *tioc; if (!fixedNewstyle) { - TRACE("Unsupported option 0x%x", clientflags); + TRACE("Unsupported option 0x%" PRIx32, clientflags); return -EINVAL; } switch (clientflags) { @@ -455,12 +459,16 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - TRACE("Option 0x%x not permitted before TLS", clientflags); + TRACE("Option 0x%" PRIx32 " not permitted before TLS", + clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, - clientflags); + ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, + clientflags); + if (ret < 0) { + return ret; + } break; } } else if (fixedNewstyle) { @@ -484,21 +492,29 @@ static int nbd_negotiate_options(NBDClient *client) } if (client->tlscreds) { TRACE("TLS already enabled"); - nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_INVALID, - clientflags); + ret = nbd_negotiate_send_rep(client->ioc, + NBD_REP_ERR_INVALID, + clientflags); } else { TRACE("TLS not configured"); - nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_POLICY, - clientflags); + ret = nbd_negotiate_send_rep(client->ioc, + NBD_REP_ERR_POLICY, + clientflags); + } + if (ret < 0) { + return ret; } break; default: - TRACE("Unsupported option 0x%x", clientflags); + TRACE("Unsupported option 0x%" PRIx32, clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, - clientflags); + ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, + clientflags); + if (ret < 0) { + return ret; + } break; } } else { @@ -511,7 +527,7 @@ static int nbd_negotiate_options(NBDClient *client) return nbd_negotiate_handle_export_name(client, length); default: - TRACE("Unsupported option 0x%x", clientflags); + TRACE("Unsupported option 0x%" PRIx32, clientflags); return -EINVAL; } } @@ -560,6 +576,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) oldStyle = client->exp != NULL && !client->tlscreds; if (oldStyle) { assert ((client->exp->nbdflags & ~65535) == 0); + TRACE("advertising size %" PRIu64 " and flags %x", + client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 8, NBD_CLIENT_MAGIC); stq_be_p(buf + 16, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); @@ -589,6 +607,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) } assert ((client->exp->nbdflags & ~65535) == 0); + TRACE("advertising size %" PRIu64 " and flags %x", + client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != @@ -604,24 +624,6 @@ fail: return rc; } -#ifdef __linux__ - -int nbd_disconnect(int fd) -{ - ioctl(fd, NBD_CLEAR_QUE); - ioctl(fd, NBD_DISCONNECT); - ioctl(fd, NBD_CLEAR_SOCK); - return 0; -} - -#else - -int nbd_disconnect(int fd) -{ - return -ENOTSUP; -} -#endif - static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) { uint8_t buf[NBD_REQUEST_SIZE]; @@ -646,18 +648,18 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) [24 .. 27] len */ - magic = be32_to_cpup((uint32_t*)buf); - request->type = be32_to_cpup((uint32_t*)(buf + 4)); - request->handle = be64_to_cpup((uint64_t*)(buf + 8)); - request->from = be64_to_cpup((uint64_t*)(buf + 16)); - request->len = be32_to_cpup((uint32_t*)(buf + 24)); + magic = ldl_be_p(buf); + request->type = ldl_be_p(buf + 4); + request->handle = ldq_be_p(buf + 8); + request->from = ldq_be_p(buf + 16); + request->len = ldl_be_p(buf + 24); - TRACE("Got request: " - "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }", + TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32 + ", from = %" PRIu64 " , len = %" PRIu32 " }", magic, request->type, request->from, request->len); if (magic != NBD_REQUEST_MAGIC) { - LOG("invalid magic (got 0x%x)", magic); + LOG("invalid magic (got 0x%" PRIx32 ")", magic); return -EINVAL; } return 0; @@ -670,7 +672,8 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) reply->error = system_errno_to_nbd_errno(reply->error); - TRACE("Sending response to client: { .error = %d, handle = %" PRIu64 " }", + TRACE("Sending response to client: { .error = %" PRId32 + ", handle = %" PRIu64 " }", reply->error, reply->handle); /* Reply @@ -969,7 +972,13 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, return rc; } -static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request) +/* Collect a client request. Return 0 if request looks valid, -EAGAIN + * to keep trying the collection, -EIO to drop connection right away, + * and any other negative value to report an error to the client + * (although the caller may still need to disconnect after reporting + * the error). */ +static ssize_t nbd_co_receive_request(NBDRequest *req, + struct nbd_request *request) { NBDClient *client = req->client; uint32_t command; @@ -987,19 +996,34 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque goto out; } + TRACE("Decoding type"); + + command = request->type & NBD_CMD_MASK_COMMAND; + if (command != NBD_CMD_WRITE) { + /* No payload, we are ready to read the next request. */ + req->complete = true; + } + + if (command == NBD_CMD_DISC) { + /* Special case: we're going to disconnect without a reply, + * whether or not flags, from, or len are bogus */ + TRACE("Request type is DISCONNECT"); + rc = -EIO; + goto out; + } + + /* Check for sanity in the parameters, part 1. Defer as many + * checks as possible until after reading any NBD_CMD_WRITE + * payload, so we can try and keep the connection alive. */ if ((request->from + request->len) < request->from) { - LOG("integer overflow detected! " - "you're probably being attacked"); + LOG("integer overflow detected, you're probably being attacked"); rc = -EINVAL; goto out; } - TRACE("Decoding type"); - - command = request->type & NBD_CMD_MASK_COMMAND; if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { if (request->len > NBD_MAX_BUFFER_SIZE) { - LOG("len (%u) is larger than max len (%u)", + LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); rc = -EINVAL; goto out; @@ -1012,14 +1036,30 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque } } if (command == NBD_CMD_WRITE) { - TRACE("Reading %u byte(s)", request->len); + TRACE("Reading %" PRIu32 " byte(s)", request->len); if (read_sync(client->ioc, req->data, request->len) != request->len) { LOG("reading from socket failed"); rc = -EIO; goto out; } + req->complete = true; + } + + /* Sanity checks, part 2. */ + if (request->from + request->len > client->exp->size) { + LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 + ", Size: %" PRIu64, request->from, request->len, + (uint64_t)client->exp->size); + rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; + goto out; } + if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) { + LOG("unsupported flags (got 0x%x)", + request->type & ~NBD_CMD_MASK_COMMAND); + return -EINVAL; + } + rc = 0; out: @@ -1038,6 +1078,7 @@ static void nbd_trip(void *opaque) struct nbd_reply reply; ssize_t ret; uint32_t command; + int flags; TRACE("Reading request."); if (client->closing) { @@ -1061,14 +1102,6 @@ static void nbd_trip(void *opaque) goto error_reply; } command = request.type & NBD_CMD_MASK_COMMAND; - if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) { - LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64 - ", Offset: %" PRIu64 "\n", - request.from, request.len, - (uint64_t)exp->size, (uint64_t)exp->dev_offset); - LOG("requested operation past EOF--bad client?"); - goto invalid_request; - } if (client->closing) { /* @@ -1099,7 +1132,7 @@ static void nbd_trip(void *opaque) goto error_reply; } - TRACE("Read %u byte(s)", request.len); + TRACE("Read %" PRIu32" byte(s)", request.len); if (nbd_co_send_reply(req, &reply, request.len) < 0) goto out; break; @@ -1114,31 +1147,27 @@ static void nbd_trip(void *opaque) TRACE("Writing to device"); + flags = 0; + if (request.type & NBD_CMD_FLAG_FUA) { + flags |= BDRV_REQ_FUA; + } ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, - req->data, request.len, 0); + req->data, request.len, flags); if (ret < 0) { LOG("writing to file failed"); reply.error = -ret; goto error_reply; } - if (request.type & NBD_CMD_FLAG_FUA) { - ret = blk_co_flush(exp->blk); - if (ret < 0) { - LOG("flush failed"); - reply.error = -ret; - goto error_reply; - } - } - if (nbd_co_send_reply(req, &reply, 0) < 0) { goto out; } break; + case NBD_CMD_DISC: - TRACE("Request type is DISCONNECT"); - errno = 0; - goto out; + /* unreachable, thanks to special case in nbd_co_receive_request() */ + abort(); + case NBD_CMD_FLUSH: TRACE("Request type is FLUSH"); @@ -1173,11 +1202,13 @@ static void nbd_trip(void *opaque) } break; default: - LOG("invalid request type (%u) received", request.type); - invalid_request: + LOG("invalid request type (%" PRIu32 ") received", request.type); reply.error = EINVAL; error_reply: - if (nbd_co_send_reply(req, &reply, 0) < 0) { + /* We must disconnect after NBD_CMD_WRITE if we did not + * read the payload. + */ + if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) { goto out; } break; diff --git a/net/netmap.c b/net/netmap.c index 6cc0db5ee1..64967b947e 100644 --- a/net/netmap.c +++ b/net/netmap.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> #include <net/if.h> -#include <sys/mman.h> #define NETMAP_WITH_LIBS #include <net/netmap.h> #include <net/netmap_user.h> diff --git a/os-posix.c b/os-posix.c index 107fde38bf..3755265582 100644 --- a/os-posix.c +++ b/os-posix.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include <sys/wait.h> /*needed for MAP_POPULATE before including qemu-options.h */ -#include <sys/mman.h> #include <pwd.h> #include <grp.h> #include <libgen.h> diff --git a/qapi-schema.json b/qapi-schema.json index 48c3a6f5cd..40b1db4271 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -382,13 +382,17 @@ # # @dirty-sync-count: number of times that dirty ram was synchronized (since 2.1) # +# @postcopy-requests: The number of page requests received from the destination +# (since 2.7) +# # Since: 0.14.0 ## { 'struct': 'MigrationStats', 'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' , 'duplicate': 'int', 'skipped': 'int', 'normal': 'int', 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', - 'mbps' : 'number', 'dirty-sync-count' : 'int' } } + 'mbps' : 'number', 'dirty-sync-count' : 'int', + 'postcopy-requests' : 'int' } } ## # @XBZRLECacheStats @@ -486,7 +490,7 @@ # # @error-desc: #optional the human readable error description string, when # @status is 'failed'. Clients should not attempt to parse the -# error strings. (Since 2.6) +# error strings. (Since 2.7) # # Since: 0.14.0 ## @@ -631,7 +635,7 @@ # migration URI does not already include a hostname. For # example if using fd: or exec: based migration, the # hostname must be provided so that the server's x509 -# certificate identity canbe validated. (Since 2.7) +# certificate identity can be validated. (Since 2.7) # # Since: 2.4 ## @@ -672,7 +676,7 @@ # migration URI does not already include a hostname. For # example if using fd: or exec: based migration, the # hostname must be provided so that the server's x509 -# certificate identity canbe validated. (Since 2.7) +# certificate identity can be validated. (Since 2.7) # # Since: 2.4 ## @@ -708,14 +712,14 @@ # be for a 'client' endpoint, while for the incoming side the # credentials must be for a 'server' endpoint. Setting this # will enable TLS for all migrations. The default is unset, -# resulting in unsecured migration at the QEMU level. (Since 2.6) +# resulting in unsecured migration at the QEMU level. (Since 2.7) # # @tls-hostname: hostname of the target host for the migration. This is # required when using x509 based TLS credentials and the # migration URI does not already include a hostname. For # example if using fd: or exec: based migration, the # hostname must be provided so that the server's x509 -# certificate identity canbe validated. (Since 2.6) +# certificate identity can be validated. (Since 2.7) # # Since: 2.4 ## diff --git a/qemu-char.c b/qemu-char.c index e3127772c4..84f49acbac 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -47,7 +47,6 @@ #include <sys/times.h> #include <sys/wait.h> #include <termios.h> -#include <sys/mman.h> #include <sys/ioctl.h> #include <sys/resource.h> #include <sys/socket.h> diff --git a/qemu-img.c b/qemu-img.c index 251386b49d..14e2661a5c 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3570,7 +3570,7 @@ static int img_bench(int argc, char **argv) BlockBackend *blk = NULL; BenchData data = {}; int flags = 0; - bool writethrough; + bool writethrough = false; struct timeval t1, t2; int i; diff --git a/qemu-nbd.c b/qemu-nbd.c index 6554f0ab65..9519db324b 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -154,8 +154,8 @@ static void read_partition(uint8_t *p, struct partition_record *r) r->end_cylinder = p[7] | ((p[6] << 2) & 0x300); r->end_sector = p[6] & 0x3f; - r->start_sector_abs = le32_to_cpup((uint32_t *)(p + 8)); - r->nb_sectors_abs = le32_to_cpup((uint32_t *)(p + 12)); + r->start_sector_abs = ldl_le_p(p + 8); + r->nb_sectors_abs = ldl_le_p(p + 12); } static int find_partition(BlockBackend *blk, int partition, diff --git a/qemu-options.hx b/qemu-options.hx index 0e42ba55be..17f15ad1fe 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3214,6 +3214,8 @@ STEXI @item -L @var{path} @findex -L Set the directory for the BIOS, VGA BIOS and keymaps. + +To list all the data directories, use @code{-L help}. ETEXI DEF("bios", HAS_ARG, QEMU_OPTION_bios, \ diff --git a/scripts/clean-includes b/scripts/clean-includes index 37b73b5433..4412a5590a 100755 --- a/scripts/clean-includes +++ b/scripts/clean-includes @@ -105,6 +105,8 @@ for f in "$@"; do *include/qemu/osdep.h | \ *include/qemu/compiler.h | \ *include/glib-compat.h | \ + *include/sysemu/os-posix.h | \ + *include/sysemu/os-win32.h | \ *include/standard-headers/ ) # Removing include lines from osdep.h itself would be counterproductive. echo "SKIPPING $f (special case header)" @@ -145,6 +147,7 @@ for f in "$@"; do <stdlib.h> <stdio.h> <string.h> <strings.h> <inttypes.h> <limits.h> <unistd.h> <time.h> <ctype.h> <errno.h> <fcntl.h> <sys/stat.h> <sys/time.h> <assert.h> <signal.h> <glib.h> + <sys/stat.h> <sys/time.h> <assert.h> <signal.h> <glib.h> <sys/mman.h> "sysemu/os-posix.h, sysemu/os-win32.h "glib-compat.h" "qemu/typedefs.h" ))' "$f" diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 83da447cb7..5c2bd7a10b 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/kvm.h> diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c index c35c676e14..069da0c5fd 100644 --- a/target-arm/kvm32.c +++ b/target-arm/kvm32.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/kvm.h> diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index 2d6a310ebb..5faa76c57e 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <sys/ptrace.h> #include <linux/elf.h> diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 895a386d3b..3665fecb5f 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -41,6 +41,7 @@ #include "sysemu/sysemu.h" #include "hw/qdev-properties.h" +#include "hw/i386/topology.h" #ifndef CONFIG_USER_ONLY #include "exec/address-spaces.h" #include "hw/hw.h" @@ -677,6 +678,14 @@ static ObjectClass *x86_cpu_class_by_name(const char *cpu_model) return oc; } +static char *x86_cpu_class_get_model_name(X86CPUClass *cc) +{ + const char *class_name = object_class_get_name(OBJECT_CLASS(cc)); + assert(g_str_has_suffix(class_name, X86_CPU_TYPE_SUFFIX)); + return g_strndup(class_name, + strlen(class_name) - strlen(X86_CPU_TYPE_SUFFIX)); +} + struct X86CPUDefinition { const char *name; uint32_t level; @@ -1239,6 +1248,51 @@ static X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel Core Processor (Broadwell)", }, { + .name = "Skylake-Client", + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 94, + .stepping = 3, + .features[FEAT_1_EDX] = + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | + CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | + CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | + CPUID_DE | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | + CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | + CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | + CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | + CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | + CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, + /* Missing: XSAVES (not supported by some Linux versions, + * including v4.1 to v4.6). + * KVM doesn't yet expose any XSAVES state save component, + * and the only one defined in Skylake (processor tracing) + * probably will block migration anyway. + */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .xlevel = 0x80000008, + .model_id = "Intel Core Processor (Skylake)", + }, + { .name = "Opteron_G1", .level = 5, .vendor = CPUID_VENDOR_AMD, @@ -1501,16 +1555,17 @@ static void host_x86_cpu_initfn(Object *obj) CPUX86State *env = &cpu->env; KVMState *s = kvm_state; - assert(kvm_enabled()); - /* We can't fill the features array here because we don't know yet if * "migratable" is true or false. */ cpu->host_features = true; - env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); - env->cpuid_xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); - env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); + /* If KVM is disabled, x86_cpu_realizefn() will report an error later */ + if (kvm_enabled()) { + env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); + env->cpuid_xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); + env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); + } object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort); } @@ -1894,6 +1949,14 @@ static inline void feat2prop(char *s) } } +/* Compatibily hack to maintain legacy +-feat semantic, + * where +-feat overwrites any feature set by + * feat=on|feat even if the later is parsed after +-feat + * (i.e. "-x2apic,x2apic=on" will result in x2apic disabled) + */ +static FeatureWordArray plus_features = { 0 }; +static FeatureWordArray minus_features = { 0 }; + /* Parse "+feature,-feature,feature=foo" CPU feature string */ static void x86_cpu_parse_featurestr(CPUState *cs, char *features, @@ -1901,97 +1964,61 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char *features, { X86CPU *cpu = X86_CPU(cs); char *featurestr; /* Single 'key=value" string being parsed */ - FeatureWord w; - /* Features to be added */ - FeatureWordArray plus_features = { 0 }; - /* Features to be removed */ - FeatureWordArray minus_features = { 0 }; - uint32_t numvalue; - CPUX86State *env = &cpu->env; Error *local_err = NULL; - featurestr = features ? strtok(features, ",") : NULL; + if (!features) { + return; + } - while (featurestr) { - char *val; + for (featurestr = strtok(features, ","); + featurestr && !local_err; + featurestr = strtok(NULL, ",")) { + const char *name; + const char *val = NULL; + char *eq = NULL; + + /* Compatibility syntax: */ if (featurestr[0] == '+') { add_flagname_to_bitmaps(featurestr + 1, plus_features, &local_err); + continue; } else if (featurestr[0] == '-') { add_flagname_to_bitmaps(featurestr + 1, minus_features, &local_err); - } else if ((val = strchr(featurestr, '='))) { - *val = 0; val++; - feat2prop(featurestr); - if (!strcmp(featurestr, "xlevel")) { - char *err; - char num[32]; - - numvalue = strtoul(val, &err, 0); - if (!*val || *err) { - error_setg(errp, "bad numerical value %s", val); - return; - } - if (numvalue < 0x80000000) { - error_report("xlevel value shall always be >= 0x80000000" - ", fixup will be removed in future versions"); - numvalue += 0x80000000; - } - snprintf(num, sizeof(num), "%" PRIu32, numvalue); - object_property_parse(OBJECT(cpu), num, featurestr, &local_err); - } else if (!strcmp(featurestr, "tsc-freq")) { - int64_t tsc_freq; - char *err; - char num[32]; - - tsc_freq = qemu_strtosz_suffix_unit(val, &err, - QEMU_STRTOSZ_DEFSUFFIX_B, 1000); - if (tsc_freq < 0 || *err) { - error_setg(errp, "bad numerical value %s", val); - return; - } - snprintf(num, sizeof(num), "%" PRId64, tsc_freq); - object_property_parse(OBJECT(cpu), num, "tsc-frequency", - &local_err); - } else if (!strcmp(featurestr, "hv-spinlocks")) { - char *err; - const int min = 0xFFF; - char num[32]; - numvalue = strtoul(val, &err, 0); - if (!*val || *err) { - error_setg(errp, "bad numerical value %s", val); - return; - } - if (numvalue < min) { - error_report("hv-spinlocks value shall always be >= 0x%x" - ", fixup will be removed in future versions", - min); - numvalue = min; - } - snprintf(num, sizeof(num), "%" PRId32, numvalue); - object_property_parse(OBJECT(cpu), num, featurestr, &local_err); - } else { - object_property_parse(OBJECT(cpu), val, featurestr, &local_err); - } - } else { - feat2prop(featurestr); - object_property_parse(OBJECT(cpu), "on", featurestr, &local_err); + continue; } - if (local_err) { - error_propagate(errp, local_err); - return; + + eq = strchr(featurestr, '='); + if (eq) { + *eq++ = 0; + val = eq; + } else { + val = "on"; } - featurestr = strtok(NULL, ","); - } - if (cpu->host_features) { - for (w = 0; w < FEATURE_WORDS; w++) { - env->features[w] = - x86_cpu_get_supported_feature_word(w, cpu->migratable); + feat2prop(featurestr); + name = featurestr; + + /* Special case: */ + if (!strcmp(name, "tsc-freq")) { + int64_t tsc_freq; + char *err; + char num[32]; + + tsc_freq = qemu_strtosz_suffix_unit(val, &err, + QEMU_STRTOSZ_DEFSUFFIX_B, 1000); + if (tsc_freq < 0 || *err) { + error_setg(errp, "bad numerical value %s", val); + return; + } + snprintf(num, sizeof(num), "%" PRId64, tsc_freq); + val = num; + name = "tsc-frequency"; } + + object_property_parse(OBJECT(cpu), val, name, &local_err); } - for (w = 0; w < FEATURE_WORDS; w++) { - env->features[w] |= plus_features[w]; - env->features[w] &= ~minus_features[w]; + if (local_err) { + error_propagate(errp, local_err); } } @@ -2175,7 +2202,6 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) X86CPU *cpu_x86_create(const char *cpu_model, Error **errp) { X86CPU *cpu = NULL; - X86CPUClass *xcc; ObjectClass *oc; gchar **model_pieces; char *name, *features; @@ -2194,12 +2220,6 @@ X86CPU *cpu_x86_create(const char *cpu_model, Error **errp) error_setg(&error, "Unable to find CPU definition: %s", name); goto out; } - xcc = X86_CPU_CLASS(oc); - - if (xcc->kvm_required && !kvm_enabled()) { - error_setg(&error, "CPU model '%s' requires KVM", name); - goto out; - } cpu = X86_CPU(object_new(object_class_get_name(oc))); @@ -2222,25 +2242,7 @@ out: X86CPU *cpu_x86_init(const char *cpu_model) { - Error *error = NULL; - X86CPU *cpu; - - cpu = cpu_x86_create(cpu_model, &error); - if (error) { - goto out; - } - - object_property_set_bool(OBJECT(cpu), true, "realized", &error); - -out: - if (error) { - error_report_err(error); - if (cpu != NULL) { - object_unref(OBJECT(cpu)); - cpu = NULL; - } - } - return cpu; + return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model)); } static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data) @@ -2447,6 +2449,36 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; } break; + case 0xB: + /* Extended Topology Enumeration Leaf */ + if (!cpu->enable_cpuid_0xb) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + *ecx = count & 0xff; + *edx = cpu->apic_id; + + switch (count) { + case 0: + *eax = apicid_core_offset(smp_cores, smp_threads); + *ebx = smp_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; + break; + case 1: + *eax = apicid_pkg_offset(smp_cores, smp_threads); + *ebx = smp_cores * smp_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + break; + default: + *eax = 0; + *ebx = 0; + *ecx |= CPUID_TOPOLOGY_LEVEL_INVALID; + } + + assert(!(*eax & ~0x1f)); + *ebx &= 0xffff; /* The count doesn't need to be reliable. */ + break; case 0xD: { KVMState *s = cs->kvm_state; uint64_t ena_mask; @@ -2874,12 +2906,37 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) CPUX86State *env = &cpu->env; Error *local_err = NULL; static bool ht_warned; + FeatureWord w; + + if (xcc->kvm_required && !kvm_enabled()) { + char *name = x86_cpu_class_get_model_name(xcc); + error_setg(&local_err, "CPU model '%s' requires KVM", name); + g_free(name); + goto out; + } if (cpu->apic_id < 0) { error_setg(errp, "apic-id property was not initialized properly"); return; } + /*TODO: cpu->host_features incorrectly overwrites features + * set using "feat=on|off". Once we fix this, we can convert + * plus_features & minus_features to global properties + * inside x86_cpu_parse_featurestr() too. + */ + if (cpu->host_features) { + for (w = 0; w < FEATURE_WORDS; w++) { + env->features[w] = + x86_cpu_get_supported_feature_word(w, cpu->migratable); + } + } + + for (w = 0; w < FEATURE_WORDS; w++) { + cpu->env.features[w] |= plus_features[w]; + cpu->env.features[w] &= ~minus_features[w]; + } + if (env->features[FEAT_7_0_EBX] && env->cpuid_level < 7) { env->cpuid_level = 7; } @@ -3206,6 +3263,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0), DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, 0), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), + DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 0426459bba..d9ab884c2b 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -636,6 +636,11 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_MWAIT_IBE (1U << 1) /* Interrupts can exit capability */ #define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */ +/* CPUID[0xB].ECX level types */ +#define CPUID_TOPOLOGY_LEVEL_INVALID (0U << 8) +#define CPUID_TOPOLOGY_LEVEL_SMT (1U << 8) +#define CPUID_TOPOLOGY_LEVEL_CORE (2U << 8) + #ifndef HYPERV_SPINLOCK_NEVER_RETRY #define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF #endif @@ -1173,6 +1178,9 @@ struct X86CPU { */ bool enable_pmu; + /* Compatibility bits for old machine types: */ + bool enable_cpuid_0xb; + /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index abf50e6632..ff92b1d118 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <sys/utsname.h> #include <linux/kvm.h> @@ -107,6 +106,8 @@ static int has_xsave; static int has_xcrs; static int has_pit_state2; +static struct kvm_cpuid2 *cpuid_cache; + int kvm_has_pit_state2(void) { return has_pit_state2; @@ -200,9 +201,14 @@ static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s) { struct kvm_cpuid2 *cpuid; int max = 1; + + if (cpuid_cache != NULL) { + return cpuid_cache; + } while ((cpuid = try_get_cpuid(s, max)) == NULL) { max *= 2; } + cpuid_cache = cpuid; return cpuid; } @@ -320,8 +326,6 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; } - g_free(cpuid); - /* fallback for older kernels */ if ((function == KVM_CPUID_FEATURES) && !found) { ret = get_para_features(s); diff --git a/target-mips/kvm.c b/target-mips/kvm.c index a854e4de59..f3f832d498 100644 --- a/target-mips/kvm.c +++ b/target-mips/kvm.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/kvm.h> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 6c153611c0..16208649c5 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -17,7 +17,6 @@ #include "qemu/osdep.h" #include <dirent.h> #include <sys/ioctl.h> -#include <sys/mman.h> #include <sys/vfs.h> #include <linux/kvm.h> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index f108cd3875..45e94ca48a 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -23,7 +23,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/kvm.h> #include <asm/ptrace.h> diff --git a/tests/Makefile.include b/tests/Makefile.include index 4b4bfa5930..bf620b8dd4 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -234,6 +234,7 @@ endif check-qtest-i386-y += tests/test-netfilter$(EXESUF) check-qtest-i386-y += tests/test-filter-mirror$(EXESUF) check-qtest-i386-y += tests/test-filter-redirector$(EXESUF) +check-qtest-i386-y += tests/postcopy-test$(EXESUF) check-qtest-x86_64-y += $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -599,6 +600,7 @@ tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o +tests/postcopy-test$(EXESUF): tests/postcopy-test.o tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c index dbf4859f88..d497b0857c 100644 --- a/tests/e1000e-test.c +++ b/tests/e1000e-test.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" -#include <glib.h> #include "libqtest.h" #include "qemu-common.h" #include "libqos/pci-pc.h" diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c index bff999cf12..c1d9b3eb9e 100644 --- a/tests/i440fx-test.c +++ b/tests/i440fx-test.c @@ -13,7 +13,6 @@ */ #include "qemu/osdep.h" -#include <sys/mman.h> #include "libqtest.h" #include "libqos/pci.h" diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c index 010860a5b7..0957ee7555 100644 --- a/tests/ivshmem-test.c +++ b/tests/ivshmem-test.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include <glib/gstdio.h> -#include <sys/mman.h> #include "contrib/ivshmem-server/ivshmem-server.h" #include "libqos/pci-pc.h" #include "libqtest.h" diff --git a/tests/libqtest.c b/tests/libqtest.c index 5c82348265..eb00f1392b 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -26,7 +26,7 @@ #include "qapi/qmp/qjson.h" #define MAX_IRQ 256 -#define SOCKET_TIMEOUT 5 +#define SOCKET_TIMEOUT 50 QTestState *global_qtest; diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c new file mode 100644 index 0000000000..35d5180173 --- /dev/null +++ b/tests/postcopy-test.c @@ -0,0 +1,453 @@ +/* + * QTest testcase for postcopy + * + * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates + * based on the vhost-user-test.c that is: + * Copyright (c) 2014 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "libqtest.h" +#include "qemu/option.h" +#include "qemu/range.h" +#include "sysemu/char.h" +#include "sysemu/sysemu.h" + +#include <qemu/sockets.h> + +const unsigned start_address = 1024 * 1024; +const unsigned end_address = 100 * 1024 * 1024; +bool got_stop; + +#if defined(__linux__) +#include <sys/syscall.h> +#include <sys/vfs.h> +#endif + +#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) +#include <sys/eventfd.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> + +static bool ufd_version_check(void) +{ + struct uffdio_api api_struct; + uint64_t ioctl_mask; + + int ufd = ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + + if (ufd == -1) { + g_test_message("Skipping test: userfaultfd not available"); + return false; + } + + api_struct.api = UFFD_API; + api_struct.features = 0; + if (ioctl(ufd, UFFDIO_API, &api_struct)) { + g_test_message("Skipping test: UFFDIO_API failed"); + return false; + } + + ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | + (__u64)1 << _UFFDIO_UNREGISTER; + if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { + g_test_message("Skipping test: Missing userfault feature"); + return false; + } + + return true; +} + +#else +static bool ufd_version_check(void) +{ + g_test_message("Skipping test: Userfault not available (builtdtime)"); + return false; +} + +#endif + +static const char *tmpfs; + +/* A simple PC boot sector that modifies memory (1-100MB) quickly + * outputing a 'B' every so often if it's still running. + */ +unsigned char bootsect[] = { + 0xfa, 0x0f, 0x01, 0x16, 0x74, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00, + 0x0f, 0x22, 0xc0, 0x66, 0xea, 0x20, 0x7c, 0x00, 0x00, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x92, 0x0c, 0x02, + 0xe6, 0x92, 0xb8, 0x10, 0x00, 0x00, 0x00, 0x8e, 0xd8, 0x66, 0xb8, 0x41, + 0x00, 0x66, 0xba, 0xf8, 0x03, 0xee, 0xb3, 0x00, 0xb8, 0x00, 0x00, 0x10, + 0x00, 0xfe, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x40, + 0x06, 0x7c, 0xf2, 0xfe, 0xc3, 0x75, 0xe9, 0x66, 0xb8, 0x42, 0x00, 0x66, + 0xba, 0xf8, 0x03, 0xee, 0xeb, 0xde, 0x66, 0x90, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00, 0x27, 0x00, 0x5c, 0x7c, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xaa +}; + +/* + * Wait for some output in the serial output file, + * we get an 'A' followed by an endless string of 'B's + * but on the destination we won't have the A. + */ +static void wait_for_serial(const char *side) +{ + char *serialpath = g_strdup_printf("%s/%s", tmpfs, side); + FILE *serialfile = fopen(serialpath, "r"); + + do { + int readvalue = fgetc(serialfile); + + switch (readvalue) { + case 'A': + /* Fine */ + break; + + case 'B': + /* It's alive! */ + fclose(serialfile); + g_free(serialpath); + return; + + case EOF: + fseek(serialfile, 0, SEEK_SET); + usleep(1000); + break; + + default: + fprintf(stderr, "Unexpected %d on %s serial\n", readvalue, side); + g_assert_not_reached(); + } + } while (true); +} + +/* + * Events can get in the way of responses we are actually waiting for. + */ +static QDict *return_or_event(QDict *response) +{ + const char *event_string; + if (!qdict_haskey(response, "event")) { + return response; + } + + /* OK, it was an event */ + event_string = qdict_get_str(response, "event"); + if (!strcmp(event_string, "STOP")) { + got_stop = true; + } + QDECREF(response); + return return_or_event(qtest_qmp_receive(global_qtest)); +} + + +/* + * It's tricky to use qemu's migration event capability with qtest, + * events suddenly appearing confuse the qmp()/hmp() responses. + * so wait for a couple of passes to have happened before + * going postcopy. + */ + +static uint64_t get_migration_pass(void) +{ + QDict *rsp, *rsp_return, *rsp_ram; + uint64_t result; + + rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }")); + rsp_return = qdict_get_qdict(rsp, "return"); + if (!qdict_haskey(rsp_return, "ram")) { + /* Still in setup */ + result = 0; + } else { + rsp_ram = qdict_get_qdict(rsp_return, "ram"); + result = qdict_get_try_int(rsp_ram, "dirty-sync-count", 0); + QDECREF(rsp); + } + return result; +} + +static void wait_for_migration_complete(void) +{ + QDict *rsp, *rsp_return; + bool completed; + + do { + const char *status; + + rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }")); + rsp_return = qdict_get_qdict(rsp, "return"); + status = qdict_get_str(rsp_return, "status"); + completed = strcmp(status, "completed") == 0; + g_assert_cmpstr(status, !=, "failed"); + QDECREF(rsp); + usleep(1000 * 100); + } while (!completed); +} + +static void wait_for_migration_pass(void) +{ + uint64_t initial_pass = get_migration_pass(); + uint64_t pass; + + /* Wait for the 1st sync */ + do { + initial_pass = get_migration_pass(); + if (got_stop || initial_pass) { + break; + } + usleep(1000 * 100); + } while (true); + + do { + usleep(1000 * 100); + pass = get_migration_pass(); + } while (pass == initial_pass && !got_stop); +} + +static void check_guests_ram(void) +{ + /* Our ASM test will have been incrementing one byte from each page from + * 1MB to <100MB in order. + * This gives us a constraint that any page's byte should be equal or less + * than the previous pages byte (mod 256); and they should all be equal + * except for one transition at the point where we meet the incrementer. + * (We're running this with the guest stopped). + */ + unsigned address; + uint8_t first_byte; + uint8_t last_byte; + bool hit_edge = false; + bool bad = false; + + qtest_memread(global_qtest, start_address, &first_byte, 1); + last_byte = first_byte; + + for (address = start_address + 4096; address < end_address; address += 4096) + { + uint8_t b; + qtest_memread(global_qtest, address, &b, 1); + if (b != last_byte) { + if (((b + 1) % 256) == last_byte && !hit_edge) { + /* This is OK, the guest stopped at the point of + * incrementing the previous page but didn't get + * to us yet. + */ + hit_edge = true; + } else { + fprintf(stderr, "Memory content inconsistency at %x" + " first_byte = %x last_byte = %x current = %x" + " hit_edge = %x\n", + address, first_byte, last_byte, b, hit_edge); + bad = true; + } + } + last_byte = b; + } + g_assert_false(bad); +} + +static void cleanup(const char *filename) +{ + char *path = g_strdup_printf("%s/%s", tmpfs, filename); + + unlink(path); +} + +static void test_migrate(void) +{ + char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *global = global_qtest, *from, *to; + unsigned char dest_byte_a, dest_byte_b, dest_byte_c, dest_byte_d; + gchar *cmd; + QDict *rsp; + + char *bootpath = g_strdup_printf("%s/bootsect", tmpfs); + FILE *bootfile = fopen(bootpath, "wb"); + + got_stop = false; + g_assert_cmpint(fwrite(bootsect, 512, 1, bootfile), ==, 1); + fclose(bootfile); + + cmd = g_strdup_printf("-machine accel=kvm:tcg -m 150M" + " -name pcsource,debug-threads=on" + " -serial file:%s/src_serial" + " -drive file=%s,format=raw", + tmpfs, bootpath); + from = qtest_start(cmd); + g_free(cmd); + + cmd = g_strdup_printf("-machine accel=kvm:tcg -m 150M" + " -name pcdest,debug-threads=on" + " -serial file:%s/dest_serial" + " -drive file=%s,format=raw" + " -incoming %s", + tmpfs, bootpath, uri); + to = qtest_init(cmd); + g_free(cmd); + + global_qtest = from; + rsp = qmp("{ 'execute': 'migrate-set-capabilities'," + "'arguments': { " + "'capabilities': [ {" + "'capability': 'postcopy-ram'," + "'state': true } ] } }"); + g_assert(qdict_haskey(rsp, "return")); + QDECREF(rsp); + + global_qtest = to; + rsp = qmp("{ 'execute': 'migrate-set-capabilities'," + "'arguments': { " + "'capabilities': [ {" + "'capability': 'postcopy-ram'," + "'state': true } ] } }"); + g_assert(qdict_haskey(rsp, "return")); + QDECREF(rsp); + + /* We want to pick a speed slow enough that the test completes + * quickly, but that it doesn't complete precopy even on a slow + * machine, so also set the downtime. + */ + global_qtest = from; + rsp = qmp("{ 'execute': 'migrate_set_speed'," + "'arguments': { 'value': 100000000 } }"); + g_assert(qdict_haskey(rsp, "return")); + QDECREF(rsp); + + /* 1ms downtime - it should never finish precopy */ + rsp = qmp("{ 'execute': 'migrate_set_downtime'," + "'arguments': { 'value': 0.001 } }"); + g_assert(qdict_haskey(rsp, "return")); + QDECREF(rsp); + + + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + cmd = g_strdup_printf("{ 'execute': 'migrate'," + "'arguments': { 'uri': '%s' } }", + uri); + rsp = qmp(cmd); + g_free(cmd); + g_assert(qdict_haskey(rsp, "return")); + QDECREF(rsp); + + wait_for_migration_pass(); + + rsp = return_or_event(qmp("{ 'execute': 'migrate-start-postcopy' }")); + g_assert(qdict_haskey(rsp, "return")); + QDECREF(rsp); + + if (!got_stop) { + qmp_eventwait("STOP"); + } + + global_qtest = to; + qmp_eventwait("RESUME"); + + wait_for_serial("dest_serial"); + global_qtest = from; + wait_for_migration_complete(); + + qtest_quit(from); + + global_qtest = to; + + qtest_memread(to, start_address, &dest_byte_a, 1); + + /* Destination still running, wait for a byte to change */ + do { + qtest_memread(to, start_address, &dest_byte_b, 1); + usleep(10 * 1000); + } while (dest_byte_a == dest_byte_b); + + qmp("{ 'execute' : 'stop'}"); + /* With it stopped, check nothing changes */ + qtest_memread(to, start_address, &dest_byte_c, 1); + sleep(1); + qtest_memread(to, start_address, &dest_byte_d, 1); + g_assert_cmpint(dest_byte_c, ==, dest_byte_d); + + check_guests_ram(); + + qtest_quit(to); + g_free(uri); + + global_qtest = global; + + cleanup("bootsect"); + cleanup("migsocket"); + cleanup("src_serial"); + cleanup("dest_serial"); +} + +int main(int argc, char **argv) +{ + char template[] = "/tmp/postcopy-test-XXXXXX"; + int ret; + + g_test_init(&argc, &argv, NULL); + + if (!ufd_version_check()) { + return 0; + } + + tmpfs = mkdtemp(template); + if (!tmpfs) { + g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno)); + } + g_assert(tmpfs); + + module_call_init(MODULE_INIT_QOM); + + qtest_add_func("/postcopy", test_migrate); + + ret = g_test_run(); + + g_assert_cmpint(ret, ==, 0); + + ret = rmdir(tmpfs); + if (ret != 0) { + g_test_message("unable to rmdir: path (%s): %s\n", + tmpfs, strerror(errno)); + } + + return ret; +} diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index 055c553cdb..a95c4b0be8 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -42,22 +42,14 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on Testing: -S QMP_VERSION {"return": {}} -IMGFMT built-in AES encryption is deprecated -Support for it will be removed in a future release. -You can use 'qemu-img convert' to switch to an -unencrypted IMGFMT image, or a LUKS raw image. -{"error": {"class": "GenericError", "desc": "blockdev-add doesn't support encrypted devices"}} +{"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} Testing: QMP_VERSION {"return": {}} -IMGFMT built-in AES encryption is deprecated -Support for it will be removed in a future release. -You can use 'qemu-img convert' to switch to an -unencrypted IMGFMT image, or a LUKS raw image. -{"error": {"class": "GenericError", "desc": "Guest must be stopped for opening of encrypted image"}} +{"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} diff --git a/tests/qemu-iotests/095 b/tests/qemu-iotests/095 index dad04b9ac9..030adb22e1 100755 --- a/tests/qemu-iotests/095 +++ b/tests/qemu-iotests/095 @@ -74,6 +74,8 @@ _send_qemu_cmd $h "{ 'execute': 'block-commit', 'arguments': { 'device': 'test', 'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED" +_cleanup_qemu + echo echo "=== Base image info after commit and resize ===" TEST_IMG="${TEST_IMG}.base" _img_info | _filter_img_info diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 new file mode 100755 index 0000000000..4057b5e2aa --- /dev/null +++ b/tests/qemu-iotests/155 @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# +# Test whether the backing BDSs are correct after completion of a +# mirror block job; in "existing" modes (drive-mirror with +# mode=existing and blockdev-mirror) the backing chain should not be +# overridden. +# +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import iotests +from iotests import qemu_img + +back0_img = os.path.join(iotests.test_dir, 'back0.' + iotests.imgfmt) +back1_img = os.path.join(iotests.test_dir, 'back1.' + iotests.imgfmt) +back2_img = os.path.join(iotests.test_dir, 'back2.' + iotests.imgfmt) +source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt) +target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + + +# Class variables for controlling its behavior: +# +# existing: If True, explicitly create the target image and blockdev-add it +# target_backing: If existing is True: Use this filename as the backing file +# of the target image +# (None: no backing file) +# target_blockdev_backing: If existing is True: Pass this dict as "backing" +# for the blockdev-add command +# (None: do not pass "backing") +# target_real_backing: If existing is True: The real filename of the backing +# image during runtime, only makes sense if +# target_blockdev_backing is not None +# (None: same as target_backing) + +class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1M') + qemu_img('create', '-f', iotests.imgfmt, '-b', back0_img, back1_img) + qemu_img('create', '-f', iotests.imgfmt, '-b', back1_img, back2_img) + qemu_img('create', '-f', iotests.imgfmt, '-b', back2_img, source_img) + + self.vm = iotests.VM() + self.vm.add_drive(None, '', 'none') + self.vm.launch() + + # Add the BDS via blockdev-add so it stays around after the mirror block + # job has been completed + result = self.vm.qmp('blockdev-add', + options={'node-name': 'source', + 'driver': iotests.imgfmt, + 'file': {'driver': 'file', + 'filename': source_img}}) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('x-blockdev-insert-medium', + device='drive0', node_name='source') + self.assert_qmp(result, 'return', {}) + + self.assertIntactSourceBackingChain() + + if self.existing: + if self.target_backing: + qemu_img('create', '-f', iotests.imgfmt, + '-b', self.target_backing, target_img, '1M') + else: + qemu_img('create', '-f', iotests.imgfmt, target_img, '1M') + + if self.cmd == 'blockdev-mirror': + options = { 'node-name': 'target', + 'driver': iotests.imgfmt, + 'file': { 'driver': 'file', + 'filename': target_img } } + if self.target_blockdev_backing: + options['backing'] = self.target_blockdev_backing + + result = self.vm.qmp('blockdev-add', options=options) + self.assert_qmp(result, 'return', {}) + + def tearDown(self): + self.vm.shutdown() + os.remove(source_img) + os.remove(back2_img) + os.remove(back1_img) + os.remove(back0_img) + try: + os.remove(target_img) + except OSError: + pass + + def findBlockNode(self, node_name, id=None): + if id: + result = self.vm.qmp('query-block') + for device in result['return']: + if device['device'] == id: + if node_name: + self.assert_qmp(device, 'inserted/node-name', node_name) + return device['inserted'] + else: + result = self.vm.qmp('query-named-block-nodes') + for node in result['return']: + if node['node-name'] == node_name: + return node + + self.fail('Cannot find node %s/%s' % (id, node_name)) + + def assertIntactSourceBackingChain(self): + node = self.findBlockNode('source') + + self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', + source_img) + self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', + back2_img) + self.assert_qmp(node, 'image' + '/backing-image' * 2 + '/filename', + back1_img) + self.assert_qmp(node, 'image' + '/backing-image' * 3 + '/filename', + back0_img) + self.assert_qmp_absent(node, 'image' + '/backing-image' * 4) + + def assertCorrectBackingImage(self, node, default_image): + if self.existing: + if self.target_real_backing: + image = self.target_real_backing + else: + image = self.target_backing + else: + image = default_image + + if image: + self.assert_qmp(node, 'image/backing-image/filename', image) + else: + self.assert_qmp_absent(node, 'image/backing-image') + + +# Class variables for controlling its behavior: +# +# cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror + +class MirrorBaseClass(BaseClass): + def runMirror(self, sync): + if self.cmd == 'blockdev-mirror': + result = self.vm.qmp(self.cmd, device='drive0', sync=sync, + target='target') + else: + if self.existing: + mode = 'existing' + else: + mode = 'absolute-paths' + result = self.vm.qmp(self.cmd, device='drive0', sync=sync, + target=target_img, format=iotests.imgfmt, + mode=mode, node_name='target') + + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait('BLOCK_JOB_READY') + + result = self.vm.qmp('block-job-complete', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait('BLOCK_JOB_COMPLETED') + + def testFull(self): + self.runMirror('full') + + node = self.findBlockNode('target', 'drive0') + self.assertCorrectBackingImage(node, None) + self.assertIntactSourceBackingChain() + + def testTop(self): + self.runMirror('top') + + node = self.findBlockNode('target', 'drive0') + self.assertCorrectBackingImage(node, back2_img) + self.assertIntactSourceBackingChain() + + def testNone(self): + self.runMirror('none') + + node = self.findBlockNode('target', 'drive0') + self.assertCorrectBackingImage(node, source_img) + self.assertIntactSourceBackingChain() + + +class TestDriveMirrorAbsolutePaths(MirrorBaseClass): + cmd = 'drive-mirror' + existing = False + +class TestDriveMirrorExistingNoBacking(MirrorBaseClass): + cmd = 'drive-mirror' + existing = True + target_backing = None + +class TestDriveMirrorExistingBacking(MirrorBaseClass): + cmd = 'drive-mirror' + existing = True + target_backing = 'null-co://' + +class TestBlockdevMirrorNoBacking(MirrorBaseClass): + cmd = 'blockdev-mirror' + existing = True + target_backing = None + +class TestBlockdevMirrorBacking(MirrorBaseClass): + cmd = 'blockdev-mirror' + existing = True + target_backing = 'null-co://' + +class TestBlockdevMirrorForcedBacking(MirrorBaseClass): + cmd = 'blockdev-mirror' + existing = True + target_backing = None + target_blockdev_backing = { 'driver': 'null-co' } + target_real_backing = 'null-co://' + + +class TestCommit(BaseClass): + existing = False + + def testCommit(self): + result = self.vm.qmp('block-commit', device='drive0', base=back1_img) + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait('BLOCK_JOB_READY') + + result = self.vm.qmp('block-job-complete', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.vm.event_wait('BLOCK_JOB_COMPLETED') + + node = self.findBlockNode(None, 'drive0') + self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', + back1_img) + self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', + back0_img) + self.assert_qmp_absent(node, 'image' + '/backing-image' * 2 + + '/filename') + + self.assertIntactSourceBackingChain() + + +BaseClass = None +MirrorBaseClass = None + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2']) diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out new file mode 100644 index 0000000000..4176bb9402 --- /dev/null +++ b/tests/qemu-iotests/155.out @@ -0,0 +1,5 @@ +................... +---------------------------------------------------------------------- +Ran 19 tests + +OK diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156 new file mode 100755 index 0000000000..cc95ff1f98 --- /dev/null +++ b/tests/qemu-iotests/156 @@ -0,0 +1,174 @@ +#!/bin/bash +# +# Tests oVirt-like storage migration: +# - Create snapshot +# - Create target image with (not yet existing) target backing chain +# (i.e. just write the name of a soon-to-be-copied-over backing file into it) +# - drive-mirror the snapshot to the target with mode=existing and sync=top +# - In the meantime, copy the original source files to the destination via +# conventional means (i.e. outside of qemu) +# - Complete the drive-mirror job +# - Delete all source images +# +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 # failure is the default! + +_cleanup() +{ + rm -f "$TEST_IMG{,.target}{,.backing,.overlay}" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 qed +_supported_proto generic +_supported_os Linux + +# Create source disk +TEST_IMG="$TEST_IMG.backing" _make_test_img 1M +_make_test_img -b "$TEST_IMG.backing" 1M + +$QEMU_IO -c 'write -P 1 0 256k' "$TEST_IMG.backing" | _filter_qemu_io +$QEMU_IO -c 'write -P 2 64k 192k' "$TEST_IMG" | _filter_qemu_io + +_launch_qemu -drive if=none,id=source,file="$TEST_IMG" + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'qmp_capabilities' }" \ + 'return' + +# Create snapshot +TEST_IMG="$TEST_IMG.overlay" _make_test_img -b "$TEST_IMG" 1M +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'blockdev-snapshot-sync', + 'arguments': { 'device': 'source', + 'snapshot-file': '$TEST_IMG.overlay', + 'format': '$IMGFMT', + 'mode': 'existing' } }" \ + 'return' + +# Write something to the snapshot +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io source \"write -P 3 128k 128k\"' } }" \ + 'return' + +# Create target image +TEST_IMG="$TEST_IMG.target.overlay" _make_test_img -b "$TEST_IMG.target" 1M + +# Mirror snapshot +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'drive-mirror', + 'arguments': { 'device': 'source', + 'target': '$TEST_IMG.target.overlay', + 'mode': 'existing', + 'sync': 'top' } }" \ + 'return' + +# Wait for convergence +_send_qemu_cmd $QEMU_HANDLE \ + '' \ + 'BLOCK_JOB_READY' + +# Write some more +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io source \"write -P 4 192k 64k\"' } }" \ + 'return' + +# Copy source backing chain to the target before completing the job +cp "$TEST_IMG.backing" "$TEST_IMG.target.backing" +cp "$TEST_IMG" "$TEST_IMG.target" +$QEMU_IMG rebase -u -b "$TEST_IMG.target.backing" "$TEST_IMG.target" + +# Complete block job +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'block-job-complete', + 'arguments': { 'device': 'source' } }" \ + '' + +_send_qemu_cmd $QEMU_HANDLE \ + '' \ + 'BLOCK_JOB_COMPLETED' + +# Remove the source images +rm -f "$TEST_IMG{,.backing,.overlay}" + +echo + +# Check online disk contents +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io source \"read -P 1 0k 64k\"' } }" \ + 'return' + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io source \"read -P 2 64k 64k\"' } }" \ + 'return' + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io source \"read -P 3 128k 64k\"' } }" \ + 'return' + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io source \"read -P 4 192k 64k\"' } }" \ + 'return' + +echo + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'quit' }" \ + 'return' + +wait=1 _cleanup_qemu + +echo + +# Check offline disk contents +$QEMU_IO -c 'read -P 1 0k 64k' \ + -c 'read -P 2 64k 64k' \ + -c 'read -P 3 128k 64k' \ + -c 'read -P 4 192k 64k' \ + "$TEST_IMG.target.overlay" | _filter_qemu_io + +echo + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out new file mode 100644 index 0000000000..3af82ae540 --- /dev/null +++ b/tests/qemu-iotests/156.out @@ -0,0 +1,48 @@ +QA output created by 156 +Formatting 'TEST_DIR/t.IMGFMT.backing', fmt=IMGFMT size=1048576 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.backing +wrote 262144/262144 bytes at offset 0 +256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 196608/196608 bytes at offset 65536 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": {}} +Formatting 'TEST_DIR/t.IMGFMT.overlay', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT +{"return": {}} +wrote 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +Formatting 'TEST_DIR/t.IMGFMT.target.overlay', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.target +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "source", "len": 131072, "offset": 131072, "speed": 0, "type": "mirror"}} +wrote 65536/65536 bytes at offset 196608 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "source", "len": 196608, "offset": 196608, "speed": 0, "type": "mirror"}} + +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +read 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +read 65536/65536 bytes at offset 196608 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} + +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} + +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 196608 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +*** done diff --git a/tests/qemu-iotests/README b/tests/qemu-iotests/README index 4ccfdd1cc0..6079b401ae 100644 --- a/tests/qemu-iotests/README +++ b/tests/qemu-iotests/README @@ -17,4 +17,5 @@ additional options to test further image formats or I/O methods. * Feedback and patches Please send improvements to the test suite, general feedback or just -reports of failing tests cases to qemu-devel@savannah.nongnu.org. +reports of failing tests cases to qemu-devel@nongnu.org with a CC: +to qemu-block@nongnu.org. diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index ab1d76efdf..1c6fcb6018 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -154,3 +154,5 @@ 150 rw auto quick 152 rw auto quick 154 rw auto backing quick +155 rw auto +156 rw auto quick diff --git a/tests/qht-bench.c b/tests/qht-bench.c index ad8efbca95..76360a0cf5 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -5,7 +5,6 @@ * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" -#include <glib.h> #include "qemu/processor.h" #include "qemu/atomic.h" #include "qemu/qht.h" diff --git a/tests/test-qdist.c b/tests/test-qdist.c index a67f26057e..0298986ac9 100644 --- a/tests/test-qdist.c +++ b/tests/test-qdist.c @@ -5,7 +5,6 @@ * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" -#include <glib.h> #include "qemu/qdist.h" #include <math.h> diff --git a/tests/test-qht-par.c b/tests/test-qht-par.c index f09e004ec6..d8a83caf5c 100644 --- a/tests/test-qht-par.c +++ b/tests/test-qht-par.c @@ -5,7 +5,6 @@ * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" -#include <glib.h> #define TEST_QHT_STRING "tests/qht-bench 1>/dev/null 2>&1 -R -S0.1 -D10000 -N1 " diff --git a/tests/test-qht.c b/tests/test-qht.c index c8eb9305ed..f1d628371d 100644 --- a/tests/test-qht.c +++ b/tests/test-qht.c @@ -5,7 +5,6 @@ * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" -#include <glib.h> #include "qemu/qht.h" #define N 5000 diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index 36b3cd8b8c..45fa2b6148 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -33,7 +33,6 @@ #include <sys/socket.h> #include <sys/un.h> #include <sys/unistd.h> -#include <sys/mman.h> #include <sys/eventfd.h> #include <arpa/inet.h> #include <netdb.h> diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 6e3a4c0d57..8b2164b99d 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -17,7 +17,6 @@ #include "sysemu/sysemu.h" #include <linux/vhost.h> -#include <sys/mman.h> #include <sys/vfs.h> #include <qemu/sockets.h> diff --git a/trace-events b/trace-events index 2f14205de0..104b64fae1 100644 --- a/trace-events +++ b/trace-events @@ -73,7 +73,7 @@ bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x" -bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" +bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u" # block/stream.c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" @@ -606,16 +606,16 @@ qemu_system_shutdown_request(void) "" qemu_system_powerdown_request(void) "" # block/qcow2.c -qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d" +qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d" qcow2_writev_done_req(void *co, int ret) "co %p ret %d" qcow2_writev_start_part(void *co) "co %p" -qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d" +qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d" qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d" qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d" # block/qcow2-cluster.c -qcow2_alloc_clusters_offset(void *co, uint64_t offset, int num) "co %p offset %" PRIx64 " num %d" +qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d" qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64 qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64 qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d" diff --git a/translate-all.c b/translate-all.c index e8b88b4485..3f402dfe04 100644 --- a/translate-all.c +++ b/translate-all.c @@ -18,8 +18,6 @@ */ #ifdef _WIN32 #include <windows.h> -#else -#include <sys/mman.h> #endif #include "qemu/osdep.h" diff --git a/util/cutils.c b/util/cutils.c index 43d1afbbec..5830a688dc 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -256,13 +256,7 @@ static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) return i * sizeof(VECTYPE); } -/* - * GCC before version 4.9 has a bug which will cause the target - * attribute work incorrectly and failed to compile in some case, - * restrict the gcc version to 4.9+ to prevent the failure. - */ - -#if defined CONFIG_AVX2_OPT && QEMU_GNUC_PREREQ(4, 9) +#if defined CONFIG_AVX2_OPT #pragma GCC push_options #pragma GCC target("avx2") #include <cpuid.h> diff --git a/util/hbitmap.c b/util/hbitmap.c index 7121b11c01..99fd2ba37b 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -269,6 +269,7 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) start >>= hb->granularity; last >>= hb->granularity; count = last - start + 1; + assert(last < hb->size); hb->count += count - hb_count_between(hb, start, last); hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); @@ -348,6 +349,7 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) start >>= hb->granularity; last >>= hb->granularity; + assert(last < hb->size); hb->count -= hb_count_between(hb, start, last); hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); @@ -371,6 +373,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) /* Compute position and bit in the last layer. */ uint64_t pos = item >> hb->granularity; unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1)); + assert(pos < hb->size); return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; } diff --git a/util/memfd.c b/util/memfd.c index b374238a59..4571d1aba8 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -29,8 +29,6 @@ #include <glib/gprintf.h> -#include <sys/mman.h> - #include "qemu/memfd.h" #ifdef CONFIG_MEMFD diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index 0b4cc7f7f1..629d97a362 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" #include <qemu/mmap-alloc.h> -#include <sys/mman.h> #define HUGETLBFS_MAGIC 0x958458f6 diff --git a/util/osdep.c b/util/osdep.c index 9a7a439e13..ff004e8074 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -25,10 +25,6 @@ /* Needed early for CONFIG_BSD etc. */ -#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) -#include <sys/mman.h> -#endif - #ifdef CONFIG_SOLARIS #include <sys/statvfs.h> /* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 4adde93ac1..e2e1d4d39f 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -36,7 +36,6 @@ #include "trace.h" #include "qapi/error.h" #include "qemu/sockets.h" -#include <sys/mman.h> #include <libgen.h> #include <sys/signal.h> #include "qemu/cutils.h" diff --git a/util/qdist.c b/util/qdist.c index 4ea2e34fc2..56f573837d 100644 --- a/util/qdist.c +++ b/util/qdist.c @@ -6,6 +6,7 @@ * License: GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" #include "qemu/qdist.h" #include <math.h> diff --git a/util/qht.c b/util/qht.c index 6f749098f4..40d6e218f7 100644 --- a/util/qht.c +++ b/util/qht.c @@ -65,6 +65,7 @@ * + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014. * https://lwn.net/Articles/612021/ */ +#include "qemu/osdep.h" #include "qemu/qht.h" #include "qemu/atomic.h" #include "qemu/rcu.h" @@ -154,7 +154,7 @@ CharDriverState *sclp_hds[MAX_SCLP_CONSOLES]; int win2k_install_hack = 0; int singlestep = 0; int smp_cpus = 1; -int max_cpus = 0; +int max_cpus = 1; int smp_cores = 1; int smp_threads = 1; int acpi_enabled = 1; @@ -1218,7 +1218,6 @@ static QemuOptsList qemu_smp_opts = { static void smp_parse(QemuOpts *opts) { if (opts) { - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); unsigned cores = qemu_opt_get_number(opts, "cores", 0); @@ -1246,6 +1245,17 @@ static void smp_parse(QemuOpts *opts) } max_cpus = qemu_opt_get_number(opts, "maxcpus", cpus); + + if (max_cpus > MAX_CPUMASK_BITS) { + error_report("unsupported number of maxcpus"); + exit(1); + } + + if (max_cpus < cpus) { + error_report("maxcpus must be equal to or greater than smp"); + exit(1); + } + if (sockets * cores * threads > max_cpus) { error_report("cpu topology: " "sockets (%u) * cores (%u) * threads (%u) > " @@ -1255,25 +1265,11 @@ static void smp_parse(QemuOpts *opts) } smp_cpus = cpus; - smp_cores = cores > 0 ? cores : 1; - smp_threads = threads > 0 ? threads : 1; - - } - - if (max_cpus == 0) { - max_cpus = smp_cpus; - } - - if (max_cpus > MAX_CPUMASK_BITS) { - error_report("unsupported number of maxcpus"); - exit(1); - } - if (max_cpus < smp_cpus) { - error_report("maxcpus must be equal to or greater than smp"); - exit(1); + smp_cores = cores; + smp_threads = threads; } - if (smp_cpus > 1 || smp_cores > 1 || smp_threads > 1) { + if (smp_cpus > 1) { Error *blocker = NULL; error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); replay_add_blocker(blocker); @@ -2968,6 +2964,7 @@ int main(int argc, char **argv, char **envp) FILE *vmstate_dump_file = NULL; Error *main_loop_err = NULL; Error *err = NULL; + bool list_data_dirs = false; qemu_init_cpu_loop(); qemu_mutex_lock_iothread(); @@ -3354,7 +3351,9 @@ int main(int argc, char **argv, char **envp) add_device_config(DEV_GDB, optarg); break; case QEMU_OPTION_L: - if (data_dir_idx < ARRAY_SIZE(data_dir)) { + if (is_help_option(optarg)) { + list_data_dirs = true; + } else if (data_dir_idx < ARRAY_SIZE(data_dir)) { data_dir[data_dir_idx++] = optarg; } break; @@ -4086,6 +4085,14 @@ int main(int argc, char **argv, char **envp) data_dir[data_dir_idx++] = CONFIG_QEMU_DATADIR; } + /* -L help lists the data directories and exits. */ + if (list_data_dirs) { + for (i = 0; i < data_dir_idx; i++) { + printf("%s\n", data_dir[i]); + } + exit(0); + } + smp_parse(qemu_opts_find(qemu_find_opts("smp-opts"), NULL)); machine_class->max_cpus = machine_class->max_cpus ?: 1; /* Default to UP */ @@ -9,7 +9,6 @@ */ #include "qemu/osdep.h" -#include <sys/mman.h> #include "cpu.h" #include "hw/pci/pci.h" diff --git a/xen-mapcache.c b/xen-mapcache.c index 49f394a777..8f3a592013 100644 --- a/xen-mapcache.c +++ b/xen-mapcache.c @@ -17,7 +17,6 @@ #include "qemu/bitmap.h" #include <xen/hvm/params.h> -#include <sys/mman.h> #include "sysemu/xen-mapcache.h" #include "trace.h" |