149 files changed, 4223 insertions(+), 2154 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS index 7b7543a54e..839f7ca063 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1489,7 +1489,7 @@ F: tests/vmstate-static-checker-data/ F: docs/migration.txt Seccomp -M: Eduardo Otubo <eduardo.otubo@profitbricks.com> +M: Eduardo Otubo <otubo@redhat.com> S: Supported F: qemu-seccomp.c F: include/sysemu/seccomp.h diff --git a/accel/accel.c b/accel/accel.c index 7c079a5611..fa8584488e 100644 --- a/accel/accel.c +++ b/accel/accel.c @@ -120,6 +120,12 @@ void configure_accelerator(MachineState *ms) } } +void accel_register_compat_props(AccelState *accel) +{ + AccelClass *class = ACCEL_GET_CLASS(accel); + register_compat_props_array(class->global_props); +} + static void register_accel_types(void) { type_register_static(&accel_type); diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 743776ae19..1900936038 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -746,41 +746,6 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) return ram_addr; } -/* NOTE: this function can trigger an exception */ -/* NOTE2: the returned address is not exactly the physical address: it - * is actually a ram_addr_t (in system mode; the user mode emulation - * version of this function returns a guest virtual address). - */ -tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) -{ - int mmu_idx, page_index, pd; - void *p; - MemoryRegion *mr; - CPUState *cpu = ENV_GET_CPU(env1); - CPUIOTLBEntry *iotlbentry; - - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - mmu_idx = cpu_mmu_index(env1, true); - if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code != - (addr & TARGET_PAGE_MASK))) { - cpu_ldub_code(env1, addr); - } - iotlbentry = &env1->iotlb[mmu_idx][page_index]; - pd = iotlbentry->addr & ~TARGET_PAGE_MASK; - mr = iotlb_to_region(cpu, pd, iotlbentry->attrs); - if (memory_region_is_unassigned(mr)) { - cpu_unassigned_access(cpu, addr, false, true, 0, 4); - /* The CPU's unassigned access hook might have longjumped out - * with an exception. If it didn't (or there was no hook) then - * we can't proceed further. - */ - report_bad_exec(cpu, addr); - exit(1); - } - p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend); - return qemu_ram_addr_from_host_nofail(p); -} - static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, target_ulong addr, uintptr_t retaddr, int size) { @@ -868,6 +833,53 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ (ADDR) & TARGET_PAGE_MASK) +/* NOTE: this function can trigger an exception */ +/* NOTE2: the returned address is not exactly the physical address: it + * is actually a ram_addr_t (in system mode; the user mode emulation + * version of this function returns a guest virtual address). 
+ */ +tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) +{ + int mmu_idx, index, pd; + void *p; + MemoryRegion *mr; + CPUState *cpu = ENV_GET_CPU(env); + CPUIOTLBEntry *iotlbentry; + + index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + mmu_idx = cpu_mmu_index(env, true); + if (unlikely(env->tlb_table[mmu_idx][index].addr_code != + (addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK)))) { + if (!VICTIM_TLB_HIT(addr_read, addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_INST_FETCH, mmu_idx, 0); + } + } + iotlbentry = &env->iotlb[mmu_idx][index]; + pd = iotlbentry->addr & ~TARGET_PAGE_MASK; + mr = iotlb_to_region(cpu, pd, iotlbentry->attrs); + if (memory_region_is_unassigned(mr)) { + qemu_mutex_lock_iothread(); + if (memory_region_request_mmio_ptr(mr, addr)) { + qemu_mutex_unlock_iothread(); + /* A MemoryRegion is potentially added so re-run the + * get_page_addr_code. + */ + return get_page_addr_code(env, addr); + } + qemu_mutex_unlock_iothread(); + + cpu_unassigned_access(cpu, addr, false, true, 0, 4); + /* The CPU's unassigned access hook might have longjumped out + * with an exception. If it didn't (or there was no hook) then + * we can't proceed further. + */ + report_bad_exec(cpu, addr); + exit(1); + } + p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend); + return qemu_ram_addr_from_host_nofail(p); +} + /* Probe for whether the specified guest write access is permitted. * If it is not permitted then an exception will be taken in the same * way as if this were a real write access (and we will not return). diff --git a/block/Makefile.objs b/block/Makefile.objs index ea955302c8..f9368b52b8 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,6 +1,6 @@ block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o -block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o +block-obj-y += qed.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o diff --git a/block/blkdebug.c b/block/blkdebug.c index 0618fc71c6..a1b24b9b0d 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -575,7 +575,7 @@ static int blkdebug_co_flush(BlockDriverState *bs) } static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, + int64_t offset, int bytes, BdrvRequestFlags flags) { uint32_t align = MAX(bs->bl.request_alignment, @@ -586,29 +586,29 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, * preferred alignment (so that we test the fallback to writes on * unaligned portions), and check that the block layer never hands * us anything unaligned that crosses an alignment boundary. 
*/ - if (count < align) { + if (bytes < align) { assert(QEMU_IS_ALIGNED(offset, align) || - QEMU_IS_ALIGNED(offset + count, align) || + QEMU_IS_ALIGNED(offset + bytes, align) || DIV_ROUND_UP(offset, align) == - DIV_ROUND_UP(offset + count, align)); + DIV_ROUND_UP(offset + bytes, align)); return -ENOTSUP; } assert(QEMU_IS_ALIGNED(offset, align)); - assert(QEMU_IS_ALIGNED(count, align)); + assert(QEMU_IS_ALIGNED(bytes, align)); if (bs->bl.max_pwrite_zeroes) { - assert(count <= bs->bl.max_pwrite_zeroes); + assert(bytes <= bs->bl.max_pwrite_zeroes); } - err = rule_check(bs, offset, count); + err = rule_check(bs, offset, bytes); if (err) { return err; } - return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); } static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, - int64_t offset, int count) + int64_t offset, int bytes) { uint32_t align = bs->bl.pdiscard_alignment; int err; @@ -616,29 +616,29 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, /* Only pass through requests that are larger than requested * minimum alignment, and ensure that unaligned requests do not * cross optimum discard boundaries. */ - if (count < bs->bl.request_alignment) { + if (bytes < bs->bl.request_alignment) { assert(QEMU_IS_ALIGNED(offset, align) || - QEMU_IS_ALIGNED(offset + count, align) || + QEMU_IS_ALIGNED(offset + bytes, align) || DIV_ROUND_UP(offset, align) == - DIV_ROUND_UP(offset + count, align)); + DIV_ROUND_UP(offset + bytes, align)); return -ENOTSUP; } assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); - assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment)); - if (align && count >= align) { + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (align && bytes >= align) { assert(QEMU_IS_ALIGNED(offset, align)); - assert(QEMU_IS_ALIGNED(count, align)); + assert(QEMU_IS_ALIGNED(bytes, align)); } if (bs->bl.max_pdiscard) { - assert(count <= bs->bl.max_pdiscard); + assert(bytes <= bs->bl.max_pdiscard); } - err = rule_check(bs, offset, count); + err = rule_check(bs, offset, bytes); if (err) { return err; } - return bdrv_co_pdiscard(bs->file->bs, offset, count); + return bdrv_co_pdiscard(bs->file->bs, offset, bytes); } static void blkdebug_close(BlockDriverState *bs) @@ -839,9 +839,13 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options) } if (!force_json && bs->file->bs->exact_filename[0]) { - snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "blkdebug:%s:%s", s->config_file ?: "", - bs->file->bs->exact_filename); + int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "blkdebug:%s:%s", s->config_file ?: "", + bs->file->bs->exact_filename); + if (ret >= sizeof(bs->exact_filename)) { + /* An overflow makes the filename unusable, so do not report any */ + bs->exact_filename[0] = 0; + } } opts = qdict_new(); diff --git a/block/blkreplay.c b/block/blkreplay.c index 6aa5fd4156..61e44a1949 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -96,10 +96,10 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags) + int64_t offset, int bytes, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); - int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); + int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); block_request_create(reqid, bs, qemu_coroutine_self()); 
qemu_coroutine_yield(); @@ -107,10 +107,10 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, - int64_t offset, int count) + int64_t offset, int bytes) { uint64_t reqid = blkreplay_next_id(); - int ret = bdrv_co_pdiscard(bs->file->bs, offset, count); + int ret = bdrv_co_pdiscard(bs->file->bs, offset, bytes); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); diff --git a/block/blkverify.c b/block/blkverify.c index 6b0a603cf0..06369f9eac 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -301,10 +301,14 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options) if (bs->file->bs->exact_filename[0] && s->test_file->bs->exact_filename[0]) { - snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "blkverify:%s:%s", - bs->file->bs->exact_filename, - s->test_file->bs->exact_filename); + int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "blkverify:%s:%s", + bs->file->bs->exact_filename, + s->test_file->bs->exact_filename); + if (ret >= sizeof(bs->exact_filename)) { + /* An overflow makes the filename unusable, so do not report any */ + bs->exact_filename[0] = 0; + } } } diff --git a/block/block-backend.c b/block/block-backend.c index a2bbae90b1..0df3457a09 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1099,9 +1099,9 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, } int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { - return blk_prw(blk, offset, NULL, count, blk_write_entry, + return blk_prw(blk, offset, NULL, bytes, blk_write_entry, flags | BDRV_REQ_ZERO_WRITE); } @@ -1311,10 +1311,10 @@ static void blk_aio_pdiscard_entry(void *opaque) } BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - int64_t offset, int count, + int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque) { - return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0, + return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, cb, opaque); } @@ -1374,14 +1374,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); } -int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count) +int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) { - int ret = blk_check_byte_request(blk, offset, count); + int ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } - return bdrv_co_pdiscard(blk_bs(blk), offset, count); + return bdrv_co_pdiscard(blk_bs(blk), offset, bytes); } int blk_co_flush(BlockBackend *blk) @@ -1760,9 +1760,9 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, } int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { - return blk_co_pwritev(blk, offset, count, NULL, + return blk_co_pwritev(blk, offset, bytes, NULL, flags | BDRV_REQ_ZERO_WRITE); } @@ -1789,9 +1789,9 @@ static void blk_pdiscard_entry(void *opaque) rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); } -int blk_pdiscard(BlockBackend *blk, int64_t offset, int count) +int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) { - return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0); + return blk_prw(blk, offset, NULL, bytes, 
blk_pdiscard_entry, 0); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, diff --git a/block/commit.c b/block/commit.c index af6fa68cf3..8c09c3dbcd 100644 --- a/block/commit.c +++ b/block/commit.c @@ -119,6 +119,13 @@ static void commit_complete(BlockJob *job, void *opaque) } g_free(s->backing_file_str); blk_unref(s->top); + + /* If there is more than one reference to the job (e.g. if called from + * block_job_finish_sync()), block_job_completed() won't free it and + * therefore the blockers on the intermediate nodes remain. This would + * cause bdrv_set_backing_hd() to fail. */ + block_job_remove_all_bdrv(job); + block_job_completed(&s->common, ret); g_free(data); diff --git a/block/file-posix.c b/block/file-posix.c index de2d3a2e3c..3927fabf06 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1485,7 +1485,7 @@ static int aio_worker(void *arg) static int paio_submit_co(BlockDriverState *bs, int fd, int64_t offset, QEMUIOVector *qiov, - int count, int type) + int bytes, int type) { RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); ThreadPool *pool; @@ -1494,22 +1494,22 @@ static int paio_submit_co(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; - acb->aio_nbytes = count; + acb->aio_nbytes = bytes; acb->aio_offset = offset; if (qiov) { acb->aio_iov = qiov->iov; acb->aio_niov = qiov->niov; - assert(qiov->size == count); + assert(qiov->size == bytes); } - trace_paio_submit_co(offset, count, type); + trace_paio_submit_co(offset, bytes, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_co(pool, aio_worker, acb); } static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t offset, QEMUIOVector *qiov, int count, + int64_t offset, QEMUIOVector *qiov, int bytes, BlockCompletionFunc *cb, void *opaque, int type) { RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); @@ -1519,7 +1519,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; - acb->aio_nbytes = count; + acb->aio_nbytes = bytes; acb->aio_offset = offset; if (qiov) { @@ -1528,7 +1528,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, assert(qiov->size == acb->aio_nbytes); } - trace_paio_submit(acb, opaque, offset, count, type); + trace_paio_submit(acb, opaque, offset, bytes, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } @@ -2109,26 +2109,26 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, } static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int count, + int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - return paio_submit(bs, s->fd, offset, NULL, count, + return paio_submit(bs, s->fd, offset, NULL, bytes, cb, opaque, QEMU_AIO_DISCARD); } static int coroutine_fn raw_co_pwrite_zeroes( BlockDriverState *bs, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { BDRVRawState *s = bs->opaque; if (!(flags & BDRV_REQ_MAY_UNMAP)) { - return paio_submit_co(bs, s->fd, offset, NULL, count, + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_WRITE_ZEROES); } else if (s->discard_zeroes) { - return paio_submit_co(bs, s->fd, offset, NULL, count, + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD); } return -ENOTSUP; @@ -2560,7 +2560,7 @@ static int fd_open(BlockDriverState *bs) } static coroutine_fn BlockAIOCB 
*hdev_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int count, + int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; @@ -2568,12 +2568,12 @@ static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs, if (fd_open(bs) < 0) { return NULL; } - return paio_submit(bs, s->fd, offset, NULL, count, + return paio_submit(bs, s->fd, offset, NULL, bytes, cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); } static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags) + int64_t offset, int bytes, BdrvRequestFlags flags) { BDRVRawState *s = bs->opaque; int rc; @@ -2583,10 +2583,10 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, return rc; } if (!(flags & BDRV_REQ_MAY_UNMAP)) { - return paio_submit_co(bs, s->fd, offset, NULL, count, + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV); } else if (s->discard_zeroes) { - return paio_submit_co(bs, s->fd, offset, NULL, count, + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); } return -ENOTSUP; diff --git a/block/io.c b/block/io.c index 91611ffb2a..9bba730a7e 100644 --- a/block/io.c +++ b/block/io.c @@ -34,16 +34,8 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, - int64_t offset, - QEMUIOVector *qiov, - BdrvRequestFlags flags, - BlockCompletionFunc *cb, - void *opaque, - bool is_write); -static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags); + int64_t offset, int bytes, BdrvRequestFlags flags); void bdrv_parent_drained_begin(BlockDriverState *bs) { @@ -674,12 +666,12 @@ int bdrv_write(BdrvChild *child, int64_t sector_num, } int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { QEMUIOVector qiov; struct iovec iov = { .iov_base = NULL, - .iov_len = count, + .iov_len = bytes, }; qemu_iovec_init_external(&qiov, &iov, 1); @@ -1220,7 +1212,7 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, #define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags) + int64_t offset, int bytes, BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; QEMUIOVector qiov; @@ -1238,12 +1230,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; - tail = (offset + count) % alignment; + tail = (offset + bytes) % alignment; max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment); assert(max_write_zeroes >= bs->bl.request_alignment); - while (count > 0 && !ret) { - int num = count; + while (bytes > 0 && !ret) { + int num = bytes; /* Align request. Block drivers can expect the "bulk" of the request * to be aligned, and that unaligned requests do not cross cluster @@ -1253,7 +1245,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, /* Make a small request up to the first aligned sector. For * convenience, limit this request to max_transfer even if * we don't need to fall back to writes. 
*/ - num = MIN(MIN(count, max_transfer), alignment - head); + num = MIN(MIN(bytes, max_transfer), alignment - head); head = (head + num) % alignment; assert(num < max_write_zeroes); } else if (tail && num > alignment) { @@ -1314,7 +1306,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, } offset += num; - count -= num; + bytes -= num; } fail: @@ -1666,15 +1658,15 @@ int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, } int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { - trace_bdrv_co_pwrite_zeroes(child->bs, offset, count, flags); + trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags); if (!(child->bs->open_flags & BDRV_O_UNMAP)) { flags &= ~BDRV_REQ_MAY_UNMAP; } - return bdrv_co_pwritev(child, offset, count, NULL, + return bdrv_co_pwritev(child, offset, bytes, NULL, BDRV_REQ_ZERO_WRITE | flags); } @@ -1980,17 +1972,24 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, bool is_read) { BlockDriver *drv = bs->drv; + int ret = -ENOTSUP; + + bdrv_inc_in_flight(bs); if (!drv) { - return -ENOMEDIUM; + ret = -ENOMEDIUM; } else if (drv->bdrv_load_vmstate) { - return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos) - : drv->bdrv_save_vmstate(bs, qiov, pos); + if (is_read) { + ret = drv->bdrv_load_vmstate(bs, qiov, pos); + } else { + ret = drv->bdrv_save_vmstate(bs, qiov, pos); + } } else if (bs->file) { - return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read); + ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read); } - return -ENOTSUP; + bdrv_dec_in_flight(bs); + return ret; } static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) @@ -2016,9 +2015,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data); bdrv_coroutine_enter(bs, co); - while (data.ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(bs), true); - } + BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS); return data.ret; } } @@ -2075,28 +2072,6 @@ int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) /**************************************************************/ /* async I/Os */ -BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque); - - assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size); - return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov, - 0, cb, opaque, false); -} - -BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque); - - assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size); - return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov, - 0, cb, opaque, true); -} - void bdrv_aio_cancel(BlockAIOCB *acb) { qemu_aio_ref(acb); @@ -2129,147 +2104,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb) } /**************************************************************/ -/* async block device emulation */ - -typedef struct BlockRequest { - union { - /* Used during read, write, trim */ - struct { - int64_t offset; - int bytes; - int flags; - QEMUIOVector *qiov; - }; - /* Used during ioctl */ - struct { - int req; - void *buf; - }; - }; - BlockCompletionFunc *cb; - void *opaque; - - int 
error; -} BlockRequest; - -typedef struct BlockAIOCBCoroutine { - BlockAIOCB common; - BdrvChild *child; - BlockRequest req; - bool is_write; - bool need_bh; - bool *done; -} BlockAIOCBCoroutine; - -static const AIOCBInfo bdrv_em_co_aiocb_info = { - .aiocb_size = sizeof(BlockAIOCBCoroutine), -}; - -static void bdrv_co_complete(BlockAIOCBCoroutine *acb) -{ - if (!acb->need_bh) { - bdrv_dec_in_flight(acb->common.bs); - acb->common.cb(acb->common.opaque, acb->req.error); - qemu_aio_unref(acb); - } -} - -static void bdrv_co_em_bh(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - - assert(!acb->need_bh); - bdrv_co_complete(acb); -} - -static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) -{ - acb->need_bh = false; - if (acb->req.error != -EINPROGRESS) { - BlockDriverState *bs = acb->common.bs; - - aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); - } -} - -/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ -static void coroutine_fn bdrv_co_do_rw(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - - if (!acb->is_write) { - acb->req.error = bdrv_co_preadv(acb->child, acb->req.offset, - acb->req.qiov->size, acb->req.qiov, acb->req.flags); - } else { - acb->req.error = bdrv_co_pwritev(acb->child, acb->req.offset, - acb->req.qiov->size, acb->req.qiov, acb->req.flags); - } - - bdrv_co_complete(acb); -} - -static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, - int64_t offset, - QEMUIOVector *qiov, - BdrvRequestFlags flags, - BlockCompletionFunc *cb, - void *opaque, - bool is_write) -{ - Coroutine *co; - BlockAIOCBCoroutine *acb; - - /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ - bdrv_inc_in_flight(child->bs); - - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque); - acb->child = child; - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - acb->req.offset = offset; - acb->req.qiov = qiov; - acb->req.flags = flags; - acb->is_write = is_write; - - co = qemu_coroutine_create(bdrv_co_do_rw, acb); - bdrv_coroutine_enter(child->bs, co); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; -} - -static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - BlockDriverState *bs = acb->common.bs; - - acb->req.error = bdrv_co_flush(bs); - bdrv_co_complete(acb); -} - -BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_flush(bs, opaque); - - Coroutine *co; - BlockAIOCBCoroutine *acb; - - /* Matched by bdrv_co_complete's bdrv_dec_in_flight. 
*/ - bdrv_inc_in_flight(bs); - - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - - co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb); - bdrv_coroutine_enter(bs, co); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; -} - -/**************************************************************/ /* Coroutine block device emulation */ typedef struct FlushCo { @@ -2414,18 +2248,18 @@ int bdrv_flush(BlockDriverState *bs) typedef struct DiscardCo { BlockDriverState *bs; int64_t offset; - int count; + int bytes; int ret; } DiscardCo; static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) { DiscardCo *rwco = opaque; - rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count); + rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->bytes); } int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, - int count) + int bytes) { BdrvTrackedRequest req; int max_pdiscard, ret; @@ -2435,7 +2269,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, return -ENOMEDIUM; } - ret = bdrv_check_byte_request(bs, offset, count); + ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { return ret; } else if (bs->read_only) { @@ -2460,10 +2294,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); assert(align % bs->bl.request_alignment == 0); head = offset % align; - tail = (offset + count) % align; + tail = (offset + bytes) % align; bdrv_inc_in_flight(bs); - tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD); + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); if (ret < 0) { @@ -2474,13 +2308,13 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, align); assert(max_pdiscard >= bs->bl.request_alignment); - while (count > 0) { + while (bytes > 0) { int ret; - int num = count; + int num = bytes; if (head) { /* Make small requests to get to alignment boundaries. 
*/ - num = MIN(count, align - head); + num = MIN(bytes, align - head); if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) { num %= bs->bl.request_alignment; } @@ -2524,7 +2358,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, } offset += num; - count -= num; + bytes -= num; } ret = 0; out: @@ -2536,13 +2370,13 @@ out: return ret; } -int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count) +int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { Coroutine *co; DiscardCo rwco = { .bs = bs, .offset = offset, - .count = count, + .bytes = bytes, .ret = NOT_DONE, }; diff --git a/block/iscsi.c b/block/iscsi.c index b5f7a228b9..54067e2620 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1116,14 +1116,14 @@ iscsi_getlength(BlockDriverState *bs) } static int -coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; struct unmap_list list; int r = 0; - if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { + if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) { return -ENOTSUP; } @@ -1133,7 +1133,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) } list.lba = offset / iscsilun->block_size; - list.num = count / iscsilun->block_size; + list.num = bytes / iscsilun->block_size; iscsi_co_init_iscsitask(iscsilun, &iTask); qemu_mutex_lock(&iscsilun->mutex); @@ -1174,7 +1174,7 @@ retry: } iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - count >> BDRV_SECTOR_BITS); + bytes >> BDRV_SECTOR_BITS); out_unlock: qemu_mutex_unlock(&iscsilun->mutex); @@ -1183,7 +1183,7 @@ out_unlock: static int coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; @@ -1192,7 +1192,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, bool use_16_for_ws = iscsilun->use_16_for_rw; int r = 0; - if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { + if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) { return -ENOTSUP; } @@ -1215,7 +1215,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, } lba = offset / iscsilun->block_size; - nb_blocks = count / iscsilun->block_size; + nb_blocks = bytes / iscsilun->block_size; if (iscsilun->zeroblock == NULL) { iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size); @@ -1273,17 +1273,17 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - count >> BDRV_SECTOR_BITS); + bytes >> BDRV_SECTOR_BITS); r = iTask.err_code; goto out_unlock; } if (flags & BDRV_REQ_MAY_UNMAP) { iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - count >> BDRV_SECTOR_BITS); + bytes >> BDRV_SECTOR_BITS); } else { iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS, - count >> BDRV_SECTOR_BITS); + bytes >> BDRV_SECTOR_BITS); } out_unlock: diff --git a/block/mirror.c b/block/mirror.c index 19afcc6f1a..68744a17e8 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1063,15 +1063,15 @@ static int64_t coroutine_fn bdrv_mirror_top_get_block_status( } static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags) + int64_t offset, int bytes, BdrvRequestFlags flags) { - return 
bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags); + return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); } static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, - int64_t offset, int count) + int64_t offset, int bytes) { - return bdrv_co_pdiscard(bs->backing->bs, offset, count); + return bdrv_co_pdiscard(bs->backing->bs, offset, bytes); } static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) diff --git a/block/nbd-client.c b/block/nbd-client.c index d64e775385..02e928142e 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -259,14 +259,14 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, } int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int count, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags flags) { ssize_t ret; NBDClientSession *client = nbd_get_client_session(bs); NBDRequest request = { .type = NBD_CMD_WRITE_ZEROES, .from = offset, - .len = count, + .len = bytes, }; NBDReply reply; @@ -316,13 +316,13 @@ int nbd_client_co_flush(BlockDriverState *bs) return -reply.error; } -int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { NBDClientSession *client = nbd_get_client_session(bs); NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, - .len = count, + .len = bytes, }; NBDReply reply; ssize_t ret; diff --git a/block/nbd-client.h b/block/nbd-client.h index 891ba44a20..49636bc621 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -42,12 +42,12 @@ int nbd_client_init(BlockDriverState *bs, Error **errp); void nbd_client_close(BlockDriverState *bs); -int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); +int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int count, BdrvRequestFlags flags); + int bytes, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd.c b/block/nbd.c index e946ea944d..d529305330 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -64,11 +64,11 @@ static int nbd_parse_uri(const char *filename, QDict *options) } /* transport */ - if (!strcmp(uri->scheme, "nbd")) { + if (!g_strcmp0(uri->scheme, "nbd")) { is_unix = false; - } else if (!strcmp(uri->scheme, "nbd+tcp")) { + } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) { is_unix = false; - } else if (!strcmp(uri->scheme, "nbd+unix")) { + } else if (!g_strcmp0(uri->scheme, "nbd+unix")) { is_unix = true; } else { ret = -EINVAL; diff --git a/block/nfs.c b/block/nfs.c index 6b8b5b653d..c3c5de0113 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -82,7 +82,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) error_setg(errp, "Invalid URI specified"); goto out; } - if (strcmp(uri->scheme, "nfs") != 0) { + if (g_strcmp0(uri->scheme, "nfs") != 0) { error_setg(errp, "URI scheme must be 'nfs'"); goto out; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d779ea19cf..3d341fd9cb 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -403,30 +403,21 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, return 0; } -static int coroutine_fn 
do_perform_cow(BlockDriverState *bs, - uint64_t src_cluster_offset, - uint64_t cluster_offset, - int offset_in_cluster, - int bytes) +static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, + uint64_t src_cluster_offset, + unsigned offset_in_cluster, + QEMUIOVector *qiov) { - BDRVQcow2State *s = bs->opaque; - QEMUIOVector qiov; - struct iovec iov; int ret; - iov.iov_len = bytes; - iov.iov_base = qemu_try_blockalign(bs, iov.iov_len); - if (iov.iov_base == NULL) { - return -ENOMEM; + if (qiov->size == 0) { + return 0; } - qemu_iovec_init_external(&qiov, &iov, 1); - BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); if (!bs->drv) { - ret = -ENOMEDIUM; - goto out; + return -ENOMEDIUM; } /* Call .bdrv_co_readv() directly instead of using the public block-layer @@ -434,43 +425,60 @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs, * which can lead to deadlock when block layer copy-on-read is enabled. */ ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster, - bytes, &qiov, 0); + qiov->size, qiov, 0); if (ret < 0) { - goto out; + return ret; } - if (bs->encrypted) { - Error *err = NULL; + return 0; +} + +static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, + uint64_t src_cluster_offset, + unsigned offset_in_cluster, + uint8_t *buffer, + unsigned bytes) +{ + if (bytes && bs->encrypted) { + BDRVQcow2State *s = bs->opaque; int64_t sector = (src_cluster_offset + offset_in_cluster) >> BDRV_SECTOR_BITS; assert(s->cipher); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); assert((bytes & ~BDRV_SECTOR_MASK) == 0); - if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base, - bytes >> BDRV_SECTOR_BITS, true, &err) < 0) { - ret = -EIO; - error_free(err); - goto out; + if (qcow2_encrypt_sectors(s, sector, buffer, buffer, + bytes >> BDRV_SECTOR_BITS, true, NULL) < 0) { + return false; } } + return true; +} + +static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, + uint64_t cluster_offset, + unsigned offset_in_cluster, + QEMUIOVector *qiov) +{ + int ret; + + if (qiov->size == 0) { + return 0; + } ret = qcow2_pre_write_overlap_check(bs, 0, - cluster_offset + offset_in_cluster, bytes); + cluster_offset + offset_in_cluster, qiov->size); if (ret < 0) { - goto out; + return ret; } BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, - bytes, &qiov, 0); + qiov->size, qiov, 0); if (ret < 0) { - goto out; + return ret; } - ret = 0; -out: - qemu_vfree(iov.iov_base); - return ret; + return 0; } @@ -548,7 +556,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* find the cluster offset for the given disk offset */ - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + l2_index = offset_to_l2_index(s, offset); *cluster_offset = be64_to_cpu(l2_table[l2_index]); nb_clusters = size_to_clusters(s, bytes_needed); @@ -685,7 +693,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, /* find the cluster offset for the given disk offset */ - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + l2_index = offset_to_l2_index(s, offset); *new_l2_table = l2_table; *new_l2_index = l2_index; @@ -753,31 +761,133 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } -static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; + Qcow2COWRegion *start = &m->cow_start; + Qcow2COWRegion *end = 
&m->cow_end; + unsigned buffer_size; + unsigned data_bytes = end->offset - (start->offset + start->nb_bytes); + bool merge_reads; + uint8_t *start_buffer, *end_buffer; + QEMUIOVector qiov; int ret; - if (r->nb_bytes == 0) { + assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); + assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes); + assert(start->offset + start->nb_bytes <= end->offset); + assert(!m->data_qiov || m->data_qiov->size == data_bytes); + + if (start->nb_bytes == 0 && end->nb_bytes == 0) { return 0; } + /* If we have to read both the start and end COW regions and the + * middle region is not too large then perform just one read + * operation */ + merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384; + if (merge_reads) { + buffer_size = start->nb_bytes + data_bytes + end->nb_bytes; + } else { + /* If we have to do two reads, add some padding in the middle + * if necessary to make sure that the end region is optimally + * aligned. */ + size_t align = bdrv_opt_mem_align(bs); + assert(align > 0 && align <= UINT_MAX); + assert(QEMU_ALIGN_UP(start->nb_bytes, align) <= + UINT_MAX - end->nb_bytes); + buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes; + } + + /* Reserve a buffer large enough to store all the data that we're + * going to read */ + start_buffer = qemu_try_blockalign(bs, buffer_size); + if (start_buffer == NULL) { + return -ENOMEM; + } + /* The part of the buffer where the end region is located */ + end_buffer = start_buffer + buffer_size - end->nb_bytes; + + qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0)); + qemu_co_mutex_unlock(&s->lock); - ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes); - qemu_co_mutex_lock(&s->lock); + /* First we read the existing data from both COW regions. We + * either read the whole region in one go, or the start and end + * regions separately. */ + if (merge_reads) { + qemu_iovec_add(&qiov, start_buffer, buffer_size); + ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); + } else { + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); + ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); + if (ret < 0) { + goto fail; + } + qemu_iovec_reset(&qiov); + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); + ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov); + } if (ret < 0) { - return ret; + goto fail; } + /* Encrypt the data if necessary before writing it */ + if (bs->encrypted) { + if (!do_perform_cow_encrypt(bs, m->offset, start->offset, + start_buffer, start->nb_bytes) || + !do_perform_cow_encrypt(bs, m->offset, end->offset, + end_buffer, end->nb_bytes)) { + ret = -EIO; + goto fail; + } + } + + /* And now we can write everything. 
If we have the guest data we + * can write everything in one single operation */ + if (m->data_qiov) { + qemu_iovec_reset(&qiov); + if (start->nb_bytes) { + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); + } + qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes); + if (end->nb_bytes) { + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); + } + /* NOTE: we have a write_aio blkdebug event here followed by + * a cow_write one in do_perform_cow_write(), but there's only + * one single I/O operation */ + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); + } else { + /* If there's no guest data then write both COW regions separately */ + qemu_iovec_reset(&qiov); + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); + if (ret < 0) { + goto fail; + } + + qemu_iovec_reset(&qiov); + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); + ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov); + } + +fail: + qemu_co_mutex_lock(&s->lock); + /* * Before we update the L2 table to actually point to the new cluster, we * need to be sure that the refcounts have been increased and COW was * handled. */ - qcow2_cache_depends_on_flush(s->l2_table_cache); + if (ret == 0) { + qcow2_cache_depends_on_flush(s->l2_table_cache); + } - return 0; + qemu_vfree(start_buffer); + qemu_iovec_destroy(&qiov); + return ret; } int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) @@ -797,12 +907,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) } /* copy content of unmodified sectors */ - ret = perform_cow(bs, m, &m->cow_start); - if (ret < 0) { - goto err; - } - - ret = perform_cow(bs, m, &m->cow_end); + ret = perform_cow(bs, m); if (ret < 0) { goto err; } diff --git a/block/qcow2.c b/block/qcow2.c index b3ba5daa93..2f94f0326e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -356,7 +356,7 @@ static int validate_table_offset(BlockDriverState *bs, uint64_t offset, } /* Tables must be cluster aligned */ - if (offset & (s->cluster_size - 1)) { + if (offset_into_cluster(s, offset) != 0) { return -EINVAL; } @@ -1575,6 +1575,44 @@ fail: return ret; } +/* Check if it's possible to merge a write request with the writing of + * the data from the COW regions */ +static bool merge_cow(uint64_t offset, unsigned bytes, + QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) +{ + QCowL2Meta *m; + + for (m = l2meta; m != NULL; m = m->next) { + /* If both COW regions are empty then there's nothing to merge */ + if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { + continue; + } + + /* The data (middle) region must be immediately after the + * start region */ + if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { + continue; + } + + /* The end region must be immediately after the data (middle) + * region */ + if (m->offset + m->cow_end.offset != offset + bytes) { + continue; + } + + /* Make sure that adding both COW regions to the QEMUIOVector + * does not exceed IOV_MAX */ + if (hd_qiov->niov > IOV_MAX - 2) { + continue; + } + + m->data_qiov = hd_qiov; + return true; + } + + return false; +} + static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) @@ -1657,16 +1695,22 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, goto fail; } - qemu_co_mutex_unlock(&s->lock); - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - 
trace_qcow2_writev_data(qemu_coroutine_self(), - cluster_offset + offset_in_cluster); - ret = bdrv_co_pwritev(bs->file, - cluster_offset + offset_in_cluster, - cur_bytes, &hd_qiov, 0); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) { - goto fail; + /* If we need to do COW, check if it's possible to merge the + * writing of the guest data together with that of the COW regions. + * If it's not possible (or not necessary) then write the + * guest data now. */ + if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { + qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + trace_qcow2_writev_data(qemu_coroutine_self(), + cluster_offset + offset_in_cluster); + ret = bdrv_co_pwritev(bs->file, + cluster_offset + offset_in_cluster, + cur_bytes, &hd_qiov, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto fail; + } } while (l2meta != NULL) { @@ -2464,16 +2508,16 @@ static bool is_zero_sectors(BlockDriverState *bs, int64_t start, } static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags) + int64_t offset, int bytes, BdrvRequestFlags flags) { int ret; BDRVQcow2State *s = bs->opaque; uint32_t head = offset % s->cluster_size; - uint32_t tail = (offset + count) % s->cluster_size; + uint32_t tail = (offset + bytes) % s->cluster_size; - trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count); - if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) { + trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); + if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { tail = 0; } @@ -2482,12 +2526,12 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, uint64_t off; unsigned int nr; - assert(head + count <= s->cluster_size); + assert(head + bytes <= s->cluster_size); /* check whether remainder of cluster already reads as zero */ if (!(is_zero_sectors(bs, cl_start, DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) && - is_zero_sectors(bs, (offset + count) >> BDRV_SECTOR_BITS, + is_zero_sectors(bs, (offset + bytes) >> BDRV_SECTOR_BITS, DIV_ROUND_UP(-tail & (s->cluster_size - 1), BDRV_SECTOR_SIZE)))) { return -ENOTSUP; @@ -2496,7 +2540,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, qemu_co_mutex_lock(&s->lock); /* We can have new write after previous check */ offset = cl_start << BDRV_SECTOR_BITS; - count = s->cluster_size; + bytes = s->cluster_size; nr = s->cluster_size; ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); if (ret != QCOW2_CLUSTER_UNALLOCATED && @@ -2509,33 +2553,33 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, qemu_co_mutex_lock(&s->lock); } - trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); + trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); /* Whatever is left can use real zero clusters */ - ret = qcow2_cluster_zeroize(bs, offset, count, flags); + ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); qemu_co_mutex_unlock(&s->lock); return ret; } static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, - int64_t offset, int count) + int64_t offset, int bytes) { int ret; BDRVQcow2State *s = bs->opaque; - if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) { - assert(count < s->cluster_size); + if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { + assert(bytes < s->cluster_size); /* Ignore partial clusters, except for the special case of the * complete partial cluster at the end of an unaligned file */ if (!QEMU_IS_ALIGNED(offset, 
s->cluster_size) || - offset + count != bs->total_sectors * BDRV_SECTOR_SIZE) { + offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { return -ENOTSUP; } } qemu_co_mutex_lock(&s->lock); - ret = qcow2_cluster_discard(bs, offset, count, QCOW2_DISCARD_REQUEST, + ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, false); qemu_co_mutex_unlock(&s->lock); return ret; diff --git a/block/qcow2.h b/block/qcow2.h index 1801dc30dc..87b15eb4aa 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -301,10 +301,10 @@ typedef struct Qcow2COWRegion { * Offset of the COW region in bytes from the start of the first cluster * touched by the request. */ - uint64_t offset; + unsigned offset; /** Number of bytes to copy */ - int nb_bytes; + unsigned nb_bytes; } Qcow2COWRegion; /** @@ -343,6 +343,13 @@ typedef struct QCowL2Meta */ Qcow2COWRegion cow_end; + /** + * The I/O vector with the data from the actual guest write request. + * If non-NULL, this is meant to be merged together with the data + * from @cow_start and @cow_end into one single write operation. + */ + QEMUIOVector *data_qiov; + /** Pointer to next L2Meta of the same write request */ struct QCowL2Meta *next; diff --git a/block/qed-cluster.c b/block/qed-cluster.c index 8f5da74c4d..d8d6e66a0f 100644 --- a/block/qed-cluster.c +++ b/block/qed-cluster.c @@ -61,108 +61,82 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s, return i - index; } -typedef struct { - BDRVQEDState *s; - uint64_t pos; - size_t len; - - QEDRequest *request; - - /* User callback */ - QEDFindClusterFunc *cb; - void *opaque; -} QEDFindClusterCB; - -static void qed_find_cluster_cb(void *opaque, int ret) -{ - QEDFindClusterCB *find_cluster_cb = opaque; - BDRVQEDState *s = find_cluster_cb->s; - QEDRequest *request = find_cluster_cb->request; - uint64_t offset = 0; - size_t len = 0; - unsigned int index; - unsigned int n; - - qed_acquire(s); - if (ret) { - goto out; - } - - index = qed_l2_index(s, find_cluster_cb->pos); - n = qed_bytes_to_clusters(s, - qed_offset_into_cluster(s, find_cluster_cb->pos) + - find_cluster_cb->len); - n = qed_count_contiguous_clusters(s, request->l2_table->table, - index, n, &offset); - - if (qed_offset_is_unalloc_cluster(offset)) { - ret = QED_CLUSTER_L2; - } else if (qed_offset_is_zero_cluster(offset)) { - ret = QED_CLUSTER_ZERO; - } else if (qed_check_cluster_offset(s, offset)) { - ret = QED_CLUSTER_FOUND; - } else { - ret = -EINVAL; - } - - len = MIN(find_cluster_cb->len, n * s->header.cluster_size - - qed_offset_into_cluster(s, find_cluster_cb->pos)); - -out: - find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len); - qed_release(s); - g_free(find_cluster_cb); -} - /** * Find the offset of a data cluster * * @s: QED state * @request: L2 cache entry * @pos: Byte position in device - * @len: Number of bytes - * @cb: Completion function - * @opaque: User data for completion function + * @len: Number of bytes (may be shortened on return) + * @img_offset: Contains offset in the image file on success * * This function translates a position in the block device to an offset in the - * image file. It invokes the cb completion callback to report back the - * translated offset or unallocated range in the image file. + * image file. The translated offset or unallocated range in the image file is + * reported back in *img_offset and *len. * * If the L2 table exists, request->l2_table points to the L2 table cache entry * and the caller must free the reference when they are finished. 
The cache * entry is exposed in this way to avoid callers having to read the L2 table * again later during request processing. If request->l2_table is non-NULL it * will be unreferenced before taking on the new cache entry. + * + * On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous + * range in the image file. + * + * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1 + * table offset, respectively. len is number of contiguous unallocated bytes. */ -void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, - size_t len, QEDFindClusterFunc *cb, void *opaque) +int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request, + uint64_t pos, size_t *len, + uint64_t *img_offset) { - QEDFindClusterCB *find_cluster_cb; uint64_t l2_offset; + uint64_t offset = 0; + unsigned int index; + unsigned int n; + int ret; /* Limit length to L2 boundary. Requests are broken up at the L2 boundary * so that a request acts on one L2 table at a time. */ - len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos); + *len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos); l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)]; if (qed_offset_is_unalloc_cluster(l2_offset)) { - cb(opaque, QED_CLUSTER_L1, 0, len); - return; + *img_offset = 0; + return QED_CLUSTER_L1; } if (!qed_check_table_offset(s, l2_offset)) { - cb(opaque, -EINVAL, 0, 0); - return; + *img_offset = *len = 0; + return -EINVAL; + } + + ret = qed_read_l2_table(s, request, l2_offset); + qed_acquire(s); + if (ret) { + goto out; + } + + index = qed_l2_index(s, pos); + n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len); + n = qed_count_contiguous_clusters(s, request->l2_table->table, + index, n, &offset); + + if (qed_offset_is_unalloc_cluster(offset)) { + ret = QED_CLUSTER_L2; + } else if (qed_offset_is_zero_cluster(offset)) { + ret = QED_CLUSTER_ZERO; + } else if (qed_check_cluster_offset(s, offset)) { + ret = QED_CLUSTER_FOUND; + } else { + ret = -EINVAL; } - find_cluster_cb = g_malloc(sizeof(*find_cluster_cb)); - find_cluster_cb->s = s; - find_cluster_cb->pos = pos; - find_cluster_cb->len = len; - find_cluster_cb->cb = cb; - find_cluster_cb->opaque = opaque; - find_cluster_cb->request = request; + *len = MIN(*len, + n * s->header.cluster_size - qed_offset_into_cluster(s, pos)); - qed_read_l2_table(s, request, l2_offset, - qed_find_cluster_cb, find_cluster_cb); +out: + *img_offset = offset; + qed_release(s); + return ret; } diff --git a/block/qed-gencb.c b/block/qed-gencb.c deleted file mode 100644 index faf8ecc840..0000000000 --- a/block/qed-gencb.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * QEMU Enhanced Disk Format - * - * Copyright IBM, Corp. 2010 - * - * Authors: - * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "qed.h" - -void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque) -{ - GenericCB *gencb = g_malloc(len); - gencb->cb = cb; - gencb->opaque = opaque; - return gencb; -} - -void gencb_complete(void *opaque, int ret) -{ - GenericCB *gencb = opaque; - BlockCompletionFunc *cb = gencb->cb; - void *user_opaque = gencb->opaque; - - g_free(gencb); - cb(user_opaque, ret); -} diff --git a/block/qed-table.c b/block/qed-table.c index b12c298a8a..ebee2c50f0 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -18,99 +18,38 @@ #include "qed.h" #include "qemu/bswap.h" -typedef struct { - GenericCB gencb; - BDRVQEDState *s; - QEDTable *table; - - struct iovec iov; +static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table) +{ QEMUIOVector qiov; -} QEDReadTableCB; + int noffsets; + int i, ret; -static void qed_read_table_cb(void *opaque, int ret) -{ - QEDReadTableCB *read_table_cb = opaque; - QEDTable *table = read_table_cb->table; - BDRVQEDState *s = read_table_cb->s; - int noffsets = read_table_cb->qiov.size / sizeof(uint64_t); - int i; + struct iovec iov = { + .iov_base = table->offsets, + .iov_len = s->header.cluster_size * s->header.table_size, + }; + qemu_iovec_init_external(&qiov, &iov, 1); - /* Handle I/O error */ - if (ret) { + trace_qed_read_table(s, offset, table); + + ret = bdrv_preadv(s->bs->file, offset, &qiov); + if (ret < 0) { goto out; } /* Byteswap offsets */ qed_acquire(s); + noffsets = qiov.size / sizeof(uint64_t); for (i = 0; i < noffsets; i++) { table->offsets[i] = le64_to_cpu(table->offsets[i]); } qed_release(s); + ret = 0; out: /* Completion */ - trace_qed_read_table_cb(s, read_table_cb->table, ret); - gencb_complete(&read_table_cb->gencb, ret); -} - -static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, - BlockCompletionFunc *cb, void *opaque) -{ - QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb), - cb, opaque); - QEMUIOVector *qiov = &read_table_cb->qiov; - - trace_qed_read_table(s, offset, table); - - read_table_cb->s = s; - read_table_cb->table = table; - read_table_cb->iov.iov_base = table->offsets, - read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size, - - qemu_iovec_init_external(qiov, &read_table_cb->iov, 1); - bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov, - qiov->size / BDRV_SECTOR_SIZE, - qed_read_table_cb, read_table_cb); -} - -typedef struct { - GenericCB gencb; - BDRVQEDState *s; - QEDTable *orig_table; - QEDTable *table; - bool flush; /* flush after write? 
*/ - - struct iovec iov; - QEMUIOVector qiov; -} QEDWriteTableCB; - -static void qed_write_table_cb(void *opaque, int ret) -{ - QEDWriteTableCB *write_table_cb = opaque; - BDRVQEDState *s = write_table_cb->s; - - trace_qed_write_table_cb(s, - write_table_cb->orig_table, - write_table_cb->flush, - ret); - - if (ret) { - goto out; - } - - if (write_table_cb->flush) { - /* We still need to flush first */ - write_table_cb->flush = false; - qed_acquire(s); - bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb, - write_table_cb); - qed_release(s); - return; - } - -out: - qemu_vfree(write_table_cb->table); - gencb_complete(&write_table_cb->gencb, ret); + trace_qed_read_table_cb(s, table, ret); + return ret; } /** @@ -122,17 +61,17 @@ out: * @index: Index of first element * @n: Number of elements * @flush: Whether or not to sync to disk - * @cb: Completion function - * @opaque: Argument for completion function */ -static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, - unsigned int index, unsigned int n, bool flush, - BlockCompletionFunc *cb, void *opaque) +static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, + unsigned int index, unsigned int n, bool flush) { - QEDWriteTableCB *write_table_cb; unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; unsigned int start, end, i; + QEDTable *new_table; + struct iovec iov; + QEMUIOVector qiov; size_t len_bytes; + int ret; trace_qed_write_table(s, offset, table, index, n); @@ -142,157 +81,115 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, len_bytes = (end - start) * sizeof(uint64_t); - write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque); - write_table_cb->s = s; - write_table_cb->orig_table = table; - write_table_cb->flush = flush; - write_table_cb->table = qemu_blockalign(s->bs, len_bytes); - write_table_cb->iov.iov_base = write_table_cb->table->offsets; - write_table_cb->iov.iov_len = len_bytes; - qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1); + new_table = qemu_blockalign(s->bs, len_bytes); + iov = (struct iovec) { + .iov_base = new_table->offsets, + .iov_len = len_bytes, + }; + qemu_iovec_init_external(&qiov, &iov, 1); /* Byteswap table */ for (i = start; i < end; i++) { uint64_t le_offset = cpu_to_le64(table->offsets[i]); - write_table_cb->table->offsets[i - start] = le_offset; + new_table->offsets[i - start] = le_offset; } /* Adjust for offset into table */ offset += start * sizeof(uint64_t); - bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, - &write_table_cb->qiov, - write_table_cb->qiov.size / BDRV_SECTOR_SIZE, - qed_write_table_cb, write_table_cb); -} + ret = bdrv_pwritev(s->bs->file, offset, &qiov); + trace_qed_write_table_cb(s, table, flush, ret); + if (ret < 0) { + goto out; + } -/** - * Propagate return value from async callback - */ -static void qed_sync_cb(void *opaque, int ret) -{ - *(int *)opaque = ret; + if (flush) { + qed_acquire(s); + ret = bdrv_flush(s->bs); + qed_release(s); + if (ret < 0) { + goto out; + } + } + + ret = 0; +out: + qemu_vfree(new_table); + return ret; } int qed_read_l1_table_sync(BDRVQEDState *s) { - int ret = -EINPROGRESS; - - qed_read_table(s, s->header.l1_table_offset, - s->l1_table, qed_sync_cb, &ret); - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); - - return ret; + return qed_read_table(s, s->header.l1_table_offset, s->l1_table); } -void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, - BlockCompletionFunc *cb, void *opaque) +int 
qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n) { BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE); - qed_write_table(s, s->header.l1_table_offset, - s->l1_table, index, n, false, cb, opaque); + return qed_write_table(s, s->header.l1_table_offset, + s->l1_table, index, n, false); } int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, unsigned int n) { - int ret = -EINPROGRESS; + return qed_write_l1_table(s, index, n); +} - qed_write_l1_table(s, index, n, qed_sync_cb, &ret); - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); +int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset) +{ + int ret; - return ret; -} + qed_unref_l2_cache_entry(request->l2_table); -typedef struct { - GenericCB gencb; - BDRVQEDState *s; - uint64_t l2_offset; - QEDRequest *request; -} QEDReadL2TableCB; + /* Check for cached L2 entry */ + request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); + if (request->l2_table) { + return 0; + } -static void qed_read_l2_table_cb(void *opaque, int ret) -{ - QEDReadL2TableCB *read_l2_table_cb = opaque; - QEDRequest *request = read_l2_table_cb->request; - BDRVQEDState *s = read_l2_table_cb->s; - CachedL2Table *l2_table = request->l2_table; - uint64_t l2_offset = read_l2_table_cb->l2_offset; + request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); + request->l2_table->table = qed_alloc_table(s); + + BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD); + ret = qed_read_table(s, offset, request->l2_table->table); qed_acquire(s); if (ret) { /* can't trust loaded L2 table anymore */ - qed_unref_l2_cache_entry(l2_table); + qed_unref_l2_cache_entry(request->l2_table); request->l2_table = NULL; } else { - l2_table->offset = l2_offset; + request->l2_table->offset = offset; - qed_commit_l2_cache_entry(&s->l2_cache, l2_table); + qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table); /* This is guaranteed to succeed because we just committed the entry * to the cache. 
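 * (After the commit a lookup of this offset must succeed: either our
 * entry went into the cache or an equivalent entry was already there,
 * and qed_find_l2_cache_entry() takes a fresh reference on behalf of
 * the request, which is why the assert below can never fire.)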
*/ - request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); + request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); assert(request->l2_table != NULL); } qed_release(s); - gencb_complete(&read_l2_table_cb->gencb, ret); -} - -void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, - BlockCompletionFunc *cb, void *opaque) -{ - QEDReadL2TableCB *read_l2_table_cb; - - qed_unref_l2_cache_entry(request->l2_table); - - /* Check for cached L2 entry */ - request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); - if (request->l2_table) { - cb(opaque, 0); - return; - } - - request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); - request->l2_table->table = qed_alloc_table(s); - - read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque); - read_l2_table_cb->s = s; - read_l2_table_cb->l2_offset = offset; - read_l2_table_cb->request = request; - - BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD); - qed_read_table(s, offset, request->l2_table->table, - qed_read_l2_table_cb, read_l2_table_cb); + return ret; } int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset) { - int ret = -EINPROGRESS; - - qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); - - return ret; + return qed_read_l2_table(s, request, offset); } -void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, - unsigned int index, unsigned int n, bool flush, - BlockCompletionFunc *cb, void *opaque) +int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, + unsigned int index, unsigned int n, bool flush) { BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE); - qed_write_table(s, request->l2_table->offset, - request->l2_table->table, index, n, flush, cb, opaque); + return qed_write_table(s, request->l2_table->offset, + request->l2_table->table, index, n, flush); } int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, unsigned int index, unsigned int n, bool flush) { - int ret = -EINPROGRESS; - - qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); - - return ret; + return qed_write_l2_table(s, request, index, n, flush); } diff --git a/block/qed.c b/block/qed.c index 8d899fd479..385381a78a 100644 --- a/block/qed.c +++ b/block/qed.c @@ -21,10 +21,6 @@ #include "qapi/qmp/qerror.h" #include "sysemu/block-backend.h" -static const AIOCBInfo qed_aiocb_info = { - .aiocb_size = sizeof(QEDAIOCB), -}; - static int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename) { @@ -92,49 +88,13 @@ int qed_write_header_sync(BDRVQEDState *s) return 0; } -typedef struct { - GenericCB gencb; - BDRVQEDState *s; - struct iovec iov; - QEMUIOVector qiov; - int nsectors; - uint8_t *buf; -} QEDWriteHeaderCB; - -static void qed_write_header_cb(void *opaque, int ret) -{ - QEDWriteHeaderCB *write_header_cb = opaque; - - qemu_vfree(write_header_cb->buf); - gencb_complete(write_header_cb, ret); -} - -static void qed_write_header_read_cb(void *opaque, int ret) -{ - QEDWriteHeaderCB *write_header_cb = opaque; - BDRVQEDState *s = write_header_cb->s; - - if (ret) { - qed_write_header_cb(write_header_cb, ret); - return; - } - - /* Update header */ - qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf); - - bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov, - write_header_cb->nsectors, qed_write_header_cb, - write_header_cb); -} - /** * Update header in-place (does not rewrite backing filename or other strings) * * This function 
only updates known header fields in-place and does not affect * extra data after the QED header. */ -static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb, - void *opaque) +static int coroutine_fn qed_write_header(BDRVQEDState *s) { /* We must write full sectors for O_DIRECT but cannot necessarily generate * the data following the header if an unrecognized compat feature is @@ -144,18 +104,35 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb, int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE); size_t len = nsectors * BDRV_SECTOR_SIZE; - QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb), - cb, opaque); - - write_header_cb->s = s; - write_header_cb->nsectors = nsectors; - write_header_cb->buf = qemu_blockalign(s->bs, len); - write_header_cb->iov.iov_base = write_header_cb->buf; - write_header_cb->iov.iov_len = len; - qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1); - - bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors, - qed_write_header_read_cb, write_header_cb); + uint8_t *buf; + struct iovec iov; + QEMUIOVector qiov; + int ret; + + buf = qemu_blockalign(s->bs, len); + iov = (struct iovec) { + .iov_base = buf, + .iov_len = len, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0); + if (ret < 0) { + goto out; + } + + /* Update header */ + qed_header_cpu_to_le(&s->header, (QEDHeader *) buf); + + ret = bdrv_co_pwritev(s->bs->file, 0, qiov.size, &qiov, 0); + if (ret < 0) { + goto out; + } + + ret = 0; +out: + qemu_vfree(buf); + return ret; } static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size) @@ -272,20 +249,6 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) return l2_table; } -static void qed_aio_next_io(QEDAIOCB *acb, int ret); - -static void qed_aio_start_io(QEDAIOCB *acb) -{ - qed_aio_next_io(acb, 0); -} - -static void qed_aio_next_io_cb(void *opaque, int ret) -{ - QEDAIOCB *acb = opaque; - - qed_aio_next_io(acb, ret); -} - static void qed_plug_allocating_write_reqs(BDRVQEDState *s) { assert(!s->allocating_write_reqs_plugged); @@ -295,61 +258,47 @@ static void qed_plug_allocating_write_reqs(BDRVQEDState *s) static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) { - QEDAIOCB *acb; - assert(s->allocating_write_reqs_plugged); s->allocating_write_reqs_plugged = false; - - acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); - if (acb) { - qed_aio_start_io(acb); - } + qemu_co_enter_next(&s->allocating_write_reqs); } -static void qed_finish_clear_need_check(void *opaque, int ret) -{ - /* Do nothing */ -} - -static void qed_flush_after_clear_need_check(void *opaque, int ret) +static void coroutine_fn qed_need_check_timer_entry(void *opaque) { BDRVQEDState *s = opaque; + int ret; - bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s); + /* The timer should only fire when allocating writes have drained */ + assert(!s->allocating_acb); - /* No need to wait until flush completes */ - qed_unplug_allocating_write_reqs(s); -} + trace_qed_need_check_timer_cb(s); -static void qed_clear_need_check(void *opaque, int ret) -{ - BDRVQEDState *s = opaque; + qed_acquire(s); + qed_plug_allocating_write_reqs(s); - if (ret) { + /* Ensure writes are on disk before clearing flag */ + ret = bdrv_co_flush(s->bs->file->bs); + qed_release(s); + if (ret < 0) { qed_unplug_allocating_write_reqs(s); return; } s->header.features &= ~QED_F_NEED_CHECK; - qed_write_header(s, qed_flush_after_clear_need_check, s); + ret 
= qed_write_header(s); + (void) ret; + + qed_unplug_allocating_write_reqs(s); + + ret = bdrv_co_flush(s->bs); + (void) ret; } static void qed_need_check_timer_cb(void *opaque) { - BDRVQEDState *s = opaque; - - /* The timer should only fire when allocating writes have drained */ - assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs)); - - trace_qed_need_check_timer_cb(s); - - qed_acquire(s); - qed_plug_allocating_write_reqs(s); - - /* Ensure writes are on disk before clearing flag */ - bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s); - qed_release(s); + Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); + qemu_coroutine_enter(co); } void qed_acquire(BDRVQEDState *s) @@ -423,7 +372,7 @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags, int ret; s->bs = bs; - QSIMPLEQ_INIT(&s->allocating_write_reqs); + qemu_co_queue_init(&s->allocating_write_reqs); ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); if (ret < 0) { @@ -776,14 +725,14 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, .file = file, }; QEDRequest request = { .l2_table = NULL }; + uint64_t offset; + int ret; - qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb); + ret = qed_find_cluster(s, &request, cb.pos, &len, &offset); + qed_is_allocated_cb(&cb, ret, offset, len); - /* Now sleep if the callback wasn't invoked immediately */ - while (cb.status == BDRV_BLOCK_OFFSET_MASK) { - cb.co = qemu_coroutine_self(); - qemu_coroutine_yield(); - } + /* The callback was invoked immediately */ + assert(cb.status != BDRV_BLOCK_OFFSET_MASK); qed_unref_l2_cache_entry(request.l2_table); @@ -792,7 +741,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, static BDRVQEDState *acb_to_s(QEDAIOCB *acb) { - return acb->common.bs->opaque; + return acb->bs->opaque; } /** @@ -808,13 +757,13 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb) * This function reads qiov->size bytes starting at pos from the backing file. * If there is no backing file then zeroes are read. */ -static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, - QEMUIOVector *qiov, - QEMUIOVector **backing_qiov, - BlockCompletionFunc *cb, void *opaque) +static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos, + QEMUIOVector *qiov, + QEMUIOVector **backing_qiov) { uint64_t backing_length = 0; size_t size; + int ret; /* If there is a backing file, get its length. Treat the absence of a * backing file like a zero length backing file. 
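The same conversion is applied to every helper in this series: a bdrv_aio_*() call plus completion callback becomes a bdrv_co_*() call whose return value is checked inline, and the heap-allocated callback state disappears. A minimal sketch of the before/after shape, using hypothetical names rather than the real QEMU API:

#include <errno.h>

typedef void CompletionFunc(void *opaque, int ret);

/* Before: the continuation lives in a callback; errors arrive as the
 * 'ret' argument and every step needs its own cb/opaque pair. */
static void op_aio(int fd, CompletionFunc *cb, void *opaque)
{
    int ret = (fd < 0) ? -EINVAL : 0;   /* stand-in for the actual I/O */
    cb(opaque, ret);                    /* caller resumes in the callback */
}

/* After: a coroutine_fn returns 0 or -errno directly, so error handling
 * is an ordinary early return instead of a cb(opaque, ret) hop. */
static int op_co(int fd)
{
    if (fd < 0) {
        return -EINVAL;
    }
    return 0;
}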
@@ -822,8 +771,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, if (s->bs->backing) { int64_t l = bdrv_getlength(s->bs->backing->bs); if (l < 0) { - cb(opaque, l); - return; + return l; } backing_length = l; } @@ -836,8 +784,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* Complete now if there are no backing file sectors to read */ if (pos >= backing_length) { - cb(opaque, 0); - return; + return 0; } /* If the read straddles the end of the backing file, shorten it */ @@ -849,46 +796,11 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, qemu_iovec_concat(*backing_qiov, qiov, 0, size); BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); - bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE, - *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque); -} - -typedef struct { - GenericCB gencb; - BDRVQEDState *s; - QEMUIOVector qiov; - QEMUIOVector *backing_qiov; - struct iovec iov; - uint64_t offset; -} CopyFromBackingFileCB; - -static void qed_copy_from_backing_file_cb(void *opaque, int ret) -{ - CopyFromBackingFileCB *copy_cb = opaque; - qemu_vfree(copy_cb->iov.iov_base); - gencb_complete(&copy_cb->gencb, ret); -} - -static void qed_copy_from_backing_file_write(void *opaque, int ret) -{ - CopyFromBackingFileCB *copy_cb = opaque; - BDRVQEDState *s = copy_cb->s; - - if (copy_cb->backing_qiov) { - qemu_iovec_destroy(copy_cb->backing_qiov); - g_free(copy_cb->backing_qiov); - copy_cb->backing_qiov = NULL; - } - - if (ret) { - qed_copy_from_backing_file_cb(copy_cb, ret); - return; + ret = bdrv_co_preadv(s->bs->backing, pos, size, *backing_qiov, 0); + if (ret < 0) { + return ret; } - - BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); - bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE, - &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE, - qed_copy_from_backing_file_cb, copy_cb); + return 0; } /** @@ -898,32 +810,48 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret) * @pos: Byte position in device * @len: Number of bytes * @offset: Byte offset in image file - * @cb: Completion function - * @opaque: User data for completion function */ -static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, - uint64_t len, uint64_t offset, - BlockCompletionFunc *cb, - void *opaque) +static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, + uint64_t pos, uint64_t len, + uint64_t offset) { - CopyFromBackingFileCB *copy_cb; + QEMUIOVector qiov; + QEMUIOVector *backing_qiov = NULL; + struct iovec iov; + int ret; /* Skip copy entirely if there is no work to do */ if (len == 0) { - cb(opaque, 0); - return; + return 0; } - copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); - copy_cb->s = s; - copy_cb->offset = offset; - copy_cb->backing_qiov = NULL; - copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); - copy_cb->iov.iov_len = len; - qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1); + iov = (struct iovec) { + .iov_base = qemu_blockalign(s->bs, len), + .iov_len = len, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = qed_read_backing_file(s, pos, &qiov, &backing_qiov); - qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov, - qed_copy_from_backing_file_write, copy_cb); + if (backing_qiov) { + qemu_iovec_destroy(backing_qiov); + g_free(backing_qiov); + backing_qiov = NULL; + } + + if (ret) { + goto out; + } + + BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); + ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0); + if (ret < 0) { + goto out; + } + ret = 0; +out: +
qemu_vfree(iov.iov_base); + return ret; } /** @@ -938,8 +866,9 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, * The cluster offset may be an allocated byte offset in the image file, the * zero cluster marker, or the unallocated cluster marker. */ -static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, - unsigned int n, uint64_t cluster) +static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table, + int index, unsigned int n, + uint64_t cluster) { int i; for (i = index; i < index + n; i++) { @@ -951,27 +880,9 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, } } -static void qed_aio_complete_bh(void *opaque) +static void coroutine_fn qed_aio_complete(QEDAIOCB *acb) { - QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); - BlockCompletionFunc *cb = acb->common.cb; - void *user_opaque = acb->common.opaque; - int ret = acb->bh_ret; - - qemu_aio_unref(acb); - - /* Invoke callback */ - qed_acquire(s); - cb(user_opaque, ret); - qed_release(s); -} - -static void qed_aio_complete(QEDAIOCB *acb, int ret) -{ - BDRVQEDState *s = acb_to_s(acb); - - trace_qed_aio_complete(s, acb, ret); /* Free resources */ qemu_iovec_destroy(&acb->cur_qiov); @@ -983,22 +894,16 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) acb->qiov->iov[0].iov_base = NULL; } - /* Arrange for a bh to invoke the completion function */ - acb->bh_ret = ret; - aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), - qed_aio_complete_bh, acb); - /* Start next allocating write request waiting behind this one. Note that * requests enqueue themselves when they first hit an unallocated cluster * but they wait until the entire request is finished before waking up the * next request in the queue. This ensures that we don't cycle through * requests multiple times but rather finish one at a time completely. 
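 *
 * (Example: allocating writes A and B arrive together. B parks itself on
 * the CoQueue; only after A's data write, L2 update and L1 update have
 * all finished does qemu_co_enter_next() wake B, which then redoes its
 * cluster lookup against the committed tables.)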
*/ - if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { - QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next); - acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); - if (acb) { - qed_aio_start_io(acb); + if (acb == s->allocating_acb) { + s->allocating_acb = NULL; + if (!qemu_co_queue_empty(&s->allocating_write_reqs)) { + qemu_co_enter_next(&s->allocating_write_reqs); } else if (s->header.features & QED_F_NEED_CHECK) { qed_start_need_check_timer(s); } @@ -1006,15 +911,21 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) } /** - * Commit the current L2 table to the cache + * Update L1 table with new L2 table offset and write it out */ -static void qed_commit_l2_update(void *opaque, int ret) +static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb) { - QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); CachedL2Table *l2_table = acb->request.l2_table; uint64_t l2_offset = l2_table->offset; + int index, ret; + + index = qed_l1_index(s, acb->cur_pos); + s->l1_table->offsets[index] = l2_table->offset; + ret = qed_write_l1_table(s, index, 1); + + /* Commit the current L2 table to the cache */ qed_commit_l2_cache_entry(&s->l2_cache, l2_table); /* This is guaranteed to succeed because we just committed the entry to the @@ -1023,41 +934,18 @@ static void qed_commit_l2_update(void *opaque, int ret) acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); assert(acb->request.l2_table != NULL); - qed_aio_next_io(acb, ret); + return ret; } -/** - * Update L1 table with new L2 table offset and write it out - */ -static void qed_aio_write_l1_update(void *opaque, int ret) -{ - QEDAIOCB *acb = opaque; - BDRVQEDState *s = acb_to_s(acb); - int index; - - if (ret) { - qed_aio_complete(acb, ret); - return; - } - - index = qed_l1_index(s, acb->cur_pos); - s->l1_table->offsets[index] = acb->request.l2_table->offset; - - qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb); -} /** * Update L2 table with new cluster offsets and write them out */ -static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) +static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) { BDRVQEDState *s = acb_to_s(acb); bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; - int index; - - if (ret) { - goto err; - } + int index, ret; if (need_alloc) { qed_unref_l2_cache_entry(acb->request.l2_table); @@ -1070,115 +958,99 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) if (need_alloc) { /* Write out the whole new L2 table */ - qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true, - qed_aio_write_l1_update, acb); + ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true); + if (ret) { + return ret; + } + return qed_aio_write_l1_update(acb); } else { /* Write out only the updated part of the L2 table */ - qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false, - qed_aio_next_io_cb, acb); - } - return; - -err: - qed_aio_complete(acb, ret); -} - -static void qed_aio_write_l2_update_cb(void *opaque, int ret) -{ - QEDAIOCB *acb = opaque; - qed_aio_write_l2_update(acb, ret, acb->cur_cluster); -} - -/** - * Flush new data clusters before updating the L2 table - * - * This flush is necessary when a backing file is in use. A crash during an - * allocating write could result in empty clusters in the image. If the write - * only touched a subregion of the cluster, then backing image sectors have - * been lost in the untouched region. 
The solution is to flush after writing a - * new data cluster and before updating the L2 table. - */ -static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) -{ - QEDAIOCB *acb = opaque; - BDRVQEDState *s = acb_to_s(acb); - - if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) { - qed_aio_complete(acb, -EIO); + ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, + false); + if (ret) { + return ret; + } } + return 0; } /** * Write data to the image file */ -static void qed_aio_write_main(void *opaque, int ret) +static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) { - QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); uint64_t offset = acb->cur_cluster + qed_offset_into_cluster(s, acb->cur_pos); - BlockCompletionFunc *next_fn; + int ret; - trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size); + trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size); - if (ret) { - qed_aio_complete(acb, ret); - return; + BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); + ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size, + &acb->cur_qiov, 0); + if (ret < 0) { + return ret; } - if (acb->find_cluster_ret == QED_CLUSTER_FOUND) { - next_fn = qed_aio_next_io_cb; - } else { + if (acb->find_cluster_ret != QED_CLUSTER_FOUND) { if (s->bs->backing) { - next_fn = qed_aio_write_flush_before_l2_update; - } else { - next_fn = qed_aio_write_l2_update_cb; + /* + * Flush new data clusters before updating the L2 table + * + * This flush is necessary when a backing file is in use. A crash + * during an allocating write could result in empty clusters in the + * image. If the write only touched a subregion of the cluster, + * then backing image sectors have been lost in the untouched + * region. The solution is to flush after writing a new data + * cluster and before updating the L2 table. 
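+ * (The hazard is ordering: if the L2 update reached the disk before
+ * the data cluster and the host then crashed, the on-disk table would
+ * point at a cluster whose backing-file padding was never written,
+ * silently losing that data.)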
+ */ + ret = bdrv_co_flush(s->bs->file->bs); + if (ret < 0) { + return ret; + } + } + ret = qed_aio_write_l2_update(acb, acb->cur_cluster); + if (ret < 0) { + return ret; } } - - BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); - bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, - &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, - next_fn, acb); + return 0; } /** - * Populate back untouched region of new data cluster + * Populate untouched regions of new data cluster */ -static void qed_aio_write_postfill(void *opaque, int ret) +static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) { - QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); - uint64_t start = acb->cur_pos + acb->cur_qiov.size; - uint64_t len = - qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; - uint64_t offset = acb->cur_cluster + - qed_offset_into_cluster(s, acb->cur_pos) + - acb->cur_qiov.size; + uint64_t start, len, offset; + int ret; - if (ret) { - qed_aio_complete(acb, ret); - return; + /* Populate front untouched region of new data cluster */ + start = qed_start_of_cluster(s, acb->cur_pos); + len = qed_offset_into_cluster(s, acb->cur_pos); + + trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); + ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster); + if (ret < 0) { + return ret; } - trace_qed_aio_write_postfill(s, acb, start, len, offset); - qed_copy_from_backing_file(s, start, len, offset, - qed_aio_write_main, acb); -} + /* Populate back untouched region of new data cluster */ + start = acb->cur_pos + acb->cur_qiov.size; + len = qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; + offset = acb->cur_cluster + + qed_offset_into_cluster(s, acb->cur_pos) + + acb->cur_qiov.size; -/** - * Populate front untouched region of new data cluster - */ -static void qed_aio_write_prefill(void *opaque, int ret) -{ - QEDAIOCB *acb = opaque; - BDRVQEDState *s = acb_to_s(acb); - uint64_t start = qed_start_of_cluster(s, acb->cur_pos); - uint64_t len = qed_offset_into_cluster(s, acb->cur_pos); + trace_qed_aio_write_postfill(s, acb, start, len, offset); + ret = qed_copy_from_backing_file(s, start, len, offset); + if (ret < 0) { + return ret; + } - trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); - qed_copy_from_backing_file(s, start, len, acb->cur_cluster, - qed_aio_write_postfill, acb); + return qed_aio_write_main(acb); } /** @@ -1194,18 +1066,6 @@ static bool qed_should_set_need_check(BDRVQEDState *s) return !(s->header.features & QED_F_NEED_CHECK); } -static void qed_aio_write_zero_cluster(void *opaque, int ret) -{ - QEDAIOCB *acb = opaque; - - if (ret) { - qed_aio_complete(acb, ret); - return; - } - - qed_aio_write_l2_update(acb, 0, 1); -} - /** * Write new data cluster * @@ -1214,23 +1074,24 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret) * * This path is taken when writing to previously unallocated clusters. 
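 *
 * (In the coroutine version this path may also return -EAGAIN: when
 * another allocating request owns the metadata, the request parks on the
 * CoQueue and qed_aio_next_io() restarts the cluster lookup once it is
 * woken, rather than treating -EAGAIN as an error.)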
*/ -static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) +static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len) { BDRVQEDState *s = acb_to_s(acb); - BlockCompletionFunc *cb; + int ret; /* Cancel timer when the first allocating request comes in */ - if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { + if (s->allocating_acb == NULL) { qed_cancel_need_check_timer(s); } /* Freeze this request if another allocating write is in progress */ - if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { - QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next); - } - if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) || - s->allocating_write_reqs_plugged) { - return; /* wait for existing request to finish */ + if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) { + if (s->allocating_acb != NULL) { + qemu_co_queue_wait(&s->allocating_write_reqs, NULL); + assert(s->allocating_acb == NULL); + } + s->allocating_acb = acb; + return -EAGAIN; /* start over with looking up table entries */ } acb->cur_nclusters = qed_bytes_to_clusters(s, @@ -1240,22 +1101,29 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) if (acb->flags & QED_AIOCB_ZERO) { /* Skip ahead if the clusters are already zero */ if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { - qed_aio_start_io(acb); - return; + return 0; } - - cb = qed_aio_write_zero_cluster; } else { - cb = qed_aio_write_prefill; acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); } if (qed_should_set_need_check(s)) { s->header.features |= QED_F_NEED_CHECK; - qed_write_header(s, cb, acb); + ret = qed_write_header(s); + if (ret < 0) { + return ret; + } + } + + if (acb->flags & QED_AIOCB_ZERO) { + ret = qed_aio_write_l2_update(acb, 1); } else { - cb(acb, 0); + ret = qed_aio_write_cow(acb); + } + if (ret < 0) { + return ret; } + return 0; } /** @@ -1267,17 +1135,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) * * This path is taken when writing to already allocated clusters. */ -static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) +static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, + size_t len) { /* Allocate buffer for zero writes */ if (acb->flags & QED_AIOCB_ZERO) { struct iovec *iov = acb->qiov->iov; if (!iov->iov_base) { - iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len); + iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len); if (iov->iov_base == NULL) { - qed_aio_complete(acb, -ENOMEM); - return; + return -ENOMEM; } memset(iov->iov_base, 0, iov->iov_len); } @@ -1288,22 +1156,19 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Do the actual write */ - qed_aio_write_main(acb, 0); + return qed_aio_write_main(acb); } /** * Write data cluster * * @opaque: Write request - * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, - * or -errno + * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1 * @offset: Cluster offset in bytes * @len: Length in bytes - * - * Callback from qed_find_cluster(). 
*/ -static void qed_aio_write_data(void *opaque, int ret, - uint64_t offset, size_t len) +static int coroutine_fn qed_aio_write_data(void *opaque, int ret, + uint64_t offset, size_t len) { QEDAIOCB *acb = opaque; @@ -1313,18 +1178,15 @@ static void qed_aio_write_data(void *opaque, int ret, switch (ret) { case QED_CLUSTER_FOUND: - qed_aio_write_inplace(acb, offset, len); - break; + return qed_aio_write_inplace(acb, offset, len); case QED_CLUSTER_L2: case QED_CLUSTER_L1: case QED_CLUSTER_ZERO: - qed_aio_write_alloc(acb, len); - break; + return qed_aio_write_alloc(acb, len); default: - qed_aio_complete(acb, ret); - break; + g_assert_not_reached(); } } @@ -1332,166 +1194,139 @@ static void qed_aio_write_data(void *opaque, int ret, * Read data cluster * * @opaque: Read request - * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, - * or -errno + * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1 * @offset: Cluster offset in bytes * @len: Length in bytes - * - * Callback from qed_find_cluster(). */ -static void qed_aio_read_data(void *opaque, int ret, - uint64_t offset, size_t len) +static int coroutine_fn qed_aio_read_data(void *opaque, int ret, + uint64_t offset, size_t len) { QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); - BlockDriverState *bs = acb->common.bs; + BlockDriverState *bs = acb->bs; /* Adjust offset into cluster */ offset += qed_offset_into_cluster(s, acb->cur_pos); trace_qed_aio_read_data(s, acb, ret, offset, len); - if (ret < 0) { - goto err; - } - qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Handle zero cluster and backing file reads */ if (ret == QED_CLUSTER_ZERO) { qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); - qed_aio_start_io(acb); - return; + return 0; } else if (ret != QED_CLUSTER_FOUND) { - qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, - &acb->backing_qiov, qed_aio_next_io_cb, acb); - return; + return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, + &acb->backing_qiov); } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE, - &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, - qed_aio_next_io_cb, acb); - return; - -err: - qed_aio_complete(acb, ret); + ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size, + &acb->cur_qiov, 0); + if (ret < 0) { + return ret; + } + return 0; } /** * Begin next I/O or complete the request */ -static void qed_aio_next_io(QEDAIOCB *acb, int ret) +static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb) { BDRVQEDState *s = acb_to_s(acb); - QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ? 
- qed_aio_write_data : qed_aio_read_data; + uint64_t offset; + size_t len; + int ret; - trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); + while (1) { + trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size); - if (acb->backing_qiov) { - qemu_iovec_destroy(acb->backing_qiov); - g_free(acb->backing_qiov); - acb->backing_qiov = NULL; - } + if (acb->backing_qiov) { + qemu_iovec_destroy(acb->backing_qiov); + g_free(acb->backing_qiov); + acb->backing_qiov = NULL; + } - /* Handle I/O error */ - if (ret) { - qed_aio_complete(acb, ret); - return; - } + acb->qiov_offset += acb->cur_qiov.size; + acb->cur_pos += acb->cur_qiov.size; + qemu_iovec_reset(&acb->cur_qiov); - acb->qiov_offset += acb->cur_qiov.size; - acb->cur_pos += acb->cur_qiov.size; - qemu_iovec_reset(&acb->cur_qiov); + /* Complete request */ + if (acb->cur_pos >= acb->end_pos) { + ret = 0; + break; + } - /* Complete request */ - if (acb->cur_pos >= acb->end_pos) { - qed_aio_complete(acb, 0); - return; + /* Find next cluster and start I/O */ + len = acb->end_pos - acb->cur_pos; + ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset); + if (ret < 0) { + break; + } + + if (acb->flags & QED_AIOCB_WRITE) { + ret = qed_aio_write_data(acb, ret, offset, len); + } else { + ret = qed_aio_read_data(acb, ret, offset, len); + } + + if (ret < 0 && ret != -EAGAIN) { + break; + } } - /* Find next cluster and start I/O */ - qed_find_cluster(s, &acb->request, - acb->cur_pos, acb->end_pos - acb->cur_pos, - io_fn, acb); + trace_qed_aio_complete(s, acb, ret); + qed_aio_complete(acb); + return ret; } -static BlockAIOCB *qed_aio_setup(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, - void *opaque, int flags) -{ - QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque); - - trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, - opaque, flags); +static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + int flags) +{ + QEDAIOCB acb = { + .bs = bs, + .cur_pos = (uint64_t) sector_num * BDRV_SECTOR_SIZE, + .end_pos = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE, + .qiov = qiov, + .flags = flags, + }; + qemu_iovec_init(&acb.cur_qiov, qiov->niov); - acb->flags = flags; - acb->qiov = qiov; - acb->qiov_offset = 0; - acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; - acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; - acb->backing_qiov = NULL; - acb->request.l2_table = NULL; - qemu_iovec_init(&acb->cur_qiov, qiov->niov); + trace_qed_aio_setup(bs->opaque, &acb, sector_num, nb_sectors, NULL, flags); /* Start request */ - qed_aio_start_io(acb); - return &acb->common; + return qed_aio_next_io(&acb); } -static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, - void *opaque) +static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) { - return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); + return qed_co_request(bs, sector_num, qiov, nb_sectors, 0); } -static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, - void *opaque) -{ - return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, - opaque, QED_AIOCB_WRITE); -} - -typedef struct { - Coroutine *co; - int ret; - bool done; -} QEDWriteZeroesCB; - -static void 
coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret) +static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) { - QEDWriteZeroesCB *cb = opaque; - - cb->done = true; - cb->ret = ret; - if (cb->co) { - aio_co_wake(cb->co); - } + return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE); } static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int count, + int bytes, BdrvRequestFlags flags) { - BlockAIOCB *blockacb; BDRVQEDState *s = bs->opaque; - QEDWriteZeroesCB cb = { .done = false }; QEMUIOVector qiov; struct iovec iov; /* Fall back if the request is not aligned */ if (qed_offset_into_cluster(s, offset) || - qed_offset_into_cluster(s, count)) { + qed_offset_into_cluster(s, bytes)) { return -ENOTSUP; } @@ -1499,22 +1334,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, * then it will be allocated during request processing. */ iov.iov_base = NULL; - iov.iov_len = count; + iov.iov_len = bytes; qemu_iovec_init_external(&qiov, &iov, 1); - blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov, - count >> BDRV_SECTOR_BITS, - qed_co_pwrite_zeroes_cb, &cb, - QED_AIOCB_WRITE | QED_AIOCB_ZERO); - if (!blockacb) { - return -EIO; - } - if (!cb.done) { - cb.co = qemu_coroutine_self(); - qemu_coroutine_yield(); - } - assert(cb.done); - return cb.ret; + return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov, + bytes >> BDRV_SECTOR_BITS, + QED_AIOCB_WRITE | QED_AIOCB_ZERO); } static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp) @@ -1710,8 +1535,8 @@ static BlockDriver bdrv_qed = { .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, - .bdrv_aio_readv = bdrv_qed_aio_readv, - .bdrv_aio_writev = bdrv_qed_aio_writev, + .bdrv_co_readv = bdrv_qed_co_readv, + .bdrv_co_writev = bdrv_qed_co_writev, .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, .bdrv_truncate = bdrv_qed_truncate, .bdrv_getlength = bdrv_qed_getlength, diff --git a/block/qed.h b/block/qed.h index ce8c314089..dd3a2d5519 100644 --- a/block/qed.h +++ b/block/qed.h @@ -129,8 +129,7 @@ enum { }; typedef struct QEDAIOCB { - BlockAIOCB common; - int bh_ret; /* final return status for completion bh */ + BlockDriverState *bs; QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */ int flags; /* QED_AIOCB_* bits ORed together */ uint64_t end_pos; /* request end on block device, in bytes */ @@ -163,7 +162,8 @@ typedef struct { uint32_t l2_mask; /* Allocating write request queue */ - QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs; + QEDAIOCB *allocating_acb; + CoQueue allocating_write_reqs; bool allocating_write_reqs_plugged; /* Periodic flush and clear need check flag */ @@ -177,42 +177,10 @@ enum { QED_CLUSTER_L1, /* cluster missing in L1 */ }; -/** - * qed_find_cluster() completion callback - * - * @opaque: User data for completion callback - * @ret: QED_CLUSTER_FOUND Success - * QED_CLUSTER_L2 Data cluster unallocated in L2 - * QED_CLUSTER_L1 L2 unallocated in L1 - * -errno POSIX error occurred - * @offset: Data cluster offset - * @len: Contiguous bytes starting from cluster offset - * - * This function is invoked when qed_find_cluster() completes. - * - * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range - * in the image file. - * - * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1 - * table offset, respectively. 
len is number of contiguous unallocated bytes. - */ -typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len); - void qed_acquire(BDRVQEDState *s); void qed_release(BDRVQEDState *s); /** - * Generic callback for chaining async callbacks - */ -typedef struct { - BlockCompletionFunc *cb; - void *opaque; -} GenericCB; - -void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque); -void gencb_complete(void *opaque, int ret); - -/** * Header functions */ int qed_write_header_sync(BDRVQEDState *s); @@ -231,25 +199,23 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table); * Table I/O functions */ int qed_read_l1_table_sync(BDRVQEDState *s); -void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, - BlockCompletionFunc *cb, void *opaque); +int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n); int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, unsigned int n); int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset); -void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, - BlockCompletionFunc *cb, void *opaque); -void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, - unsigned int index, unsigned int n, bool flush, - BlockCompletionFunc *cb, void *opaque); +int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset); +int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, + unsigned int index, unsigned int n, bool flush); int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, unsigned int index, unsigned int n, bool flush); /** * Cluster functions */ -void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, - size_t len, QEDFindClusterFunc *cb, void *opaque); +int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request, + uint64_t pos, size_t *len, + uint64_t *img_offset); /** * Consistency check diff --git a/block/raw-format.c b/block/raw-format.c index 36e65036f0..0d185fe41b 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -264,7 +264,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, } static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int count, + int64_t offset, int bytes, BdrvRequestFlags flags) { BDRVRawState *s = bs->opaque; @@ -272,18 +272,18 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, return -EINVAL; } offset += s->offset; - return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); } static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, - int64_t offset, int count) + int64_t offset, int bytes) { BDRVRawState *s = bs->opaque; if (offset > UINT64_MAX - s->offset) { return -EINVAL; } offset += s->offset; - return bdrv_co_pdiscard(bs->file->bs, offset, count); + return bdrv_co_pdiscard(bs->file->bs, offset, bytes); } static int64_t raw_getlength(BlockDriverState *bs) diff --git a/block/rbd.c b/block/rbd.c index ff44e5f437..9da02cdceb 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1065,11 +1065,11 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, #ifdef LIBRBD_SUPPORTS_DISCARD static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs, int64_t offset, - int count, + int bytes, BlockCompletionFunc *cb, void *opaque) { - return rbd_start_aio(bs, offset, NULL, count, cb, opaque, + return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque, RBD_AIO_DISCARD); } #endif diff --git 
a/block/sheepdog.c b/block/sheepdog.c index c9236679c6..08d7b11e9d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1046,11 +1046,11 @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename, } /* transport */ - if (!strcmp(uri->scheme, "sheepdog")) { + if (!g_strcmp0(uri->scheme, "sheepdog")) { is_unix = false; - } else if (!strcmp(uri->scheme, "sheepdog+tcp")) { + } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) { is_unix = false; - } else if (!strcmp(uri->scheme, "sheepdog+unix")) { + } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) { is_unix = true; } else { error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp'," @@ -2935,7 +2935,7 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, - int count) + int bytes) { SheepdogAIOCB acb; BDRVSheepdogState *s = bs->opaque; @@ -2953,11 +2953,11 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, iov.iov_len = sizeof(zero); discard_iov.iov = &iov; discard_iov.niov = 1; - if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) { + if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) { return -ENOTSUP; } sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS, - count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); + bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); sd_co_rw_vector(&acb); sd_aio_complete(&acb); diff --git a/block/ssh.c b/block/ssh.c index bac3453c3e..52964416da 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -204,7 +204,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) return -EINVAL; } - if (strcmp(uri->scheme, "ssh") != 0) { + if (g_strcmp0(uri->scheme, "ssh") != 0) { error_setg(errp, "URI scheme must be 'ssh'"); goto err; } diff --git a/block/throttle-groups.c b/block/throttle-groups.c index a181cb1dee..da2b490c38 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -49,7 +49,7 @@ * Again, all this is handled internally and is mostly transparent to * the outside. The 'throttle_timers' field however has an additional * constraint because it may be temporarily invalid (see for example - * bdrv_set_aio_context()). Therefore in this file a thread will + * blk_set_aio_context()). Therefore in this file a thread will * access some other BlockBackend's timers only after verifying that * that BlockBackend has throttled requests in the queue. 
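 * (The queue check is what makes this safe: an AioContext switch only
 * happens after the BlockBackend has been drained, so a BlockBackend
 * that still has throttled requests queued cannot be mid-switch.)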
*/ diff --git a/block/trace-events b/block/trace-events index 9a71c7fb04..752de6a054 100644 --- a/block/trace-events +++ b/block/trace-events @@ -9,9 +9,6 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x" # block/io.c -bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" -bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" -bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x" diff --git a/blockjob.c b/blockjob.c index a0d7e29b83..70a78188b7 100644 --- a/blockjob.c +++ b/blockjob.c @@ -139,7 +139,7 @@ static void block_job_resume(BlockJob *job) block_job_enter(job); } -static void block_job_ref(BlockJob *job) +void block_job_ref(BlockJob *job) { ++job->refcnt; } @@ -148,7 +148,7 @@ static void block_job_attached_aio_context(AioContext *new_context, void *opaque); static void block_job_detach_aio_context(void *opaque); -static void block_job_unref(BlockJob *job) +void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { BlockDriverState *bs = blk_bs(job->blk); diff --git a/chardev/char.c b/chardev/char.c index bcfc065d16..2b679a2295 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -951,6 +951,18 @@ void qmp_chardev_remove(const char *id, Error **errp) object_unparent(OBJECT(chr)); } +void qmp_chardev_send_break(const char *id, Error **errp) +{ + Chardev *chr; + + chr = qemu_chr_find(id); + if (chr == NULL) { + error_setg(errp, "Chardev '%s' not found", id); + return; + } + qemu_chr_be_event(chr, CHR_EVENT_BREAK); +} + void qemu_chr_cleanup(void) { object_unparent(get_chardevs_root()); @@ -3029,6 +3029,8 @@ int main(void) { EOF IFS=: for curses_inc in $curses_inc_list; do + # Make sure we get the wide character prototypes + curses_inc="-DNCURSES_WIDECHAR $curses_inc" IFS=: for curses_lib in $curses_lib_list; do unset IFS diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index 0844a403dc..474c79d003 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -76,6 +76,8 @@ typedef struct FsDriverEntry { int export_flags; FileOperations *ops; FsThrottle fst; + mode_t fmode; + mode_t dmode; } FsDriverEntry; typedef struct FsContext @@ -88,6 +90,8 @@ typedef struct FsContext FsThrottle *fst; /* fs driver specific data */ void *private; + mode_t fmode; + mode_t dmode; } FsContext; typedef struct V9fsPath { diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c index bf5713008a..7c31ffffaf 100644 --- a/fsdev/qemu-fsdev-opts.c +++ b/fsdev/qemu-fsdev-opts.c @@ -38,6 +38,12 @@ static QemuOptsList qemu_fsdev_opts = { }, { .name = "sock_fd", .type = QEMU_OPT_NUMBER, + }, { + .name = "fmode", + .type = QEMU_OPT_NUMBER, + }, { + .name = "dmode", + .type = QEMU_OPT_NUMBER, }, THROTTLE_OPTS, @@ -75,6 +81,12 @@ static QemuOptsList qemu_virtfs_opts = { }, { .name = "sock_fd", .type = QEMU_OPT_NUMBER, + }, { + .name = "fmode", + .type = QEMU_OPT_NUMBER, + }, { + .name = "dmode", + .type = QEMU_OPT_NUMBER, }, { /*End of list */ } diff --git 
a/hmp-commands-info.hx b/hmp-commands-info.hx index ae169011b1..ba98e581ab 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -100,9 +100,9 @@ ETEXI { .name = "registers", - .args_type = "", - .params = "", - .help = "show the cpu registers", + .args_type = "cpustate_all:-a", + .params = "[-a]", + .help = "show the cpu registers (-a: all - show register info for all cpus)", .cmd = hmp_info_registers, }, diff --git a/hmp-commands.hx b/hmp-commands.hx index e763606fe5..275ccdfbc7 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1745,6 +1745,22 @@ Removes the chardev @var{id}. ETEXI { + .name = "chardev-send-break", + .args_type = "id:s", + .params = "id", + .help = "send a break on chardev", + .cmd = hmp_chardev_send_break, + .command_completion = chardev_remove_completion, + }, + +STEXI +@item chardev-send-break id +@findex chardev-send-break +Send a break on the chardev @var{id}. + +ETEXI + + { .name = "qemu-io", .args_type = "device:B,command:s", .params = "[device] \"[command]\"", @@ -43,6 +43,7 @@ #include "exec/ramlist.h" #include "hw/intc/intc.h" #include "migration/snapshot.h" +#include "migration/misc.h" #ifdef CONFIG_SPICE #include <spice/enums.h> @@ -164,6 +165,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info = qmp_query_migrate(NULL); caps = qmp_query_migrate_capabilities(NULL); + migration_global_dump(mon); + /* do not display parameters during setup */ if (info->has_status && caps) { monitor_printf(mon, "capabilities: "); @@ -2233,6 +2236,14 @@ void hmp_chardev_remove(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &local_err); } +void hmp_chardev_send_break(Monitor *mon, const QDict *qdict) +{ + Error *local_err = NULL; + + qmp_chardev_send_break(qdict_get_str(qdict, "id"), &local_err); + hmp_handle_error(mon, &local_err); +} + void hmp_qemu_io(Monitor *mon, const QDict *qdict) { BlockBackend *blk; @@ -103,6 +103,7 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict); void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict); void hmp_chardev_add(Monitor *mon, const QDict *qdict); void hmp_chardev_remove(Monitor *mon, const QDict *qdict); +void hmp_chardev_send_break(Monitor *mon, const QDict *qdict); void hmp_qemu_io(Monitor *mon, const QDict *qdict); void hmp_cpu_add(Monitor *mon, const QDict *qdict); void hmp_object_add(Monitor *mon, const QDict *qdict); diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 1e78b7c9e9..6e478f4765 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -633,7 +633,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); + err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); if (err == -1) { goto out; } @@ -685,7 +685,7 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); + err = mkdirat(dirfd, name, fs_ctx->dmode); if (err == -1) { goto out; } @@ -786,7 +786,7 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, /* Determine the security model */ if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); + fd = openat_file(dirfd, name, flags, fs_ctx->fmode); if (fd == -1) { goto out; } @@ -849,7 +849,7 @@ static int local_symlink(FsContext 
*fs_ctx, const char *oldpath, ssize_t oldpath_size, write_size; fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, - SM_LOCAL_MODE_BITS); + fs_ctx->fmode); if (fd == -1) { goto out; } @@ -1100,7 +1100,7 @@ static int local_remove(FsContext *ctx, const char *path) goto out; } - if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { goto err_out; } @@ -1467,6 +1467,23 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) return -1; } + if (fse->export_flags & V9FS_SM_MAPPED || + fse->export_flags & V9FS_SM_MAPPED_FILE) { + fse->fmode = + qemu_opt_get_number(opts, "fmode", SM_LOCAL_MODE_BITS) & 0777; + fse->dmode = + qemu_opt_get_number(opts, "dmode", SM_LOCAL_DIR_MODE_BITS) & 0777; + } else { + if (qemu_opt_find(opts, "fmode")) { + error_report("fmode is only valid for mapped 9p modes"); + return -1; + } + if (qemu_opt_find(opts, "dmode")) { + error_report("dmode is only valid for mapped 9p modes"); + return -1; + } + } + fse->path = g_strdup(path); return 0; diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index 4b6d4e6a3f..df0a8de08a 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -494,8 +494,7 @@ static int synth_name_to_path(FsContext *ctx, V9fsPath *dir_path, } out: /* Copy the node pointer to fid */ - target->data = g_malloc(sizeof(void *)); - memcpy(target->data, &node, sizeof(void *)); + target->data = g_memdup(&node, sizeof(void *)); target->size = sizeof(void *); return 0; } diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 96d2683348..6c92bad5b3 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -624,15 +624,11 @@ void pdu_free(V9fsPDU *pdu) QLIST_INSERT_HEAD(&s->free_list, pdu, next); } -/* - * We don't do error checking for pdu_marshal/unmarshal here - * because we always expect to have enough space to encode - * error details - */ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) { int8_t id = pdu->id + 1; /* Response */ V9fsState *s = pdu->s; + int ret; if (len < 0) { int err = -len; @@ -644,11 +640,19 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) str.data = strerror(err); str.size = strlen(str.data); - len += pdu_marshal(pdu, len, "s", &str); + ret = pdu_marshal(pdu, len, "s", &str); + if (ret < 0) { + goto out_notify; + } + len += ret; id = P9_RERROR; } - len += pdu_marshal(pdu, len, "d", err); + ret = pdu_marshal(pdu, len, "d", err); + if (ret < 0) { + goto out_notify; + } + len += ret; if (s->proto_version == V9FS_PROTO_2000L) { id = P9_RLERROR; @@ -657,12 +661,15 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) } /* fill out the header */ - pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag); + if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) { + goto out_notify; + } /* keep these in sync */ pdu->size = len; pdu->id = id; +out_notify: pdu->s->transport->push_and_notify(pdu); /* Now wakeup anybody waiting in flush for this request */ @@ -1664,7 +1671,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, unsigned int niov; if (is_write) { - pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov); + pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip); } else { pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip); } @@ -3533,6 +3540,9 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) s->ops = fse->ops; + s->ctx.fmode = fse->fmode; + s->ctx.dmode = fse->dmode; + s->fid_list = NULL; qemu_co_rwlock_init(&s->rename_lock); diff 
--git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index c886ba78d2..d1cfeaf10e 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -124,6 +124,11 @@ typedef struct { uint8_t id; uint16_t tag_le; } QEMU_PACKED P9MsgHeader; +/* According to the specification, 9p messages start with a 7-byte header. + * Since most of the code uses this header size in literal form, we must be + * sure this is indeed the case. + */ +QEMU_BUILD_BUG_ON(sizeof(P9MsgHeader) != 7); struct V9fsPDU { @@ -358,7 +363,7 @@ struct V9fsTransport { void (*init_in_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov, unsigned int *pniov, size_t size); void (*init_out_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov); + unsigned int *pniov, size_t size); void (*push_and_notify)(V9fsPDU *pdu); }; diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 245abd8aae..62650b0a6b 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -53,23 +53,22 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) goto out_free_pdu; } - if (elem->in_num == 0) { + if (iov_size(elem->in_sg, elem->in_num) < 7) { virtio_error(vdev, "The guest sent a VirtFS request without space for " "the reply"); goto out_free_req; } - QEMU_BUILD_BUG_ON(sizeof(out) != 7); - v->elems[pdu->idx] = elem; - len = iov_to_buf(elem->out_sg, elem->out_num, 0, - &out, sizeof(out)); - if (len != sizeof(out)) { + len = iov_to_buf(elem->out_sg, elem->out_num, 0, &out, 7); + if (len != 7) { virtio_error(vdev, "The guest sent a malformed VirtFS request: " "header size is %zd, should be 7", len); goto out_free_req; } + v->elems[pdu->idx] = elem; + pdu_submit(pdu, &out); } @@ -147,8 +146,16 @@ static ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset, V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + ssize_t ret; + + ret = v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap); + if (ret < 0) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); - return v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap); + virtio_error(vdev, "Failed to encode VirtFS reply type %d", + pdu->id + 1); + } + return ret; } static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset, @@ -157,28 +164,52 @@ static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset, V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + ssize_t ret; + + ret = v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap); + if (ret < 0) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); - return v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap); + virtio_error(vdev, "Failed to decode VirtFS request type %d", pdu->id); + } + return ret; } -/* The size parameter is used by other transports. Do not drop it. 
*/ static void virtio_init_in_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, unsigned int *pniov, size_t size) { V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + size_t buf_size = iov_size(elem->in_sg, elem->in_num); + + if (buf_size < size) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); + + virtio_error(vdev, + "VirtFS reply type %d needs %zu bytes, buffer has %zu", + pdu->id + 1, size, buf_size); + } *piov = elem->in_sg; *pniov = elem->in_num; } static void virtio_init_out_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov) + unsigned int *pniov, size_t size) { V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + size_t buf_size = iov_size(elem->out_sg, elem->out_num); + + if (buf_size < size) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); + + virtio_error(vdev, + "VirtFS request type %d needs %zu bytes, buffer has %zu", + pdu->id, size, buf_size); + } *piov = elem->out_sg; *pniov = elem->out_num; diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 922cc967be..ee87f08926 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -54,6 +54,8 @@ typedef struct Xen9pfsDev { Xen9pfsRing *rings; } Xen9pfsDev; +static void xen_9pfs_disconnect(struct XenDevice *xendev); + static void xen_9pfs_in_sg(Xen9pfsRing *ring, struct iovec *in_sg, int *num, @@ -125,10 +127,19 @@ static ssize_t xen_9pfs_pdu_vmarshal(V9fsPDU *pdu, Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); struct iovec in_sg[2]; int num; + ssize_t ret; xen_9pfs_in_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings], in_sg, &num, pdu->idx, ROUND_UP(offset + 128, 512)); - return v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap); + + ret = v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap); + if (ret < 0) { + xen_pv_printf(&xen_9pfs->xendev, 0, + "Failed to encode VirtFS request type %d\n", pdu->id + 1); + xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); + xen_9pfs_disconnect(&xen_9pfs->xendev); + } + return ret; } static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu, @@ -139,15 +150,25 @@ static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu, Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); struct iovec out_sg[2]; int num; + ssize_t ret; xen_9pfs_out_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings], out_sg, &num, pdu->idx); - return v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap); + + ret = v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap); + if (ret < 0) { + xen_pv_printf(&xen_9pfs->xendev, 0, + "Failed to decode VirtFS request type %d\n", pdu->id); + xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); + xen_9pfs_disconnect(&xen_9pfs->xendev); + } + return ret; } static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov) + unsigned int *pniov, + size_t size) { Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings]; @@ -169,11 +190,22 @@ static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu, Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings]; int num; + size_t buf_size; g_free(ring->sg); ring->sg = g_malloc0(sizeof(*ring->sg) * 2); xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size); + + buf_size = iov_size(ring->sg, num); + if (buf_size < size) { + xen_pv_printf(&xen_9pfs->xendev, 0, 
"Xen 9pfs request type %d" + "needs %zu bytes, buffer has %zu\n", pdu->id, size, + buf_size); + xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); + xen_9pfs_disconnect(&xen_9pfs->xendev); + } + *piov = ring->sg; *pniov = num; } @@ -217,7 +249,7 @@ static int xen_9pfs_init(struct XenDevice *xendev) static int xen_9pfs_receive(Xen9pfsRing *ring) { P9MsgHeader h; - RING_IDX cons, prod, masked_prod, masked_cons; + RING_IDX cons, prod, masked_prod, masked_cons, queued; V9fsPDU *pdu; if (ring->inprogress) { @@ -228,8 +260,8 @@ static int xen_9pfs_receive(Xen9pfsRing *ring) prod = ring->intf->out_prod; xen_rmb(); - if (xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order)) < - sizeof(h)) { + queued = xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order)); + if (queued < sizeof(h)) { return 0; } ring->inprogress = true; @@ -240,6 +272,9 @@ static int xen_9pfs_receive(Xen9pfsRing *ring) xen_9pfs_read_packet((uint8_t *) &h, ring->ring.out, sizeof(h), masked_prod, &masked_cons, XEN_FLEX_RING_SIZE(ring->ring_order)); + if (queued < le32_to_cpu(h.size_le)) { + return 0; + } /* cannot fail, because we only handle one request per ring at a time */ pdu = pdu_alloc(&ring->priv->state); @@ -268,15 +303,30 @@ static void xen_9pfs_evtchn_event(void *opaque) qemu_bh_schedule(ring->bh); } -static int xen_9pfs_free(struct XenDevice *xendev) +static void xen_9pfs_disconnect(struct XenDevice *xendev) { + Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev); int i; + + for (i = 0; i < xen_9pdev->num_rings; i++) { + if (xen_9pdev->rings[i].evtchndev != NULL) { + qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), + NULL, NULL, NULL); + xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, + xen_9pdev->rings[i].local_port); + xen_9pdev->rings[i].evtchndev = NULL; + } + } +} + +static int xen_9pfs_free(struct XenDevice *xendev) +{ Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev); + int i; - g_free(xen_9pdev->id); - g_free(xen_9pdev->tag); - g_free(xen_9pdev->path); - g_free(xen_9pdev->security_model); + if (xen_9pdev->rings[0].evtchndev != NULL) { + xen_9pfs_disconnect(xendev); + } for (i = 0; i < xen_9pdev->num_rings; i++) { if (xen_9pdev->rings[i].data != NULL) { @@ -289,16 +339,15 @@ static int xen_9pfs_free(struct XenDevice *xendev) xen_9pdev->rings[i].intf, 1); } - if (xen_9pdev->rings[i].evtchndev > 0) { - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - NULL, NULL, NULL); - xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, - xen_9pdev->rings[i].local_port); - } if (xen_9pdev->rings[i].bh != NULL) { qemu_bh_delete(xen_9pdev->rings[i].bh); } } + + g_free(xen_9pdev->id); + g_free(xen_9pdev->tag); + g_free(xen_9pdev->path); + g_free(xen_9pdev->security_model); g_free(xen_9pdev->rings); return 0; } @@ -422,11 +471,6 @@ static void xen_9pfs_alloc(struct XenDevice *xendev) xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER); } -static void xen_9pfs_disconnect(struct XenDevice *xendev) -{ - /* Dynamic hotplug of PV filesystems at runtime is not supported. 
*/ -} - struct XenDevOps xen_9pfs_ops = { .size = sizeof(Xen9pfsDev), .flags = DEVOPS_FLAG_NEED_GNTDEV, diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 28f6b6ee35..401129073b 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -1217,7 +1217,7 @@ static const VMStateDescription vmstate_fdc = { VMSTATE_UINT8(config, FDCtrl), VMSTATE_UINT8(lock, FDCtrl), VMSTATE_UINT8(pwrd, FDCtrl), - VMSTATE_UINT8_EQUAL(num_floppies, FDCtrl), + VMSTATE_UINT8_EQUAL(num_floppies, FDCtrl, NULL), VMSTATE_STRUCT_ARRAY(drives, FDCtrl, MAX_FD, 1, vmstate_fdrive, FDrive), VMSTATE_END_OF_LIST() diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 381dc7c5fb..6071dc12d8 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -21,7 +21,7 @@ * cmb_size_mb=<cmb_size_mb[optional]> * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at - * offset 0 in BAR2 and supports SQS only for now. + * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. */ #include "qemu/osdep.h" @@ -93,8 +93,8 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) } } -static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, - uint32_t len, NvmeCtrl *n) +static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, + uint64_t prp2, uint32_t len, NvmeCtrl *n) { hwaddr trans_len = n->page_size - (prp1 % n->page_size); trans_len = MIN(len, trans_len); @@ -102,10 +102,15 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, if (!prp1) { return NVME_INVALID_FIELD | NVME_DNR; + } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && + prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { + qsg->nsg = 0; + qemu_iovec_init(iov, num_prps); + qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len); + } else { + pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); + qemu_sglist_add(qsg, prp1, trans_len); } - - pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); - qemu_sglist_add(qsg, prp1, trans_len); len -= trans_len; if (len) { if (!prp2) { @@ -118,7 +123,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans); + nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); while (len != 0) { uint64_t prp_ent = le64_to_cpu(prp_list[i]); @@ -130,7 +135,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, i = 0; nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list, + nvme_addr_read(n, prp_ent, (void *)prp_list, prp_trans); prp_ent = le64_to_cpu(prp_list[i]); } @@ -140,7 +145,11 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, } trans_len = MIN(len, n->page_size); - qemu_sglist_add(qsg, prp_ent, trans_len); + if (qsg->nsg){ + qemu_sglist_add(qsg, prp_ent, trans_len); + } else { + qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len); + } len -= trans_len; i++; } @@ -148,7 +157,11 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, if (prp2 & (n->page_size - 1)) { goto unmap; } - qemu_sglist_add(qsg, prp2, len); + if (qsg->nsg) { + qemu_sglist_add(qsg, prp2, len); + } else { + qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len); + } } } return NVME_SUCCESS; @@ -162,16 +175,24 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl 
*n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2) { QEMUSGList qsg; + QEMUIOVector iov; + uint16_t status = NVME_SUCCESS; - if (nvme_map_prp(&qsg, prp1, prp2, len, n)) { + if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { return NVME_INVALID_FIELD | NVME_DNR; } - if (dma_buf_read(ptr, len, &qsg)) { + if (qsg.nsg > 0) { + if (dma_buf_read(ptr, len, &qsg)) { + status = NVME_INVALID_FIELD | NVME_DNR; + } qemu_sglist_destroy(&qsg); - return NVME_INVALID_FIELD | NVME_DNR; + } else { + if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { + status = NVME_INVALID_FIELD | NVME_DNR; + } + qemu_iovec_destroy(&iov); } - qemu_sglist_destroy(&qsg); - return NVME_SUCCESS; + return status; } static void nvme_post_cqes(void *opaque) @@ -285,20 +306,27 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_LBA_RANGE | NVME_DNR; } - if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) { + if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); return NVME_INVALID_FIELD | NVME_DNR; } - assert((nlb << data_shift) == req->qsg.size); - - req->has_sg = true; dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); - req->aiocb = is_write ? - dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req) : - dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req); + if (req->qsg.nsg > 0) { + req->has_sg = true; + req->aiocb = is_write ? + dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, + nvme_rw_cb, req) : + dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, + nvme_rw_cb, req); + } else { + req->has_sg = false; + req->aiocb = is_write ? + blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, + req) : + blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, + req); + } return NVME_NO_COMPLETE; } @@ -987,11 +1015,14 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + n->cmbloc = n->bar.cmbloc; + n->cmbsz = n->bar.cmbsz; + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index b4961d2547..6aab338ff5 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -712,6 +712,7 @@ typedef struct NvmeRequest { NvmeCqe cqe; BlockAcctCookie acct; QEMUSGList qsg; + QEMUIOVector iov; QTAILQ_ENTRY(NvmeRequest)entry; } NvmeRequest; diff --git a/hw/block/trace-events b/hw/block/trace-events index 65e83dc258..c332c01ea8 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -1,11 +1,11 @@ # See docs/tracing.txt for syntax documentation. 
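The nvme hunks above extend nvme_map_prp() so that PRP entries pointing into the Controller Memory Buffer are gathered into a QEMUIOVector served from the controller's own buffer, while all other entries still go through a QEMUSGList for guest DMA. A minimal sketch of the window test this depends on, using hypothetical stand-in types for the real NvmeCtrl fields (n->ctrl_mem.addr, n->ctrl_mem.size):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the CMB window tracked by NvmeCtrl. */
    typedef struct {
        uint64_t base;   /* guest-physical base of the CMB BAR */
        uint64_t size;   /* CMB size in bytes */
    } CmbWindow;

    /* True when a PRP entry falls inside the CMB: the transfer can then
     * be served straight from the controller's buffer (QEMUIOVector
     * path) instead of being mapped for guest DMA (QEMUSGList path). */
    static bool prp_in_cmb(const CmbWindow *w, uint64_t prp)
    {
        return prp >= w->base && prp < w->base + w->size;
    }

Once the first PRP has picked a path, the later qsg->nsg checks in nvme_map_prp() and nvme_rw() are enough to keep the rest of the request on the same one.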
# hw/block/virtio-blk.c -virtio_blk_req_complete(void *req, int status) "req %p status %d" -virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" -virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" -virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" -virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t offset, size_t size, bool is_write) "mrb %p start %d num_reqs %d offset %"PRIu64" size %zu is_write %d" +virtio_blk_req_complete(void *vdev, void *req, int status) "vdev %p req %p status %d" +virtio_blk_rw_complete(void *vdev, void *req, int ret) "vdev %p req %p ret %d" +virtio_blk_handle_write(void *vdev, void *req, uint64_t sector, size_t nsectors) "vdev %p req %p sector %"PRIu64" nsectors %zu" +virtio_blk_handle_read(void *vdev, void *req, uint64_t sector, size_t nsectors) "vdev %p req %p sector %"PRIu64" nsectors %zu" +virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint64_t offset, size_t size, bool is_write) "vdev %p mrb %p start %d num_reqs %d offset %"PRIu64" size %zu is_write %d" # hw/block/hd-geometry.c hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 604d37dfc8..c0bd247b37 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -50,7 +50,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) VirtIOBlock *s = req->dev; VirtIODevice *vdev = VIRTIO_DEVICE(s); - trace_virtio_blk_req_complete(req, status); + trace_virtio_blk_req_complete(vdev, req, status); stb_p(&req->in->status, status); virtqueue_push(req->vq, &req->elem, req->in_len); @@ -88,12 +88,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret) { VirtIOBlockReq *next = opaque; VirtIOBlock *s = next->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); while (next) { VirtIOBlockReq *req = next; next = req->mr_next; - trace_virtio_blk_rw_complete(req, ret); + trace_virtio_blk_rw_complete(vdev, req, ret); if (req->qiov.nalloc != -1) { /* If nalloc is != 1 req->qiov is a local copy of the original @@ -355,7 +356,8 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; } - trace_virtio_blk_submit_multireq(mrb, start, num_reqs, + trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev), + mrb, start, num_reqs, sector_num << BDRV_SECTOR_BITS, qiov->size, is_write); block_acct_merge_done(blk_get_stats(blk), @@ -526,11 +528,11 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) if (is_write) { qemu_iovec_init_external(&req->qiov, iov, out_num); - trace_virtio_blk_handle_write(req, req->sector_num, + trace_virtio_blk_handle_write(vdev, req, req->sector_num, req->qiov.size / BDRV_SECTOR_SIZE); } else { qemu_iovec_init_external(&req->qiov, in_iov, in_num); - trace_virtio_blk_handle_read(req, req->sector_num, + trace_virtio_blk_handle_read(vdev, req, req->sector_num, req->qiov.size / BDRV_SECTOR_SIZE); } diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 3a22805fbc..d42ed7070d 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -36,8 +36,6 @@ static int batch_maps = 0; -static int max_requests = 32; - /* ------------------------------------------------------------- */ #define BLOCK_SIZE 512 @@ -84,6 +82,8 @@ struct ioreq { BlockAcctCookie acct; }; 
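The trace-events hunk above threads the owning VirtIODevice through every virtio-blk trace point, so interleaved records from several disks can be attributed to the right device. A sketch of the idea with a hand-rolled function standing in for the generated trace_virtio_blk_req_complete() (all names here are illustrative only):

    #include <stdio.h>

    /* Stand-in for a generated trace function; tagging each record with
     * the device pointer lets logs from multiple disks be separated by
     * filtering on the vdev value. */
    static void trace_req_complete(void *vdev, void *req, int status)
    {
        fprintf(stderr, "virtio_blk_req_complete vdev %p req %p status %d\n",
                vdev, req, status);
    }

    int main(void)
    {
        int dev_a, req_1;
        trace_req_complete(&dev_a, &req_1, 0);
        return 0;
    }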
+#define MAX_RING_PAGE_ORDER 4 + struct XenBlkDev { struct XenDevice xendev; /* must be first */ char *params; @@ -94,7 +94,8 @@ struct XenBlkDev { bool directiosafe; const char *fileproto; const char *filename; - int ring_ref; + unsigned int ring_ref[1 << MAX_RING_PAGE_ORDER]; + unsigned int nr_ring_ref; void *sring; int64_t file_blk; int64_t file_size; @@ -110,6 +111,7 @@ struct XenBlkDev { int requests_total; int requests_inflight; int requests_finished; + unsigned int max_requests; /* Persistent grants extension */ gboolean feature_discard; @@ -199,7 +201,7 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) struct ioreq *ioreq = NULL; if (QLIST_EMPTY(&blkdev->freelist)) { - if (blkdev->requests_total >= max_requests) { + if (blkdev->requests_total >= blkdev->max_requests) { goto out; } /* allocate new struct */ @@ -769,31 +771,30 @@ static int blk_send_response_one(struct ioreq *ioreq) struct XenBlkDev *blkdev = ioreq->blkdev; int send_notify = 0; int have_requests = 0; - blkif_response_t resp; - void *dst; - - resp.id = ioreq->req.id; - resp.operation = ioreq->req.operation; - resp.status = ioreq->status; + blkif_response_t *resp; /* Place on the response ring for the relevant domain. */ switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: - dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); + resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.native, + blkdev->rings.native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: - dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part, - blkdev->rings.x86_32_part.rsp_prod_pvt); + resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_32_part, + blkdev->rings.x86_32_part.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: - dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part, - blkdev->rings.x86_64_part.rsp_prod_pvt); + resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_64_part, + blkdev->rings.x86_64_part.rsp_prod_pvt); break; default: - dst = NULL; return 0; } - memcpy(dst, &resp, sizeof(resp)); + + resp->id = ioreq->req.id; + resp->operation = ioreq->req.operation; + resp->status = ioreq->status; + blkdev->rings.common.rsp_prod_pvt++; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify); @@ -905,7 +906,7 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) ioreq_runio_qemu_aio(ioreq); } - if (blkdev->more_work && blkdev->requests_inflight < max_requests) { + if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) { qemu_bh_schedule(blkdev->bh); } } @@ -918,15 +919,6 @@ static void blk_bh(void *opaque) blk_handle_requests(blkdev); } -/* - * We need to account for the grant allocations requiring contiguous - * chunks; the worst case number would be - * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1, - * but in order to keep things simple just use - * 2 * max_req * max_seg. 
- */ -#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg)) - static void blk_alloc(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); @@ -938,11 +930,6 @@ static void blk_alloc(struct XenDevice *xendev) if (xen_mode != XEN_EMULATE) { batch_maps = 1; } - if (xengnttab_set_max_grants(xendev->gnttabdev, - MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { - xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", - strerror(errno)); - } } static void blk_parse_discard(struct XenBlkDev *blkdev) @@ -1023,13 +1010,23 @@ static int blk_init(struct XenDevice *xendev) blkdev->file_blk = BLOCK_SIZE; + blkdev->feature_grant_copy = + (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0); + + xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n", + blkdev->feature_grant_copy ? "enabled" : "disabled"); + /* fill info * blk_connect supplies sector-size and sectors */ xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); - xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); + xenstore_write_be_int(&blkdev->xendev, "feature-persistent", + !blkdev->feature_grant_copy); xenstore_write_be_int(&blkdev->xendev, "info", info); + xenstore_write_be_int(&blkdev->xendev, "max-ring-page-order", + MAX_RING_PAGE_ORDER); + blk_parse_discard(blkdev); g_free(directiosafe); @@ -1051,12 +1048,25 @@ out_error: return -1; } +/* + * We need to account for the grant allocations requiring contiguous + * chunks; the worst case number would be + * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1, + * but in order to keep things simple just use + * 2 * max_req * max_seg. + */ +#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg)) + static int blk_connect(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); int pers, index, qflags; bool readonly = true; bool writethrough = true; + int order, ring_ref; + unsigned int ring_size, max_grants; + unsigned int i; + uint32_t *domids; /* read-only ? 
*/ if (blkdev->directiosafe) { @@ -1131,9 +1141,42 @@ static int blk_connect(struct XenDevice *xendev) xenstore_write_be_int64(&blkdev->xendev, "sectors", blkdev->file_size / blkdev->file_blk); - if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { + if (xenstore_read_fe_int(&blkdev->xendev, "ring-page-order", + &order) == -1) { + blkdev->nr_ring_ref = 1; + + if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", + &ring_ref) == -1) { + return -1; + } + blkdev->ring_ref[0] = ring_ref; + + } else if (order >= 0 && order <= MAX_RING_PAGE_ORDER) { + blkdev->nr_ring_ref = 1 << order; + + for (i = 0; i < blkdev->nr_ring_ref; i++) { + char *key; + + key = g_strdup_printf("ring-ref%u", i); + if (!key) { + return -1; + } + + if (xenstore_read_fe_int(&blkdev->xendev, key, + &ring_ref) == -1) { + g_free(key); + return -1; + } + blkdev->ring_ref[i] = ring_ref; + + g_free(key); + } + } else { + xen_pv_printf(xendev, 0, "invalid ring-page-order: %d\n", + order); return -1; } + if (xenstore_read_fe_int(&blkdev->xendev, "event-channel", &blkdev->xendev.remote_port) == -1) { return -1; @@ -1156,41 +1199,85 @@ static int blk_connect(struct XenDevice *xendev) blkdev->protocol = BLKIF_PROTOCOL_NATIVE; } - blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev, - blkdev->xendev.dom, - blkdev->ring_ref, - PROT_READ | PROT_WRITE); + ring_size = XC_PAGE_SIZE * blkdev->nr_ring_ref; + switch (blkdev->protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkdev->max_requests = __CONST_RING_SIZE(blkif, ring_size); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkdev->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkdev->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size); + break; + } + default: + return -1; + } + + /* Calculate the maximum number of grants needed by ioreqs */ + max_grants = MAX_GRANTS(blkdev->max_requests, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + /* Add on the number needed for the ring pages */ + max_grants += blkdev->nr_ring_ref; + + if (xengnttab_set_max_grants(blkdev->xendev.gnttabdev, max_grants)) { + xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", + strerror(errno)); + return -1; + } + + domids = g_malloc0_n(blkdev->nr_ring_ref, sizeof(uint32_t)); + for (i = 0; i < blkdev->nr_ring_ref; i++) { + domids[i] = blkdev->xendev.dom; + } + + blkdev->sring = xengnttab_map_grant_refs(blkdev->xendev.gnttabdev, + blkdev->nr_ring_ref, + domids, + blkdev->ring_ref, + PROT_READ | PROT_WRITE); + + g_free(domids); + if (!blkdev->sring) { return -1; } + blkdev->cnt_map++; switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: { blkif_sring_t *sring_native = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); + BACK_RING_INIT(&blkdev->rings.native, sring_native, ring_size); break; } case BLKIF_PROTOCOL_X86_32: { blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE); + BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, ring_size); break; } case BLKIF_PROTOCOL_X86_64: { blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE); + BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, ring_size); break; } } if (blkdev->feature_persistent) { /* Init persistent grants */ - blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST; + blkdev->max_grants = blkdev->max_requests * + BLKIF_MAX_SEGMENTS_PER_REQUEST; 
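The blk_connect() hunks above negotiate ring-page-order with the frontend, map 2^order grant pages as a single ring, derive max_requests from the ring that was actually mapped, and budget grants with the simplified 2 * max_req * max_seg bound quoted in the MAX_GRANTS comment. A small self-check of that arithmetic; the 32 and 11 below are illustrative stand-ins for a one-page native ring and BLKIF_MAX_SEGMENTS_PER_REQUEST:

    #include <assert.h>

    #define PAGE_SIZE 4096u   /* stand-in for XC_PAGE_SIZE */

    /* Exact worst case for grants that need contiguous chunks, per the
     * MAX_GRANTS comment: r*s + (r-1)*(s-1) + 1. */
    static unsigned grants_exact(unsigned r, unsigned s)
    {
        return r * s + (r - 1) * (s - 1) + 1;
    }

    /* Simplified bound actually used by MAX_GRANTS: 2*r*s. */
    static unsigned grants_bound(unsigned r, unsigned s)
    {
        return 2 * r * s;
    }

    int main(void)
    {
        unsigned order = 4;                        /* MAX_RING_PAGE_ORDER */
        unsigned ring_bytes = PAGE_SIZE << order;  /* 16 pages = 64 KiB */
        assert(ring_bytes == 65536);
        /* 2rs >= rs + (r-1)(s-1) + 1 reduces to r + s >= 2, so the
         * simplified bound is safe for any r, s >= 1. */
        assert(grants_bound(32, 11) >= grants_exact(32, 11));
        return 0;
    }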
blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp, NULL, NULL, batch_maps ? @@ -1202,15 +1289,9 @@ static int blk_connect(struct XenDevice *xendev) xen_be_bind_evtchn(&blkdev->xendev); - blkdev->feature_grant_copy = - (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0); - - xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n", - blkdev->feature_grant_copy ? "enabled" : "disabled"); - - xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " + xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, nr-ring-ref %u, " "remote port %d, local port %d\n", - blkdev->xendev.protocol, blkdev->ring_ref, + blkdev->xendev.protocol, blkdev->nr_ring_ref, blkdev->xendev.remote_port, blkdev->xendev.local_port); return 0; } @@ -1227,7 +1308,8 @@ static void blk_disconnect(struct XenDevice *xendev) xen_pv_unbind_evtchn(&blkdev->xendev); if (blkdev->sring) { - xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); + xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, + blkdev->nr_ring_ref); blkdev->cnt_map--; blkdev->sring = NULL; } diff --git a/hw/core/machine.c b/hw/core/machine.c index 2e7e9778cd..ecb55528e8 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -770,19 +770,6 @@ static void machine_class_finalize(ObjectClass *klass, void *data) g_free(mc->name); } -static void register_compat_prop(const char *driver, - const char *property, - const char *value) -{ - GlobalProperty *p = g_new0(GlobalProperty, 1); - /* Machine compat_props must never cause errors: */ - p->errp = &error_abort; - p->driver = driver; - p->property = property; - p->value = value; - qdev_prop_register_global(p); -} - static void machine_register_compat_for_subclass(ObjectClass *oc, void *opaque) { GlobalProperty *p = opaque; diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 68cd65345c..f11d57831b 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1084,6 +1084,27 @@ void qdev_prop_register_global(GlobalProperty *prop) global_props = g_list_append(global_props, prop); } +void register_compat_prop(const char *driver, + const char *property, + const char *value) +{ + GlobalProperty *p = g_new0(GlobalProperty, 1); + + /* Any compat_props must never cause error */ + p->errp = &error_abort; + p->driver = driver; + p->property = property; + p->value = value; + qdev_prop_register_global(p); +} + +void register_compat_props_array(GlobalProperty *prop) +{ + for (; prop && prop->driver; prop++) { + register_compat_prop(prop->driver, prop->property, prop->value); + } +} + void qdev_prop_register_global_list(GlobalProperty *props) { int i; diff --git a/hw/display/qxl.c b/hw/display/qxl.c index ad09bb98f9..3c1688e7cb 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2373,12 +2373,12 @@ static VMStateDescription qxl_vmstate = { VMSTATE_UINT32(last_release_offset, PCIQXLDevice), VMSTATE_UINT32(mode, PCIQXLDevice), VMSTATE_UINT32(ssd.unique, PCIQXLDevice), - VMSTATE_INT32_EQUAL(num_memslots, PCIQXLDevice), + VMSTATE_INT32_EQUAL(num_memslots, PCIQXLDevice, NULL), VMSTATE_STRUCT_ARRAY(guest_slots, PCIQXLDevice, NUM_MEMSLOTS, 0, qxl_memslot, struct guest_slots), VMSTATE_STRUCT(guest_primary.surface, PCIQXLDevice, 0, qxl_surface, QXLSurfaceCreate), - VMSTATE_INT32_EQUAL(ssd.num_surfaces, PCIQXLDevice), + VMSTATE_INT32_EQUAL(ssd.num_surfaces, PCIQXLDevice, NULL), VMSTATE_VARRAY_INT32(guest_surfaces.cmds, PCIQXLDevice, ssd.num_surfaces, 0, vmstate_info_uint64, uint64_t), diff --git a/hw/display/vga.c b/hw/display/vga.c index 
dcc95f88e2..80508b83f4 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -2099,7 +2099,7 @@ const VMStateDescription vmstate_vga_common = { VMSTATE_BUFFER(palette, VGACommonState), VMSTATE_INT32(bank_offset, VGACommonState), - VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState), + VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState, NULL), VMSTATE_UINT16(vbe_index, VGACommonState), VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, VBE_DISPI_INDEX_NB), VMSTATE_UINT32(vbe_start_addr, VGACommonState), diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 58dc0b2737..0506d2c1b0 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -962,7 +962,7 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = { .version_id = 1, .fields = (VMStateField[]) { VMSTATE_INT32(enable, struct VirtIOGPU), - VMSTATE_UINT32_EQUAL(conf.max_outputs, struct VirtIOGPU), + VMSTATE_UINT32_EQUAL(conf.max_outputs, struct VirtIOGPU, NULL), VMSTATE_STRUCT_VARRAY_UINT32(scanout, struct VirtIOGPU, conf.max_outputs, 1, vmstate_virtio_gpu_scanout, diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index ec5f27d67e..c989cef1cd 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -1192,7 +1192,7 @@ static const VMStateDescription vmstate_vmware_vga_internal = { .minimum_version_id = 0, .post_load = vmsvga_post_load, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(new_depth, struct vmsvga_state_s), + VMSTATE_INT32_EQUAL(new_depth, struct vmsvga_state_s, NULL), VMSTATE_INT32(enable, struct vmsvga_state_s), VMSTATE_INT32(config, struct vmsvga_state_s), VMSTATE_INT32(cursor.id, struct vmsvga_state_s), diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 46a2bc41ab..22dbef64c6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -314,12 +314,9 @@ static void pc_init1(MachineState *machine, static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); - savevm_skip_section_footers(); if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } - global_state_set_optional(); - savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f60826d6e0..874d3fe280 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1669,7 +1669,7 @@ const VMStateDescription vmstate_ahci = { VMSTATE_UINT32(control_regs.impl, AHCIState), VMSTATE_UINT32(control_regs.version, AHCIState), VMSTATE_UINT32(idp_index, AHCIState), - VMSTATE_INT32_EQUAL(ports, AHCIState), + VMSTATE_INT32_EQUAL(ports, AHCIState, NULL), VMSTATE_END_OF_LIST() }, }; diff --git a/hw/input/hid.c b/hw/input/hid.c index 93887ecc43..0d049ff61c 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -487,6 +487,7 @@ void hid_reset(HIDState *hs) memset(hs->kbd.keycodes, 0, sizeof(hs->kbd.keycodes)); memset(hs->kbd.key, 0, sizeof(hs->kbd.key)); hs->kbd.keys = 0; + hs->kbd.modifiers = 0; break; case HID_MOUSE: case HID_TABLET: diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 1d3a440bbd..3ba05efd06 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -85,12 +85,12 @@ typedef struct { int rptr, wptr, count; } PS2Queue; -typedef struct { +struct PS2State { PS2Queue queue; int32_t write_cmd; void (*update_irq)(void *, int); void *update_arg; -} PS2State; +}; typedef struct { PS2State common; @@ -551,9 +551,17 @@ static uint8_t translate_table[256] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, }; -void ps2_queue(void *opaque, int b) +static void ps2_reset_queue(PS2State *s) +{ + PS2Queue *q = &s->queue; + + q->rptr = 0; + q->wptr 
= 0; + q->count = 0; +} + +void ps2_queue(PS2State *s, int b) { - PS2State *s = (PS2State *)opaque; PS2Queue *q = &s->queue; if (q->count >= PS2_QUEUE_SIZE - 1) @@ -692,13 +700,12 @@ static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src, } } -uint32_t ps2_read_data(void *opaque) +uint32_t ps2_read_data(PS2State *s) { - PS2State *s = (PS2State *)opaque; PS2Queue *q; int val, index; - trace_ps2_read_data(opaque); + trace_ps2_read_data(s); q = &s->queue; if (q->count == 0) { /* NOTE: if no data left, we return the last keyboard one @@ -733,6 +740,7 @@ static void ps2_reset_keyboard(PS2KbdState *s) trace_ps2_reset_keyboard(s); s->scan_enabled = 1; s->scancode_set = 2; + ps2_reset_queue(&s->common); ps2_set_ledstate(s, 0); } @@ -1081,12 +1089,8 @@ void ps2_write_mouse(void *opaque, int val) static void ps2_common_reset(PS2State *s) { - PS2Queue *q; s->write_cmd = -1; - q = &s->queue; - q->rptr = 0; - q->wptr = 0; - q->count = 0; + ps2_reset_queue(s); s->update_irq(s->update_arg, 0); } diff --git a/hw/input/vmmouse.c b/hw/input/vmmouse.c index 4747da9a8d..b6d22086f4 100644 --- a/hw/input/vmmouse.c +++ b/hw/input/vmmouse.c @@ -243,7 +243,7 @@ static const VMStateDescription vmstate_vmmouse = { .minimum_version_id = 0, .post_load = vmmouse_post_load, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(queue_size, VMMouseState), + VMSTATE_INT32_EQUAL(queue_size, VMMouseState, NULL), VMSTATE_UINT32_ARRAY(queue, VMMouseState, VMMOUSE_QUEUE_SIZE), VMSTATE_UINT16(nb_queue, VMMouseState), VMSTATE_UINT16(status, VMMouseState), diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index f966d0604a..9dd285b923 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -45,6 +45,7 @@ #include "qemu/bitops.h" #include "qapi/qmp/qerror.h" #include "qemu/log.h" +#include "qemu/timer.h" //#define DEBUG_OPENPIC @@ -54,8 +55,10 @@ static const int debug_openpic = 1; static const int debug_openpic = 0; #endif +static int get_current_cpu(void); #define DPRINTF(fmt, ...) do { \ if (debug_openpic) { \ + printf("Core%d: ", get_current_cpu()); \ printf(fmt , ## __VA_ARGS__); \ } \ } while (0) @@ -246,9 +249,31 @@ typedef struct IRQSource { #define IDR_EP 0x80000000 /* external pin */ #define IDR_CI 0x40000000 /* critical interrupt */ +/* Convert between openpic clock ticks and nanosecs. In the hardware the clock + frequency is driven by board inputs to the PIC which the PIC would then + divide by 4 or 8. For now hard code to 25 MHz. +*/ +#define OPENPIC_TIMER_FREQ_MHZ 25 +#define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ) +static inline uint64_t ns_to_ticks(uint64_t ns) +{ + return ns / OPENPIC_TIMER_NS_PER_TICK; +} +static inline uint64_t ticks_to_ns(uint64_t ticks) +{ + return ticks * OPENPIC_TIMER_NS_PER_TICK; +} + typedef struct OpenPICTimer { uint32_t tccr; /* Global timer current count register */ uint32_t tbcr; /* Global timer base count register */ + int n_IRQ; + bool qemu_timer_active; /* Is the qemu_timer running? */ + struct QEMUTimer *qemu_timer; + struct OpenPICState *opp; /* Device timer is part of. */ + /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last + current_count written or read, only defined if qemu_timer_active. 
*/ + uint64_t origin_time; } OpenPICTimer; typedef struct OpenPICMSI { @@ -795,6 +820,65 @@ static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len) return retval; } +static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled); + +static void qemu_timer_cb(void *opaque) +{ + OpenPICTimer *tmr = opaque; + OpenPICState *opp = tmr->opp; + uint32_t n_IRQ = tmr->n_IRQ; + uint32_t val = tmr->tbcr & ~TBCR_CI; + uint32_t tog = ((tmr->tccr & TCCR_TOG) ^ TCCR_TOG); /* invert toggle. */ + + DPRINTF("%s n_IRQ=%d\n", __func__, n_IRQ); + /* Reload current count from base count and setup timer. */ + tmr->tccr = val | tog; + openpic_tmr_set_tmr(tmr, val, /*enabled=*/true); + /* Raise the interrupt. */ + opp->src[n_IRQ].destmask = read_IRQreg_idr(opp, n_IRQ); + openpic_set_irq(opp, n_IRQ, 1); + openpic_set_irq(opp, n_IRQ, 0); +} + +/* If enabled is true, arranges for an interrupt to be raised val clocks into + the future; if enabled is false, cancels the timer. */ +static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) +{ + uint64_t ns = ticks_to_ns(val & ~TCCR_TOG); + /* A count of zero causes a timer to be set to expire immediately. This + effectively stops the simulation since the timer is constantly expiring + which prevents guest code execution, so we don't honor that + configuration. On real hardware, this situation would generate an + interrupt on every clock cycle if the interrupt was unmasked. */ + if ((ns == 0) || !enabled) { + tmr->qemu_timer_active = false; + tmr->tccr = tmr->tccr & TCCR_TOG; + timer_del(tmr->qemu_timer); /* set timer to never expire. */ + } else { + tmr->qemu_timer_active = true; + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + tmr->origin_time = now; + timer_mod(tmr->qemu_timer, now + ns); /* set timer expiration. */ + } +} + +/* Returns the current tccr value, i.e., timer value (in clocks) with + appropriate TOG. */ +static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr) +{ + uint64_t retval; + if (!tmr->qemu_timer_active) { + retval = tmr->tccr; + } else { + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + uint64_t used = now - tmr->origin_time; /* nsecs */ + uint32_t used_ticks = (uint32_t)ns_to_ticks(used); + uint32_t count = (tmr->tccr & ~TCCR_TOG) - used_ticks; + retval = (uint32_t)((tmr->tccr & TCCR_TOG) | (count & ~TCCR_TOG)); + } + return retval; +} + static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) { @@ -819,10 +903,15 @@ static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val, case 0x00: /* TCCR */ break; case 0x10: /* TBCR */ - if ((opp->timers[idx].tccr & TCCR_TOG) != 0 && - (val & TBCR_CI) == 0 && - (opp->timers[idx].tbcr & TBCR_CI) != 0) { - opp->timers[idx].tccr &= ~TCCR_TOG; + /* Did the enable status change? */ + if ((opp->timers[idx].tbcr & TBCR_CI) != (val & TBCR_CI)) { + /* Did "Count Inhibit" transition from 1 to 0? 
*/ + if ((val & TBCR_CI) == 0) { + opp->timers[idx].tccr = val & ~TCCR_TOG; + } + openpic_tmr_set_tmr(&opp->timers[idx], + (val & ~TBCR_CI), + /*enabled=*/((val & TBCR_CI) == 0)); } opp->timers[idx].tbcr = val; break; @@ -854,7 +943,7 @@ static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len) idx = (addr >> 6) & 0x3; switch (addr & 0x30) { case 0x00: /* TCCR */ - retval = opp->timers[idx].tccr; + retval = openpic_tmr_get_timer(&opp->timers[idx]); break; case 0x10: /* TBCR */ retval = opp->timers[idx].tbcr; @@ -1136,7 +1225,10 @@ static uint32_t openpic_iack(OpenPICState *opp, IRQDest *dst, int cpu) IRQ_resetbit(&dst->raised, irq); } - if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + OPENPIC_MAX_IPI))) { + /* Timers and IPIs support multicast. */ + if (((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + OPENPIC_MAX_IPI))) || + ((irq >= opp->irq_tim0) && (irq < (opp->irq_tim0 + OPENPIC_MAX_TMR)))) { + DPRINTF("irq is IPI or TMR\n"); src->destmask &= ~(1 << cpu); if (src->destmask && !src->level) { /* trigger on CPUs that didn't know about it yet */ @@ -1341,6 +1433,10 @@ static void openpic_reset(DeviceState *d) for (i = 0; i < OPENPIC_MAX_TMR; i++) { opp->timers[i].tccr = 0; opp->timers[i].tbcr = TBCR_CI; + if (opp->timers[i].qemu_timer_active) { + timer_del(opp->timers[i].qemu_timer); /* Inhibit timer */ + opp->timers[i].qemu_timer_active = false; + } } /* Go out of RESET state */ opp->gcr = 0; @@ -1391,6 +1487,15 @@ static void fsl_common_init(OpenPICState *opp) opp->src[i].type = IRQ_TYPE_FSLSPECIAL; opp->src[i].level = false; } + + for (i = 0; i < OPENPIC_MAX_TMR; i++) { + opp->timers[i].n_IRQ = opp->irq_tim0 + i; + opp->timers[i].qemu_timer_active = false; + opp->timers[i].qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &qemu_timer_cb, + &opp->timers[i]); + opp->timers[i].opp = opp; + } } static void map_list(OpenPICState *opp, const MemReg *list, int *count) @@ -1499,7 +1604,7 @@ static const VMStateDescription vmstate_openpic = { VMSTATE_UINT32(max_irq, OpenPICState), VMSTATE_STRUCT_VARRAY_UINT32(src, OpenPICState, max_irq, 0, vmstate_openpic_irqsource, IRQSource), - VMSTATE_UINT32_EQUAL(nb_cpus, OpenPICState), + VMSTATE_UINT32_EQUAL(nb_cpus, OpenPICState, NULL), VMSTATE_STRUCT_VARRAY_UINT32(dst, OpenPICState, nb_cpus, 0, vmstate_openpic_irqdest, IRQDest), VMSTATE_STRUCT_ARRAY(timers, OpenPICState, OPENPIC_MAX_TMR, 0, diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 7ccfb53c55..a84ba51ad8 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -344,10 +344,14 @@ static void icp_realize(DeviceState *dev, Error **errp) } qemu_register_reset(icp_reset, dev); + vmstate_register(NULL, icp->cs->cpu_index, &vmstate_icp_server, icp); } static void icp_unrealize(DeviceState *dev, Error **errp) { + ICPState *icp = ICP(dev); + + vmstate_unregister(NULL, &vmstate_icp_server, icp); qemu_unregister_reset(icp_reset, dev); } @@ -355,7 +359,6 @@ static void icp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->vmsd = &vmstate_icp_server; dc->realize = icp_realize; dc->unrealize = icp_unrealize; } @@ -574,7 +577,7 @@ static const VMStateDescription vmstate_ics_simple = { .post_load = ics_simple_dispatch_post_load, .fields = (VMStateField[]) { /* Sanity check */ - VMSTATE_UINT32_EQUAL(nr_irqs, ICSState), + VMSTATE_UINT32_EQUAL(nr_irqs, ICSState, NULL), VMSTATE_STRUCT_VARRAY_POINTER_UINT32(irqs, ICSState, nr_irqs, vmstate_ics_simple_irq, diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 20198466f0..08a79c3e3e 100644 --- 
a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -57,3 +57,4 @@ obj-$(CONFIG_EDU) += edu.o obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o obj-$(CONFIG_AUX) += auxbus.o obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o +obj-y += mmio_interface.o diff --git a/hw/misc/applesmc.c b/hw/misc/applesmc.c index 77fab5b9d2..7896812304 100644 --- a/hw/misc/applesmc.c +++ b/hw/misc/applesmc.c @@ -39,21 +39,43 @@ /* #define DEBUG_SMC */ #define APPLESMC_DEFAULT_IOBASE 0x300 -/* data port used by Apple SMC */ -#define APPLESMC_DATA_PORT 0x0 -/* command/status port used by Apple SMC */ -#define APPLESMC_CMD_PORT 0x4 -#define APPLESMC_NR_PORTS 32 -#define APPLESMC_READ_CMD 0x10 -#define APPLESMC_WRITE_CMD 0x11 -#define APPLESMC_GET_KEY_BY_INDEX_CMD 0x12 -#define APPLESMC_GET_KEY_TYPE_CMD 0x13 +enum { + APPLESMC_DATA_PORT = 0x00, + APPLESMC_CMD_PORT = 0x04, + APPLESMC_ERR_PORT = 0x1e, + APPLESMC_NUM_PORTS = 0x20, +}; + +enum { + APPLESMC_READ_CMD = 0x10, + APPLESMC_WRITE_CMD = 0x11, + APPLESMC_GET_KEY_BY_INDEX_CMD = 0x12, + APPLESMC_GET_KEY_TYPE_CMD = 0x13, +}; + +enum { + APPLESMC_ST_CMD_DONE = 0x00, + APPLESMC_ST_DATA_READY = 0x01, + APPLESMC_ST_BUSY = 0x02, + APPLESMC_ST_ACK = 0x04, + APPLESMC_ST_NEW_CMD = 0x08, +}; + +enum { + APPLESMC_ST_1E_CMD_INTRUPTED = 0x80, + APPLESMC_ST_1E_STILL_BAD_CMD = 0x81, + APPLESMC_ST_1E_BAD_CMD = 0x82, + APPLESMC_ST_1E_NOEXIST = 0x84, + APPLESMC_ST_1E_WRITEONLY = 0x85, + APPLESMC_ST_1E_READONLY = 0x86, + APPLESMC_ST_1E_BAD_INDEX = 0xb8, +}; #ifdef DEBUG_SMC #define smc_debug(...) fprintf(stderr, "AppleSMC: " __VA_ARGS__) #else -#define smc_debug(...) do { } while(0) +#define smc_debug(...) do { } while (0) #endif static char default_osk[64] = "This is a dummy key. Enter the real key " @@ -74,15 +96,17 @@ struct AppleSMCState { MemoryRegion io_data; MemoryRegion io_cmd; + MemoryRegion io_err; uint32_t iobase; uint8_t cmd; uint8_t status; - uint8_t key[4]; + uint8_t status_1e; + uint8_t last_ret; + char key[4]; uint8_t read_pos; uint8_t data_len; uint8_t data_pos; uint8_t data[255]; - uint8_t charactic[4]; char *osk; QLIST_HEAD(, AppleSMCData) data_def; }; @@ -91,89 +115,138 @@ static void applesmc_io_cmd_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { AppleSMCState *s = opaque; - - smc_debug("CMD Write B: %#x = %#x\n", addr, val); - switch(val) { - case APPLESMC_READ_CMD: - s->status = 0x0c; - break; + uint8_t status = s->status & 0x0f; + + smc_debug("CMD received: 0x%02x\n", (uint8_t)val); + switch (val) { + case APPLESMC_READ_CMD: + /* did last command run through OK? 
*/ + if (status == APPLESMC_ST_CMD_DONE || status == APPLESMC_ST_NEW_CMD) { + s->cmd = val; + s->status = APPLESMC_ST_NEW_CMD | APPLESMC_ST_ACK; + } else { + smc_debug("ERROR: previous command interrupted!\n"); + s->status = APPLESMC_ST_NEW_CMD; + s->status_1e = APPLESMC_ST_1E_CMD_INTRUPTED; + } + break; + default: + smc_debug("UNEXPECTED CMD 0x%02x\n", (uint8_t)val); + s->status = APPLESMC_ST_NEW_CMD; + s->status_1e = APPLESMC_ST_1E_BAD_CMD; } - s->cmd = val; s->read_pos = 0; s->data_pos = 0; } -static void applesmc_fill_data(AppleSMCState *s) +static struct AppleSMCData *applesmc_find_key(AppleSMCState *s) { struct AppleSMCData *d; QLIST_FOREACH(d, &s->data_def, node) { if (!memcmp(d->key, s->key, 4)) { - smc_debug("Key matched (%s Len=%d Data=%s)\n", d->key, - d->len, d->data); - memcpy(s->data, d->data, d->len); - return; + return d; } } + return NULL; } static void applesmc_io_data_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { AppleSMCState *s = opaque; + struct AppleSMCData *d; - smc_debug("DATA Write B: %#x = %#x\n", addr, val); - switch(s->cmd) { - case APPLESMC_READ_CMD: - if(s->read_pos < 4) { - s->key[s->read_pos] = val; - s->status = 0x04; - } else if(s->read_pos == 4) { - s->data_len = val; - s->status = 0x05; + smc_debug("DATA received: 0x%02x\n", (uint8_t)val); + switch (s->cmd) { + case APPLESMC_READ_CMD: + if ((s->status & 0x0f) == APPLESMC_ST_CMD_DONE) { + break; + } + if (s->read_pos < 4) { + s->key[s->read_pos] = val; + s->status = APPLESMC_ST_ACK; + } else if (s->read_pos == 4) { + d = applesmc_find_key(s); + if (d != NULL) { + memcpy(s->data, d->data, d->len); + s->data_len = d->len; s->data_pos = 0; - smc_debug("Key = %c%c%c%c Len = %d\n", s->key[0], - s->key[1], s->key[2], s->key[3], val); - applesmc_fill_data(s); + s->status = APPLESMC_ST_ACK | APPLESMC_ST_DATA_READY; + s->status_1e = APPLESMC_ST_CMD_DONE; /* clear on valid key */ + } else { + smc_debug("READ_CMD: key '%c%c%c%c' not found!\n", + s->key[0], s->key[1], s->key[2], s->key[3]); + s->status = APPLESMC_ST_CMD_DONE; + s->status_1e = APPLESMC_ST_1E_NOEXIST; } - s->read_pos++; - break; + } + s->read_pos++; + break; + default: + s->status = APPLESMC_ST_CMD_DONE; + s->status_1e = APPLESMC_ST_1E_STILL_BAD_CMD; } } -static uint64_t applesmc_io_data_read(void *opaque, hwaddr addr1, - unsigned size) +static void applesmc_io_err_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + smc_debug("ERR_CODE received: 0x%02x, ignoring!\n", (uint8_t)val); + /* NOTE: writing to the error port not supported! 
*/ +} + +static uint64_t applesmc_io_data_read(void *opaque, hwaddr addr, unsigned size) { AppleSMCState *s = opaque; - uint8_t retval = 0; - - switch(s->cmd) { - case APPLESMC_READ_CMD: - if(s->data_pos < s->data_len) { - retval = s->data[s->data_pos]; - smc_debug("READ_DATA[%d] = %#hhx\n", s->data_pos, - retval); - s->data_pos++; - if(s->data_pos == s->data_len) { - s->status = 0x00; - smc_debug("EOF\n"); - } else - s->status = 0x05; + + switch (s->cmd) { + case APPLESMC_READ_CMD: + if (!(s->status & APPLESMC_ST_DATA_READY)) { + break; + } + if (s->data_pos < s->data_len) { + s->last_ret = s->data[s->data_pos]; + smc_debug("READ '%c%c%c%c'[%d] = %02x\n", + s->key[0], s->key[1], s->key[2], s->key[3], + s->data_pos, s->last_ret); + s->data_pos++; + if (s->data_pos == s->data_len) { + s->status = APPLESMC_ST_CMD_DONE; + smc_debug("READ '%c%c%c%c' Len=%d complete!\n", + s->key[0], s->key[1], s->key[2], s->key[3], + s->data_len); + } else { + s->status = APPLESMC_ST_ACK | APPLESMC_ST_DATA_READY; } + } + break; + default: + s->status = APPLESMC_ST_CMD_DONE; + s->status_1e = APPLESMC_ST_1E_STILL_BAD_CMD; } - smc_debug("DATA Read b: %#x = %#x\n", addr1, retval); + smc_debug("DATA sent: 0x%02x\n", s->last_ret); - return retval; + return s->last_ret; } -static uint64_t applesmc_io_cmd_read(void *opaque, hwaddr addr1, unsigned size) +static uint64_t applesmc_io_cmd_read(void *opaque, hwaddr addr, unsigned size) { AppleSMCState *s = opaque; - smc_debug("CMD Read B: %#x\n", addr1); + smc_debug("CMD sent: 0x%02x\n", s->status); return s->status; } +static uint64_t applesmc_io_err_read(void *opaque, hwaddr addr, unsigned size) +{ + AppleSMCState *s = opaque; + + /* NOTE: read does not clear the 1e status */ + smc_debug("ERR_CODE sent: 0x%02x\n", s->status_1e); + return s->status_1e; +} + static void applesmc_add_key(AppleSMCState *s, const char *key, int len, const char *data) { @@ -196,6 +269,9 @@ static void qdev_applesmc_isa_reset(DeviceState *dev) QLIST_FOREACH_SAFE(d, &s->data_def, node, next) { QLIST_REMOVE(d, node); } + s->status = 0x00; + s->status_1e = 0x00; + s->last_ret = 0x00; applesmc_add_key(s, "REV ", 6, "\x01\x13\x0f\x00\x00\x03"); applesmc_add_key(s, "OSK0", 32, s->osk); @@ -225,20 +301,35 @@ static const MemoryRegionOps applesmc_cmd_io_ops = { }, }; +static const MemoryRegionOps applesmc_err_io_ops = { + .write = applesmc_io_err_write, + .read = applesmc_io_err_read, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + static void applesmc_isa_realize(DeviceState *dev, Error **errp) { AppleSMCState *s = APPLE_SMC(dev); memory_region_init_io(&s->io_data, OBJECT(s), &applesmc_data_io_ops, s, - "applesmc-data", 4); + "applesmc-data", 1); isa_register_ioport(&s->parent_obj, &s->io_data, s->iobase + APPLESMC_DATA_PORT); memory_region_init_io(&s->io_cmd, OBJECT(s), &applesmc_cmd_io_ops, s, - "applesmc-cmd", 4); + "applesmc-cmd", 1); isa_register_ioport(&s->parent_obj, &s->io_cmd, s->iobase + APPLESMC_CMD_PORT); + memory_region_init_io(&s->io_err, OBJECT(s), &applesmc_err_io_ops, s, + "applesmc-err", 1); + isa_register_ioport(&s->parent_obj, &s->io_err, + s->iobase + APPLESMC_ERR_PORT); + if (!s->osk || (strlen(s->osk) != 64)) { fprintf(stderr, "WARNING: Using AppleSMC with invalid key\n"); s->osk = default_osk; diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c index 2a277bdb86..6dbdc03677 100644 --- a/hw/misc/max111x.c +++ b/hw/misc/max111x.c @@ -116,7 +116,7 @@ static const VMStateDescription vmstate_max111x = { VMSTATE_UINT8(tb1, 
MAX111xState), VMSTATE_UINT8(rb2, MAX111xState), VMSTATE_UINT8(rb3, MAX111xState), - VMSTATE_INT32_EQUAL(inputs, MAX111xState), + VMSTATE_INT32_EQUAL(inputs, MAX111xState, NULL), VMSTATE_INT32(com, MAX111xState), VMSTATE_ARRAY_INT32_UNSAFE(input, MAX111xState, inputs, vmstate_info_uint8, uint8_t), diff --git a/hw/misc/mmio_interface.c b/hw/misc/mmio_interface.c new file mode 100644 index 0000000000..6f004d2bab --- /dev/null +++ b/hw/misc/mmio_interface.c @@ -0,0 +1,128 @@ +/* + * mmio_interface.c + * + * Copyright (C) 2017 : GreenSocs + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad <fred.konrad@greensocs.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option)any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/qdev-properties.h" +#include "hw/misc/mmio_interface.h" +#include "qapi/error.h" + +#ifndef DEBUG_MMIO_INTERFACE +#define DEBUG_MMIO_INTERFACE 0 +#endif + +static uint64_t mmio_interface_counter; + +#define DPRINTF(fmt, ...) do { \ + if (DEBUG_MMIO_INTERFACE) { \ + qemu_log("mmio_interface: 0x%" PRIX64 ": " fmt, s->id, ## __VA_ARGS__);\ + } \ +} while (0); + +static void mmio_interface_init(Object *obj) +{ + MMIOInterface *s = MMIO_INTERFACE(obj); + + if (DEBUG_MMIO_INTERFACE) { + s->id = mmio_interface_counter++; + } + + DPRINTF("interface created\n"); + s->host_ptr = 0; + s->subregion = 0; +} + +static void mmio_interface_realize(DeviceState *dev, Error **errp) +{ + MMIOInterface *s = MMIO_INTERFACE(dev); + + DPRINTF("realize from 0x%" PRIX64 " to 0x%" PRIX64 " map host pointer" + " %p\n", s->start, s->end, s->host_ptr); + + if (!s->host_ptr) { + error_setg(errp, "host_ptr property must be set"); + } + + if (!s->subregion) { + error_setg(errp, "subregion property must be set"); + } + + memory_region_init_ram_ptr(&s->ram_mem, OBJECT(s), "ram", + s->end - s->start + 1, s->host_ptr); + memory_region_set_readonly(&s->ram_mem, s->ro); + memory_region_add_subregion(s->subregion, s->start, &s->ram_mem); +} + +static void mmio_interface_unrealize(DeviceState *dev, Error **errp) +{ + MMIOInterface *s = MMIO_INTERFACE(dev); + + DPRINTF("unrealize from 0x%" PRIX64 " to 0x%" PRIX64 " map host pointer" + " %p\n", s->start, s->end, s->host_ptr); + memory_region_del_subregion(s->subregion, &s->ram_mem); +} + +static void mmio_interface_finalize(Object *obj) +{ + MMIOInterface *s = MMIO_INTERFACE(obj); + + DPRINTF("finalize from 0x%" PRIX64 " to 0x%" PRIX64 " map host pointer" + " %p\n", s->start, s->end, s->host_ptr); + object_unparent(OBJECT(&s->ram_mem)); +} + +static Property mmio_interface_properties[] = { + DEFINE_PROP_UINT64("start", MMIOInterface, start, 0), + DEFINE_PROP_UINT64("end", MMIOInterface, end, 0), + DEFINE_PROP_PTR("host_ptr", MMIOInterface, host_ptr), + DEFINE_PROP_BOOL("ro", MMIOInterface, ro, false), + DEFINE_PROP_MEMORY_REGION("subregion", MMIOInterface, subregion), + 
DEFINE_PROP_END_OF_LIST(), +}; + +static void mmio_interface_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = mmio_interface_realize; + dc->unrealize = mmio_interface_unrealize; + dc->props = mmio_interface_properties; +} + +static const TypeInfo mmio_interface_info = { + .name = TYPE_MMIO_INTERFACE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(MMIOInterface), + .instance_init = mmio_interface_init, + .instance_finalize = mmio_interface_finalize, + .class_init = mmio_interface_class_init, +}; + +static void mmio_interface_register_types(void) +{ + type_register_static(&mmio_interface_info); +} + +type_init(mmio_interface_register_types) diff --git a/hw/nvram/eeprom93xx.c b/hw/nvram/eeprom93xx.c index 848692abc0..2fd0e3c29f 100644 --- a/hw/nvram/eeprom93xx.c +++ b/hw/nvram/eeprom93xx.c @@ -143,7 +143,7 @@ static const VMStateDescription vmstate_eeprom = { VMSTATE_UINT8(addrbits, eeprom_t), VMSTATE_UINT16_HACK_TEST(size, eeprom_t, is_old_eeprom_version), VMSTATE_UNUSED_TEST(is_old_eeprom_version, 1), - VMSTATE_UINT16_EQUAL_V(size, eeprom_t, EEPROM_VERSION), + VMSTATE_UINT16_EQUAL_V(size, eeprom_t, EEPROM_VERSION, NULL), VMSTATE_UINT16(data, eeprom_t), VMSTATE_VARRAY_UINT16_UNSAFE(contents, eeprom_t, size, 0, vmstate_info_uint16, uint16_t), diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 98ccc27533..b7fee4bdf2 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -74,7 +74,7 @@ static const VMStateDescription vmstate_pcibus = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(nirq, PCIBus), + VMSTATE_INT32_EQUAL(nirq, PCIBus, NULL), VMSTATE_VARRAY_INT32(irq_count, PCIBus, nirq, 0, vmstate_info_int32, int32_t), diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index 828052b0c0..97200742b4 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -813,7 +813,7 @@ const VMStateDescription vmstate_pcie_aer_log = { .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_UINT16(log_num, PCIEAERLog), - VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog), + VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL), VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid), VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num, vmstate_pcie_aer_err, PCIEAERErr), diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index d16646c95d..36d3dcd89a 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -36,7 +36,6 @@ #include "hw/pci/pci_host.h" #include "hw/ppc/ppc.h" #include "hw/boards.h" -#include "hw/audio/soundhw.h" #include "qemu/error-report.h" #include "qemu/log.h" #include "hw/ide.h" @@ -782,9 +781,6 @@ static void ibm_40p_init(MachineState *machine) qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL, &cmos_checksum); - /* initialize audio subsystem */ - soundhw_init(); - /* add some more devices */ if (defaults_enabled()) { isa_create_simple(isa_bus, "i8042"); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ede5167bc0..0ee9fac50b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -127,9 +127,49 @@ error: return NULL; } +static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque) +{ + /* Dummy entries correspond to unused ICPState objects in older QEMUs, + * and newer QEMUs don't even have them. In both cases, we don't want + * to send anything on the wire. 
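[Editor's note: the NULL argument threaded through these VMSTATE_*_EQUAL call sites is a new optional error-hint string; the macro change itself lands in include/migration/vmstate.h further down. A device that wants a friendlier migration failure can pass a message instead of NULL. A hypothetical example, with MyDevState and num_irqs as illustrative names not taken from this series:]

    static const VMStateDescription vmstate_mydev = {
        .name = "mydev",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            /* If the value differs on the destination, the hint is
             * reported alongside the mismatch error. */
            VMSTATE_INT32_EQUAL(num_irqs, MyDevState,
                                "num_irqs must match; check -device options"),
            VMSTATE_END_OF_LIST()
        },
    };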
+ */ + return false; +} + +static const VMStateDescription pre_2_10_vmstate_dummy_icp = { + .name = "icp/server", + .version_id = 1, + .minimum_version_id = 1, + .needed = pre_2_10_vmstate_dummy_icp_needed, + .fields = (VMStateField[]) { + VMSTATE_UNUSED(4), /* uint32_t xirr */ + VMSTATE_UNUSED(1), /* uint8_t pending_priority */ + VMSTATE_UNUSED(1), /* uint8_t mfrr */ + VMSTATE_END_OF_LIST() + }, +}; + +static void pre_2_10_vmstate_register_dummy_icp(int i) +{ + vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +static void pre_2_10_vmstate_unregister_dummy_icp(int i) +{ + vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +static inline int xics_max_server_number(void) +{ + return DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), smp_threads); +} + static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); if (kvm_enabled()) { if (machine_kernel_irqchip_allowed(machine) && @@ -151,6 +191,17 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) return; } } + + if (smc->pre_2_10_has_unused_icps) { + int i; + + for (i = 0; i < xics_max_server_number(); i++) { + /* Dummy entries get deregistered when real ICPState objects + * are registered during CPU core hotplug. + */ + pre_2_10_vmstate_register_dummy_icp(i); + } + } } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -979,7 +1030,6 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, void *fdt; sPAPRPHBState *phb; char *buf; - int smt = kvmppc_smt_threads(); fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); @@ -1019,7 +1069,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ - spapr_dt_xics(DIV_ROUND_UP(max_cpus * smt, smp_threads), fdt, PHANDLE_XICP); + spapr_dt_xics(xics_max_server_number(), fdt, PHANDLE_XICP); ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { @@ -1326,7 +1376,6 @@ static void ppc_spapr_reset(void) * Set the GR bit in PATB so that we know there is no HPT. 
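[Editor's note: the dummy "icp/server" entries above keep pre-2.10 migration streams intact by registering placeholder vmstate for every interrupt-server slot the old machine would have had. The slot count comes from the xics_max_server_number() formula factored out in this hunk; a standalone check with illustrative figures:]

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* e.g. 8 guest vcpus, 4 threads/core, SMT-8 kvmppc host */
        int max_cpus = 8, smp_threads = 4, kvmppc_smt_threads = 8;

        printf("%d\n", DIV_ROUND_UP(max_cpus * kvmppc_smt_threads,
                                    smp_threads));   /* prints 16 */
        return 0;
    }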
*/ spapr->patb_entry = PATBE1_GR; } else { - spapr->patb_entry = 0; spapr_setup_hpt_and_vrma(spapr); } @@ -1346,6 +1395,8 @@ static void ppc_spapr_reset(void) if (!spapr->cas_reboot) { spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); + + ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); } fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size); @@ -1443,6 +1494,18 @@ static int spapr_post_load(void *opaque, int version_id) err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset); } + if (spapr->patb_entry) { + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + bool radix = !!(spapr->patb_entry & PATBE1_GR); + bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE); + + err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry); + if (err) { + error_report("Process table config unsupported by the host"); + return -EINVAL; + } + } + return err; } @@ -1558,13 +1621,19 @@ static int htab_save_setup(QEMUFile *f, void *opaque) sPAPRMachineState *spapr = opaque; /* "Iteration" header */ - qemu_put_be32(f, spapr->htab_shift); + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + } else { + qemu_put_be32(f, spapr->htab_shift); + } if (spapr->htab) { spapr->htab_save_index = 0; spapr->htab_first_pass = true; } else { - assert(kvm_enabled()); + if (spapr->htab_shift) { + assert(kvm_enabled()); + } } @@ -1710,7 +1779,12 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) int rc = 0; /* Iteration header */ - qemu_put_be32(f, 0); + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 0; + } else { + qemu_put_be32(f, 0); + } if (!spapr->htab) { assert(kvm_enabled()); @@ -1744,7 +1818,12 @@ static int htab_save_complete(QEMUFile *f, void *opaque) int fd; /* Iteration header */ - qemu_put_be32(f, 0); + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 0; + } else { + qemu_put_be32(f, 0); + } if (!spapr->htab) { int rc; @@ -1788,6 +1867,11 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) section_hdr = qemu_get_be32(f); + if (section_hdr == -1) { + spapr_free_hpt(spapr); + return 0; + } + if (section_hdr) { Error *local_err = NULL; @@ -2131,7 +2215,7 @@ static void ppc_spapr_init(MachineState *machine) machine->cpu_model = kvm_enabled() ? 
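[Editor's note: the htab_save_* / htab_load changes above establish a small framing convention for the hashed-page-table stream: setup sends htab_shift, or -1 when the guest runs radix and there is no HPT; each later chunk leads with 0, or -1 meaning "no HPT, nothing follows", which makes the destination free its own table. A standalone sketch of the reader side, mirroring htab_load():]

    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 if chunk data follows the header, 0 if the stream is done. */
    static int htab_handle_header(int32_t section_hdr, int *htab_shift)
    {
        if (section_hdr == -1) {
            *htab_shift = 0;             /* radix guest: drop any local HPT */
            return 0;
        }
        if (section_hdr > 0) {
            *htab_shift = section_hdr;   /* first pass: (re)allocate HPT */
        }
        return 1;
    }

    int main(void)
    {
        int shift = 0;
        printf("%d shift=%d\n", htab_handle_header(24, &shift), shift);
        printf("%d shift=%d\n", htab_handle_header(-1, &shift), shift);
        return 0;
    }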
"host" : smc->tcg_default_cpu; } - ppc_cpu_parse_features(machine->cpu_model); + spapr_cpu_parse_features(spapr); spapr_init_cpus(spapr); @@ -2503,6 +2587,10 @@ static void spapr_machine_initfn(Object *obj) " place of standard EPOW events when possible" " (required for memory hot-unplug support)", NULL); + + ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr, + "Maximum permitted CPU compatibility mode", + &error_fatal); } static void spapr_machine_finalizefn(Object *obj) @@ -2548,12 +2636,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, spapr_drc_attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); addr += SPAPR_MEMORY_BLOCK_SIZE; - if (!dev->hotplugged) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - /* guests expect coldplugged LMBs to be pre-allocated */ - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); - } } /* send hotplug notification to the * guest only in case of hotplugged memory @@ -2806,9 +2888,24 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + if (smc->pre_2_10_has_unused_icps) { + sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc)); + const char *typename = object_class_get_name(scc->cpu_class); + size_t size = object_type_get_instance_size(typename); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + CPUState *cs = CPU(sc->threads + i * size); + + pre_2_10_vmstate_register_dummy_icp(cs->cpu_index); + } + } + assert(core_slot); core_slot->cpu = NULL; object_unparent(OBJECT(dev)); @@ -2860,6 +2957,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); MachineClass *mc = MACHINE_GET_CLASS(spapr); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); CPUState *cs = CPU(core->threads); @@ -2905,17 +3003,23 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, * of hotplugged CPUs. */ spapr_hotplug_req_add_by_index(drc); - } else { - /* - * Set the right DRC states for cold plugged CPU. 
- */ - if (drc) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); - } } core_slot->cpu = OBJECT(dev); + + if (smc->pre_2_10_has_unused_icps) { + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc)); + const char *typename = object_class_get_name(scc->cpu_class); + size_t size = object_type_get_instance_size(typename); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev); + void *obj = sc->threads + i * size; + + cs = CPU(obj); + pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); + } + } } static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -3356,7 +3460,12 @@ DEFINE_SPAPR_MACHINE(2_10, "2.10", true); * pseries-2.9 */ #define SPAPR_COMPAT_2_9 \ - HW_COMPAT_2_9 + HW_COMPAT_2_9 \ + { \ + .driver = TYPE_POWERPC_CPU, \ + .property = "pre-2.10-migration", \ + .value = "on", \ + }, \ static void spapr_machine_2_9_instance_options(MachineState *machine) { @@ -3365,9 +3474,12 @@ static void spapr_machine_2_9_instance_options(MachineState *machine) static void spapr_machine_2_9_class_options(MachineClass *mc) { + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + spapr_machine_2_10_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + smc->pre_2_10_has_unused_icps = true; } DEFINE_SPAPR_MACHINE(2_9, "2.9", false); @@ -3580,9 +3692,6 @@ DEFINE_SPAPR_MACHINE(2_4, "2.4", false); static void spapr_machine_2_3_instance_options(MachineState *machine) { spapr_machine_2_4_instance_options(machine); - savevm_skip_section_footers(); - global_state_set_optional(); - savevm_skip_configuration(); } static void spapr_machine_2_3_class_options(MachineClass *mc) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9fb896b407..ea278ce2a7 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -20,6 +20,57 @@ #include "sysemu/numa.h" #include "qemu/error-report.h" +void spapr_cpu_parse_features(sPAPRMachineState *spapr) +{ + /* + * Backwards compatibility hack: + * + * CPUs had a "compat=" property which didn't make sense for + * anything except pseries. It was replaced by "max-cpu-compat" + * machine option. This supports old command lines like + * -cpu POWER8,compat=power7 + * By stripping the compat option and applying it to the machine + * before passing it on to the cpu level parser. 
+ */ + gchar **inpieces; + int i, j; + gchar *compat_str = NULL; + + inpieces = g_strsplit(MACHINE(spapr)->cpu_model, ",", 0); + + /* inpieces[0] is the actual model string */ + i = 1; + j = 1; + while (inpieces[i]) { + if (g_str_has_prefix(inpieces[i], "compat=")) { + /* in case of multiple compat= options */ + g_free(compat_str); + compat_str = inpieces[i]; + } else { + j++; + } + + i++; + /* Excise compat options from list */ + inpieces[j] = inpieces[i]; + } + + if (compat_str) { + char *val = compat_str + strlen("compat="); + gchar *newprops = g_strjoinv(",", inpieces); + + object_property_set_str(OBJECT(spapr), val, "max-cpu-compat", + &error_fatal); + + ppc_cpu_parse_features(newprops); + g_free(newprops); + } else { + ppc_cpu_parse_features(MACHINE(spapr)->cpu_model); + } + + g_strfreev(inpieces); +} + static void spapr_cpu_reset(void *opaque) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); @@ -67,16 +118,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, /* Enable PAPR mode in TCG or KVM */ cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - if (cpu->max_compat) { - Error *local_err = NULL; - - ppc_set_compat(cpu, cpu->max_compat, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - qemu_register_reset(spapr_cpu_reset, cpu); spapr_cpu_reset(cpu); } @@ -137,7 +178,7 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUState *cs = CPU(child); PowerPCCPU *cpu = POWERPC_CPU(cs); - Object *obj = NULL; + Object *obj; object_property_set_bool(child, true, "realized", &local_err); if (local_err) { @@ -157,13 +198,14 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) object_property_add_const_link(obj, ICP_PROP_CPU, child, &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { - goto error; + goto free_icp; } return; -error: +free_icp: object_unparent(obj); +error: error_propagate(errp, local_err); } diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 5cb75bbf34..bd40b84cfc 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -46,30 +46,64 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc) | (drc->id & DRC_INDEX_ID_MASK); } -static uint32_t set_isolation_state(sPAPRDRConnector *drc, - sPAPRDRIsolationState state) +static uint32_t drc_isolate_physical(sPAPRDRConnector *drc) { - trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state); - /* if the guest is configuring a device attached to this DRC, we * should reset the configuration state at this point since it may * no longer be reliable (guest released device and needs to start * over, or unplug occurred so the FDT is no longer valid) */ - if (state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { - g_free(drc->ccs); - drc->ccs = NULL; - } + g_free(drc->ccs); + drc->ccs = NULL; - if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) { - /* cannot unisolate a non-existent resource, and, or resources - * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, 13.5.3.5) - */ - if (!drc->dev || - drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - return RTAS_OUT_NO_SUCH_INDICATOR; + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + + /* if we're awaiting release, but still in an unconfigured state, + * it's likely the guest is still in the process of configuring + * the device and is transitioning the devices to an ISOLATED + * state as a part of that process. 
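[Editor's note: the helper above splits the legacy -cpu string, strips any compat= option and applies it as the machine's max-cpu-compat before handing the remainder to the CPU parser. A standalone glib demo of the same translation, with a made-up feature list; build with `cc demo.c $(pkg-config --cflags --libs glib-2.0)`:]

    #include <glib.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        gchar **piece = g_strsplit("POWER8,compat=power7,vsx=on", ",", 0);
        GString *cpu_props = g_string_new(piece[0]);  /* the model itself */
        const gchar *compat = NULL;

        for (int i = 1; piece[i]; i++) {
            if (g_str_has_prefix(piece[i], "compat=")) {
                compat = piece[i] + strlen("compat=");  /* last one wins */
            } else {
                g_string_append_printf(cpu_props, ",%s", piece[i]);
            }
        }

        printf("cpu features  : %s\n", cpu_props->str);   /* POWER8,vsx=on */
        printf("max-cpu-compat: %s\n", compat ? compat : "(unset)");

        g_string_free(cpu_props, TRUE);
        g_strfreev(piece);
        return 0;
    }

With the machine property in place, the non-legacy spelling of the same request is -machine pseries,max-cpu-compat=power7 -cpu POWER8.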
so we only complete the + * removal when this transition happens for a device in a + * configured state, as suggested by the state diagram from PAPR+ + * 2.7, 13.4 + */ + if (drc->awaiting_release) { + uint32_t drc_index = spapr_drc_index(drc); + if (drc->configured) { + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc, DEVICE(drc->dev), NULL); + } else { + trace_spapr_drc_set_isolation_state_deferring(drc_index); } } + drc->configured = false; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc) +{ + /* cannot unisolate a non-existent resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, + * 13.5.3.5) + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) +{ + /* if the guest is configuring a device attached to this DRC, we + * should reset the configuration state at this point since it may + * no longer be reliable (guest released device and needs to start + * over, or unplug occurred so the FDT is no longer valid) + */ + g_free(drc->ccs); + drc->ccs = NULL; /* * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't @@ -81,66 +115,87 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, * If the LMB being removed doesn't belong to a DIMM device that is * actually being unplugged, fail the isolation request here. */ - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) { - if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && - !drc->awaiting_release) { - return RTAS_OUT_HW_ERROR; - } + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB + && !drc->awaiting_release) { + return RTAS_OUT_HW_ERROR; } - drc->isolation_state = state; + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; - if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { - /* if we're awaiting release, but still in an unconfigured state, - * it's likely the guest is still in the process of configuring - * the device and is transitioning the devices to an ISOLATED - * state as a part of that process. so we only complete the - * removal when this transition happens for a device in a - * configured state, as suggested by the state diagram from - * PAPR+ 2.7, 13.4 - */ - if (drc->awaiting_release) { - uint32_t drc_index = spapr_drc_index(drc); - if (drc->configured) { - trace_spapr_drc_set_isolation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else { - trace_spapr_drc_set_isolation_state_deferring(drc_index); - } + /* if we're awaiting release, but still in an unconfigured state, + * it's likely the guest is still in the process of configuring + * the device and is transitioning the devices to an ISOLATED + * state as a part of that process. 
so we only complete the + * removal when this transition happens for a device in a + * configured state, as suggested by the state diagram from PAPR+ + * 2.7, 13.4 + */ + if (drc->awaiting_release) { + uint32_t drc_index = spapr_drc_index(drc); + if (drc->configured) { + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc, DEVICE(drc->dev), NULL); + } else { + trace_spapr_drc_set_isolation_state_deferring(drc_index); } - drc->configured = false; } + drc->configured = false; return RTAS_OUT_SUCCESS; } -static uint32_t set_allocation_state(sPAPRDRConnector *drc, - sPAPRDRAllocationState state) +static uint32_t drc_unisolate_logical(sPAPRDRConnector *drc) { - trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state); + /* cannot unisolate a non-existent resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, + * 13.5.3.5) + */ + if (!drc->dev || + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + + return RTAS_OUT_SUCCESS; +} - if (state == SPAPR_DR_ALLOCATION_STATE_USABLE) { - /* if there's no resource/device associated with the DRC, there's - * no way for us to put it in an allocation state consistent with - * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should - * result in an RTAS return code of -3 / "no such indicator" +static uint32_t drc_set_usable(sPAPRDRConnector *drc) +{ + /* if there's no resource/device associated with the DRC, there's + * no way for us to put it in an allocation state consistent with + * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should + * result in an RTAS return code of -3 / "no such indicator" + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if (drc->awaiting_release && drc->awaiting_allocation) { + /* kernel is acknowledging a previous hotplug event + * while we are already removing it. + * it's safe to ignore awaiting_allocation here since we know the + * situation is predicated on the guest either already having done + * so (boot-time hotplug), or never being able to acquire in the + * first place (hotplug followed by immediate unplug). */ - if (!drc->dev) { - return RTAS_OUT_NO_SUCH_INDICATOR; - } + return RTAS_OUT_NO_SUCH_INDICATOR; } - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = state; - if (drc->awaiting_release && - drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - uint32_t drc_index = spapr_drc_index(drc); - trace_spapr_drc_set_allocation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { - drc->awaiting_allocation = false; - } + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; + drc->awaiting_allocation = false; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_set_unusable(sPAPRDRConnector *drc) +{ + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; + if (drc->awaiting_release) { + uint32_t drc_index = spapr_drc_index(drc); + trace_spapr_drc_set_allocation_state_finalizing(drc_index); + spapr_drc_detach(drc, DEVICE(drc->dev), NULL); } + return RTAS_OUT_SUCCESS; } @@ -172,12 +227,6 @@ static const char *spapr_drc_name(sPAPRDRConnector *drc) return g_strdup_printf("%s%d", drck->drc_name_prefix, drc->id); } -/* has the guest been notified of device attachment? 
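[Editor's note: the refactor above replaces the monolithic set_isolation_state/set_allocation_state callbacks with one handler per transition. A condensed standalone model of the return-code rules those handlers enforce, merging the physical and logical variants for brevity; field names are simplified stand-ins:]

    #include <stdbool.h>
    #include <stdio.h>

    enum { RTAS_OUT_SUCCESS = 0, RTAS_OUT_HW_ERROR = -1,
           RTAS_OUT_NO_SUCH_INDICATOR = -3 };

    struct drc { bool has_dev, awaiting_release, is_lmb, unusable; };

    static int drc_isolate(struct drc *d)
    {
        /* An LMB not pending removal may not be isolated by the guest. */
        if (d->is_lmb && !d->awaiting_release) {
            return RTAS_OUT_HW_ERROR;
        }
        return RTAS_OUT_SUCCESS;
    }

    static int drc_unisolate(struct drc *d)
    {
        /* No device, or UNUSABLE allocation state: PAPR 2.7, 13.5.3.5 */
        if (!d->has_dev || d->unusable) {
            return RTAS_OUT_NO_SUCH_INDICATOR;
        }
        return RTAS_OUT_SUCCESS;
    }

    int main(void)
    {
        struct drc lmb = { .has_dev = true, .is_lmb = true };
        printf("%d %d\n", drc_isolate(&lmb), drc_unisolate(&lmb));
        return 0;
    }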
*/ -static void set_signalled(sPAPRDRConnector *drc) -{ - drc->signalled = true; -} - /* * dr-entity-sense sensor value * returned via get-sensor-state RTAS calls @@ -304,33 +353,12 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, } g_assert(fdt || coldplug); - /* NOTE: setting initial isolation state to UNISOLATED means we can't - * detach unless guest has a userspace/kernel that moves this state - * back to ISOLATED in response to an unplug event, or this is done - * manually by the admin prior. if we force things while the guest - * may be accessing the device, we can easily crash the guest, so we - * we defer completion of removal in such cases to the reset() hook. - */ - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; - } drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; drc->dev = d; drc->fdt = fdt; drc->fdt_start_offset = fdt_start_offset; drc->configured = coldplug; - /* 'logical' DR resources such as memory/cpus are in some cases treated - * as a pool of resources from which the guest is free to choose from - * based on only a count. for resources that can be assigned in this - * fashion, we must assume the resource is signalled immediately - * since a single hotplug request might make an arbitrary number of - * such attached resources available to the guest, as opposed to - * 'physical' DR resources such as PCI where each device/resource is - * signalled individually. - */ - drc->signalled = (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) - ? true : coldplug; if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { drc->awaiting_allocation = true; @@ -342,49 +370,8 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, NULL, 0, NULL); } -void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) +static void spapr_drc_release(sPAPRDRConnector *drc) { - trace_spapr_drc_detach(spapr_drc_index(drc)); - - /* if we've signalled device presence to the guest, or if the guest - * has gone ahead and configured the device (via manually-executed - * device add via drmgr in guest, namely), we need to wait - * for the guest to quiesce the device before completing detach. - * Otherwise, we can assume the guest hasn't seen it and complete the - * detach immediately. Note that there is a small race window - * just before, or during, configuration, which is this context - * refers mainly to fetching the device tree via RTAS. - * During this window the device access will be arbitrated by - * associated DRC, which will simply fail the RTAS calls as invalid. - * This is recoverable within guest and current implementations of - * drmgr should be able to cope. 
- */ - if (!drc->signalled && !drc->configured) { - /* if the guest hasn't seen the device we can't rely on it to - * set it back to an isolated state via RTAS, so do it here manually - */ - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; - } - - if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { - trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } - - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && - drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } - - if (drc->awaiting_allocation) { - drc->awaiting_release = true; - trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc)); - return; - } - drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; /* Calling release callbacks based on spapr_drc_type(drc). */ @@ -412,6 +399,32 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) drc->dev = NULL; } +void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) +{ + trace_spapr_drc_detach(spapr_drc_index(drc)); + + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { + trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc)); + drc->awaiting_release = true; + return; + } + + if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && + drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc)); + drc->awaiting_release = true; + return; + } + + if (drc->awaiting_allocation) { + drc->awaiting_release = true; + trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc)); + return; + } + + spapr_drc_release(drc); +} + static bool release_pending(sPAPRDRConnector *drc) { return drc->awaiting_release; @@ -420,7 +433,6 @@ static bool release_pending(sPAPRDRConnector *drc) static void reset(DeviceState *d) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); trace_spapr_drc_reset(spapr_drc_index(drc)); @@ -428,32 +440,26 @@ static void reset(DeviceState *d) drc->ccs = NULL; /* immediately upon reset we can safely assume DRCs whose devices - * are pending removal can be safely removed, and that they will - * subsequently be left in an ISOLATED state. move the DRC to this - * state in these cases (which will in turn complete any pending - * device removals) + * are pending removal can be safely removed. */ if (drc->awaiting_release) { - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_ISOLATED); - /* generally this should also finalize the removal, but if the device - * hasn't yet been configured we normally defer removal under the - * assumption that this transition is taking place as part of device - * configuration. 
so check if we're still waiting after this, and - * force removal if we are - */ - if (drc->awaiting_release) { - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } - - /* non-PCI devices may be awaiting a transition to UNUSABLE */ - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && - drc->awaiting_release) { - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_UNUSABLE); - } + spapr_drc_release(drc); } - if (drck->dr_entity_sense(drc) == SPAPR_DR_ENTITY_SENSE_PRESENT) { - drck->set_signalled(drc); + drc->awaiting_allocation = false; + + if (drc->dev) { + /* A device present at reset is coldplugged */ + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; + } + } else { + /* Otherwise device is absent, but might be hotplugged */ + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; + } } } @@ -479,7 +485,7 @@ static bool spapr_drc_needed(void *opaque) case SPAPR_DR_CONNECTOR_TYPE_LMB: rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && - drc->configured && drc->signalled && !drc->awaiting_release); + drc->configured && !drc->awaiting_release); break; case SPAPR_DR_CONNECTOR_TYPE_PHB: case SPAPR_DR_CONNECTOR_TYPE_VIO: @@ -501,7 +507,6 @@ static const VMStateDescription vmstate_spapr_drc = { VMSTATE_BOOL(configured, sPAPRDRConnector), VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), - VMSTATE_BOOL(signalled, sPAPRDRConnector), VMSTATE_END_OF_LIST() } }; @@ -596,10 +601,7 @@ static void spapr_dr_connector_class_init(ObjectClass *k, void *data) dk->reset = reset; dk->realize = realize; dk->unrealize = unrealize; - drck->set_isolation_state = set_isolation_state; - drck->set_allocation_state = set_allocation_state; drck->release_pending = release_pending; - drck->set_signalled = set_signalled; /* * Reason: it crashes FIXME find and document the real reason */ @@ -611,6 +613,8 @@ static void spapr_drc_physical_class_init(ObjectClass *k, void *data) sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); drck->dr_entity_sense = physical_entity_sense; + drck->isolate = drc_isolate_physical; + drck->unisolate = drc_unisolate_physical; } static void spapr_drc_logical_class_init(ObjectClass *k, void *data) @@ -618,6 +622,8 @@ static void spapr_drc_logical_class_init(ObjectClass *k, void *data) sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); drck->dr_entity_sense = logical_entity_sense; + drck->isolate = drc_isolate_logical; + drck->unisolate = drc_unisolate_logical; } static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) @@ -858,24 +864,45 @@ static uint32_t rtas_set_isolation_state(uint32_t idx, uint32_t state) sPAPRDRConnectorClass *drck; if (!drc) { - return RTAS_OUT_PARAM_ERROR; + return RTAS_OUT_NO_SUCH_INDICATOR; } + trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - return drck->set_isolation_state(drc, state); + + switch (state) { + case SPAPR_DR_ISOLATION_STATE_ISOLATED: + return drck->isolate(drc); + + case SPAPR_DR_ISOLATION_STATE_UNISOLATED: + return drck->unisolate(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } } static uint32_t rtas_set_allocation_state(uint32_t idx, uint32_t state) { 
sPAPRDRConnector *drc = spapr_drc_by_index(idx); - sPAPRDRConnectorClass *drck; - if (!drc) { - return RTAS_OUT_PARAM_ERROR; + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_LOGICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; } - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - return drck->set_allocation_state(drc, state); + trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state); + + switch (state) { + case SPAPR_DR_ALLOCATION_STATE_USABLE: + return drc_set_usable(drc); + + case SPAPR_DR_ALLOCATION_STATE_UNUSABLE: + return drc_set_unusable(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } } static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 171aedc7e0..587a3dacb2 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -475,13 +475,6 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) RTAS_LOG_TYPE_EPOW))); } -static void spapr_hotplug_set_signalled(uint32_t drc_index) -{ - sPAPRDRConnector *drc = spapr_drc_by_index(drc_index); - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->set_signalled(drc); -} - static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, sPAPRDRConnectorType drc_type, union drc_identifier *drc_id) @@ -528,9 +521,6 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, switch (drc_type) { case SPAPR_DR_CONNECTOR_TYPE_PCI: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; - if (hp->hotplug_action == RTAS_LOG_V6_HP_ACTION_ADD) { - spapr_hotplug_set_signalled(drc_id->index); - } break; case SPAPR_DR_CONNECTOR_TYPE_LMB: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index aa1ffea9e5..8624ce8d5b 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1045,11 +1045,11 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, } } -static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr, - Error **errp) +static uint32_t cas_check_pvr(sPAPRMachineState *spapr, PowerPCCPU *cpu, + target_ulong *addr, Error **errp) { bool explicit_match = false; /* Matched the CPU's real PVR */ - uint32_t max_compat = cpu->max_compat; + uint32_t max_compat = spapr->max_compat_pvr; uint32_t best_compat = 0; int i; @@ -1105,7 +1105,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, bool guest_radix; Error *local_err = NULL; - cas_pvr = cas_check_pvr(cpu, &addr, &local_err); + cas_pvr = cas_check_pvr(spapr, cpu, &addr, &local_err); if (local_err) { error_report_err(local_err); return H_HARDWARE; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 0341bc069d..8656a54a3e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_spapr_tce_table = { .post_load = spapr_tce_table_post_load, .fields = (VMStateField []) { /* Sanity check */ - VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable), + VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable, NULL), /* IOMMU state */ VMSTATE_UINT32(mig_nb_table, sPAPRTCETable), diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 0b447f2eed..3b37dcdc09 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1848,7 +1848,7 @@ static const VMStateDescription vmstate_spapr_pci_lsi = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi), + VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi, NULL), VMSTATE_END_OF_LIST() }, @@ -1936,7 +1936,7 @@ static const VMStateDescription 
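[Editor's note: cas_check_pvr() above now caps negotiation at the machine-wide spapr->max_compat_pvr rather than a per-CPU limit. A loose standalone sketch of the selection rule it applies; the real code consults the PPC compatibility table rather than comparing raw PVRs, and the values here are made up:]

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the real compat-table lookup. */
    static int pvr_allowed(uint32_t pvr, uint32_t max_compat)
    {
        return max_compat == 0 || pvr <= max_compat;
    }

    static uint32_t pick_pvr(const uint32_t *offered, int n,
                             uint32_t real_pvr, uint32_t max_compat)
    {
        uint32_t best = 0;
        for (int i = 0; i < n; i++) {
            if (offered[i] == real_pvr && max_compat == 0) {
                return real_pvr;        /* explicit match, no cap set */
            }
            if (pvr_allowed(offered[i], max_compat) && offered[i] > best) {
                best = offered[i];      /* newest acceptable compat mode */
            }
        }
        return best;                    /* 0 means CAS fails */
    }

    int main(void)
    {
        uint32_t offered[] = { 0x004b0000 /* P8 */, 0x003f0000 /* P7 */ };
        /* Capped at POWER7: prints 0x003f0000 */
        printf("0x%08x\n", pick_pvr(offered, 2, 0x004b0000, 0x003f0000));
        return 0;
    }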
vmstate_spapr_pci = { .pre_save = spapr_pci_pre_save, .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { - VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState), + VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState, NULL), VMSTATE_UINT32_TEST(mig_liobn, sPAPRPHBState, pre_2_8_migration), VMSTATE_UINT64_TEST(mig_mem_win_addr, sPAPRPHBState, pre_2_8_migration), VMSTATE_UINT64_TEST(mig_mem_win_size, sPAPRPHBState, pre_2_8_migration), diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index a0ee4fd265..ea3bc8bd9e 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -557,8 +557,8 @@ const VMStateDescription vmstate_spapr_vio = { .minimum_version_id = 1, .fields = (VMStateField[]) { /* Sanity check */ - VMSTATE_UINT32_EQUAL(reg, VIOsPAPRDevice), - VMSTATE_UINT32_EQUAL(irq, VIOsPAPRDevice), + VMSTATE_UINT32_EQUAL(reg, VIOsPAPRDevice, NULL), + VMSTATE_UINT32_EQUAL(irq, VIOsPAPRDevice, NULL), /* General VIO device state */ VMSTATE_UINT64(signal_state, VIOsPAPRDevice), diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index da8adfa443..e833028393 100644 --- a/hw/ssi/xilinx_spips.c +++ b/hw/ssi/xilinx_spips.c @@ -496,6 +496,18 @@ static const MemoryRegionOps spips_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +static void xilinx_qspips_invalidate_mmio_ptr(XilinxQSPIPS *q) +{ + XilinxSPIPS *s = &q->parent_obj; + + if (q->lqspi_cached_addr != ~0ULL) { + /* Invalidate the current mapped mmio */ + memory_region_invalidate_mmio_ptr(&s->mmlqspi, q->lqspi_cached_addr, + LQSPI_CACHE_SIZE); + q->lqspi_cached_addr = ~0ULL; + } +} + static void xilinx_qspips_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { @@ -505,7 +517,7 @@ static void xilinx_qspips_write(void *opaque, hwaddr addr, addr >>= 2; if (addr == R_LQSPI_CFG) { - q->lqspi_cached_addr = ~0ULL; + xilinx_qspips_invalidate_mmio_ptr(q); } } @@ -517,27 +529,20 @@ static const MemoryRegionOps qspips_ops = { #define LQSPI_CACHE_SIZE 1024 -static uint64_t -lqspi_read(void *opaque, hwaddr addr, unsigned int size) +static void lqspi_load_cache(void *opaque, hwaddr addr) { - int i; XilinxQSPIPS *q = opaque; XilinxSPIPS *s = opaque; - uint32_t ret; - - if (addr >= q->lqspi_cached_addr && - addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { - uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr]; - ret = cpu_to_le32(*(uint32_t *)retp); - DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr, - (unsigned)ret); - return ret; - } else { - int flash_addr = (addr / num_effective_busses(s)); - int slave = flash_addr >> LQSPI_ADDRESS_BITS; - int cache_entry = 0; - uint32_t u_page_save = s->regs[R_LQSPI_STS] & ~LQSPI_CFG_U_PAGE; - + int i; + int flash_addr = ((addr & ~(LQSPI_CACHE_SIZE - 1)) + / num_effective_busses(s)); + int slave = flash_addr >> LQSPI_ADDRESS_BITS; + int cache_entry = 0; + uint32_t u_page_save = s->regs[R_LQSPI_STS] & ~LQSPI_CFG_U_PAGE; + + if (addr < q->lqspi_cached_addr || + addr > q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { + xilinx_qspips_invalidate_mmio_ptr(q); s->regs[R_LQSPI_STS] &= ~LQSPI_CFG_U_PAGE; s->regs[R_LQSPI_STS] |= slave ? 
LQSPI_CFG_U_PAGE : 0; @@ -589,12 +594,43 @@ lqspi_read(void *opaque, hwaddr addr, unsigned int size) xilinx_spips_update_cs_lines(s); q->lqspi_cached_addr = flash_addr * num_effective_busses(s); + } +} + +static void *lqspi_request_mmio_ptr(void *opaque, hwaddr addr, unsigned *size, + unsigned *offset) +{ + XilinxQSPIPS *q = opaque; + hwaddr offset_within_the_region = addr & ~(LQSPI_CACHE_SIZE - 1); + + lqspi_load_cache(opaque, offset_within_the_region); + *size = LQSPI_CACHE_SIZE; + *offset = offset_within_the_region; + return q->lqspi_buf; +} + +static uint64_t +lqspi_read(void *opaque, hwaddr addr, unsigned int size) +{ + XilinxQSPIPS *q = opaque; + uint32_t ret; + + if (addr >= q->lqspi_cached_addr && + addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { + uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr]; + ret = cpu_to_le32(*(uint32_t *)retp); + DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr, + (unsigned)ret); + return ret; + } else { + lqspi_load_cache(opaque, addr); return lqspi_read(opaque, addr, size); } } static const MemoryRegionOps lqspi_ops = { .read = lqspi_read, + .request_ptr = lqspi_request_mmio_ptr, .endianness = DEVICE_NATIVE_ENDIAN, .valid = { .min_access_size = 1, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index ca72a80f27..e3562a4c60 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -415,7 +415,7 @@ static const VMStateDescription vmstate_uhci = { .post_load = uhci_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, UHCIState), - VMSTATE_UINT8_EQUAL(num_ports_vmstate, UHCIState), + VMSTATE_UINT8_EQUAL(num_ports_vmstate, UHCIState, NULL), VMSTATE_STRUCT_ARRAY(ports, UHCIState, NB_PORTS, 1, vmstate_uhci_port, UHCIPort), VMSTATE_UINT16(cmd, UHCIState), diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 20d6a08616..301920ec1b 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1740,7 +1740,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) bool pcie_port = pci_bus_is_express(pci_dev->bus) && !pci_bus_is_root(pci_dev->bus); - if (!kvm_has_many_ioeventfds()) { + if (kvm_enabled() && !kvm_has_many_ioeventfds()) { proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; } diff --git a/hw/xen/xen-common.c b/hw/xen/xen-common.c index d3fa705a82..632a938dcc 100644 --- a/hw/xen/xen-common.c +++ b/hw/xen/xen-common.c @@ -138,20 +138,35 @@ static int xen_init(MachineState *ms) return -1; } qemu_add_vm_change_state_handler(xen_change_state_handler, NULL); - - global_state_set_optional(); - savevm_skip_configuration(); - savevm_skip_section_footers(); - return 0; } +static GlobalProperty xen_compat_props[] = { + { + .driver = "migration", + .property = "store-global-state", + .value = "off", + }, + { + .driver = "migration", + .property = "send-configuration", + .value = "off", + }, + { + .driver = "migration", + .property = "send-section-footer", + .value = "off", + }, + { /* end of list */ }, +}; + static void xen_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "Xen"; ac->init_machine = xen_init; ac->allowed = &xen_allowed; + ac->global_props = xen_compat_props; } #define TYPE_XEN_ACCEL ACCEL_CLASS_NAME("xen") diff --git a/include/block/block.h b/include/block/block.h index a4f09df95a..85e4be7462 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -276,7 +276,7 @@ int bdrv_read(BdrvChild *child, int64_t sector_num, int bdrv_write(BdrvChild *child, int64_t sector_num, const uint8_t *buf, int nb_sectors); int 
bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int count, BdrvRequestFlags flags); + int bytes, BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes); int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov); @@ -295,7 +295,7 @@ int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, * because it may allocate memory for the entire region. */ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, - int count, BdrvRequestFlags flags); + int bytes, BdrvRequestFlags flags); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); int bdrv_get_backing_file_depth(BlockDriverState *bs); @@ -353,14 +353,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); /* async block I/O */ -BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockAIOCB *acb); void bdrv_aio_cancel_async(BlockAIOCB *acb); @@ -419,8 +411,8 @@ void bdrv_drain_all(void); } \ waited_; }) -int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count); -int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); +int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); +int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 748970055e..15fa602150 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -142,7 +142,7 @@ struct BlockDriver { BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, - int64_t offset, int count, + int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque); int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, @@ -163,9 +163,9 @@ struct BlockDriver { * will be called instead. */ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, - int64_t offset, int count, BdrvRequestFlags flags); + int64_t offset, int bytes, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, - int64_t offset, int count); + int64_t offset, int bytes); /* * Building block for bdrv_block_status[_above]. The driver should diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 09c7c694b5..67c0968fa5 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -321,6 +321,24 @@ void block_job_iostatus_reset(BlockJob *job); BlockJobTxn *block_job_txn_new(void); /** + * block_job_ref: + * + * Add a reference to BlockJob refcnt, it will be decreased with + * block_job_unref, and then be freed if it comes to be the last + * reference. + */ +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: + * + * Release a reference that was previously acquired with block_job_ref + * or block_job_create. If it's the last reference to the object, it will be + * freed. 
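[Editor's note: per the new block_job_ref/block_job_unref doc comments above, the intended pattern is to hold a reference across any wait that could otherwise free the job underneath the caller. A hedged QEMU-internal sketch, modelled on the synchronous-finish path; locking and AioContext details are elided:]

    void my_wait_for_job(BlockJob *job)
    {
        block_job_ref(job);             /* keep the job alive across the wait */
        while (!job->completed) {       /* assumes the job's completed flag,
                                           as the sync-finish path uses */
            aio_poll(qemu_get_aio_context(), true);
        }
        block_job_unref(job);           /* last reference may free it here */
    }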
+ */ +void block_job_unref(BlockJob *job); + +/** * block_job_txn_unref: * * Release a reference that was previously acquired with block_job_txn_add_job diff --git a/include/exec/memory.h b/include/exec/memory.h index 37f8e78e71..8503685455 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -137,6 +137,15 @@ struct MemoryRegionOps { uint64_t data, unsigned size, MemTxAttrs attrs); + /* Instruction execution pre-callback: + * @addr is the address of the access relative to the @mr. + * @size is the size of the area returned by the callback. + * @offset is the location of the pointer inside @mr. + * + * Returns a pointer to a location which contains guest code. + */ + void *(*request_ptr)(void *opaque, hwaddr addr, unsigned *size, + unsigned *offset); enum device_endian endianness; /* Guest-visible constraints: */ @@ -1363,6 +1372,32 @@ void memory_global_dirty_log_stop(void); void mtree_info(fprintf_function mon_printf, void *f, bool flatview); /** + * memory_region_request_mmio_ptr: request a pointer to an mmio + * MemoryRegion. If it is possible map a RAM MemoryRegion with this pointer. + * When the device wants to invalidate the pointer it will call + * memory_region_invalidate_mmio_ptr. + * + * @mr: #MemoryRegion to check + * @addr: address within that region + * + * Returns true on success, false otherwise. + */ +bool memory_region_request_mmio_ptr(MemoryRegion *mr, hwaddr addr); + +/** + * memory_region_invalidate_mmio_ptr: invalidate the pointer to an mmio + * previously requested. + * In the end that means that if something wants to execute from this area it + * will need to request the pointer again. + * + * @mr: #MemoryRegion associated to the pointer. + * @addr: address within that region + * @size: size of that area. + */ +void memory_region_invalidate_mmio_ptr(MemoryRegion *mr, hwaddr offset, + unsigned size); + +/** * memory_region_dispatch_read: perform a read directly to the specified * MemoryRegion. 
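[Editor's note: the request_ptr contract documented above pairs with memory_region_invalidate_mmio_ptr(). A skeleton of a device opting in, mirroring the Xilinx LQSPI usage earlier in this series; MyDevState and the mydev_* names are illustrative, and the ordinary .read/.write handlers (omitted) still back the region when no pointer mapping is installed:]

    #define MYDEV_WINDOW 1024

    typedef struct MyDevState {
        MemoryRegion iomem;
        uint8_t buf[MYDEV_WINDOW];
    } MyDevState;

    static void mydev_fill_cache(MyDevState *s, hwaddr base)
    {
        /* refresh s->buf with the MYDEV_WINDOW bytes backing 'base' */
    }

    static void *mydev_request_ptr(void *opaque, hwaddr addr,
                                   unsigned *size, unsigned *offset)
    {
        MyDevState *s = opaque;
        hwaddr base = addr & ~(MYDEV_WINDOW - 1);

        mydev_fill_cache(s, base);
        *size = MYDEV_WINDOW;   /* window covered by the returned pointer */
        *offset = base;         /* its location within the region */
        return s->buf;
    }

    static void mydev_backing_changed(MyDevState *s, hwaddr base)
    {
        /* drop any direct mapping the core built from our pointer */
        memory_region_invalidate_mmio_ptr(&s->iomem, base, MYDEV_WINDOW);
    }

    static const MemoryRegionOps mydev_ops = {
        .request_ptr = mydev_request_ptr,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };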
* diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 73d1bea8b6..c04f4f67f6 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -386,8 +386,9 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, int k; int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); unsigned long * const *src; - unsigned long idx = (page * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long offset = BIT_WORD((page * BITS_PER_LONG) % + unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); + unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % DIRTY_MEMORY_BLOCK_SIZE); rcu_read_lock(); @@ -414,9 +415,11 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, rcu_read_unlock(); } else { + ram_addr_t offset = rb->offset; + for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { if (cpu_physical_memory_test_and_clear_dirty( - start + addr, + start + addr + offset, TARGET_PAGE_SIZE, DIRTY_MEMORY_MIGRATION)) { *real_dirty_pages += 1; diff --git a/include/hw/compat.h b/include/hw/compat.h index 26cd5851a5..08f36004da 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -181,6 +181,18 @@ .driver = TYPE_PCI_DEVICE,\ .property = "x-pcie-lnksta-dllla",\ .value = "off",\ + },{\ + .driver = "migration",\ + .property = "send-configuration",\ + .value = "off",\ + },{\ + .driver = "migration",\ + .property = "send-section-footer",\ + .value = "off",\ + },{\ + .driver = "migration",\ + .property = "store-global-state",\ + .value = "off",\ }, #define HW_COMPAT_2_2 \ diff --git a/include/hw/input/ps2.h b/include/hw/input/ps2.h index 7f0a80af9d..94709b8502 100644 --- a/include/hw/input/ps2.h +++ b/include/hw/input/ps2.h @@ -36,8 +36,8 @@ void *ps2_kbd_init(void (*update_irq)(void *, int), void *update_arg); void *ps2_mouse_init(void (*update_irq)(void *, int), void *update_arg); void ps2_write_mouse(void *, int val); void ps2_write_keyboard(void *, int val); -uint32_t ps2_read_data(void *); -void ps2_queue(void *, int b); +uint32_t ps2_read_data(PS2State *s); +void ps2_queue(PS2State *s, int b); void ps2_keyboard_set_translation(void *opaque, int mode); void ps2_mouse_fake_event(void *opaque); diff --git a/include/hw/misc/mmio_interface.h b/include/hw/misc/mmio_interface.h new file mode 100644 index 0000000000..90d34fb228 --- /dev/null +++ b/include/hw/misc/mmio_interface.h @@ -0,0 +1,49 @@ +/* + * mmio_interface.h + * + * Copyright (C) 2017 : GreenSocs + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad <fred.konrad@greensocs.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option)any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. 
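[Editor's note: the ram_addr.h hunk at the top of this chunk fixes the dirty-bitmap sync to rebase the RAMBlock-relative start by rb->offset, since DirtyMemoryBlocks are indexed by global ram_addr_t. A standalone demo of the corrected index math; the constants mirror a 4 KiB target page, 64-bit longs, and QEMU's block size, to the best of my reading:]

    #include <stdio.h>

    #define TARGET_PAGE_BITS        12
    #define BITS_PER_LONG           64
    #define DIRTY_MEMORY_BLOCK_SIZE (256UL * 1024 * 8)
    #define BIT_WORD(nr)            ((nr) / BITS_PER_LONG)

    int main(void)
    {
        unsigned long start = 0x200000, rb_offset = 0x40000000;

        /* Fixed: index by the global address, start + rb->offset. */
        unsigned long word = BIT_WORD((start + rb_offset) >> TARGET_PAGE_BITS);
        unsigned long idx  = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long off  = BIT_WORD((word * BITS_PER_LONG)
                                      % DIRTY_MEMORY_BLOCK_SIZE);

        /* Without the rebase, the same math lands in the wrong place. */
        unsigned long bad_word = BIT_WORD(start >> TARGET_PAGE_BITS);

        printf("block %lu, word offset %lu (unrebased word %lu)\n",
               idx, off, bad_word);
        return 0;
    }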
+ * + */ + +#ifndef MMIO_INTERFACE_H +#define MMIO_INTERFACE_H + +#include "exec/memory.h" + +#define TYPE_MMIO_INTERFACE "mmio_interface" +#define MMIO_INTERFACE(obj) OBJECT_CHECK(MMIOInterface, (obj), \ + TYPE_MMIO_INTERFACE) + +typedef struct MMIOInterface { + DeviceState parent_obj; + + MemoryRegion *subregion; + MemoryRegion ram_mem; + uint64_t start; + uint64_t end; + bool ro; + uint64_t id; + void *host_ptr; +} MMIOInterface; + +void mmio_interface_map(MMIOInterface *s); +void mmio_interface_unmap(MMIOInterface *s); + +#endif /* MMIO_INTERFACE_H */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index f973b02845..a66bbac352 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -53,6 +53,7 @@ struct sPAPRMachineClass { bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ const char *tcg_default_cpu; /* which (TCG) CPU to simulate by default */ + bool pre_2_10_has_unused_icps; void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, @@ -86,16 +87,19 @@ struct sPAPRMachineState { uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; - sPAPROptionVector *ov5; /* QEMU-supported option vectors */ - sPAPROptionVector *ov5_cas; /* negotiated (via CAS) option vectors */ - bool cas_reboot; - bool cas_legacy_guest_workaround; Notifier epow_notifier; QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; bool use_hotplug_event_source; sPAPREventSource *event_sources; + /* ibm,client-architecture-support option negotiation */ + bool cas_reboot; + bool cas_legacy_guest_workaround; + sPAPROptionVector *ov5; /* QEMU-supported option vectors */ + sPAPROptionVector *ov5_cas; /* negotiated (via CAS) option vectors */ + uint32_t max_compat_pvr; + /* Migration state */ int htab_save_index; bool htab_first_pass; @@ -635,6 +639,7 @@ void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type, uint32_t count, uint32_t index); void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, uint32_t count, uint32_t index); +void spapr_cpu_parse_features(sPAPRMachineState *spapr); void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, sPAPRMachineState *spapr); diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index bc9f98851e..d9cacb368f 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -199,7 +199,6 @@ typedef struct sPAPRDRConnector { sPAPRConfigureConnectorState *ccs; bool awaiting_release; - bool signalled; bool awaiting_allocation; /* device pointer, via link property */ @@ -216,16 +215,11 @@ typedef struct sPAPRDRConnectorClass { const char *drc_name_prefix; /* used other places in device tree */ sPAPRDREntitySense (*dr_entity_sense)(sPAPRDRConnector *drc); - - /* accessors for guest-visible (generally via RTAS) DR state */ - uint32_t (*set_isolation_state)(sPAPRDRConnector *drc, - sPAPRDRIsolationState state); - uint32_t (*set_allocation_state)(sPAPRDRConnector *drc, - sPAPRDRAllocationState state); + uint32_t (*isolate)(sPAPRDRConnector *drc); + uint32_t (*unisolate)(sPAPRDRConnector *drc); /* QEMU interfaces for managing hotplug operations */ bool (*release_pending)(sPAPRDRConnector *drc); - void (*set_signalled)(sPAPRDRConnector *drc); } sPAPRDRConnectorClass; uint32_t spapr_drc_index(sPAPRDRConnector *drc); diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 
1e5c928f32..0604c337e0 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -177,6 +177,8 @@ extern PropertyInfo qdev_prop_arraylen; DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint16_t) #define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) +#define DEFINE_PROP_MEMORY_REGION(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, MemoryRegion *) #define DEFINE_PROP_END_OF_LIST() \ {} @@ -208,6 +210,35 @@ void error_set_from_qdev_prop_error(Error **errp, int ret, DeviceState *dev, Property *prop, const char *value); /** + * register_compat_prop: + * + * Register internal (not user-provided) global property, changing the + * default value of a given property in a device type. This can be used + * for enabling machine-type compatibility or for enabling + * accelerator-specific defaults in devices. + * + * The property values set using this function must be always valid and + * never report setter errors, as the property will have + * GlobalProperty::errp set to &error_abort. + * + * User-provided global properties should override internal global + * properties, so callers of this function should ensure that it is + * called before user-provided global properties are registered. + * + * @driver: Device type to be affected + * @property: Property whose default value is going to be changed + * @value: New default value for the property + */ +void register_compat_prop(const char *driver, const char *property, + const char *value); +/* + * register_compat_props_array(): using register_compat_prop(), which + * only registers internal global properties (which has lower priority + * than user-provided global properties) + */ +void register_compat_props_array(GlobalProperty *prop); + +/** * qdev_property_add_static: * @dev: Device to add the property to. * @prop: The qdev property definition. 
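[Editor's note: following the register_compat_prop() doc comment above, internal defaults registered this way are still overridable by user-supplied -global options. A hedged example of how an accelerator or machine might use both entry points; the property values are illustrative, not from this series:]

    static GlobalProperty my_accel_props[] = {
        { .driver = "virtio-pci", .property = "disable-modern", .value = "on" },
        { /* end of list */ },
    };

    static void my_accel_setup(void)
    {
        /* Whole array at once (what AccelClass::global_props feeds)... */
        register_compat_props_array(my_accel_props);

        /* ...or a single property at a time. */
        register_compat_prop("virtio-pci", "disable-modern", "on");
    }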
diff --git a/include/migration/global_state.h b/include/migration/global_state.h index 90faea72b4..d307de8350 100644 --- a/include/migration/global_state.h +++ b/include/migration/global_state.h @@ -16,7 +16,6 @@ #include "sysemu/sysemu.h" void register_global_state(void); -void global_state_set_optional(void); int global_state_store(void); void global_state_store_running(void); bool global_state_received(void); diff --git a/include/migration/misc.h b/include/migration/misc.h index 65c7070262..22551216bb 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -41,10 +41,9 @@ int64_t self_announce_delay(int round) /* migration/savevm.c */ void dump_vmstate_json_to_file(FILE *out_fp); -void savevm_skip_section_footers(void); -void savevm_skip_configuration(void); /* migration/migration.c */ +void migration_object_init(void); void qemu_start_incoming_migration(const char *uri, Error **errp); bool migration_is_idle(void); void add_migration_state_change_notifier(Notifier *notify); @@ -54,4 +53,7 @@ bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); /* ...and after the device transmission */ bool migration_in_postcopy_after_devices(MigrationState *); +void migration_only_migratable_set(void); +void migration_global_dump(Monitor *mon); + #endif diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index e85fbd81fc..85e43da568 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -155,6 +155,7 @@ typedef enum { struct VMStateField { const char *name; + const char *err_hint; size_t offset; size_t size; size_t start; @@ -256,6 +257,18 @@ extern const VMStateInfo vmstate_info_qtailq; .offset = vmstate_offset_value(_state, _field, _type), \ } +#define VMSTATE_SINGLE_FULL(_field, _state, _test, _version, _info, \ + _type, _err_hint) { \ + .name = (stringify(_field)), \ + .err_hint = (_err_hint), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = sizeof(_type), \ + .info = &(_info), \ + .flags = VMS_SINGLE, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + /* Validate state using a boolean predicate. 
*/ #define VMSTATE_VALIDATE(_name, _test) { \ .name = (_name), \ @@ -762,29 +775,35 @@ extern const VMStateInfo vmstate_info_qtailq; #define VMSTATE_UINT64(_f, _s) \ VMSTATE_UINT64_V(_f, _s, 0) -#define VMSTATE_UINT8_EQUAL(_f, _s) \ - VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t) +#define VMSTATE_UINT8_EQUAL(_f, _s, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \ + vmstate_info_uint8_equal, uint8_t, _err_hint) -#define VMSTATE_UINT16_EQUAL(_f, _s) \ - VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint16_equal, uint16_t) +#define VMSTATE_UINT16_EQUAL(_f, _s, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \ + vmstate_info_uint16_equal, uint16_t, _err_hint) -#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v) \ - VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16_equal, uint16_t) +#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, _v, \ + vmstate_info_uint16_equal, uint16_t, _err_hint) -#define VMSTATE_INT32_EQUAL(_f, _s) \ - VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t) +#define VMSTATE_INT32_EQUAL(_f, _s, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, 0, \ + vmstate_info_int32_equal, int32_t, _err_hint) -#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v) \ - VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t) +#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, _v, \ + vmstate_info_uint32_equal, uint32_t, _err_hint) -#define VMSTATE_UINT32_EQUAL(_f, _s) \ - VMSTATE_UINT32_EQUAL_V(_f, _s, 0) +#define VMSTATE_UINT32_EQUAL(_f, _s, _err_hint) \ + VMSTATE_UINT32_EQUAL_V(_f, _s, 0, _err_hint) -#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v) \ - VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t) +#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v, _err_hint) \ + VMSTATE_SINGLE_FULL(_f, _s, 0, _v, \ + vmstate_info_uint64_equal, uint64_t, _err_hint) -#define VMSTATE_UINT64_EQUAL(_f, _s) \ - VMSTATE_UINT64_EQUAL_V(_f, _s, 0) +#define VMSTATE_UINT64_EQUAL(_f, _s, _err_hint) \ + VMSTATE_UINT64_EQUAL_V(_f, _s, 0, _err_hint) #define VMSTATE_INT32_POSITIVE_LE(_f, _s) \ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t) diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index f745d5faf7..2706aabedf 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -76,6 +76,7 @@ typedef struct PixelFormat PixelFormat; typedef struct PostcopyDiscardState PostcopyDiscardState; typedef struct Property Property; typedef struct PropertyInfo PropertyInfo; +typedef struct PS2State PS2State; typedef struct QEMUBH QEMUBH; typedef struct QemuConsole QemuConsole; typedef struct QEMUFile QEMUFile; diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h index 15944c152c..ecc5c84621 100644 --- a/include/sysemu/accel.h +++ b/include/sysemu/accel.h @@ -24,6 +24,7 @@ #define HW_ACCEL_H #include "qom/object.h" +#include "hw/qdev-properties.h" typedef struct AccelState { /*< private >*/ @@ -40,6 +41,14 @@ typedef struct AccelClass { int (*available)(void); int (*init_machine)(MachineState *ms); bool *allowed; + /* + * Array of global properties that would be applied when specific + * accelerator is chosen. It works like MachineClass.compat_props + * but it's for accelerators not machines. Accelerator-provided + * global properties may be overridden by machine-type + * compat_props or user-provided global properties. 
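+ * + * For example (illustrative names only), an entry such as + * { .driver = "foo-device", .property = "bar", .value = "off" } + * placed in this array would make "off" the default for "bar" on + * all "foo-device" instances whenever this accelerator is selected.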
+ */ + GlobalProperty *global_props; } AccelClass; #define TYPE_ACCEL "accel" @@ -57,5 +66,7 @@ typedef struct AccelClass { extern int tcg_tb_size; void configure_accelerator(MachineState *ms); +/* Register accelerator specific global properties */ +void accel_register_compat_props(AccelState *accel); #endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 999eb2333a..1e05281fff 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -130,7 +130,7 @@ BlockBackend *blk_by_dev(void *dev); BlockBackend *blk_by_qdev_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, - int count); + int bytes); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); @@ -138,13 +138,13 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags); + int bytes, BdrvRequestFlags flags); BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags, + int bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); -int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count); -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, BdrvRequestFlags flags); int64_t blk_getlength(BlockBackend *blk); void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); @@ -157,7 +157,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int count, +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque); void blk_aio_cancel(BlockAIOCB *acb); void blk_aio_cancel_async(BlockAIOCB *acb); @@ -165,7 +165,7 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); -int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count); +int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes); int blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); int blk_commit_all(void); @@ -220,11 +220,11 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags); + int bytes, BdrvRequestFlags flags); int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int count); + int bytes); int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int count); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); int 
blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size); int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 9841a527a1..b21369672a 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -15,7 +15,6 @@ /* vl.c */ extern const char *bios_name; -extern int only_migratable; extern const char *qemu_name; extern QemuUUID qemu_uuid; extern bool qemu_uuid_set; diff --git a/linux-user/elfload.c b/linux-user/elfload.c index ce77317e09..2a902f7806 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -802,14 +802,15 @@ static uint32_t get_elf_hwcap2(void) #define ARCH_DLINFO \ do { \ PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); \ - NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \ - NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ /* \ - * Now handle glibc compatibility. \ + * Handle glibc compatibility: these magic entries must \ + * be at the lowest addresses in the final auxv. \ */ \ NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ + NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \ + NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \ + NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ } while (0) static inline void init_thread(struct target_pt_regs *_regs, struct image_info *infop) @@ -1760,6 +1761,13 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } while(0) /* There must be exactly DLINFO_ITEMS entries here. */ +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come first so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). + */ + ARCH_DLINFO; +#endif NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff)); NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); @@ -1782,13 +1790,6 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, if (u_platform) { NEW_AUX_ENT(AT_PLATFORM, u_platform); } -#ifdef ARCH_DLINFO - /* - * ARCH_DLINFO must come last so platform specific code can enforce - * special alignment requirements on the AUXV if necessary (eg. PPC). - */ - ARCH_DLINFO; -#endif NEW_AUX_ENT (AT_NULL, 0); #undef NEW_AUX_ENT @@ -30,6 +30,8 @@ #include "exec/ram_addr.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" +#include "hw/misc/mmio_interface.h" +#include "hw/qdev-properties.h" //#define DEBUG_UNASSIGNED @@ -2430,6 +2432,115 @@ void memory_listener_unregister(MemoryListener *listener) listener->address_space = NULL; } +bool memory_region_request_mmio_ptr(MemoryRegion *mr, hwaddr addr) +{ + void *host; + unsigned size = 0; + unsigned offset = 0; + Object *new_interface; + + if (!mr || !mr->ops->request_ptr) { + return false; + } + + /* + * Avoid an update if the request_ptr callback itself calls + * memory_region_invalidate_mmio_ptr, which seems likely when we use + * a cache. 
+ */ + memory_region_transaction_begin(); + + host = mr->ops->request_ptr(mr->opaque, addr - mr->addr, &size, &offset); + + if (!host || !size) { + memory_region_transaction_commit(); + return false; + } + + new_interface = object_new("mmio_interface"); + qdev_prop_set_uint64(DEVICE(new_interface), "start", offset); + qdev_prop_set_uint64(DEVICE(new_interface), "end", offset + size - 1); + qdev_prop_set_bit(DEVICE(new_interface), "ro", true); + qdev_prop_set_ptr(DEVICE(new_interface), "host_ptr", host); + qdev_prop_set_ptr(DEVICE(new_interface), "subregion", mr); + object_property_set_bool(OBJECT(new_interface), true, "realized", NULL); + + memory_region_transaction_commit(); + return true; +} + +typedef struct MMIOPtrInvalidate { + MemoryRegion *mr; + hwaddr offset; + unsigned size; + int busy; + int allocated; +} MMIOPtrInvalidate; + +#define MAX_MMIO_INVALIDATE 10 +static MMIOPtrInvalidate mmio_ptr_invalidate_list[MAX_MMIO_INVALIDATE]; + +static void memory_region_do_invalidate_mmio_ptr(CPUState *cpu, + run_on_cpu_data data) +{ + MMIOPtrInvalidate *invalidate_data = (MMIOPtrInvalidate *)data.host_ptr; + MemoryRegion *mr = invalidate_data->mr; + hwaddr offset = invalidate_data->offset; + unsigned size = invalidate_data->size; + MemoryRegionSection section = memory_region_find(mr, offset, size); + + qemu_mutex_lock_iothread(); + + /* Reset dirty so this doesn't happen later. */ + cpu_physical_memory_test_and_clear_dirty(offset, size, 1); + + if (section.mr != mr) { + /* memory_region_find adds a ref on section.mr */ + memory_region_unref(section.mr); + if (MMIO_INTERFACE(section.mr->owner)) { + /* We found the interface; just drop it. */ + object_property_set_bool(section.mr->owner, false, "realized", + NULL); + object_unref(section.mr->owner); + object_unparent(section.mr->owner); + } + } + + qemu_mutex_unlock_iothread(); + + if (invalidate_data->allocated) { + g_free(invalidate_data); + } else { + invalidate_data->busy = 0; + } +} + +void memory_region_invalidate_mmio_ptr(MemoryRegion *mr, hwaddr offset, + unsigned size) +{ + size_t i; + MMIOPtrInvalidate *invalidate_data = NULL; + + for (i = 0; i < MAX_MMIO_INVALIDATE; i++) { + if (atomic_cmpxchg(&(mmio_ptr_invalidate_list[i].busy), 0, 1) == 0) { + invalidate_data = &mmio_ptr_invalidate_list[i]; + break; + } + } + + if (!invalidate_data) { + invalidate_data = g_malloc0(sizeof(MMIOPtrInvalidate)); + invalidate_data->allocated = 1; + } + + invalidate_data->mr = mr; + invalidate_data->offset = offset; + invalidate_data->size = size; + + async_safe_run_on_cpu(first_cpu, memory_region_do_invalidate_mmio_ptr, + RUN_ON_CPU_HOST_PTR(invalidate_data)); +} + void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) { memory_region_ref(root); diff --git a/migration/global_state.c b/migration/global_state.c index f792cf5242..dcbbcb28be 100644 --- a/migration/global_state.c +++ b/migration/global_state.c @@ -15,12 +15,12 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/util.h" +#include "migration.h" #include "migration/global_state.h" #include "migration/vmstate.h" #include "trace.h" typedef struct { - bool optional; uint32_t size; uint8_t runstate[100]; RunState state; @@ -57,11 +57,6 @@ RunState global_state_get_runstate(void) return global_state.state; } -void global_state_set_optional(void) -{ - global_state.optional = true; -} - static bool global_state_needed(void *opaque) { GlobalState *s = opaque; @@ -69,7 +64,7 @@ static bool global_state_needed(void *opaque) /* If it is not optional, it is 
mandatory */ - if (s->optional == false) { + if (migrate_get_current()->store_global_state) { return true; } diff --git a/migration/migration.c b/migration/migration.c index f588329f4c..51ccd1a4c5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -42,6 +42,8 @@ #include "exec/target_page.h" #include "io/channel-buffer.h" #include "migration/colo.h" +#include "hw/boards.h" +#include "monitor/monitor.h" #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ @@ -98,32 +100,37 @@ enum mig_rp_message_type { migrations at once. For now we don't need to add dynamic creation of migration */ +static MigrationState *current_migration; + +void migration_object_init(void) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + + /* This can only be called once. */ + assert(!current_migration); + current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION)); + + /* + * We cannot really do this in migration_instance_init(): at that + * point global properties have not been applied yet, so whatever + * value we set here would be overridden later anyway. + */ + if (ms->enforce_config_section) { + current_migration->send_configuration = true; + } +} + /* For outgoing */ MigrationState *migrate_get_current(void) { - static bool once; - static MigrationState current_migration = { - .state = MIGRATION_STATUS_NONE, - .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE, - .mbps = -1, - .parameters = { - .compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL, - .compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, - .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, - .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL, - .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT, - .max_bandwidth = MAX_THROTTLE, - .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME, - .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY, - }, - }; + /* This can only be called after the object is created. */ + assert(current_migration); + return current_migration; +} - if (!once) { - current_migration.parameters.tls_creds = g_strdup(""); - current_migration.parameters.tls_hostname = g_strdup(""); - once = true; - } - return &current_migration; +void migration_only_migratable_set(void) +{ + migrate_get_current()->only_migratable = true; } MigrationIncomingState *migration_incoming_get_current(void) @@ -997,7 +1004,7 @@ static GSList *migration_blockers; int migrate_add_blocker(Error *reason, Error **errp) { - if (only_migratable) { + if (migrate_get_current()->only_migratable) { error_propagate(errp, error_copy(reason)); error_prepend(errp, "disallowing migration blocker " "(--only_migratable) for: "); @@ -1304,6 +1311,15 @@ bool migrate_use_block(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; } +bool migrate_use_return_path(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; +} + bool migrate_use_block_incremental(void) { MigrationState *s; @@ -1968,10 +1984,11 @@ void migrate_fd_connect(MigrationState *s) notifier_list_notify(&migration_state_notifiers, s); /* - * Open the return path; currently for postcopy but other things might - * also want it. + * Open the return path. For postcopy, it is used exclusively. For + * precopy, QEMU uses the return path only if the user enabled the + * "return-path" capability. 
*/ - if (migrate_postcopy_ram()) { + if (migrate_postcopy_ram() || migrate_use_return_path()) { if (open_return_path_on_source(s)) { error_report("Unable to open return-path for postcopy"); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, @@ -1987,3 +2004,76 @@ void migrate_fd_connect(MigrationState *s) s->migration_thread_running = true; } +void migration_global_dump(Monitor *mon) +{ + MigrationState *ms = migrate_get_current(); + + monitor_printf(mon, "globals: store-global-state=%d, only_migratable=%d, " + "send-configuration=%d, send-section-footer=%d\n", + ms->store_global_state, ms->only_migratable, + ms->send_configuration, ms->send_section_footer); +} + +static Property migration_properties[] = { + DEFINE_PROP_BOOL("store-global-state", MigrationState, + store_global_state, true), + DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, false), + DEFINE_PROP_BOOL("send-configuration", MigrationState, + send_configuration, true), + DEFINE_PROP_BOOL("send-section-footer", MigrationState, + send_section_footer, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void migration_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->user_creatable = false; + dc->props = migration_properties; +} + +static void migration_instance_init(Object *obj) +{ + MigrationState *ms = MIGRATION_OBJ(obj); + + ms->state = MIGRATION_STATUS_NONE; + ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; + ms->mbps = -1; + ms->parameters = (MigrationParameters) { + .compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL, + .compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, + .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, + .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL, + .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT, + .max_bandwidth = MAX_THROTTLE, + .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME, + .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY, + }; + ms->parameters.tls_creds = g_strdup(""); + ms->parameters.tls_hostname = g_strdup(""); +} + +static const TypeInfo migration_type = { + .name = TYPE_MIGRATION, + /* + * NOTE: "migration" itself is not really a device. We used + * TYPE_DEVICE here only to leverage some existing QDev features + * like "-global" properties, and HW_COMPAT_* fields (which are + * finally applied as global properties as well). If one day the + * global property feature can be migrated from QDev to QObject in + * general, then we can switch to QObject as well. 
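+ * + * A side benefit of the TYPE_DEVICE trick, for illustration: the + * fields declared in migration_properties[] above become tunable + * like any other device property, e.g. via a command line such as + * "-global migration.send-section-footer=off".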
+ */ + .parent = TYPE_DEVICE, + .class_init = migration_class_init, + .class_size = sizeof(MigrationClass), + .instance_size = sizeof(MigrationState), + .instance_init = migration_instance_init, +}; + +static void register_migration_types(void) +{ + type_register_static(&migration_type); +} + +type_init(register_migration_types); diff --git a/migration/migration.h b/migration/migration.h index d9a268a3af..148c9facbc 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -19,6 +19,7 @@ #include "qapi-types.h" #include "exec/cpu-common.h" #include "qemu/coroutine_int.h" +#include "hw/qdev.h" /* State for the incoming migration */ struct MigrationIncomingState { @@ -62,8 +63,26 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +#define TYPE_MIGRATION "migration" + +#define MIGRATION_CLASS(klass) \ + OBJECT_CLASS_CHECK(MigrationClass, (klass), TYPE_MIGRATION) +#define MIGRATION_OBJ(obj) \ + OBJECT_CHECK(MigrationState, (obj), TYPE_MIGRATION) +#define MIGRATION_GET_CLASS(obj) \ + OBJECT_GET_CLASS(MigrationClass, (obj), TYPE_MIGRATION) + +typedef struct MigrationClass { + /*< private >*/ + DeviceClass parent_class; +} MigrationClass; + struct MigrationState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ size_t bytes_xfer; size_t xfer_limit; QemuThread thread; @@ -114,6 +133,20 @@ struct MigrationState /* Do we have to clean up -b/-i from old migrate parameters */ /* This feature is deprecated and will be removed */ bool must_remove_block_options; + + /* + * Global switch on whether we need to store the global state + * during migration. + */ + bool store_global_state; + + /* Whether the VM is only allowing for migratable devices */ + bool only_migratable; + + /* Whether we send QEMU_VM_CONFIGURATION during migration */ + bool send_configuration; + /* Whether we send section footer during migration */ + bool send_section_footer; }; void migrate_set_state(int *state, int old_state, int new_state); @@ -144,6 +177,7 @@ bool migrate_colo_enabled(void); bool migrate_use_block(void); bool migrate_use_block_incremental(void); +bool migrate_use_return_path(void); bool migrate_use_compression(void); int migrate_compress_level(void); diff --git a/migration/savevm.c b/migration/savevm.c index 6bfd4893e0..be3f885119 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -62,8 +62,6 @@ const unsigned int postcopy_ram_discard_version = 0; -static bool skip_section_footers; - /* Subcommands for QEMU_VM_COMMAND */ enum qemu_vm_cmd { MIG_CMD_INVALID = 0, /* Must be 0 */ @@ -287,7 +285,6 @@ typedef struct SaveStateEntry { typedef struct SaveState { QTAILQ_HEAD(, SaveStateEntry) handlers; int global_section_id; - bool skip_configuration; uint32_t len; const char *name; uint32_t target_page_bits; @@ -296,15 +293,8 @@ typedef struct SaveState { static SaveState savevm_state = { .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), .global_section_id = 0, - .skip_configuration = false, }; -void savevm_skip_configuration(void) -{ - savevm_state.skip_configuration = true; -} - - static void configuration_pre_save(void *opaque) { SaveState *state = opaque; @@ -769,11 +759,6 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc) vmstate_save_state(f, se->vmsd, se->opaque, vmdesc); } -void savevm_skip_section_footers(void) -{ - skip_section_footers = true; -} - /* * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL) */ @@ -801,7 +786,7 @@ static void 
save_section_header(QEMUFile *f, SaveStateEntry *se, */ static void save_section_footer(QEMUFile *f, SaveStateEntry *se) { - if (!skip_section_footers) { + if (migrate_get_current()->send_section_footer) { qemu_put_byte(f, QEMU_VM_SECTION_FOOTER); qemu_put_be32(f, se->section_id); } @@ -958,23 +943,16 @@ bool qemu_savevm_state_blocked(Error **errp) return false; } -static bool enforce_config_section(void) -{ - MachineState *machine = MACHINE(qdev_get_machine()); - return machine->enforce_config_section; -} - void qemu_savevm_state_header(QEMUFile *f) { trace_savevm_state_header(); qemu_put_be32(f, QEMU_VM_FILE_MAGIC); qemu_put_be32(f, QEMU_VM_FILE_VERSION); - if (!savevm_state.skip_configuration || enforce_config_section()) { + if (migrate_get_current()->send_configuration) { qemu_put_byte(f, QEMU_VM_CONFIGURATION); vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0); } - } void qemu_savevm_state_begin(QEMUFile *f) @@ -1810,7 +1788,7 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se) uint8_t read_mark; uint32_t read_section_id; - if (skip_section_footers) { + if (!migrate_get_current()->send_section_footer) { /* No footer to check */ return true; } @@ -1995,7 +1973,7 @@ int qemu_loadvm_state(QEMUFile *f) return -ENOTSUP; } - if (!savevm_state.skip_configuration || enforce_config_section()) { + if (migrate_get_current()->send_configuration) { if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { error_report("Configuration section missing"); return -EINVAL; @@ -2107,6 +2085,8 @@ int save_snapshot(const char *name, Error **errp) } vm_stop(RUN_STATE_SAVE_VM); + bdrv_drain_all_begin(); + aio_context_acquire(aio_context); memset(sn, 0, sizeof(*sn)); @@ -2144,6 +2124,14 @@ int save_snapshot(const char *name, Error **errp) goto the_end; } + /* The bdrv_all_create_snapshot() call that follows acquires the AioContext + * for itself. BDRV_POLL_WHILE() does not support nested locking because + * it only releases the lock once. Therefore synchronous I/O will deadlock + * unless we release the AioContext before bdrv_all_create_snapshot(). + */ + aio_context_release(aio_context); + aio_context = NULL; + ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs); if (ret < 0) { error_setg(errp, "Error while creating snapshot on '%s'", @@ -2154,7 +2142,12 @@ int save_snapshot(const char *name, Error **errp) ret = 0; the_end: - aio_context_release(aio_context); + if (aio_context) { + aio_context_release(aio_context); + } + + bdrv_drain_all_end(); + if (saved_vm_running) { vm_start(); } @@ -2263,20 +2256,21 @@ int load_snapshot(const char *name, Error **errp) } /* Flush all IO requests so they don't interfere with the new state. 
*/ - bdrv_drain_all(); + bdrv_drain_all_begin(); ret = bdrv_all_goto_snapshot(name, &bs); if (ret < 0) { error_setg(errp, "Error %d while activating snapshot '%s' on '%s'", ret, name, bdrv_get_device_name(bs)); - return ret; + goto err_drain; } /* restore the VM state */ f = qemu_fopen_bdrv(bs_vm_state, 0); if (!f) { error_setg(errp, "Could not open VM state file"); - return -EINVAL; + ret = -EINVAL; + goto err_drain; } qemu_system_reset(SHUTDOWN_CAUSE_NONE); @@ -2284,15 +2278,21 @@ int load_snapshot(const char *name, Error **errp) aio_context_acquire(aio_context); ret = qemu_loadvm_state(f); + migration_incoming_state_destroy(); aio_context_release(aio_context); - migration_incoming_state_destroy(); + bdrv_drain_all_end(); + if (ret < 0) { error_setg(errp, "Error %d while loading VM state", ret); return ret; } return 0; + +err_drain: + bdrv_drain_all_end(); + return ret; } void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) @@ -2314,7 +2314,7 @@ void vmstate_register_ram_global(MemoryRegion *mr) bool vmstate_check_only_migratable(const VMStateDescription *vmsd) { /* check needed if --only-migratable is specified */ - if (!only_migratable) { + if (!migrate_get_current()->only_migratable) { return true; } diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index 02f05a3359..c056c98bdb 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -126,6 +126,9 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size, return 0; } error_report("%" PRIx32 " != %" PRIx32, *v, v2); + if (field->err_hint) { + error_printf("%s\n", field->err_hint); + } return -EINVAL; } @@ -267,6 +270,9 @@ static int get_uint32_equal(QEMUFile *f, void *pv, size_t size, return 0; } error_report("%" PRIx32 " != %" PRIx32, *v, v2); + if (field->err_hint) { + error_printf("%s\n", field->err_hint); + } return -EINVAL; } @@ -341,6 +347,9 @@ static int get_uint64_equal(QEMUFile *f, void *pv, size_t size, return 0; } error_report("%" PRIx64 " != %" PRIx64, *v, v2); + if (field->err_hint) { + error_printf("%s\n", field->err_hint); + } return -EINVAL; } @@ -364,6 +373,9 @@ static int get_uint8_equal(QEMUFile *f, void *pv, size_t size, return 0; } error_report("%x != %x", *v, v2); + if (field->err_hint) { + error_printf("%s\n", field->err_hint); + } return -EINVAL; } @@ -387,6 +399,9 @@ static int get_uint16_equal(QEMUFile *f, void *pv, size_t size, return 0; } error_report("%x != %x", *v, v2); + if (field->err_hint) { + error_printf("%s\n", field->err_hint); + } return -EINVAL; } @@ -1078,13 +1078,24 @@ int monitor_get_cpu_index(void) static void hmp_info_registers(Monitor *mon, const QDict *qdict) { - CPUState *cs = mon_get_cpu(); + bool all_cpus = qdict_get_try_bool(qdict, "cpustate_all", false); + CPUState *cs; - if (!cs) { - monitor_printf(mon, "No CPU available\n"); - return; + if (all_cpus) { + CPU_FOREACH(cs) { + monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index); + cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU); + } + } else { + cs = mon_get_cpu(); + + if (!cs) { + monitor_printf(mon, "No CPU available\n"); + return; + } + + cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU); } - cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU); } static void hmp_info_jit(Monitor *mon, const QDict *qdict) diff --git a/qapi-schema.json b/qapi-schema.json index 4b50b652d3..37c4b95aad 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -900,12 +900,15 @@ # offers more flexibility. 
# (Since 2.10) # +# @return-path: If enabled, migration will use the return path even +# for precopy. (since 2.10) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block' ] } + 'block', 'return-path' ] } ## # @MigrationCapabilityStatus: @@ -5114,6 +5117,26 @@ { 'command': 'chardev-remove', 'data': {'id': 'str'} } ## +# @chardev-send-break: +# +# Send a break to a character device +# +# @id: the chardev's ID, must exist +# +# Returns: Nothing on success +# +# Since: 2.10 +# +# Example: +# +# -> { "execute": "chardev-send-break", "arguments": { "id" : "foo" } } +# <- { "return": {} } +# +## +{ 'command': 'chardev-send-break', 'data': {'id': 'str'} } + + +## # @TpmModel: # # An enumeration of TPM models diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c index c089491c24..63ae115b2a 100644 --- a/qapi/string-input-visitor.c +++ b/qapi/string-input-visitor.c @@ -326,6 +326,16 @@ static void parse_type_number(Visitor *v, const char *name, double *obj, *obj = val; } +static void parse_type_null(Visitor *v, const char *name, Error **errp) +{ + StringInputVisitor *siv = to_siv(v); + + if (!siv->string || siv->string[0]) { + error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", + "null"); + } +} + static void string_input_free(Visitor *v) { StringInputVisitor *siv = to_siv(v); @@ -349,6 +359,7 @@ Visitor *string_input_visitor_new(const char *str) v->visitor.type_bool = parse_type_bool; v->visitor.type_str = parse_type_str; v->visitor.type_number = parse_type_number; + v->visitor.type_null = parse_type_null; v->visitor.start_list = start_list; v->visitor.next_list = next_list; v->visitor.check_list = check_list; diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c index 53c2175d81..af649e1d6e 100644 --- a/qapi/string-output-visitor.c +++ b/qapi/string-output-visitor.c @@ -256,6 +256,19 @@ static void print_type_number(Visitor *v, const char *name, double *obj, string_output_set(sov, g_strdup_printf("%f", *obj)); } +static void print_type_null(Visitor *v, const char *name, Error **errp) +{ + StringOutputVisitor *sov = to_sov(v); + char *out; + + if (sov->human) { + out = g_strdup("<null>"); + } else { + out = g_strdup(""); + } + string_output_set(sov, out); +} + static void start_list(Visitor *v, const char *name, GenericList **list, size_t size, Error **errp) @@ -341,6 +354,7 @@ Visitor *string_output_visitor_new(bool human, char **result) v->visitor.type_bool = print_type_bool; v->visitor.type_str = print_type_str; v->visitor.type_number = print_type_number; + v->visitor.type_null = print_type_null; v->visitor.start_list = start_list; v->visitor.next_list = next_list; v->visitor.end_list = end_list; diff --git a/qemu-img.c b/qemu-img.c index 0ad698d7f1..91ad6bebbf 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -887,22 +887,28 @@ static void common_block_job_cb(void *opaque, int ret) static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + int ret = 0; - /* FIXME In error cases, the job simply goes away and we access a dangling - * pointer below. */ aio_context_acquire(aio_context); + block_job_ref(job); do { aio_poll(aio_context, true); qemu_progress_print(job->len ? 
((float)job->offset / job->len * 100.f) : 0.0f, 0); - } while (!job->ready); + } while (!job->ready && !job->completed); - block_job_complete_sync(job, errp); + if (!job->completed) { + ret = block_job_complete_sync(job, errp); + } else { + ret = job->ret; + } + block_job_unref(job); aio_context_release(aio_context); - /* A block job may finish instantaneously without publishing any progress, - * so just signal completion here */ - qemu_progress_print(100.f, 0); + /* publish completion progress only when success */ + if (!ret) { + qemu_progress_print(100.f, 0); + } } static int img_commit(int argc, char **argv) @@ -4249,15 +4255,12 @@ static int img_dd(int argc, char **argv) case 'U': force_share = true; break; - case OPTION_OBJECT: { - QemuOpts *opts; - opts = qemu_opts_parse_noisily(&qemu_object_opts, - optarg, true); - if (!opts) { + case OPTION_OBJECT: + if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) { ret = -1; goto out; } - } break; + break; case OPTION_IMAGE_OPTS: image_opts = true; break; diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 4b2278f040..b0ea327024 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -451,13 +451,13 @@ fail: } static int do_pread(BlockBackend *blk, char *buf, int64_t offset, - int64_t count, int64_t *total) + int64_t bytes, int64_t *total) { - if (count > INT_MAX) { + if (bytes > INT_MAX) { return -ERANGE; } - *total = blk_pread(blk, offset, (uint8_t *)buf, count); + *total = blk_pread(blk, offset, (uint8_t *)buf, bytes); if (*total < 0) { return *total; } @@ -465,13 +465,13 @@ static int do_pread(BlockBackend *blk, char *buf, int64_t offset, } static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, - int64_t count, int flags, int64_t *total) + int64_t bytes, int flags, int64_t *total) { - if (count > INT_MAX) { + if (bytes > INT_MAX) { return -ERANGE; } - *total = blk_pwrite(blk, offset, (uint8_t *)buf, count, flags); + *total = blk_pwrite(blk, offset, (uint8_t *)buf, bytes, flags); if (*total < 0) { return *total; } @@ -481,7 +481,7 @@ static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, typedef struct { BlockBackend *blk; int64_t offset; - int64_t count; + int64_t bytes; int64_t *total; int flags; int ret; @@ -492,7 +492,7 @@ static void coroutine_fn co_pwrite_zeroes_entry(void *opaque) { CoWriteZeroes *data = opaque; - data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->count, + data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->bytes, data->flags); data->done = true; if (data->ret < 0) { @@ -500,23 +500,23 @@ static void coroutine_fn co_pwrite_zeroes_entry(void *opaque) return; } - *data->total = data->count; + *data->total = data->bytes; } static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t count, int flags, int64_t *total) + int64_t bytes, int flags, int64_t *total) { Coroutine *co; CoWriteZeroes data = { .blk = blk, .offset = offset, - .count = count, + .bytes = bytes, .total = total, .flags = flags, .done = false, }; - if (count > INT_MAX) { + if (bytes > INT_MAX) { return -ERANGE; } @@ -533,19 +533,19 @@ static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, } static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, - int64_t count, int64_t *total) + int64_t bytes, int64_t *total) { int ret; - if (count >> 9 > BDRV_REQUEST_MAX_SECTORS) { + if (bytes >> 9 > BDRV_REQUEST_MAX_SECTORS) { return -ERANGE; } - ret = blk_pwrite_compressed(blk, offset, buf, count); + ret = blk_pwrite_compressed(blk, offset, buf, bytes); if (ret < 0) 
{ return ret; } - *total = count; + *total = bytes; return 1; } @@ -1701,7 +1701,7 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) struct timeval t1, t2; bool Cflag = false, qflag = false; int c, ret; - int64_t offset, count; + int64_t offset, bytes; while ((c = getopt(argc, argv, "Cq")) != -1) { switch (c) { @@ -1727,11 +1727,11 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) } optind++; - count = cvtnum(argv[optind]); - if (count < 0) { - print_cvtnum_err(count, argv[optind]); + bytes = cvtnum(argv[optind]); + if (bytes < 0) { + print_cvtnum_err(bytes, argv[optind]); return 0; - } else if (count >> BDRV_SECTOR_BITS > BDRV_REQUEST_MAX_SECTORS) { + } else if (bytes >> BDRV_SECTOR_BITS > BDRV_REQUEST_MAX_SECTORS) { printf("length cannot exceed %"PRIu64", given %s\n", (uint64_t)BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS, argv[optind]); @@ -1739,7 +1739,7 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) } gettimeofday(&t1, NULL); - ret = blk_pdiscard(blk, offset, count); + ret = blk_pdiscard(blk, offset, bytes); gettimeofday(&t2, NULL); if (ret < 0) { @@ -1750,7 +1750,7 @@ static int discard_f(BlockBackend *blk, int argc, char **argv) /* Finally, report back -- -C gives a parsable format */ if (!qflag) { t2 = tsub(t2, t1); - print_report("discard", &t2, offset, count, count, 1, Cflag); + print_report("discard", &t2, offset, bytes, bytes, 1, Cflag); } out: diff --git a/qemu-options.hx b/qemu-options.hx index 30c4f9850f..297bd8aca4 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -610,6 +610,166 @@ DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev, " [,read-only=on|off][,detect-zeroes=on|off|unmap]\n" " [,driver specific parameters...]\n" " configure a block backend\n", QEMU_ARCH_ALL) +STEXI +@item -blockdev @var{option}[,@var{option}[,@var{option}[,...]]] +@findex -blockdev + +Define a new block driver node. Some of the options apply to all block drivers, +other options are only accepted for a specific block driver. See below for a +list of generic options and options for the most common block drivers. + +Options that expect a reference to another node (e.g. @code{file}) can be +given in two ways. Either you specify the node name of an already existing node +(file=@var{node-name}), or you define a new node inline, adding options +for the referenced node after a dot (file.filename=@var{path},file.aio=native). + +A block driver node created with @option{-blockdev} can be used for a guest +device by specifying its node name for the @code{drive} property in a +@option{-device} argument that defines a block device. + +@table @option +@item Valid options for any block driver node: + +@table @code +@item driver +Specifies the block driver to use for the given node. +@item node-name +This defines the name of the block driver node by which it will be referenced +later. The name must be unique, i.e. it must not match the name of a different +block driver node, or (if you use @option{-drive} as well) the ID of a drive. + +If no node name is specified, it is automatically generated. The generated node +name is not intended to be predictable and changes between QEMU invocations. +For the top level, an explicit node name must be specified. +@item read-only +Open the node read-only. Guest write attempts will fail. +@item cache.direct +The host page cache can be avoided with @option{cache.direct=on}. This will +attempt to do disk IO directly to the guest's memory. QEMU may still perform an +internal copy of the data. 
+@item cache.no-flush +In case you don't care about data integrity over host failures, you can use +@option{cache.no-flush=on}. This option tells QEMU that it never needs to write +any data to the disk but can instead keep things in cache. If anything goes +wrong, like your host losing power, the disk storage getting disconnected +accidentally, etc. your image will most probably be rendered unusable. +@item discard=@var{discard} +@var{discard} is one of "ignore" (or "off") or "unmap" (or "on") and controls +whether @code{discard} (also known as @code{trim} or @code{unmap}) requests are +ignored or passed to the filesystem. Some machine types may not support +discard requests. +@item detect-zeroes=@var{detect-zeroes} +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic +conversion of plain zero writes by the OS to driver specific optimized +zero write commands. You may even choose "unmap" if @var{discard} is set +to "unmap" to allow a zero write to be converted to an @code{unmap} operation. +@end table + +@item Driver-specific options for @code{file} + +This is the protocol-level block driver for accessing regular files. + +@table @code +@item filename +The path to the image file in the local filesystem +@item aio +Specifies the AIO backend (threads/native, default: threads) +@end table +Example: +@example +-blockdev driver=file,node-name=disk,filename=disk.img +@end example + +@item Driver-specific options for @code{raw} + +This is the image format block driver for raw images. It is usually +stacked on top of a protocol level block driver such as @code{file}. + +@table @code +@item file +Reference to or definition of the data source block driver node +(e.g. a @code{file} driver node) +@end table +Example 1: +@example +-blockdev driver=file,node-name=disk_file,filename=disk.img +-blockdev driver=raw,node-name=disk,file=disk_file +@end example +Example 2: +@example +-blockdev driver=raw,node-name=disk,file.driver=file,file.filename=disk.img +@end example + +@item Driver-specific options for @code{qcow2} + +This is the image format block driver for qcow2 images. It is usually +stacked on top of a protocol level block driver such as @code{file}. + +@table @code +@item file +Reference to or definition of the data source block driver node +(e.g. a @code{file} driver node) + +@item backing +Reference to or definition of the backing file block device (default is taken +from the image file). It is allowed to pass an empty string here in order to +disable the default backing file. + +@item lazy-refcounts +Whether to enable the lazy refcounts feature (on/off; default is taken from the +image file) + +@item cache-size +The maximum total size of the L2 table and refcount block caches in bytes +(default: 1048576 bytes or 8 clusters, whichever is larger) + +@item l2-cache-size +The maximum size of the L2 table cache in bytes +(default: 4/5 of the total cache size) + +@item refcount-cache-size +The maximum size of the refcount block cache in bytes +(default: 1/5 of the total cache size) + +@item cache-clean-interval +Clean unused entries in the L2 and refcount caches. The interval is in seconds. +The default value is 0 and it disables this feature. + +@item pass-discard-request +Whether discard requests to the qcow2 device should be forwarded to the data +source (on/off; default: on if discard=unmap is specified, off otherwise) + +@item pass-discard-snapshot +Whether discard requests for the data source should be issued when a snapshot +operation (e.g. 
deleting a snapshot) frees clusters in the qcow2 file (on/off; +default: on) + +@item pass-discard-other +Whether discard requests for the data source should be issued on other +occasions where a cluster gets freed (on/off; default: off) + +@item overlap-check +Which overlap checks to perform for writes to the image +(none/constant/cached/all; default: cached). For details or finer +granularity control refer to the QAPI documentation of @code{blockdev-add}. +@end table + +Example 1: +@example +-blockdev driver=file,node-name=my_file,filename=/tmp/disk.qcow2 +-blockdev driver=qcow2,node-name=hda,file=my_file,overlap-check=none,cache-size=16777216 +@end example +Example 2: +@example +-blockdev driver=qcow2,node-name=disk,file.driver=http,file.filename=http://example.com/image.qcow2 +@end example + +@item Driver-specific options for other drivers +Please refer to the QAPI documentation of the @code{blockdev-add} QMP command. + +@end table + +ETEXI DEF("drive", HAS_ARG, QEMU_OPTION_drive, "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" @@ -630,7 +790,12 @@ STEXI @item -drive @var{option}[,@var{option}[,@var{option}[,...]]] @findex -drive -Define a new drive. Valid options are: +Define a new drive. This includes creating a block driver node (the backend) as +well as a guest device, and is mostly a shortcut for defining the corresponding +@option{-blockdev} and @option{-device} options. + +@option{-drive} accepts all options that are accepted by @option{-blockdev}. In +addition, it knows the following options: @table @option @item file=@var{file} @@ -657,11 +822,31 @@ These options have the same definition as they have in @option{-hdachs}. @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive (see @option{-snapshot}). @item cache=@var{cache} -@var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" and controls how the host cache is used to access block data. +@var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" +and controls how the host cache is used to access block data. This is a +shortcut that sets the @option{cache.direct} and @option{cache.no-flush} +options (as in @option{-blockdev}), and additionally @option{cache.writeback}, +which provides a default for the @option{write-cache} option of block guest +devices (as in @option{-device}). The modes correspond to the following +settings: + +@c Our texi2pod.pl script doesn't support @multitable, so fall back to using +@c plain ASCII art (well, UTF-8 art really). This looks okay both in the manpage +@c and the HTML output. +@example +@ │ cache.writeback cache.direct cache.no-flush +─────────────┼───────────────────────────────────────────────── +writeback │ on off off +none │ on on off +writethrough │ off off off +directsync │ off on off +unsafe │ on off on +@end example + +The default mode is @option{cache=writeback}. + @item aio=@var{aio} @var{aio} is "threads", or "native" and selects between pthread based disk I/O and native Linux AIO. -@item discard=@var{discard} -@var{discard} is one of "ignore" (or "off") or "unmap" (or "on") and controls whether @dfn{discard} (also known as @dfn{trim} or @dfn{unmap}) requests are ignored or passed to the filesystem. Some machine types may not support discard requests. @item format=@var{format} Specify which disk @var{format} will be used rather than detecting the format. Can be used to specify format=raw to avoid interpreting @@ -676,16 +861,9 @@ Specify which @var{action} to take on write and read errors. 
Valid actions are: "report" (report the error to the guest), "enospc" (pause QEMU only if the host disk is full; report the error to the guest otherwise). The default setting is @option{werror=enospc} and @option{rerror=report}. -@item readonly -Open drive @option{file} as read-only. Guest write attempts will fail. @item copy-on-read=@var{copy-on-read} @var{copy-on-read} is "on" or "off" and enables whether to copy read backing file sectors into the image file. -@item detect-zeroes=@var{detect-zeroes} -@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic -conversion of plain zero writes by the OS to driver specific optimized -zero write commands. You may even choose "unmap" if @var{discard} is set -to "unmap" to allow a zero write to be converted to an UNMAP operation. @item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w} Specify bandwidth throttling limits in bytes per second, either for all request types or for reads or writes only. Small values can lead to timeouts or hangs @@ -712,34 +890,19 @@ prevent guests from circumventing throttling limits by using many small disks instead of a single larger disk. @end table -By default, the @option{cache=writeback} mode is used. It will report data +By default, the @option{cache.writeback=on} mode is used. It will report data writes as completed as soon as the data is present in the host page cache. This is safe as long as your guest OS makes sure to correctly flush disk caches where needed. If your guest OS does not handle volatile disk write caches correctly and your host crashes or loses power, then the guest may experience data corruption. -For such guests, you should consider using @option{cache=writethrough}. This +For such guests, you should consider using @option{cache.writeback=off}. This means that the host page cache will be used to read and write data, but write notification will be sent to the guest only after QEMU has made sure to flush each write to the disk. Be aware that this has a major impact on performance. -The host page cache can be avoided entirely with @option{cache=none}. This will -attempt to do disk IO directly to the guest's memory. QEMU may still perform -an internal copy of the data. Note that this is considered a writeback mode and -the guest OS must handle the disk write cache correctly in order to avoid data -corruption on host crashes. - -The host page cache can be avoided while only sending write notifications to -the guest when the data has been flushed to the disk using -@option{cache=directsync}. - -In case you don't care about data integrity over host failures, use -@option{cache=unsafe}. This option tells QEMU that it never needs to write any -data to the disk but can instead keep things in cache. If anything goes wrong, -like your host losing power, the disk storage getting disconnected accidentally, -etc. your image will most probably be rendered unusable. When using -the @option{-snapshot} option, unsafe caching is always used. +When using the @option{-snapshot} option, unsafe caching is always used. Copy-on-read avoids accessing the same backing file sectors repeatedly and is useful when the backing file is over a slow network. 
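To make the cache-mode table above concrete (file and node names invented for illustration), the shorthand

@example
-drive file=disk.img,format=raw,if=virtio,cache=none
@end example

behaves roughly like the explicit

@example
-blockdev driver=raw,node-name=disk0,cache.direct=on,cache.no-flush=off,file.driver=file,file.filename=disk.img
-device virtio-blk-pci,drive=disk0,write-cache=on
@end example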
By default copy-on-read @@ -847,7 +1010,7 @@ ETEXI DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" - " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n" + " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n" " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n" " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n" " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n" @@ -857,7 +1020,7 @@ DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, STEXI -@item -fsdev @var{fsdriver},id=@var{id},path=@var{path},[security_model=@var{security_model}][,writeout=@var{writeout}][,readonly][,socket=@var{socket}|sock_fd=@var{sock_fd}] +@item -fsdev @var{fsdriver},id=@var{id},path=@var{path},[security_model=@var{security_model}][,writeout=@var{writeout}][,readonly][,socket=@var{socket}|sock_fd=@var{sock_fd}][,fmode=@var{fmode}][,dmode=@var{dmode}] @findex -fsdev Define a new file system device. Valid options are: @table @option @@ -898,6 +1061,12 @@ with virtfs-proxy-helper Enables proxy filesystem driver to use passed socket descriptor for communicating with virtfs-proxy-helper. Usually a helper like libvirt will create socketpair and pass one of the fds as sock_fd +@item fmode=@var{fmode} +Specifies the default mode for newly created files on the host. Works only +with security models "mapped-xattr" and "mapped-file". +@item dmode=@var{dmode} +Specifies the default mode for newly created directories on the host. Works +only with security models "mapped-xattr" and "mapped-file". @end table -fsdev option is used along with -device driver "virtio-9p-pci". @@ -914,12 +1083,12 @@ ETEXI DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, "-virtfs local,path=path,mount_tag=tag,security_model=[mapped-xattr|mapped-file|passthrough|none]\n" - " [,id=id][,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n", + " [,id=id][,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n", QEMU_ARCH_ALL) STEXI -@item -virtfs @var{fsdriver}[,path=@var{path}],mount_tag=@var{mount_tag}[,security_model=@var{security_model}][,writeout=@var{writeout}][,readonly][,socket=@var{socket}|sock_fd=@var{sock_fd}] +@item -virtfs @var{fsdriver}[,path=@var{path}],mount_tag=@var{mount_tag}[,security_model=@var{security_model}][,writeout=@var{writeout}][,readonly][,socket=@var{socket}|sock_fd=@var{sock_fd}][,fmode=@var{fmode}][,dmode=@var{dmode}] @findex -virtfs The general form of a Virtual File system pass-through options are: @@ -961,6 +1130,12 @@ will create socketpair and pass one of the fds as sock_fd @item sock_fd Enables proxy filesystem driver to use passed 'sock_fd' as the socket descriptor for interfacing with virtfs-proxy-helper +@item fmode=@var{fmode} +Specifies the default mode for newly created files on the host. Works only +with security models "mapped-xattr" and "mapped-file". +@item dmode=@var{dmode} +Specifies the default mode for newly created directories on the host. Works +only with security models "mapped-xattr" and "mapped-file". 
@end table ETEXI diff --git a/target/ppc/compat.c b/target/ppc/compat.c index e8ec1e19e7..f1b67faa97 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -24,9 +24,11 @@ #include "sysemu/cpus.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "cpu-models.h" typedef struct { + const char *name; uint32_t pvr; uint64_t pcr; uint64_t pcr_level; @@ -38,6 +40,7 @@ static const CompatInfo compat_table[] = { * Ordered from oldest to newest - the code relies on this */ { /* POWER6, ISA2.05 */ + .name = "power6", .pvr = CPU_POWERPC_LOGICAL_2_05, .pcr = PCR_COMPAT_3_00 | PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05 | PCR_TM_DIS | PCR_VSX_DIS, @@ -45,24 +48,28 @@ static const CompatInfo compat_table[] = { .max_threads = 2, }, { /* POWER7, ISA2.06 */ + .name = "power7", .pvr = CPU_POWERPC_LOGICAL_2_06, .pcr = PCR_COMPAT_3_00 | PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_TM_DIS, .pcr_level = PCR_COMPAT_2_06, .max_threads = 4, }, { + .name = "power7+", .pvr = CPU_POWERPC_LOGICAL_2_06_PLUS, .pcr = PCR_COMPAT_3_00 | PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_TM_DIS, .pcr_level = PCR_COMPAT_2_06, .max_threads = 4, }, { /* POWER8, ISA2.07 */ + .name = "power8", .pvr = CPU_POWERPC_LOGICAL_2_07, .pcr = PCR_COMPAT_3_00 | PCR_COMPAT_2_07, .pcr_level = PCR_COMPAT_2_07, .max_threads = 8, }, { /* POWER9, ISA3.00 */ + .name = "power9", .pvr = CPU_POWERPC_LOGICAL_3_00, .pcr = PCR_COMPAT_3_00, .pcr_level = PCR_COMPAT_3_00, @@ -189,3 +196,98 @@ int ppc_compat_max_threads(PowerPCCPU *cpu) return n_threads; } + +static void ppc_compat_prop_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint32_t compat_pvr = *((uint32_t *)opaque); + const char *value; + + if (!compat_pvr) { + value = ""; + } else { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + + g_assert(compat); + + value = compat->name; + } + + visit_type_str(v, name, (char **)&value, errp); +} + +static void ppc_compat_prop_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + Error *local_err = NULL; + char *value; + uint32_t compat_pvr; + + visit_type_str(v, name, &value, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (strcmp(value, "") == 0) { + compat_pvr = 0; + } else { + int i; + const CompatInfo *compat = NULL; + + for (i = 0; i < ARRAY_SIZE(compat_table); i++) { + if (strcmp(value, compat_table[i].name) == 0) { + compat = &compat_table[i]; + break; + + } + } + + if (!compat) { + error_setg(errp, "Invalid compatibility mode \"%s\"", value); + goto out; + } + compat_pvr = compat->pvr; + } + + *((uint32_t *)opaque) = compat_pvr; + +out: + g_free(value); +} + +void ppc_compat_add_property(Object *obj, const char *name, + uint32_t *compat_pvr, const char *basedesc, + Error **errp) +{ + Error *local_err = NULL; + gchar *namesv[ARRAY_SIZE(compat_table) + 1]; + gchar *names, *desc; + int i; + + object_property_add(obj, name, "string", + ppc_compat_prop_get, ppc_compat_prop_set, NULL, + compat_pvr, &local_err); + if (local_err) { + goto out; + } + + for (i = 0; i < ARRAY_SIZE(compat_table); i++) { + /* + * Have to discard const here, because g_strjoinv() takes + * (gchar **), not (const gchar **) :( + */ + namesv[i] = (gchar *)compat_table[i].name; + } + namesv[ARRAY_SIZE(compat_table)] = NULL; + + names = g_strjoinv(", ", namesv); + desc = g_strdup_printf("%s. 
Valid values are %s.", basedesc, names); + object_property_set_description(obj, name, desc, &local_err); + + g_free(names); + g_free(desc); + +out: + error_propagate(errp, local_err); +} diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index d10808d9f4..6ee2a26a96 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1189,7 +1189,6 @@ typedef struct PPCVirtualHypervisorClass PPCVirtualHypervisorClass; * PowerPCCPU: * @env: #CPUPPCState * @cpu_dt_id: CPU index used in the device tree. KVM uses this index too - * @max_compat: Maximal supported logical PVR from the command line * @compat_pvr: Current logical PVR, zero if in "raw" mode * * A PowerPC CPU. @@ -1201,7 +1200,6 @@ struct PowerPCCPU { CPUPPCState env; int cpu_dt_id; - uint32_t max_compat; uint32_t compat_pvr; PPCVirtualHypervisor *vhyp; Object *intc; @@ -1213,6 +1211,7 @@ struct PowerPCCPU { uint64_t mig_insns_flags; uint64_t mig_insns_flags2; uint32_t mig_nb_BATs; + bool pre_2_10_migration; }; static inline PowerPCCPU *ppc_env_get_cpu(CPUPPCState *env) @@ -1375,6 +1374,9 @@ void ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp); void ppc_set_compat_all(uint32_t compat_pvr, Error **errp); #endif int ppc_compat_max_threads(PowerPCCPU *cpu); +void ppc_compat_add_property(Object *obj, const char *name, + uint32_t *compat_pvr, const char *basedesc, + Error **errp); #endif /* defined(TARGET_PPC64) */ #include "exec/cpu-all.h" diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 9cb2123187..3a9f0861e7 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -17,6 +17,7 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" @@ -1132,6 +1133,7 @@ void helper_msgsnd(target_ulong rb) return; } + qemu_mutex_lock_iothread(); CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *cenv = &cpu->env; @@ -1141,5 +1143,6 @@ void helper_msgsnd(target_ulong rb) cpu_interrupt(cs, CPU_INTERRUPT_HARD); } } + qemu_mutex_unlock_iothread(); } #endif diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 6cb3a48db1..f578156dd4 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -8,6 +8,7 @@ #include "helper_regs.h" #include "mmu-hash64.h" #include "migration/cpu.h" +#include "qapi/error.h" static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) { @@ -195,6 +196,27 @@ static void cpu_pre_save(void *opaque) } } +/* + * Determine if a given PVR is a "close enough" match to the CPU + * object. For TCG and KVM PR it would probably be sufficient to + * require an exact PVR match. However for KVM HV the user is + * restricted to a PVR exactly matching the host CPU. The correct way + * to handle this is to put the guest into an architected + * compatibility mode. However, to allow a more forgiving transition + * and migration from before this was widely done, we allow migration + * between sufficiently similar PVRs, as determined by the CPU class's + * pvr_match() hook. + */ +static bool pvr_match(PowerPCCPU *cpu, uint32_t pvr) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + if (pvr == pcc->pvr) { + return true; + } + return pcc->pvr_match(pcc, pvr); +} + static int cpu_post_load(void *opaque, int version_id) { PowerPCCPU *cpu = opaque; @@ -203,10 +225,31 @@ static int cpu_post_load(void *opaque, int version_id) target_ulong msr; /* - * We always ignore the source PVR. 
The user or management - software has to take care of running QEMU in a compatible mode. + * If we're operating in compat mode, we should be ok as long as + * the destination supports the same compatibility mode. + * + * Otherwise, however, we require that the destination has exactly + * the same CPU model as the source. */ - env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value; + +#if defined(TARGET_PPC64) + if (cpu->compat_pvr) { + Error *local_err = NULL; + + ppc_set_compat(cpu, cpu->compat_pvr, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + } else +#endif + { + if (!pvr_match(cpu, env->spr[SPR_PVR])) { + return -1; + } + } + env->lr = env->spr[SPR_LR]; env->ctr = env->spr[SPR_CTR]; cpu_write_xer(env, env->spr[SPR_XER]); @@ -419,7 +462,7 @@ static const VMStateDescription vmstate_slb = { .needed = slb_needed, .post_load = slb_post_load, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(env.slb_nr, PowerPCCPU), + VMSTATE_INT32_EQUAL(env.slb_nr, PowerPCCPU, NULL), VMSTATE_SLB_ARRAY(env.slb, PowerPCCPU, MAX_SLB_ENTRIES), VMSTATE_END_OF_LIST() } }; @@ -452,7 +495,7 @@ static const VMStateDescription vmstate_tlb6xx = { .minimum_version_id = 1, .needed = tlb6xx_needed, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU), + VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU, NULL), VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlb6, PowerPCCPU, env.nb_tlb, vmstate_tlb6xx_entry, @@ -510,7 +553,7 @@ static const VMStateDescription vmstate_tlbemb = { .minimum_version_id = 1, .needed = tlbemb_needed, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU), + VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU, NULL), VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbe, PowerPCCPU, env.nb_tlb, vmstate_tlbemb_entry, @@ -551,7 +594,7 @@ static const VMStateDescription vmstate_tlbmas = { .minimum_version_id = 1, .needed = tlbmas_needed, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU), + VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU, NULL), VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbm, PowerPCCPU, env.nb_tlb, vmstate_tlbmas_entry, @@ -560,6 +603,25 @@ } }; +static bool compat_needed(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + assert(!(cpu->compat_pvr && !cpu->vhyp)); + return !cpu->pre_2_10_migration && cpu->compat_pvr != 0; +} + +static const VMStateDescription vmstate_compat = { + .name = "cpu/compat", + .version_id = 1, + .minimum_version_id = 1, + .needed = compat_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(compat_pvr, PowerPCCPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ppc_cpu = { .name = "cpu", .version_id = 5, @@ -613,6 +675,7 @@ const VMStateDescription vmstate_ppc_cpu = { &vmstate_tlb6xx, &vmstate_tlbemb, &vmstate_tlbmas, + &vmstate_compat, NULL } }; diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index de18c0b69e..69fde65276 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -255,5 +255,5 @@ int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK, prot, mmu_idx, 1UL << page_size); - return 1; + return 0; }
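The acceptance rule above (compat mode wins; otherwise the incoming PVR must match the class PVR exactly or satisfy the class's pvr_match() hook) can be illustrated with a minimal, self-contained sketch. The CPUClassSketch/CPUSketch types, the PVR constants and the power9_like() matcher are illustrative stand-ins, not QEMU API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the QEMU types involved (illustrative only). */
typedef struct {
    uint32_t pvr;                     /* class PVR */
    bool (*pvr_match)(uint32_t pvr);  /* family-specific fuzzy match */
} CPUClassSketch;

typedef struct {
    CPUClassSketch *cls;
    uint32_t compat_pvr;              /* non-zero when in compat mode */
} CPUSketch;

/* Same shape as the cpu_post_load() decision above. */
static int check_incoming_pvr(CPUSketch *cpu, uint32_t incoming_pvr)
{
    if (cpu->compat_pvr) {
        return 0;   /* ppc_set_compat() would re-enter compat mode here */
    }
    if (incoming_pvr == cpu->cls->pvr) {
        return 0;   /* exact match, as before */
    }
    return cpu->cls->pvr_match(incoming_pvr) ? 0 : -1;
}

static bool power9_like(uint32_t pvr)
{
    return (pvr >> 16) == 0x004e;     /* any POWER9 revision */
}

int main(void)
{
    CPUClassSketch cls = { .pvr = 0x004e1200, .pvr_match = power9_like };
    CPUSketch cpu = { .cls = &cls, .compat_pvr = 0 };

    printf("%d\n", check_incoming_pvr(&cpu, 0x004e1100));  /* 0: close enough */
    printf("%d\n", check_incoming_pvr(&cpu, 0x003f0201));  /* -1: rejected */
    return 0;
}

Running it prints 0 then -1: a nearby POWER9 revision is accepted via the fuzzy match, while a different CPU family is refused unless the source was in an architected compat mode.

diff --git a/target/ppc/translate_init.c index 56a0ab22cf..783bf98217 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -33,6 +33,7 @@ #include "hw/qdev-properties.h" #include "hw/ppc/ppc.h" #include 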
"mmu-book3s-v3.h" +#include "sysemu/qtest.h" //#define PPC_DUMP_CPU //#define PPC_DEBUG_SPR @@ -8413,73 +8414,38 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data) pcc->l1_icache_size = 0x10000; } -static void powerpc_get_compat(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - char *value = (char *)""; - Property *prop = opaque; - uint32_t *max_compat = qdev_get_prop_ptr(DEVICE(obj), prop); - - switch (*max_compat) { - case CPU_POWERPC_LOGICAL_2_05: - value = (char *)"power6"; - break; - case CPU_POWERPC_LOGICAL_2_06: - value = (char *)"power7"; - break; - case CPU_POWERPC_LOGICAL_2_07: - value = (char *)"power8"; - break; - case 0: - break; - default: - error_report("Internal error: compat is set to %x", *max_compat); - abort(); - break; - } - - visit_type_str(v, name, &value, errp); -} - -static void powerpc_set_compat(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +/* + * The CPU used to have a "compat" property which set the + * compatibility mode PVR. However, this was conceptually broken - it + * only makes sense on the pseries machine type (otherwise the guest + * owns the PCR and can control the compatibility mode itself). It's + * been replaced with the 'max-cpu-compat' property on the pseries + * machine type. For backwards compatibility, pseries specially + * parses the -cpu parameter and converts old compat= parameters into + * the appropriate machine parameters. This stub implementation of + * the parameter catches any uses on explicitly created CPUs. + */ +static void getset_compat_deprecated(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { - Error *error = NULL; - char *value = NULL; - Property *prop = opaque; - uint32_t *max_compat = qdev_get_prop_ptr(DEVICE(obj), prop); - - visit_type_str(v, name, &value, &error); - if (error) { - error_propagate(errp, error); - return; + if (!qtest_enabled()) { + error_report("CPU 'compat' property is deprecated and has no effect; " + "use max-cpu-compat machine property instead"); } - - if (strcmp(value, "power6") == 0) { - *max_compat = CPU_POWERPC_LOGICAL_2_05; - } else if (strcmp(value, "power7") == 0) { - *max_compat = CPU_POWERPC_LOGICAL_2_06; - } else if (strcmp(value, "power8") == 0) { - *max_compat = CPU_POWERPC_LOGICAL_2_07; - } else { - error_setg(errp, "Invalid compatibility mode \"%s\"", value); - } - - g_free(value); + visit_type_null(v, name, NULL); } -static PropertyInfo powerpc_compat_propinfo = { +static PropertyInfo ppc_compat_deprecated_propinfo = { .name = "str", - .description = "compatibility mode, power6/power7/power8", - .get = powerpc_get_compat, - .set = powerpc_set_compat, + .description = "compatibility mode (deprecated)", + .get = getset_compat_deprecated, + .set = getset_compat_deprecated, }; - -#define DEFINE_PROP_POWERPC_COMPAT(_n, _s, _f) \ - DEFINE_PROP(_n, _s, _f, powerpc_compat_propinfo, uint32_t) - static Property powerpc_servercpu_properties[] = { - DEFINE_PROP_POWERPC_COMPAT("compat", PowerPCCPU, max_compat), + { + .name = "compat", + .info = &ppc_compat_deprecated_propinfo, + }, DEFINE_PROP_END_OF_LIST(), }; @@ -9859,14 +9825,14 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) error_append_hint(errp, "Adjust the number of cpus to %d " "or try to raise the number of threads per core\n", cpu->cpu_dt_id * smp_threads / max_smt); - return; + goto unrealize; } #endif if (tcg_enabled()) { if (ppc_fixup_cpu(cpu) != 0) { error_setg(errp, "Unable to emulate selected CPU with TCG"); - return; + goto 
unrealize; } } @@ -9875,14 +9841,14 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) error_setg(errp, "CPU does not possess a BookE or 4xx MMU. " "Please use qemu-system-ppc or qemu-system-ppc64 instead " "or choose another CPU model."); - return; + goto unrealize; } #endif create_ppc_opcodes(cpu, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); - return; + goto unrealize; } init_ppc_proc(cpu); @@ -10067,6 +10033,10 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) fflush(stdout); } #endif + return; + +unrealize: + cpu_exec_unrealizefn(cs); } static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp) @@ -10640,6 +10610,8 @@ static gchar *ppc_gdb_arch_name(CPUState *cs) static Property ppc_cpu_properties[] = { DEFINE_PROP_BOOL("pre-2.8-migration", PowerPCCPU, pre_2_8_migration, false), + DEFINE_PROP_BOOL("pre-2.10-migration", PowerPCCPU, pre_2_10_migration, + false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index a4028fb315..9faca04b52 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -304,6 +304,7 @@ void s390x_cpu_debug_excp_handler(CPUState *cs); #undef PSW_MASK_WAIT #undef PSW_MASK_PSTATE #undef PSW_MASK_ASC +#undef PSW_SHIFT_ASC #undef PSW_MASK_CC #undef PSW_MASK_PM #undef PSW_MASK_64 @@ -315,11 +316,12 @@ void s390x_cpu_debug_excp_handler(CPUState *cs); #define PSW_MASK_IO 0x0200000000000000ULL #define PSW_MASK_EXT 0x0100000000000000ULL #define PSW_MASK_KEY 0x00F0000000000000ULL -#define PSW_SHIFT_KEY 56 +#define PSW_SHIFT_KEY 52 #define PSW_MASK_MCHECK 0x0004000000000000ULL #define PSW_MASK_WAIT 0x0002000000000000ULL #define PSW_MASK_PSTATE 0x0001000000000000ULL #define PSW_MASK_ASC 0x0000C00000000000ULL +#define PSW_SHIFT_ASC 46 #define PSW_MASK_CC 0x0000300000000000ULL #define PSW_MASK_PM 0x00000F0000000000ULL #define PSW_MASK_64 0x0000000100000000ULL @@ -336,24 +338,26 @@ void s390x_cpu_debug_excp_handler(CPUState *cs); #define PSW_ASC_SECONDARY 0x0000800000000000ULL #define PSW_ASC_HOME 0x0000C00000000000ULL +/* the address space values shifted */ +#define AS_PRIMARY 0 +#define AS_ACCREG 1 +#define AS_SECONDARY 2 +#define AS_HOME 3 + /* tb flags */ -#define FLAG_MASK_PER (PSW_MASK_PER >> 32) -#define FLAG_MASK_DAT (PSW_MASK_DAT >> 32) -#define FLAG_MASK_IO (PSW_MASK_IO >> 32) -#define FLAG_MASK_EXT (PSW_MASK_EXT >> 32) -#define FLAG_MASK_KEY (PSW_MASK_KEY >> 32) -#define FLAG_MASK_MCHECK (PSW_MASK_MCHECK >> 32) -#define FLAG_MASK_WAIT (PSW_MASK_WAIT >> 32) -#define FLAG_MASK_PSTATE (PSW_MASK_PSTATE >> 32) -#define FLAG_MASK_ASC (PSW_MASK_ASC >> 32) -#define FLAG_MASK_CC (PSW_MASK_CC >> 32) -#define FLAG_MASK_PM (PSW_MASK_PM >> 32) -#define FLAG_MASK_64 (PSW_MASK_64 >> 32) -#define FLAG_MASK_32 0x00001000 +#define FLAG_MASK_PSW_SHIFT 31 +#define FLAG_MASK_PER (PSW_MASK_PER >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_PSTATE (PSW_MASK_PSTATE >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_ASC (PSW_MASK_ASC >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_64 (PSW_MASK_64 >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_32 (PSW_MASK_32 >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_PSW (FLAG_MASK_PER | FLAG_MASK_PSTATE \ + | FLAG_MASK_ASC | FLAG_MASK_64 | FLAG_MASK_32) /* Control register 0 bits */ #define CR0_LOWPROT 0x0000000010000000ULL +#define CR0_SECONDARY 0x0000000004000000ULL #define CR0_EDAT 0x0000000000800000ULL /* MMU */ @@ -361,7 +365,18 @@ void s390x_cpu_debug_excp_handler(CPUState *cs); #define MMU_SECONDARY_IDX 1 #define MMU_HOME_IDX 2 -static inline int cpu_mmu_index (CPUS390XState 
*env, bool ifetch) +static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key) +{ + uint16_t pkm = env->cregs[3] >> 16; + + if (env->psw.mask & PSW_MASK_PSTATE) { + /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */ + return pkm & (0x80 >> psw_key); + } + return true; +} + +static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch) { switch (env->psw.mask & PSW_MASK_ASC) { case PSW_ASC_PRIMARY: @@ -396,8 +411,7 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, { *pc = env->psw.addr; *cs_base = env->ex_value; - *flags = ((env->psw.mask >> 32) & ~FLAG_MASK_CC) | - ((env->psw.mask & PSW_MASK_32) ? FLAG_MASK_32 : 0); + *flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW; } #define MAX_ILEN 6 diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 478bcc604e..63903c2d6f 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -675,6 +675,7 @@ static void check_compatibility(const S390CPUModel *max_model, static void add_qemu_cpu_model_features(S390FeatBitmap fbm) { static const int feats[] = { + S390_FEAT_DAT_ENH, S390_FEAT_STFLE, S390_FEAT_EXTENDED_IMMEDIATE, S390_FEAT_EXTENDED_TRANSLATION_2, @@ -682,9 +683,14 @@ static void add_qemu_cpu_model_features(S390FeatBitmap fbm) S390_FEAT_LONG_DISPLACEMENT_FAST, S390_FEAT_ETF2_ENH, S390_FEAT_STORE_CLOCK_FAST, + S390_FEAT_MOVE_WITH_OPTIONAL_SPEC, S390_FEAT_GENERAL_INSTRUCTIONS_EXT, S390_FEAT_EXECUTE_EXT, + S390_FEAT_FLOATING_POINT_SUPPPORT_ENH, S390_FEAT_STFLE_45, + S390_FEAT_STFLE_49, + S390_FEAT_LOCAL_TLB_CLEARING, + S390_FEAT_STFLE_53, }; int i; diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 69249a5249..964097b2ce 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -105,6 +105,7 @@ DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env) DEF_HELPER_2(stfle, i32, env, i64) DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64) +DEF_HELPER_4(mvcos, i32, env, i64, i64, i64) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) @@ -130,6 +131,7 @@ DEF_HELPER_4(mvcs, i32, env, i64, i64, i64) DEF_HELPER_4(mvcp, i32, env, i64, i64, i64) DEF_HELPER_4(sigp, i32, env, i64, i32, i64) DEF_HELPER_FLAGS_2(sacf, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_4(idte, TCG_CALL_NO_RWG, void, env, i64, i64, i32) DEF_HELPER_FLAGS_4(ipte, TCG_CALL_NO_RWG, void, env, i64, i64, i32) DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_1(purge, TCG_CALL_NO_RWG, void, env) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index d089707073..d3bb8516ed 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -134,6 +134,15 @@ D(0x8500, BRXLE, RSI, Z, 0, 0, 0, 0, bx32, 0, 1) D(0xec44, BRXHG, RIE_e, Z, 0, 0, 0, 0, bx64, 0, 0) D(0xec45, BRXHLE, RIE_e, Z, 0, 0, 0, 0, bx64, 0, 1) +/* BRANCH PREDICTION PRELOAD */ + /* ??? Format is SMI, but implemented as NOP, so we need no fields. */ + C(0xc700, BPP, E, EH, 0, 0, 0, 0, 0, 0) +/* BRANCH PREDICTION RELATIVE PRELOAD */ + /* ??? Format is MII, but implemented as NOP, so we need no fields. */ + C(0xc500, BPRP, E, EH, 0, 0, 0, 0, 0, 0) +/* NEXT INSTRUCTION ACCESS INTENT */ + /* ??? Format is IE, but implemented as NOP, so we need no fields. 
*/ + C(0xb2fa, NIAI, E, EH, 0, 0, 0, 0, 0, 0) /* CHECKSUM */ C(0xb241, CKSM, RRE, Z, r1_o, ra2, new, r1_32, cksm, 0) @@ -427,6 +436,11 @@ /* LOAD AND TRAP */ C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0) C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0) +/* LOAD AND ZERO RIGHTMOST BYTE */ + C(0xe3eb, LZRF, RXY_a, LZRB, 0, m2_32u, new, r1_32, lzrb, 0) + C(0xe32a, LZRG, RXY_a, LZRB, 0, m2_64, r1, 0, lzrb, 0) +/* LOAD LOGICAL AND ZERO RIGHTMOST BYTE */ + C(0xe33a, LLZRGF, RXY_a, LZRB, 0, m2_32u, r1, 0, lzrb, 0) /* LOAD BYTE */ C(0xb926, LBR, RRE, EI, 0, r2_8s, 0, r1_32, mov2, 0) C(0xb906, LGBR, RRE, EI, 0, r2_8s, 0, r1, mov2, 0) @@ -514,6 +528,13 @@ C(0xb9e2, LOCGR, RRF_c, LOC, r1, r2, r1, 0, loc, 0) C(0xebf2, LOC, RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0) C(0xebe2, LOCG, RSY_b, LOC, r1, m2_64, r1, 0, loc, 0) +/* LOAD HALFWORD IMMEDIATE ON CONDITION */ + C(0xec42, LOCHI, RIE_g, LOC2, r1, i2, new, r1_32, loc, 0) + C(0xec46, LOCGHI, RIE_g, LOC2, r1, i2, r1, 0, loc, 0) + C(0xec4e, LOCHHI, RIE_g, LOC2, r1_sr32, i2, new, r1_32h, loc, 0) +/* LOAD HIGH ON CONDITION */ + C(0xb9e0, LOCFHR, RRF_c, LOC2, r1_sr32, r2, new, r1_32h, loc, 0) + C(0xebe0, LOCFH, RSY_b, LOC2, r1_sr32, m2_32u, new, r1_32h, loc, 0) /* LOAD PAIR DISJOINT */ D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL) D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ) @@ -590,6 +611,8 @@ C(0xb254, MVPG, RRE, Z, r1_o, r2_o, 0, 0, mvpg, 0) /* MOVE STRING */ C(0xb255, MVST, RRE, Z, r1_o, r2_o, 0, 0, mvst, 0) +/* MOVE WITH OPTIONAL SPECIFICATION */ + C(0xc800, MVCOS, SSF, MVCOS, la1, a2, 0, 0, mvcos, 0) /* MOVE WITH OFFSET */ /* Really format SS_b, but we pack both lengths into one argument for the helper call, so we might as well leave one 8-bit field. */ @@ -676,6 +699,9 @@ /* Implemented as nops of course. */ C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0) C(0xc602, PFDRL, RIL_c, GIE, 0, 0, 0, 0, 0, 0) +/* PERFORM PROCESSOR ASSIST */ + /* Implemented as nop of course. 
*/ + C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0) /* POPULATION COUNT */ C(0xb9e1, POPCNT, RRE, PC, 0, r2_o, r1, 0, popcnt, nz64) @@ -777,6 +803,8 @@ /* STORE ON CONDITION */ D(0xebf3, STOC, RSY_b, LOC, 0, 0, 0, 0, soc, 0, 0) D(0xebe3, STOCG, RSY_b, LOC, 0, 0, 0, 0, soc, 0, 1) +/* STORE HIGH ON CONDITION */ + D(0xebe1, STOCFH, RSY_b, LOC2, 0, 0, 0, 0, soc, 0, 2) /* STORE REVERSED */ C(0xe33f, STRVH, RXY_a, Z, la2, r1_16u, new, m1_16, rev16, 0) C(0xe33e, STRV, RXY_a, Z, la2, r1_32u, new, m1_32, rev32, 0) @@ -900,6 +928,8 @@ C(0x8300, DIAG, RSI, Z, 0, 0, 0, 0, diag, 0) /* INSERT STORAGE KEY EXTENDED */ C(0xb229, ISKE, RRE, Z, 0, r2_o, new, r1_8, iske, 0) +/* INVALIDATE DAT TABLE ENTRY */ + C(0xb98e, IDTE, RRF_b, Z, r1_o, r2_o, 0, 0, idte, 0) /* INVALIDATE PAGE TABLE ENTRY */ C(0xb221, IPTE, RRF_a, Z, r1_o, r2_o, 0, 0, ipte, 0) /* LOAD CONTROL */ diff --git a/target/s390x/insn-format.def b/target/s390x/insn-format.def index 0e898b90bd..a412d90fb7 100644 --- a/target/s390x/insn-format.def +++ b/target/s390x/insn-format.def @@ -11,6 +11,7 @@ F4(RIE_c, R(1, 8), I(2,32, 8), M(3,12), I(4,16,16)) F3(RIE_d, R(1, 8), I(2,16,16), R(3,12)) F3(RIE_e, R(1, 8), I(2,16,16), R(3,12)) F5(RIE_f, R(1, 8), R(2,12), I(3,16,8), I(4,24,8), I(5,32,8)) +F3(RIE_g, R(1, 8), I(2,16,16), M(3,12)) F2(RIL_a, R(1, 8), I(2,16,32)) F2(RIL_b, R(1, 8), I(2,16,32)) F2(RIL_c, M(1, 8), I(2,16,32))
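The wrap_address() helper introduced in mem_helper.c below masks an address according to the current PSW addressing mode before every access. A standalone sketch with worked values (the harness is illustrative; only the two mask constants mirror the cpu.h definitions):

#include <stdint.h>
#include <stdio.h>

#define PSW_MASK_64 0x0000000100000000ULL
#define PSW_MASK_32 0x0000000080000000ULL

/* Same masking rule as wrap_address(): 24-bit, 31-bit, or no wrap. */
static uint64_t wrap(uint64_t psw_mask, uint64_t a)
{
    if (!(psw_mask & PSW_MASK_64)) {
        a &= (psw_mask & PSW_MASK_32) ? 0x7fffffff : 0x00ffffff;
    }
    return a;
}

int main(void)
{
    uint64_t a = 0xdeadbeefcafeULL;

    printf("%llx\n", (unsigned long long)wrap(0, a));           /* efcafe: 24-bit */
    printf("%llx\n", (unsigned long long)wrap(PSW_MASK_32, a)); /* 3eefcafe: 31-bit */
    printf("%llx\n", (unsigned long long)wrap(PSW_MASK_64, a)); /* deadbeefcafe */
    return 0;
}

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 80caab9c9d..ede84711d1 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -110,6 +110,20 @@ static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr, } } +static inline uint64_t wrap_address(CPUS390XState *env, uint64_t a) +{ + if (!(env->psw.mask & PSW_MASK_64)) { + if (!(env->psw.mask & PSW_MASK_32)) { + /* 24-Bit mode */ + a &= 0x00ffffff; + } else { + /* 31-Bit mode */ + a &= 0x7fffffff; + } + } + return a; +} + static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte, uint32_t l, uintptr_t ra) { @@ -133,6 +147,68 @@ static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte, } } +#ifndef CONFIG_USER_ONLY +static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t len, int dest_idx, int src_idx, + uintptr_t ra) +{ + TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx); + TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx); + uint32_t len_adj; + void *src_p; + void *dest_p; + uint8_t x; + + while (len > 0) { + src = wrap_address(env, src); + dest = wrap_address(env, dest); + src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx); + dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx); + + if (src_p && dest_p) { + /* Access to both whole pages granted. */ + len_adj = adj_len_to_page(adj_len_to_page(len, src), dest); + memmove(dest_p, src_p, len_adj); + } else { + /* We failed to get access to one or both whole pages. The next + read or write access will likely fill the QEMU TLB for the + next iteration. 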
*/ + len_adj = 1; + x = helper_ret_ldub_mmu(env, src, oi_src, ra); + helper_ret_stb_mmu(env, dest, x, oi_dest, ra); + } + src += len_adj; + dest += len_adj; + len -= len_adj; + } +} + +static int mmu_idx_from_as(uint8_t as) +{ + switch (as) { + case AS_PRIMARY: + return MMU_PRIMARY_IDX; + case AS_SECONDARY: + return MMU_SECONDARY_IDX; + case AS_HOME: + return MMU_HOME_IDX; + default: + /* FIXME AS_ACCREG */ + g_assert_not_reached(); + } +} + +static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t len, uint8_t dest_as, uint8_t src_as, + uintptr_t ra) +{ + int src_idx = mmu_idx_from_as(src_as); + int dest_idx = mmu_idx_from_as(dest_as); + + fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra); +} +#endif + static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src, uint32_t l, uintptr_t ra) { @@ -408,20 +484,6 @@ uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask, return cc; } -static inline uint64_t wrap_address(CPUS390XState *env, uint64_t a) -{ - if (!(env->psw.mask & PSW_MASK_64)) { - if (!(env->psw.mask & PSW_MASK_32)) { - /* 24-Bit mode */ - a &= 0x00ffffff; - } else { - /* 31-Bit mode */ - a &= 0x7fffffff; - } - } - return a; -} - static inline uint64_t get_address(CPUS390XState *env, int reg) { return wrap_address(env, env->regs[reg]); @@ -1203,13 +1265,22 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, uintptr_t ra = GETPC(); int dsize = (sizes & 1) ? 1 : 2; int ssize = (sizes & 2) ? 1 : 2; - uint64_t tbl = get_address(env, 1) & ~7; + uint64_t tbl = get_address(env, 1); uint64_t dst = get_address(env, r1); uint64_t len = get_length(env, r1 + 1); uint64_t src = get_address(env, r2); uint32_t cc = 3; int i; + /* The lower address bits of TBL are ignored. For TROO, TROT, it's + the low 3 bits (double-word aligned). For TRTO, TRTT, it's either + the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH). */ + if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) { + tbl &= -4096; + } else { + tbl &= -8; + } + check_alignment(env, len, ssize, ra); /* Lest we fail to service interrupts in a timely manner, */ @@ -1539,6 +1610,57 @@ uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) return cc; } +void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + const uintptr_t ra = GETPC(); + uint64_t table, entry, raddr; + uint16_t entries, i, index = 0; + + if (r2 & 0xff000) { + cpu_restore_state(cs, ra); + program_interrupt(env, PGM_SPECIFICATION, 4); + } + + if (!(r2 & 0x800)) { + /* invalidation-and-clearing operation */ + table = r1 & _ASCE_ORIGIN; + entries = (r2 & 0x7ff) + 1; + + switch (r1 & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + index = (r2 >> 53) & 0x7ff; + break; + case _ASCE_TYPE_REGION2: + index = (r2 >> 42) & 0x7ff; + break; + case _ASCE_TYPE_REGION3: + index = (r2 >> 31) & 0x7ff; + break; + case _ASCE_TYPE_SEGMENT: + index = (r2 >> 20) & 0x7ff; + break; + } + for (i = 0; i < entries; i++) { + /* addresses are not wrapped in 24/31bit mode but table index is */ + raddr = table + ((index + i) & 0x7ff) * sizeof(entry); + entry = ldq_phys(cs->as, raddr); + if (!(entry & _REGION_ENTRY_INV)) { + /* we are allowed to not store if already invalid */ + entry |= _REGION_ENTRY_INV; + stq_phys(cs->as, raddr, entry); + } + } + } + + /* We simply flush the complete tlb, therefore we can ignore r3. 
*/ + if (m4 & 1) { + tlb_flush(cs); + } else { + tlb_flush_all_cpus_synced(cs); + } +} + /* invalidate pte */ void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr, uint32_t m4) @@ -1558,19 +1680,24 @@ void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr, /* XXX we exploit the fact that Linux passes the exact virtual address here - it's not obliged to! */ - /* XXX: the LC bit should be considered as 0 if the local-TLB-clearing - facility is not installed. */ if (m4 & 1) { - tlb_flush_page(cs, page); - } else { - tlb_flush_page_all_cpus_synced(cs, page); - } - - /* XXX 31-bit hack */ - if (m4 & 1) { - tlb_flush_page(cs, page ^ 0x80000000); + if (vaddr & ~VADDR_PX) { + tlb_flush_page(cs, page); + /* XXX 31-bit hack */ + tlb_flush_page(cs, page ^ 0x80000000); + } else { + /* looks like we don't have a valid virtual address */ + tlb_flush(cs); + } } else { - tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000); + if (vaddr & ~VADDR_PX) { + tlb_flush_page_all_cpus_synced(cs, page); + /* XXX 31-bit hack */ + tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000); + } else { + /* looks like we don't have a valid virtual address */ + tlb_flush_all_cpus_synced(cs); + } } } @@ -1789,3 +1916,94 @@ void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr) that requires such execution. */ env->ex_value = insn | ilen; } + +uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint64_t len) +{ + const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY; + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + const uint64_t r0 = env->regs[0]; + const uintptr_t ra = GETPC(); + CPUState *cs = CPU(s390_env_get_cpu(env)); + uint8_t dest_key, dest_as, dest_k, dest_a; + uint8_t src_key, src_as, src_k, src_a; + uint64_t val; + int cc = 0; + + HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n", + __func__, dest, src, len); + + if (!(env->psw.mask & PSW_MASK_DAT)) { + cpu_restore_state(cs, ra); + program_interrupt(env, PGM_SPECIAL_OP, 6); + } + + /* OAC (operand access control) for the first operand -> dest */ + val = (r0 & 0xffff0000ULL) >> 16; + dest_key = (val >> 12) & 0xf; + dest_as = (val >> 6) & 0x3; + dest_k = (val >> 1) & 0x1; + dest_a = val & 0x1; + + /* OAC (operand access control) for the second operand -> src */ + val = (r0 & 0x0000ffffULL); + src_key = (val >> 12) & 0xf; + src_as = (val >> 6) & 0x3; + src_k = (val >> 1) & 0x1; + src_a = val & 0x1; + + if (!dest_k) { + dest_key = psw_key; + } + if (!src_k) { + src_key = psw_key; + } + if (!dest_a) { + dest_as = psw_as; + } + if (!src_a) { + src_as = psw_as; + } + + if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) { + cpu_restore_state(cs, ra); + program_interrupt(env, PGM_SPECIAL_OP, 6); + } + if (!(env->cregs[0] & CR0_SECONDARY) && + (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) { + cpu_restore_state(cs, ra); + program_interrupt(env, PGM_SPECIAL_OP, 6); + } + if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) { + cpu_restore_state(cs, ra); + program_interrupt(env, PGM_PRIVILEGED, 6); + } + + len = wrap_length(env, len); + if (len > 4096) { + cc = 3; + len = 4096; + } + + /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */ + if (src_as == AS_ACCREG || dest_as == AS_ACCREG || + (env->psw.mask & PSW_MASK_PSTATE)) { + qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n", + __func__); + cpu_restore_state(cs, ra); + program_interrupt(env, PGM_ADDRESSING, 
6); + } + + /* FIXME: a) LAP + * b) Access using correct keys + * c) AR-mode + */ +#ifdef CONFIG_USER_ONLY + /* psw keys are never valid in user mode, we will never reach this */ + g_assert_not_reached(); +#else + fast_memmove_as(env, dest, src, len, dest_as, src_as, ra); +#endif + + return cc; +} diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 640354271c..592d6b0f38 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -323,11 +323,11 @@ static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc) static int get_mem_index(DisasContext *s) { switch (s->tb->flags & FLAG_MASK_ASC) { - case PSW_ASC_PRIMARY >> 32: + case PSW_ASC_PRIMARY >> FLAG_MASK_PSW_SHIFT: return 0; - case PSW_ASC_SECONDARY >> 32: + case PSW_ASC_SECONDARY >> FLAG_MASK_PSW_SHIFT: return 1; - case PSW_ASC_HOME >> 32: + case PSW_ASC_HOME >> FLAG_MASK_PSW_SHIFT: return 2; default: tcg_abort(); @@ -387,7 +387,7 @@ static inline void gen_trap(DisasContext *s) #ifndef CONFIG_USER_ONLY static void check_privileged(DisasContext *s) { - if (s->tb->flags & (PSW_MASK_PSTATE >> 32)) { + if (s->tb->flags & FLAG_MASK_PSTATE) { gen_program_exception(s, PGM_PRIVILEGED); } } @@ -1180,39 +1180,10 @@ typedef enum { EXIT_NORETURN, } ExitStatus; -typedef enum DisasFacility { - FAC_Z, /* zarch (default) */ - FAC_CASS, /* compare and swap and store */ - FAC_CASS2, /* compare and swap and store 2*/ - FAC_DFP, /* decimal floating point */ - FAC_DFPR, /* decimal floating point rounding */ - FAC_DO, /* distinct operands */ - FAC_EE, /* execute extensions */ - FAC_EI, /* extended immediate */ - FAC_FPE, /* floating point extension */ - FAC_FPSSH, /* floating point support sign handling */ - FAC_FPRGR, /* FPR-GR transfer */ - FAC_GIE, /* general instructions extension */ - FAC_HFP_MA, /* HFP multiply-and-add/subtract */ - FAC_HW, /* high-word */ - FAC_IEEEE_SIM, /* IEEE exception sumilation */ - FAC_MIE, /* miscellaneous-instruction-extensions */ - FAC_LAT, /* load-and-trap */ - FAC_LOC, /* load/store on condition */ - FAC_LD, /* long displacement */ - FAC_PC, /* population count */ - FAC_SCF, /* store clock fast */ - FAC_SFLE, /* store facility list extended */ - FAC_ILA, /* interlocked access facility 1 */ - FAC_LPP, /* load-program-parameter */ - FAC_DAT_ENH, /* DAT-enhancement */ - FAC_E2, /* extended-translation facility 2 */ -} DisasFacility; - struct DisasInsn { unsigned opc:16; DisasFormat fmt:8; - DisasFacility fac:8; + unsigned fac:8; unsigned spec:8; const char *name; @@ -2409,12 +2380,31 @@ static ExitStatus op_ipm(DisasContext *s, DisasOps *o) } #ifndef CONFIG_USER_ONLY +static ExitStatus op_idte(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m4; + + check_privileged(s); + if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { + m4 = tcg_const_i32(get_field(s->fields, m4)); + } else { + m4 = tcg_const_i32(0); + } + gen_helper_idte(cpu_env, o->in1, o->in2, m4); + tcg_temp_free_i32(m4); + return NO_EXIT; +} + static ExitStatus op_ipte(DisasContext *s, DisasOps *o) { TCGv_i32 m4; check_privileged(s); - m4 = tcg_const_i32(get_field(s->fields, m4)); + if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { + m4 = tcg_const_i32(get_field(s->fields, m4)); + } else { + m4 = tcg_const_i32(0); + } gen_helper_ipte(cpu_env, o->in1, o->in2, m4); tcg_temp_free_i32(m4); return NO_EXIT; @@ -2935,6 +2925,12 @@ static ExitStatus op_lurag(DisasContext *s, DisasOps *o) } #endif +static ExitStatus op_lzrb(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, -256); + return NO_EXIT; +} + static ExitStatus 
op_mov2(DisasContext *s, DisasOps *o) { o->out = o->in2; @@ -2955,20 +2951,20 @@ static ExitStatus op_mov2e(DisasContext *s, DisasOps *o) o->g_in2 = false; switch (s->tb->flags & FLAG_MASK_ASC) { - case PSW_ASC_PRIMARY >> 32: + case PSW_ASC_PRIMARY >> FLAG_MASK_PSW_SHIFT: tcg_gen_movi_i64(ar1, 0); break; - case PSW_ASC_ACCREG >> 32: + case PSW_ASC_ACCREG >> FLAG_MASK_PSW_SHIFT: tcg_gen_movi_i64(ar1, 1); break; - case PSW_ASC_SECONDARY >> 32: + case PSW_ASC_SECONDARY >> FLAG_MASK_PSW_SHIFT: if (b2) { tcg_gen_ld32u_i64(ar1, cpu_env, offsetof(CPUS390XState, aregs[b2])); } else { tcg_gen_movi_i64(ar1, 0); } break; - case PSW_ASC_HOME >> 32: + case PSW_ASC_HOME >> FLAG_MASK_PSW_SHIFT: tcg_gen_movi_i64(ar1, 2); break; } @@ -3070,6 +3066,14 @@ static ExitStatus op_mvclu(DisasContext *s, DisasOps *o) return NO_EXIT; } +static ExitStatus op_mvcos(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s->fields, r3); + gen_helper_mvcos(cc_op, cpu_env, o->addr1, o->in2, regs[r3]); + set_cc_static(s); + return NO_EXIT; +} + #ifndef CONFIG_USER_ONLY static ExitStatus op_mvcp(DisasContext *s, DisasOps *o) { @@ -3662,7 +3666,7 @@ static ExitStatus op_sigp(DisasContext *s, DisasOps *o) static ExitStatus op_soc(DisasContext *s, DisasOps *o) { DisasCompare c; - TCGv_i64 a; + TCGv_i64 a, h; TCGLabel *lab; int r1; @@ -3682,10 +3686,21 @@ static ExitStatus op_soc(DisasContext *s, DisasOps *o) r1 = get_field(s->fields, r1); a = get_address(s, 0, get_field(s->fields, b2), get_field(s->fields, d2)); - if (s->insn->data) { + switch (s->insn->data) { + case 1: /* STOCG */ tcg_gen_qemu_st64(regs[r1], a, get_mem_index(s)); - } else { + break; + case 0: /* STOC */ tcg_gen_qemu_st32(regs[r1], a, get_mem_index(s)); + break; + case 2: /* STOCFH */ + h = tcg_temp_new_i64(); + tcg_gen_shri_i64(h, regs[r1], 32); + tcg_gen_qemu_st32(h, a, get_mem_index(s)); + tcg_temp_free_i64(h); + break; + default: + g_assert_not_reached(); } tcg_temp_free_i64(a); @@ -3782,7 +3797,7 @@ static ExitStatus op_spka(DisasContext *s, DisasOps *o) { check_privileged(s); tcg_gen_shri_i64(o->in2, o->in2, 4); - tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, PSW_SHIFT_KEY - 4, 4); + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, PSW_SHIFT_KEY, 4); return NO_EXIT; } @@ -4360,8 +4375,9 @@ static ExitStatus op_trXX(DisasContext *s, DisasOps *o) TCGv_i32 tst = tcg_temp_new_i32(); int m3 = get_field(s->fields, m3); - /* XXX: the C bit in M3 should be considered as 0 when the - ETF2-enhancement facility is not installed. */ + if (!s390_has_feat(S390_FEAT_ETF2_ENH)) { + m3 = 0; + } if (m3 & 1) { tcg_gen_movi_i32(tst, -1); } else { @@ -5418,6 +5434,39 @@ enum DisasInsnEnum { #define SPEC_prep_0 0 #define SPEC_wout_0 0 +/* Give smaller names to the various facilities. 
*/ +#define FAC_Z S390_FEAT_ZARCH +#define FAC_CASS S390_FEAT_COMPARE_AND_SWAP_AND_STORE +#define FAC_CASS2 S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2 +#define FAC_DFP S390_FEAT_DFP +#define FAC_DFPR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* DFP-rounding */ +#define FAC_DO S390_FEAT_STFLE_45 /* distinct-operands */ +#define FAC_EE S390_FEAT_EXECUTE_EXT +#define FAC_EI S390_FEAT_EXTENDED_IMMEDIATE +#define FAC_FPE S390_FEAT_FLOATING_POINT_EXT +#define FAC_FPSSH S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* FPS-sign-handling */ +#define FAC_FPRGR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* FPR-GR-transfer */ +#define FAC_GIE S390_FEAT_GENERAL_INSTRUCTIONS_EXT +#define FAC_HFP_MA S390_FEAT_HFP_MADDSUB +#define FAC_HW S390_FEAT_STFLE_45 /* high-word */ +#define FAC_IEEEE_SIM S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* IEEE-exception-simulation */ +#define FAC_MIE S390_FEAT_STFLE_49 /* misc-instruction-extensions */ +#define FAC_LAT S390_FEAT_STFLE_49 /* load-and-trap */ +#define FAC_LOC S390_FEAT_STFLE_45 /* load/store on condition 1 */ +#define FAC_LOC2 S390_FEAT_STFLE_53 /* load/store on condition 2 */ +#define FAC_LD S390_FEAT_LONG_DISPLACEMENT +#define FAC_PC S390_FEAT_STFLE_45 /* population count */ +#define FAC_SCF S390_FEAT_STORE_CLOCK_FAST +#define FAC_SFLE S390_FEAT_STFLE +#define FAC_ILA S390_FEAT_STFLE_45 /* interlocked-access-facility 1 */ +#define FAC_MVCOS S390_FEAT_MOVE_WITH_OPTIONAL_SPEC +#define FAC_LPP S390_FEAT_SET_PROGRAM_PARAMETERS /* load-program-parameter */ +#define FAC_DAT_ENH S390_FEAT_DAT_ENH +#define FAC_E2 S390_FEAT_EXTENDED_TRANSLATION_2 +#define FAC_EH S390_FEAT_STFLE_49 /* execution-hint */ +#define FAC_PPA S390_FEAT_STFLE_49 /* processor-assist */ +#define FAC_LZRB S390_FEAT_STFLE_53 /* load-and-zero-rightmost-byte */ + static const DisasInsn insn_info[] = { #include "insn-data.def" }; @@ -5529,7 +5578,7 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s, case 0x80: /* S */ case 0x82: /* S */ case 0x93: /* S */ - case 0xb2: /* S, RRF, RRE */ + case 0xb2: /* S, RRF, RRE, IE */ case 0xb3: /* RRE, RRD, RRF */ case 0xb9: /* RRE, RRF */ case 0xe5: /* SSE, SIL */ @@ -5545,6 +5594,8 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s, case 0xcc: /* RIL */ op2 = (insn << 12) >> 60; break; + case 0xc5: /* MII */ + case 0xc7: /* SMI */ case 0xd0 ... 0xdf: /* SS */ case 0xe1: /* SS */ case 0xe2: /* SS */ diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index ec30cb99b2..9880a6964e 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -116,6 +116,35 @@ uint8_t qvirtio_wait_status_byte_no_isr(QVirtioDevice *d, return val; } +/* + * qvirtio_wait_used_elem: + * @desc_idx: The next expected vq->desc[] index in the used ring + * @timeout_us: How many microseconds to wait before failing + * + * This function waits for the next completed request on the used ring. 
+ */ +void qvirtio_wait_used_elem(QVirtioDevice *d, + QVirtQueue *vq, + uint32_t desc_idx, + gint64 timeout_us) +{ + gint64 start_time = g_get_monotonic_time(); + + for (;;) { + uint32_t got_desc_idx; + + clock_step(100); + + if (d->bus->get_queue_isr_status(d, vq) && + qvirtqueue_get_buf(vq, &got_desc_idx)) { + g_assert_cmpint(got_desc_idx, ==, desc_idx); + return; + } + + g_assert(g_get_monotonic_time() - start_time <= timeout_us); + } +} + void qvirtio_wait_config_isr(QVirtioDevice *d, gint64 timeout_us) { gint64 start_time = g_get_monotonic_time(); @@ -272,6 +301,37 @@ void qvirtqueue_kick(QVirtioDevice *d, QVirtQueue *vq, uint32_t free_head) } } +/* + * qvirtqueue_get_buf: + * @desc_idx: A pointer that is filled with the vq->desc[] index, may be NULL + * + * This function gets the next used element if there is one ready. + * + * Returns: true if an element was ready, false otherwise + */ +bool qvirtqueue_get_buf(QVirtQueue *vq, uint32_t *desc_idx) +{ + uint16_t idx; + + idx = readw(vq->used + offsetof(struct vring_used, idx)); + if (idx == vq->last_used_idx) { + return false; + } + + if (desc_idx) { + uint64_t elem_addr; + + elem_addr = vq->used + + offsetof(struct vring_used, ring) + + (vq->last_used_idx % vq->size) * + sizeof(struct vring_used_elem); + *desc_idx = readl(elem_addr + offsetof(struct vring_used_elem, id)); + } + + vq->last_used_idx++; + return true; +} + void qvirtqueue_set_used_event(QVirtQueue *vq, uint16_t idx) { g_assert(vq->event); diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index 3397a080e9..8fbcd1869c 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -26,12 +26,13 @@ typedef struct QVirtioDevice { typedef struct QVirtQueue { uint64_t desc; /* This points to an array of struct vring_desc */ uint64_t avail; /* This points to a struct vring_avail */ - uint64_t used; /* This points to a struct vring_desc */ + uint64_t used; /* This points to a struct vring_used */ uint16_t index; uint32_t size; uint32_t free_head; uint32_t num_free; uint32_t align; + uint16_t last_used_idx; bool indirect; bool event; } QVirtQueue; @@ -120,6 +121,10 @@ uint8_t qvirtio_wait_status_byte_no_isr(QVirtioDevice *d, QVirtQueue *vq, uint64_t addr, gint64 timeout_us); +void qvirtio_wait_used_elem(QVirtioDevice *d, + QVirtQueue *vq, + uint32_t desc_idx, + gint64 timeout_us); void qvirtio_wait_config_isr(QVirtioDevice *d, gint64 timeout_us); QVirtQueue *qvirtqueue_setup(QVirtioDevice *d, QGuestAllocator *alloc, uint16_t index); @@ -135,6 +140,7 @@ uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write, bool next); uint32_t qvirtqueue_add_indirect(QVirtQueue *vq, QVRingIndirectDesc *indirect); void qvirtqueue_kick(QVirtioDevice *d, QVirtQueue *vq, uint32_t free_head); +bool qvirtqueue_get_buf(QVirtQueue *vq, uint32_t *desc_idx); void qvirtqueue_set_used_event(QVirtQueue *vq, uint16_t idx); #endif diff --git a/tests/qemu-iotests/068 b/tests/qemu-iotests/068 index 9c1687d01d..3801b65b9d 100755 --- a/tests/qemu-iotests/068 +++ b/tests/qemu-iotests/068 @@ -45,28 +45,41 @@ _supported_os Linux IMGOPTS="compat=1.1" IMG_SIZE=128K -echo -echo "=== Saving and reloading a VM state to/from a qcow2 image ===" -echo -_make_test_img $IMG_SIZE - case "$QEMU_DEFAULT_MACHINE" in s390-ccw-virtio) platform_parm="-no-shutdown" + hba=virtio-scsi-ccw ;; *) platform_parm="" + hba=virtio-scsi-pci ;; esac -# Give qemu some time to boot before saving the VM state -bash -c 'sleep 1; echo -e "savevm 0\nquit"' |\ - $QEMU $platform_parm -nographic -monitor stdio -serial 
none -hda "$TEST_IMG" |\ - _filter_qemu | _filter_hmp -# Now try to continue from that VM state (this should just work) -echo quit |\ - $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" -loadvm 0 |\ +_qemu() +{ + $QEMU $platform_parm -nographic -monitor stdio -serial none \ + -drive if=none,id=drive0,file="$TEST_IMG",format="$IMGFMT" \ + -device $hba,id=hba0 \ + -device scsi-hd,drive=drive0 \ + "$@" |\ _filter_qemu | _filter_hmp +} + +for extra_args in \ + "" \ + "-object iothread,id=iothread0 -set device.hba0.iothread=iothread0"; do + echo + echo "=== Saving and reloading a VM state to/from a qcow2 image ($extra_args) ===" + echo + + _make_test_img $IMG_SIZE + + # Give qemu some time to boot before saving the VM state + bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu $extra_args + # Now try to continue from that VM state (this should just work) + echo quit | _qemu $extra_args -loadvm 0 +done # success, all done echo "*** done" diff --git a/tests/qemu-iotests/068.out b/tests/qemu-iotests/068.out index 0fa5340c22..aa063cf711 100644 --- a/tests/qemu-iotests/068.out +++ b/tests/qemu-iotests/068.out @@ -1,6 +1,15 @@ QA output created by 068 -=== Saving and reloading a VM state to/from a qcow2 image === +=== Saving and reloading a VM state to/from a qcow2 image () === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) savevm 0 +(qemu) quit +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +=== Saving and reloading a VM state to/from a qcow2 image (-object iothread,id=iothread0 -set device.hba0.iothread=iothread0) === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 QEMU X.Y.Z monitor - type 'help' for more information diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 new file mode 100755 index 0000000000..0eda371f27 --- /dev/null +++ b/tests/qemu-iotests/185 @@ -0,0 +1,206 @@ +#!/bin/bash +# +# Test exiting qemu while jobs are still running +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +MIG_SOCKET="${TEST_DIR}/migrate" + +_cleanup() +{ + rm -f "${TEST_IMG}.mid" + rm -f "${TEST_IMG}.copy" + _cleanup_test_img + _cleanup_qemu +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. 
./common.qemu + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +size=64M +TEST_IMG="${TEST_IMG}.base" _make_test_img $size + +echo +echo === Starting VM === +echo + +qemu_comm_method="qmp" + +_launch_qemu \ + -drive file="${TEST_IMG}.base",cache=$CACHEMODE,driver=$IMGFMT,id=disk +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' + +echo +echo === Creating backing chain === +echo + +_send_qemu_cmd $h \ + "{ 'execute': 'blockdev-snapshot-sync', + 'arguments': { 'device': 'disk', + 'snapshot-file': '$TEST_IMG.mid', + 'format': '$IMGFMT', + 'mode': 'absolute-paths' } }" \ + "return" + +_send_qemu_cmd $h \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': + 'qemu-io disk \"write 0 4M\"' } }" \ + "return" + +_send_qemu_cmd $h \ + "{ 'execute': 'blockdev-snapshot-sync', + 'arguments': { 'device': 'disk', + 'snapshot-file': '$TEST_IMG', + 'format': '$IMGFMT', + 'mode': 'absolute-paths' } }" \ + "return" + +echo +echo === Start commit job and exit qemu === +echo + +# Note that the reference output intentionally includes the 'offset' field in +# BLOCK_JOB_CANCELLED events for all of the following block jobs. They are +# predictable and any change in the offsets would hint at a bug in the job +# throttling code. +# +# In order to achieve these predictable offsets, all of the following tests +# use speed=65536. Each job will perform exactly one iteration before it has +# to sleep at least for a second, which is plenty of time for the 'quit' QMP +# command to be received (after receiving the command, the rest runs +# synchronously, so jobs can arbitrarily continue or complete). +# +# The buffer size for commit and streaming is 512k (waiting for 8 seconds after +# the first request), for active commit and mirror it's large enough to cover +# the full 4M, and for backup it's the qcow2 cluster size, which we know is +# 64k. As all of these are at least as large as the speed, we are sure that the +# offset doesn't advance after the first iteration before qemu exits. 
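As a quick cross-check of the arithmetic above, here is a small standalone sketch of the offsets expected in the BLOCK_JOB_CANCELLED events (illustrative only, not part of the test harness):

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned speed = 65536;                 /* byte/sec quota given to each job */
    unsigned commit_stream = 512 * 1024;    /* commit/stream buffer size */
    unsigned active_mirror = 4 * 1024 * 1024; /* active commit/mirror: all 4M */
    unsigned backup = 64 * 1024;            /* backup: qcow2 cluster size */

    /* Each buffer is at least as large as the speed, so a single iteration
     * exhausts the quota and the job sleeps; the offset reported on cancel
     * is therefore exactly one buffer. */
    assert(commit_stream >= speed && active_mirror >= speed && backup >= speed);
    printf("commit/stream %u, active-commit/mirror %u, backup %u\n",
           commit_stream, active_mirror, backup);
    return 0;
}

This prints 524288, 4194304 and 65536, matching the offsets in the reference output.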
+ +_send_qemu_cmd $h \ + "{ 'execute': 'block-commit', + 'arguments': { 'device': 'disk', + 'base':'$TEST_IMG.base', + 'top': '$TEST_IMG.mid', + 'speed': 65536 } }" \ + "return" + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" +wait=1 _cleanup_qemu + +echo +echo === Start active commit job and exit qemu === +echo + +_launch_qemu \ + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' + +_send_qemu_cmd $h \ + "{ 'execute': 'block-commit', + 'arguments': { 'device': 'disk', + 'base':'$TEST_IMG.base', + 'speed': 65536 } }" \ + "return" + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" +wait=1 _cleanup_qemu + +echo +echo === Start mirror job and exit qemu === +echo + +_launch_qemu \ + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' + +_send_qemu_cmd $h \ + "{ 'execute': 'drive-mirror', + 'arguments': { 'device': 'disk', + 'target': '$TEST_IMG.copy', + 'format': '$IMGFMT', + 'sync': 'full', + 'speed': 65536 } }" \ + "return" + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" +wait=1 _cleanup_qemu + +echo +echo === Start backup job and exit qemu === +echo + +_launch_qemu \ + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' + +_send_qemu_cmd $h \ + "{ 'execute': 'drive-backup', + 'arguments': { 'device': 'disk', + 'target': '$TEST_IMG.copy', + 'format': '$IMGFMT', + 'sync': 'full', + 'speed': 65536 } }" \ + "return" + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" +wait=1 _cleanup_qemu + +echo +echo === Start streaming job and exit qemu === +echo + +_launch_qemu \ + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk +h=$QEMU_HANDLE +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' + +_send_qemu_cmd $h \ + "{ 'execute': 'block-stream', + 'arguments': { 'device': 'disk', + 'speed': 65536 } }" \ + "return" + +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" +wait=1 _cleanup_qemu + +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out new file mode 100644 index 0000000000..45bc7cb1a5 --- /dev/null +++ b/tests/qemu-iotests/185.out @@ -0,0 +1,59 @@ +QA output created by 185 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 + +=== Starting VM === + +{"return": {}} + +=== Creating backing chain === + +Formatting 'TEST_DIR/t.qcow2.mid', fmt=qcow2 size=67108864 backing_file=TEST_DIR/t.qcow2.base backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +{"return": {}} +wrote 4194304/4194304 bytes at offset 0 +4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 backing_file=TEST_DIR/t.qcow2.mid backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +{"return": {}} + +=== Start commit job and exit qemu === + +{"return": {}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}} + +=== Start active commit job and exit qemu === + +{"return": {}} +{"return": {}} 
+{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}} + +=== Start mirror job and exit qemu === + +{"return": {}} +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +{"return": {}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}} + +=== Start backup job and exit qemu === + +{"return": {}} +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +{"return": {}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}} + +=== Start streaming job and exit qemu === + +{"return": {}} +{"return": {}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} +No errors were found on the image. 
+*** done diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 7a78a00999..76ef298d3f 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -222,5 +222,8 @@ function _cleanup_qemu() rm -f "${QEMU_FIFO_IN}_${i}" "${QEMU_FIFO_OUT}_${i}" eval "exec ${QEMU_IN[$i]}<&-" # close file descriptors eval "exec ${QEMU_OUT[$i]}<&-" + + unset QEMU_IN[$i] + unset QEMU_OUT[$i] done } diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a6acafffd7..318ae74b10 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -175,3 +175,4 @@ 181 rw auto migration 182 rw auto quick 183 rw auto migration +185 rw auto diff --git a/tests/test-char.c b/tests/test-char.c index 9e361c8d09..87c724c5c2 100644 --- a/tests/test-char.c +++ b/tests/test-char.c @@ -53,7 +53,9 @@ static void fe_event(void *opaque, int event) FeHandler *h = opaque; h->last_event = event; - quit = true; + if (event != CHR_EVENT_BREAK) { + quit = true; + } } #ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS @@ -517,7 +519,7 @@ static void char_file_test(void) file.in = fifo; file.has_in = true; - chr = qemu_chardev_new(NULL, TYPE_CHARDEV_FILE, &backend, + chr = qemu_chardev_new("label-file", TYPE_CHARDEV_FILE, &backend, &error_abort); qemu_chr_fe_init(&be, chr, &error_abort); @@ -527,6 +529,12 @@ static void char_file_test(void) fe_event, &fe, NULL, true); + g_assert_cmpint(fe.last_event, !=, CHR_EVENT_BREAK); + qmp_chardev_send_break("label-foo", NULL); + g_assert_cmpint(fe.last_event, !=, CHR_EVENT_BREAK); + qmp_chardev_send_break("label-file", NULL); + g_assert_cmpint(fe.last_event, ==, CHR_EVENT_BREAK); + main_loop(); close(fd); diff --git a/tests/test-hmp.c b/tests/test-hmp.c index 99e35ec15a..6dfa0c36e2 100644 --- a/tests/test-hmp.c +++ b/tests/test-hmp.c @@ -22,6 +22,7 @@ static int verbose; static const char *hmp_cmds[] = { "boot_set ndc", "chardev-add null,id=testchardev1", + "chardev-send-break testchardev2", "chardev-remove testchardev1", "commit all", "cpu-add 1", diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index fd2078c9da..0576cb16ba 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -196,7 +196,7 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -218,7 +218,7 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -246,7 +246,7 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, qvirtqueue_add(vq, req_addr + 528, 1, true, false); qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -267,7 +267,7 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -348,7 +348,7 @@ static void 
pci_indirect(void) free_head = qvirtqueue_add_indirect(&vqpci->vq, indirect); qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, + qvirtio_wait_used_elem(&dev->vdev, &vqpci->vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -373,7 +373,7 @@ static void pci_indirect(void) free_head = qvirtqueue_add_indirect(&vqpci->vq, indirect); qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, + qvirtio_wait_used_elem(&dev->vdev, &vqpci->vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -484,7 +484,7 @@ static void pci_msix(void) qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, + qvirtio_wait_used_elem(&dev->vdev, &vqpci->vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); @@ -509,7 +509,7 @@ static void pci_msix(void) qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, + qvirtio_wait_used_elem(&dev->vdev, &vqpci->vq, free_head, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); @@ -540,6 +540,8 @@ static void pci_idx(void) uint64_t capacity; uint32_t features; uint32_t free_head; + uint32_t write_head; + uint32_t desc_idx; uint8_t status; char *data; @@ -581,7 +583,8 @@ static void pci_idx(void) qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_used_elem(&dev->vdev, &vqpci->vq, free_head, + QVIRTIO_BLK_TIMEOUT_US); /* Write request */ req.type = VIRTIO_BLK_T_OUT; @@ -600,6 +603,7 @@ static void pci_idx(void) qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); + write_head = free_head; /* No notification expected */ status = qvirtio_wait_status_byte_no_isr(&dev->vdev, @@ -625,8 +629,11 @@ static void pci_idx(void) qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, + /* We get just one notification for both requests */ + qvirtio_wait_used_elem(&dev->vdev, &vqpci->vq, write_head, QVIRTIO_BLK_TIMEOUT_US); + g_assert(qvirtqueue_get_buf(&vqpci->vq, &desc_idx)); + g_assert_cmpint(desc_idx, ==, free_head); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); diff --git a/tests/virtio-net-test.c b/tests/virtio-net-test.c index 8f94360480..635b942c36 100644 --- a/tests/virtio-net-test.c +++ b/tests/virtio-net-test.c @@ -108,7 +108,7 @@ static void rx_test(QVirtioDevice *dev, ret = iov_send(socket, iov, 2, 0, sizeof(len) + sizeof(test)); g_assert_cmpint(ret, ==, sizeof(test) + sizeof(len)); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_NET_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_NET_TIMEOUT_US); memread(req_addr + VNET_HDR_SIZE, buffer, sizeof(test)); g_assert_cmpstr(buffer, ==, "TEST"); @@ -131,7 +131,7 @@ static void tx_test(QVirtioDevice *dev, free_head = qvirtqueue_add(vq, req_addr, 64, false, false); qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_NET_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_NET_TIMEOUT_US); guest_free(alloc, req_addr); ret = qemu_recv(socket, &len, sizeof(len), 0); @@ -182,7 +182,7 @@ static 
void rx_stop_cont_test(QVirtioDevice *dev, rsp = qmp("{ 'execute' : 'cont'}"); QDECREF(rsp); - qvirtio_wait_queue_isr(dev, vq, QVIRTIO_NET_TIMEOUT_US); + qvirtio_wait_used_elem(dev, vq, free_head, QVIRTIO_NET_TIMEOUT_US); memread(req_addr + VNET_HDR_SIZE, buffer, sizeof(test)); g_assert_cmpstr(buffer, ==, "TEST"); diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index eff71df81f..87a3b6e81a 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -121,7 +121,7 @@ static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb, } qvirtqueue_kick(vs->dev, vq, free_head); - qvirtio_wait_queue_isr(vs->dev, vq, QVIRTIO_SCSI_TIMEOUT_US); + qvirtio_wait_used_elem(vs->dev, vq, free_head, QVIRTIO_SCSI_TIMEOUT_US); response = readb(resp_addr + offsetof(struct virtio_scsi_cmd_resp, response)); diff --git a/ui/cocoa.m b/ui/cocoa.m index 1f010d3ae7..93e56d0518 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -35,6 +35,7 @@ #include "sysemu/blockdev.h" #include "qemu-version.h" #include <Carbon/Carbon.h> +#include "qom/cpu.h" #ifndef MAC_OS_X_VERSION_10_5 #define MAC_OS_X_VERSION_10_5 1050 @@ -570,7 +571,7 @@ QemuCocoaView *cocoaView; // bitmask. if (qemu_console_is_graphic(NULL)) { - NSEventModifierFlags modifiers = [event modifierFlags]; + NSUInteger modifiers = [event modifierFlags]; if (!!(modifiers & NSEventModifierFlagCapsLock) != !!modifiers_state[Q_KEY_CODE_CAPS_LOCK]) { [self toggleStatefulModifier:Q_KEY_CODE_CAPS_LOCK]; @@ -893,6 +894,7 @@ QemuCocoaView *cocoaView; - (void)openDocumentation:(NSString *)filename; - (IBAction) do_about_menu_item: (id) sender; - (void)make_about_window; +- (void)adjustSpeed:(id)sender; @end @implementation QemuCocoaAppController @@ -1299,6 +1301,34 @@ QemuCocoaView *cocoaView; [superView addSubview: copyright_label]; } +/* Used by the Speed menu items */ +- (void)adjustSpeed:(id)sender +{ + int throttle_pct; /* throttle percentage */ + NSMenu *menu; + + menu = [sender menu]; + if (menu != nil) + { + /* Unselect the currently selected item */ + for (NSMenuItem *item in [menu itemArray]) { + if (item.state == NSOnState) { + [item setState: NSOffState]; + break; + } + } + } + + // check the menu item + [sender setState: NSOnState]; + + // get the throttle percentage + throttle_pct = [sender tag]; + + cpu_throttle_set(throttle_pct); + COCOA_DEBUG("cpu throttling at %d%c\n", cpu_throttle_get_percentage(), '%'); +} + @end @@ -1381,6 +1411,32 @@ int main (int argc, const char * argv[]) { [menuItem setSubmenu:menu]; [[NSApp mainMenu] addItem:menuItem]; + // Speed menu + menu = [[NSMenu alloc] initWithTitle:@"Speed"]; + + // Add the rest of the Speed menu items + int p, percentage, throttle_pct; + for (p = 10; p >= 0; p--) + { + percentage = p * 10 > 1 ? 
+
+        menuItem = [[[NSMenuItem alloc]
+                     initWithTitle: [NSString stringWithFormat: @"%d%%", percentage]
+                     action:@selector(adjustSpeed:) keyEquivalent:@""] autorelease];
+
+        if (percentage == 100) {
+            [menuItem setState: NSOnState];
+        }
+
+        /* Calculate the throttle percentage */
+        throttle_pct = -1 * percentage + 100;
+
+        [menuItem setTag: throttle_pct];
+        [menu addItem: menuItem];
+    }
+    menuItem = [[[NSMenuItem alloc] initWithTitle:@"Speed" action:nil keyEquivalent:@""] autorelease];
+    [menuItem setSubmenu:menu];
+    [[NSApp mainMenu] addItem:menuItem];
+
     // Window menu
     menu = [[NSMenu alloc] initWithTitle:@"Window"];
     [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"Minimize" action:@selector(performMiniaturize:) keyEquivalent:@"m"] autorelease]]; // Miniaturize
diff --git a/ui/input.c b/ui/input.c
index 2abd46de93..af05f06368 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -256,6 +256,7 @@ static void qemu_input_queue_process(void *opaque)
     item = QTAILQ_FIRST(queue);
     g_assert(item->type == QEMU_INPUT_QUEUE_DELAY);
     QTAILQ_REMOVE(queue, item, node);
+    queue_count--;
     g_free(item);

     while (!QTAILQ_EMPTY(queue)) {
diff --git a/ui/keymaps.c b/ui/keymaps.c
index 8899a0b31e..fa00b82027 100644
--- a/ui/keymaps.c
+++ b/ui/keymaps.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "keymaps.h"
 #include "sysemu/sysemu.h"
+#include "trace.h"

 static int get_keysym(const name2keysym_t *table,
                       const char *name)
@@ -71,18 +72,14 @@ static void add_to_key_range(struct key_range **krp, int code) {
 static void add_keysym(char *line, int keysym, int keycode, kbd_layout_t *k) {
     if (keysym < MAX_NORMAL_KEYCODE) {
-        /* fprintf(stderr,"Setting keysym %s (%d) to %d\n",
-           line, keysym, keycode); */
+        trace_keymap_add("normal", keysym, keycode, line);
         k->keysym2keycode[keysym] = keycode;
     } else {
         if (k->extra_count >= MAX_EXTRA_COUNT) {
             fprintf(stderr, "Warning: Could not assign keysym %s (0x%x)"
                     " because of memory constraints.\n", line, keysym);
         } else {
-#if 0
-            fprintf(stderr, "Setting %d: %d,%d\n",
-                    k->extra_count, keysym, keycode);
-#endif
+            trace_keymap_add("extra", keysym, keycode, line);
             k->keysym2keycode_extra[k->extra_count].
                 keysym = keysym;
             k->keysym2keycode_extra[k->extra_count].
@@ -99,9 +96,11 @@ static kbd_layout_t *parse_keyboard_layout(const name2keysym_t *table,
     FILE *f;
     char * filename;
     char line[1024];
+    char keyname[64];
     int len;

     filename = qemu_find_file(QEMU_FILE_TYPE_KEYMAP, language);
+    trace_keymap_parse(filename);
     f = filename ? fopen(filename, "r") : NULL;
     g_free(filename);
     if (!f) {
@@ -130,18 +129,21 @@ static kbd_layout_t *parse_keyboard_layout(const name2keysym_t *table,
         if (!strncmp(line, "include ", 8)) {
             parse_keyboard_layout(table, line + 8, k);
         } else {
-            char *end_of_keysym = line;
-            while (*end_of_keysym != 0 && *end_of_keysym != ' ') {
-                end_of_keysym++;
+            int offset = 0;
+            while (line[offset] != 0 &&
+                   line[offset] != ' ' &&
+                   offset < sizeof(keyname) - 1) {
+                keyname[offset] = line[offset];
+                offset++;
             }
-            if (*end_of_keysym) {
+            keyname[offset] = 0;
+            if (strlen(keyname)) {
                 int keysym;
-                *end_of_keysym = 0;
-                keysym = get_keysym(table, line);
+                keysym = get_keysym(table, keyname);
                 if (keysym == 0) {
                     /* fprintf(stderr, "Warning: unknown keysym %s\n", line);*/
                 } else {
-                    const char *rest = end_of_keysym + 1;
+                    const char *rest = line + offset + 1;
                     int keycode = strtol(rest, NULL, 0);

                     if (strstr(rest, "numlock")) {
@@ -165,10 +167,10 @@
                     if (strstr(rest, "addupper")) {
                         char *c;
-                        for (c = line; *c; c++) {
+                        for (c = keyname; *c; c++) {
                             *c = qemu_toupper(*c);
                         }
-                        keysym = get_keysym(table, line);
+                        keysym = get_keysym(table, keyname);
                         if (keysym) {
                             add_keysym(line, keysym,
                                        keycode | SCANCODE_SHIFT, k);
@@ -194,6 +196,7 @@ int keysym2scancode(void *kbd_layout, int keysym)
     kbd_layout_t *k = kbd_layout;
     if (keysym < MAX_NORMAL_KEYCODE) {
         if (k->keysym2keycode[keysym] == 0) {
+            trace_keymap_unmapped(keysym);
             fprintf(stderr, "Warning: no scancode found for keysym %d\n",
                     keysym);
         }
diff --git a/ui/sdl2.c b/ui/sdl2.c
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -804,6 +804,7 @@ void sdl_display_init(DisplayState *ds, int full_screen, int no_frame)
     sdl2_console = g_new0(struct sdl2_console, sdl2_num_outputs);
     for (i = 0; i < sdl2_num_outputs; i++) {
         QemuConsole *con = qemu_console_lookup_by_index(i);
+        assert(con != NULL);
         if (!qemu_console_is_graphic(con)) {
             sdl2_console[i].hidden = true;
         }
diff --git a/ui/trace-events b/ui/trace-events
index 93fe5482e6..19ce5f85f6 100644
--- a/ui/trace-events
+++ b/ui/trace-events
@@ -46,3 +46,8 @@ qemu_spice_create_primary_surface(int qid, uint32_t sid, void *surface, int asyn
 qemu_spice_destroy_primary_surface(int qid, uint32_t sid, int async) "%d sid=%u async=%d"
 qemu_spice_wakeup(uint32_t qid) "%d"
 qemu_spice_create_update(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom) "lr %d -> %d, tb -> %d -> %d"
+
+# ui/keymaps.c
+keymap_parse(const char *file) "file %s"
+keymap_add(const char *type, int sym, int code, const char *line) "%-6s sym=0x%04x code=0x%04x (line: %s)"
+keymap_unmapped(int sym) "sym=0x%04x"
diff --git a/vl.c b/vl.c
--- a/vl.c
+++ b/vl.c
@@ -188,7 +188,6 @@ bool boot_strict;
 uint8_t *boot_splash_filedata;
 size_t boot_splash_filedata_size;
 uint8_t qemu_extra_params_fw[2];
-int only_migratable; /* turn it off unless user states otherwise */

 int icount_align_option;

@@ -2969,6 +2968,25 @@ static int qemu_read_default_config_file(void)
     return 0;
 }

+static void user_register_global_props(void)
+{
+    qemu_opts_foreach(qemu_find_opts("global"),
+                      global_init_func, NULL, NULL);
+}
+
+/*
+ * Note: these properties are registered with increasing priority:
+ * accel < machine < user. This means e.g. that when the user
+ * specifies something with "-global", it always takes priority
+ * over machine and accelerator compat properties.
+ */
+static void register_global_properties(MachineState *ms)
+{
+    accel_register_compat_props(ms->accelerator);
+    machine_register_compat_props(ms);
+    user_register_global_props();
+}
+
 int main(int argc, char **argv, char **envp)
 {
     int i;
@@ -3934,7 +3952,13 @@ int main(int argc, char **argv, char **envp)
                 incoming = optarg;
                 break;
             case QEMU_OPTION_only_migratable:
-                only_migratable = 1;
+                /*
+                 * TODO: we can remove this option one day, and we
+                 * should all use:
+                 *
+                 * "-global migration.only-migratable=true"
+                 */
+                migration_only_migratable_set();
                 break;
             case QEMU_OPTION_nodefaults:
                 has_defaults = 0;
@@ -4571,10 +4595,17 @@ int main(int argc, char **argv, char **envp)
         exit (i == 1 ? 1 : 0);
     }

-    machine_register_compat_props(current_machine);
+    /*
+     * Register all the global properties, including accel properties,
+     * machine properties, and user-specified ones.
+     */
+    register_global_properties(current_machine);

-    qemu_opts_foreach(qemu_find_opts("global"),
-                      global_init_func, NULL, NULL);
+    /*
+     * Migration object can only be created after global properties
+     * are applied correctly.
+     */
+    migration_object_init();

     /* This checkpoint is required by replay to separate prior clock
        reading from the other reads, because timer polling functions query
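
A note on the register_global_properties() hunk above: the accel < machine <
user priority is purely a consequence of registration order, since for the
same driver/property pair a property registered later overrides one
registered earlier. The following standalone C sketch illustrates that
"last registration wins" rule; the Prop struct and effective_value() helper
are hypothetical illustrations for this document, not QEMU code:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for a global property record. */
    typedef struct {
        const char *driver;
        const char *property;
        const char *value;
    } Prop;

    /* Scan in registration order; the last match wins, which is why
     * registering accel, then machine, then user properties gives the
     * user-specified value the highest priority. */
    static const char *effective_value(const Prop *props, int n,
                                       const char *driver,
                                       const char *property)
    {
        const char *val = NULL;
        int i;

        for (i = 0; i < n; i++) {
            if (strcmp(props[i].driver, driver) == 0 &&
                strcmp(props[i].property, property) == 0) {
                val = props[i].value; /* later entry overwrites earlier */
            }
        }
        return val;
    }

    int main(void)
    {
        /* Registered in the same order as register_global_properties():
         * machine compat property first, user "-global" last. */
        Prop props[] = {
            { "migration", "only-migratable", "false" }, /* machine */
            { "migration", "only-migratable", "true" },  /* user    */
        };

        printf("%s\n",
               effective_value(props, 2, "migration", "only-migratable"));
        return 0;
    }

Compiled on its own, this prints "true": the user's
"-global migration.only-migratable=true" masks the earlier machine-level
value, matching the accel < machine < user ordering the new vl.c comment
describes.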