diff options
282 files changed, 6369 insertions, 2274 deletions
diff --git a/.gitmodules b/.gitmodules index ca323b4d87..5b0c212622 100644 --- a/.gitmodules +++ b/.gitmodules @@ -34,3 +34,6 @@ [submodule "roms/skiboot"] path = roms/skiboot url = git://git.qemu.org/skiboot.git +[submodule "roms/QemuMacDrivers"] + path = roms/QemuMacDrivers + url = git://git.qemu.org/QemuMacDrivers.git diff --git a/.travis.yml b/.travis.yml index 9008a796f0..27a2d9cfb3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -86,9 +86,6 @@ matrix: - env: CONFIG="--enable-trace-backends=ust" TEST_CMD="" compiler: gcc - - env: CONFIG="--with-coroutine=gthread" - TEST_CMD="" - compiler: gcc - env: CONFIG="" os: osx compiler: clang @@ -191,7 +188,7 @@ matrix: compiler: none env: - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5 - - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread" + - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user" - TEST_CMD="" before_script: - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log diff --git a/MAINTAINERS b/MAINTAINERS index 8224be0d75..ef2ec58a94 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -354,6 +354,12 @@ L: qemu-devel@nongnu.org S: Maintained F: *posix* +NETBSD +L: qemu-devel@nongnu.org +M: Kamil Rytarowski <kamil@netbsd.org> +S: Maintained +K: (?i)NetBSD + W32, W64 L: qemu-devel@nongnu.org M: Stefan Weil <sw@weilnetz.de> @@ -1170,6 +1176,7 @@ F: include/block/ F: qemu-img* F: qemu-io* F: tests/qemu-iotests/ +F: util/qemu-progress.c T: git git://repo.or.cz/qemu/kevin.git block Block I/O path @@ -1177,8 +1184,8 @@ M: Stefan Hajnoczi <stefanha@redhat.com> M: Fam Zheng <famz@redhat.com> L: qemu-block@nongnu.org S: Supported -F: async.c -F: aio-*.c +F: util/async.c +F: util/aio-*.c F: block/io.c F: migration/block* F: include/block/aio.h @@ -1307,8 +1314,8 @@ Main loop M: Paolo Bonzini <pbonzini@redhat.com> S: Maintained F: cpus.c -F: main-loop.c -F: qemu-timer.c +F: util/main-loop.c +F: util/qemu-timer.c F: vl.c Human Monitor (HMP) 
@@ -1487,6 +1494,7 @@ S: Maintained F: crypto/ F: include/crypto/ F: tests/test-crypto-* +F: qemu.sasl Coroutines M: Stefan Hajnoczi <stefanha@redhat.com> @@ -1577,6 +1585,7 @@ F: default-configs/*-bsd-user.mak Linux user M: Riku Voipio <riku.voipio@iki.fi> +R: Laurent Vivier <laurent@vivier.eu> S: Maintained F: linux-user/ F: default-configs/*-linux-user.mak @@ -552,7 +552,8 @@ multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \ s390-ccw.img \ spapr-rtas.bin slof.bin skiboot.lid \ palcode-clipper \ -u-boot.e500 +u-boot.e500 \ +qemu_vga.ndrv else BLOBS= endif @@ -192,11 +192,20 @@ void path_combine(char *dest, int dest_size, } } +/* Returns whether the image file is opened as read-only. Note that this can + * return false and writing to the image file is still not possible because the + * image is inactivated. */ bool bdrv_is_read_only(BlockDriverState *bs) { return bs->read_only; } +/* Returns whether the image file can be written to right now */ +bool bdrv_is_writable(BlockDriverState *bs) +{ + return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE); +} + int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) { /* Do not set read_only if copy_on_read is enabled */ @@ -762,6 +771,13 @@ static void bdrv_child_cb_drained_end(BdrvChild *child) bdrv_drained_end(bs); } +static int bdrv_child_cb_inactivate(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + assert(bs->open_flags & BDRV_O_INACTIVE); + return 0; +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -800,6 +816,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, * the parent. 
*/ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); + qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); /* Inherit the read-only option from the parent if it's not set */ qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); @@ -821,6 +838,7 @@ const BdrvChildRole child_file = { .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .inactivate = bdrv_child_cb_inactivate, }; /* @@ -842,6 +860,7 @@ const BdrvChildRole child_format = { .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .inactivate = bdrv_child_cb_inactivate, }; static void bdrv_backing_attach(BdrvChild *c) @@ -908,6 +927,7 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, * which is only applied on the top level (BlockBackend) */ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); + qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); /* backing files always opened read-only */ qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); @@ -926,6 +946,7 @@ const BdrvChildRole child_backing = { .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .inactivate = bdrv_child_cb_inactivate, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -1150,6 +1171,11 @@ QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_STRING, .help = "discard operation (ignore/off, unmap/on)", }, + { + .name = BDRV_OPT_FORCE_SHARE, + .type = QEMU_OPT_BOOL, + .help = "always accept other writers (default: off)", + }, { /* end of list */ } }, }; @@ -1189,6 +1215,16 @@ static int 
bdrv_open_common(BlockDriverState *bs, BlockBackend *file, drv = bdrv_find_format(driver_name); assert(drv != NULL); + bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); + + if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { + error_setg(errp, + BDRV_OPT_FORCE_SHARE + "=on can only be used with read-only images"); + ret = -EINVAL; + goto fail_opts; + } + if (file != NULL) { filename = blk_bs(file)->filename; } else { @@ -1448,6 +1484,22 @@ static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, + BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (bs->drv && bs->drv->bdrv_child_perm) { + bs->drv->bdrv_child_perm(bs, c, role, + parent_perm, parent_shared, + nperm, nshared); + } + if (child_bs && child_bs->force_share) { + *nshared = BLK_PERM_ALL; + } +} + /* * Check whether permissions on this node can be changed in a way that * @cumulative_perms and @cumulative_shared_perms are the new cumulative @@ -1467,7 +1519,7 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && - bdrv_is_read_only(bs)) + !bdrv_is_writable(bs)) { error_setg(errp, "Block node is read-only"); return -EPERM; @@ -1492,9 +1544,9 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Check all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - drv->bdrv_child_perm(bs, c, c->role, - cumulative_perms, cumulative_shared_perms, - &cur_perm, &cur_shared); + bdrv_child_perm(bs, c->bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + 
&cur_perm, &cur_shared); ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children, errp); if (ret < 0) { @@ -1554,9 +1606,9 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Update all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - drv->bdrv_child_perm(bs, c, c->role, - cumulative_perms, cumulative_shared_perms, - &cur_perm, &cur_shared); + bdrv_child_perm(bs, c->bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); bdrv_child_set_perm(c, cur_perm, cur_shared); } } @@ -1586,7 +1638,7 @@ static char *bdrv_child_user_desc(BdrvChild *c) return g_strdup("another user"); } -static char *bdrv_perm_names(uint64_t perm) +char *bdrv_perm_names(uint64_t perm) { struct perm_name { uint64_t perm; @@ -1752,7 +1804,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); /* Format drivers may touch metadata even if the guest doesn't write */ - if (!bdrv_is_read_only(bs)) { + if (bdrv_is_writable(bs)) { perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; } @@ -1778,6 +1830,10 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, BLK_PERM_WRITE_UNCHANGED; } + if (bs->open_flags & BDRV_O_INACTIVE) { + shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + *nperm = perm; *nshared = shared; } @@ -1891,8 +1947,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, assert(parent_bs->drv); assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); - parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, - perm, shared_perm, &perm, &shared_perm); + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, perm, shared_perm, parent_bs, errp); @@ -3916,7 +3972,8 @@ void bdrv_init_with_whitelist(void) void bdrv_invalidate_cache(BlockDriverState *bs, Error 
**errp) { - BdrvChild *child; + BdrvChild *child, *parent; + uint64_t perm, shared_perm; Error *local_err = NULL; int ret; @@ -3952,6 +4009,26 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) error_setg_errno(errp, -ret, "Could not refresh total sector count"); return; } + + /* Update permissions, they may differ for inactive nodes */ + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err); + if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } + bdrv_set_perm(bs, perm, shared_perm); + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->role->activate) { + parent->role->activate(parent, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + } } void bdrv_invalidate_cache_all(Error **errp) @@ -3976,7 +4053,7 @@ void bdrv_invalidate_cache_all(Error **errp) static int bdrv_inactivate_recurse(BlockDriverState *bs, bool setting_flag) { - BdrvChild *child; + BdrvChild *child, *parent; int ret; if (!setting_flag && bs->drv->bdrv_inactivate) { @@ -3986,6 +4063,27 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, } } + if (setting_flag) { + uint64_t perm, shared_perm; + + bs->open_flags |= BDRV_O_INACTIVE; + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->role->inactivate) { + ret = parent->role->inactivate(parent); + if (ret < 0) { + bs->open_flags &= ~BDRV_O_INACTIVE; + return ret; + } + } + } + + /* Update permissions, they may differ for inactive nodes */ + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort); + bdrv_set_perm(bs, perm, shared_perm); + } + QLIST_FOREACH(child, &bs->children, next) { ret = bdrv_inactivate_recurse(child->bs, setting_flag); if (ret < 0) { @@ -3993,9 +4091,6 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, } } - if (setting_flag) { - bs->open_flags |= BDRV_O_INACTIVE; - } 
return 0; } diff --git a/block/blkdebug.c b/block/blkdebug.c index 3c088934db..a5196e889d 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -1,6 +1,7 @@ /* * Block protocol for I/O error injection * + * Copyright (C) 2016-2017 Red Hat, Inc. * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -37,7 +38,12 @@ typedef struct BDRVBlkdebugState { int state; int new_state; - int align; + uint64_t align; + uint64_t max_transfer; + uint64_t opt_write_zero; + uint64_t max_write_zero; + uint64_t opt_discard; + uint64_t max_discard; /* For blkdebug_refresh_filename() */ char *config_file; @@ -342,6 +348,31 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Required alignment in bytes", }, + { + .name = "max-transfer", + .type = QEMU_OPT_SIZE, + .help = "Maximum transfer size in bytes", + }, + { + .name = "opt-write-zero", + .type = QEMU_OPT_SIZE, + .help = "Optimum write zero alignment in bytes", + }, + { + .name = "max-write-zero", + .type = QEMU_OPT_SIZE, + .help = "Maximum write zero size in bytes", + }, + { + .name = "opt-discard", + .type = QEMU_OPT_SIZE, + .help = "Optimum discard alignment in bytes", + }, + { + .name = "max-discard", + .type = QEMU_OPT_SIZE, + .help = "Maximum discard size in bytes", + }, { /* end of list */ } }, }; @@ -352,8 +383,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, BDRVBlkdebugState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - uint64_t align; int ret; + uint64_t align; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -382,19 +413,69 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, goto out; } - /* Set request alignment */ - align = qemu_opt_get_size(opts, "align", 0); - if (align < INT_MAX && is_power_of_2(align)) { - s->align = align; - } else if (align) { - error_setg(errp, "Invalid 
alignment"); - ret = -EINVAL; + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags; + ret = -EINVAL; + + /* Set alignment overrides */ + s->align = qemu_opt_get_size(opts, "align", 0); + if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) { + error_setg(errp, "Cannot meet constraints with align %" PRIu64, + s->align); goto out; } + align = MAX(s->align, bs->file->bs->bl.request_alignment); - ret = 0; - goto out; + s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0); + if (s->max_transfer && + (s->max_transfer >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_transfer, align))) { + error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64, + s->max_transfer); + goto out; + } + + s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0); + if (s->opt_write_zero && + (s->opt_write_zero >= INT_MAX || + !QEMU_IS_ALIGNED(s->opt_write_zero, align))) { + error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64, + s->opt_write_zero); + goto out; + } + + s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0); + if (s->max_write_zero && + (s->max_write_zero >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_write_zero, + MAX(s->opt_write_zero, align)))) { + error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64, + s->max_write_zero); + goto out; + } + + s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0); + if (s->opt_discard && + (s->opt_discard >= INT_MAX || + !QEMU_IS_ALIGNED(s->opt_discard, align))) { + error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64, + s->opt_discard); + goto out; + } + + s->max_discard = qemu_opt_get_size(opts, "max-discard", 0); + if (s->max_discard && + (s->max_discard >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_discard, + MAX(s->opt_discard, align)))) { + error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64, + 
s->max_discard); + goto out; + } + ret = 0; out: if (ret < 0) { g_free(s->config_file); @@ -403,11 +484,30 @@ out: return ret; } -static int inject_error(BlockDriverState *bs, BlkdebugRule *rule) +static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes) { BDRVBlkdebugState *s = bs->opaque; - int error = rule->options.inject.error; - bool immediately = rule->options.inject.immediately; + BlkdebugRule *rule = NULL; + int error; + bool immediately; + + QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { + uint64_t inject_offset = rule->options.inject.offset; + + if (inject_offset == -1 || + (bytes && inject_offset >= offset && + inject_offset < offset + bytes)) + { + break; + } + } + + if (!rule || !rule->options.inject.error) { + return 0; + } + + immediately = rule->options.inject.immediately; + error = rule->options.inject.error; if (rule->options.inject.once) { QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next); @@ -426,21 +526,18 @@ static int coroutine_fn blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugRule *rule = NULL; - - QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - uint64_t inject_offset = rule->options.inject.offset; + int err; - if (inject_offset == -1 || - (inject_offset >= offset && inject_offset < offset + bytes)) - { - break; - } + /* Sanity check block layer guarantees */ + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (bs->bl.max_transfer) { + assert(bytes <= bs->bl.max_transfer); } - if (rule && rule->options.inject.error) { - return inject_error(bs, rule); + err = rule_check(bs, offset, bytes); + if (err) { + return err; } return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); @@ -450,21 +547,18 @@ static int coroutine_fn blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector 
*qiov, int flags) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugRule *rule = NULL; + int err; - QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - uint64_t inject_offset = rule->options.inject.offset; - - if (inject_offset == -1 || - (inject_offset >= offset && inject_offset < offset + bytes)) - { - break; - } + /* Sanity check block layer guarantees */ + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (bs->bl.max_transfer) { + assert(bytes <= bs->bl.max_transfer); } - if (rule && rule->options.inject.error) { - return inject_error(bs, rule); + err = rule_check(bs, offset, bytes); + if (err) { + return err; } return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -472,22 +566,81 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, static int blkdebug_co_flush(BlockDriverState *bs) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugRule *rule = NULL; + int err = rule_check(bs, 0, 0); - QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - if (rule->options.inject.offset == -1) { - break; - } + if (err) { + return err; } - if (rule && rule->options.inject.error) { - return inject_error(bs, rule); + return bdrv_co_flush(bs->file->bs); +} + +static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, + BdrvRequestFlags flags) +{ + uint32_t align = MAX(bs->bl.request_alignment, + bs->bl.pwrite_zeroes_alignment); + int err; + + /* Only pass through requests that are larger than requested + * preferred alignment (so that we test the fallback to writes on + * unaligned portions), and check that the block layer never hands + * us anything unaligned that crosses an alignment boundary. 
*/ + if (count < align) { + assert(QEMU_IS_ALIGNED(offset, align) || + QEMU_IS_ALIGNED(offset + count, align) || + DIV_ROUND_UP(offset, align) == + DIV_ROUND_UP(offset + count, align)); + return -ENOTSUP; + } + assert(QEMU_IS_ALIGNED(offset, align)); + assert(QEMU_IS_ALIGNED(count, align)); + if (bs->bl.max_pwrite_zeroes) { + assert(count <= bs->bl.max_pwrite_zeroes); } - return bdrv_co_flush(bs->file->bs); + err = rule_check(bs, offset, count); + if (err) { + return err; + } + + return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); } +static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + uint32_t align = bs->bl.pdiscard_alignment; + int err; + + /* Only pass through requests that are larger than requested + * minimum alignment, and ensure that unaligned requests do not + * cross optimum discard boundaries. */ + if (count < bs->bl.request_alignment) { + assert(QEMU_IS_ALIGNED(offset, align) || + QEMU_IS_ALIGNED(offset + count, align) || + DIV_ROUND_UP(offset, align) == + DIV_ROUND_UP(offset + count, align)); + return -ENOTSUP; + } + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment)); + if (align && count >= align) { + assert(QEMU_IS_ALIGNED(offset, align)); + assert(QEMU_IS_ALIGNED(count, align)); + } + if (bs->bl.max_pdiscard) { + assert(count <= bs->bl.max_pdiscard); + } + + err = rule_check(bs, offset, count); + if (err) { + return err; + } + + return bdrv_co_pdiscard(bs->file->bs, offset, count); +} static void blkdebug_close(BlockDriverState *bs) { @@ -715,6 +868,21 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp) if (s->align) { bs->bl.request_alignment = s->align; } + if (s->max_transfer) { + bs->bl.max_transfer = s->max_transfer; + } + if (s->opt_write_zero) { + bs->bl.pwrite_zeroes_alignment = s->opt_write_zero; + } + if (s->max_write_zero) { + bs->bl.max_pwrite_zeroes = s->max_write_zero; + } + if 
(s->opt_discard) { + bs->bl.pdiscard_alignment = s->opt_discard; + } + if (s->max_discard) { + bs->bl.max_pdiscard = s->max_discard; + } } static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state, @@ -742,6 +910,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_co_preadv = blkdebug_co_preadv, .bdrv_co_pwritev = blkdebug_co_pwritev, .bdrv_co_flush_to_disk = blkdebug_co_flush, + .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, + .bdrv_co_pdiscard = blkdebug_co_pdiscard, .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, diff --git a/block/block-backend.c b/block/block-backend.c index f5bf13eec9..f3a60081a7 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -130,6 +130,56 @@ static const char *blk_root_get_name(BdrvChild *child) return blk_name(child->opaque); } +/* + * Notifies the user of the BlockBackend that migration has completed. qdev + * devices can tighten their permissions in response (specifically revoke + * shared write permissions that we needed for storage migration). + * + * If an error is returned, the VM cannot be allowed to be resumed. + */ +static void blk_root_activate(BdrvChild *child, Error **errp) +{ + BlockBackend *blk = child->opaque; + Error *local_err = NULL; + + if (!blk->disable_perm) { + return; + } + + blk->disable_perm = false; + + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk->disable_perm = true; + return; + } +} + +static int blk_root_inactivate(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + + if (blk->disable_perm) { + return 0; + } + + /* Only inactivate BlockBackends for guest devices (which are inactive at + * this point because the VM is stopped) and unattached monitor-owned + * BlockBackends. If there is still any other user like a block job, then + * we simply can't inactivate the image. 
*/ + if (!blk->dev && !blk->name[0]) { + return -EPERM; + } + + blk->disable_perm = true; + if (blk->root) { + bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort); + } + + return 0; +} + static const BdrvChildRole child_root = { .inherit_options = blk_root_inherit_options, @@ -140,6 +190,9 @@ static const BdrvChildRole child_root = { .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, + + .activate = blk_root_activate, + .inactivate = blk_root_inactivate, }; /* @@ -601,34 +654,6 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) *shared_perm = blk->shared_perm; } -/* - * Notifies the user of all BlockBackends that migration has completed. qdev - * devices can tighten their permissions in response (specifically revoke - * shared write permissions that we needed for storage migration). - * - * If an error is returned, the VM cannot be allowed to be resumed. - */ -void blk_resume_after_migration(Error **errp) -{ - BlockBackend *blk; - Error *local_err = NULL; - - for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { - if (!blk->disable_perm) { - continue; - } - - blk->disable_perm = false; - - blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); - if (local_err) { - error_propagate(errp, local_err); - blk->disable_perm = true; - return; - } - } -} - static int blk_do_attach_dev(BlockBackend *blk, void *dev) { if (blk->dev) { diff --git a/block/crypto.c b/block/crypto.c index 6828180840..10e5ddccaa 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -56,10 +56,10 @@ static int block_crypto_probe_generic(QCryptoBlockFormat format, static ssize_t block_crypto_read_func(QCryptoBlock *block, - void *opaque, size_t offset, uint8_t *buf, size_t buflen, + void *opaque, Error **errp) { BlockDriverState *bs = opaque; @@ -83,10 +83,10 @@ struct BlockCryptoCreateData { static ssize_t block_crypto_write_func(QCryptoBlock *block, - void *opaque, size_t offset, const uint8_t *buf, size_t buflen, + 
void *opaque, Error **errp) { struct BlockCryptoCreateData *data = opaque; @@ -102,8 +102,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block, static ssize_t block_crypto_init_func(QCryptoBlock *block, - void *opaque, size_t headerlen, + void *opaque, Error **errp) { struct BlockCryptoCreateData *data = opaque; diff --git a/block/curl.c b/block/curl.c index aa6e8cc0e5..2a244e2439 100644 --- a/block/curl.c +++ b/block/curl.c @@ -76,15 +76,12 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle, #define CURL_TIMEOUT_DEFAULT 5 #define CURL_TIMEOUT_MAX 10000 -#define FIND_RET_NONE 0 -#define FIND_RET_OK 1 -#define FIND_RET_WAIT 2 - #define CURL_BLOCK_OPT_URL "url" #define CURL_BLOCK_OPT_READAHEAD "readahead" #define CURL_BLOCK_OPT_SSLVERIFY "sslverify" #define CURL_BLOCK_OPT_TIMEOUT "timeout" #define CURL_BLOCK_OPT_COOKIE "cookie" +#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret" #define CURL_BLOCK_OPT_USERNAME "username" #define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret" #define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username" @@ -93,14 +90,17 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle, struct BDRVCURLState; typedef struct CURLAIOCB { - BlockAIOCB common; + Coroutine *co; QEMUIOVector *qiov; - int64_t sector_num; - int nb_sectors; + uint64_t offset; + uint64_t bytes; + int ret; size_t start; size_t end; + + QSIMPLEQ_ENTRY(CURLAIOCB) next; } CURLAIOCB; typedef struct CURLSocket { @@ -115,7 +115,7 @@ typedef struct CURLState CURL *curl; QLIST_HEAD(, CURLSocket) sockets; char *orig_buf; - size_t buf_start; + uint64_t buf_start; size_t buf_off; size_t buf_len; char range[128]; @@ -126,7 +126,7 @@ typedef struct CURLState typedef struct BDRVCURLState { CURLM *multi; QEMUTimer timer; - size_t len; + uint64_t len; CURLState states[CURL_NUM_STATES]; char *url; size_t readahead_size; @@ -136,6 +136,7 @@ typedef struct BDRVCURLState { bool accept_range; AioContext *aio_context; QemuMutex mutex; + QSIMPLEQ_HEAD(, 
CURLAIOCB) free_state_waitq; char *username; char *password; char *proxyusername; @@ -147,6 +148,7 @@ static void curl_multi_do(void *arg); static void curl_multi_read(void *arg); #ifdef NEED_CURL_TIMER_CALLBACK +/* Called from curl_multi_do_locked, with s->mutex held. */ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) { BDRVCURLState *s = opaque; @@ -163,6 +165,7 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) } #endif +/* Called from curl_multi_do_locked, with s->mutex held. */ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, void *userp, void *sp) { @@ -212,6 +215,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, return 0; } +/* Called from curl_multi_do_locked, with s->mutex held. */ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) { BDRVCURLState *s = opaque; @@ -226,6 +230,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) return realsize; } +/* Called from curl_multi_do_locked, with s->mutex held. 
*/ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) { CURLState *s = ((CURLState*)opaque); @@ -253,7 +258,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) continue; if ((s->buf_off >= acb->end)) { - size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE; + size_t request_length = acb->bytes; qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start, acb->end - acb->start); @@ -264,9 +269,11 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) request_length - offset); } - acb->common.cb(acb->common.opaque, 0); - qemu_aio_unref(acb); + acb->ret = 0; s->acb[i] = NULL; + qemu_mutex_unlock(&s->s->mutex); + aio_co_wake(acb->co); + qemu_mutex_lock(&s->s->mutex); } } @@ -275,18 +282,19 @@ read_end: return size * nmemb; } -static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, - CURLAIOCB *acb) +/* Called with s->mutex held. */ +static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len, + CURLAIOCB *acb) { int i; - size_t end = start + len; - size_t clamped_end = MIN(end, s->len); - size_t clamped_len = clamped_end - start; + uint64_t end = start + len; + uint64_t clamped_end = MIN(end, s->len); + uint64_t clamped_len = clamped_end - start; for (i=0; i<CURL_NUM_STATES; i++) { CURLState *state = &s->states[i]; - size_t buf_end = (state->buf_start + state->buf_off); - size_t buf_fend = (state->buf_start + state->buf_len); + uint64_t buf_end = (state->buf_start + state->buf_off); + uint64_t buf_fend = (state->buf_start + state->buf_len); if (!state->orig_buf) continue; @@ -305,9 +313,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, if (clamped_len < len) { qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len); } - acb->common.cb(acb->common.opaque, 0); - - return FIND_RET_OK; + acb->ret = 0; + return true; } // Wait for unfinished chunks @@ -325,13 +332,13 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, 
size_t len, for (j=0; j<CURL_NUM_ACB; j++) { if (!state->acb[j]) { state->acb[j] = acb; - return FIND_RET_WAIT; + return true; } } } } - return FIND_RET_NONE; + return false; } /* Called with s->mutex held. */ @@ -376,11 +383,11 @@ static void curl_multi_check_completion(BDRVCURLState *s) continue; } + acb->ret = -EIO; + state->acb[i] = NULL; qemu_mutex_unlock(&s->mutex); - acb->common.cb(acb->common.opaque, -EIO); + aio_co_wake(acb->co); qemu_mutex_lock(&s->mutex); - qemu_aio_unref(acb); - state->acb[i] = NULL; } } @@ -449,32 +456,28 @@ static void curl_multi_timeout_do(void *arg) #endif } -static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s) +/* Called with s->mutex held. */ +static CURLState *curl_find_state(BDRVCURLState *s) { CURLState *state = NULL; - int i, j; - - do { - for (i=0; i<CURL_NUM_STATES; i++) { - for (j=0; j<CURL_NUM_ACB; j++) - if (s->states[i].acb[j]) - continue; - if (s->states[i].in_use) - continue; + int i; + for (i = 0; i < CURL_NUM_STATES; i++) { + if (!s->states[i].in_use) { state = &s->states[i]; state->in_use = 1; break; } - if (!state) { - aio_poll(bdrv_get_aio_context(bs), true); - } - } while(!state); + } + return state; +} +static int curl_init_state(BDRVCURLState *s, CURLState *state) +{ if (!state->curl) { state->curl = curl_easy_init(); if (!state->curl) { - return NULL; + return -EIO; } curl_easy_setopt(state->curl, CURLOPT_URL, s->url); curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER, @@ -527,11 +530,18 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s) QLIST_INIT(&state->sockets); state->s = s; - return state; + return 0; } +/* Called with s->mutex held. 
*/ static void curl_clean_state(CURLState *s) { + CURLAIOCB *next; + int j; + for (j = 0; j < CURL_NUM_ACB; j++) { + assert(!s->acb[j]); + } + if (s->s->multi) curl_multi_remove_handle(s->s->multi, s->curl); @@ -543,6 +553,14 @@ static void curl_clean_state(CURLState *s) } s->in_use = 0; + + next = QSIMPLEQ_FIRST(&s->s->free_state_waitq); + if (next) { + QSIMPLEQ_REMOVE_HEAD(&s->s->free_state_waitq, next); + qemu_mutex_unlock(&s->s->mutex); + aio_co_wake(next->co); + qemu_mutex_lock(&s->s->mutex); + } } static void curl_parse_filename(const char *filename, QDict *options, @@ -556,6 +574,7 @@ static void curl_detach_aio_context(BlockDriverState *bs) BDRVCURLState *s = bs->opaque; int i; + qemu_mutex_lock(&s->mutex); for (i = 0; i < CURL_NUM_STATES; i++) { if (s->states[i].in_use) { curl_clean_state(&s->states[i]); @@ -571,6 +590,7 @@ static void curl_detach_aio_context(BlockDriverState *bs) curl_multi_cleanup(s->multi); s->multi = NULL; } + qemu_mutex_unlock(&s->mutex); timer_del(&s->timer); } @@ -624,6 +644,11 @@ static QemuOptsList runtime_opts = { .help = "Pass the cookie or list of cookies with each request" }, { + .name = CURL_BLOCK_OPT_COOKIE_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of secret used as cookie passed with each request" + }, + { .name = CURL_BLOCK_OPT_USERNAME, .type = QEMU_OPT_STRING, .help = "Username for HTTP auth" @@ -657,6 +682,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; const char *file; const char *cookie; + const char *cookie_secret; double d; const char *secretid; const char *protocol_delimiter; @@ -668,6 +694,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, return -EROFS; } + qemu_mutex_init(&s->mutex); opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -693,7 +720,22 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, s->sslverify = 
qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true); cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE); - s->cookie = g_strdup(cookie); + cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET); + + if (cookie && cookie_secret) { + error_setg(errp, + "curl driver cannot handle both cookie and cookie secret"); + goto out_noclean; + } + + if (cookie_secret) { + s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp); + if (!s->cookie) { + goto out_noclean; + } + } else { + s->cookie = g_strdup(cookie); + } file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL); if (file == NULL) { @@ -736,14 +778,22 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, } DPRINTF("CURL: Opening %s\n", file); + QSIMPLEQ_INIT(&s->free_state_waitq); s->aio_context = bdrv_get_aio_context(bs); s->url = g_strdup(file); - state = curl_init_state(bs, s); - if (!state) + qemu_mutex_lock(&s->mutex); + state = curl_find_state(s); + qemu_mutex_unlock(&s->mutex); + if (!state) { goto out_noclean; + } // Get file size + if (curl_init_state(s, state) < 0) { + goto out; + } + s->accept_range = false; curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1); curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, @@ -771,7 +821,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, } #endif - s->len = (size_t)d; + s->len = d; if ((!strncasecmp(s->url, "http://", strlen("http://")) || !strncasecmp(s->url, "https://", strlen("https://"))) @@ -780,13 +830,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, "Server does not support 'range' (byte ranges)."); goto out; } - DPRINTF("CURL: Size = %zd\n", s->len); + DPRINTF("CURL: Size = %" PRIu64 "\n", s->len); + qemu_mutex_lock(&s->mutex); curl_clean_state(state); + qemu_mutex_unlock(&s->mutex); curl_easy_cleanup(state->curl); state->curl = NULL; - qemu_mutex_init(&s->mutex); curl_attach_aio_context(bs, bdrv_get_aio_context(bs)); qemu_opts_del(opts); @@ -797,53 +848,51 @@ out: 
curl_easy_cleanup(state->curl); state->curl = NULL; out_noclean: + qemu_mutex_destroy(&s->mutex); g_free(s->cookie); g_free(s->url); qemu_opts_del(opts); return -EINVAL; } -static const AIOCBInfo curl_aiocb_info = { - .aiocb_size = sizeof(CURLAIOCB), -}; - - -static void curl_readv_bh_cb(void *p) +static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) { CURLState *state; int running; - int ret = -EINPROGRESS; - CURLAIOCB *acb = p; - BlockDriverState *bs = acb->common.bs; BDRVCURLState *s = bs->opaque; - size_t start = acb->sector_num * BDRV_SECTOR_SIZE; - size_t end; + uint64_t start = acb->offset; + uint64_t end; qemu_mutex_lock(&s->mutex); // In case we have the requested data already (e.g. read-ahead), // we can just call the callback and be done. - switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) { - case FIND_RET_OK: - qemu_aio_unref(acb); - // fall through - case FIND_RET_WAIT: - goto out; - default: - break; + if (curl_find_buf(s, start, acb->bytes, acb)) { + goto out; } // No cache found, so let's start a new request - state = curl_init_state(acb->common.bs, s); - if (!state) { - ret = -EIO; + for (;;) { + state = curl_find_state(s); + if (state) { + break; + } + QSIMPLEQ_INSERT_TAIL(&s->free_state_waitq, acb, next); + qemu_mutex_unlock(&s->mutex); + qemu_coroutine_yield(); + qemu_mutex_lock(&s->mutex); + } + + if (curl_init_state(s, state) < 0) { + curl_clean_state(state); + acb->ret = -EIO; goto out; } acb->start = 0; - acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start); + acb->end = MIN(acb->bytes, s->len - start); state->buf_off = 0; g_free(state->orig_buf); @@ -853,14 +902,14 @@ static void curl_readv_bh_cb(void *p) state->orig_buf = g_try_malloc(state->buf_len); if (state->buf_len && state->orig_buf == NULL) { curl_clean_state(state); - ret = -ENOMEM; + acb->ret = -ENOMEM; goto out; } state->acb[0] = acb; - snprintf(state->range, 127, "%zd-%zd", start, end); - DPRINTF("CURL (AIO): Reading %llu at 
%zd (%s)\n", - (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range); + snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end); + DPRINTF("CURL (AIO): Reading %" PRIu64 " at %" PRIu64 " (%s)\n", + acb->bytes, start, state->range); curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range); curl_multi_add_handle(s->multi, state->curl); @@ -870,26 +919,24 @@ static void curl_readv_bh_cb(void *p) out: qemu_mutex_unlock(&s->mutex); - if (ret != -EINPROGRESS) { - acb->common.cb(acb->common.opaque, ret); - qemu_aio_unref(acb); - } } -static BlockAIOCB *curl_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) +static int coroutine_fn curl_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - CURLAIOCB *acb; - - acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque); - - acb->qiov = qiov; - acb->sector_num = sector_num; - acb->nb_sectors = nb_sectors; - - aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb); - return &acb->common; + CURLAIOCB acb = { + .co = qemu_coroutine_self(), + .ret = -EINPROGRESS, + .qiov = qiov, + .offset = offset, + .bytes = bytes + }; + + curl_setup_preadv(bs, &acb); + while (acb.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + return acb.ret; } static void curl_close(BlockDriverState *bs) @@ -920,7 +967,7 @@ static BlockDriver bdrv_http = { .bdrv_close = curl_close, .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_co_preadv = curl_co_preadv, .bdrv_detach_aio_context = curl_detach_aio_context, .bdrv_attach_aio_context = curl_attach_aio_context, @@ -936,7 +983,7 @@ static BlockDriver bdrv_https = { .bdrv_close = curl_close, .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_co_preadv = curl_co_preadv, .bdrv_detach_aio_context = curl_detach_aio_context, .bdrv_attach_aio_context = curl_attach_aio_context, @@ -952,7 +999,7 @@ static 
BlockDriver bdrv_ftp = { .bdrv_close = curl_close, .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_co_preadv = curl_co_preadv, .bdrv_detach_aio_context = curl_detach_aio_context, .bdrv_attach_aio_context = curl_attach_aio_context, @@ -968,7 +1015,7 @@ static BlockDriver bdrv_ftps = { .bdrv_close = curl_close, .bdrv_getlength = curl_getlength, - .bdrv_aio_readv = curl_aio_readv, + .bdrv_co_preadv = curl_co_preadv, .bdrv_detach_aio_context = curl_detach_aio_context, .bdrv_attach_aio_context = curl_attach_aio_context, diff --git a/block/file-posix.c b/block/file-posix.c index 19c48a043e..4354d49642 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -129,12 +129,23 @@ do { \ #define MAX_BLOCKSIZE 4096 +/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes, + * leaving a few more bytes for its future use. */ +#define RAW_LOCK_PERM_BASE 100 +#define RAW_LOCK_SHARED_BASE 200 + typedef struct BDRVRawState { int fd; + int lock_fd; + bool use_lock; int type; int open_flags; size_t buf_align; + /* The current permissions. 
*/ + uint64_t perm; + uint64_t shared_perm; + #ifdef CONFIG_XFS bool is_xfs:1; #endif @@ -392,6 +403,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native)", }, + { + .name = "locking", + .type = QEMU_OPT_STRING, + .help = "file locking mode (on/off/auto, default: auto)", + }, { /* end of list */ } }, }; @@ -406,6 +422,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, BlockdevAioOptions aio, aio_default; int fd, ret; struct stat st; + OnOffAuto locking; opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -435,6 +452,37 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); + locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"), + ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + switch (locking) { + case ON_OFF_AUTO_ON: + s->use_lock = true; +#ifndef F_OFD_SETLK + fprintf(stderr, + "File lock requested but OFD locking syscall is unavailable, " + "falling back to POSIX file locks.\n" + "Due to the implementation, locks can be lost unexpectedly.\n"); +#endif + break; + case ON_OFF_AUTO_OFF: + s->use_lock = false; + break; + case ON_OFF_AUTO_AUTO: +#ifdef F_OFD_SETLK + s->use_lock = true; +#else + s->use_lock = false; +#endif + break; + default: + abort(); + } + s->open_flags = open_flags; raw_parse_flags(bdrv_flags, &s->open_flags); @@ -450,6 +498,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } s->fd = fd; + s->lock_fd = -1; + if (s->use_lock) { + fd = qemu_open(filename, s->open_flags); + if (fd < 0) { + ret = -errno; + error_setg_errno(errp, errno, "Could not open '%s' for locking", + filename); + qemu_close(s->fd); + goto fail; + } + s->lock_fd = fd; + } + s->perm = 0; + s->shared_perm = BLK_PERM_ALL; + 
#ifdef CONFIG_LINUX_AIO /* Currently Linux does AIO only for files opened with O_DIRECT */ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { @@ -537,6 +600,161 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, return raw_open_common(bs, options, flags, 0, errp); } +typedef enum { + RAW_PL_PREPARE, + RAW_PL_COMMIT, + RAW_PL_ABORT, +} RawPermLockOp; + +#define PERM_FOREACH(i) \ + for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++) + +/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the + * file; if @unlock == true, also unlock the unneeded bytes. + * @shared_perm_lock_bits is the mask of all permissions that are NOT shared. + */ +static int raw_apply_lock_bytes(BDRVRawState *s, + uint64_t perm_lock_bits, + uint64_t shared_perm_lock_bits, + bool unlock, Error **errp) +{ + int ret; + int i; + + PERM_FOREACH(i) { + int off = RAW_LOCK_PERM_BASE + i; + if (perm_lock_bits & (1ULL << i)) { + ret = qemu_lock_fd(s->lock_fd, off, 1, false); + if (ret) { + error_setg(errp, "Failed to lock byte %d", off); + return ret; + } + } else if (unlock) { + ret = qemu_unlock_fd(s->lock_fd, off, 1); + if (ret) { + error_setg(errp, "Failed to unlock byte %d", off); + return ret; + } + } + } + PERM_FOREACH(i) { + int off = RAW_LOCK_SHARED_BASE + i; + if (shared_perm_lock_bits & (1ULL << i)) { + ret = qemu_lock_fd(s->lock_fd, off, 1, false); + if (ret) { + error_setg(errp, "Failed to lock byte %d", off); + return ret; + } + } else if (unlock) { + ret = qemu_unlock_fd(s->lock_fd, off, 1); + if (ret) { + error_setg(errp, "Failed to unlock byte %d", off); + return ret; + } + } + } + return 0; +} + +/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. 
*/ +static int raw_check_lock_bytes(BDRVRawState *s, + uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + int i; + + PERM_FOREACH(i) { + int off = RAW_LOCK_SHARED_BASE + i; + uint64_t p = 1ULL << i; + if (perm & p) { + ret = qemu_lock_fd_test(s->lock_fd, off, 1, true); + if (ret) { + char *perm_name = bdrv_perm_names(p); + error_setg(errp, + "Failed to get \"%s\" lock", + perm_name); + g_free(perm_name); + error_append_hint(errp, + "Is another process using the image?\n"); + return ret; + } + } + } + PERM_FOREACH(i) { + int off = RAW_LOCK_PERM_BASE + i; + uint64_t p = 1ULL << i; + if (!(shared_perm & p)) { + ret = qemu_lock_fd_test(s->lock_fd, off, 1, true); + if (ret) { + char *perm_name = bdrv_perm_names(p); + error_setg(errp, + "Failed to get shared \"%s\" lock", + perm_name); + g_free(perm_name); + error_append_hint(errp, + "Is another process using the image?\n"); + return ret; + } + } + } + return 0; +} + +static int raw_handle_perm_lock(BlockDriverState *bs, + RawPermLockOp op, + uint64_t new_perm, uint64_t new_shared, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret = 0; + Error *local_err = NULL; + + if (!s->use_lock) { + return 0; + } + + if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) { + return 0; + } + + assert(s->lock_fd > 0); + + switch (op) { + case RAW_PL_PREPARE: + ret = raw_apply_lock_bytes(s, s->perm | new_perm, + ~s->shared_perm | ~new_shared, + false, errp); + if (!ret) { + ret = raw_check_lock_bytes(s, new_perm, new_shared, errp); + if (!ret) { + return 0; + } + } + op = RAW_PL_ABORT; + /* fall through to unlock bytes. */ + case RAW_PL_ABORT: + raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err); + if (local_err) { + /* Theoretically the above call only unlocks bytes and it cannot + * fail. Something weird happened, report it. 
+ */ + error_report_err(local_err); + } + break; + case RAW_PL_COMMIT: + raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err); + if (local_err) { + /* Theoretically the above call only unlocks bytes and it cannot + * fail. Something weird happened, report it. + */ + error_report_err(local_err); + } + break; + } + return ret; +} + static int raw_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { @@ -1405,6 +1623,10 @@ static void raw_close(BlockDriverState *bs) qemu_close(s->fd); s->fd = -1; } + if (s->lock_fd >= 0) { + qemu_close(s->lock_fd); + s->lock_fd = -1; + } } static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) @@ -1949,6 +2171,25 @@ static QemuOptsList raw_create_opts = { } }; +static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, + Error **errp) +{ + return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp); +} + +static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared) +{ + BDRVRawState *s = bs->opaque; + raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL); + s->perm = perm; + s->shared_perm = shared; +} + +static void raw_abort_perm_update(BlockDriverState *bs) +{ + raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); +} + BlockDriver bdrv_file = { .format_name = "file", .protocol_name = "file", @@ -1979,7 +2220,9 @@ BlockDriver bdrv_file = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, - + .bdrv_check_perm = raw_check_perm, + .bdrv_set_perm = raw_set_perm, + .bdrv_abort_perm_update = raw_abort_perm_update, .create_opts = &raw_create_opts, }; @@ -2438,6 +2681,9 @@ static BlockDriver bdrv_host_device = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_check_perm = raw_check_perm, + .bdrv_set_perm = raw_set_perm, + .bdrv_abort_perm_update = raw_abort_perm_update, .bdrv_probe_blocksizes = hdev_probe_blocksizes, .bdrv_probe_geometry 
= hdev_probe_geometry, diff --git a/block/file-win32.c b/block/file-win32.c index d1eb0a14b2..8f14f0bdcd 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -344,6 +344,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + if (qdict_get_try_bool(options, "locking", false)) { + error_setg(errp, "locking=on is not supported on Windows"); + ret = -EINVAL; + goto fail; + } + filename = qemu_opt_get(opts, "filename"); use_aio = get_aio_option(opts, flags, &local_err); diff --git a/block/io.c b/block/io.c index 40bd94f323..fdd7485c22 100644 --- a/block/io.c +++ b/block/io.c @@ -1784,8 +1784,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); - ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS, - *pnum, pnum, file); + ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS, + *pnum, pnum, file); goto out; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 100398c565..347d94b0d2 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -309,14 +309,19 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, uint64_t *l2_table, uint64_t stop_flags) { int i; + QCow2ClusterType first_cluster_type; uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; uint64_t first_entry = be64_to_cpu(l2_table[0]); uint64_t offset = first_entry & mask; - if (!offset) + if (!offset) { return 0; + } - assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL); + /* must be allocated */ + first_cluster_type = qcow2_get_cluster_type(first_entry); + assert(first_cluster_type == QCOW2_CLUSTER_NORMAL || + first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC); for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; @@ -328,14 +333,21 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, return i; } -static int 
count_contiguous_clusters_by_type(int nb_clusters, - uint64_t *l2_table, - int wanted_type) +/* + * Checks how many consecutive unallocated clusters in a given L2 + * table have the same cluster type. + */ +static int count_contiguous_clusters_unallocated(int nb_clusters, + uint64_t *l2_table, + QCow2ClusterType wanted_type) { int i; + assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN || + wanted_type == QCOW2_CLUSTER_UNALLOCATED); for (i = 0; i < nb_clusters; i++) { - int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); + uint64_t entry = be64_to_cpu(l2_table[i]); + QCow2ClusterType type = qcow2_get_cluster_type(entry); if (type != wanted_type) { break; @@ -487,6 +499,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int l1_bits, c; unsigned int offset_in_cluster; uint64_t bytes_available, bytes_needed, nb_clusters; + QCow2ClusterType type; int ret; offset_in_cluster = offset_into_cluster(s, offset); @@ -509,13 +522,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, l1_index = offset >> l1_bits; if (l1_index >= s->l1_size) { - ret = QCOW2_CLUSTER_UNALLOCATED; + type = QCOW2_CLUSTER_UNALLOCATED; goto out; } l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; if (!l2_offset) { - ret = QCOW2_CLUSTER_UNALLOCATED; + type = QCOW2_CLUSTER_UNALLOCATED; goto out; } @@ -544,38 +557,37 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, * true */ assert(nb_clusters <= INT_MAX); - ret = qcow2_get_cluster_type(*cluster_offset); - switch (ret) { + type = qcow2_get_cluster_type(*cluster_offset); + if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN || + type == QCOW2_CLUSTER_ZERO_ALLOC)) { + qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" + " in pre-v3 image (L2 offset: %#" PRIx64 + ", L2 index: %#x)", l2_offset, l2_index); + ret = -EIO; + goto fail; + } + switch (type) { case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters can only be processed one by one */ c = 1; 
*cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; break; - case QCOW2_CLUSTER_ZERO: - if (s->qcow_version < 3) { - qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" - " in pre-v3 image (L2 offset: %#" PRIx64 - ", L2 index: %#x)", l2_offset, l2_index); - ret = -EIO; - goto fail; - } - c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], - QCOW2_CLUSTER_ZERO); - *cluster_offset = 0; - break; + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ - c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], - QCOW2_CLUSTER_UNALLOCATED); + c = count_contiguous_clusters_unallocated(nb_clusters, + &l2_table[l2_index], type); *cluster_offset = 0; break; + case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); + &l2_table[l2_index], QCOW_OFLAG_ZERO); *cluster_offset &= L2E_OFFSET_MASK; if (offset_into_cluster(s, *cluster_offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#" + qcow2_signal_corruption(bs, true, -1, -1, + "Cluster allocation offset %#" PRIx64 " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", *cluster_offset, l2_offset, l2_index); @@ -602,7 +614,7 @@ out: assert(bytes_available - offset_in_cluster <= UINT_MAX); *bytes = bytes_available - offset_in_cluster; - return ret; + return type; fail: qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); @@ -835,7 +847,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * Don't discard clusters that reach a refcount of 0 (e.g. compressed * clusters), the next write will reuse them anyway. 
*/ - if (j != 0) { + if (!m->keep_old_clusters && j != 0) { for (i = 0; i < j; i++) { qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1, QCOW2_DISCARD_NEVER); @@ -860,7 +872,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); - int cluster_type = qcow2_get_cluster_type(l2_entry); + QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); switch(cluster_type) { case QCOW2_CLUSTER_NORMAL: @@ -870,7 +882,8 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, break; case QCOW2_CLUSTER_UNALLOCATED: case QCOW2_CLUSTER_COMPRESSED: - case QCOW2_CLUSTER_ZERO: + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_ZERO_ALLOC: break; default: abort(); @@ -1132,8 +1145,9 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, uint64_t entry; uint64_t nb_clusters; int ret; + bool keep_old_clusters = false; - uint64_t alloc_cluster_offset; + uint64_t alloc_cluster_offset = 0; trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, *bytes); @@ -1170,31 +1184,54 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, * wrong with our code. 
*/ assert(nb_clusters > 0); - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC && + (entry & QCOW_OFLAG_COPIED) && + (!*host_offset || + start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK))) + { + /* Try to reuse preallocated zero clusters; contiguous normal clusters + * would be fine, too, but count_cow_clusters() above has limited + * nb_clusters already to a range of COW clusters */ + int preallocated_nb_clusters = + count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], QCOW_OFLAG_COPIED); + assert(preallocated_nb_clusters > 0); - /* Allocate, if necessary at a given offset in the image file */ - alloc_cluster_offset = start_of_cluster(s, *host_offset); - ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, - &nb_clusters); - if (ret < 0) { - goto fail; - } + nb_clusters = preallocated_nb_clusters; + alloc_cluster_offset = entry & L2E_OFFSET_MASK; - /* Can't extend contiguous allocation */ - if (nb_clusters == 0) { - *bytes = 0; - return 0; + /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2() + * should not free them. */ + keep_old_clusters = true; } - /* !*host_offset would overwrite the image header and is reserved for "no - * host offset preferred". If 0 was a valid host offset, it'd trigger the - * following overlap check; do that now to avoid having an invalid value in - * *host_offset. 
*/ + qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + if (!alloc_cluster_offset) { - ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset, - nb_clusters * s->cluster_size); - assert(ret < 0); - goto fail; + /* Allocate, if necessary at a given offset in the image file */ + alloc_cluster_offset = start_of_cluster(s, *host_offset); + ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + /* Can't extend contiguous allocation */ + if (nb_clusters == 0) { + *bytes = 0; + return 0; + } + + /* !*host_offset would overwrite the image header and is reserved for + * "no host offset preferred". If 0 was a valid host offset, it'd + * trigger the following overlap check; do that now to avoid having an + * invalid value in *host_offset. */ + if (!alloc_cluster_offset) { + ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset, + nb_clusters * s->cluster_size); + assert(ret < 0); + goto fail; + } } /* @@ -1225,6 +1262,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, .offset = start_of_cluster(s, guest_offset), .nb_clusters = nb_clusters, + .keep_old_clusters = keep_old_clusters, + .cow_start = { .offset = 0, .nb_bytes = offset_into_cluster(s, guest_offset), @@ -1472,24 +1511,25 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, * but rather fall through to the backing file. 
*/ switch (qcow2_get_cluster_type(old_l2_entry)) { - case QCOW2_CLUSTER_UNALLOCATED: - if (full_discard || !bs->backing) { - continue; - } - break; + case QCOW2_CLUSTER_UNALLOCATED: + if (full_discard || !bs->backing) { + continue; + } + break; - case QCOW2_CLUSTER_ZERO: - if (!full_discard) { - continue; - } - break; + case QCOW2_CLUSTER_ZERO_PLAIN: + if (!full_discard) { + continue; + } + break; - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_COMPRESSED: - break; + case QCOW2_CLUSTER_ZERO_ALLOC: + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_COMPRESSED: + break; - default: - abort(); + default: + abort(); } /* First remove L2 entries */ @@ -1509,35 +1549,36 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, - int nb_sectors, enum qcow2_discard_type type, bool full_discard) +int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, enum qcow2_discard_type type, + bool full_discard) { BDRVQcow2State *s = bs->opaque; - uint64_t end_offset; + uint64_t end_offset = offset + bytes; uint64_t nb_clusters; + int64_t cleared; int ret; - end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); - - /* The caller must cluster-align start; round end down except at EOF */ + /* Caller must pass aligned values, except at image end */ assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); - if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { - end_offset = start_of_cluster(s, end_offset); - } + assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || + end_offset == bs->total_sectors << BDRV_SECTOR_BITS); - nb_clusters = size_to_clusters(s, end_offset - offset); + nb_clusters = size_to_clusters(s, bytes); s->cache_discards = true; /* Each L2 table is handled by its own loop iteration */ while (nb_clusters > 0) { - ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard); - if (ret < 0) { + cleared = discard_single_l2(bs, offset, 
nb_clusters, type, + full_discard); + if (cleared < 0) { + ret = cleared; goto fail; } - nb_clusters -= ret; - offset += (ret * s->cluster_size); + nb_clusters -= cleared; + offset += (cleared * s->cluster_size); } ret = 0; @@ -1561,6 +1602,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, int l2_index; int ret; int i; + bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP); ret = get_cluster_table(bs, offset, &l2_table, &l2_index); if (ret < 0) { @@ -1573,12 +1615,22 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, for (i = 0; i < nb_clusters; i++) { uint64_t old_offset; + QCow2ClusterType cluster_type; old_offset = be64_to_cpu(l2_table[l2_index + i]); - /* Update L2 entries */ + /* + * Minimize L2 changes if the cluster already reads back as + * zeroes with correct allocation. + */ + cluster_type = qcow2_get_cluster_type(old_offset); + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN || + (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) { + continue; + } + qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { + if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { @@ -1591,31 +1643,39 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, - int flags) +int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags) { BDRVQcow2State *s = bs->opaque; + uint64_t end_offset = offset + bytes; uint64_t nb_clusters; + int64_t cleared; int ret; + /* Caller must pass aligned values, except at image end */ + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || + end_offset == bs->total_sectors << BDRV_SECTOR_BITS); + /* The zero flag 
is only supported by version 3 and newer */ if (s->qcow_version < 3) { return -ENOTSUP; } /* Each L2 table is handled by its own loop iteration */ - nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); + nb_clusters = size_to_clusters(s, bytes); s->cache_discards = true; while (nb_clusters > 0) { - ret = zero_single_l2(bs, offset, nb_clusters, flags); - if (ret < 0) { + cleared = zero_single_l2(bs, offset, nb_clusters, flags); + if (cleared < 0) { + ret = cleared; goto fail; } - nb_clusters -= ret; - offset += (ret * s->cluster_size); + nb_clusters -= cleared; + offset += (cleared * s->cluster_size); } ret = 0; @@ -1699,14 +1759,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, for (j = 0; j < s->l2_size; j++) { uint64_t l2_entry = be64_to_cpu(l2_table[j]); int64_t offset = l2_entry & L2E_OFFSET_MASK; - int cluster_type = qcow2_get_cluster_type(l2_entry); - bool preallocated = offset != 0; + QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); - if (cluster_type != QCOW2_CLUSTER_ZERO) { + if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && + cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { continue; } - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { if (!bs->backing) { /* not backed; therefore we can simply deallocate the * cluster */ @@ -1741,7 +1801,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, "%#" PRIx64 " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", offset, l2_offset, j); - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { qcow2_free_clusters(bs, offset, s->cluster_size, QCOW2_DISCARD_ALWAYS); } @@ -1751,7 +1811,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); if (ret < 0) { - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { qcow2_free_clusters(bs, offset, s->cluster_size, 
QCOW2_DISCARD_ALWAYS); } @@ -1760,7 +1820,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); if (ret < 0) { - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { qcow2_free_clusters(bs, offset, s->cluster_size, QCOW2_DISCARD_ALWAYS); } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4efca7ebdb..7c06061aae 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1028,18 +1028,17 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, } break; case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO: - if (l2_entry & L2E_OFFSET_MASK) { - if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) { - qcow2_signal_corruption(bs, false, -1, -1, - "Cannot free unaligned cluster %#llx", - l2_entry & L2E_OFFSET_MASK); - } else { - qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, - nb_clusters << s->cluster_bits, type); - } + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, false, -1, -1, + "Cannot free unaligned cluster %#llx", + l2_entry & L2E_OFFSET_MASK); + } else { + qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, + nb_clusters << s->cluster_bits, type); } break; + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: break; default: @@ -1059,9 +1058,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { BDRVQcow2State *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount; + uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount; bool l1_allocated = false; - int64_t old_offset, old_l2_offset; + int64_t old_entry, old_l2_offset; int i, j, l1_modified = 0, nb_csectors; int ret; @@ -1089,15 +1088,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } - for(i = 0;i < l1_size; i++) + for (i = 0; i < l1_size; 
i++) { be64_to_cpus(&l1_table[i]); + } } else { assert(l1_size == s->l1_size); l1_table = s->l1_table; l1_allocated = false; } - for(i = 0; i < l1_size; i++) { + for (i = 0; i < l1_size; i++) { l2_offset = l1_table[i]; if (l2_offset) { old_l2_offset = l2_offset; @@ -1117,81 +1117,79 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } - for(j = 0; j < s->l2_size; j++) { + for (j = 0; j < s->l2_size; j++) { uint64_t cluster_index; - - offset = be64_to_cpu(l2_table[j]); - old_offset = offset; - offset &= ~QCOW_OFLAG_COPIED; - - switch (qcow2_get_cluster_type(offset)) { - case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((offset >> s->csize_shift) & - s->csize_mask) + 1; - if (addend != 0) { - ret = update_refcount(bs, - (offset & s->cluster_offset_mask) & ~511, + uint64_t offset; + + entry = be64_to_cpu(l2_table[j]); + old_entry = entry; + entry &= ~QCOW_OFLAG_COPIED; + offset = entry & L2E_OFFSET_MASK; + + switch (qcow2_get_cluster_type(entry)) { + case QCOW2_CLUSTER_COMPRESSED: + nb_csectors = ((entry >> s->csize_shift) & + s->csize_mask) + 1; + if (addend != 0) { + ret = update_refcount(bs, + (entry & s->cluster_offset_mask) & ~511, nb_csectors * 512, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); - if (ret < 0) { - goto fail; - } - } - /* compressed clusters are never modified */ - refcount = 2; - break; - - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO: - if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data " - "cluster offset %#llx " - "unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", - offset & L2E_OFFSET_MASK, - l2_offset, j); - ret = -EIO; + if (ret < 0) { goto fail; } + } + /* compressed clusters are never modified */ + refcount = 2; + break; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Cluster " + "allocation offset %#" PRIx64 + " unaligned (L2 offset: %#" + PRIx64 ", 
L2 index: %#x)", + offset, l2_offset, j); + ret = -EIO; + goto fail; + } - cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; - if (!cluster_index) { - /* unallocated */ - refcount = 0; - break; - } - if (addend != 0) { - ret = qcow2_update_cluster_refcount(bs, + cluster_index = offset >> s->cluster_bits; + assert(cluster_index); + if (addend != 0) { + ret = qcow2_update_cluster_refcount(bs, cluster_index, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); - if (ret < 0) { - goto fail; - } - } - - ret = qcow2_get_refcount(bs, cluster_index, &refcount); if (ret < 0) { goto fail; } - break; + } - case QCOW2_CLUSTER_UNALLOCATED: - refcount = 0; - break; + ret = qcow2_get_refcount(bs, cluster_index, &refcount); + if (ret < 0) { + goto fail; + } + break; + + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_UNALLOCATED: + refcount = 0; + break; - default: - abort(); + default: + abort(); } if (refcount == 1) { - offset |= QCOW_OFLAG_COPIED; + entry |= QCOW_OFLAG_COPIED; } - if (offset != old_offset) { + if (entry != old_entry) { if (addend > 0) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } - l2_table[j] = cpu_to_be64(offset); + l2_table[j] = cpu_to_be64(entry); qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); } @@ -1441,12 +1439,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } break; - case QCOW2_CLUSTER_ZERO: - if ((l2_entry & L2E_OFFSET_MASK) == 0) { - break; - } - /* fall through */ - + case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: { uint64_t offset = l2_entry & L2E_OFFSET_MASK; @@ -1476,6 +1469,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, break; } + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: break; @@ -1638,10 +1632,10 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, for (j = 0; j < s->l2_size; j++) { uint64_t l2_entry = be64_to_cpu(l2_table[j]); uint64_t data_offset = l2_entry & 
L2E_OFFSET_MASK; - int cluster_type = qcow2_get_cluster_type(l2_entry); + QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); - if ((cluster_type == QCOW2_CLUSTER_NORMAL) || - ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) { + if (cluster_type == QCOW2_CLUSTER_NORMAL || + cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) { ret = qcow2_get_refcount(bs, data_offset >> s->cluster_bits, &refcount); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 032424322a..44243e0e95 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -440,10 +440,9 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) /* The VM state isn't needed any more in the active L1 table; in fact, it * hurts by causing expensive COW for the next snapshot. */ - qcow2_discard_clusters(bs, qcow2_vm_state_offset(s), - align_offset(sn->vm_state_size, s->cluster_size) - >> BDRV_SECTOR_BITS, - QCOW2_DISCARD_NEVER, false); + qcow2_cluster_discard(bs, qcow2_vm_state_offset(s), + align_offset(sn->vm_state_size, s->cluster_size), + QCOW2_DISCARD_NEVER, false); #ifdef DEBUG_ALLOC { diff --git a/block/qcow2.c b/block/qcow2.c index 1c2697732b..a8d61f0981 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1385,7 +1385,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, *file = bs->file->bs; status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; } - if (ret == QCOW2_CLUSTER_ZERO) { + if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { status |= BDRV_BLOCK_ZERO; } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { status |= BDRV_BLOCK_DATA; @@ -1482,7 +1482,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, } break; - case QCOW2_CLUSTER_ZERO: + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_ZERO_ALLOC: qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); break; @@ -2139,7 +2140,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * too, as long as the bulk 
is allocated here). Therefore, using * floating point arithmetic is fine. */ int64_t meta_size = 0; - uint64_t nreftablee, nrefblocke, nl1e, nl2e; + uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count; int64_t aligned_total_size = align_offset(total_size, cluster_size); int refblock_bits, refblock_size; /* refcount entry size in bytes */ @@ -2182,11 +2183,12 @@ static int qcow2_create2(const char *filename, int64_t total_size, nrefblocke = (aligned_total_size + meta_size + cluster_size) / (cluster_size - rces - rces * sizeof(uint64_t) / cluster_size); - meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size; + refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size); + meta_size += refblock_count * cluster_size; /* total size of refcount tables */ - nreftablee = nrefblocke / refblock_size; - nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t)); + nreftablee = align_offset(refblock_count, + cluster_size / sizeof(uint64_t)); meta_size += nreftablee * sizeof(uint64_t); qemu_opt_set_number(opts, BLOCK_OPT_SIZE, @@ -2449,6 +2451,10 @@ static bool is_zero_sectors(BlockDriverState *bs, int64_t start, BlockDriverState *file; int64_t res; + if (start + count > bs->total_sectors) { + count = bs->total_sectors - start; + } + if (!count) { return true; } @@ -2467,6 +2473,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, uint32_t tail = (offset + count) % s->cluster_size; trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count); + if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) { + tail = 0; + } if (head || tail) { int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS; @@ -2490,7 +2499,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, count = s->cluster_size; nr = s->cluster_size; ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); - if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) { + if (ret != QCOW2_CLUSTER_UNALLOCATED && + ret != 
QCOW2_CLUSTER_ZERO_PLAIN && + ret != QCOW2_CLUSTER_ZERO_ALLOC) { qemu_co_mutex_unlock(&s->lock); return -ENOTSUP; } @@ -2501,7 +2512,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); /* Whatever is left can use real zero clusters */ - ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags); + ret = qcow2_cluster_zeroize(bs, offset, count, flags); qemu_co_mutex_unlock(&s->lock); return ret; @@ -2524,8 +2535,8 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, } qemu_co_mutex_lock(&s->lock); - ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS, - QCOW2_DISCARD_REQUEST, false); + ret = qcow2_cluster_discard(bs, offset, count, QCOW2_DISCARD_REQUEST, + false); qemu_co_mutex_unlock(&s->lock); return ret; } @@ -2832,9 +2843,8 @@ fail: static int qcow2_make_empty(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; - uint64_t start_sector; - int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) / - BDRV_SECTOR_SIZE); + uint64_t offset, end_offset; + int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); int l1_clusters, ret = 0; l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); @@ -2851,18 +2861,15 @@ static int qcow2_make_empty(BlockDriverState *bs) /* This fallback code simply discards every active cluster; this is slow, * but works in all cases */ - for (start_sector = 0; start_sector < bs->total_sectors; - start_sector += sector_step) - { + end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; + for (offset = 0; offset < end_offset; offset += step) { /* As this function is generally used after committing an external * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the * default action for this kind of discard is to pass the discard, * which will ideally result in an actually smaller image file, as * is probably desired. 
*/ - ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE, - MIN(sector_step, - bs->total_sectors - start_sector), - QCOW2_DISCARD_SNAPSHOT, true); + ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), + QCOW2_DISCARD_SNAPSHOT, true); if (ret < 0) { break; } diff --git a/block/qcow2.h b/block/qcow2.h index f8aeb08794..1801dc30dc 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -322,6 +322,9 @@ typedef struct QCowL2Meta /** Number of newly allocated clusters */ int nb_clusters; + /** Do not free the old clusters */ + bool keep_old_clusters; + /** * Requests that overlap with this allocation and wait to be restarted * when the allocating request has completed. @@ -346,12 +349,13 @@ typedef struct QCowL2Meta QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; -enum { +typedef enum QCow2ClusterType { QCOW2_CLUSTER_UNALLOCATED, + QCOW2_CLUSTER_ZERO_PLAIN, + QCOW2_CLUSTER_ZERO_ALLOC, QCOW2_CLUSTER_NORMAL, QCOW2_CLUSTER_COMPRESSED, - QCOW2_CLUSTER_ZERO -}; +} QCow2ClusterType; typedef enum QCow2MetadataOverlap { QCOW2_OL_MAIN_HEADER_BITNR = 0, @@ -440,12 +444,15 @@ static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s) return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits; } -static inline int qcow2_get_cluster_type(uint64_t l2_entry) +static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry) { if (l2_entry & QCOW_OFLAG_COMPRESSED) { return QCOW2_CLUSTER_COMPRESSED; } else if (l2_entry & QCOW_OFLAG_ZERO) { - return QCOW2_CLUSTER_ZERO; + if (l2_entry & L2E_OFFSET_MASK) { + return QCOW2_CLUSTER_ZERO_ALLOC; + } + return QCOW2_CLUSTER_ZERO_PLAIN; } else if (!(l2_entry & L2E_OFFSET_MASK)) { return QCOW2_CLUSTER_UNALLOCATED; } else { @@ -544,10 +551,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int compressed_size); int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, - int nb_sectors, enum 
qcow2_discard_type type, bool full_discard); -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, - int flags); +int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, enum qcow2_discard_type type, + bool full_discard); +int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb, diff --git a/block/replication.c b/block/replication.c index d300c15475..3885f04c31 100644 --- a/block/replication.c +++ b/block/replication.c @@ -22,9 +22,17 @@ #include "qapi/error.h" #include "replication.h" +typedef enum { + BLOCK_REPLICATION_NONE, /* block replication is not started */ + BLOCK_REPLICATION_RUNNING, /* block replication is running */ + BLOCK_REPLICATION_FAILOVER, /* failover is running in background */ + BLOCK_REPLICATION_FAILOVER_FAILED, /* failover failed */ + BLOCK_REPLICATION_DONE, /* block replication is done */ +} ReplicationStage; + typedef struct BDRVReplicationState { ReplicationMode mode; - int replication_state; + ReplicationStage stage; BdrvChild *active_disk; BdrvChild *hidden_disk; BdrvChild *secondary_disk; @@ -36,14 +44,6 @@ typedef struct BDRVReplicationState { int error; } BDRVReplicationState; -enum { - BLOCK_REPLICATION_NONE, /* block replication is not started */ - BLOCK_REPLICATION_RUNNING, /* block replication is running */ - BLOCK_REPLICATION_FAILOVER, /* failover is running in background */ - BLOCK_REPLICATION_FAILOVER_FAILED, /* failover failed */ - BLOCK_REPLICATION_DONE, /* block replication is done */ -}; - static void replication_start(ReplicationState *rs, ReplicationMode mode, Error **errp); static void replication_do_checkpoint(ReplicationState *rs, Error **errp); @@ -141,10 +141,10 @@ static void replication_close(BlockDriverState *bs) { BDRVReplicationState *s = bs->opaque; - if (s->replication_state == BLOCK_REPLICATION_RUNNING) { + if (s->stage == 
BLOCK_REPLICATION_RUNNING) { replication_stop(s->rs, false, NULL); } - if (s->replication_state == BLOCK_REPLICATION_FAILOVER) { + if (s->stage == BLOCK_REPLICATION_FAILOVER) { block_job_cancel_sync(s->active_disk->bs->job); } @@ -174,7 +174,7 @@ static int64_t replication_getlength(BlockDriverState *bs) static int replication_get_io_status(BDRVReplicationState *s) { - switch (s->replication_state) { + switch (s->stage) { case BLOCK_REPLICATION_NONE: return -EIO; case BLOCK_REPLICATION_RUNNING: @@ -403,7 +403,7 @@ static void backup_job_completed(void *opaque, int ret) BlockDriverState *bs = opaque; BDRVReplicationState *s = bs->opaque; - if (s->replication_state != BLOCK_REPLICATION_FAILOVER) { + if (s->stage != BLOCK_REPLICATION_FAILOVER) { /* The backup job is cancelled unexpectedly */ s->error = -EIO; } @@ -445,7 +445,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, aio_context_acquire(aio_context); s = bs->opaque; - if (s->replication_state != BLOCK_REPLICATION_NONE) { + if (s->stage != BLOCK_REPLICATION_NONE) { error_setg(errp, "Block replication is running or done"); aio_context_release(aio_context); return; @@ -545,7 +545,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, abort(); } - s->replication_state = BLOCK_REPLICATION_RUNNING; + s->stage = BLOCK_REPLICATION_RUNNING; if (s->mode == REPLICATION_MODE_SECONDARY) { secondary_do_checkpoint(s, errp); @@ -581,7 +581,7 @@ static void replication_get_error(ReplicationState *rs, Error **errp) aio_context_acquire(aio_context); s = bs->opaque; - if (s->replication_state != BLOCK_REPLICATION_RUNNING) { + if (s->stage != BLOCK_REPLICATION_RUNNING) { error_setg(errp, "Block replication is not running"); aio_context_release(aio_context); return; @@ -601,7 +601,7 @@ static void replication_done(void *opaque, int ret) BDRVReplicationState *s = bs->opaque; if (ret == 0) { - s->replication_state = BLOCK_REPLICATION_DONE; + s->stage = BLOCK_REPLICATION_DONE; /* 
refresh top bs's filename */ bdrv_refresh_filename(bs); @@ -610,7 +610,7 @@ static void replication_done(void *opaque, int ret) s->hidden_disk = NULL; s->error = 0; } else { - s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED; + s->stage = BLOCK_REPLICATION_FAILOVER_FAILED; s->error = -EIO; } } @@ -625,7 +625,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) aio_context_acquire(aio_context); s = bs->opaque; - if (s->replication_state != BLOCK_REPLICATION_RUNNING) { + if (s->stage != BLOCK_REPLICATION_RUNNING) { error_setg(errp, "Block replication is not running"); aio_context_release(aio_context); return; @@ -633,7 +633,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) switch (s->mode) { case REPLICATION_MODE_PRIMARY: - s->replication_state = BLOCK_REPLICATION_DONE; + s->stage = BLOCK_REPLICATION_DONE; s->error = 0; break; case REPLICATION_MODE_SECONDARY: @@ -648,12 +648,12 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) if (!failover) { secondary_do_checkpoint(s, errp); - s->replication_state = BLOCK_REPLICATION_DONE; + s->stage = BLOCK_REPLICATION_DONE; aio_context_release(aio_context); return; } - s->replication_state = BLOCK_REPLICATION_FAILOVER; + s->stage = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, NULL, replication_done, bs, true, errp); diff --git a/blockdev.c b/blockdev.c index 0b38c3df71..c63f4e82c7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2923,10 +2923,9 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - /* complete all in-flight operations before resizing the device */ - bdrv_drain_all(); - + bdrv_drained_begin(bs); ret = blk_truncate(blk, size, errp); + bdrv_drained_end(bs); out: blk_unref(blk); @@ -3151,6 +3150,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error *local_err = NULL; 
int flags; int64_t size; + bool set_backing_hd = false; if (!backup->has_speed) { backup->speed = 0; @@ -3201,6 +3201,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, } if (backup->sync == MIRROR_SYNC_MODE_NONE) { source = bs; + flags |= BDRV_O_NO_BACKING; + set_backing_hd = true; } size = bdrv_getlength(bs); @@ -3227,7 +3229,9 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, } if (backup->format) { - options = qdict_new(); + if (!options) { + options = qdict_new(); + } qdict_put_str(options, "driver", backup->format); } @@ -3238,6 +3242,14 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, bdrv_set_aio_context(target_bs, aio_context); + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { + bdrv_unref(target_bs); + goto out; + } + } + if (backup->has_bitmap) { bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); if (!bmap) { diff --git a/bsd-user/main.c b/bsd-user/main.c index 714a692e6f..04f95ddd54 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -744,10 +744,7 @@ int main(int argc, char **argv) qemu_init_cpu_list(); module_call_init(MODULE_INIT_QOM); - if ((envlist = envlist_create()) == NULL) { - (void) fprintf(stderr, "Unable to allocate envlist\n"); - exit(1); - } + envlist = envlist_create(); /* add current environment into the list */ for (wrk = environ; *wrk != NULL; wrk++) { @@ -785,10 +782,7 @@ int main(int argc, char **argv) usage(); } else if (!strcmp(r, "ignore-environment")) { envlist_free(envlist); - if ((envlist = envlist_create()) == NULL) { - (void) fprintf(stderr, "Unable to allocate envlist\n"); - exit(1); - } + envlist = envlist_create(); } else if (!strcmp(r, "U")) { r = argv[optind++]; if (envlist_unsetenv(envlist, r) != 0) @@ -956,10 +950,10 @@ int main(int argc, char **argv) } for (wrk = target_environ; *wrk; wrk++) { - free(*wrk); + g_free(*wrk); } - free(target_environ); + g_free(target_environ); if 
(qemu_loglevel_mask(CPU_LOG_PAGE)) { qemu_log("guest_base 0x%lx\n", guest_base); @@ -611,6 +611,7 @@ NetBSD) audio_possible_drivers="oss sdl" oss_lib="-lossaudio" HOST_VARIANT_DIR="netbsd" + supported_os="yes" ;; OpenBSD) bsd="yes" @@ -1334,7 +1335,7 @@ Advanced options (experts only): --oss-lib path to OSS library --cpu=CPU Build for host CPU [$cpu] --with-coroutine=BACKEND coroutine backend. Supported options: - gthread, ucontext, sigaltstack, windows + ucontext, sigaltstack, windows --enable-gcov enable test coverage analysis with gcov --gcov=GCOV use specified gcov [$gcov_tool] --disable-blobs disable installing provided firmware blobs @@ -2014,7 +2015,7 @@ if test "$xen" != "no" ; then else xen_libs="-lxenstore -lxenctrl -lxenguest" - xen_stable_libs="-lxencall -lxenforeignmemory -lxengnttab -lxenevtchn" + xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn" # First we test whether Xen headers and libraries are available. # If no, we are done and there is no Xen support. @@ -4418,10 +4419,8 @@ fi # check and set a backend for coroutine # We prefer ucontext, but it's not always possible. The fallback -# is sigcontext. gthread is not selectable except explicitly, because -# it is not functional enough to run QEMU proper. (It is occasionally -# useful for debugging purposes.) On Windows the only valid backend -# is the Windows-specific one. +# is sigcontext. On Windows the only valid backend is the Windows +# specific one. 
ucontext_works=no if test "$darwin" != "yes"; then @@ -4460,7 +4459,7 @@ else feature_not_found "ucontext" fi ;; - gthread|sigaltstack) + sigaltstack) if test "$mingw32" = "yes"; then error_exit "only the 'windows' coroutine backend is valid for Windows" fi @@ -4472,14 +4471,7 @@ else fi if test "$coroutine_pool" = ""; then - if test "$coroutine" = "gthread"; then - coroutine_pool=no - else - coroutine_pool=yes - fi -fi -if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then - error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)" + coroutine_pool=yes fi if test "$debug_stack_usage" = "yes"; then @@ -6110,12 +6102,14 @@ case "$target_name" in ppc64) TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc + mttcg=yes gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml" ;; ppc64le) TARGET_ARCH=ppc64 TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc + mttcg=yes gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml" ;; ppc64abi32) @@ -50,6 +50,7 @@ #include "qapi-event.h" #include "hw/nmi.h" #include "sysemu/replay.h" +#include "hw/boards.h" #ifdef CONFIG_LINUX @@ -1483,6 +1484,12 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* Ignore everything else? 
*/ break; } + } else if (cpu->unplug) { + qemu_tcg_destroy_vcpu(cpu); + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); + qemu_mutex_unlock_iothread(); + return NULL; } atomic_mb_set(&cpu->exit_request, 0); @@ -1859,6 +1866,8 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) CpuInfoList *qmp_query_cpus(Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); CpuInfoList *head = NULL, *cur_item = NULL; CPUState *cpu; @@ -1909,6 +1918,13 @@ CpuInfoList *qmp_query_cpus(Error **errp) #else info->value->arch = CPU_INFO_ARCH_OTHER; #endif + info->value->has_props = !!mc->cpu_index_to_instance_props; + if (info->value->has_props) { + CpuInstanceProperties *props; + props = g_malloc0(sizeof(*props)); + *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index); + info->value->props = props; + } /* XXX: waiting for the qapi to support GSList */ if (!cur_item) { @@ -930,7 +930,13 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, tlb_addr = tlbe->addr_write; } - /* Notice an IO access, or a notdirty page. */ + /* Check notdirty */ + if (unlikely(tlb_addr & TLB_NOTDIRTY)) { + tlb_set_dirty(ENV_GET_CPU(env), addr); + tlb_addr = tlb_addr & ~TLB_NOTDIRTY; + } + + /* Notice an IO access */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { /* There's really nothing that can be done to support this apart from stop-the-world. */ diff --git a/crypto/block-luks.c b/crypto/block-luks.c index d5a31bbaeb..2b97d89a69 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -473,9 +473,9 @@ qcrypto_block_luks_load_key(QCryptoBlock *block, * then encrypted. 
*/ rv = readfunc(block, - opaque, slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, splitkey, splitkeylen, + opaque, errp); if (rv < 0) { goto cleanup; @@ -676,9 +676,10 @@ qcrypto_block_luks_open(QCryptoBlock *block, /* Read the entire LUKS header, minus the key material from * the underlying device */ - rv = readfunc(block, opaque, 0, + rv = readfunc(block, 0, (uint8_t *)&luks->header, sizeof(luks->header), + opaque, errp); if (rv < 0) { ret = rv; @@ -1245,7 +1246,7 @@ qcrypto_block_luks_create(QCryptoBlock *block, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; /* Reserve header space to match payload offset */ - initfunc(block, opaque, block->payload_offset, &local_err); + initfunc(block, block->payload_offset, opaque, &local_err); if (local_err) { error_propagate(errp, local_err); goto error; @@ -1267,9 +1268,10 @@ qcrypto_block_luks_create(QCryptoBlock *block, /* Write out the partition header and key slot headers */ - writefunc(block, opaque, 0, + writefunc(block, 0, (const uint8_t *)&luks->header, sizeof(luks->header), + opaque, &local_err); /* Delay checking local_err until we've byte-swapped */ @@ -1295,10 +1297,11 @@ qcrypto_block_luks_create(QCryptoBlock *block, /* Write out the master key material, starting at the * sector immediately following the partition header. 
*/ - if (writefunc(block, opaque, + if (writefunc(block, luks->header.key_slots[0].key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, splitkey, splitkeylen, + opaque, errp) != splitkeylen) { goto error; } diff --git a/crypto/init.c b/crypto/init.c index f65207e57d..f131c42306 100644 --- a/crypto/init.c +++ b/crypto/init.c @@ -32,6 +32,8 @@ #include <gcrypt.h> #endif +#include "crypto/random.h" + /* #define DEBUG_GNUTLS */ /* @@ -146,5 +148,9 @@ int qcrypto_init(Error **errp) gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); #endif + if (qcrypto_random_init(errp) < 0) { + return -1; + } + return 0; } diff --git a/crypto/random-gcrypt.c b/crypto/random-gcrypt.c index 0de9a096df..9f1c9ee60e 100644 --- a/crypto/random-gcrypt.c +++ b/crypto/random-gcrypt.c @@ -31,3 +31,5 @@ int qcrypto_random_bytes(uint8_t *buf, gcry_randomize(buf, buflen, GCRY_STRONG_RANDOM); return 0; } + +int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; } diff --git a/crypto/random-gnutls.c b/crypto/random-gnutls.c index 04b45a8f8f..5350003a0b 100644 --- a/crypto/random-gnutls.c +++ b/crypto/random-gnutls.c @@ -41,3 +41,6 @@ int qcrypto_random_bytes(uint8_t *buf, return 0; } + + +int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; } diff --git a/crypto/random-platform.c b/crypto/random-platform.c index 82b755afad..92eed0ee78 100644 --- a/crypto/random-platform.c +++ b/crypto/random-platform.c @@ -22,14 +22,16 @@ #include "crypto/random.h" -int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED, - size_t buflen G_GNUC_UNUSED, - Error **errp) -{ - int fd; - int ret = -1; - int got; +#ifdef _WIN32 +#include <wincrypt.h> +static HCRYPTPROV hCryptProv; +#else +static int fd; /* a file handle to either /dev/urandom or /dev/random */ +#endif +int qcrypto_random_init(Error **errp) +{ +#ifndef _WIN32 /* TBD perhaps also add support for BSD getentropy / Linux * getrandom syscalls directly */ fd = open("/dev/urandom", O_RDONLY); @@ -41,6 +43,25 @@ int qcrypto_random_bytes(uint8_t *buf 
G_GNUC_UNUSED, error_setg(errp, "No /dev/urandom or /dev/random found"); return -1; } +#else + if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, + CRYPT_SILENT | CRYPT_VERIFYCONTEXT)) { + error_setg_win32(errp, GetLastError(), + "Unable to create cryptographic provider"); + return -1; + } +#endif + + return 0; +} + +int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED, + size_t buflen G_GNUC_UNUSED, + Error **errp) +{ +#ifndef _WIN32 + int ret = -1; + int got; while (buflen > 0) { got = read(fd, buf, buflen); @@ -59,6 +80,14 @@ int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED, ret = 0; cleanup: - close(fd); return ret; +#else + if (!CryptGenRandom(hCryptProv, buflen, buf)) { + error_setg_win32(errp, GetLastError(), + "Unable to read random bytes"); + return -1; + } + + return 0; +#endif } diff --git a/device_tree.c b/device_tree.c index 6e06320830..a24ddff02b 100644 --- a/device_tree.c +++ b/device_tree.c @@ -148,6 +148,7 @@ static void read_fstree(void *fdt, const char *dirname) d = opendir(dirname); if (!d) { error_setg(&error_fatal, "%s cannot open %s", __func__, dirname); + return; } while ((de = readdir(d)) != NULL) { diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt index b059405e0e..4274fe9f25 100644 --- a/docs/qdev-device-use.txt +++ b/docs/qdev-device-use.txt @@ -182,15 +182,13 @@ The appropriate DEVNAME depends on the machine type. For type "pc": This lets you control I/O ports and IRQs. -* -usbdevice serial:vendorid=VID,productid=PRID becomes - -device usb-serial,vendorid=VID,productid=PRID +* -usbdevice serial::chardev becomes -device usb-serial,chardev=dev. * -usbdevice braille doesn't support LEGACY-CHARDEV syntax. It always uses "braille". 
With -device, this useful default is gone, so you have to use something like - -device usb-braille,chardev=braille,vendorid=VID,productid=PRID - -chardev braille,id=braille + -device usb-braille,chardev=braille -chardev braille,id=braille * -virtioconsole becomes -device virtio-serial-pci,class=C,vectors=V,ioeventfd=IOEVENTFD,max_ports=N @@ -2119,10 +2119,10 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr) * In that case just map until the end of the page. */ if (block->offset == 0) { - return xen_map_cache(addr, 0, 0); + return xen_map_cache(addr, 0, 0, false); } - block->host = xen_map_cache(block->offset, block->max_length, 1); + block->host = xen_map_cache(block->offset, block->max_length, 1, false); } return ramblock_ptr(block, addr); } @@ -2152,10 +2152,10 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr, * In that case just map the requested area. */ if (block->offset == 0) { - return xen_map_cache(addr, *size, 1); + return xen_map_cache(addr, *size, 1, true); } - block->host = xen_map_cache(block->offset, block->max_length, 1); + block->host = xen_map_cache(block->offset, block->max_length, 1, true); } return ramblock_ptr(block, addr); diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index f3ebca4f7a..a2486566af 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -452,6 +452,11 @@ static off_t local_telldir(FsContext *ctx, V9fsFidOpenState *fs) return telldir(fs->dir.stream); } +static bool local_is_mapped_file_metadata(FsContext *fs_ctx, const char *name) +{ + return !strcmp(name, VIRTFS_META_DIR); +} + static struct dirent *local_readdir(FsContext *ctx, V9fsFidOpenState *fs) { struct dirent *entry; @@ -465,8 +470,8 @@ again: if (ctx->export_flags & V9FS_SM_MAPPED) { entry->d_type = DT_UNKNOWN; } else if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - if (!strcmp(entry->d_name, VIRTFS_META_DIR)) { - /* skp the meta data directory */ + if (local_is_mapped_file_metadata(ctx, entry->d_name)) { + /* skip the 
meta data directory */ goto again; } entry->d_type = DT_UNKNOWN; @@ -559,6 +564,12 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, int err = -1; int dirfd; + if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(fs_ctx, name)) { + errno = EINVAL; + return -1; + } + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); if (dirfd == -1) { return -1; @@ -605,6 +616,12 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, int err = -1; int dirfd; + if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(fs_ctx, name)) { + errno = EINVAL; + return -1; + } + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); if (dirfd == -1) { return -1; @@ -694,6 +711,12 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, int err = -1; int dirfd; + if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(fs_ctx, name)) { + errno = EINVAL; + return -1; + } + /* * Mark all the open to not follow symlinks */ @@ -752,6 +775,12 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, int err = -1; int dirfd; + if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(fs_ctx, name)) { + errno = EINVAL; + return -1; + } + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); if (dirfd == -1) { return -1; @@ -826,6 +855,12 @@ static int local_link(FsContext *ctx, V9fsPath *oldpath, int ret = -1; int odirfd, ndirfd; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(ctx, name)) { + errno = EINVAL; + return -1; + } + odirfd = local_opendir_nofollow(ctx, odirpath); if (odirfd == -1) { goto out; @@ -1096,6 +1131,12 @@ static int local_lremovexattr(FsContext *ctx, V9fsPath *fs_path, static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path, const char *name, V9fsPath *target) { + if (ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(ctx, name)) { + errno = 
EINVAL; + return -1; + } + if (dir_path) { v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); } else if (strcmp(name, "/")) { @@ -1116,6 +1157,13 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir, int ret; int odirfd, ndirfd; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE && + (local_is_mapped_file_metadata(ctx, old_name) || + local_is_mapped_file_metadata(ctx, new_name))) { + errno = EINVAL; + return -1; + } + odirfd = local_opendir_nofollow(ctx, olddir->data); if (odirfd == -1) { return -1; @@ -1206,6 +1254,12 @@ static int local_unlinkat(FsContext *ctx, V9fsPath *dir, int ret; int dirfd; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE && + local_is_mapped_file_metadata(ctx, name)) { + errno = EINVAL; + return -1; + } + dirfd = local_opendir_nofollow(ctx, dir->data); if (dirfd == -1) { return -1; diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 9c7f41af99..5df97c90fa 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -332,12 +332,14 @@ static int xen_9pfs_connect(struct XenDevice *xendev) str = g_strdup_printf("ring-ref%u", i); if (xenstore_read_fe_int(&xen_9pdev->xendev, str, &xen_9pdev->rings[i].ref) == -1) { + g_free(str); goto out; } g_free(str); str = g_strdup_printf("event-channel-%u", i); if (xenstore_read_fe_int(&xen_9pdev->xendev, str, &xen_9pdev->rings[i].evtchn) == -1) { + g_free(str); goto out; } g_free(str); @@ -378,7 +380,7 @@ static int xen_9pfs_connect(struct XenDevice *xendev) if (xen_9pdev->rings[i].evtchndev == NULL) { goto out; } - fcntl(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), F_SETFD, FD_CLOEXEC); + qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev)); xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain (xen_9pdev->rings[i].evtchndev, xendev->dom, diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 4ddfb68b24..36a6cc450e 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -24,6 +24,7 @@ #include "hw/acpi/aml-build.h" #include "qemu/bswap.h" 
#include "qemu/bitops.h" +#include "sysemu/numa.h" static GArray *build_alloc_array(void) { @@ -1636,3 +1637,28 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, numamem->base_addr = cpu_to_le64(base); numamem->range_length = cpu_to_le64(len); } + +/* + * ACPI spec 5.2.17 System Locality Distance Information Table + * (Revision 2.0 or later) + */ +void build_slit(GArray *table_data, BIOSLinker *linker) +{ + int slit_start, i, j; + slit_start = table_data->len; + + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + + build_append_int_noprefix(table_data, nb_numa_nodes, 8); + for (i = 0; i < nb_numa_nodes; i++) { + for (j = 0; j < nb_numa_nodes; j++) { + assert(numa_info[i].distance[j]); + build_append_int_noprefix(table_data, numa_info[i].distance[j], 1); + } + } + + build_header(linker, table_data, + (void *)(table_data->data + slit_start), + "SLIT", + table_data->len - slit_start, 1, NULL, NULL); +} diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 8c719d3f9d..a233fe17cf 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -503,7 +503,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, /* build Processor object for each processor */ for (i = 0; i < arch_ids->len; i++) { - int j; Aml *dev; Aml *uid = aml_int(i); GArray *madt_buf = g_array_new(0, 1, 1); @@ -557,9 +556,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, * as a result _PXM is required for all CPUs which might * be hot-plugged. For simplicity, add it for all CPUs. 
*/ - j = numa_get_node_for_cpu(i); - if (j < nb_numa_nodes) { - aml_append(dev, aml_name_decl("_PXM", aml_int(j))); + if (arch_ids->cpus[i].props.has_node_id) { + aml_append(dev, aml_name_decl("_PXM", + aml_int(arch_ids->cpus[i].props.node_id))); } aml_append(cpus_dev, dev); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index c409374ab8..f276967365 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -705,7 +705,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) * Reason: part of PIIX4 southbridge, needs to be wired up, * e.g. by mips_malta_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->hotpluggable = false; hc->plug = piix4_device_plug_cb; hc->unplug_request = piix4_device_unplug_request_cb; diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index fe2d5a764c..324626847c 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -1076,7 +1076,7 @@ static void sl_nand_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_sl_nand_info; dc->props = sl_nand_properties; /* Reason: init() method uses drive_get() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo sl_nand_info = { diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 6e5f3399f2..e5852067f5 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -486,30 +486,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) AcpiSystemResourceAffinityTable *srat; AcpiSratProcessorGiccAffinity *core; AcpiSratMemoryAffinity *numamem; - int i, j, srat_start; + int i, srat_start; uint64_t mem_base; - uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t)); - - for (i = 0; i < vms->smp_cpus; i++) { - j = numa_get_node_for_cpu(i); - if (j < nb_numa_nodes) { - cpu_node[i] = j; - } - } + MachineClass *mc = MACHINE_GET_CLASS(vms); + const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms)); srat_start = table_data->len; srat = 
acpi_data_push(table_data, sizeof(*srat)); srat->reserved1 = cpu_to_le32(1); - for (i = 0; i < vms->smp_cpus; ++i) { + for (i = 0; i < cpu_list->len; ++i) { + int node_id = cpu_list->cpus[i].props.has_node_id ? + cpu_list->cpus[i].props.node_id : 0; core = acpi_data_push(table_data, sizeof(*core)); core->type = ACPI_SRAT_PROCESSOR_GICC; core->length = sizeof(*core); - core->proximity = cpu_to_le32(cpu_node[i]); + core->proximity = cpu_to_le32(node_id); core->acpi_processor_uid = cpu_to_le32(i); core->flags = cpu_to_le32(1); } - g_free(cpu_node); mem_base = vms->memmap[VIRT_MEM].base; for (i = 0; i < nb_numa_nodes; ++i) { diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 5f62a0321e..c7c8159dfd 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -338,7 +338,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) { int cpu; int addr_cells = 1; - unsigned int i; + const MachineState *ms = MACHINE(vms); /* * From Documentation/devicetree/bindings/arm/cpus.txt @@ -369,6 +369,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); + CPUState *cs = CPU(armcpu); qemu_fdt_add_subnode(vms->fdt, nodename); qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu"); @@ -389,9 +390,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) armcpu->mp_affinity); } - i = numa_get_node_for_cpu(cpu); - if (i < nb_numa_nodes) { - qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i); + if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) { + qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", + ms->possible_cpus->cpus[cs->cpu_index].props.node_id); } g_free(nodename); @@ -1194,10 +1195,35 @@ void virt_machine_done(Notifier *notifier, void *data) virt_build_smbios(vms); } +static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) +{ + uint8_t clustersz = 
ARM_DEFAULT_CPUS_PER_CLUSTER; + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); + + if (!vmc->disallow_affinity_adjustment) { + /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the + * GIC's target-list limitations. 32-bit KVM hosts currently + * always create clusters of 4 CPUs, but that is expected to + * change when they gain support for gicv3. When KVM is enabled + * it will override the changes we make here, therefore our + * purposes are to make TCG consistent (with 64-bit KVM hosts) + * and to improve SGI efficiency. + */ + if (vms->gic_version == 3) { + clustersz = GICV3_TARGETLIST_BITS; + } else { + clustersz = GIC_TARGETLIST_BITS; + } + } + return arm_cpu_mp_affinity(idx, clustersz); +} + static void machvirt_init(MachineState *machine) { VirtMachineState *vms = VIRT_MACHINE(machine); VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; qemu_irq pic[NUM_IRQS]; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *secure_sysmem = NULL; @@ -1210,7 +1236,6 @@ static void machvirt_init(MachineState *machine) CPUClass *cc; Error *err = NULL; bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0); - uint8_t clustersz; if (!cpu_model) { cpu_model = "cortex-a15"; @@ -1263,10 +1288,8 @@ static void machvirt_init(MachineState *machine) */ if (vms->gic_version == 3) { virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000; - clustersz = GICV3_TARGETLIST_BITS; } else { virt_max_cpus = GIC_NCPU; - clustersz = GIC_TARGETLIST_BITS; } if (max_cpus > virt_max_cpus) { @@ -1324,21 +1347,35 @@ static void machvirt_init(MachineState *machine) exit(1); } - for (n = 0; n < smp_cpus; n++) { - Object *cpuobj = object_new(typename); - if (!vmc->disallow_affinity_adjustment) { - /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the - * GIC's target-list limitations. 
32-bit KVM hosts currently - * always create clusters of 4 CPUs, but that is expected to - * change when they gain support for gicv3. When KVM is enabled - * it will override the changes we make here, therefore our - * purposes are to make TCG consistent (with 64-bit KVM hosts) - * and to improve SGI efficiency. - */ - uint8_t aff1 = n / clustersz; - uint8_t aff0 = n % clustersz; - object_property_set_int(cpuobj, (aff1 << ARM_AFF1_SHIFT) | aff0, - "mp-affinity", NULL); + possible_cpus = mc->possible_cpu_arch_ids(machine); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; + CPUState *cs; + int node_id; + + if (n >= smp_cpus) { + break; + } + + cpuobj = object_new(typename); + object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id, + "mp-affinity", NULL); + + cs = CPU(cpuobj); + cs->cpu_index = n; + + node_id = possible_cpus->cpus[cs->cpu_index].props.node_id; + if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) { + /* by default CPUState::numa_node was 0 if it's not set via CLI + * keep it this way for now but in future we probably should + * refuse to start up with incomplete numa mapping */ + node_id = 0; + } + if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) { + cs->numa_node = node_id; + } else { + /* CPU isn't device_add compatible yet, this shouldn't happen */ + error_setg(&error_abort, "user set node-id not implemented"); } if (!vms->secure) { @@ -1518,6 +1555,46 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } } +static CpuInstanceProperties +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + + assert(cpu_index < possible_cpus->len); + return possible_cpus->cpus[cpu_index].props; +} + +static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +{ + int n; + VirtMachineState *vms = VIRT_MACHINE(ms); + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len 
== max_cpus); + return ms->possible_cpus; + } + + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * max_cpus); + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + ms->possible_cpus->cpus[n].props.has_thread_id = true; + ms->possible_cpus->cpus[n].props.thread_id = n; + + /* default distribution of CPUs over NUMA nodes */ + if (nb_numa_nodes) { + /* preset values but do not enable them i.e. 'has_node_id = false', + * numa init code will enable them later if manual mapping wasn't + * present on CLI */ + ms->possible_cpus->cpus[n].props.node_id = n % nb_numa_nodes; + } + } + return ms->possible_cpus; +} + static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1534,6 +1611,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->pci_allow_0_address = true; /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ mc->minimum_page_bits = 12; + mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = virt_cpu_index_to_props; } static const TypeInfo virt_machine_info = { diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c index 511b004287..4f65f8c199 100644 --- a/hw/audio/marvell_88w8618.c +++ b/hw/audio/marvell_88w8618.c @@ -292,7 +292,7 @@ static void mv88w8618_audio_class_init(ObjectClass *klass, void *data) dc->vmsd = &mv88w8618_audio_vmsd; dc->props = mv88w8618_audio_properties; /* Reason: pointer property "wm8750" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo mv88w8618_audio_info = { diff --git a/hw/audio/pcspk.c b/hw/audio/pcspk.c index 798002277b..9b99358d87 100644 --- a/hw/audio/pcspk.c +++ b/hw/audio/pcspk.c @@ -223,7 +223,7 @@ static void pcspk_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_spk; dc->props = 
pcspk_properties; /* Reason: realize sets global pcspk_state */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo pcspk_info = { diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae303d44e5..7428db9f0c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -227,6 +227,29 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_NO_COMPLETE; } +static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)cmd; + const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); + uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); + + if (slba + nlb > ns->id_ns.nsze) { + return NVME_LBA_RANGE | NVME_DNR; + } + + req->has_sg = false; + block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, + BLOCK_ACCT_WRITE); + req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, aio_slba, aio_nlb, + BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req); + return NVME_NO_COMPLETE; +} + static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req) { @@ -279,6 +302,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (cmd->opcode) { case NVME_CMD_FLUSH: return nvme_flush(n, ns, cmd, req); + case NVME_CMD_WRITE_ZEROS: + return nvme_write_zeros(n, ns, cmd, req); case NVME_CMD_WRITE: case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); @@ -895,6 +920,7 @@ static int nvme_init(PCIDevice *pci_dev) id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); + id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS); id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); diff --git a/hw/block/nvme.h 
b/hw/block/nvme.h index 8fb0c10756..a0d15649f9 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -179,6 +179,7 @@ enum NvmeIoCommands { NVME_CMD_READ = 0x02, NVME_CMD_WRITE_UNCOR = 0x04, NVME_CMD_COMPARE = 0x05, + NVME_CMD_WRITE_ZEROS = 0x08, NVME_CMD_DSM = 0x09, }; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 98c16a7a9a..604d37dfc8 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -42,9 +42,7 @@ static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, static void virtio_blk_free_request(VirtIOBlockReq *req) { - if (req) { - g_free(req); - } + g_free(req); } static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c index 58f1f02902..46012673c3 100644 --- a/hw/core/generic-loader.c +++ b/hw/core/generic-loader.c @@ -137,20 +137,21 @@ static void generic_loader_realize(DeviceState *dev, Error **errp) #endif if (s->file) { + AddressSpace *as = s->cpu ? s->cpu->as : NULL; + if (!s->force_raw) { size = load_elf_as(s->file, NULL, NULL, &entry, NULL, NULL, - big_endian, 0, 0, 0, s->cpu->as); + big_endian, 0, 0, 0, as); if (size < 0) { size = load_uimage_as(s->file, &entry, NULL, NULL, NULL, NULL, - s->cpu->as); + as); } } if (size < 0 || s->force_raw) { /* Default to the maximum size being the machine's ram size */ - size = load_image_targphys_as(s->file, s->addr, ram_size, - s->cpu->as); + size = load_image_targphys_as(s->file, s->addr, ram_size, as); } else { s->addr = entry; } diff --git a/hw/core/machine.c b/hw/core/machine.c index ada9eea483..fd6a436064 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -17,8 +17,10 @@ #include "qapi/visitor.h" #include "hw/sysbus.h" #include "sysemu/sysemu.h" +#include "sysemu/numa.h" #include "qemu/error-report.h" #include "qemu/cutils.h" +#include "sysemu/numa.h" static char *machine_get_accel(Object *obj, Error **errp) { @@ -388,6 +390,102 @@ HotpluggableCPUList 
*machine_query_hotpluggable_cpus(MachineState *machine) return head; } +/** + * machine_set_cpu_numa_node: + * @machine: machine object to modify + * @props: specifies which cpu objects to assign to + * numa node specified by @props.node_id + * @errp: if an error occurs, a pointer to an area to store the error + * + * Associate NUMA node specified by @props.node_id with cpu slots that + * match socket/core/thread-ids specified by @props. It's recommended to use + * query-hotpluggable-cpus.props values to specify affected cpu slots, + * which would lead to exact 1:1 mapping of cpu slots to NUMA node. + * + * However for CLI convenience it's possible to pass in subset of properties, + * which would affect all cpu slots that match it. + * Ex for pc machine: + * -smp 4,cores=2,sockets=2 -numa node,nodeid=0 -numa node,nodeid=1 \ + * -numa cpu,node-id=0,socket_id=0 \ + * -numa cpu,node-id=1,socket_id=1 + * will assign all child cores of socket 0 to node 0 and + * of socket 1 to node 1. + * + * On attempt of reassigning (already assigned) cpu slot to another NUMA node, + * return error. + * Empty subset is disallowed and function will return with error in this case. 
+ */ +void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, Error **errp) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + bool match = false; + int i; + + if (!mc->possible_cpu_arch_ids) { + error_setg(errp, "mapping of CPUs to NUMA node is not supported"); + return; + } + + /* disabling node mapping is not supported, forbid it */ + assert(props->has_node_id); + + /* force board to initialize possible_cpus if it hasn't been done yet */ + mc->possible_cpu_arch_ids(machine); + + for (i = 0; i < machine->possible_cpus->len; i++) { + CPUArchId *slot = &machine->possible_cpus->cpus[i]; + + /* reject unsupported by board properties */ + if (props->has_thread_id && !slot->props.has_thread_id) { + error_setg(errp, "thread-id is not supported"); + return; + } + + if (props->has_core_id && !slot->props.has_core_id) { + error_setg(errp, "core-id is not supported"); + return; + } + + if (props->has_socket_id && !slot->props.has_socket_id) { + error_setg(errp, "socket-id is not supported"); + return; + } + + /* skip slots with explicit mismatch */ + if (props->has_thread_id && props->thread_id != slot->props.thread_id) { + continue; + } + + if (props->has_core_id && props->core_id != slot->props.core_id) { + continue; + } + + if (props->has_socket_id && props->socket_id != slot->props.socket_id) { + continue; + } + + /* reject assignment if slot is already assigned, for compatibility + * of legacy cpu_index mapping with SPAPR core based mapping do not + * error out if cpu thread and matched core have the same node-id */ + if (slot->props.has_node_id && + slot->props.node_id != props->node_id) { + error_setg(errp, "CPU is already assigned to node-id: %" PRId64, + slot->props.node_id); + return; + } + + /* assign slot to node as it's matched '-numa cpu' key */ + match = true; + slot->props.node_id = props->node_id; + slot->props.has_node_id = props->has_node_id; + } + + if (!match) { + error_setg(errp, "no match found"); + } +} + static 
void machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -400,6 +498,7 @@ static void machine_class_init(ObjectClass *oc, void *data) * On Linux, each node's border has to be 8MB aligned */ mc->numa_mem_align_shift = 23; + mc->numa_auto_assign_ram = numa_default_auto_assign_ram; object_class_property_add_str(oc, "accel", machine_get_accel, machine_set_accel, &error_abort); @@ -580,6 +679,69 @@ bool machine_mem_merge(MachineState *machine) return machine->mem_merge; } +static char *cpu_slot_to_string(const CPUArchId *cpu) +{ + GString *s = g_string_new(NULL); + if (cpu->props.has_socket_id) { + g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id); + } + if (cpu->props.has_core_id) { + if (s->len) { + g_string_append_printf(s, ", "); + } + g_string_append_printf(s, "core-id: %"PRId64, cpu->props.core_id); + } + if (cpu->props.has_thread_id) { + if (s->len) { + g_string_append_printf(s, ", "); + } + g_string_append_printf(s, "thread-id: %"PRId64, cpu->props.thread_id); + } + return g_string_free(s, false); +} + +static void machine_numa_validate(MachineState *machine) +{ + int i; + GString *s = g_string_new(NULL); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine); + + assert(nb_numa_nodes); + for (i = 0; i < possible_cpus->len; i++) { + const CPUArchId *cpu_slot = &possible_cpus->cpus[i]; + + /* at this point numa mappings are initialized by CLI options + * or with default mappings so it's sufficient to list + * all not yet mapped CPUs here */ + /* TODO: make it hard error in future */ + if (!cpu_slot->props.has_node_id) { + char *cpu_str = cpu_slot_to_string(cpu_slot); + g_string_append_printf(s, "%sCPU %d [%s]", s->len ? 
", " : "", i, + cpu_str); + g_free(cpu_str); + } + } + if (s->len) { + error_report("warning: CPU(s) not present in any NUMA nodes: %s", + s->str); + error_report("warning: All CPU(s) up to maxcpus should be described " + "in NUMA config, ability to start up with partial NUMA " + "mappings is obsoleted and will be removed in future"); + } + g_string_free(s, true); +} + +void machine_run_board_init(MachineState *machine) +{ + MachineClass *machine_class = MACHINE_GET_CLASS(machine); + + if (nb_numa_nodes) { + machine_numa_validate(machine); + } + machine_class->init(machine); +} + static void machine_class_finalize(ObjectClass *klass, void *data) { MachineClass *mc = MACHINE_CLASS(klass); diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c index 1485d5b285..f9d76c4641 100644 --- a/hw/core/or-irq.c +++ b/hw/core/or-irq.c @@ -91,7 +91,7 @@ static void or_irq_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_or_irq; /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo or_irq_type_info = { diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 02b632f6b3..1863db9d0a 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -1118,6 +1118,7 @@ static void device_class_init(ObjectClass *class, void *data) * should override it in their class_init() */ dc->hotpluggable = true; + dc->user_creatable = true; } void device_reset(DeviceState *dev) diff --git a/hw/core/register.c b/hw/core/register.c index dc335a79a9..da38ef3a54 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -288,7 +288,7 @@ static void register_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); /* Reason: needs to be wired up to work */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo register_info = { diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index c0f560b289..5d0887f499 100644 --- 
a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -326,6 +326,17 @@ static void sysbus_device_class_init(ObjectClass *klass, void *data) DeviceClass *k = DEVICE_CLASS(klass); k->init = sysbus_device_init; k->bus_type = TYPE_SYSTEM_BUS; + /* + * device_add plugs devices into a suitable bus. For "real" buses, + * that actually connects the device. For sysbus, the connections + * need to be made separately, and device_add can't do that. The + * device would be left unconnected, and will probably not work + * + * However, a few machines can handle device_add/-device with + * a few specific sysbus devices. In those cases, the device + * subclass needs to override it and set user_creatable=true. + */ + k->user_creatable = false; } static const TypeInfo sysbus_device_type_info = { diff --git a/hw/display/cg3.c b/hw/display/cg3.c index 7ef8a96496..1de15a1d34 100644 --- a/hw/display/cg3.c +++ b/hw/display/cg3.c @@ -94,7 +94,8 @@ static void cg3_update_display(void *opaque) uint32_t dval; int x, y, y_start; unsigned int width, height; - ram_addr_t page, page_min, page_max; + ram_addr_t page; + DirtyBitmapSnapshot *snap = NULL; if (surface_bits_per_pixel(surface) != 32) { return; @@ -103,29 +104,32 @@ static void cg3_update_display(void *opaque) height = s->height; y_start = -1; - page_min = -1; - page_max = 0; - page = 0; pix = memory_region_get_ram_ptr(&s->vram_mem); data = (uint32_t *)surface_data(surface); - memory_region_sync_dirty_bitmap(&s->vram_mem); + if (!s->full_update) { + memory_region_sync_dirty_bitmap(&s->vram_mem); + snap = memory_region_snapshot_and_clear_dirty(&s->vram_mem, 0x0, + memory_region_size(&s->vram_mem), + DIRTY_MEMORY_VGA); + } + for (y = 0; y < height; y++) { - int update = s->full_update; + int update; page = (ram_addr_t)y * width; - update |= memory_region_get_dirty(&s->vram_mem, page, width, - DIRTY_MEMORY_VGA); + + if (s->full_update) { + update = 1; + } else { + update = memory_region_snapshot_get_dirty(&s->vram_mem, snap, page, + width); + } + 
if (update) { if (y_start < 0) { y_start = y; } - if (page < page_min) { - page_min = page; - } - if (page > page_max) { - page_max = page; - } for (x = 0; x < width; x++) { dval = *pix++; @@ -134,7 +138,7 @@ static void cg3_update_display(void *opaque) } } else { if (y_start >= 0) { - dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start); + dpy_gfx_update(s->con, 0, y_start, width, y - y_start); y_start = -1; } pix += width; @@ -143,17 +147,14 @@ static void cg3_update_display(void *opaque) } s->full_update = 0; if (y_start >= 0) { - dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start); - } - if (page_max >= page_min) { - memory_region_reset_dirty(&s->vram_mem, - page_min, page_max - page_min, DIRTY_MEMORY_VGA); + dpy_gfx_update(s->con, 0, y_start, width, y - y_start); } /* vsync interrupt? */ if (s->regs[0] & CG3_CR_ENABLE_INTS) { s->regs[1] |= CG3_SR_PENDING_INT; qemu_irq_raise(s->irq); } + g_free(snap); } static void cg3_invalidate_display(void *opaque) diff --git a/hw/display/jazz_led.c b/hw/display/jazz_led.c index b72fdb1717..3c97d56434 100644 --- a/hw/display/jazz_led.c +++ b/hw/display/jazz_led.c @@ -227,13 +227,13 @@ static void jazz_led_invalidate_display(void *opaque) static void jazz_led_text_update(void *opaque, console_ch_t *chardata) { LedState *s = opaque; - char buf[2]; + char buf[3]; dpy_text_cursor(s->con, -1, -1); qemu_console_resize(s->con, 2, 1); /* TODO: draw the segments */ - snprintf(buf, 2, "%02hhx\n", s->segments); + snprintf(buf, 3, "%02hhx", s->segments); console_write_ch(chardata++, ATTR2CHTYPE(buf[0], QEMU_COLOR_BLUE, QEMU_COLOR_BLACK, 1)); console_write_ch(chardata++, ATTR2CHTYPE(buf[1], QEMU_COLOR_BLUE, diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 2094adbc9c..9d254ef2e1 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -1414,6 +1414,7 @@ static void sm501_update_display(void *opaque) { SM501State *s = (SM501State *)opaque; DisplaySurface *surface = qemu_console_surface(s->con); + DirtyBitmapSnapshot 
*snap; int y, c_x = 0, c_y = 0; int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; int width = get_width(s, crt); @@ -1425,9 +1426,7 @@ static void sm501_update_display(void *opaque) draw_hwc_line_func *draw_hwc_line = NULL; int full_update = 0; int y_start = -1; - ram_addr_t page_min = ~0l; - ram_addr_t page_max = 0l; - ram_addr_t offset; + ram_addr_t offset = 0; uint32_t *palette; uint8_t hwc_palette[3 * 3]; uint8_t *hwc_src = NULL; @@ -1479,17 +1478,17 @@ static void sm501_update_display(void *opaque) /* draw each line according to conditions */ memory_region_sync_dirty_bitmap(&s->local_mem_region); + snap = memory_region_snapshot_and_clear_dirty(&s->local_mem_region, + offset, width * height * src_bpp, DIRTY_MEMORY_VGA); for (y = 0, offset = 0; y < height; y++, offset += width * src_bpp) { int update, update_hwc; - ram_addr_t page0 = offset; - ram_addr_t page1 = offset + width * src_bpp - 1; /* check if hardware cursor is enabled and we're within its range */ update_hwc = draw_hwc_line && c_y <= y && y < c_y + SM501_HWC_HEIGHT; update = full_update || update_hwc; /* check dirty flags for each line */ - update |= memory_region_get_dirty(&s->local_mem_region, page0, - page1 - page0, DIRTY_MEMORY_VGA); + update |= memory_region_snapshot_get_dirty(&s->local_mem_region, snap, + offset, width * src_bpp); /* draw line and change status */ if (update) { @@ -1507,12 +1506,6 @@ static void sm501_update_display(void *opaque) if (y_start < 0) { y_start = y; } - if (page0 < page_min) { - page_min = page0; - } - if (page1 > page_max) { - page_max = page1; - } } else { if (y_start >= 0) { /* flush to display */ @@ -1521,18 +1514,12 @@ static void sm501_update_display(void *opaque) } } } + g_free(snap); /* complete flush to display */ if (y_start >= 0) { dpy_gfx_update(s->con, 0, y_start, width, y - y_start); } - - /* clear dirty flags */ - if (page_min != ~0l) { - memory_region_reset_dirty(&s->local_mem_region, - page_min, page_max + TARGET_PAGE_SIZE, - 
DIRTY_MEMORY_VGA); - } } static const GraphicHwOps sm501_ops = { diff --git a/hw/display/tcx.c b/hw/display/tcx.c index 0e66dcd055..6593c1d6af 100644 --- a/hw/display/tcx.c +++ b/hw/display/tcx.c @@ -104,36 +104,23 @@ static void tcx_set_dirty(TCXState *s, ram_addr_t addr, int len) } } -static int tcx_check_dirty(TCXState *s, ram_addr_t addr, int len) +static int tcx_check_dirty(TCXState *s, DirtyBitmapSnapshot *snap, + ram_addr_t addr, int len) { int ret; - ret = memory_region_get_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA); + ret = memory_region_snapshot_get_dirty(&s->vram_mem, snap, addr, len); if (s->depth == 24) { - ret |= memory_region_get_dirty(&s->vram_mem, - s->vram24_offset + addr * 4, len * 4, - DIRTY_MEMORY_VGA); - ret |= memory_region_get_dirty(&s->vram_mem, - s->cplane_offset + addr * 4, len * 4, - DIRTY_MEMORY_VGA); + ret |= memory_region_snapshot_get_dirty(&s->vram_mem, snap, + s->vram24_offset + addr * 4, len * 4); + ret |= memory_region_snapshot_get_dirty(&s->vram_mem, snap, + s->cplane_offset + addr * 4, len * 4); } return ret; } -static void tcx_reset_dirty(TCXState *s, ram_addr_t addr, int len) -{ - memory_region_reset_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA); - - if (s->depth == 24) { - memory_region_reset_dirty(&s->vram_mem, s->vram24_offset + addr * 4, - len * 4, DIRTY_MEMORY_VGA); - memory_region_reset_dirty(&s->vram_mem, s->cplane_offset + addr * 4, - len * 4, DIRTY_MEMORY_VGA); - } -} - static void update_palette_entries(TCXState *s, int start, int end) { DisplaySurface *surface = qemu_console_surface(s->con); @@ -233,7 +220,8 @@ static void tcx_update_display(void *opaque) { TCXState *ts = opaque; DisplaySurface *surface = qemu_console_surface(ts->con); - ram_addr_t page, page_min, page_max; + ram_addr_t page; + DirtyBitmapSnapshot *snap = NULL; int y, y_start, dd, ds; uint8_t *d, *s; @@ -243,22 +231,20 @@ static void tcx_update_display(void *opaque) page = 0; y_start = -1; - page_min = -1; - page_max = 0; d = 
surface_data(surface); s = ts->vram; dd = surface_stride(surface); ds = 1024; memory_region_sync_dirty_bitmap(&ts->vram_mem); + snap = memory_region_snapshot_and_clear_dirty(&ts->vram_mem, 0x0, + memory_region_size(&ts->vram_mem), + DIRTY_MEMORY_VGA); + for (y = 0; y < ts->height; y++, page += ds) { - if (tcx_check_dirty(ts, page, ds)) { + if (tcx_check_dirty(ts, snap, page, ds)) { if (y_start < 0) y_start = y; - if (page < page_min) - page_min = page; - if (page > page_max) - page_max = page; tcx_draw_line32(ts, d, s, ts->width); if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) { @@ -280,17 +266,15 @@ static void tcx_update_display(void *opaque) dpy_gfx_update(ts->con, 0, y_start, ts->width, y - y_start); } - /* reset modified pages */ - if (page_max >= page_min) { - tcx_reset_dirty(ts, page_min, page_max - page_min); - } + g_free(snap); } static void tcx24_update_display(void *opaque) { TCXState *ts = opaque; DisplaySurface *surface = qemu_console_surface(ts->con); - ram_addr_t page, page_min, page_max; + ram_addr_t page; + DirtyBitmapSnapshot *snap = NULL; int y, y_start, dd, ds; uint8_t *d, *s; uint32_t *cptr, *s24; @@ -301,8 +285,6 @@ static void tcx24_update_display(void *opaque) page = 0; y_start = -1; - page_min = -1; - page_max = 0; d = surface_data(surface); s = ts->vram; s24 = ts->vram24; @@ -311,14 +293,15 @@ static void tcx24_update_display(void *opaque) ds = 1024; memory_region_sync_dirty_bitmap(&ts->vram_mem); + snap = memory_region_snapshot_and_clear_dirty(&ts->vram_mem, 0x0, + memory_region_size(&ts->vram_mem), + DIRTY_MEMORY_VGA); + for (y = 0; y < ts->height; y++, page += ds) { - if (tcx_check_dirty(ts, page, ds)) { + if (tcx_check_dirty(ts, snap, page, ds)) { if (y_start < 0) y_start = y; - if (page < page_min) - page_min = page; - if (page > page_max) - page_max = page; + tcx24_draw_line32(ts, d, s, ts->width, cptr, s24); if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) { tcx_draw_cursor32(ts, d, y, ts->width); 
@@ -341,10 +324,7 @@ static void tcx24_update_display(void *opaque) dpy_gfx_update(ts->con, 0, y_start, ts->width, y - y_start); } - /* reset modified pages */ - if (page_max >= page_min) { - tcx_reset_dirty(ts, page_min, page_max - page_min); - } + g_free(snap); } static void tcx_invalidate_display(void *opaque) diff --git a/hw/display/vga.c b/hw/display/vga.c index b2516c8d21..dcc95f88e2 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -1630,7 +1630,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) if (!full_update) { vga_sync_dirty_bitmap(s); snap = memory_region_snapshot_and_clear_dirty(&s->vram, addr1, - bwidth * height, + line_offset * height, DIRTY_MEMORY_VGA); } diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c index f49b7fe8cd..8c106a662d 100644 --- a/hw/display/virtio-gpu-3d.c +++ b/hw/display/virtio-gpu-3d.c @@ -600,6 +600,22 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g) } } +void virtio_gpu_gl_block(void *opaque, bool block) +{ + VirtIOGPU *g = opaque; + + if (block) { + g->renderer_blocked++; + } else { + g->renderer_blocked--; + } + assert(g->renderer_blocked >= 0); + + if (g->renderer_blocked == 0) { + virtio_gpu_process_cmdq(g); + } +} + int virtio_gpu_virgl_init(VirtIOGPU *g) { int ret; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index e1056f34df..cfb5dfa336 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -929,28 +929,14 @@ static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) return 0; } -static void virtio_gpu_gl_block(void *opaque, bool block) -{ - VirtIOGPU *g = opaque; - - if (block) { - g->renderer_blocked++; - } else { - g->renderer_blocked--; - } - assert(g->renderer_blocked >= 0); - - if (g->renderer_blocked == 0) { - virtio_gpu_process_cmdq(g); - } -} - const GraphicHwOps virtio_gpu_ops = { .invalidate = virtio_gpu_invalidate_display, .gfx_update = virtio_gpu_update_display, .text_update = virtio_gpu_text_update, .ui_info = 
virtio_gpu_ui_info, +#ifdef CONFIG_VIRGL .gl_block = virtio_gpu_gl_block, +#endif }; static const VMStateDescription vmstate_virtio_gpu_scanout = { diff --git a/hw/dma/i8257.c b/hw/dma/i8257.c index 8bd82e8bc8..bd23e893bf 100644 --- a/hw/dma/i8257.c +++ b/hw/dma/i8257.c @@ -601,7 +601,7 @@ static void i8257_class_init(ObjectClass *klass, void *data) idc->schedule = i8257_dma_schedule; idc->register_channel = i8257_dma_register_channel; /* Reason: needs to be wired up by isa_bus_dma() to work */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo i8257_info = { diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c index 9d545e412e..9c6bdc6295 100644 --- a/hw/dma/sparc32_dma.c +++ b/hw/dma/sparc32_dma.c @@ -305,7 +305,7 @@ static void sparc32_dma_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_dma; dc->props = sparc32_dma_properties; /* Reason: pointer property "iommu_opaque" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo sparc32_dma_info = { diff --git a/hw/gpio/omap_gpio.c b/hw/gpio/omap_gpio.c index dabef4a119..1df394eb12 100644 --- a/hw/gpio/omap_gpio.c +++ b/hw/gpio/omap_gpio.c @@ -773,7 +773,7 @@ static void omap_gpio_class_init(ObjectClass *klass, void *data) dc->reset = omap_gpif_reset; dc->props = omap_gpio_properties; /* Reason: pointer property "clk" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo omap_gpio_info = { @@ -804,7 +804,7 @@ static void omap2_gpio_class_init(ObjectClass *klass, void *data) dc->reset = omap2_gpif_reset; dc->props = omap2_gpio_properties; /* Reason: pointer properties "iclk", "fclk0", ..., "fclk5" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo omap2_gpio_info = { diff --git a/hw/i2c/omap_i2c.c b/hw/i2c/omap_i2c.c index f7c92ea00c..f6e80bee25 100644 --- a/hw/i2c/omap_i2c.c 
+++ b/hw/i2c/omap_i2c.c @@ -491,7 +491,7 @@ static void omap_i2c_class_init(ObjectClass *klass, void *data) dc->props = omap_i2c_properties; dc->reset = omap_i2c_reset; /* Reason: pointer properties "iclk", "fclk" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->realize = omap_i2c_realize; } diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c index 5b7bd891bc..b13ec0fe7a 100644 --- a/hw/i2c/smbus_eeprom.c +++ b/hw/i2c/smbus_eeprom.c @@ -123,7 +123,7 @@ static void smbus_eeprom_class_initfn(ObjectClass *klass, void *data) sc->read_data = eeprom_read_data; dc->props = smbus_eeprom_properties; /* Reason: pointer property "data" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo smbus_eeprom_info = { diff --git a/hw/i2c/smbus_ich9.c b/hw/i2c/smbus_ich9.c index 48fab22625..ea51e09186 100644 --- a/hw/i2c/smbus_ich9.c +++ b/hw/i2c/smbus_ich9.c @@ -103,7 +103,7 @@ static void ich9_smb_class_init(ObjectClass *klass, void *data) * Reason: part of ICH9 southbridge, needs to be wired up by * pc_q35_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index c75f73ebb1..afcadacd2e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2335,7 +2335,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) srat->reserved1 = cpu_to_le32(1); for (i = 0; i < apic_ids->len; i++) { - int j = numa_get_node_for_cpu(i); + int node_id = apic_ids->cpus[i].props.has_node_id ? 
+ apic_ids->cpus[i].props.node_id : 0; uint32_t apic_id = apic_ids->cpus[i].arch_id; if (apic_id < 255) { @@ -2345,9 +2346,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) core->type = ACPI_SRAT_PROCESSOR_APIC; core->length = sizeof(*core); core->local_apic_id = apic_id; - if (j < nb_numa_nodes) { - core->proximity_lo = j; - } + core->proximity_lo = node_id; memset(core->proximity_hi, 0, 3); core->local_sapic_eid = 0; core->flags = cpu_to_le32(1); @@ -2358,9 +2357,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) core->type = ACPI_SRAT_PROCESSOR_x2APIC; core->length = sizeof(*core); core->x2apic_id = cpu_to_le32(apic_id); - if (j < nb_numa_nodes) { - core->proximity_domain = cpu_to_le32(j); - } + core->proximity_domain = cpu_to_le32(node_id); core->flags = cpu_to_le32(1); } } @@ -2707,6 +2704,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) if (pcms->numa_nodes) { acpi_add_table(table_offsets, tables_blob); build_srat(tables_blob, tables->linker, machine); + if (have_numa_distance) { + acpi_add_table(table_offsets, tables_blob); + build_slit(tables_blob, tables->linker); + } } if (acpi_get_mcfg(&mcfg)) { acpi_add_table(table_offsets, tables_blob); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 516ebae952..329058dac8 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1199,6 +1199,8 @@ static void amdvi_class_init(ObjectClass *klass, void* data) dc->vmsd = &vmstate_amdvi; dc->hotpluggable = false; dc_class->realize = amdvi_realize; + /* Supported by the pc-q35-* machine types */ + dc->user_creatable = true; } static const TypeInfo amdvi = { diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a12b1761f5..9ba2162cd9 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3019,6 +3019,8 @@ static void vtd_class_init(ObjectClass *klass, void *data) dc->hotpluggable = false; x86_class->realize = vtd_realize; x86_class->int_remap = 
vtd_int_remap; + /* Supported by the pc-q35-* machine types */ + dc->user_creatable = true; } static const TypeInfo vtd_info = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8063241140..816bfa872c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -597,7 +597,7 @@ static void port92_class_initfn(ObjectClass *klass, void *data) * wiring: its A20 output line needs to be wired up by * port92_init(). */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo port92_info = { @@ -747,7 +747,9 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; - int i, j; + int i; + const CPUArchIdList *cpus; + MachineClass *mc = MACHINE_GET_CLASS(pcms); fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); @@ -782,12 +784,12 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) */ numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); - for (i = 0; i < max_cpus; i++) { - unsigned int apic_id = x86_cpu_apic_id_from_index(i); + cpus = mc->possible_cpu_arch_ids(MACHINE(pcms)); + for (i = 0; i < cpus->len; i++) { + unsigned int apic_id = cpus->cpus[i].arch_id; assert(apic_id < pcms->apic_id_limit); - j = numa_get_node_for_cpu(i); - if (j < nb_numa_nodes) { - numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); + if (cpus->cpus[i].props.has_node_id) { + numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); } } for (i = 0; i < nb_numa_nodes; i++) { @@ -1891,6 +1893,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int idx; + int node_id; CPUState *cs; CPUArchId *cpu_slot; X86CPUTopoInfo topo; @@ -1980,6 +1983,22 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, cs = CPU(cpu); cs->cpu_index = idx; + + node_id = cpu_slot->props.node_id; + if 
(!cpu_slot->props.has_node_id) { + /* by default CPUState::numa_node was 0 if it's not set via CLI + * keep it this way for now but in future we probably should + * refuse to start up with incomplete numa mapping */ + node_id = 0; + } + if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) { + cs->numa_node = node_id; + } else if (cs->numa_node != node_id) { + error_setg(errp, "node-id %d must match numa node specified" + "with -numa option for cpu-index %d", + cs->numa_node, cs->cpu_index); + return; + } } static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, @@ -2241,12 +2260,14 @@ static void pc_machine_reset(void) } } -static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) +static CpuInstanceProperties +pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { - X86CPUTopoInfo topo; - x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index, - &topo); - return topo.pkg_id; + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + + assert(cpu_index < possible_cpus->len); + return possible_cpus->cpus[cpu_index].props; } static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) @@ -2278,6 +2299,15 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[i].props.core_id = topo.core_id; ms->possible_cpus->cpus[i].props.has_thread_id = true; ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; + + /* default distribution of CPUs over NUMA nodes */ + if (nb_numa_nodes) { + /* preset values but do not enable them i.e. 
'has_node_id = false', + * numa init code will enable them later if manual mapping wasn't + * present on CLI */ + ms->possible_cpus->cpus[i].props.node_id = + topo.pkg_id % nb_numa_nodes; + } } return ms->possible_cpus; } @@ -2321,7 +2351,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->save_tsc_khz = true; pcmc->linuxboot_dma_enabled = true; mc->get_hotplug_handler = pc_get_hotpug_handler; - mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; + mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8f3d85ca58..2234bd0461 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ #endif #include "migration/migration.h" #include "kvm_i386.h" +#include "sysemu/numa.h" #define MAX_IDE_BUS 2 @@ -442,6 +443,7 @@ static void pc_i440fx_2_10_machine_options(MachineClass *m) pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; + m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; } DEFINE_I440FX_MACHINE(v2_10, "pc-i440fx-2.10", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index cf9a788ec7..f243203844 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -47,6 +47,7 @@ #include "hw/usb.h" #include "qemu/error-report.h" #include "migration/migration.h" +#include "sysemu/numa.h" /* ICH9 AHCI has 6 ports */ #define MAX_SATA_PORTS 6 @@ -305,6 +306,7 @@ static void pc_q35_2_10_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; + m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; } DEFINE_Q35_MACHINE(v2_10, "pc-q35-2.10", NULL, diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 31debdfb2c..e60156c04f 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -62,6 +62,7 @@ typedef struct MapCacheRev { hwaddr paddr_index; hwaddr size; QTAILQ_ENTRY(MapCacheRev) 
next; + bool dma; } MapCacheRev; typedef struct MapCache { @@ -202,7 +203,7 @@ static void xen_remap_bucket(MapCacheEntry *entry, } static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size, - uint8_t lock) + uint8_t lock, bool dma) { MapCacheEntry *entry, *pentry = NULL; hwaddr address_index; @@ -289,6 +290,7 @@ tryagain: if (lock) { MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev)); entry->lock++; + reventry->dma = dma; reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset; reventry->paddr_index = mapcache->last_entry->paddr_index; reventry->size = entry->size; @@ -300,12 +302,12 @@ tryagain: } uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, - uint8_t lock) + uint8_t lock, bool dma) { uint8_t *p; mapcache_lock(); - p = xen_map_cache_unlocked(phys_addr, size, lock); + p = xen_map_cache_unlocked(phys_addr, size, lock, dma); mapcache_unlock(); return p; } @@ -426,8 +428,11 @@ void xen_invalidate_map_cache(void) mapcache_lock(); QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - DPRINTF("There should be no locked mappings at this time, " - "but "TARGET_FMT_plx" -> %p is present\n", + if (!reventry->dma) { + continue; + } + fprintf(stderr, "Locked DMA mapping while invalidating mapcache!" 
+ " "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); } diff --git a/hw/input/virtio-input-hid.c b/hw/input/virtio-input-hid.c index 3ee0c1814a..46c038110c 100644 --- a/hw/input/virtio-input-hid.c +++ b/hw/input/virtio-input-hid.c @@ -484,12 +484,14 @@ static struct virtio_input_config virtio_tablet_config[] = { .select = VIRTIO_INPUT_CFG_ABS_INFO, .subsel = ABS_X, .size = sizeof(virtio_input_absinfo), - .u.abs.max = const_le32(INPUT_EVENT_ABS_SIZE - 1), + .u.abs.min = const_le32(INPUT_EVENT_ABS_MIN), + .u.abs.max = const_le32(INPUT_EVENT_ABS_MAX), },{ .select = VIRTIO_INPUT_CFG_ABS_INFO, .subsel = ABS_Y, .size = sizeof(virtio_input_absinfo), - .u.abs.max = const_le32(INPUT_EVENT_ABS_SIZE - 1), + .u.abs.min = const_le32(INPUT_EVENT_ABS_MIN), + .u.abs.max = const_le32(INPUT_EVENT_ABS_MAX), }, { /* end of list */ }, }; diff --git a/hw/input/vmmouse.c b/hw/input/vmmouse.c index 6d15a887c6..4747da9a8d 100644 --- a/hw/input/vmmouse.c +++ b/hw/input/vmmouse.c @@ -286,7 +286,7 @@ static void vmmouse_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_vmmouse; dc->props = vmmouse_properties; /* Reason: pointer property "ps2_mouse" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo vmmouse_info = { diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index c3829e31b5..1ef56f8d10 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -501,7 +501,7 @@ static void apic_common_class_init(ObjectClass *klass, void *data) * Reason: APIC and CPU need to be wired up by * x86_cpu_apic_create() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo apic_common_type = { diff --git a/hw/intc/etraxfs_pic.c b/hw/intc/etraxfs_pic.c index 64a6f4b4ba..1bfde2f09e 100644 --- a/hw/intc/etraxfs_pic.c +++ b/hw/intc/etraxfs_pic.c @@ -173,7 +173,7 @@ static void etraxfs_pic_class_init(ObjectClass *klass, void *data) 
dc->props = etraxfs_pic_properties; /* * Note: pointer property "interrupt_vector" may remain null, thus - * no need for dc->cannot_instantiate_with_device_add_yet = true; + * no need for dc->user_creatable = false; */ } diff --git a/hw/intc/grlib_irqmp.c b/hw/intc/grlib_irqmp.c index ac7e63f38b..94659ee256 100644 --- a/hw/intc/grlib_irqmp.c +++ b/hw/intc/grlib_irqmp.c @@ -360,7 +360,7 @@ static void grlib_irqmp_class_init(ObjectClass *klass, void *data) dc->reset = grlib_irqmp_reset; dc->props = grlib_irqmp_properties; /* Reason: pointer properties "set_pil_in", "set_pil_in_opaque" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->realize = grlib_irqmp_realize; } diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index d9a5e8b217..c2fd563b5b 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -144,7 +144,7 @@ static void pic_common_class_init(ObjectClass *klass, void *data) * wiring of the slave to the master is hard-coded in device model * code. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo pic_common_type = { diff --git a/hw/intc/nios2_iic.c b/hw/intc/nios2_iic.c index 190b6fdbf3..016426f964 100644 --- a/hw/intc/nios2_iic.c +++ b/hw/intc/nios2_iic.c @@ -80,7 +80,7 @@ static void altera_iic_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); /* Reason: needs to be wired up, e.g. 
by nios2_10m50_ghrd_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->realize = altera_iic_realize; } diff --git a/hw/intc/omap_intc.c b/hw/intc/omap_intc.c index 877be67971..ccdda89dab 100644 --- a/hw/intc/omap_intc.c +++ b/hw/intc/omap_intc.c @@ -401,7 +401,7 @@ static void omap_intc_class_init(ObjectClass *klass, void *data) dc->reset = omap_inth_reset; dc->props = omap_intc_properties; /* Reason: pointer property "clk" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->realize = omap_intc_realize; } @@ -656,7 +656,7 @@ static void omap2_intc_class_init(ObjectClass *klass, void *data) dc->reset = omap_inth_reset; dc->props = omap2_intc_properties; /* Reason: pointer property "iclk", "fclk" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->realize = omap2_intc_realize; } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 42e0e0ef84..dd93531ae3 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -213,6 +213,7 @@ static void ics_get_kvm_state(ICSState *ics) irq->priority = irq->saved_priority; } + irq->status = 0; if (state & KVM_XICS_PENDING) { if (state & KVM_XICS_LEVEL_SENSITIVE) { irq->status |= XICS_STATUS_ASSERTED; @@ -228,6 +229,12 @@ static void ics_get_kvm_state(ICSState *ics) | XICS_STATUS_REJECTED; } } + if (state & KVM_XICS_PRESENTED) { + irq->status |= XICS_STATUS_PRESENTED; + } + if (state & KVM_XICS_QUEUED) { + irq->status |= XICS_STATUS_QUEUED; + } } } @@ -265,6 +272,12 @@ static int ics_set_kvm_state(ICSState *ics, int version_id) state |= KVM_XICS_PENDING; } } + if (irq->status & XICS_STATUS_PRESENTED) { + state |= KVM_XICS_PRESENTED; + } + if (irq->status & XICS_STATUS_QUEUED) { + state |= KVM_XICS_QUEUED; + } ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); if (ret != 0) { diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index a0866c3856..e2215dcf4d 100644 --- a/hw/isa/lpc_ich9.c +++ 
b/hw/isa/lpc_ich9.c @@ -805,7 +805,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) * Reason: part of ICH9 southbridge, needs to be wired up by * pc_q35_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; hc->plug = ich9_pm_device_plug_cb; hc->unplug_request = ich9_pm_device_unplug_request_cb; hc->unplug = ich9_pm_device_unplug_cb; diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index 5500fcc4d6..f811eba59d 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -123,7 +123,7 @@ static void piix4_class_init(ObjectClass *klass, void *data) * Reason: part of PIIX4 southbridge, needs to be wired up, * e.g. by mips_malta_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->hotpluggable = false; } diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index 41d5254f8e..50dc83df77 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -494,7 +494,7 @@ static void via_class_init(ObjectClass *klass, void *data) * Reason: part of VIA VT82C686 southbridge, needs to be wired up, * e.g. 
by mips_fulong2e_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo via_info = { diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c index 1834d22a61..457a08a2fe 100644 --- a/hw/microblaze/boot.c +++ b/hw/microblaze/boot.c @@ -189,7 +189,7 @@ void microblaze_load_kernel(MicroBlazeCPU *cpu, hwaddr ddr_base, ram_size - initrd_offset); } if (initrd_size < 0) { - error_report("qemu: could not load initrd '%s'", + error_report("could not load initrd '%s'", initrd_filename); exit(EXIT_FAILURE); } diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 4811843ab6..e8b2eef688 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -1224,7 +1224,7 @@ static void gt64120_pci_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo gt64120_pci_info = { diff --git a/hw/misc/vmport.c b/hw/misc/vmport.c index be40930b8b..165500223f 100644 --- a/hw/misc/vmport.c +++ b/hw/misc/vmport.c @@ -163,7 +163,7 @@ static void vmport_class_initfn(ObjectClass *klass, void *data) dc->realize = vmport_realizefn; /* Reason: realize sets global port_state */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo vmport_info = { diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c index efa33ad40a..b53fcaa8bc 100644 --- a/hw/net/dp8393x.c +++ b/hw/net/dp8393x.c @@ -934,7 +934,7 @@ static void dp8393x_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_dp8393x; dc->props = dp8393x_properties; /* Reason: dma_mr property can't be set */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo dp8393x_info = { diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c index efaa49faae..013c8d0a41 
100644 --- a/hw/net/etraxfs_eth.c +++ b/hw/net/etraxfs_eth.c @@ -630,7 +630,7 @@ static void etraxfs_eth_class_init(ObjectClass *klass, void *data) k->init = fs_eth_init; dc->props = etraxfs_eth_properties; /* Reason: pointer properties "dma_out", "dma_in" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo etraxfs_eth_info = { diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c index aa2b0d5a85..9da1932970 100644 --- a/hw/net/fsl_etsec/etsec.c +++ b/hw/net/fsl_etsec/etsec.c @@ -416,6 +416,8 @@ static void etsec_class_init(ObjectClass *klass, void *data) dc->realize = etsec_realize; dc->reset = etsec_reset; dc->props = etsec_properties; + /* Supported by ppce500 machine */ + dc->user_creatable = true; } static TypeInfo etsec_info = { diff --git a/hw/net/lance.c b/hw/net/lance.c index 573d724bcf..92b0c68274 100644 --- a/hw/net/lance.c +++ b/hw/net/lance.c @@ -165,7 +165,7 @@ static void lance_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_lance; dc->props = lance_properties; /* Reason: pointer property "dma" */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo lance_info = { diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c index e0a9aff2f4..2b31f5b844 100644 --- a/hw/nios2/boot.c +++ b/hw/nios2/boot.c @@ -197,7 +197,7 @@ void nios2_load_kernel(Nios2CPU *cpu, hwaddr ddr_base, ram_size - initrd_offset); } if (initrd_size < 0) { - error_report("qemu: could not load initrd '%s'", + error_report("could not load initrd '%s'", initrd_filename); exit(EXIT_FAILURE); } diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c index 840c96198a..cca93620ac 100644 --- a/hw/pci-bridge/dec.c +++ b/hw/pci-bridge/dec.c @@ -128,7 +128,7 @@ static void dec_21154_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo dec_21154_pci_host_info = { diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 6ac187fa32..ff59abf208 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -150,7 +150,7 @@ static void pxb_host_class_init(ObjectClass *class, void *data) dc->fw_name = "pci"; /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; sbc->explicit_ofw_unit_address = pxb_host_ofw_unit_address; hc->root_bus_path = pxb_host_root_bus_path; } diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index 653e711121..edc88f4c65 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -810,7 +810,7 @@ static void pbm_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo pbm_pci_host_info = { diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index 1999ece590..85a3bb0dd2 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -825,7 +825,7 @@ static void bonito_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo bonito_info = { diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c index 66055ee5cc..e2629ce70d 100644 --- a/hw/pci-host/gpex.c +++ b/hw/pci-host/gpex.c @@ -136,7 +136,7 @@ static void gpex_root_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo gpex_root_info = { diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c index 2c8acdaaca..2e281f6155 100644 --- a/hw/pci-host/grackle.c +++ b/hw/pci-host/grackle.c @@ -134,7 +134,7 @@ static void grackle_pci_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo grackle_pci_info = { diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index bf4221d4bf..2d02de12d9 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -685,7 +685,7 @@ static void pci_piix3_class_init(ObjectClass *klass, void *data) * Reason: part of PIIX3 southbridge, needs to be wired up by * pc_piix.c's pc_init1() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo piix3_pci_type_info = { @@ -739,7 +739,7 @@ static void i440fx_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; dc->hotpluggable = false; } @@ -868,7 +868,7 @@ static void i440fx_pcihost_class_init(ObjectClass *klass, void *data) dc->fw_name = "pci"; dc->props = i440fx_props; /* Reason: needs to be wired up by pc_init1 */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo i440fx_pcihost_info = { diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index e502bc0505..becc0eeb76 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -508,7 +508,7 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo e500_host_bridge_info = { diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index 260a119a9e..900a6edfcf 100644 --- a/hw/pci-host/prep.c +++ b/hw/pci-host/prep.c @@ -364,7 +364,7 @@ static void raven_class_init(ObjectClass *klass, void *data) * Reason: PCI-facing part of the host bridge, not usable without * the host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo raven_info = { diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 344f77b10c..cd5c49616e 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -156,7 +156,7 @@ static void q35_host_class_init(ObjectClass *klass, void *data) dc->realize = q35_host_realize; dc->props = mch_props; /* Reason: needs to be wired up by pc_q35_init */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->fw_name = "pci"; } @@ -549,7 +549,7 @@ static void mch_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo mch_info = { diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c index df342ac3cb..6cf5e59f86 100644 --- a/hw/pci-host/uninorth.c +++ b/hw/pci-host/uninorth.c @@ -366,7 +366,7 @@ static void unin_main_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo unin_main_pci_host_info = { @@ -390,7 +390,7 @@ static void u3_agp_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo u3_agp_pci_host_info = { @@ -414,7 +414,7 @@ static void unin_agp_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo unin_agp_pci_host_info = { @@ -438,7 +438,7 @@ static void unin_internal_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo unin_internal_pci_host_info = { diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 27fde46126..aa1fdf75fd 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -479,7 +479,7 @@ static void versatile_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo versatile_pci_host_info = { diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c index 8b71e2d950..a968cea2af 100644 --- a/hw/pci-host/xilinx-pcie.c +++ b/hw/pci-host/xilinx-pcie.c @@ -309,7 +309,7 @@ static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo xilinx_pcie_root_info = { diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 68aaedc06d..bae1c0ac99 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -80,6 +80,8 @@ #define CLOCKFREQ (266UL * 1000UL * 1000UL) #define BUSFREQ (100UL * 1000UL * 1000UL) +#define NDRV_VGA_FILENAME "qemu_vga.ndrv" + /* UniN device */ static void unin_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) @@ -160,7 +162,8 @@ static void ppc_core99_init(MachineState *machine) MACIOIDEState *macio_ide; BusState *adb_bus; MacIONVRAMState *nvr; - int bios_size; + int bios_size, ndrv_size; + uint8_t *ndrv_file; MemoryRegion *pic_mem, *escc_mem; MemoryRegion *escc_bar = g_new(MemoryRegion, 1); int ppc_boot_device; @@ -494,6 +497,19 @@ static void ppc_core99_init(MachineState *machine) fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr); + /* MacOS NDRV VGA driver */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME); + if (filename) { + ndrv_size = get_image_size(filename); + if (ndrv_size != -1) { + ndrv_file = g_malloc(ndrv_size); + ndrv_size = load_image(filename, ndrv_file); + + fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size); + } + g_free(filename); + } + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 5df94e239b..97bb8541d7 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -53,6 +53,8 @@ #define CLOCKFREQ 266000000UL #define BUSFREQ 66000000UL +#define NDRV_VGA_FILENAME "qemu_vga.ndrv" + static void fw_cfg_boot_set(void *opaque, const char *boot_device, Error **errp) { @@ -99,7 +101,8 @@ static void ppc_heathrow_init(MachineState *machine) MACIOIDEState *macio_ide; DeviceState *dev; BusState *adb_bus; - int bios_size; + int bios_size, ndrv_size; + uint8_t 
*ndrv_file; MemoryRegion *pic_mem; MemoryRegion *escc_mem, *escc_bar = g_new(MemoryRegion, 1); uint16_t ppc_boot_device; @@ -355,6 +358,19 @@ static void ppc_heathrow_init(MachineState *machine) fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); + /* MacOS NDRV VGA driver */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME); + if (filename) { + ndrv_size = get_image_size(filename); + if (ndrv_size != -1) { + ndrv_file = g_malloc(ndrv_size); + ndrv_size = load_image(filename, ndrv_file); + + fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size); + } + g_free(filename); + } + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index d4bcdb027f..231ed9735b 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -511,7 +511,7 @@ static void ppc_powernv_reset(void) * This is the internal simulator but it could also be an external * BMC. */ - obj = object_resolve_path_type("", TYPE_IPMI_BMC, NULL); + obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL); if (obj) { pnv->bmc = IPMI_BMC(obj); } @@ -610,7 +610,7 @@ static void ppc_powernv_init(MachineState *machine) /* Create the processor chips */ chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model); if (!object_class_by_name(chip_typename)) { - error_report("qemu: invalid CPU model '%s' for %s machine", + error_report("invalid CPU model '%s' for %s machine", machine->cpu_model, MACHINE_GET_CLASS(machine)->name); exit(1); } diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c index dc19682970..6953f8b9ac 100644 --- a/hw/ppc/ppc4xx_pci.c +++ b/hw/ppc/ppc4xx_pci.c @@ -351,7 +351,7 @@ static void ppc4xx_host_bridge_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo ppc4xx_host_bridge_info = { diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c index 60baffaf1d..23bcf1b138 100644 --- a/hw/ppc/ppc_booke.c +++ b/hw/ppc/ppc_booke.c @@ -282,7 +282,6 @@ void store_booke_tcr(CPUPPCState *env, target_ulong val) ppc_tb_t *tb_env = env->tb_env; booke_timer_t *booke_timer = tb_env->opaque; - tb_env = env->tb_env; env->spr[SPR_BOOKE_TCR] = val; kvmppc_set_tcr(cpu); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 80d12d005c..0980d733cd 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -219,7 +219,7 @@ static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset, /* 16: Vector */ 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */ - 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 18 - 23 */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */ 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */ /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */ @@ -855,6 +855,8 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) * option vector 5: */ static void spapr_dt_ov5_platform_support(void *fdt, int chosen) { + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + char val[2 * 3] = { 24, 0x00, /* Hash/Radix, filled in below. */ 25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. 
*/ @@ -870,8 +872,13 @@ static void spapr_dt_ov5_platform_support(void *fdt, int chosen) val[1] = 0x00; /* Hash */ } } else { - /* TODO: TCG case, hash */ - val[1] = 0x00; + if (first_ppc_cpu->env.mmu_model & POWERPC_MMU_V3) { + /* V3 MMU supports both hash and radix (with dynamic switching) */ + val[1] = 0xC0; + } else { + /* Otherwise we can only do hash */ + val[1] = 0x00; + } } _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support", val, sizeof(val))); @@ -2101,8 +2108,8 @@ static void ppc_spapr_init(MachineState *machine) } spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); - if (kvmppc_has_cap_mmu_radix()) { - /* KVM always allows GTSE with radix... */ + if (!kvm_enabled() || kvmppc_has_cap_mmu_radix()) { + /* KVM and TCG always allow GTSE with radix... */ spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE); } /* ... but not with hash (currently). */ @@ -2824,9 +2831,11 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); Error *local_err = NULL; CPUCore *cc = CPU_CORE(dev); + sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev); char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model); const char *type = object_get_typename(OBJECT(dev)); CPUArchId *core_slot; + int node_id; int index; if (dev->hotplugged && !mc->has_hotpluggable_cpus) { @@ -2861,6 +2870,21 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } + node_id = core_slot->props.node_id; + if (!core_slot->props.has_node_id) { + /* by default CPUState::numa_node was 0 if it's not set via CLI + * keep it this way for now but in future we probably should + * refuse to start up with incomplete numa mapping */ + node_id = 0; + } + if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) { + sc->node_id = node_id; + } else if (sc->node_id != node_id) { + error_setg(&local_err, "node-id %d must match numa node specified" + "with -numa option for cpu-index %d", sc->node_id, cc->core_id); + goto out; + } + 
out: g_free(base_core_type); error_propagate(errp, local_err); @@ -2981,11 +3005,18 @@ static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine, return NULL; } -static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index) +static CpuInstanceProperties +spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index) { - /* Allocate to NUMA nodes on a "socket" basis (not that concept of - * socket means much for the paravirtualized PAPR platform) */ - return cpu_index / smp_threads / smp_cores; + CPUArchId *core_slot; + MachineClass *mc = MACHINE_GET_CLASS(machine); + + /* make sure possible_cpu are intialized */ + mc->possible_cpu_arch_ids(machine); + /* get CPU core slot containing thread that matches cpu_index */ + core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL); + assert(core_slot); + return core_slot->props; } static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) @@ -3012,8 +3043,15 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) machine->possible_cpus->cpus[i].arch_id = core_id; machine->possible_cpus->cpus[i].props.has_core_id = true; machine->possible_cpus->cpus[i].props.core_id = core_id; - /* TODO: add 'has_node/node' here to describe - to which node core belongs */ + + /* default distribution of CPUs over NUMA nodes */ + if (nb_numa_nodes) { + /* preset values but do not enable them i.e. 
'has_node_id = false', + * numa init code will enable them later if manual mapping wasn't + * present on CLI */ + machine->possible_cpus->cpus[i].props.node_id = + core_id / smp_threads / smp_cores % nb_numa_nodes; + } } return machine->possible_cpus; } @@ -3138,7 +3176,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->pre_plug = spapr_machine_device_pre_plug; hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; - mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + mc->cpu_index_to_instance_props = spapr_cpu_index_to_props; mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids; hc->unplug_request = spapr_machine_device_unplug_request; @@ -3242,6 +3280,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc) { spapr_machine_2_10_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); + mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; } DEFINE_SPAPR_MACHINE(2_9, "2.9", false); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 4389ef4c2a..a17ea07ef1 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -176,13 +176,11 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) const char *typename = object_class_get_name(scc->cpu_class); size_t size = object_type_get_instance_size(typename); Error *local_err = NULL; - int core_node_id = numa_get_node_for_cpu(cc->core_id);; void *obj; int i, j; sc->threads = g_malloc0(size * cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { - int node_id; char id[32]; CPUState *cs; @@ -192,17 +190,8 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) cs = CPU(obj); cs->cpu_index = cc->core_id + i; - /* Set NUMA node for the added CPUs */ - node_id = numa_get_node_for_cpu(cs->cpu_index); - if (node_id != core_node_id) { - error_setg(&local_err, "Invalid node-id=%d of thread[cpu-index: %d]" - " on CPU[core-id: %d, node-id: %d], node-id must be the same", - node_id, cs->cpu_index, 
cc->core_id, core_node_id); - goto err; - } - if (node_id < nb_numa_nodes) { - cs->numa_node = node_id; - } + /* Set NUMA node for the threads belonged to core */ + cs->numa_node = sc->node_id; snprintf(id, sizeof(id), "thread[%d]", i); object_property_add_child(OBJECT(sc), id, obj, &local_err); @@ -263,6 +252,11 @@ static const char *spapr_core_models[] = { "POWER9_v1.0", }; +static Property spapr_cpu_core_properties[] = { + DEFINE_PROP_INT32("node-id", sPAPRCPUCore, node_id, CPU_UNSET_NUMA_NODE_ID), + DEFINE_PROP_END_OF_LIST() +}; + void spapr_cpu_core_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -270,6 +264,7 @@ void spapr_cpu_core_class_init(ObjectClass *oc, void *data) dc->realize = spapr_cpu_core_realize; dc->unrealize = spapr_cpu_core_unrealizefn; + dc->props = spapr_cpu_core_properties; scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data); g_assert(scc->cpu_class); } diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index a1cdc875b1..9fa5545991 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -675,7 +675,7 @@ static void spapr_dr_connector_class_init(ObjectClass *k, void *data) /* * Reason: it crashes FIXME find and document the real reason */ - dk->cannot_instantiate_with_device_add_yet = true; + dk->user_creatable = false; } static const TypeInfo spapr_dr_connector_info = { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 9f18f75b88..0d608d6e28 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -936,7 +936,7 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { - CPUPPCState *env = &cpu->env; + CPUState *cs; target_ulong flags = args[0]; target_ulong proc_tbl = args[1]; target_ulong page_size = args[2]; @@ -992,16 +992,12 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc); spapr->patb_entry = cproc; /* Save new process table */ - if ((flags & 
FLAG_RADIX) || (flags & FLAG_HASH_PROC_TBL)) { - /* Use Process TBL */ - env->spr[SPR_LPCR] |= LPCR_UPRT; - } else { - env->spr[SPR_LPCR] &= ~LPCR_UPRT; - } - if (flags & FLAG_GTSE) { /* Partition Uses Guest Translation Shootdwn */ - env->spr[SPR_LPCR] |= LPCR_GTSE; - } else { - env->spr[SPR_LPCR] &= ~LPCR_GTSE; + + /* Update the UPRT and GTSE bits in the LPCR for all cpus */ + CPU_FOREACH(cs) { + set_spr(cs, SPR_LPCR, LPCR_UPRT | LPCR_GTSE, + ((flags & (FLAG_RADIX | FLAG_HASH_PROC_TBL)) ? LPCR_UPRT : 0) | + ((flags & FLAG_GTSE) ? LPCR_GTSE : 0)); } if (kvm_enabled()) { diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index e7567e2e8f..a7cff32bbf 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1994,6 +1994,8 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->props = spapr_phb_properties; dc->reset = spapr_phb_reset; dc->vmsd = &vmstate_spapr_pci; + /* Supported by TYPE_SPAPR_MACHINE */ + dc->user_creatable = true; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); hp->plug = spapr_phb_hot_plug_child; hp->unplug = spapr_phb_hot_unplug_child; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index a8a1bab50a..66a6fbeb8c 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -872,7 +872,6 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); - dc->cannot_instantiate_with_device_add_yet = true; dc->reset = s390_pcihost_reset; k->init = s390_pcihost_init; hc->plug = s390_pcihost_hot_plug; diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index b4f6dd58dd..83d6023894 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -505,10 +505,10 @@ static void sclp_realize(DeviceState *dev, Error **errp) ret = s390_set_memory_limit(machine->maxram_size, &hw_limit); if (ret == -E2BIG) { - error_setg(&err, "qemu: host supports a maximum of %" PRIu64 " GB", + error_setg(&err, "host supports a maximum of %" PRIu64 
" GB", hw_limit >> 30); } else if (ret) { - error_setg(&err, "qemu: setting the guest size failed"); + error_setg(&err, "setting the guest size failed"); } out: diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c index 1f2f0ed44a..4008c81002 100644 --- a/hw/sd/milkymist-memcard.c +++ b/hw/sd/milkymist-memcard.c @@ -299,7 +299,7 @@ static void milkymist_memcard_class_init(ObjectClass *klass, void *data) dc->reset = milkymist_memcard_reset; dc->vmsd = &vmstate_milkymist_memcard; /* Reason: init() method uses drive_get_next() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo milkymist_memcard_info = { diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c index 82c63a4fb5..55c8098ecd 100644 --- a/hw/sd/pl181.c +++ b/hw/sd/pl181.c @@ -515,7 +515,7 @@ static void pl181_class_init(ObjectClass *klass, void *data) k->vmsd = &vmstate_pl181; k->reset = pl181_reset; /* Reason: init() method uses drive_get_next() */ - k->cannot_instantiate_with_device_add_yet = true; + k->user_creatable = false; k->realize = pl181_realize; } diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c index 1747628f3d..38395c082b 100644 --- a/hw/sh4/sh_pci.c +++ b/hw/sh4/sh_pci.c @@ -171,7 +171,7 @@ static void sh_pci_host_class_init(ObjectClass *klass, void *data) * PCI-facing part of the host bridge, not usable without the * host-facing part, which can't be device_add'ed, yet. */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo sh_pci_host_info = { diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c index e18299a482..976d5200f1 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -287,7 +287,7 @@ static void pit_common_class_init(ObjectClass *klass, void *data) * wired to the HPET, and because of that, some wiring is always * done by board code. 
*/ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo pit_common_type = { diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index 4165450250..93de3e1cc5 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -973,7 +973,7 @@ static void rtc_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_rtc; dc->props = mc146818rtc_properties; /* Reason: needs to be wired up by rtc_init() */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static void rtc_finalize(Object *obj) diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c index 19dd587207..8910bf0f27 100644 --- a/hw/tricore/tricore_testboard.c +++ b/hw/tricore/tricore_testboard.c @@ -50,7 +50,7 @@ static void tricore_load_kernel(CPUTriCoreState *env) NULL, 0, EM_TRICORE, 1, 0); if (kernel_size <= 0) { - error_report("qemu: no kernel file '%s'", + error_report("no kernel file '%s'", tricoretb_binfo.kernel_filename); exit(1); } diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c index 9fe7333946..47b7519910 100644 --- a/hw/usb/dev-hub.c +++ b/hw/usb/dev-hub.c @@ -208,6 +208,7 @@ static void usb_hub_wakeup(USBPort *port1) USBHubPort *port = &s->ports[port1->index]; if (port->wPortStatus & PORT_STAT_SUSPEND) { + port->wPortStatus &= ~PORT_STAT_SUSPEND; port->wPortChange |= PORT_STAT_C_SUSPEND; usb_wakeup(s->intr, 0); } diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index 6d5137383b..83a4f0e6fb 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -513,27 +513,18 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) { USBDevice *dev; Chardev *cdrv; - uint32_t vendorid = 0, productid = 0; char label[32]; static int index; while (*filename && *filename != ':') { const char *p; - char *e; + if (strstart(filename, "vendorid=", &p)) { - vendorid = strtol(p, &e, 16); - if (e == p || (*e && *e != ',' && *e != ':')) { - error_report("bogus vendor ID 
%s", p); - return NULL; - } - filename = e; + error_report("vendorid is not supported anymore"); + return NULL; } else if (strstart(filename, "productid=", &p)) { - productid = strtol(p, &e, 16); - if (e == p || (*e && *e != ',' && *e != ':')) { - error_report("bogus product ID %s", p); - return NULL; - } - filename = e; + error_report("productid is not supported anymore"); + return NULL; } else { error_report("unrecognized serial USB option %s", filename); return NULL; @@ -554,10 +545,7 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) dev = usb_create(bus, "usb-serial"); qdev_prop_set_chr(&dev->qdev, "chardev", cdrv); - if (vendorid) - qdev_prop_set_uint16(&dev->qdev, "vendorid", vendorid); - if (productid) - qdev_prop_set_uint16(&dev->qdev, "productid", productid); + return dev; } diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c index 757b8b3f5a..49cb1829b5 100644 --- a/hw/usb/dev-smartcard-reader.c +++ b/hw/usb/dev-smartcard-reader.c @@ -813,7 +813,10 @@ static void ccid_write_data_block(USBCCIDState *s, uint8_t slot, uint8_t seq, if (p->b.bError) { DPRINTF(s, D_VERBOSE, "error %d\n", p->b.bError); } - memcpy(p->abData, data, len); + if (len) { + g_assert_nonnull(data); + memcpy(p->abData, data, len); + } ccid_reset_error_status(s); usb_wakeup(s->bulk, 0); } diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index a2d3143bf4..77d8e1137a 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -50,7 +50,7 @@ /* Very pessimistic, let's hope it's enough for all cases */ #define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) -#define TRB_LINK_LIMIT 4 +#define TRB_LINK_LIMIT 32 #define COMMAND_LIMIT 256 #define TRANSFER_LIMIT 256 @@ -1790,9 +1790,6 @@ static void xhci_stall_ep(XHCITransfer *xfer) } } -static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, - XHCIEPContext *epctx); - static int xhci_setup_packet(XHCITransfer *xfer) { USBEndpoint *ep; @@ -1806,7 +1803,7 @@ static int xhci_setup_packet(XHCITransfer *xfer) ep 
= xhci_epid_to_usbep(xfer->epctx); if (!ep) { DPRINTF("xhci: slot %d has no device\n", - xfer->slotid); + xfer->epctx->slotid); return -1; } } @@ -1980,7 +1977,7 @@ static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx { uint64_t mfindex; - DPRINTF("xhci_submit(slotid=%d,epid=%d)\n", xfer->slotid, xfer->epid); + DPRINTF("xhci_submit(slotid=%d,epid=%d)\n", epctx->slotid, epctx->epid); xfer->in_xfer = epctx->type>>2; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index b001a27f05..ad5ef783a6 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -229,21 +229,10 @@ static void usbredir_log(void *priv, int level, const char *msg) static void usbredir_log_data(USBRedirDevice *dev, const char *desc, const uint8_t *data, int len) { - int i, j, n; - if (dev->debug < usbredirparser_debug_data) { return; } - - for (i = 0; i < len; i += j) { - char buf[128]; - - n = sprintf(buf, "%s", desc); - for (j = 0; j < 8 && i + j < len; j++) { - n += sprintf(buf + n, " %02X", data[i + j]); - } - error_report("%s", buf); - } + qemu_hexdump((char *)data, stderr, desc, len); } /* diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c index 2c60310cf9..fab196cebf 100644 --- a/hw/vfio/amd-xgbe.c +++ b/hw/vfio/amd-xgbe.c @@ -38,6 +38,8 @@ static void vfio_amd_xgbe_class_init(ObjectClass *klass, void *data) dc->realize = amd_xgbe_realize; dc->desc = "VFIO AMD XGBE"; dc->vmsd = &vfio_platform_amd_xgbe_vmstate; + /* Supported by TYPE_VIRT_MACHINE */ + dc->user_creatable = true; } static const TypeInfo vfio_amd_xgbe_dev_info = { diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c index bb15d588e5..7bb17af7ad 100644 --- a/hw/vfio/calxeda-xgmac.c +++ b/hw/vfio/calxeda-xgmac.c @@ -38,6 +38,8 @@ static void vfio_calxeda_xgmac_class_init(ObjectClass *klass, void *data) dc->realize = calxeda_xgmac_realize; dc->desc = "VFIO Calxeda XGMAC"; dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate; + /* Supported by TYPE_VIRT_MACHINE */ + dc->user_creatable = true; } 
static const TypeInfo vfio_calxeda_xgmac_dev_info = { diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c index c85f1637e4..3570f37e56 100644 --- a/hw/xen/xen_backend.c +++ b/hw/xen/xen_backend.c @@ -147,7 +147,7 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, qdev_unplug(DEVICE(xendev), NULL); return NULL; } - fcntl(xenevtchn_fd(xendev->evtchndev), F_SETFD, FD_CLOEXEC); + qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); if (ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { xendev->gnttabdev = xengnttab_open(NULL, 0); @@ -619,6 +619,8 @@ static void xendev_class_init(ObjectClass *klass, void *data) dc->props = xendev_properties; set_bit(DEVICE_CATEGORY_MISC, dc->categories); + /* xen-backend devices can be plugged/unplugged dynamically */ + dc->user_creatable = true; } static const TypeInfo xendev_type_info = { diff --git a/include/block/aio.h b/include/block/aio.h index 406e32305a..e9aeeaec94 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -454,8 +454,14 @@ static inline void aio_disable_external(AioContext *ctx) */ static inline void aio_enable_external(AioContext *ctx) { - assert(ctx->external_disable_cnt > 0); - atomic_dec(&ctx->external_disable_cnt); + int old; + + old = atomic_fetch_dec(&ctx->external_disable_cnt); + assert(old > 0); + if (old == 1) { + /* Kick event loop so it re-arms file descriptors */ + aio_notify(ctx); + } } /** diff --git a/include/block/block.h b/include/block/block.h index 862eb56fc7..9b355e92d8 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -109,6 +109,7 @@ typedef struct HDGeometry { #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" #define BDRV_OPT_READ_ONLY "read-only" #define BDRV_OPT_DISCARD "discard" +#define BDRV_OPT_FORCE_SHARE "force-share" #define BDRV_SECTOR_BITS 9 @@ -120,29 +121,32 @@ typedef struct HDGeometry { #define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) /* - * Allocation status flags - * BDRV_BLOCK_DATA: data is read from a 
file returned by bdrv_get_block_status. - * BDRV_BLOCK_ZERO: sectors read as zero - * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by - * bdrv_get_block_status. + * Allocation status flags for bdrv_get_block_status() and friends. + * + * Public flags: + * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer + * BDRV_BLOCK_ZERO: offset reads as zero + * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this - * layer (as opposed to the backing file) - * BDRV_BLOCK_RAW: used internally to indicate that the request - * was answered by the raw driver and that one - * should look in bs->file directly. + * layer (short for DATA || ZERO), set by block layer * - * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in - * bs->file where sector data can be read from as raw data. + * Internal flag: + * BDRV_BLOCK_RAW: used internally to indicate that the request was + * answered by a passthrough driver such as raw and that the + * block layer should recompute the answer from bs->file. * - * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present. 
+ * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) + * represent the offset in the returned BDS that is allocated for the + * corresponding raw data; however, whether that offset actually contains + * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows: * * DATA ZERO OFFSET_VALID - * t t t sectors read as zero, bs->file is zero at offset - * t f t sectors read as valid from bs->file at offset - * f t t sectors preallocated, read as zero, bs->file not + * t t t sectors read as zero, returned file is zero at offset + * t f t sectors read as valid from file at offset + * f t t sectors preallocated, read as zero, returned file not * necessarily zero at offset * f f t sectors preallocated but read from backing_hd, - * bs->file contains garbage at offset + * returned file contains garbage at offset * t t f sectors preallocated, read as zero, unknown offset * t f f sectors read from unknown file or offset * f t f not allocated or unknown offset, read as zero @@ -224,6 +228,8 @@ enum { BLK_PERM_ALL = 0x1f, }; +char *bdrv_perm_names(uint64_t perm); + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -366,8 +372,6 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); void bdrv_invalidate_cache_all(Error **errp); int bdrv_inactivate_all(void); -void blk_resume_after_migration(Error **errp); - /* Ensure contents are flushed to disk. 
*/ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); @@ -434,6 +438,7 @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int64_t sector_num, int nb_sectors, int *pnum); bool bdrv_is_read_only(BlockDriverState *bs); +bool bdrv_is_writable(BlockDriverState *bs); int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); bool bdrv_is_sg(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 87739405d5..8d3724cce6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -165,6 +165,13 @@ struct BlockDriver { int64_t offset, int count, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, int64_t offset, int count); + + /* + * Building block for bdrv_block_status[_above]. The driver should + * answer only according to the current layer, and should not + * set BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h + * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. + */ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file); @@ -473,6 +480,12 @@ struct BdrvChildRole { void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + /* Notifies the parent that the child has been activated/inactivated (e.g. + * when migration is completing) and it can start/stop requesting + * permissions and doing I/O on it. 
*/ + void (*activate)(BdrvChild *child, Error **errp); + int (*inactivate)(BdrvChild *child); + void (*attach)(BdrvChild *child); void (*detach)(BdrvChild *child); }; @@ -518,6 +531,7 @@ struct BlockDriverState { bool valid_key; /* if true, a valid encryption key has been set */ bool sg; /* if true, the device is a /dev/sg* */ bool probed; /* if true, format was probed rather than specified */ + bool force_share; /* if true, always allow all shared permissions */ BlockDriver *drv; /* NULL means no media */ void *opaque; diff --git a/include/crypto/block.h b/include/crypto/block.h index 4a053a3ffa..013a435f1b 100644 --- a/include/crypto/block.h +++ b/include/crypto/block.h @@ -30,22 +30,22 @@ typedef struct QCryptoBlock QCryptoBlock; * and QCryptoBlockOpenOptions in qapi/crypto.json */ typedef ssize_t (*QCryptoBlockReadFunc)(QCryptoBlock *block, - void *opaque, size_t offset, uint8_t *buf, size_t buflen, + void *opaque, Error **errp); typedef ssize_t (*QCryptoBlockInitFunc)(QCryptoBlock *block, - void *opaque, size_t headerlen, + void *opaque, Error **errp); typedef ssize_t (*QCryptoBlockWriteFunc)(QCryptoBlock *block, - void *opaque, size_t offset, const uint8_t *buf, size_t buflen, + void *opaque, Error **errp); /** diff --git a/include/crypto/random.h b/include/crypto/random.h index a101353202..a07229ce96 100644 --- a/include/crypto/random.h +++ b/include/crypto/random.h @@ -40,5 +40,14 @@ int qcrypto_random_bytes(uint8_t *buf, size_t buflen, Error **errp); +/** + * qcrypto_random_init: + * @errp: pointer to a NULL-initialized error object + * + * Initializes the handles used by qcrypto_random_bytes + * + * Returns 0 on success, -1 on error + */ +int qcrypto_random_init(Error **errp); #endif /* QCRYPTO_RANDOM_H */ diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index eb07c2d43c..88d0738d76 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -392,4 +392,5 @@ GCC_FMT_ATTR(2, 3); void 
build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); +void build_slit(GArray *table_data, BIOSLinker *linker); #endif diff --git a/include/hw/boards.h b/include/hw/boards.h index 31d9c72fb0..76ce0219ff 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -32,6 +32,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, MachineClass *find_default_machine(void); extern MachineState *current_machine; +void machine_run_board_init(MachineState *machine); bool machine_usb(MachineState *machine); bool machine_kernel_irqchip_allowed(MachineState *machine); bool machine_kernel_irqchip_required(MachineState *machine); @@ -42,6 +43,9 @@ bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); void machine_register_compat_props(MachineState *machine); HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); +void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, + Error **errp); /** * CPUArchId: @@ -74,7 +78,10 @@ typedef struct { * of HotplugHandler object, which handles hotplug operation * for a given @dev. It may return NULL if @dev doesn't require * any actions to be performed by hotplug handler. - * @cpu_index_to_socket_id: + * @cpu_index_to_instance_props: + * used to provide @cpu_index to socket/core/thread number mapping, allowing + * legacy code to perform mapping from cpu_index to topology properties + * Returns: tuple of socket/core/thread ids given cpu_index belongs to. * used to provide @cpu_index to socket number mapping, allowing * a machine to group CPU threads belonging to the same socket/package * Returns: socket number given cpu_index belongs to. 
@@ -136,10 +143,13 @@ struct MachineClass { int minimum_page_bits; bool has_hotpluggable_cpus; int numa_mem_align_shift; + void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); - unsigned (*cpu_index_to_socket_id)(unsigned cpu_index); + CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine, + unsigned cpu_index); const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); }; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index c1288f974d..9c5437dabc 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -21,6 +21,7 @@ #include "hw/boards.h" #include "hw/sysbus.h" +#include "hw/ipmi/ipmi.h" #include "hw/ppc/pnv_lpc.h" #include "hw/ppc/pnv_psi.h" #include "hw/ppc/pnv_occ.h" @@ -118,8 +119,6 @@ typedef struct PnvChipClass { #define POWERNV_MACHINE(obj) \ OBJECT_CHECK(PnvMachineState, (obj), TYPE_POWERNV_MACHINE) -typedef struct IPMIBmc IPMIBmc; - typedef struct PnvMachineState { /*< private >*/ MachineState parent_obj; diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h index ccf969af94..023b4f0fec 100644 --- a/include/hw/ppc/pnv_lpc.h +++ b/include/hw/ppc/pnv_lpc.h @@ -19,12 +19,12 @@ #ifndef _PPC_PNV_LPC_H #define _PPC_PNV_LPC_H +#include "hw/ppc/pnv_psi.h" + #define TYPE_PNV_LPC "pnv-lpc" #define PNV_LPC(obj) \ OBJECT_CHECK(PnvLpcController, (obj), TYPE_PNV_LPC) -typedef struct PnvPsi PnvPsi; - typedef struct PnvLpcController { DeviceState parent; diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h index f8ec330abf..82f299dc76 100644 --- a/include/hw/ppc/pnv_occ.h +++ b/include/hw/ppc/pnv_occ.h @@ -19,11 +19,11 @@ #ifndef _PPC_PNV_OCC_H #define _PPC_PNV_OCC_H +#include "hw/ppc/pnv_psi.h" + #define TYPE_PNV_OCC "pnv-occ" #define PNV_OCC(obj) OBJECT_CHECK(PnvOCC, (obj), TYPE_PNV_OCC) -typedef struct PnvPsi PnvPsi; - typedef struct PnvOCC { DeviceState xd; diff --git 
a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index 3c35665221..93051e9ecf 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -27,6 +27,7 @@ typedef struct sPAPRCPUCore { /*< public >*/ void *threads; + int node_id; } sPAPRCPUCore; typedef struct sPAPRCPUCoreClass { diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index c215dc72a4..05e6acbb35 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -29,6 +29,7 @@ #define XICS_H #include "hw/qdev.h" +#include "target/ppc/cpu-qom.h" #define XICS_IPI 0x2 #define XICS_BUID 0x1 @@ -46,7 +47,6 @@ typedef struct ICSStateClass ICSStateClass; typedef struct ICSState ICSState; typedef struct ICSIRQState ICSIRQState; typedef struct XICSFabric XICSFabric; -typedef struct PowerPCCPU PowerPCCPU; #define TYPE_ICP "icp" #define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP) @@ -144,6 +144,8 @@ struct ICSIRQState { #define XICS_STATUS_SENT 0x2 #define XICS_STATUS_REJECTED 0x4 #define XICS_STATUS_MASKED_PENDING 0x8 +#define XICS_STATUS_PRESENTED 0x10 +#define XICS_STATUS_QUEUED 0x20 uint8_t status; /* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */ #define XICS_FLAGS_IRQ_LSI 0x1 diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 4bf86b0ad8..e69489ec6c 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -103,16 +103,17 @@ typedef struct DeviceClass { Property *props; /* - * Shall we hide this device model from -device / device_add? + * Can this device be instantiated with -device / device_add? * All devices should support instantiation with device_add, and * this flag should not exist. But we're not there, yet. Some * devices fail to instantiate with cryptic error messages. * Others instantiate, but don't work. Exposing users to such - * behavior would be cruel; this flag serves to protect them. It - * should never be set without a comment explaining why it is set. 
+ * behavior would be cruel; clearing this flag will protect them. + * It should never be cleared without a comment explaining why it + * is cleared. * TODO remove once we're there */ - bool cannot_instantiate_with_device_add_yet; + bool user_creatable; bool hotpluggable; /* callbacks */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 1d69fa7a8f..d206fc93dd 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -134,12 +134,12 @@ extern PropertyInfo qdev_prop_arraylen; * device_add, so add code like this: * |* Reason: pointer property "NAME-OF-YOUR-PROP" *| * DeviceClass *dc = DEVICE_CLASS(class); - * dc->cannot_instantiate_with_device_add_yet = true; + * dc->user_creatable = false; * * - If the property may safely remain null, document it like this: * |* * * Note: pointer property "interrupt_vector" may remain null, thus - * * no need for dc->cannot_instantiate_with_device_add_yet = true; + * * no need for dc->user_creatable = false; * *| */ #define DEFINE_PROP_PTR(_n, _s, _f) \ diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index f3ffdceca4..83f474ffc3 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -169,6 +169,7 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); void virtio_gpu_virgl_fence_poll(VirtIOGPU *g); void virtio_gpu_virgl_reset(VirtIOGPU *g); +void virtio_gpu_gl_block(void *opaque, bool block); int virtio_gpu_virgl_init(VirtIOGPU *g); #endif diff --git a/include/io/channel-file.h b/include/io/channel-file.h index d2462c2ed7..79245f1183 100644 --- a/include/io/channel-file.h +++ b/include/io/channel-file.h @@ -71,7 +71,7 @@ qio_channel_file_new_fd(int fd); /** * qio_channel_file_new_path: - * @fd: the file descriptor + * @path: the file path * @flags: the open flags (O_RDONLY|O_WRONLY|O_RDWR, etc) * @mode: the file creation mode if O_WRONLY is set in @flags * @errp: pointer to initialized error 
object diff --git a/include/io/channel.h b/include/io/channel.h index 5d48906998..db9bb022a1 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -315,7 +315,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, Error **errp); /** - * qio_channel_writev: + * qio_channel_write: * @ioc: the channel object * @buf: the memory regions to send data from * @buflen: the length of @buf diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 1881284cb5..3f0926cf40 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -201,16 +201,6 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr, return find_next_zero_bit(addr, size, 0); } -static inline unsigned long hweight_long(unsigned long w) -{ - unsigned long count; - - for (count = 0; w; w >>= 1) { - count += w & 1; - } - return count; -} - /** * rol8 - rotate an 8-bit value left * @word: value to rotate diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 122ff06ff6..1c9f5e260c 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -341,6 +341,9 @@ int qemu_close(int fd); #ifndef _WIN32 int qemu_dup(int fd); #endif +int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive); +int qemu_unlock_fd(int fd, int64_t start, int64_t len); +int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive); #if defined(__HAIKU__) && defined(__i386__) #define FMT_pid "%ld" diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index f08d327aec..7d8505730c 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -97,5 +97,6 @@ typedef struct SSIBus SSIBus; typedef struct uWireSlave uWireSlave; typedef struct VirtIODevice VirtIODevice; typedef struct Visitor Visitor; +typedef struct node_info NodeInfo; #endif /* QEMU_TYPEDEFS_H */ diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 5d10359c8f..55214ce131 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -258,6 +258,8 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, 
run_on_cpu_data data); struct qemu_work_item; +#define CPU_UNSET_NUMA_NODE_ID -1 + /** * CPUState: * @cpu_index: CPU index (informative). diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 8f09dcf918..7ffde5b119 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -8,6 +8,7 @@ #include "hw/boards.h" extern int nb_numa_nodes; /* Number of NUMA nodes */ +extern bool have_numa_distance; struct numa_addr_range { ram_addr_t mem_start; @@ -15,24 +16,23 @@ struct numa_addr_range { QLIST_ENTRY(numa_addr_range) entry; }; -typedef struct node_info { +struct node_info { uint64_t node_mem; - unsigned long *node_cpu; struct HostMemoryBackend *node_memdev; bool present; QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ -} NodeInfo; + uint8_t distance[MAX_NODES]; +}; extern NodeInfo numa_info[MAX_NODES]; -void parse_numa_opts(MachineClass *mc); -void numa_post_machine_init(void); +void parse_numa_opts(MachineState *ms); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); uint32_t numa_get_node(ram_addr_t addr, Error **errp); - -/* on success returns node index in numa_info, - * on failure returns nb_numa_nodes */ -int numa_get_node_for_cpu(int idx); +void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); +void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); #endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 15656b7c36..be9e22c955 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -166,6 +166,10 @@ extern int mem_prealloc; #define MAX_NODES 128 #define NUMA_NODE_UNASSIGNED MAX_NODES +#define NUMA_DISTANCE_MIN 10 +#define NUMA_DISTANCE_DEFAULT 20 +#define NUMA_DISTANCE_MAX 254 +#define NUMA_DISTANCE_UNREACHABLE 255 
#define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h index b8c93b9bce..01daaad00c 100644 --- a/include/sysemu/xen-mapcache.h +++ b/include/sysemu/xen-mapcache.h @@ -17,7 +17,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr start_addr, void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque); uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, - uint8_t lock); + uint8_t lock, bool dma); ram_addr_t xen_ram_addr_from_mapcache(void *ptr); void xen_invalidate_map_cache_entry(uint8_t *buffer); void xen_invalidate_map_cache(void); @@ -31,7 +31,8 @@ static inline void xen_map_cache_init(phys_offset_to_gaddr_t f, static inline uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, - uint8_t lock) + uint8_t lock, + bool dma) { abort(); } diff --git a/include/ui/console.h b/include/ui/console.h index d759338816..7262bef6d3 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -527,4 +527,7 @@ static inline void early_gtk_display_init(int opengl) } #endif +/* egl-headless.c */ +void egl_headless_init(void); + #endif diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h index 88a13e827b..c785d60e91 100644 --- a/include/ui/egl-helpers.h +++ b/include/ui/egl-helpers.h @@ -21,7 +21,8 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc); EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win); -int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug); +int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy); +int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy); EGLContext qemu_egl_init_ctx(void); #endif /* EGL_HELPERS_H */ diff --git a/include/ui/input.h b/include/ui/input.h index d06a12dd4c..3cfd0f3363 100644 --- a/include/ui/input.h +++ b/include/ui/input.h @@ -8,7 +8,8 @@ #define INPUT_EVENT_MASK_REL (1<<INPUT_EVENT_KIND_REL) #define INPUT_EVENT_MASK_ABS (1<<INPUT_EVENT_KIND_ABS) -#define INPUT_EVENT_ABS_SIZE 0x8000 
+#define INPUT_EVENT_ABS_MIN 0x0000 +#define INPUT_EVENT_ABS_MAX 0x7FFF typedef struct QemuInputHandler QemuInputHandler; typedef struct QemuInputHandlerState QemuInputHandlerState; @@ -54,12 +55,14 @@ void qemu_input_update_buttons(QemuConsole *src, uint32_t *button_map, uint32_t button_old, uint32_t button_new); bool qemu_input_is_absolute(void); -int qemu_input_scale_axis(int value, int size_in, int size_out); +int qemu_input_scale_axis(int value, + int min_in, int max_in, + int min_out, int max_out); InputEvent *qemu_input_event_new_move(InputEventKind kind, InputAxis axis, int value); void qemu_input_queue_rel(QemuConsole *src, InputAxis axis, int value); -void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, - int value, int size); +void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value, + int min_in, int max_in); void qemu_input_check_mode_change(void); void qemu_add_mouse_mode_change_notifier(Notifier *notify); diff --git a/linux-user/main.c b/linux-user/main.c index 79d621b872..ad03c9e8b2 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -4229,10 +4229,7 @@ int main(int argc, char **argv, char **envp) qemu_init_cpu_list(); module_call_init(MODULE_INIT_QOM); - if ((envlist = envlist_create()) == NULL) { - (void) fprintf(stderr, "Unable to allocate envlist\n"); - exit(EXIT_FAILURE); - } + envlist = envlist_create(); /* add current environment into the list */ for (wrk = environ; *wrk != NULL; wrk++) { @@ -4429,10 +4426,10 @@ int main(int argc, char **argv, char **envp) } for (wrk = target_environ; *wrk; wrk++) { - free(*wrk); + g_free(*wrk); } - free(target_environ); + g_free(target_environ); if (qemu_loglevel_mask(CPU_LOG_PAGE)) { qemu_log("guest_base 0x%lx\n", guest_base); diff --git a/migration/migration.c b/migration/migration.c index 799952ce99..a5ade23e24 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -338,20 +338,11 @@ static void process_incoming_migration_bh(void *opaque) Error *local_err = NULL; 
MigrationIncomingState *mis = opaque; - /* Make sure all file formats flush their mutable metadata */ + /* Make sure all file formats flush their mutable metadata. + * If we get an error here, just don't restart the VM yet. */ bdrv_invalidate_cache_all(&local_err); if (local_err) { - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_FAILED); error_report_err(local_err); - migrate_decompress_threads_join(); - exit(EXIT_FAILURE); - } - - /* If we get an error here, just don't restart the VM yet. */ - blk_resume_after_migration(&local_err); - if (local_err) { - error_free(local_err); local_err = NULL; autostart = false; } diff --git a/migration/savevm.c b/migration/savevm.c index 352a8f23b5..7f66d58a7e 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1612,16 +1612,11 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) qemu_announce_self(); - /* Make sure all file formats flush their mutable metadata */ + /* Make sure all file formats flush their mutable metadata. + * If we get an error here, just don't restart the VM yet. */ bdrv_invalidate_cache_all(&local_err); if (local_err) { error_report_err(local_err); - } - - /* If we get an error here, just don't restart the VM yet. */ - blk_resume_after_migration(&local_err); - if (local_err) { - error_free(local_err); local_err = NULL; autostart = false; } @@ -3254,7 +3254,7 @@ void device_add_completion(ReadLineState *rs, int nb_args, const char *str) TYPE_DEVICE); name = object_class_get_name(OBJECT_CLASS(dc)); - if (!dc->cannot_instantiate_with_device_add_yet + if (dc->user_creatable && !strncmp(name, str, len)) { readline_add_completion(rs, name); } @@ -51,6 +51,7 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. 
* For all nodes, nodeid < max_numa_nodeid */ int nb_numa_nodes; +bool have_numa_distance; NodeInfo numa_info[MAX_NODES]; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) @@ -140,10 +141,12 @@ uint32_t numa_get_node(ram_addr_t addr, Error **errp) return -1; } -static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) +static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, + QemuOpts *opts, Error **errp) { uint16_t nodenr; uint16List *cpus = NULL; + MachineClass *mc = MACHINE_GET_CLASS(ms); if (node->has_nodeid) { nodenr = node->nodeid; @@ -162,7 +165,12 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) return; } + if (!mc->cpu_index_to_instance_props) { + error_report("NUMA is not supported by this machine-type"); + exit(1); + } for (cpus = node->cpus; cpus; cpus = cpus->next) { + CpuInstanceProperties props; if (cpus->value >= max_cpus) { error_setg(errp, "CPU index (%" PRIu16 ")" @@ -170,11 +178,14 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) cpus->value, max_cpus); return; } - bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); + props = mc->cpu_index_to_instance_props(ms, cpus->value); + props.node_id = nodenr; + props.has_node_id = true; + machine_set_cpu_numa_node(ms, &props, &error_fatal); } if (node->has_mem && node->has_memdev) { - error_setg(errp, "qemu: cannot specify both mem= and memdev="); + error_setg(errp, "cannot specify both mem= and memdev="); return; } @@ -182,7 +193,7 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) have_memdevs = node->has_memdev; } if (node->has_memdev != have_memdevs) { - error_setg(errp, "qemu: memdev option must be specified for either " + error_setg(errp, "memdev option must be specified for either " "all or no nodes"); return; } @@ -212,9 +223,47 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) max_numa_nodeid = 
MAX(max_numa_nodeid, nodenr + 1); } +static void parse_numa_distance(NumaDistOptions *dist, Error **errp) +{ + uint16_t src = dist->src; + uint16_t dst = dist->dst; + uint8_t val = dist->val; + + if (src >= MAX_NODES || dst >= MAX_NODES) { + error_setg(errp, + "Invalid node %" PRIu16 + ", max possible could be %" PRIu16, + MAX(src, dst), MAX_NODES); + return; + } + + if (!numa_info[src].present || !numa_info[dst].present) { + error_setg(errp, "Source/Destination NUMA node is missing. " + "Please use '-numa node' option to declare it first."); + return; + } + + if (val < NUMA_DISTANCE_MIN) { + error_setg(errp, "NUMA distance (%" PRIu8 ") is invalid, " + "it shouldn't be less than %d.", + val, NUMA_DISTANCE_MIN); + return; + } + + if (src == dst && val != NUMA_DISTANCE_MIN) { + error_setg(errp, "Local distance of node %d should be %d.", + src, NUMA_DISTANCE_MIN); + return; + } + + numa_info[src].distance[dst] = val; + have_numa_distance = true; +} + static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) { NumaOptions *object = NULL; + MachineState *ms = opaque; Error *err = NULL; { @@ -229,12 +278,33 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) switch (object->type) { case NUMA_OPTIONS_TYPE_NODE: - numa_node_parse(&object->u.node, opts, &err); + parse_numa_node(ms, &object->u.node, opts, &err); if (err) { goto end; } nb_numa_nodes++; break; + case NUMA_OPTIONS_TYPE_DIST: + parse_numa_distance(&object->u.dist, &err); + if (err) { + goto end; + } + break; + case NUMA_OPTIONS_TYPE_CPU: + if (!object->u.cpu.has_node_id) { + error_setg(&err, "Missing mandatory node-id property"); + goto end; + } + if (!numa_info[object->u.cpu.node_id].present) { + error_setg(&err, "Invalid node-id=%" PRId64 ", NUMA node must be " + "defined with -numa node,nodeid=ID before it's used with " + "-numa cpu,node-id=ID", object->u.cpu.node_id); + goto end; + } + + machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu), + &err); + break; 
default: abort(); } @@ -249,60 +319,118 @@ end: return 0; } -static char *enumerate_cpus(unsigned long *cpus, int max_cpus) +/* If all node pair distances are symmetric, then only distances + * in one direction are enough. If there is even one asymmetric + * pair, though, then all distances must be provided. The + * distance from a node to itself is always NUMA_DISTANCE_MIN, + * so providing it is never necessary. + */ +static void validate_numa_distance(void) { - int cpu; - bool first = true; - GString *s = g_string_new(NULL); + int src, dst; + bool is_asymmetrical = false; + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = src; dst < nb_numa_nodes; dst++) { + if (numa_info[src].distance[dst] == 0 && + numa_info[dst].distance[src] == 0) { + if (src != dst) { + error_report("The distance between node %d and %d is " + "missing, at least one distance value " + "between each nodes should be provided.", + src, dst); + exit(EXIT_FAILURE); + } + } - for (cpu = find_first_bit(cpus, max_cpus); - cpu < max_cpus; - cpu = find_next_bit(cpus, max_cpus, cpu + 1)) { - g_string_append_printf(s, "%s%d", first ? 
"" : " ", cpu); - first = false; + if (numa_info[src].distance[dst] != 0 && + numa_info[dst].distance[src] != 0 && + numa_info[src].distance[dst] != + numa_info[dst].distance[src]) { + is_asymmetrical = true; + } + } + } + + if (is_asymmetrical) { + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + if (src != dst && numa_info[src].distance[dst] == 0) { + error_report("At least one asymmetrical pair of " + "distances is given, please provide distances " + "for both directions of all node pairs."); + exit(EXIT_FAILURE); + } + } + } } - return g_string_free(s, FALSE); } -static void validate_numa_cpus(void) +static void complete_init_numa_distance(void) { - int i; - unsigned long *seen_cpus = bitmap_new(max_cpus); + int src, dst; - for (i = 0; i < nb_numa_nodes; i++) { - if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, max_cpus)) { - bitmap_and(seen_cpus, seen_cpus, - numa_info[i].node_cpu, max_cpus); - error_report("CPU(s) present in multiple NUMA nodes: %s", - enumerate_cpus(seen_cpus, max_cpus)); - g_free(seen_cpus); - exit(EXIT_FAILURE); + /* Fixup NUMA distance by symmetric policy because if it is an + * asymmetric distance table, it should be a complete table and + * there would not be any missing distance except local node, which + * is verified by validate_numa_distance above. 
+ */ + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + if (numa_info[src].distance[dst] == 0) { + if (src == dst) { + numa_info[src].distance[dst] = NUMA_DISTANCE_MIN; + } else { + numa_info[src].distance[dst] = numa_info[dst].distance[src]; + } + } } - bitmap_or(seen_cpus, seen_cpus, - numa_info[i].node_cpu, max_cpus); } +} - if (!bitmap_full(seen_cpus, max_cpus)) { - char *msg; - bitmap_complement(seen_cpus, seen_cpus, max_cpus); - msg = enumerate_cpus(seen_cpus, max_cpus); - error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg); - error_report("warning: All CPU(s) up to maxcpus should be described " - "in NUMA config"); - g_free(msg); +void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size) +{ + int i; + uint64_t usedmem = 0; + + /* Align each node according to the alignment + * requirements of the machine class + */ + + for (i = 0; i < nb_nodes - 1; i++) { + nodes[i].node_mem = (size / nb_nodes) & + ~((1 << mc->numa_mem_align_shift) - 1); + usedmem += nodes[i].node_mem; } - g_free(seen_cpus); + nodes[i].node_mem = size - usedmem; } -void parse_numa_opts(MachineClass *mc) +void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size) { int i; + uint64_t usedmem = 0, node_mem; + uint64_t granularity = size / nb_nodes; + uint64_t propagate = 0; + + for (i = 0; i < nb_nodes - 1; i++) { + node_mem = (granularity + propagate) & + ~((1 << mc->numa_mem_align_shift) - 1); + propagate = granularity + propagate - node_mem; + nodes[i].node_mem = node_mem; + usedmem += node_mem; + } + nodes[i].node_mem = size - usedmem; +} - for (i = 0; i < MAX_NODES; i++) { - numa_info[i].node_cpu = bitmap_new(max_cpus); - } +void parse_numa_opts(MachineState *ms) +{ + int i; + const CPUArchIdList *possible_cpus; + MachineClass *mc = MACHINE_GET_CLASS(ms); - if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, NULL, NULL)) { + if 
(qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) { exit(1); } @@ -336,17 +464,8 @@ void parse_numa_opts(MachineClass *mc) } } if (i == nb_numa_nodes) { - uint64_t usedmem = 0; - - /* Align each node according to the alignment - * requirements of the machine class - */ - for (i = 0; i < nb_numa_nodes - 1; i++) { - numa_info[i].node_mem = (ram_size / nb_numa_nodes) & - ~((1 << mc->numa_mem_align_shift) - 1); - usedmem += numa_info[i].node_mem; - } - numa_info[i].node_mem = ram_size - usedmem; + assert(mc->numa_auto_assign_ram); + mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size); } numa_total = 0; @@ -366,47 +485,52 @@ void parse_numa_opts(MachineClass *mc) numa_set_mem_ranges(); - for (i = 0; i < nb_numa_nodes; i++) { - if (!bitmap_empty(numa_info[i].node_cpu, max_cpus)) { + /* assign CPUs to nodes using board provided default mapping */ + if (!mc->cpu_index_to_instance_props || !mc->possible_cpu_arch_ids) { + error_report("default CPUs to NUMA node mapping isn't supported"); + exit(1); + } + + possible_cpus = mc->possible_cpu_arch_ids(ms); + for (i = 0; i < possible_cpus->len; i++) { + if (possible_cpus->cpus[i].props.has_node_id) { break; } } - /* Historically VCPUs were assigned in round-robin order to NUMA - * nodes. However it causes issues with guest not handling it nice - * in case where cores/threads from a multicore CPU appear on - * different nodes. So allow boards to override default distribution - * rule grouping VCPUs by socket so that VCPUs from the same socket - * would be on the same node. 
- */ - if (i == nb_numa_nodes) { + + /* no CPUs are assigned to NUMA nodes */ + if (i == possible_cpus->len) { for (i = 0; i < max_cpus; i++) { - unsigned node_id = i % nb_numa_nodes; - if (mc->cpu_index_to_socket_id) { - node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes; - } + CpuInstanceProperties props; + /* fetch default mapping from board and enable it */ + props = mc->cpu_index_to_instance_props(ms, i); + props.has_node_id = true; - set_bit(i, numa_info[node_id].node_cpu); + machine_set_cpu_numa_node(ms, &props, &error_fatal); } } - validate_numa_cpus(); - } else { - numa_set_mem_node_id(0, ram_size, 0); - } -} - -void numa_post_machine_init(void) -{ - CPUState *cpu; - int i; + /* QEMU needs at least all unique node pair distances to build + * the whole NUMA distance table. QEMU treats the distance table + * as symmetric by default, i.e. distance A->B == distance B->A. + * Thus, QEMU is able to complete the distance table + * initialization even though only distance A->B is provided and + * distance B->A is not. QEMU knows the distance of a node to + * itself is always 10, so A->A distances may be omitted. When + * the distances of two nodes of a pair differ, i.e. distance + * A->B != distance B->A, then that means the distance table is + * asymmetric. In this case, the distances for both directions + * of all node pairs are required. + */ + if (have_numa_distance) { + /* Validate enough NUMA distance information was provided. */ + validate_numa_distance(); - CPU_FOREACH(cpu) { - for (i = 0; i < nb_numa_nodes; i++) { - assert(cpu->cpu_index < max_cpus); - if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { - cpu->numa_node = i; - } + /* Validation succeeded, now fill in any missing distances. 
*/ + complete_init_numa_distance(); } + } else { + numa_set_mem_node_id(0, ram_size, 0); } } @@ -560,20 +684,6 @@ MemdevList *qmp_query_memdev(Error **errp) return list; } -int numa_get_node_for_cpu(int idx) -{ - int i; - - assert(idx < max_cpus); - - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(idx, numa_info[i].node_cpu)) { - break; - } - } - return i; -} - void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); diff --git a/pc-bios/README b/pc-bios/README index dcead369bf..ebc699d322 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -47,3 +47,6 @@ (OpenPower Abstraction Layer) firmware for OpenPOWER systems. It can run an hypervisor OS or simply a host OS on the "baremetal" platform, also known as the PowerNV (Non-Virtualized) platform. + +- QemuMacDrivers (https://github.com/ozbenh/QemuMacDrivers) is a project to + provide virtualised drivers for PPC MacOS guests. diff --git a/pc-bios/qemu_vga.ndrv b/pc-bios/qemu_vga.ndrv Binary files differ new file mode 100644 index 0000000000..6e02f74d61 --- /dev/null +++ b/pc-bios/qemu_vga.ndrv diff --git a/qapi-schema.json b/qapi-schema.json index 5728b7f363..80603cfc51 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1325,6 +1325,9 @@ # # @thread_id: ID of the underlying host thread # +# @props: properties describing which node/socket/core/thread the +# virtual CPU belongs to, provided if supported by board (since 2.10) +# # @arch: architecture of the cpu, which determines which additional fields # will be listed (since 2.6) # @@ -1335,7 +1338,8 @@ ## { 'union': 'CpuInfo', 'base': {'CPU': 'int', 'current': 'bool', 'halted': 'bool', - 'qom_path': 'str', 'thread_id': 'int', 'arch': 'CpuInfoArch' }, + 'qom_path': 'str', 'thread_id': 'int', + '*props': 'CpuInstanceProperties', 'arch': 'CpuInfoArch' }, 'discriminator': 'arch', 'data': { 'x86': 'CpuInfoX86', 'sparc': 'CpuInfoSPARC', @@ -5682,10 +5686,16 @@ ## # @NumaOptionsType: # +# @node: NUMA nodes 
configuration +# +# @dist: NUMA distance configuration (since 2.10) +# +# @cpu: property based CPU(s) to node mapping (Since: 2.10) +# # Since: 2.1 ## { 'enum': 'NumaOptionsType', - 'data': [ 'node' ] } + 'data': [ 'node', 'dist', 'cpu' ] } ## # @NumaOptions: @@ -5698,7 +5708,9 @@ 'base': { 'type': 'NumaOptionsType' }, 'discriminator': 'type', 'data': { - 'node': 'NumaNodeOptions' }} + 'node': 'NumaNodeOptions', + 'dist': 'NumaDistOptions', + 'cpu': 'NumaCpuOptions' }} ## # @NumaNodeOptions: @@ -5727,6 +5739,41 @@ '*memdev': 'str' }} ## +# @NumaDistOptions: +# +# Set the distance between 2 NUMA nodes. +# +# @src: source NUMA node. +# +# @dst: destination NUMA node. +# +# @val: NUMA distance from source node to destination node. +# When a node is unreachable from another node, set the distance +# between them to 255. +# +# Since: 2.10 +## +{ 'struct': 'NumaDistOptions', + 'data': { + 'src': 'uint16', + 'dst': 'uint16', + 'val': 'uint8' }} + +## +# @NumaCpuOptions: +# +# Option "-numa cpu" overrides default cpu to node mapping. +# It accepts the same set of cpu properties as returned by +# query-hotpluggable-cpus[].props, where node-id could be used to +# override default node mapping. +# +# Since: 2.10 +## +{ 'struct': 'NumaCpuOptions', + 'base': 'CpuInstanceProperties', + 'data' : {} } + +## # @HostMemPolicy: # # Host memory policy types diff --git a/qapi/block-core.json b/qapi/block-core.json index 614181b553..ea0b3e8b13 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2127,11 +2127,15 @@ # # @filename: path to the image file # @aio: AIO backend (default: threads) (since: 2.8) +# @locking: whether to enable file locking. 
If set to 'auto', only enable +# when Open File Descriptor (OFD) locking API is available +# (default: auto, since 2.10) # # Since: 2.9 ## { 'struct': 'BlockdevOptionsFile', 'data': { 'filename': 'str', + '*locking': 'OnOffAuto', '*aio': 'BlockdevAioOptions' } } ## @@ -2430,8 +2434,33 @@ # # @config: filename of the configuration file # -# @align: required alignment for requests in bytes, -# must be power of 2, or 0 for default +# @align: required alignment for requests in bytes, must be +# positive power of 2, or 0 for default +# +# @max-transfer: maximum size for I/O transfers in bytes, must be +# positive multiple of @align and of the underlying +# file's request alignment (but need not be a power of +# 2), or 0 for default (since 2.10) +# +# @opt-write-zero: preferred alignment for write zero requests in bytes, +# must be positive multiple of @align and of the +# underlying file's request alignment (but need not be a +# power of 2), or 0 for default (since 2.10) +# +# @max-write-zero: maximum size for write zero requests in bytes, must be +# positive multiple of @align, of @opt-write-zero, and of +# the underlying file's request alignment (but need not +# be a power of 2), or 0 for default (since 2.10) +# +# @opt-discard: preferred alignment for discard requests in bytes, must +# be positive multiple of @align and of the underlying +# file's request alignment (but need not be a power of +# 2), or 0 for default (since 2.10) +# +# @max-discard: maximum size for discard requests in bytes, must be +# positive multiple of @align, of @opt-discard, and of +# the underlying file's request alignment (but need not +# be a power of 2), or 0 for default (since 2.10) # # @inject-error: array of error injection descriptions # @@ -2442,7 +2471,9 @@ { 'struct': 'BlockdevOptionsBlkdebug', 'data': { 'image': 'BlockdevRef', '*config': 'str', - '*align': 'int', + '*align': 'int', '*max-transfer': 'int32', + '*opt-write-zero': 'int32', '*max-write-zero': 'int32', + '*opt-discard': 
'int32', '*max-discard': 'int32', '*inject-error': ['BlkdebugInjectErrorOptions'], '*set-state': ['BlkdebugSetStateOptions'] } } @@ -2782,11 +2813,15 @@ # "name1=content1; name2=content2;" as explained by # CURLOPT_COOKIE(3). Defaults to no cookies. # +# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a +# secure way. See @cookie for the format. (since 2.10) +# # Since: 2.9 ## { 'struct': 'BlockdevOptionsCurlHttp', 'base': 'BlockdevOptionsCurlBase', - 'data': { '*cookie': 'str' } } + 'data': { '*cookie': 'str', + '*cookie-secret': 'str'} } ## # @BlockdevOptionsCurlHttps: @@ -2801,12 +2836,16 @@ # @sslverify: Whether to verify the SSL certificate's validity (defaults to # true) # +# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a +# secure way. See @cookie for the format. (since 2.10) +# # Since: 2.9 ## { 'struct': 'BlockdevOptionsCurlHttps', 'base': 'BlockdevOptionsCurlBase', 'data': { '*cookie': 'str', - '*sslverify': 'bool' } } + '*sslverify': 'bool', + '*cookie-secret': 'str'} } ## # @BlockdevOptionsCurlFtp: @@ -2898,6 +2937,8 @@ # (default: false) # @detect-zeroes: detect and optimize zero writes (Since 2.1) # (default: off) +# @force-share: force share all permission on added nodes. +# Requires read-only=true. (Since 2.10) # # Remaining options are determined by the block driver. 
# @@ -2909,6 +2950,7 @@ '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', '*read-only': 'bool', + '*force-share': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions' }, 'discriminator': 'driver', 'data': { diff --git a/qdev-monitor.c b/qdev-monitor.c index e61d596ef7..3ecbf0bd25 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -114,7 +114,7 @@ static void qdev_print_devinfo(DeviceClass *dc) if (dc->desc) { error_printf(", desc \"%s\"", dc->desc); } - if (dc->cannot_instantiate_with_device_add_yet) { + if (!dc->user_creatable) { error_printf(", no-user"); } error_printf("\n"); @@ -156,7 +156,7 @@ static void qdev_print_devinfos(bool show_no_user) ? !test_bit(i, dc->categories) : !bitmap_empty(dc->categories, DEVICE_CATEGORY_MAX)) || (!show_no_user - && dc->cannot_instantiate_with_device_add_yet)) { + && !dc->user_creatable)) { continue; } if (!cat_printed) { @@ -241,7 +241,7 @@ static DeviceClass *qdev_get_device_class(const char **driver, Error **errp) } dc = DEVICE_CLASS(oc); - if (dc->cannot_instantiate_with_device_add_yet || + if (!dc->user_creatable || (qdev_hotplug && !dc->hotpluggable)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", "pluggable device type"); diff --git a/qemu-doc.texi b/qemu-doc.texi index 794ab4a080..de0cc30790 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -182,7 +182,7 @@ Gravis Ultrasound GF1 sound card @item CS4231A compatible sound card @item -PCI UHCI USB controller and a virtual USB hub. +PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1 hub. @end itemize SMP is supported with up to 255 CPUs. @@ -1357,10 +1357,10 @@ monitor (@pxref{pcsys_keys}). @node pcsys_usb @section USB emulation -QEMU emulates a PCI UHCI USB controller. You can virtually plug -virtual USB devices or real host USB devices (experimental, works only -on Linux hosts). QEMU will automatically create and connect virtual USB hubs -as necessary to connect multiple USB devices. 
+QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can +plug virtual USB devices or real host USB devices (only works with certain +host operating systems). QEMU will automatically create and connect virtual +USB hubs as necessary to connect multiple USB devices. @menu * usb_devices:: @@ -1369,53 +1369,64 @@ as necessary to connect multiple USB devices. @node usb_devices @subsection Connecting USB devices -USB devices can be connected with the @option{-usbdevice} commandline option -or the @code{usb_add} monitor command. Available devices are: +USB devices can be connected with the @option{-device usb-...} command line +option or the @code{device_add} monitor command. Available devices are: @table @code -@item mouse +@item usb-mouse Virtual Mouse. This will override the PS/2 mouse emulation when activated. -@item tablet +@item usb-tablet Pointer device that uses absolute coordinates (like a touchscreen). This means QEMU is able to report the mouse position without having to grab the mouse. Also overrides the PS/2 mouse emulation when activated. -@item disk:@var{file} -Mass storage device based on @var{file} (@pxref{disk_images}) -@item host:@var{bus.addr} -Pass through the host device identified by @var{bus.addr} -(Linux only) -@item host:@var{vendor_id:product_id} -Pass through the host device identified by @var{vendor_id:product_id} -(Linux only) -@item wacom-tablet +@item usb-storage,drive=@var{drive_id} +Mass storage device backed by @var{drive_id} (@pxref{disk_images}) +@item usb-uas +USB attached SCSI device, see +@url{http://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} +for details +@item usb-bot +Bulk-only transport storage device, see +@url{http://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} +for details here, too +@item usb-mtp,x-root=@var{dir} +Media transfer protocol device, using @var{dir} as root of the file tree +that is presented to the guest. 
+@item usb-host,hostbus=@var{bus},hostaddr=@var{addr} +Pass through the host device identified by @var{bus} and @var{addr} +@item usb-host,vendorid=@var{vendor},productid=@var{product} +Pass through the host device identified by @var{vendor} and @var{product} ID +@item usb-wacom-tablet Virtual Wacom PenPartner tablet. This device is similar to the @code{tablet} above but it can be used with the tslib library because in addition to touch coordinates it reports touch pressure. -@item keyboard +@item usb-kbd Standard USB keyboard. Will override the PS/2 keyboard (if present). -@item serial:[vendorid=@var{vendor_id}][,product_id=@var{product_id}]:@var{dev} +@item usb-serial,chardev=@var{id} Serial converter. This emulates an FTDI FT232BM chip connected to host character -device @var{dev}. The available character devices are the same as for the -@code{-serial} option. The @code{vendorid} and @code{productid} options can be -used to override the default 0403:6001. For instance, -@example -usb_add serial:productid=FA00:tcp:192.168.0.2:4444 -@end example -will connect to tcp port 4444 of ip 192.168.0.2, and plug that to the virtual -serial converter, faking a Matrix Orbital LCD Display (USB ID 0403:FA00). -@item braille +device @var{id}. +@item usb-braille,chardev=@var{id} Braille device. This will use BrlAPI to display the braille output on a real -or fake device. -@item net:@var{options} -Network adapter that supports CDC ethernet and RNDIS protocols. @var{options} -specifies NIC options as with @code{-net nic,}@var{options} (see description). +or fake device referenced by @var{id}. +@item usb-net[,netdev=@var{id}] +Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} +specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. For instance, user-mode networking can be used with @example -qemu-system-i386 [...OPTIONS...] -net user,vlan=0 -usbdevice net:vlan=0 -@end example -Currently this cannot be used in machines that support PCI NICs. 
-@item bt[:@var{hci-type}] -Bluetooth dongle whose type is specified in the same format as with +qemu-system-i386 [...] -netdev user,id=net0 -device usb-net,netdev=net0 +@end example +@item usb-ccid +Smartcard reader device +@item usb-audio +USB audio device +@item usb-bt-dongle +Bluetooth dongle for the transport layer of HCI. It is connected to HCI +scatternet 0 by default (corresponds to @code{-bt hci,vlan=0}). +Note that the syntax for the @code{-device usb-bt-dongle} option is not as +useful yet as it was with the legacy @code{-usbdevice} option. So to +configure an USB bluetooth device, you might need to use +"@code{-usbdevice bt}[:@var{hci-type}]" instead. This configures a +bluetooth dongle whose type is specified in the same format as with the @option{-bt hci} option, @pxref{bt-hcis,,allowed HCI types}. If no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. This USB device implements the USB Transport Layer of HCI. Example @@ -1460,11 +1471,11 @@ hubs, it won't work). @item Add the device in QEMU by using: @example -usb_add host:1234:5678 +device_add usb-host,vendorid=0x1234,productid=0x5678 @end example -Normally the guest OS should report that a new USB device is -plugged. You can use the option @option{-usbdevice} to do the same. +Normally the guest OS should report that a new USB device is plugged. +You can use the option @option{-device usb-host,...} to do the same. @item Now you can try to use the host USB device in QEMU. @@ -1732,37 +1743,45 @@ SASL service config /etc/sasl2/qemu.conf. If running QEMU as an unprivileged user, an environment variable SASL_CONF_PATH can be used to make it search alternate locations for the service config. -The default configuration might contain +If the TLS option is enabled for VNC, then it will provide session encryption, +otherwise the SASL mechanism will have to provide encryption. In the latter +case the list of possible plugins that can be used is drastically reduced. 
In +fact only the GSSAPI SASL mechanism provides an acceptable level of security +by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 +mechanism, however, it has multiple serious flaws described in detail in +RFC 6331 and thus should never be used any more. The SCRAM-SHA-1 mechanism +provides a simple username/password auth facility similar to DIGEST-MD5, but +does not support session encryption, so can only be used in combination with +TLS. + +When not using TLS the recommended configuration is @example -mech_list: digest-md5 -sasldb_path: /etc/qemu/passwd.db +mech_list: gssapi +keytab: /etc/qemu/krb5.tab @end example -This says to use the 'Digest MD5' mechanism, which is similar to the HTTP -Digest-MD5 mechanism. The list of valid usernames & passwords is maintained -in the /etc/qemu/passwd.db file, and can be updated using the saslpasswd2 -command. While this mechanism is easy to configure and use, it is not -considered secure by modern standards, so only suitable for developers / -ad-hoc testing. +This says to use the 'GSSAPI' mechanism with the Kerberos v5 protocol, with +the server principal stored in /etc/qemu/krb5.tab. For this to work the +administrator of your KDC must generate a Kerberos principal for the server, +with a name of 'qemu/somehost.example.com@@EXAMPLE.COM' replacing +'somehost.example.com' with the fully qualified host name of the machine +running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm. 
-A more serious deployment might use Kerberos, which is done with the 'gssapi' -mechanism +When using TLS, if username+password authentication is desired, then a +reasonable configuration is @example -mech_list: gssapi -keytab: /etc/qemu/krb5.tab +mech_list: scram-sha-1 +sasldb_path: /etc/qemu/passwd.db @end example -For this to work the administrator of your KDC must generate a Kerberos -principal for the server, with a name of 'qemu/somehost.example.com@@EXAMPLE.COM' -replacing 'somehost.example.com' with the fully qualified host name of the -machine running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm. +The saslpasswd2 program can be used to populate the passwd.db file with +accounts. -Other configurations will be left as an exercise for the reader. It should -be noted that only Digest-MD5 and GSSAPI provides a SSF layer for data -encryption. For all other mechanisms, VNC should always be configured to -use TLS and x509 certificates to protect security credentials from snooping. +Other SASL configurations will be left as an exercise for the reader. Note that +all mechanisms except GSSAPI, should be combined with use of TLS to ensure a +secure data channel. @node gdb_usage @section GDB usage @@ -1878,8 +1897,8 @@ resolution modes which the Cirrus Logic BIOS does not support (i.e. >= Windows 9x does not correctly use the CPU HLT instruction. The result is that it takes host CPU cycles even when idle. You can install the utility from -@url{http://www.user.cityline.ru/~maxamn/amnhltm.zip} to solve this -problem. Note that no such tool is needed for NT, 2000 or XP. +@url{http://web.archive.org/web/20060212132151/http://www.user.cityline.ru/~maxamn/amnhltm.zip} +to solve this problem. Note that no such tool is needed for NT, 2000 or XP. @subsubsection Windows 2000 disk full problem @@ -1927,9 +1946,9 @@ vvfat block device ("-hdb fat:directory_which_holds_the_SP"). @subsubsection CPU usage reduction DOS does not correctly use the CPU HLT instruction. 
The result is that -it takes host CPU cycles even when idle. You can install the utility -from @url{http://www.vmware.com/software/dosidle210.zip} to solve this -problem. +it takes host CPU cycles even when idle. You can install the utility from +@url{http://web.archive.org/web/20051222085335/http://www.vmware.com/software/dosidle210.zip} +to solve this problem. @node QEMU System emulator for non PC targets @chapter QEMU System emulator for non PC targets diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index bf4ce59019..e5bc28fc3c 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,15 +10,15 @@ STEXI ETEXI DEF("bench", img_bench, - "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] filename") + "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename") STEXI -@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] @var{filename} +@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} ETEXI DEF("check", img_check, - "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename") + "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] [-U] filename") STEXI -@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename} +@item check [--object 
@var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] [-U] @var{filename} ETEXI DEF("create", img_create, @@ -34,45 +34,45 @@ STEXI ETEXI DEF("compare", img_compare, - "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2") + "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] [-U] filename1 filename2") STEXI -@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2} +@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] [-U] @var{filename1} @var{filename2} ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] 
[-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, - "dd [--image-opts] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output") + "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output") STEXI -@item dd [--image-opts] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} +@item dd [--image-opts] [-U] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} ETEXI DEF("info", img_info, - "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] filename") + "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] [-U] filename") STEXI -@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} +@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] [-U] @var{filename} ETEXI DEF("map", img_map, - "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] filename") + "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-U] filename") STEXI -@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename} ETEXI DEF("snapshot", img_snapshot, - "snapshot [--object objectdef] [--image-opts] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename") + "snapshot [--object objectdef] [--image-opts] [-U] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename") STEXI -@item snapshot [--object @var{objectdef}] [--image-opts] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename} +@item snapshot 
[--object @var{objectdef}] [--image-opts] [-U] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename} ETEXI DEF("rebase", img_rebase, - "rebase [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") + "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") STEXI -@item rebase [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} +@item rebase [--object @var{objectdef}] [--image-opts] [-U] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI DEF("resize", img_resize, diff --git a/qemu-img.c b/qemu-img.c index f3b0ab49cc..b506839ef0 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -28,6 +28,7 @@ #include "qapi/qobject-output-visitor.h" #include "qapi/qmp/qerror.h" #include "qapi/qmp/qjson.h" +#include "qapi/qmp/qbool.h" #include "qemu/cutils.h" #include "qemu/config-file.h" #include "qemu/option.h" @@ -283,12 +284,20 @@ static int img_open_password(BlockBackend *blk, const char *filename, static BlockBackend *img_open_opts(const char *optstr, QemuOpts *opts, int flags, bool writethrough, - bool quiet) + bool quiet, bool force_share) { QDict *options; Error *local_err = NULL; BlockBackend *blk; options = qemu_opts_to_qdict(opts, NULL); + if (force_share) { + if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE) + && !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) { + error_report("--force-share/-U conflicts with image options"); + return NULL; + } + qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + } blk = blk_new_open(NULL, NULL, options, flags, &local_err); if (!blk) { error_reportf_err(local_err, "Could not open '%s': ", optstr); @@ -305,17 +314,20 @@ static BlockBackend 
*img_open_opts(const char *optstr, static BlockBackend *img_open_file(const char *filename, const char *fmt, int flags, - bool writethrough, bool quiet) + bool writethrough, bool quiet, + bool force_share) { BlockBackend *blk; Error *local_err = NULL; - QDict *options = NULL; + QDict *options = qdict_new(); if (fmt) { - options = qdict_new(); qdict_put_str(options, "driver", fmt); } + if (force_share) { + qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + } blk = blk_new_open(filename, NULL, options, flags, &local_err); if (!blk) { error_reportf_err(local_err, "Could not open '%s': ", filename); @@ -334,7 +346,7 @@ static BlockBackend *img_open_file(const char *filename, static BlockBackend *img_open(bool image_opts, const char *filename, const char *fmt, int flags, bool writethrough, - bool quiet) + bool quiet, bool force_share) { BlockBackend *blk; if (image_opts) { @@ -348,9 +360,11 @@ static BlockBackend *img_open(bool image_opts, if (!opts) { return NULL; } - blk = img_open_opts(filename, opts, flags, writethrough, quiet); + blk = img_open_opts(filename, opts, flags, writethrough, quiet, + force_share); } else { - blk = img_open_file(filename, fmt, flags, writethrough, quiet); + blk = img_open_file(filename, fmt, flags, writethrough, quiet, + force_share); } return blk; } @@ -650,6 +664,7 @@ static int img_check(int argc, char **argv) ImageCheck *check; bool quiet = false; bool image_opts = false; + bool force_share = false; fmt = NULL; output = NULL; @@ -664,9 +679,10 @@ static int img_check(int argc, char **argv) {"output", required_argument, 0, OPTION_OUTPUT}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:r:T:q", + c = getopt_long(argc, argv, ":hf:r:T:qU", long_options, &option_index); if (c == -1) { break; @@ -705,6 +721,9 @@ static int img_check(int argc, char **argv) case 'q': quiet = true; 
break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -744,7 +763,8 @@ static int img_check(int argc, char **argv) return 1; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + force_share); if (!blk) { return 1; } @@ -947,7 +967,8 @@ static int img_commit(int argc, char **argv) return 1; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + false); if (!blk) { return 1; } @@ -1206,6 +1227,7 @@ static int img_compare(int argc, char **argv) int c, pnum; uint64_t progress_base; bool image_opts = false; + bool force_share = false; cache = BDRV_DEFAULT_CACHE; for (;;) { @@ -1213,9 +1235,10 @@ static int img_compare(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:F:T:pqs", + c = getopt_long(argc, argv, ":hf:F:T:pqsU", long_options, NULL); if (c == -1) { break; @@ -1248,6 +1271,9 @@ static int img_compare(int argc, char **argv) case 's': strict = true; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -1293,13 +1319,15 @@ static int img_compare(int argc, char **argv) goto out3; } - blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet); + blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet, + force_share); if (!blk1) { ret = 2; goto out3; } - blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet); + blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet, + force_share); if (!blk2) { ret = 2; goto out2; @@ -1733,13 +1761,13 @@ static void 
coroutine_fn convert_co_do_copy(void *opaque) qemu_co_mutex_lock(&s->lock); if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { qemu_co_mutex_unlock(&s->lock); - goto out; + break; } n = convert_iteration_sectors(s, s->sector_num); if (n < 0) { qemu_co_mutex_unlock(&s->lock); s->ret = n; - goto out; + break; } /* save current sector and allocation status to local variables */ sector_num = s->sector_num; @@ -1764,7 +1792,6 @@ static void coroutine_fn convert_co_do_copy(void *opaque) error_report("error while reading sector %" PRId64 ": %s", sector_num, strerror(-ret)); s->ret = ret; - goto out; } } else if (!s->min_sparse && status == BLK_ZERO) { status = BLK_DATA; @@ -1773,22 +1800,20 @@ static void coroutine_fn convert_co_do_copy(void *opaque) if (s->wr_in_order) { /* keep writes in order */ - while (s->wr_offs != sector_num) { - if (s->ret != -EINPROGRESS) { - goto out; - } + while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) { s->wait_sector_num[index] = sector_num; qemu_coroutine_yield(); } s->wait_sector_num[index] = -1; } - ret = convert_co_write(s, sector_num, n, buf, status); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - s->ret = ret; - goto out; + if (s->ret == -EINPROGRESS) { + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + } } if (s->wr_in_order) { @@ -1809,7 +1834,6 @@ static void coroutine_fn convert_co_do_copy(void *opaque) } } -out: qemu_vfree(buf); s->co[index] = NULL; s->running_coroutines--; @@ -1871,7 +1895,7 @@ static int convert_do_copy(ImgConvertState *s) qemu_coroutine_enter(s->co[i]); } - while (s->ret == -EINPROGRESS) { + while (s->running_coroutines) { main_loop_wait(false); } @@ -1902,6 +1926,7 @@ static int img_convert(int argc, char **argv) bool writethrough, src_writethrough, quiet = false, image_opts = false, 
skip_create = false, progress = false; int64_t ret = -EINVAL; + bool force_share = false; ImgConvertState s = (ImgConvertState) { /* Need at least 4k of zeros for sparse detection */ @@ -1916,9 +1941,10 @@ static int img_convert(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:W", + c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU", long_options, NULL); if (c == -1) { break; @@ -2021,6 +2047,9 @@ static int img_convert(int argc, char **argv) case 'W': s.wr_in_order = false; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *object_opts; object_opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -2080,7 +2109,8 @@ static int img_convert(int argc, char **argv) for (bs_i = 0; bs_i < s.src_num; bs_i++) { s.src[bs_i] = img_open(image_opts, argv[optind + bs_i], - fmt, src_flags, src_writethrough, quiet); + fmt, src_flags, src_writethrough, quiet, + force_share); if (!s.src[bs_i]) { ret = -1; goto out; @@ -2233,7 +2263,8 @@ static int img_convert(int argc, char **argv) * the bdrv_create() call which takes different params. * Not critical right now, so fix can wait... 
*/ - s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet); + s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet, + false); if (!s.target) { ret = -1; goto out; @@ -2384,7 +2415,7 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) static ImageInfoList *collect_image_info_list(bool image_opts, const char *filename, const char *fmt, - bool chain) + bool chain, bool force_share) { ImageInfoList *head = NULL; ImageInfoList **last = &head; @@ -2407,7 +2438,8 @@ static ImageInfoList *collect_image_info_list(bool image_opts, g_hash_table_insert(filenames, (gpointer)filename, NULL); blk = img_open(image_opts, filename, fmt, - BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false); + BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false, + force_share); if (!blk) { goto err; } @@ -2459,6 +2491,7 @@ static int img_info(int argc, char **argv) const char *filename, *fmt, *output; ImageInfoList *list; bool image_opts = false; + bool force_share = false; fmt = NULL; output = NULL; @@ -2471,9 +2504,10 @@ static int img_info(int argc, char **argv) {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":f:h", + c = getopt_long(argc, argv, ":f:hU", long_options, &option_index); if (c == -1) { break; @@ -2491,6 +2525,9 @@ static int img_info(int argc, char **argv) case 'f': fmt = optarg; break; + case 'U': + force_share = true; + break; case OPTION_OUTPUT: output = optarg; break; @@ -2530,7 +2567,8 @@ static int img_info(int argc, char **argv) return 1; } - list = collect_image_info_list(image_opts, filename, fmt, chain); + list = collect_image_info_list(image_opts, filename, fmt, chain, + force_share); if (!list) { return 1; } @@ -2676,6 +2714,7 @@ static int img_map(int argc, char **argv) MapEntry curr = { .length = 0 }, next; int ret = 0; bool 
image_opts = false; + bool force_share = false; fmt = NULL; output = NULL; @@ -2687,9 +2726,10 @@ static int img_map(int argc, char **argv) {"output", required_argument, 0, OPTION_OUTPUT}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":f:h", + c = getopt_long(argc, argv, ":f:hU", long_options, &option_index); if (c == -1) { break; @@ -2707,6 +2747,9 @@ static int img_map(int argc, char **argv) case 'f': fmt = optarg; break; + case 'U': + force_share = true; + break; case OPTION_OUTPUT: output = optarg; break; @@ -2743,7 +2786,7 @@ static int img_map(int argc, char **argv) return 1; } - blk = img_open(image_opts, filename, fmt, 0, false, false); + blk = img_open(image_opts, filename, fmt, 0, false, false, force_share); if (!blk) { return 1; } @@ -2806,6 +2849,7 @@ static int img_snapshot(int argc, char **argv) bool quiet = false; Error *err = NULL; bool image_opts = false; + bool force_share = false; bdrv_oflags = BDRV_O_RDWR; /* Parse commandline parameters */ @@ -2814,9 +2858,10 @@ static int img_snapshot(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":la:c:d:hq", + c = getopt_long(argc, argv, ":la:c:d:hqU", long_options, NULL); if (c == -1) { break; @@ -2866,6 +2911,9 @@ static int img_snapshot(int argc, char **argv) case 'q': quiet = true; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -2892,7 +2940,8 @@ static int img_snapshot(int argc, char **argv) } /* Open the image */ - blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet); + blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet, + force_share); if (!blk) 
{ return 1; } @@ -2956,6 +3005,7 @@ static int img_rebase(int argc, char **argv) int c, flags, src_flags, ret; bool writethrough, src_writethrough; int unsafe = 0; + bool force_share = false; int progress = 0; bool quiet = false; Error *local_err = NULL; @@ -2972,9 +3022,10 @@ static int img_rebase(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:F:b:upt:T:q", + c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU", long_options, NULL); if (c == -1) { break; @@ -3024,6 +3075,9 @@ static int img_rebase(int argc, char **argv) case OPTION_IMAGE_OPTS: image_opts = true; break; + case 'U': + force_share = true; + break; } } @@ -3072,7 +3126,8 @@ static int img_rebase(int argc, char **argv) * Ignore the old backing file for unsafe rebase in case we want to correct * the reference to a renamed or moved backing file. 
*/ - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + false); if (!blk) { ret = -1; goto out; @@ -3097,6 +3152,13 @@ static int img_rebase(int argc, char **argv) qdict_put_str(options, "driver", bs->backing_format); } + if (force_share) { + if (!options) { + options = qdict_new(); + } + qdict_put(options, BDRV_OPT_FORCE_SHARE, + qbool_from_bool(true)); + } bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); blk_old_backing = blk_new_open(backing_name, NULL, options, src_flags, &local_err); @@ -3109,11 +3171,12 @@ static int img_rebase(int argc, char **argv) } if (out_baseimg[0]) { + options = qdict_new(); if (out_basefmt) { - options = qdict_new(); qdict_put_str(options, "driver", out_basefmt); - } else { - options = NULL; + } + if (force_share) { + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk_new_backing = blk_new_open(out_baseimg, NULL, @@ -3419,7 +3482,8 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet, + false); if (!blk) { ret = -1; goto out; @@ -3573,7 +3637,8 @@ static int img_amend(int argc, char **argv) goto out; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + false); if (!blk) { ret = -1; goto out; @@ -3741,6 +3806,7 @@ static int img_bench(int argc, char **argv) bool writethrough = false; struct timeval t1, t2; int i; + bool force_share = false; for (;;) { static const struct option long_options[] = { @@ -3749,9 +3815,10 @@ static int img_bench(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"pattern", required_argument, 0, OPTION_PATTERN}, {"no-drain", no_argument, 0, OPTION_NO_DRAIN}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = 
getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:w", long_options, NULL); + c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL); if (c == -1) { break; } @@ -3845,6 +3912,9 @@ static int img_bench(int argc, char **argv) flags |= BDRV_O_RDWR; is_write = true; break; + case 'U': + force_share = true; + break; case OPTION_PATTERN: { unsigned long res; @@ -3892,7 +3962,8 @@ static int img_bench(int argc, char **argv) goto out; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + force_share); if (!blk) { ret = -1; goto out; @@ -4059,6 +4130,7 @@ static int img_dd(int argc, char **argv) const char *fmt = NULL; int64_t size = 0; int64_t block_count = 0, out_pos, in_pos; + bool force_share = false; struct DdInfo dd = { .flags = 0, .count = 0, @@ -4087,10 +4159,11 @@ static int img_dd(int argc, char **argv) const struct option long_options[] = { { "help", no_argument, 0, 'h'}, { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + { "force-share", no_argument, 0, 'U'}, { 0, 0, 0, 0 } }; - while ((c = getopt_long(argc, argv, ":hf:O:", long_options, NULL))) { + while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) { if (c == EOF) { break; } @@ -4110,6 +4183,9 @@ static int img_dd(int argc, char **argv) case 'h': help(); break; + case 'U': + force_share = true; + break; case OPTION_IMAGE_OPTS: image_opts = true; break; @@ -4154,7 +4230,8 @@ static int img_dd(int argc, char **argv) ret = -1; goto out; } - blk1 = img_open(image_opts, in.filename, fmt, 0, false, false); + blk1 = img_open(image_opts, in.filename, fmt, 0, false, false, + force_share); if (!blk1) { ret = -1; @@ -4222,7 +4299,7 @@ static int img_dd(int argc, char **argv) } blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR, - false, false); + false, false, false); if (!blk2) { ret = -1; diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 21af9e65b2..4b2278f040 100644 --- 
a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -740,13 +740,13 @@ static int read_f(BlockBackend *blk, int argc, char **argv) } if (bflag) { - if (offset & 0x1ff) { - printf("offset %" PRId64 " is not sector aligned\n", + if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'offset'\n", offset); return 0; } - if (count & 0x1ff) { - printf("count %"PRId64" is not sector aligned\n", + if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) { + printf("%"PRId64" is not a sector-aligned value for 'count'\n", count); return 0; } @@ -1050,14 +1050,14 @@ static int write_f(BlockBackend *blk, int argc, char **argv) } if (bflag || cflag) { - if (offset & 0x1ff) { - printf("offset %" PRId64 " is not sector aligned\n", + if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'offset'\n", offset); return 0; } - if (count & 0x1ff) { - printf("count %"PRId64" is not sector aligned\n", + if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) { + printf("%"PRId64" is not a sector-aligned value for 'count'\n", count); return 0; } @@ -1760,7 +1760,7 @@ out: static int alloc_f(BlockBackend *blk, int argc, char **argv) { BlockDriverState *bs = blk_bs(blk); - int64_t offset, sector_num, nb_sectors, remaining; + int64_t offset, sector_num, nb_sectors, remaining, count; char s1[64]; int num, ret; int64_t sum_alloc; @@ -1769,25 +1769,31 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) if (offset < 0) { print_cvtnum_err(offset, argv[1]); return 0; - } else if (offset & 0x1ff) { - printf("offset %" PRId64 " is not sector aligned\n", + } else if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'offset'\n", offset); return 0; } if (argc == 3) { - nb_sectors = cvtnum(argv[2]); - if (nb_sectors < 0) { - print_cvtnum_err(nb_sectors, argv[2]); + count = cvtnum(argv[2]); + if (count < 0) { + print_cvtnum_err(count, argv[2]); return 0; - } else if 
(nb_sectors > INT_MAX) { - printf("length argument cannot exceed %d, given %s\n", - INT_MAX, argv[2]); + } else if (count > INT_MAX * BDRV_SECTOR_SIZE) { + printf("length argument cannot exceed %llu, given %s\n", + INT_MAX * BDRV_SECTOR_SIZE, argv[2]); return 0; } } else { - nb_sectors = 1; + count = BDRV_SECTOR_SIZE; + } + if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'count'\n", + count); + return 0; } + nb_sectors = count >> BDRV_SECTOR_BITS; remaining = nb_sectors; sum_alloc = 0; @@ -1811,8 +1817,8 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) cvtstr(offset, s1, sizeof(s1)); - printf("%"PRId64"/%"PRId64" sectors allocated at offset %s\n", - sum_alloc, nb_sectors, s1); + printf("%"PRId64"/%"PRId64" bytes allocated at offset %s\n", + sum_alloc << BDRV_SECTOR_BITS, nb_sectors << BDRV_SECTOR_BITS, s1); return 0; } @@ -1822,8 +1828,8 @@ static const cmdinfo_t alloc_cmd = { .argmin = 1, .argmax = 2, .cfunc = alloc_f, - .args = "off [sectors]", - .oneline = "checks if a sector is present in the file", + .args = "offset [count]", + .oneline = "checks if offset is allocated in the file", }; @@ -1862,7 +1868,7 @@ static int map_f(BlockBackend *blk, int argc, char **argv) { int64_t offset; int64_t nb_sectors, total_sectors; - char s1[64]; + char s1[64], s2[64]; int64_t num; int ret; const char *retstr; @@ -1888,10 +1894,11 @@ static int map_f(BlockBackend *blk, int argc, char **argv) } retstr = ret ? 
" allocated" : "not allocated"; - cvtstr(offset << 9ULL, s1, sizeof(s1)); - printf("[% 24" PRId64 "] % 8" PRId64 "/% 8" PRId64 " sectors %s " - "at offset %s (%d)\n", - offset << 9ULL, num, nb_sectors, retstr, s1, ret); + cvtstr(num << BDRV_SECTOR_BITS, s1, sizeof(s1)); + cvtstr(offset << BDRV_SECTOR_BITS, s2, sizeof(s2)); + printf("%s (0x%" PRIx64 ") bytes %s at offset %s (0x%" PRIx64 ")\n", + s1, num << BDRV_SECTOR_BITS, retstr, + s2, offset << BDRV_SECTOR_BITS); offset += num; nb_sectors -= num; @@ -20,6 +20,7 @@ #include "qemu/readline.h" #include "qemu/log.h" #include "qapi/qmp/qstring.h" +#include "qapi/qmp/qbool.h" #include "qom/object_interfaces.h" #include "sysemu/block-backend.h" #include "block/block_int.h" @@ -53,7 +54,8 @@ static const cmdinfo_t close_cmd = { .oneline = "close the current open file", }; -static int openfile(char *name, int flags, bool writethrough, QDict *opts) +static int openfile(char *name, int flags, bool writethrough, bool force_share, + QDict *opts) { Error *local_err = NULL; BlockDriverState *bs; @@ -64,6 +66,18 @@ static int openfile(char *name, int flags, bool writethrough, QDict *opts) return 1; } + if (force_share) { + if (!opts) { + opts = qdict_new(); + } + if (qdict_haskey(opts, BDRV_OPT_FORCE_SHARE) + && !qdict_get_bool(opts, BDRV_OPT_FORCE_SHARE)) { + error_report("-U conflicts with image options"); + QDECREF(opts); + return 1; + } + qdict_put(opts, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + } qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err); if (!qemuio_blk) { error_reportf_err(local_err, "can't open%s%s: ", @@ -108,6 +122,7 @@ static void open_help(void) " -r, -- open file read-only\n" " -s, -- use snapshot file\n" " -n, -- disable host cache, short for -t none\n" +" -U, -- force shared permissions\n" " -k, -- use kernel AIO implementation (on Linux only)\n" " -t, -- use the given cache mode for the image\n" " -d, -- use the given discard mode for the image\n" @@ -124,7 +139,7 @@ static const 
cmdinfo_t open_cmd = { .argmin = 1, .argmax = -1, .flags = CMD_NOFILE_OK, - .args = "[-rsnk] [-t cache] [-d discard] [-o options] [path]", + .args = "[-rsnkU] [-t cache] [-d discard] [-o options] [path]", .oneline = "open the file specified by path", .help = open_help, }; @@ -147,8 +162,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) int c; QemuOpts *qopts; QDict *opts; + bool force_share = false; - while ((c = getopt(argc, argv, "snro:kt:d:")) != -1) { + while ((c = getopt(argc, argv, "snro:kt:d:U")) != -1) { switch (c) { case 's': flags |= BDRV_O_SNAPSHOT; @@ -188,6 +204,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) return 0; } break; + case 'U': + force_share = true; + break; default: qemu_opts_reset(&empty_opts); return qemuio_command_usage(&open_cmd); @@ -211,9 +230,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) qemu_opts_reset(&empty_opts); if (optind == argc - 1) { - return openfile(argv[optind], flags, writethrough, opts); + return openfile(argv[optind], flags, writethrough, force_share, opts); } else if (optind == argc) { - return openfile(NULL, flags, writethrough, opts); + return openfile(NULL, flags, writethrough, force_share, opts); } else { QDECREF(opts); return qemuio_command_usage(&open_cmd); @@ -257,6 +276,7 @@ static void usage(const char *name) " -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n" " specify tracing options\n" " see qemu-img(1) man page for full description\n" +" -U, --force-share force shared permissions\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" "\n" @@ -436,7 +456,7 @@ static QemuOptsList file_opts = { int main(int argc, char **argv) { int readonly = 0; - const char *sopt = "hVc:d:f:rsnmkt:T:"; + const char *sopt = "hVc:d:f:rsnmkt:T:U"; const struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, @@ -452,6 +472,7 @@ int main(int argc, char **argv) { "trace", 
required_argument, NULL, 'T' }, { "object", required_argument, NULL, OPTION_OBJECT }, { "image-opts", no_argument, NULL, OPTION_IMAGE_OPTS }, + { "force-share", no_argument, 0, 'U'}, { NULL, 0, NULL, 0 } }; int c; @@ -462,6 +483,7 @@ int main(int argc, char **argv) QDict *opts = NULL; const char *format = NULL; char *trace_file = NULL; + bool force_share = false; #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); @@ -524,6 +546,9 @@ int main(int argc, char **argv) case 'h': usage(progname); exit(0); + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *qopts; qopts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -595,7 +620,7 @@ int main(int argc, char **argv) exit(1); } opts = qemu_opts_to_qdict(qopts, NULL); - if (openfile(NULL, flags, writethrough, opts)) { + if (openfile(NULL, flags, writethrough, force_share, opts)) { exit(1); } } else { @@ -603,7 +628,8 @@ int main(int argc, char **argv) opts = qdict_new(); qdict_put_str(opts, "driver", format); } - if (openfile(argv[optind], flags, writethrough, opts)) { + if (openfile(argv[optind], flags, writethrough, + force_share, opts)) { exit(1); } } diff --git a/qemu-options.hx b/qemu-options.hx index 70c0ded12e..f07a310eb1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -139,13 +139,18 @@ ETEXI DEF("numa", HAS_ARG, QEMU_OPTION_numa, "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" - "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n", QEMU_ARCH_ALL) + "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" + "-numa dist,src=source,dst=destination,val=distance\n", QEMU_ARCH_ALL) STEXI @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] +@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} +@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] @findex -numa 
Define a NUMA node and assign RAM and VCPUs to it. +Set the NUMA distance from a source node to a destination node. +Legacy VCPU assignment uses @samp{cpus} option where @var{firstcpu} and @var{lastcpu} are CPU indexes. Each @samp{cpus} option represent a contiguous range of CPU indexes (or a single VCPU if @var{lastcpu} is omitted). A non-contiguous @@ -159,6 +164,24 @@ a NUMA node: -numa node,cpus=0-2,cpus=5 @end example +@samp{cpu} option is a new alternative to @samp{cpus} option +which uses @samp{socket-id|core-id|thread-id} properties to assign +CPU objects to a @var{node} using topology layout properties of CPU. +The set of properties is machine specific, and depends on used +machine type/@samp{smp} options. It could be queried with +@samp{hotpluggable-cpus} monitor command. +@samp{node-id} property specifies @var{node} to which CPU object +will be assigned, it's required for @var{node} to be declared +with @samp{node} option before it's used with @samp{cpu} option. + +For example: +@example +-M pc \ +-smp 1,sockets=2,maxcpus=2 \ +-numa node,nodeid=0 -numa node,nodeid=1 \ +-numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1 +@end example + @samp{mem} assigns a given RAM amount to a node. @samp{memdev} assigns RAM from a given memory backend device to a node. If @samp{mem} and @samp{memdev} are omitted in all nodes, RAM is @@ -167,6 +190,17 @@ split equally between them. @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one node uses @samp{memdev}, all of them have to use it. +@var{source} and @var{destination} are NUMA node IDs. +@var{distance} is the NUMA distance from @var{source} to @var{destination}. +The distance from a node to itself is always 10. If any pair of nodes is +given a distance, then all pairs must be given distances. Although, when +distances are only given in one direction for each pair of nodes, then +the distances in the opposite directions are assumed to be the same. 
If, +however, an asymmetrical pair of distances is given for even one node +pair, then all node pairs must be provided distance values for both +directions, even when they are symmetrical. When a node is unreachable +from another node, set the pair's distance to 255. + Note that the -@option{numa} option doesn't allocate any of the specified resources, it just assigns existing resources to NUMA nodes. This means that one still has to use the @option{-m}, @@ -604,7 +638,7 @@ Special files such as iSCSI devices can be specified using protocol specific URLs. See the section for "Device URL Syntax" for more information. @item if=@var{interface} This option defines on which type on interface the drive is connected. -Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio. +Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio, none. @item bus=@var{bus},unit=@var{unit} These options define where is connected the drive by defining the bus number and the unit id. @@ -876,7 +910,7 @@ ETEXI DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, "-virtfs local,path=path,mount_tag=tag,security_model=[mapped-xattr|mapped-file|passthrough|none]\n" - " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n", + " [,id=id][,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n", QEMU_ARCH_ALL) STEXI @@ -1,36 +1,44 @@ -# If you want to use the non-TLS socket, then you *must* include -# the GSSAPI or DIGEST-MD5 mechanisms, because they are the only -# ones that can offer session encryption as well as authentication. +# If you want to use VNC remotely without TLS, then you *must* +# pick a mechanism which provides session encryption as well +# as authentication. 
# -# If you're only using TLS, then you can turn on any mechanisms +# If you are only using TLS, then you can turn on any mechanisms # you like for authentication, because TLS provides the encryption # -# Default to a simple username+password mechanism -# NB digest-md5 is no longer considered secure by current standards -mech_list: digest-md5 +# If you are only using UNIX sockets then encryption is not +# required at all. +# +# NB, previously DIGEST-MD5 was set as the default mechanism for +# QEMU VNC. Per RFC 6331 this is vulnerable to many serious security +# flaws and should no longer be used. Thus GSSAPI is now the default. +# +# To use GSSAPI requires that a QEMU service principal is +# added to the Kerberos server for each host running QEMU. +# This principal needs to be exported to the keytab file listed below +mech_list: gssapi -# Before you can use GSSAPI, you need a service principle on the -# KDC server for libvirt, and that to be exported to the keytab -# file listed below -#mech_list: gssapi +# If using TLS with VNC, or a UNIX socket only, it is possible to +# enable plugins which don't provide session encryption. The +# 'scram-sha-1' plugin allows plain username/password authentication +# to be performed # -# You can also list many mechanisms at once, then the user can choose -# by adding '?auth=sasl.gssapi' to their libvirt URI, eg -# qemu+tcp://hostname/system?auth=sasl.gssapi -#mech_list: digest-md5 gssapi +#mech_list: scram-sha-1 + +# You can also list many mechanisms at once, and the VNC server will +# negotiate which to use by considering the list enabled on the VNC +# client. +#mech_list: scram-sha-1 gssapi # Some older builds of MIT kerberos on Linux ignore this option & # instead need KRB5_KTNAME env var. 
# For modern Linux, and other OS, this should be sufficient # -# There is no default value here, uncomment if you need this -#keytab: /etc/qemu/krb5.tab +# This file needs to be populated with the service principal that +# was created on the Kerberos v5 server. If switching to a non-gssapi +# mechanism this can be commented out. +keytab: /etc/qemu/krb5.tab -# If using digest-md5 for username/passwds, then this is the file +# If using scram-sha-1 for username/passwds, then this is the file # containing the passwds. Use 'saslpasswd2 -a qemu [username]' # to add entries, and 'sasldblistusers2 -f [sasldb_path]' to browse it -sasldb_path: /etc/qemu/passwd.db - - -auxprop_plugin: sasldb - +#sasldb_path: /etc/qemu/passwd.db diff --git a/qga/commands-posix.c b/qga/commands-posix.c index ba06be4c86..284ecc6d7e 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -2125,9 +2125,11 @@ static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk, * we think this VM does not support online/offline memory block, * any other solution? */ - if (!dp && errno == ENOENT) { - result->response = - GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED; + if (!dp) { + if (errno == ENOENT) { + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED; + } goto out1; } closedir(dp); @@ -196,18 +196,12 @@ void qmp_cont(Error **errp) } /* Continuing after completed migration. Images have been inactivated to - * allow the destination to take control. Need to get control back now. */ - if (runstate_check(RUN_STATE_FINISH_MIGRATE) || - runstate_check(RUN_STATE_POSTMIGRATE)) - { - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - - blk_resume_after_migration(&local_err); + * allow the destination to take control. Need to get control back now. + * + * If there are no inactive block nodes (e.g. 
because the VM was just + * paused rather than completing a migration), bdrv_invalidate_cache_all() + * simply doesn't do anything. */ + bdrv_invalidate_cache_all(&local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -382,6 +382,7 @@ static void cpu_common_unrealizefn(DeviceState *dev, Error **errp) static void cpu_common_initfn(Object *obj) { + uint32_t count; CPUState *cpu = CPU(obj); CPUClass *cc = CPU_GET_CLASS(obj); @@ -396,7 +397,10 @@ static void cpu_common_initfn(Object *obj) QTAILQ_INIT(&cpu->breakpoints); QTAILQ_INIT(&cpu->watchpoints); - cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count()); + count = trace_get_vcpu_event_count(); + if (count) { + cpu->trace_dstate = bitmap_new(count); + } cpu_exec_initfn(cpu); } @@ -449,7 +453,7 @@ static void cpu_class_init(ObjectClass *klass, void *data) * Reason: CPUs still need special care by board code: wiring up * IRQs, adding reset handlers, halting non-first CPUs, ... */ - dc->cannot_instantiate_with_device_add_yet = true; + dc->user_creatable = false; } static const TypeInfo cpu_type_info = { diff --git a/roms/QemuMacDrivers b/roms/QemuMacDrivers new file mode 160000 +Subproject d4e7d7ac663fcb55f1b93575445fcbca372f17a diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 73cee81b79..45027b9281 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl # (c) 2001, Dave Jones. 
(the file handling bit) # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) # (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) @@ -6,6 +6,7 @@ # Licensed under the terms of the GNU GPL License version 2 use strict; +use warnings; my $P = $0; $P =~ s@.*/@@g; diff --git a/scripts/clean-header-guards.pl b/scripts/clean-header-guards.pl index 54ab99ae29..5e67f1998c 100755 --- a/scripts/clean-header-guards.pl +++ b/scripts/clean-header-guards.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl # # Clean up include guards in headers # @@ -28,6 +28,7 @@ # "cc -E -DGUARD_H -c -P -", and fed the test program on stdin. use strict; +use warnings; use Getopt::Std; # Stuff we don't want to clean because we import it into our tree: diff --git a/scripts/cleanup-trace-events.pl b/scripts/cleanup-trace-events.pl index 7e808efb6a..e93abc00da 100755 --- a/scripts/cleanup-trace-events.pl +++ b/scripts/cleanup-trace-events.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # Copyright (C) 2013 Red Hat, Inc. 
# # Authors: diff --git a/scripts/disas-objdump.pl b/scripts/disas-objdump.pl index 8f7e8182a1..bec905f04b 100755 --- a/scripts/disas-objdump.pl +++ b/scripts/disas-objdump.pl @@ -1,4 +1,6 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl + +use warnings; use File::Temp qw/ tempfile /; use Getopt::Long; diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 96e66a80a0..711a9a6bd0 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl # (c) 2007, Joe Perches <joe@perches.com> # created from checkpatch.pl # @@ -11,6 +11,7 @@ # Licensed under the terms of the GNU GPL License version 2 use strict; +use warnings; my $P = $0; my $V = '0.26'; diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index 0f1aa63872..8afc3eb5bb 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -284,12 +284,12 @@ while true ; do shift # check given cpu is in the supported CPU list for cpu in ${qemu_target_list} ; do - if [ "$cpu" == "$1" ] ; then + if [ "$cpu" = "$1" ] ; then break fi done - if [ "$cpu" == "$1" ] ; then + if [ "$cpu" = "$1" ] ; then qemu_target_list="$1" else echo "ERROR: unknown CPU \"$1\"" 1>&2 diff --git a/scripts/shaderinclude.pl b/scripts/shaderinclude.pl index 81b5146332..cd3bb40b12 100644 --- a/scripts/shaderinclude.pl +++ b/scripts/shaderinclude.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use strict; use warnings; diff --git a/scripts/switch-timer-api b/scripts/switch-timer-api index b0e230b9f1..41736d11dd 100755 --- a/scripts/switch-timer-api +++ b/scripts/switch-timer-api @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use strict; use warnings; diff --git a/scripts/texi2pod.pl b/scripts/texi2pod.pl index 6e8fec41a1..39ce584a32 100755 --- a/scripts/texi2pod.pl +++ b/scripts/texi2pod.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -w +#! /usr/bin/env perl # Copyright (C) 1999, 2000, 2001, 2003 Free Software Foundation, Inc. 
@@ -22,6 +22,8 @@ # markup to Perl POD format. It's intended to be used to extract # something suitable for a manpage from a Texinfo document. +use warnings; + $output = 0; $skipping = 0; %sects = (); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index b357aee778..c185eb19ac 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -458,6 +458,13 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) } } +uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) +{ + uint32_t Aff1 = idx / clustersz; + uint32_t Aff0 = idx % clustersz; + return (Aff1 << ARM_AFF1_SHIFT) | Aff0; +} + static void arm_cpu_initfn(Object *obj) { CPUState *cs = CPU(obj); @@ -709,9 +716,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * so these bits always RAZ. */ if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) { - uint32_t Aff1 = cs->cpu_index / ARM_DEFAULT_CPUS_PER_CLUSTER; - uint32_t Aff0 = cs->cpu_index % ARM_DEFAULT_CPUS_PER_CLUSTER; - cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0; + cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index, + ARM_DEFAULT_CPUS_PER_CLUSTER); } if (cpu->reset_hivecs) { @@ -1567,6 +1573,7 @@ static Property arm_cpu_properties[] = { DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), DEFINE_PROP_UINT64("mp-affinity", ARMCPU, mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 1055bfef3d..048faed9b9 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -710,6 +710,8 @@ static inline ARMCPU *arm_env_get_cpu(CPUARMState *env) return container_of(env, ARMCPU, env); } +uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz); + #define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e)) #define ENV_OFFSET offsetof(ARMCPU, env) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 7e87031fad..a41d595c23 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2635,28 +2635,23 @@ void 
cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, X86CPU *cpu = x86_env_get_cpu(env); CPUState *cs = CPU(cpu); uint32_t pkg_offset; + uint32_t limit; - /* test if maximum index reached */ - if (index & 0x80000000) { - if (index > env->cpuid_xlevel) { - if (env->cpuid_xlevel2 > 0) { - /* Handle the Centaur's CPUID instruction. */ - if (index > env->cpuid_xlevel2) { - index = env->cpuid_xlevel2; - } else if (index < 0xC0000000) { - index = env->cpuid_xlevel; - } - } else { - /* Intel documentation states that invalid EAX input will - * return the same information as EAX=cpuid_level - * (Intel SDM Vol. 2A - Instruction Set Reference - CPUID) - */ - index = env->cpuid_level; - } - } + /* Calculate & apply limits for different index ranges */ + if (index >= 0xC0000000) { + limit = env->cpuid_xlevel2; + } else if (index >= 0x80000000) { + limit = env->cpuid_xlevel; } else { - if (index > env->cpuid_level) - index = env->cpuid_level; + limit = env->cpuid_level; + } + + if (index > limit) { + /* Intel documentation states that invalid EAX input will + * return the same information as EAX=cpuid_level + * (Intel SDM Vol. 
2A - Instruction Set Reference - CPUID) + */ + index = env->cpuid_level; } switch(index) { @@ -3991,6 +3986,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), #endif + DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), { .name = "hv-spinlocks", .info = &qdev_prop_spinlocks }, DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false), @@ -4079,7 +4075,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->cpu_exec_enter = x86_cpu_exec_enter; cc->cpu_exec_exit = x86_cpu_exec_exit; - dc->cannot_instantiate_with_device_add_yet = false; + dc->user_creatable = true; } static const TypeInfo x86_cpu_type_info = { diff --git a/target/i386/machine.c b/target/i386/machine.c index 78ae2f986b..3cb272948e 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -136,178 +136,48 @@ static const VMStateDescription vmstate_mtrr_var = { #define VMSTATE_MTRR_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar) -static int put_fpreg_error(QEMUFile *f, void *opaque, size_t size, - VMStateField *field, QJSON *vmdesc) -{ - fprintf(stderr, "call put_fpreg() with invalid arguments\n"); - exit(0); - return 0; -} - -/* XXX: add that in a FPU generic layer */ -union x86_longdouble { - uint64_t mant; - uint16_t exp; -}; +typedef struct x86_FPReg_tmp { + FPReg *parent; + uint64_t tmp_mant; + uint16_t tmp_exp; +} x86_FPReg_tmp; -#define MANTD1(fp) (fp & ((1LL << 52) - 1)) -#define EXPBIAS1 1023 -#define EXPD1(fp) ((fp >> 52) & 0x7FF) -#define SIGND1(fp) ((fp >> 32) & 0x80000000) - -static void fp64_to_fp80(union x86_longdouble *p, uint64_t temp) +static void fpreg_pre_save(void *opaque) { - int e; - /* mantissa */ - p->mant = (MANTD1(temp) << 11) | (1LL << 63); - /* exponent + sign */ - e = EXPD1(temp) - EXPBIAS1 + 16383; - e |= 
SIGND1(temp) >> 16; - p->exp = e; -} + x86_FPReg_tmp *tmp = opaque; -static int get_fpreg(QEMUFile *f, void *opaque, size_t size, - VMStateField *field) -{ - FPReg *fp_reg = opaque; - uint64_t mant; - uint16_t exp; - - qemu_get_be64s(f, &mant); - qemu_get_be16s(f, &exp); - fp_reg->d = cpu_set_fp80(mant, exp); - return 0; -} - -static int put_fpreg(QEMUFile *f, void *opaque, size_t size, - VMStateField *field, QJSON *vmdesc) -{ - FPReg *fp_reg = opaque; - uint64_t mant; - uint16_t exp; /* we save the real CPU data (in case of MMX usage only 'mant' contains the MMX register */ - cpu_get_fp80(&mant, &exp, fp_reg->d); - qemu_put_be64s(f, &mant); - qemu_put_be16s(f, &exp); - - return 0; + cpu_get_fp80(&tmp->tmp_mant, &tmp->tmp_exp, tmp->parent->d); } -static const VMStateInfo vmstate_fpreg = { - .name = "fpreg", - .get = get_fpreg, - .put = put_fpreg, -}; - -static int get_fpreg_1_mmx(QEMUFile *f, void *opaque, size_t size, - VMStateField *field) +static int fpreg_post_load(void *opaque, int version) { - union x86_longdouble *p = opaque; - uint64_t mant; + x86_FPReg_tmp *tmp = opaque; - qemu_get_be64s(f, &mant); - p->mant = mant; - p->exp = 0xffff; + tmp->parent->d = cpu_set_fp80(tmp->tmp_mant, tmp->tmp_exp); return 0; } -static const VMStateInfo vmstate_fpreg_1_mmx = { - .name = "fpreg_1_mmx", - .get = get_fpreg_1_mmx, - .put = put_fpreg_error, -}; - -static int get_fpreg_1_no_mmx(QEMUFile *f, void *opaque, size_t size, - VMStateField *field) -{ - union x86_longdouble *p = opaque; - uint64_t mant; - - qemu_get_be64s(f, &mant); - fp64_to_fp80(p, mant); - return 0; -} - -static const VMStateInfo vmstate_fpreg_1_no_mmx = { - .name = "fpreg_1_no_mmx", - .get = get_fpreg_1_no_mmx, - .put = put_fpreg_error, +static const VMStateDescription vmstate_fpreg_tmp = { + .name = "fpreg_tmp", + .post_load = fpreg_post_load, + .pre_save = fpreg_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(tmp_mant, x86_FPReg_tmp), + VMSTATE_UINT16(tmp_exp, x86_FPReg_tmp), + 
VMSTATE_END_OF_LIST() + } }; -static bool fpregs_is_0(void *opaque, int version_id) -{ - X86CPU *cpu = opaque; - CPUX86State *env = &cpu->env; - - return (env->fpregs_format_vmstate == 0); -} - -static bool fpregs_is_1_mmx(void *opaque, int version_id) -{ - X86CPU *cpu = opaque; - CPUX86State *env = &cpu->env; - int guess_mmx; - - guess_mmx = ((env->fptag_vmstate == 0xff) && - (env->fpus_vmstate & 0x3800) == 0); - return (guess_mmx && (env->fpregs_format_vmstate == 1)); -} - -static bool fpregs_is_1_no_mmx(void *opaque, int version_id) -{ - X86CPU *cpu = opaque; - CPUX86State *env = &cpu->env; - int guess_mmx; - - guess_mmx = ((env->fptag_vmstate == 0xff) && - (env->fpus_vmstate & 0x3800) == 0); - return (!guess_mmx && (env->fpregs_format_vmstate == 1)); -} - -#define VMSTATE_FP_REGS(_field, _state, _n) \ - VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_0, vmstate_fpreg, FPReg), \ - VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_mmx, vmstate_fpreg_1_mmx, FPReg), \ - VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_no_mmx, vmstate_fpreg_1_no_mmx, FPReg) - -static bool version_is_5(void *opaque, int version_id) -{ - return version_id == 5; -} - -#ifdef TARGET_X86_64 -static bool less_than_7(void *opaque, int version_id) -{ - return version_id < 7; -} - -static int get_uint64_as_uint32(QEMUFile *f, void *pv, size_t size, - VMStateField *field) -{ - uint64_t *v = pv; - *v = qemu_get_be32(f); - return 0; -} - -static int put_uint64_as_uint32(QEMUFile *f, void *pv, size_t size, - VMStateField *field, QJSON *vmdesc) -{ - uint64_t *v = pv; - qemu_put_be32(f, *v); - - return 0; -} - -static const VMStateInfo vmstate_hack_uint64_as_uint32 = { - .name = "uint64_as_uint32", - .get = get_uint64_as_uint32, - .put = put_uint64_as_uint32, +static const VMStateDescription vmstate_fpreg = { + .name = "fpreg", + .fields = (VMStateField[]) { + VMSTATE_WITH_TMP(FPReg, x86_FPReg_tmp, vmstate_fpreg_tmp), + VMSTATE_END_OF_LIST() + } }; -#define VMSTATE_HACK_UINT32(_f, _s, _t) \ - 
VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_hack_uint64_as_uint32, uint64_t) -#endif - static void cpu_pre_save(void *opaque) { X86CPU *cpu = opaque; @@ -356,6 +226,10 @@ static int cpu_post_load(void *opaque, int version_id) return -EINVAL; } + if (env->fpregs_format_vmstate) { + error_report("Unsupported old non-softfloat CPU state"); + return -EINVAL; + } /* * Real mode guest segments register DPL should be zero. * Older KVM version were setting it wrongly. @@ -930,7 +804,7 @@ static const VMStateDescription vmstate_mcg_ext_ctl = { VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, - .minimum_version_id = 3, + .minimum_version_id = 11, .pre_save = cpu_pre_save, .post_load = cpu_post_load, .fields = (VMStateField[]) { @@ -943,7 +817,8 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_UINT16(env.fpus_vmstate, X86CPU), VMSTATE_UINT16(env.fptag_vmstate, X86CPU), VMSTATE_UINT16(env.fpregs_format_vmstate, X86CPU), - VMSTATE_FP_REGS(env.fpregs, X86CPU, 8), + + VMSTATE_STRUCT_ARRAY(env.fpregs, X86CPU, 8, 0, vmstate_fpreg, FPReg), VMSTATE_SEGMENT_ARRAY(env.segs, X86CPU, 6), VMSTATE_SEGMENT(env.ldt, X86CPU), @@ -952,16 +827,8 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_SEGMENT(env.idt, X86CPU), VMSTATE_UINT32(env.sysenter_cs, X86CPU), -#ifdef TARGET_X86_64 - /* Hack: In v7 size changed from 32 to 64 bits on x86_64 */ - VMSTATE_HACK_UINT32(env.sysenter_esp, X86CPU, less_than_7), - VMSTATE_HACK_UINT32(env.sysenter_eip, X86CPU, less_than_7), - VMSTATE_UINTTL_V(env.sysenter_esp, X86CPU, 7), - VMSTATE_UINTTL_V(env.sysenter_eip, X86CPU, 7), -#else VMSTATE_UINTTL(env.sysenter_esp, X86CPU), VMSTATE_UINTTL(env.sysenter_eip, X86CPU), -#endif VMSTATE_UINTTL(env.cr[0], X86CPU), VMSTATE_UINTTL(env.cr[2], X86CPU), @@ -982,46 +849,45 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_UINT64(env.fmask, X86CPU), VMSTATE_UINT64(env.kernelgsbase, X86CPU), #endif - VMSTATE_UINT32_V(env.smbase, X86CPU, 4), - - VMSTATE_UINT64_V(env.pat, X86CPU, 5), - 
VMSTATE_UINT32_V(env.hflags2, X86CPU, 5), - - VMSTATE_UINT32_TEST(parent_obj.halted, X86CPU, version_is_5), - VMSTATE_UINT64_V(env.vm_hsave, X86CPU, 5), - VMSTATE_UINT64_V(env.vm_vmcb, X86CPU, 5), - VMSTATE_UINT64_V(env.tsc_offset, X86CPU, 5), - VMSTATE_UINT64_V(env.intercept, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_cr_read, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_cr_write, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_dr_read, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_dr_write, X86CPU, 5), - VMSTATE_UINT32_V(env.intercept_exceptions, X86CPU, 5), - VMSTATE_UINT8_V(env.v_tpr, X86CPU, 5), + VMSTATE_UINT32(env.smbase, X86CPU), + + VMSTATE_UINT64(env.pat, X86CPU), + VMSTATE_UINT32(env.hflags2, X86CPU), + + VMSTATE_UINT64(env.vm_hsave, X86CPU), + VMSTATE_UINT64(env.vm_vmcb, X86CPU), + VMSTATE_UINT64(env.tsc_offset, X86CPU), + VMSTATE_UINT64(env.intercept, X86CPU), + VMSTATE_UINT16(env.intercept_cr_read, X86CPU), + VMSTATE_UINT16(env.intercept_cr_write, X86CPU), + VMSTATE_UINT16(env.intercept_dr_read, X86CPU), + VMSTATE_UINT16(env.intercept_dr_write, X86CPU), + VMSTATE_UINT32(env.intercept_exceptions, X86CPU), + VMSTATE_UINT8(env.v_tpr, X86CPU), /* MTRRs */ - VMSTATE_UINT64_ARRAY_V(env.mtrr_fixed, X86CPU, 11, 8), - VMSTATE_UINT64_V(env.mtrr_deftype, X86CPU, 8), + VMSTATE_UINT64_ARRAY(env.mtrr_fixed, X86CPU, 11), + VMSTATE_UINT64(env.mtrr_deftype, X86CPU), VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, MSR_MTRRcap_VCNT, 8), /* KVM-related states */ - VMSTATE_INT32_V(env.interrupt_injected, X86CPU, 9), - VMSTATE_UINT32_V(env.mp_state, X86CPU, 9), - VMSTATE_UINT64_V(env.tsc, X86CPU, 9), - VMSTATE_INT32_V(env.exception_injected, X86CPU, 11), - VMSTATE_UINT8_V(env.soft_interrupt, X86CPU, 11), - VMSTATE_UINT8_V(env.nmi_injected, X86CPU, 11), - VMSTATE_UINT8_V(env.nmi_pending, X86CPU, 11), - VMSTATE_UINT8_V(env.has_error_code, X86CPU, 11), - VMSTATE_UINT32_V(env.sipi_vector, X86CPU, 11), + VMSTATE_INT32(env.interrupt_injected, X86CPU), + VMSTATE_UINT32(env.mp_state, 
X86CPU), + VMSTATE_UINT64(env.tsc, X86CPU), + VMSTATE_INT32(env.exception_injected, X86CPU), + VMSTATE_UINT8(env.soft_interrupt, X86CPU), + VMSTATE_UINT8(env.nmi_injected, X86CPU), + VMSTATE_UINT8(env.nmi_pending, X86CPU), + VMSTATE_UINT8(env.has_error_code, X86CPU), + VMSTATE_UINT32(env.sipi_vector, X86CPU), /* MCE */ - VMSTATE_UINT64_V(env.mcg_cap, X86CPU, 10), - VMSTATE_UINT64_V(env.mcg_status, X86CPU, 10), - VMSTATE_UINT64_V(env.mcg_ctl, X86CPU, 10), - VMSTATE_UINT64_ARRAY_V(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4, 10), + VMSTATE_UINT64(env.mcg_cap, X86CPU), + VMSTATE_UINT64(env.mcg_status, X86CPU), + VMSTATE_UINT64(env.mcg_ctl, X86CPU), + VMSTATE_UINT64_ARRAY(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4), /* rdtscp */ - VMSTATE_UINT64_V(env.tsc_aux, X86CPU, 11), + VMSTATE_UINT64(env.tsc_aux, X86CPU), /* KVM pvclock msr */ - VMSTATE_UINT64_V(env.system_time_msr, X86CPU, 11), - VMSTATE_UINT64_V(env.wall_clock_msr, X86CPU, 11), + VMSTATE_UINT64(env.system_time_msr, X86CPU), + VMSTATE_UINT64(env.wall_clock_msr, X86CPU), /* XSAVE related fields */ VMSTATE_UINT64_V(env.xcr0, X86CPU, 12), VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12), diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs index f963777277..f92ba67ebd 100644 --- a/target/ppc/Makefile.objs +++ b/target/ppc/Makefile.objs @@ -4,6 +4,7 @@ obj-y += translate.o ifeq ($(CONFIG_SOFTMMU),y) obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o obj-$(TARGET_PPC64) += mmu-hash64.o mmu-book3s-v3.o compat.o +obj-$(TARGET_PPC64) += mmu-radix64.o endif obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h index d587e69bbc..b563c45b68 100644 --- a/target/ppc/cpu-models.h +++ b/target/ppc/cpu-models.h @@ -561,6 +561,7 @@ enum { CPU_POWERPC_POWER8NVL_BASE = 0x004C0000, CPU_POWERPC_POWER8NVL_v10 = 0x004C0100, CPU_POWERPC_POWER9_BASE = 0x004E0000, + CPU_POWERPC_POWER9_DD1 = 0x004E0100, CPU_POWERPC_970_v22 = 0x00390202, 
CPU_POWERPC_970FX_v10 = 0x00391100, CPU_POWERPC_970FX_v20 = 0x003C0200, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index e0ff0412d6..401e10e7da 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -30,6 +30,8 @@ #define TARGET_LONG_BITS 64 #define TARGET_PAGE_BITS 12 +#define TCG_GUEST_DEFAULT_MO 0 + /* Note that the official physical address space bits is 62-M where M is implementation dependent. I've not looked up M for the set of cpus we emulate at the system level. */ @@ -480,6 +482,8 @@ struct ppc_slb_t { #define DSISR_ISSTORE 0x02000000 /* Not permitted by virtual page class key protection */ #define DSISR_AMR 0x00200000 +/* Unsupported Radix Tree Configuration */ +#define DSISR_R_BADCONFIG 0x00080000 /* SRR1 error code fields */ @@ -1221,6 +1225,7 @@ static inline PowerPCCPU *ppc_env_get_cpu(CPUPPCState *env) PowerPCCPUClass *ppc_cpu_class_by_pvr(uint32_t pvr); PowerPCCPUClass *ppc_cpu_class_by_pvr_mask(uint32_t pvr); +PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc); struct PPCVirtualHypervisor { Object parent; diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index f4ee7aacd2..a6bcb47aa2 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -728,6 +728,9 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) cs->exception_index = POWERPC_EXCP_NONE; env->error_code = 0; + /* Reset the reservation */ + env->reserve_addr = -1; + /* Any interrupt is context synchronizing, check if TCG TLB * needs a delayed flush on ppc64 */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8574c369e6..51249ce79e 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2380,6 +2380,17 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) #if defined(TARGET_PPC64) pcc->radix_page_info = kvm_get_radix_page_info(); + + if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { + /* + * POWER9 DD1 has some bugs which make it not really ISA 3.00 + * compliant. 
More importantly, advertising ISA 3.00 + * architected mode may prevent guests from activating + * necessary DD1 workarounds. + */ + pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 + | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); + } #endif /* defined(TARGET_PPC64) */ } @@ -2413,18 +2424,6 @@ bool kvmppc_has_cap_mmu_hash_v3(void) return cap_mmu_hash_v3; } -static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc) -{ - ObjectClass *oc = OBJECT_CLASS(pcc); - - while (oc && !object_class_is_abstract(oc)) { - oc = object_class_get_parent(oc); - } - assert(oc); - - return POWERPC_CPU_CLASS(oc); -} - PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c index 005c96340a..e7798b3582 100644 --- a/target/ppc/mmu-book3s-v3.c +++ b/target/ppc/mmu-book3s-v3.c @@ -22,15 +22,13 @@ #include "cpu.h" #include "mmu-hash64.h" #include "mmu-book3s-v3.h" -#include "qemu/error-report.h" +#include "mmu-radix64.h" int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx) { if (ppc64_radix_guest(cpu)) { /* Guest uses radix */ - /* TODO - Unsupported */ - error_report("Guest Radix Support Unimplemented"); - exit(1); + return ppc_radix64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx); } else { /* Guest uses hash */ return ppc_hash64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx); } diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index 636f6ab95f..56095dab52 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -25,6 +25,11 @@ /* Partition Table Entry Fields */ #define PATBE1_GR 0x8000000000000000 +/* Process Table Entry */ +struct prtb_entry { + uint64_t prtbe0, prtbe1; +}; + #ifdef TARGET_PPC64 static inline bool ppc64_use_proc_tbl(PowerPCCPU *cpu) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c new file mode 100644 index 0000000000..de18c0b69e --- /dev/null +++ b/target/ppc/mmu-radix64.c @@ -0,0 +1,259 @@ 
+/* + * PowerPC Radix MMU emulation helpers for QEMU. + * + * Copyright (c) 2016 Suraj Jitindar Singh, IBM Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "exec/log.h" +#include "mmu-radix64.h" +#include "mmu-book3s-v3.h" + +static bool ppc_radix64_get_fully_qualified_addr(CPUPPCState *env, vaddr eaddr, + uint64_t *lpid, uint64_t *pid) +{ + /* We don't have HV support yet and shouldn't get here with it set anyway */ + assert(!msr_hv); + + if (!msr_hv) { /* !MSR[HV] -> Guest */ + switch (eaddr & R_EADDR_QUADRANT) { + case R_EADDR_QUADRANT0: /* Guest application */ + *lpid = env->spr[SPR_LPIDR]; + *pid = env->spr[SPR_BOOKS_PID]; + break; + case R_EADDR_QUADRANT1: /* Illegal */ + case R_EADDR_QUADRANT2: + return false; + case R_EADDR_QUADRANT3: /* Guest OS */ + *lpid = env->spr[SPR_LPIDR]; + *pid = 0; /* pid set to 0 -> addresses guest operating system */ + break; + } + } + + return true; +} + +static void ppc_radix64_raise_segi(PowerPCCPU *cpu, int rwx, vaddr eaddr) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + if (rwx == 2) { /* Instruction Segment Interrupt */ + cs->exception_index = 
POWERPC_EXCP_ISEG; + } else { /* Data Segment Interrupt */ + cs->exception_index = POWERPC_EXCP_DSEG; + env->spr[SPR_DAR] = eaddr; + } + env->error_code = 0; +} + +static void ppc_radix64_raise_si(PowerPCCPU *cpu, int rwx, vaddr eaddr, + uint32_t cause) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + if (rwx == 2) { /* Instruction Storage Interrupt */ + cs->exception_index = POWERPC_EXCP_ISI; + env->error_code = cause; + } else { /* Data Storage Interrupt */ + cs->exception_index = POWERPC_EXCP_DSI; + if (rwx == 1) { /* Write -> Store */ + cause |= DSISR_ISSTORE; + } + env->spr[SPR_DSISR] = cause; + env->spr[SPR_DAR] = eaddr; + env->error_code = 0; + } +} + + +static bool ppc_radix64_check_prot(PowerPCCPU *cpu, int rwx, uint64_t pte, + int *fault_cause, int *prot) +{ + CPUPPCState *env = &cpu->env; + const int need_prot[] = { PAGE_READ, PAGE_WRITE, PAGE_EXEC }; + + /* Check Page Attributes (pte58:59) */ + if (((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO) && (rwx == 2)) { + /* + * Radix PTE entries with the non-idempotent I/O attribute are treated + * as guarded storage + */ + *fault_cause |= SRR1_NOEXEC_GUARD; + return true; + } + + /* Determine permissions allowed by Encoded Access Authority */ + if ((pte & R_PTE_EAA_PRIV) && msr_pr) { /* Insufficient Privilege */ + *prot = 0; + } else if (msr_pr || (pte & R_PTE_EAA_PRIV)) { + *prot = ppc_radix64_get_prot_eaa(pte); + } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) */ + *prot = ppc_radix64_get_prot_eaa(pte); + *prot &= ppc_radix64_get_prot_amr(cpu); /* Least combined permissions */ + } + + /* Check if requested access type is allowed */ + if (need_prot[rwx] & ~(*prot)) { /* Page Protected for that Access */ + *fault_cause |= DSISR_PROTFAULT; + return true; + } + + return false; +} + +static void ppc_radix64_set_rc(PowerPCCPU *cpu, int rwx, uint64_t pte, + hwaddr pte_addr, int *prot) +{ + CPUState *cs = CPU(cpu); + uint64_t npte; + + npte = pte | R_PTE_R; /* Always set reference bit */ + + if (rwx == 1) { 
/* Store/Write */ + npte |= R_PTE_C; /* Set change bit */ + } else { + /* + * Treat the page as read-only for now, so that a later write + * will pass through this function again to set the C bit. + */ + *prot &= ~PAGE_WRITE; + } + + if (pte ^ npte) { /* If pte has changed then write it back */ + stq_phys(cs->as, pte_addr, npte); + } +} + +static uint64_t ppc_radix64_walk_tree(PowerPCCPU *cpu, int rwx, vaddr eaddr, + uint64_t base_addr, uint64_t nls, + hwaddr *raddr, int *psize, + int *fault_cause, int *prot, + hwaddr *pte_addr) +{ + CPUState *cs = CPU(cpu); + uint64_t index, pde; + + if (nls < 5) { /* Directory maps less than 2**5 entries */ + *fault_cause |= DSISR_R_BADCONFIG; + return 0; + } + + /* Read page <directory/table> entry from guest address space */ + index = eaddr >> (*psize - nls); /* Shift */ + index &= ((1UL << nls) - 1); /* Mask */ + pde = ldq_phys(cs->as, base_addr + (index * sizeof(pde))); + if (!(pde & R_PTE_VALID)) { /* Invalid Entry */ + *fault_cause |= DSISR_NOPTE; + return 0; + } + + *psize -= nls; + + /* Check if Leaf Entry -> Page Table Entry -> Stop the Search */ + if (pde & R_PTE_LEAF) { + uint64_t rpn = pde & R_PTE_RPN; + uint64_t mask = (1UL << *psize) - 1; + + if (ppc_radix64_check_prot(cpu, rwx, pde, fault_cause, prot)) { + return 0; /* Protection Denied Access */ + } + + /* Or high bits of rpn and low bits to ea to form whole real addr */ + *raddr = (rpn & ~mask) | (eaddr & mask); + *pte_addr = base_addr + (index * sizeof(pde)); + return pde; + } + + /* Next Level of Radix Tree */ + return ppc_radix64_walk_tree(cpu, rwx, eaddr, pde & R_PDE_NLB, + pde & R_PDE_NLS, raddr, psize, + fault_cause, prot, pte_addr); +} + +int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, + int mmu_idx) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + hwaddr raddr, pte_addr; + uint64_t lpid = 0, pid = 0, offset, size, patbe, prtbe0, pte; + 
int page_size, prot, fault_cause = 0; + + assert((rwx == 0) || (rwx == 1) || (rwx == 2)); + assert(!msr_hv); /* For now there is no Radix PowerNV Support */ + assert(cpu->vhyp); + assert(ppc64_use_proc_tbl(cpu)); + + /* Real Mode Access */ + if (((rwx == 2) && (msr_ir == 0)) || ((rwx != 2) && (msr_dr == 0))) { + /* In real mode top 4 effective addr bits (mostly) ignored */ + raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; + + tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK, + PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx, + TARGET_PAGE_SIZE); + return 0; + } + + /* Virtual Mode Access - get the fully qualified address */ + if (!ppc_radix64_get_fully_qualified_addr(env, eaddr, &lpid, &pid)) { + ppc_radix64_raise_segi(cpu, rwx, eaddr); + return 1; + } + + /* Get Process Table */ + patbe = vhc->get_patbe(cpu->vhyp); + + /* Index Process Table by PID to Find Corresponding Process Table Entry */ + offset = pid * sizeof(struct prtb_entry); + size = 1ULL << ((patbe & PATBE1_R_PRTS) + 12); + if (offset >= size) { + /* offset exceeds size of the process table */ + ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE); + return 1; + } + prtbe0 = ldq_phys(cs->as, (patbe & PATBE1_R_PRTB) + offset); + + /* Walk Radix Tree from Process Table Entry to Convert EA to RA */ + page_size = PRTBE_R_GET_RTS(prtbe0); + pte = ppc_radix64_walk_tree(cpu, rwx, eaddr & R_EADDR_MASK, + prtbe0 & PRTBE_R_RPDB, prtbe0 & PRTBE_R_RPDS, + &raddr, &page_size, &fault_cause, &prot, + &pte_addr); + if (!pte) { + ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause); + return 1; + } + + /* Update Reference and Change Bits */ + ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, &prot); + + tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK, + prot, mmu_idx, 1UL << page_size); + return 1; +} diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h new file mode 100644 index 0000000000..1d5c7cfea5 --- /dev/null +++ b/target/ppc/mmu-radix64.h @@ -0,0 +1,72 @@ +#ifndef MMU_RADIX64_H 
+#define MMU_RADIX64_H + +#ifndef CONFIG_USER_ONLY + +/* Radix Quadrants */ +#define R_EADDR_MASK 0x3FFFFFFFFFFFFFFF +#define R_EADDR_QUADRANT 0xC000000000000000 +#define R_EADDR_QUADRANT0 0x0000000000000000 +#define R_EADDR_QUADRANT1 0x4000000000000000 +#define R_EADDR_QUADRANT2 0x8000000000000000 +#define R_EADDR_QUADRANT3 0xC000000000000000 + +/* Radix Partition Table Entry Fields */ +#define PATBE1_R_PRTB 0x0FFFFFFFFFFFF000 +#define PATBE1_R_PRTS 0x000000000000001F + +/* Radix Process Table Entry Fields */ +#define PRTBE_R_GET_RTS(rts) \ + ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31) +#define PRTBE_R_RPDB 0x0FFFFFFFFFFFFF00 +#define PRTBE_R_RPDS 0x000000000000001F + +/* Radix Page Directory/Table Entry Fields */ +#define R_PTE_VALID 0x8000000000000000 +#define R_PTE_LEAF 0x4000000000000000 +#define R_PTE_SW0 0x2000000000000000 +#define R_PTE_RPN 0x01FFFFFFFFFFF000 +#define R_PTE_SW1 0x0000000000000E00 +#define R_GET_SW(sw) (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7)) +#define R_PTE_R 0x0000000000000100 +#define R_PTE_C 0x0000000000000080 +#define R_PTE_ATT 0x0000000000000030 +#define R_PTE_ATT_NORMAL 0x0000000000000000 +#define R_PTE_ATT_SAO 0x0000000000000010 +#define R_PTE_ATT_NI_IO 0x0000000000000020 +#define R_PTE_ATT_TOLERANT_IO 0x0000000000000030 +#define R_PTE_EAA_PRIV 0x0000000000000008 +#define R_PTE_EAA_R 0x0000000000000004 +#define R_PTE_EAA_RW 0x0000000000000002 +#define R_PTE_EAA_X 0x0000000000000001 +#define R_PDE_NLB PRTBE_R_RPDB +#define R_PDE_NLS PRTBE_R_RPDS + +#ifdef TARGET_PPC64 + +int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, + int mmu_idx); + +static inline int ppc_radix64_get_prot_eaa(uint64_t pte) +{ + return (pte & R_PTE_EAA_R ? PAGE_READ : 0) | + (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) | + (pte & R_PTE_EAA_X ? 
PAGE_EXEC : 0); +} + +static inline int ppc_radix64_get_prot_amr(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */ + int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */ + + return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */ + (amr & 0x1 ? 0 : PAGE_READ) | + (iamr & 0x1 ? 0 : PAGE_EXEC); +} + +#endif /* TARGET_PPC64 */ + +#endif /* CONFIG_USER_ONLY */ + +#endif /* MMU_RADIX64_H */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index f40b5a1abf..c0cd64d927 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -73,6 +73,7 @@ static TCGv cpu_cfar; #endif static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; +static TCGv cpu_reserve_val; static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -181,6 +182,9 @@ void ppc_translate_init(void) cpu_reserve = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_addr), "reserve_addr"); + cpu_reserve_val = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, reserve_val), + "reserve_val"); cpu_fpscr = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, fpscr), "fpscr"); @@ -214,6 +218,7 @@ struct DisasContext { bool vsx_enabled; bool spe_enabled; bool tm_enabled; + bool gtse; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; uint64_t insns_flags; @@ -2967,6 +2972,7 @@ static void gen_stswx(DisasContext *ctx) /* eieio */ static void gen_eieio(DisasContext *ctx) { + tcg_gen_mb(TCG_MO_LD_ST | TCG_BAR_SC); } #if !defined(CONFIG_USER_ONLY) @@ -3004,6 +3010,7 @@ static void gen_isync(DisasContext *ctx) if (!ctx->pr) { gen_check_tlb_flush(ctx, false); } + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); gen_stop_exception(ctx); } @@ -3023,7 +3030,8 @@ static void gen_##name(DisasContext *ctx) \ } \ tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop); \ tcg_gen_mov_tl(cpu_reserve, t0); \ - tcg_gen_st_tl(gpr, cpu_env, 
offsetof(CPUPPCState, reserve_val)); \ + tcg_gen_mov_tl(cpu_reserve_val, gpr); \ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); \ tcg_temp_free(t0); \ } @@ -3155,14 +3163,31 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA, static void gen_conditional_store(DisasContext *ctx, TCGv EA, int reg, int memop) { - TCGLabel *l1; + TCGLabel *l1 = gen_new_label(); + TCGLabel *l2 = gen_new_label(); + TCGv t0; - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); - l1 = gen_new_label(); tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); + + t0 = tcg_temp_new(); + tcg_gen_atomic_cmpxchg_tl(t0, cpu_reserve, cpu_reserve_val, + cpu_gpr[reg], ctx->mem_idx, + DEF_MEMOP(memop) | MO_ALIGN); + tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_reserve_val); + tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT); + tcg_gen_or_tl(t0, t0, cpu_so); + tcg_gen_trunc_tl_i32(cpu_crf[0], t0); + tcg_temp_free(t0); + tcg_gen_br(l2); + gen_set_label(l1); + + /* Address mismatch implies failure. But we still need to provide the + memory barrier semantics of the instruction. 
*/ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); + + gen_set_label(l2); tcg_gen_movi_tl(cpu_reserve, -1); } #endif @@ -3291,6 +3316,7 @@ static void gen_sync(DisasContext *ctx) if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { gen_check_tlb_flush(ctx, true); } + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); } /* wait */ @@ -4513,7 +4539,12 @@ static void gen_tlbie(DisasContext *ctx) GEN_PRIV; #else TCGv_i32 t1; - CHK_HV; + + if (ctx->gtse) { + CHK_SV; /* If gtse is set then tlbie is supervisor privileged */ + } else { + CHK_HV; /* Else hypervisor privileged */ + } if (NARROW_MODE(ctx)) { TCGv t0 = tcg_temp_new(); @@ -6547,6 +6578,8 @@ GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA), * different ISA versions */ GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE), GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE), +GEN_HANDLER_E(tlbiel, 0x1F, 0x12, 0x08, 0x00100001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(tlbie, 0x1F, 0x12, 0x09, 0x00100001, PPC_NONE, PPC2_ISA300), GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC), #if defined(TARGET_PPC64) GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI), @@ -7227,6 +7260,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) ctx.tm_enabled = false; } #endif + ctx.gtse = !!(env->spr[SPR_LPCR] & LPCR_GTSE); if ((env->flags & POWERPC_FLAG_SE) && msr_se) ctx.singlestep_enabled = CPU_SINGLE_STEP; else diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index e82e3e65e1..56a0ab22cf 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -8960,7 +8960,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PPC_FLOAT_EXT | PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | - PPC_MEM_TLBIE | PPC_MEM_TLBSYNC | + PPC_MEM_TLBSYNC | PPC_64B | PPC_64BX | PPC_ALTIVEC | PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD | @@ -10285,6 
+10285,18 @@ PowerPCCPU *cpu_ppc_init(const char *cpu_model) return POWERPC_CPU(cpu_generic_init(TYPE_POWERPC_CPU, cpu_model)); } +PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc) +{ + ObjectClass *oc = OBJECT_CLASS(pcc); + + while (oc && !object_class_is_abstract(oc)) { + oc = object_class_get_parent(oc); + } + assert(oc); + + return POWERPC_CPU_CLASS(oc); +} + /* Sort by PVR, ordering special case "host" last. */ static gint ppc_cpu_list_compare(gconstpointer a, gconstpointer b) { @@ -10316,6 +10328,7 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) ObjectClass *oc = data; CPUListState *s = user_data; PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + DeviceClass *family = DEVICE_CLASS(ppc_cpu_get_family_class(pcc)); const char *typename = object_class_get_name(oc); char *name; int i; @@ -10338,8 +10351,18 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) if (alias_oc != oc) { continue; } - (*s->cpu_fprintf)(s->file, "PowerPC %-16s (alias for %s)\n", - alias->alias, name); + /* + * If running with KVM, we might update the family alias later, so + * avoid printing the wrong alias here and use "preferred" instead + */ + if (strcmp(alias->alias, family->desc) == 0) { + (*s->cpu_fprintf)(s->file, + "PowerPC %-16s (alias for preferred %s CPU)\n", + alias->alias, family->desc); + } else { + (*s->cpu_fprintf)(s->file, "PowerPC %-16s (alias for %s)\n", + alias->alias, name); + } } g_free(name); } @@ -10436,14 +10459,6 @@ static bool ppc_cpu_has_work(CPUState *cs) return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD); } -static void ppc_cpu_exec_enter(CPUState *cs) -{ - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - env->reserve_addr = -1; -} - /* CPUClass::reset() */ static void ppc_cpu_reset(CPUState *s) { @@ -10660,7 +10675,6 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug; cc->vmsd = &vmstate_ppc_cpu; #endif - 
cc->cpu_exec_enter = ppc_cpu_exec_enter; #if defined(CONFIG_SOFTMMU) cc->write_elf64_note = ppc64_cpu_write_elf64_note; cc->write_elf32_note = ppc32_cpu_write_elf32_note; diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 066dcd17df..a1bf2ba5a7 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -430,6 +430,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) cc->write_elf64_note = s390_cpu_write_elf64_note; cc->cpu_exec_interrupt = s390_cpu_exec_interrupt; cc->debug_excp_handler = s390x_cpu_debug_excp_handler; + cc->do_unaligned_access = s390x_cpu_do_unaligned_access; #endif cc->disas_set_info = s390_cpu_disas_set_info; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 058ddad83a..240b8a5c22 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -480,6 +480,9 @@ int s390_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw, #ifndef CONFIG_USER_ONLY void do_restart_interrupt(CPUS390XState *env); +void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb, uint8_t *ar) @@ -1075,6 +1078,9 @@ struct sysib_322 { #define SIGP_MODE_Z_ARCH_TRANS_ALL_PSW 1 #define SIGP_MODE_Z_ARCH_TRANS_CUR_PSW 2 +/* SIGP order code mask corresponding to bit positions 56-63 */ +#define SIGP_ORDER_MASK 0x000000ff + void load_psw(CPUS390XState *env, uint64_t mask, uint64_t addr); int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc, target_ulong *raddr, int *flags, bool exc); diff --git a/target/s390x/helper.c b/target/s390x/helper.c index 68bd2f9784..997849008f 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -718,4 +718,20 @@ void s390x_cpu_debug_excp_handler(CPUState *cs) cpu_loop_exit_noexc(cs); } } + +/* Unaligned accesses are only diagnosed with MO_ALIGN. 
At the moment, + this is only for the atomic operations, for which we want to raise a + specification exception. */ +void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + + if (retaddr) { + cpu_restore_state(cs, retaddr); + } + program_interrupt(env, PGM_SPECIFICATION, ILEN_LATER); +} #endif /* CONFIG_USER_ONLY */ diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 9102071d0a..0b70770e4e 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -25,6 +25,7 @@ DEF_HELPER_3(cxgb, i64, env, s64, i32) DEF_HELPER_3(celgb, i64, env, i64, i32) DEF_HELPER_3(cdlgb, i64, env, i64, i32) DEF_HELPER_3(cxlgb, i64, env, i64, i32) +DEF_HELPER_4(cdsg, void, env, i64, i32, i32) DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64) @@ -83,6 +84,8 @@ DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_NO_RWG_SE, i32, env, i32, i64, i64, i64) DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env) +DEF_HELPER_2(stfle, i32, env, i64) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index 075ff597c3..55a7c529b4 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -239,12 +239,12 @@ D(0xec7d, CLGIJ, RIE_c, GIE, r1_o, i2_8u, 0, 0, cj, 0, 1) /* COMPARE AND SWAP */ - D(0xba00, CS, RS_a, Z, r3_32u, r1_32u, new, r1_32, cs, 0, 0) - D(0xeb14, CSY, RSY_a, LD, r3_32u, r1_32u, new, r1_32, cs, 0, 0) - D(0xeb30, CSG, RSY_a, Z, r3_o, r1_o, new, r1, cs, 0, 1) + D(0xba00, CS, RS_a, Z, r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL) + D(0xeb14, CSY, RSY_a, 
LD, r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL) + D(0xeb30, CSG, RSY_a, Z, r3_o, r1_o, new, r1, cs, 0, MO_TEQ) /* COMPARE DOUBLE AND SWAP */ - D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, 1) - D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, 1) + D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ) + D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ) C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0) /* COMPARE AND TRAP */ @@ -390,20 +390,20 @@ /* LOAD ADDRESS RELATIVE LONG */ C(0xc000, LARL, RIL_b, Z, 0, ri2, 0, r1, mov2, 0) /* LOAD AND ADD */ - C(0xebf8, LAA, RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, add, adds32) - C(0xebe8, LAAG, RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, add, adds64) + D(0xebf8, LAA, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, laa, adds32, MO_TESL) + D(0xebe8, LAAG, RSY_a, ILA, r3, a2, new, in2_r1, laa, adds64, MO_TEQ) /* LOAD AND ADD LOGICAL */ - C(0xebfa, LAAL, RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, add, addu32) - C(0xebea, LAALG, RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, add, addu64) + D(0xebfa, LAAL, RSY_a, ILA, r3_32u, a2, new, in2_r1_32, laa, addu32, MO_TEUL) + D(0xebea, LAALG, RSY_a, ILA, r3, a2, new, in2_r1, laa, addu64, MO_TEQ) /* LOAD AND AND */ - C(0xebf4, LAN, RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, and, nz32) - C(0xebe4, LANG, RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, and, nz64) + D(0xebf4, LAN, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lan, nz32, MO_TESL) + D(0xebe4, LANG, RSY_a, ILA, r3, a2, new, in2_r1, lan, nz64, MO_TEQ) /* LOAD AND EXCLUSIVE OR */ - C(0xebf7, LAX, RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, xor, nz32) - C(0xebe7, LAXG, RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, xor, nz64) + D(0xebf7, LAX, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lax, nz32, MO_TESL) + D(0xebe7, LAXG, RSY_a, ILA, r3, a2, new, in2_r1, lax, nz64, MO_TEQ) /* LOAD AND OR */ - C(0xebf6, LAO, RSY_a, 
ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, or, nz32) - C(0xebe6, LAOG, RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, or, nz64) + D(0xebf6, LAO, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lao, nz32, MO_TESL) + D(0xebe6, LAOG, RSY_a, ILA, r3, a2, new, in2_r1, lao, nz64, MO_TEQ) /* LOAD AND TEST */ C(0x1200, LTR, RR_a, Z, 0, r2_o, 0, cond_r1r2_32, mov2, s32) C(0xb902, LTGR, RRE, Z, 0, r2_o, 0, r1, mov2, s64) @@ -504,7 +504,9 @@ C(0xb9e2, LOCGR, RRF_c, LOC, r1, r2, r1, 0, loc, 0) C(0xebf2, LOC, RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0) C(0xebe2, LOCG, RSY_b, LOC, r1, m2_64, r1, 0, loc, 0) -/* LOAD PAIR DISJOINT TODO */ +/* LOAD PAIR DISJOINT */ + D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL) + D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ) /* LOAD POSITIVE */ C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32) C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64) @@ -747,6 +749,8 @@ C(0xe33e, STRV, RXY_a, Z, la2, r1_32u, new, m1_32, rev32, 0) C(0xe32f, STRVG, RXY_a, Z, la2, r1_o, new, m1_64, rev64, 0) +/* STORE FACILITY LIST EXTENDED */ + C(0xb2b0, STFLE, S, SFLE, 0, a2, 0, 0, stfle, 0) /* STORE FPC */ C(0xb29c, STFPC, S, Z, 0, a2, new, m2_32, efpc, 0) @@ -843,6 +847,8 @@ /* LOAD CONTROL */ C(0xb700, LCTL, RS_a, Z, 0, a2, 0, 0, lctl, 0) C(0xeb2f, LCTLG, RSY_a, Z, 0, a2, 0, 0, lctlg, 0) +/* LOAD PROGRAM PARAMETER */ + C(0xb280, LPP, S, LPP, 0, m2_64, 0, 0, lpp, 0) /* LOAD PSW */ C(0x8200, LPSW, S, Z, 0, a2, 0, 0, lpsw, 0) /* LOAD PSW EXTENDED */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 1a249d8359..fb105429be 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -1764,8 +1764,6 @@ static int sigp_set_architecture(S390CPU *cpu, uint32_t param, return SIGP_CC_ORDER_CODE_ACCEPTED; } -#define SIGP_ORDER_MASK 0x000000ff - static int handle_sigp(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) { CPUS390XState *env = &cpu->env; diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 
675aba2e44..f6e5bcec5d 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -23,6 +23,7 @@ #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "qemu/int128.h" #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/storage-keys.h" @@ -844,6 +845,45 @@ uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array, return cc; } +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) +{ + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + Int128 oldv; + bool fail; + + if (parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); +#endif + } else { + uint64_t oldh, oldl; + + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); + + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; + } + + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + } + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); +} + #if !defined(CONFIG_USER_ONLY) void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) { diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c index eca82441d0..23ec52cf35 100644 --- a/target/s390x/misc_helper.c +++ b/target/s390x/misc_helper.c @@ -517,8 +517,7 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered register" as parameter (input). Status (output) is always R1. 
*/ - /* sigp contains the order code in bit positions 56-63, mask it here. */ - switch (order_code & 0xff) { + switch (order_code & SIGP_ORDER_MASK) { case SIGP_SET_ARCH: /* switch arch */ break; @@ -678,3 +677,62 @@ void HELPER(per_ifetch)(CPUS390XState *env, uint64_t addr) } } #endif + +/* The maximum bit defined at the moment is 129. */ +#define MAX_STFL_WORDS 3 + +/* Canonicalize the current cpu's features into the 64-bit words required + by STFLE. Return the index of the highest non-zero word (count - 1). */ +static unsigned do_stfle(CPUS390XState *env, uint64_t words[MAX_STFL_WORDS]) +{ + S390CPU *cpu = s390_env_get_cpu(env); + const unsigned long *features = cpu->model->features; + unsigned max_bit = 0; + S390Feat feat; + + memset(words, 0, sizeof(uint64_t) * MAX_STFL_WORDS); + + if (test_bit(S390_FEAT_ZARCH, features)) { + /* z/Architecture is always active if around */ + words[0] = 1ull << (63 - 2); + } + + for (feat = find_first_bit(features, S390_FEAT_MAX); + feat < S390_FEAT_MAX; + feat = find_next_bit(features, S390_FEAT_MAX, feat + 1)) { + const S390FeatDef *def = s390_feat_def(feat); + if (def->type == S390_FEAT_TYPE_STFL) { + unsigned bit = def->bit; + if (bit > max_bit) { + max_bit = bit; + } + assert(bit / 64 < MAX_STFL_WORDS); + words[bit / 64] |= 1ULL << (63 - bit % 64); + } + } + + return max_bit / 64; +} + +void HELPER(stfl)(CPUS390XState *env) +{ + uint64_t words[MAX_STFL_WORDS]; + + do_stfle(env, words); + cpu_stl_data(env, 200, words[0] >> 32); +} + +uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr) +{ + uint64_t words[MAX_STFL_WORDS]; + unsigned count_m1 = env->regs[0] & 0xff; + unsigned max_m1 = do_stfle(env, words); + unsigned i; + + for (i = 0; i <= count_m1; ++i) { + cpu_stq_data(env, addr + 8 * i, words[i]); + } + + env->regs[0] = deposit64(env->regs[0], 0, 8, max_m1); + return (count_m1 >= max_m1 ? 
0 : 3); +} diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 01c62176bf..4c48c593cd 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -1194,6 +1194,7 @@ typedef enum DisasFacility { FAC_SCF, /* store clock fast */ FAC_SFLE, /* store facility list extended */ FAC_ILA, /* interlocked access facility 1 */ + FAC_LPP, /* load-program-parameter */ } DisasFacility; struct DisasInsn { @@ -1517,6 +1518,21 @@ static ExitStatus op_bc(DisasContext *s, DisasOps *o) int imm = is_imm ? get_field(s->fields, i2) : 0; DisasCompare c; + /* BCR with R2 = 0 causes no branching */ + if (have_field(s->fields, r2) && get_field(s->fields, r2) == 0) { + if (m1 == 14) { + /* Perform serialization */ + /* FIXME: check for fast-BCR-serialization facility */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + } + if (m1 == 15) { + /* Perform serialization */ + /* FIXME: perform checkpoint-synchronisation */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + } + return NO_EXIT; + } + disas_jcc(s, &c, m1); return help_branch(s, &c, is_imm, imm, o->in2); } @@ -1942,102 +1958,47 @@ static ExitStatus op_cps(DisasContext *s, DisasOps *o) static ExitStatus op_cs(DisasContext *s, DisasOps *o) { - /* FIXME: needs an atomic solution for CONFIG_USER_ONLY. */ int d2 = get_field(s->fields, d2); int b2 = get_field(s->fields, b2); - int is_64 = s->insn->data; - TCGv_i64 addr, mem, cc, z; + TCGv_i64 addr, cc; /* Note that in1 = R3 (new value) and in2 = (zero-extended) R1 (expected value). */ - /* Load the memory into the (temporary) output. While the PoO only talks - about moving the memory to R1 on inequality, if we include equality it - means that R1 is equal to the memory in all conditions. 
*/ addr = get_address(s, 0, b2, d2); - if (is_64) { - tcg_gen_qemu_ld64(o->out, addr, get_mem_index(s)); - } else { - tcg_gen_qemu_ld32u(o->out, addr, get_mem_index(s)); - } + tcg_gen_atomic_cmpxchg_i64(o->out, addr, o->in2, o->in1, + get_mem_index(s), s->insn->data | MO_ALIGN); + tcg_temp_free_i64(addr); /* Are the memory and expected values (un)equal? Note that this setcond produces the output CC value, thus the NE sense of the test. */ cc = tcg_temp_new_i64(); tcg_gen_setcond_i64(TCG_COND_NE, cc, o->in2, o->out); - - /* If the memory and expected values are equal (CC==0), copy R3 to MEM. - Recall that we are allowed to unconditionally issue the store (and - thus any possible write trap), so (re-)store the original contents - of MEM in case of inequality. */ - z = tcg_const_i64(0); - mem = tcg_temp_new_i64(); - tcg_gen_movcond_i64(TCG_COND_EQ, mem, cc, z, o->in1, o->out); - if (is_64) { - tcg_gen_qemu_st64(mem, addr, get_mem_index(s)); - } else { - tcg_gen_qemu_st32(mem, addr, get_mem_index(s)); - } - tcg_temp_free_i64(z); - tcg_temp_free_i64(mem); - tcg_temp_free_i64(addr); - - /* Store CC back to cc_op. Wait until after the store so that any - exception gets the old cc_op value. */ tcg_gen_extrl_i64_i32(cc_op, cc); tcg_temp_free_i64(cc); set_cc_static(s); + return NO_EXIT; } static ExitStatus op_cdsg(DisasContext *s, DisasOps *o) { - /* FIXME: needs an atomic solution for CONFIG_USER_ONLY. */ int r1 = get_field(s->fields, r1); int r3 = get_field(s->fields, r3); int d2 = get_field(s->fields, d2); int b2 = get_field(s->fields, b2); - TCGv_i64 addrh, addrl, memh, meml, outh, outl, cc, z; + TCGv_i64 addr; + TCGv_i32 t_r1, t_r3; /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. 
*/ + addr = get_address(s, 0, b2, d2); + t_r1 = tcg_const_i32(r1); + t_r3 = tcg_const_i32(r3); + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + tcg_temp_free_i64(addr); + tcg_temp_free_i32(t_r1); + tcg_temp_free_i32(t_r3); - addrh = get_address(s, 0, b2, d2); - addrl = get_address(s, 0, b2, d2 + 8); - outh = tcg_temp_new_i64(); - outl = tcg_temp_new_i64(); - - tcg_gen_qemu_ld64(outh, addrh, get_mem_index(s)); - tcg_gen_qemu_ld64(outl, addrl, get_mem_index(s)); - - /* Fold the double-word compare with arithmetic. */ - cc = tcg_temp_new_i64(); - z = tcg_temp_new_i64(); - tcg_gen_xor_i64(cc, outh, regs[r1]); - tcg_gen_xor_i64(z, outl, regs[r1 + 1]); - tcg_gen_or_i64(cc, cc, z); - tcg_gen_movi_i64(z, 0); - tcg_gen_setcond_i64(TCG_COND_NE, cc, cc, z); - - memh = tcg_temp_new_i64(); - meml = tcg_temp_new_i64(); - tcg_gen_movcond_i64(TCG_COND_EQ, memh, cc, z, regs[r3], outh); - tcg_gen_movcond_i64(TCG_COND_EQ, meml, cc, z, regs[r3 + 1], outl); - tcg_temp_free_i64(z); - - tcg_gen_qemu_st64(memh, addrh, get_mem_index(s)); - tcg_gen_qemu_st64(meml, addrl, get_mem_index(s)); - tcg_temp_free_i64(memh); - tcg_temp_free_i64(meml); - tcg_temp_free_i64(addrh); - tcg_temp_free_i64(addrl); - - /* Save back state now that we've passed all exceptions. */ - tcg_gen_mov_i64(regs[r1], outh); - tcg_gen_mov_i64(regs[r1 + 1], outl); - tcg_gen_extrl_i64_i32(cc_op, cc); - tcg_temp_free_i64(outh); - tcg_temp_free_i64(outl); - tcg_temp_free_i64(cc); set_cc_static(s); return NO_EXIT; } @@ -2363,6 +2324,50 @@ static ExitStatus op_iske(DisasContext *s, DisasOps *o) } #endif +static ExitStatus op_laa(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the addition for the computation of CC. */ + tcg_gen_atomic_fetch_add_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the addition for setting CC. 
*/ + tcg_gen_add_i64(o->out, o->in1, o->in2); + return NO_EXIT; +} + +static ExitStatus op_lan(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the AND for the computation of CC. */ + tcg_gen_atomic_fetch_and_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. */ + tcg_gen_and_i64(o->out, o->in1, o->in2); + return NO_EXIT; +} + +static ExitStatus op_lao(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the OR for the computation of CC. */ + tcg_gen_atomic_fetch_or_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. */ + tcg_gen_or_i64(o->out, o->in1, o->in2); + return NO_EXIT; +} + +static ExitStatus op_lax(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the XOR for the computation of CC. */ + tcg_gen_atomic_fetch_xor_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. 
*/ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + return NO_EXIT; +} + static ExitStatus op_ldeb(DisasContext *s, DisasOps *o) { gen_helper_ldeb(o->out, cpu_env, o->in2); @@ -2558,6 +2563,7 @@ static ExitStatus op_lctlg(DisasContext *s, DisasOps *o) tcg_temp_free_i32(r3); return NO_EXIT; } + static ExitStatus op_lra(DisasContext *s, DisasOps *o) { check_privileged(s); @@ -2567,6 +2573,14 @@ static ExitStatus op_lra(DisasContext *s, DisasOps *o) return NO_EXIT; } +static ExitStatus op_lpp(DisasContext *s, DisasOps *o) +{ + check_privileged(s); + + tcg_gen_st_i64(o->in2, cpu_env, offsetof(CPUS390XState, pp)); + return NO_EXIT; +} + static ExitStatus op_lpsw(DisasContext *s, DisasOps *o) { TCGv_i64 t1, t2; @@ -2750,6 +2764,31 @@ static ExitStatus op_lm64(DisasContext *s, DisasOps *o) return NO_EXIT; } +static ExitStatus op_lpd(DisasContext *s, DisasOps *o) +{ + TCGv_i64 a1, a2; + TCGMemOp mop = s->insn->data; + + /* In a parallel context, stop the world and single step. */ + if (parallel_cpus) { + potential_page_fault(s); + gen_exception(EXCP_ATOMIC); + return EXIT_NORETURN; + } + + /* In a serial context, perform the two loads ... */ + a1 = get_address(s, 0, get_field(s->fields, b1), get_field(s->fields, d1)); + a2 = get_address(s, 0, get_field(s->fields, b2), get_field(s->fields, d2)); + tcg_gen_qemu_ld_i64(o->out, a1, get_mem_index(s), mop | MO_ALIGN); + tcg_gen_qemu_ld_i64(o->out2, a2, get_mem_index(s), mop | MO_ALIGN); + tcg_temp_free_i64(a1); + tcg_temp_free_i64(a2); + + /* ... and indicate that we performed them while interlocked. 
*/ + gen_op_movi_cc(s, 0); + return NO_EXIT; +} + #ifndef CONFIG_USER_ONLY static ExitStatus op_lura(DisasContext *s, DisasOps *o) { @@ -3382,6 +3421,7 @@ static ExitStatus op_sigp(DisasContext *s, DisasOps *o) check_privileged(s); potential_page_fault(s); gen_helper_sigp(cc_op, cpu_env, o->in2, r1, o->in1); + set_cc_static(s); tcg_temp_free_i32(r1); return NO_EXIT; } @@ -3628,15 +3668,8 @@ static ExitStatus op_spt(DisasContext *s, DisasOps *o) static ExitStatus op_stfl(DisasContext *s, DisasOps *o) { - TCGv_i64 f, a; - /* We really ought to have more complete indication of facilities - that we implement. Address this when STFLE is implemented. */ check_privileged(s); - f = tcg_const_i64(0xc0000000); - a = tcg_const_i64(200); - tcg_gen_qemu_st32(f, a, get_mem_index(s)); - tcg_temp_free_i64(f); - tcg_temp_free_i64(a); + gen_helper_stfl(cpu_env); return NO_EXIT; } @@ -3802,6 +3835,14 @@ static ExitStatus op_sturg(DisasContext *s, DisasOps *o) } #endif +static ExitStatus op_stfle(DisasContext *s, DisasOps *o) +{ + potential_page_fault(s); + gen_helper_stfle(cc_op, cpu_env, o->in2); + set_cc_static(s); + return NO_EXIT; +} + static ExitStatus op_st8(DisasContext *s, DisasOps *o) { tcg_gen_qemu_st8(o->in1, o->in2, get_mem_index(s)); @@ -4420,6 +4461,22 @@ static void wout_r1_D32(DisasContext *s, DisasFields *f, DisasOps *o) } #define SPEC_wout_r1_D32 SPEC_r1_even +static void wout_r3_P32(DisasContext *s, DisasFields *f, DisasOps *o) +{ + int r3 = get_field(f, r3); + store_reg32_i64(r3, o->out); + store_reg32_i64(r3 + 1, o->out2); +} +#define SPEC_wout_r3_P32 SPEC_r3_even + +static void wout_r3_P64(DisasContext *s, DisasFields *f, DisasOps *o) +{ + int r3 = get_field(f, r3); + store_reg(r3, o->out); + store_reg(r3 + 1, o->out2); +} +#define SPEC_wout_r3_P64 SPEC_r3_even + static void wout_e1(DisasContext *s, DisasFields *f, DisasOps *o) { store_freg32_i64(get_field(f, r1), o->out); @@ -4486,21 +4543,17 @@ static void wout_m2_32(DisasContext *s, DisasFields *f, DisasOps 
*o) } #define SPEC_wout_m2_32 0 -static void wout_m2_32_r1_atomic(DisasContext *s, DisasFields *f, DisasOps *o) +static void wout_in2_r1(DisasContext *s, DisasFields *f, DisasOps *o) { - /* XXX release reservation */ - tcg_gen_qemu_st32(o->out, o->addr1, get_mem_index(s)); - store_reg32_i64(get_field(f, r1), o->in2); + store_reg(get_field(f, r1), o->in2); } -#define SPEC_wout_m2_32_r1_atomic 0 +#define SPEC_wout_in2_r1 0 -static void wout_m2_64_r1_atomic(DisasContext *s, DisasFields *f, DisasOps *o) +static void wout_in2_r1_32(DisasContext *s, DisasFields *f, DisasOps *o) { - /* XXX release reservation */ - tcg_gen_qemu_st64(o->out, o->addr1, get_mem_index(s)); - store_reg(get_field(f, r1), o->in2); + store_reg32_i64(get_field(f, r1), o->in2); } -#define SPEC_wout_m2_64_r1_atomic 0 +#define SPEC_wout_in2_r1_32 0 /* ====================================================================== */ /* The "INput 1" generators. These load the first operand to an insn. */ @@ -4944,24 +4997,6 @@ static void in2_mri2_64(DisasContext *s, DisasFields *f, DisasOps *o) } #define SPEC_in2_mri2_64 0 -static void in2_m2_32s_atomic(DisasContext *s, DisasFields *f, DisasOps *o) -{ - /* XXX should reserve the address */ - in1_la2(s, f, o); - o->in2 = tcg_temp_new_i64(); - tcg_gen_qemu_ld32s(o->in2, o->addr1, get_mem_index(s)); -} -#define SPEC_in2_m2_32s_atomic 0 - -static void in2_m2_64_atomic(DisasContext *s, DisasFields *f, DisasOps *o) -{ - /* XXX should reserve the address */ - in1_la2(s, f, o); - o->in2 = tcg_temp_new_i64(); - tcg_gen_qemu_ld64(o->in2, o->addr1, get_mem_index(s)); -} -#define SPEC_in2_m2_64_atomic 0 - static void in2_i2(DisasContext *s, DisasFields *f, DisasOps *o) { o->in2 = tcg_const_i64(get_field(f, i2)); diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index 9a481c35dc..9da7e1ed38 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -301,6 +301,7 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data) #ifdef CONFIG_USER_ONLY cc->handle_mmu_fault = 
superh_cpu_handle_mmu_fault; #else + cc->do_unaligned_access = superh_cpu_do_unaligned_access; cc->get_phys_page_debug = superh_cpu_get_phys_page_debug; #endif cc->disas_set_info = superh_cpu_disas_set_info; diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index cad8989f7e..6c07c6b24b 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -24,6 +24,7 @@ #include "cpu-qom.h" #define TARGET_LONG_BITS 32 +#define ALIGNED_ONLY /* CPU Subtypes */ #define SH_CPU_SH7750 (1 << 0) @@ -92,14 +93,6 @@ #define DELAY_SLOT (1 << 0) #define DELAY_SLOT_CONDITIONAL (1 << 1) -#define DELAY_SLOT_TRUE (1 << 2) -#define DELAY_SLOT_CLEARME (1 << 3) -/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump - * after the delay slot should be taken or not. It is calculated from SR_T. - * - * It is unclear if it is permitted to modify the SR_T flag in a delay slot. - * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification. - */ typedef struct tlb_t { uint32_t vpn; /* virtual page number */ @@ -149,7 +142,8 @@ typedef struct CPUSH4State { uint32_t sgr; /* saved global register 15 */ uint32_t dbr; /* debug base register */ uint32_t pc; /* program counter */ - uint32_t delayed_pc; /* target of delayed jump */ + uint32_t delayed_pc; /* target of delayed branch */ + uint32_t delayed_cond; /* condition of delayed branch */ uint32_t mach; /* multiply and accumulate high */ uint32_t macl; /* multiply and accumulate low */ uint32_t pr; /* procedure register */ @@ -222,6 +216,9 @@ void superh_cpu_dump_state(CPUState *cpu, FILE *f, hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int superh_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int superh_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); void sh4_translate_init(void); SuperHCPU *cpu_sh4_init(const char *cpu_model); @@ -383,8 +380,7 @@ 
static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, { *pc = env->pc; *cs_base = 0; - *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL - | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ + *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) /* Bits 0-1 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ diff --git a/target/sh4/helper.c b/target/sh4/helper.c index 036c5ca56c..8f8ce81401 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -168,10 +168,8 @@ void superh_cpu_do_interrupt(CPUState *cs) /* Branch instruction should be executed again before delay slot. */ env->spc -= 2; /* Clear flags for exception/interrupt routine. */ - env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL | DELAY_SLOT_TRUE); + env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); } - if (env->flags & DELAY_SLOT_CLEARME) - env->flags = 0; if (do_exp) { env->expevt = cs->exception_index; diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 684d3f3758..528a40ac1d 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -24,6 +24,22 @@ #ifndef CONFIG_USER_ONLY +void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + switch (access_type) { + case MMU_INST_FETCH: + case MMU_DATA_LOAD: + cs->exception_index = 0x0e0; + break; + case MMU_DATA_STORE: + cs->exception_index = 0x100; + break; + } + cpu_loop_exit_restore(cs, retaddr); +} + void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { @@ -32,10 +48,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type, ret = superh_cpu_handle_mmu_fault(cs, addr, access_type, mmu_idx); if (ret) { /* now we have a real cpu fault */ - if (retaddr) { - cpu_restore_state(cs, retaddr); - } - cpu_loop_exit(cs); + 
cpu_loop_exit_restore(cs, retaddr); } } @@ -59,10 +72,7 @@ static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index, CPUState *cs = CPU(sh_env_get_cpu(env)); cs->exception_index = index; - if (retaddr) { - cpu_restore_state(cs, retaddr); - } - cpu_loop_exit(cs); + cpu_loop_exit_restore(cs, retaddr); } void helper_raise_illegal_instruction(CPUSH4State *env) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index c89a14733f..0bc2f9ff19 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -37,7 +37,8 @@ typedef struct DisasContext { struct TranslationBlock *tb; target_ulong pc; uint16_t opcode; - uint32_t flags; + uint32_t tbflags; /* should stay unmodified during the TB translation */ + uint32_t envflags; /* should stay in sync with env->flags using TCG ops */ int bstate; int memidx; uint32_t delayed_pc; @@ -49,7 +50,7 @@ typedef struct DisasContext { #if defined(CONFIG_USER_ONLY) #define IS_USER(ctx) 1 #else -#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD))) +#define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD))) #endif enum { @@ -71,7 +72,7 @@ static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst; static TCGv cpu_fregs[32]; /* internal register indexes */ -static TCGv cpu_flags, cpu_delayed_pc; +static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond; #include "exec/gen-icount.h" @@ -146,6 +147,10 @@ void sh4_translate_init(void) cpu_delayed_pc = tcg_global_mem_new_i32(cpu_env, offsetof(CPUSH4State, delayed_pc), "_delayed_pc_"); + cpu_delayed_cond = tcg_global_mem_new_i32(cpu_env, + offsetof(CPUSH4State, + delayed_cond), + "_delayed_cond_"); cpu_ldst = tcg_global_mem_new_i32(cpu_env, offsetof(CPUSH4State, ldst), "_ldst_"); @@ -199,12 +204,23 @@ static void gen_write_sr(TCGv src) { tcg_gen_andi_i32(cpu_sr, src, ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T))); - tcg_gen_shri_i32(cpu_sr_q, src, SR_Q); - tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1); - tcg_gen_shri_i32(cpu_sr_m, src, SR_M); - tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 
1); - tcg_gen_shri_i32(cpu_sr_t, src, SR_T); - tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1); + tcg_gen_extract_i32(cpu_sr_q, src, SR_Q, 1); + tcg_gen_extract_i32(cpu_sr_m, src, SR_M, 1); + tcg_gen_extract_i32(cpu_sr_t, src, SR_T, 1); +} + +static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc) +{ + if (save_pc) { + tcg_gen_movi_i32(cpu_pc, ctx->pc); + } + if (ctx->delayed_pc != (uint32_t) -1) { + tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc); + } + if ((ctx->tbflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) + != ctx->envflags) { + tcg_gen_movi_i32(cpu_flags, ctx->envflags); + } } static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) @@ -241,6 +257,7 @@ static void gen_jump(DisasContext * ctx) /* Target is not statically known, it comes necessarily from a delayed jump as immediate jump are conditinal jumps */ tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc); + tcg_gen_discard_i32(cpu_delayed_pc); if (ctx->singlestep_enabled) gen_helper_debug(cpu_env); tcg_gen_exit_tb(0); @@ -249,24 +266,17 @@ static void gen_jump(DisasContext * ctx) } } -static inline void gen_branch_slot(uint32_t delayed_pc, int t) -{ - TCGLabel *label = gen_new_label(); - tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc); - tcg_gen_brcondi_i32(t ? 
TCG_COND_EQ : TCG_COND_NE, cpu_sr_t, 0, label); - tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); - gen_set_label(label); -} - /* Immediate conditional jump (bt or bf) */ static void gen_conditional_jump(DisasContext * ctx, target_ulong ift, target_ulong ifnott) { TCGLabel *l1 = gen_new_label(); + gen_save_cpu_state(ctx, false); tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1); gen_goto_tb(ctx, 0, ifnott); gen_set_label(l1); gen_goto_tb(ctx, 1, ift); + ctx->bstate = BS_BRANCH; } /* Delayed conditional jump (bt or bf) */ @@ -277,20 +287,14 @@ static void gen_delayed_conditional_jump(DisasContext * ctx) l1 = gen_new_label(); ds = tcg_temp_new(); - tcg_gen_andi_i32(ds, cpu_flags, DELAY_SLOT_TRUE); + tcg_gen_mov_i32(ds, cpu_delayed_cond); + tcg_gen_discard_i32(cpu_delayed_cond); tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1); gen_goto_tb(ctx, 1, ctx->pc + 2); gen_set_label(l1); - tcg_gen_andi_i32(cpu_flags, cpu_flags, ~DELAY_SLOT_TRUE); gen_jump(ctx); } -static inline void gen_store_flags(uint32_t flags) -{ - tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); - tcg_gen_ori_i32(cpu_flags, cpu_flags, flags); -} - static inline void gen_load_fpr64(TCGv_i64 t, int reg) { tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]); @@ -298,13 +302,7 @@ static inline void gen_load_fpr64(TCGv_i64 t, int reg) static inline void gen_store_fpr64 (TCGv_i64 t, int reg) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tmp, t); - tcg_gen_mov_i32(cpu_fregs[reg + 1], tmp); - tcg_gen_shri_i64(t, t, 32); - tcg_gen_extrl_i64_i32(tmp, t); - tcg_gen_mov_i32(cpu_fregs[reg], tmp); - tcg_temp_free_i32(tmp); + tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t); } #define B3_0 (ctx->opcode & 0xf) @@ -317,51 +315,50 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define B11_8 ((ctx->opcode >> 8) & 0xf) #define B15_12 ((ctx->opcode >> 12) & 0xf) -#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\ - && (ctx->flags & (1u << SR_RB))\ +#define 
REG(x) ((x) < 8 && (ctx->tbflags & (1u << SR_MD))\ + && (ctx->tbflags & (1u << SR_RB))\ ? (cpu_gregs[x + 16]) : (cpu_gregs[x])) -#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\ - || !(ctx->flags & (1u << SR_RB)))\ +#define ALTREG(x) ((x) < 8 && (!(ctx->tbflags & (1u << SR_MD))\ + || !(ctx->tbflags & (1u << SR_RB)))\ ? (cpu_gregs[x + 16]) : (cpu_gregs[x])) -#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x)) +#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)) #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) -#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x)) +#define XREG(x) (ctx->tbflags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x)) #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */ #define CHECK_NOT_DELAY_SLOT \ - if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) \ - { \ - tcg_gen_movi_i32(cpu_pc, ctx->pc); \ - gen_helper_raise_slot_illegal_instruction(cpu_env); \ - ctx->bstate = BS_BRANCH; \ - return; \ - } - -#define CHECK_PRIVILEGED \ - if (IS_USER(ctx)) { \ - tcg_gen_movi_i32(cpu_pc, ctx->pc); \ - if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ - gen_helper_raise_slot_illegal_instruction(cpu_env); \ - } else { \ - gen_helper_raise_illegal_instruction(cpu_env); \ - } \ - ctx->bstate = BS_BRANCH; \ - return; \ - } - -#define CHECK_FPU_ENABLED \ - if (ctx->flags & (1u << SR_FD)) { \ - tcg_gen_movi_i32(cpu_pc, ctx->pc); \ - if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ - gen_helper_raise_slot_fpu_disable(cpu_env); \ - } else { \ - gen_helper_raise_fpu_disable(cpu_env); \ - } \ - ctx->bstate = BS_BRANCH; \ - return; \ - } + if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + gen_save_cpu_state(ctx, true); \ + gen_helper_raise_slot_illegal_instruction(cpu_env); \ + ctx->bstate = BS_EXCP; \ + return; \ + } + +#define CHECK_PRIVILEGED \ + if (IS_USER(ctx)) { \ + gen_save_cpu_state(ctx, true); \ + if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + 
gen_helper_raise_slot_illegal_instruction(cpu_env); \ + } else { \ + gen_helper_raise_illegal_instruction(cpu_env); \ + } \ + ctx->bstate = BS_EXCP; \ + return; \ + } + +#define CHECK_FPU_ENABLED \ + if (ctx->tbflags & (1u << SR_FD)) { \ + gen_save_cpu_state(ctx, true); \ + if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + gen_helper_raise_slot_fpu_disable(cpu_env); \ + } else { \ + gen_helper_raise_fpu_disable(cpu_env); \ + } \ + ctx->bstate = BS_EXCP; \ + return; \ + } static void _decode_opc(DisasContext * ctx) { @@ -409,7 +406,7 @@ static void _decode_opc(DisasContext * ctx) case 0x000b: /* rts */ CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; return; case 0x0028: /* clrmac */ @@ -431,7 +428,7 @@ static void _decode_opc(DisasContext * ctx) CHECK_NOT_DELAY_SLOT gen_write_sr(cpu_ssr); tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; return; case 0x0058: /* sets */ @@ -497,15 +494,13 @@ static void _decode_opc(DisasContext * ctx) case 0xa000: /* bra disp */ CHECK_NOT_DELAY_SLOT ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2; - tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; return; case 0xb000: /* bsr disp */ CHECK_NOT_DELAY_SLOT tcg_gen_movi_i32(cpu_pr, ctx->pc + 4); ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2; - tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; return; } @@ -939,7 +934,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(fp, XREG(B7_4)); gen_store_fpr64(fp, XREG(B11_8)); @@ -950,7 +945,7 @@ static void _decode_opc(DisasContext * ctx) 
return; case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); int fr = XREG(B7_4); tcg_gen_addi_i32(addr_hi, REG(B11_8), 4); @@ -966,7 +961,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); int fr = XREG(B11_8); tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); @@ -980,7 +975,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); int fr = XREG(B11_8); tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); @@ -998,7 +993,7 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED TCGv addr = tcg_temp_new_i32(); tcg_gen_subi_i32(addr, REG(B11_8), 4); - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { int fr = XREG(B7_4); tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL); tcg_gen_subi_i32(addr, addr, 4); @@ -1015,7 +1010,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new_i32(); tcg_gen_add_i32(addr, REG(B7_4), REG(0)); - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { int fr = XREG(B11_8); tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr, ctx->memidx, MO_TEUL); @@ -1034,7 +1029,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_add_i32(addr, REG(B11_8), REG(0)); - if (ctx->flags & FPSCR_SZ) { + if (ctx->tbflags & FPSCR_SZ) { int fr = XREG(B7_4); tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr, ctx->memidx, MO_TEUL); @@ -1056,7 +1051,7 @@ static void _decode_opc(DisasContext * ctx) case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */ { CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_PR) { + if (ctx->tbflags & FPSCR_PR) { TCGv_i64 fp0, 
fp1; if (ctx->opcode & 0x0110) @@ -1125,7 +1120,7 @@ static void _decode_opc(DisasContext * ctx) case 0xf00e: /* fmac FR0,RM,Rn */ { CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_PR) { + if (ctx->tbflags & FPSCR_PR) { break; /* illegal instruction */ } else { gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env, @@ -1155,25 +1150,23 @@ static void _decode_opc(DisasContext * ctx) return; case 0x8b00: /* bf label */ CHECK_NOT_DELAY_SLOT - gen_conditional_jump(ctx, ctx->pc + 2, - ctx->pc + 4 + B7_0s * 2); - ctx->bstate = BS_BRANCH; + gen_conditional_jump(ctx, ctx->pc + 2, ctx->pc + 4 + B7_0s * 2); return; case 0x8f00: /* bf/s label */ CHECK_NOT_DELAY_SLOT - gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0); - ctx->flags |= DELAY_SLOT_CONDITIONAL; + tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1); + ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2; + ctx->envflags |= DELAY_SLOT_CONDITIONAL; return; case 0x8900: /* bt label */ CHECK_NOT_DELAY_SLOT - gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, - ctx->pc + 2); - ctx->bstate = BS_BRANCH; + gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, ctx->pc + 2); return; case 0x8d00: /* bt/s label */ CHECK_NOT_DELAY_SLOT - gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1); - ctx->flags |= DELAY_SLOT_CONDITIONAL; + tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t); + ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2; + ctx->envflags |= DELAY_SLOT_CONDITIONAL; return; case 0x8800: /* cmp/eq #imm,R0 */ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s); @@ -1281,11 +1274,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv imm; CHECK_NOT_DELAY_SLOT - tcg_gen_movi_i32(cpu_pc, ctx->pc); + gen_save_cpu_state(ctx, true); imm = tcg_const_i32(B7_0); gen_helper_trapa(cpu_env, imm); tcg_temp_free(imm); - ctx->bstate = BS_BRANCH; + ctx->bstate = BS_EXCP; } return; case 0xc800: /* tst #imm,R0 */ @@ -1354,14 +1347,14 @@ static void _decode_opc(DisasContext * ctx) case 0x0023: /* braf Rn */ CHECK_NOT_DELAY_SLOT 
tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->pc + 4); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; return; case 0x0003: /* bsrf Rn */ CHECK_NOT_DELAY_SLOT tcg_gen_movi_i32(cpu_pr, ctx->pc + 4); tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; return; case 0x4015: /* cmp/pl Rn */ @@ -1377,14 +1370,14 @@ static void _decode_opc(DisasContext * ctx) case 0x402b: /* jmp @Rn */ CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8)); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; return; case 0x400b: /* jsr @Rn */ CHECK_NOT_DELAY_SLOT tcg_gen_movi_i32(cpu_pr, ctx->pc + 4); tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8)); - ctx->flags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; return; case 0x400e: /* ldc Rm,SR */ @@ -1508,17 +1501,23 @@ static void _decode_opc(DisasContext * ctx) } ctx->has_movcal = 1; return; - case 0x40a9: - /* MOVUA.L @Rm,R0 (Rm) -> R0 - Load non-boundary-aligned data */ - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); - return; - case 0x40e9: - /* MOVUA.L @Rm+,R0 (Rm) -> R0, Rm + 4 -> Rm - Load non-boundary-aligned data */ - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); - tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); - return; + case 0x40a9: /* movua.l @Rm,R0 */ + /* Load non-boundary-aligned data */ + if (ctx->features & SH_FEATURE_SH4A) { + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_UNALN); + return; + } + break; + case 0x40e9: /* movua.l @Rm+,R0 */ + /* Load non-boundary-aligned data */ + if (ctx->features & SH_FEATURE_SH4A) { + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_UNALN); + tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); + return; + } + break; case 0x0029: /* movt Rn */ tcg_gen_mov_i32(REG(B11_8), cpu_sr_t); return; @@ -1576,10 
+1575,11 @@ static void _decode_opc(DisasContext * ctx) else break; case 0x00ab: /* synco */ - if (ctx->features & SH_FEATURE_SH4A) - return; - else - break; + if (ctx->features & SH_FEATURE_SH4A) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + return; + } + break; case 0x4024: /* rotcl Rn */ { TCGv tmp = tcg_temp_new(); @@ -1640,19 +1640,14 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 16); return; case 0x401b: /* tas.b @Rn */ - { - TCGv addr, val; - addr = tcg_temp_local_new(); - tcg_gen_mov_i32(addr, REG(B11_8)); - val = tcg_temp_local_new(); - tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB); + { + TCGv val = tcg_const_i32(0x80); + tcg_gen_atomic_fetch_or_i32(val, REG(B11_8), val, + ctx->memidx, MO_UB); tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0); - tcg_gen_ori_i32(val, val, 0x80); - tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB); - tcg_temp_free(val); - tcg_temp_free(addr); - } - return; + tcg_temp_free(val); + } + return; case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */ CHECK_FPU_ENABLED tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fpul); @@ -1663,7 +1658,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_PR) { + if (ctx->tbflags & FPSCR_PR) { TCGv_i64 fp; if (ctx->opcode & 0x0100) break; /* illegal instruction */ @@ -1678,7 +1673,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_PR) { + if (ctx->tbflags & FPSCR_PR) { TCGv_i64 fp; if (ctx->opcode & 0x0100) break; /* illegal instruction */ @@ -1699,7 +1694,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf05d: /* fabs FRn/DRn */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_PR) { + if (ctx->tbflags & FPSCR_PR) { if (ctx->opcode & 0x0100) break; /* illegal instruction */ TCGv_i64 fp = tcg_temp_new_i64(); 
@@ -1713,7 +1708,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf06d: /* fsqrt FRn */ CHECK_FPU_ENABLED - if (ctx->flags & FPSCR_PR) { + if (ctx->tbflags & FPSCR_PR) { if (ctx->opcode & 0x0100) break; /* illegal instruction */ TCGv_i64 fp = tcg_temp_new_i64(); @@ -1731,13 +1726,13 @@ static void _decode_opc(DisasContext * ctx) break; case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED - if (!(ctx->flags & FPSCR_PR)) { + if (!(ctx->tbflags & FPSCR_PR)) { tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0); } return; case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED - if (!(ctx->flags & FPSCR_PR)) { + if (!(ctx->tbflags & FPSCR_PR)) { tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000); } return; @@ -1761,7 +1756,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf0ed: /* fipr FVm,FVn */ CHECK_FPU_ENABLED - if ((ctx->flags & FPSCR_PR) == 0) { + if ((ctx->tbflags & FPSCR_PR) == 0) { TCGv m, n; m = tcg_const_i32((ctx->opcode >> 8) & 3); n = tcg_const_i32((ctx->opcode >> 10) & 3); @@ -1774,7 +1769,7 @@ static void _decode_opc(DisasContext * ctx) case 0xf0fd: /* ftrv XMTRX,FVn */ CHECK_FPU_ENABLED if ((ctx->opcode & 0x0300) == 0x0100 && - (ctx->flags & FPSCR_PR) == 0) { + (ctx->tbflags & FPSCR_PR) == 0) { TCGv n; n = tcg_const_i32((ctx->opcode >> 10) & 3); gen_helper_ftrv(cpu_env, n); @@ -1788,31 +1783,25 @@ static void _decode_opc(DisasContext * ctx) ctx->opcode, ctx->pc); fflush(stderr); #endif - tcg_gen_movi_i32(cpu_pc, ctx->pc); - if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + gen_save_cpu_state(ctx, true); + if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { gen_helper_raise_slot_illegal_instruction(cpu_env); } else { gen_helper_raise_illegal_instruction(cpu_env); } - ctx->bstate = BS_BRANCH; + ctx->bstate = BS_EXCP; } static void decode_opc(DisasContext * ctx) { - uint32_t old_flags = ctx->flags; + uint32_t old_flags = ctx->envflags; _decode_opc(ctx); if (old_flags & (DELAY_SLOT | 
DELAY_SLOT_CONDITIONAL)) { - if (ctx->flags & DELAY_SLOT_CLEARME) { - gen_store_flags(0); - } else { - /* go out of the delay slot */ - uint32_t new_flags = ctx->flags; - new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); - gen_store_flags(new_flags); - } - ctx->flags = 0; + /* go out of the delay slot */ + ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + tcg_gen_movi_i32(cpu_flags, ctx->envflags); ctx->bstate = BS_BRANCH; if (old_flags & DELAY_SLOT_CONDITIONAL) { gen_delayed_conditional_jump(ctx); @@ -1821,10 +1810,6 @@ static void decode_opc(DisasContext * ctx) } } - - /* go into a delay slot */ - if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) - gen_store_flags(ctx->flags); } void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) @@ -1838,16 +1823,17 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) pc_start = tb->pc; ctx.pc = pc_start; - ctx.flags = (uint32_t)tb->flags; + ctx.tbflags = (uint32_t)tb->flags; + ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL); ctx.bstate = BS_NONE; - ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0; + ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0; /* We don't know if the delayed pc came from a dynamic or static branch, so assume it is a dynamic branch. 
*/ ctx.delayed_pc = -1; /* use delayed pc from env pointer */ ctx.tb = tb; ctx.singlestep_enabled = cs->singlestep_enabled; ctx.features = env->features; - ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA); + ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA); num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; @@ -1860,14 +1846,14 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) gen_tb_start(tb); while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) { - tcg_gen_insn_start(ctx.pc, ctx.flags); + tcg_gen_insn_start(ctx.pc, ctx.envflags); num_insns++; if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) { /* We have hit a breakpoint - make sure PC is up-to-date */ - tcg_gen_movi_i32(cpu_pc, ctx.pc); + gen_save_cpu_state(&ctx, true); gen_helper_debug(cpu_env); - ctx.bstate = BS_BRANCH; + ctx.bstate = BS_EXCP; /* The address covered by the breakpoint must be included in [tb->pc, tb->pc + tb->size) in order to for it to be properly cleared -- thus we increment the PC here so that @@ -1896,23 +1882,20 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) if (tb->cflags & CF_LAST_IO) gen_io_end(); if (cs->singlestep_enabled) { - tcg_gen_movi_i32(cpu_pc, ctx.pc); + gen_save_cpu_state(&ctx, true); gen_helper_debug(cpu_env); } else { switch (ctx.bstate) { case BS_STOP: - /* gen_op_interrupt_restart(); */ - /* fall through */ + gen_save_cpu_state(&ctx, true); + tcg_gen_exit_tb(0); + break; case BS_NONE: - if (ctx.flags) { - gen_store_flags(ctx.flags | DELAY_SLOT_CLEARME); - } + gen_save_cpu_state(&ctx, false); gen_goto_tb(&ctx, 0, ctx.pc); break; case BS_EXCP: - /* gen_op_interrupt_restart(); */ - tcg_gen_exit_tb(0); - break; + /* fall through */ case BS_BRANCH: default: break; @@ -1941,4 +1924,7 @@ void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb, { env->pc = data[0]; env->flags = data[1]; + /* Theoretically delayed_pc should also be restored. 
In practice the + branch instruction is re-executed after exception, so the delayed + branch target will be recomputed. */ } diff --git a/tests/.gitignore b/tests/.gitignore index a966740c2c..40c2e3e757 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -11,6 +11,8 @@ check-qom-proplist qht-bench rcutorture test-aio +test-aio-multithread +test-arm-mptimer test-base64 test-bitops test-bitcnt @@ -24,6 +26,7 @@ test-crypto-afsplit test-crypto-block test-crypto-cipher test-crypto-hash +test-crypto-hmac test-crypto-ivgen test-crypto-pbkdf test-crypto-secret @@ -37,6 +40,7 @@ test-crypto-tlssession-server/ test-crypto-xts test-cutils test-hbitmap +test-hmp test-int128 test-iov test-io-channel-buffer diff --git a/tests/Makefile.include b/tests/Makefile.include index 31931c0d77..16ff8f399f 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -260,6 +260,7 @@ check-qtest-i386-y += tests/test-filter-mirror$(EXESUF) check-qtest-i386-y += tests/test-filter-redirector$(EXESUF) check-qtest-i386-y += tests/postcopy-test$(EXESUF) check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) +check-qtest-i386-y += tests/numa-test$(EXESUF) check-qtest-x86_64-y += $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -300,6 +301,7 @@ check-qtest-ppc64-y += tests/test-netfilter$(EXESUF) check-qtest-ppc64-y += tests/test-filter-mirror$(EXESUF) check-qtest-ppc64-y += tests/test-filter-redirector$(EXESUF) check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) +check-qtest-ppc64-y += tests/numa-test$(EXESUF) check-qtest-ppc64-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF) check-qtest-sh4-y = tests/endianness-test$(EXESUF) @@ -324,6 +326,8 @@ gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF) gcov-files-arm-y += hw/timer/arm_mptimer.c +check-qtest-aarch64-y = tests/numa-test$(EXESUF) + 
check-qtest-microblazeel-y = $(check-qtest-microblaze-y) check-qtest-xtensaeb-y = $(check-qtest-xtensa-y) @@ -753,6 +757,7 @@ tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-use tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y) +tests/numa-test$(EXESUF): tests/numa-test.o tests/migration/stress$(EXESUF): tests/migration/stress.o $(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ,"LINK","$(TARGET_DIR)$@") diff --git a/tests/acpi-test-data/pc/SLIT.cphp b/tests/acpi-test-data/pc/SLIT.cphp Binary files differnew file mode 100644 index 0000000000..74ec3b4b46 --- /dev/null +++ b/tests/acpi-test-data/pc/SLIT.cphp diff --git a/tests/acpi-test-data/pc/SLIT.memhp b/tests/acpi-test-data/pc/SLIT.memhp Binary files differnew file mode 100644 index 0000000000..74ec3b4b46 --- /dev/null +++ b/tests/acpi-test-data/pc/SLIT.memhp diff --git a/tests/acpi-test-data/pc/SRAT.memhp b/tests/acpi-test-data/pc/SRAT.memhp Binary files differindex 66ce9a8981..a7dddf7760 100644 --- a/tests/acpi-test-data/pc/SRAT.memhp +++ b/tests/acpi-test-data/pc/SRAT.memhp diff --git a/tests/acpi-test-data/q35/SLIT.cphp b/tests/acpi-test-data/q35/SLIT.cphp Binary files differnew file mode 100644 index 0000000000..74ec3b4b46 --- /dev/null +++ b/tests/acpi-test-data/q35/SLIT.cphp diff --git a/tests/acpi-test-data/q35/SLIT.memhp b/tests/acpi-test-data/q35/SLIT.memhp Binary files differnew file mode 100644 index 0000000000..74ec3b4b46 --- /dev/null +++ b/tests/acpi-test-data/q35/SLIT.memhp diff --git a/tests/acpi-test-data/q35/SRAT.memhp b/tests/acpi-test-data/q35/SRAT.memhp Binary files differindex 66ce9a8981..a7dddf7760 100644 --- a/tests/acpi-test-data/q35/SRAT.memhp +++ b/tests/acpi-test-data/q35/SRAT.memhp diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index bdef3b9cee..63da978f0b 100644 --- 
a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -723,7 +723,8 @@ static void test_acpi_piix4_tcg_cphp(void) data.machine = MACHINE_PC; data.variant = ".cphp"; test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6" - " -numa node -numa node", + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", &data); free_test_data(&data); } @@ -736,7 +737,8 @@ static void test_acpi_q35_tcg_cphp(void) data.machine = MACHINE_Q35; data.variant = ".cphp"; test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6" - " -numa node -numa node", + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", &data); free_test_data(&data); } @@ -785,7 +787,10 @@ static void test_acpi_q35_tcg_memhp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".memhp"; - test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data); + test_acpi_one(" -m 128,slots=3,maxmem=1G" + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", + &data); free_test_data(&data); } @@ -796,7 +801,10 @@ static void test_acpi_piix4_tcg_memhp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".memhp"; - test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data); + test_acpi_one(" -m 128,slots=3,maxmem=1G" + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", + &data); free_test_data(&data); } diff --git a/tests/drive_del-test.c b/tests/drive_del-test.c index 121b9c917e..2175139abb 100644 --- a/tests/drive_del-test.c +++ b/tests/drive_del-test.c @@ -92,7 +92,7 @@ static void test_after_failed_device_add(void) static void test_drive_del_device_del(void) { /* Start with a drive used by a device that unplugs instantaneously */ - qtest_start("-drive if=none,id=drive0,file=/dev/null,format=raw" + qtest_start("-drive if=none,id=drive0,file=null-co://,format=raw" " -device virtio-scsi-pci" " -device scsi-hd,drive=drive0,id=dev0"); diff --git a/tests/numa-test.c b/tests/numa-test.c new file mode 100644 index 0000000000..c3475d6d5e --- 
/dev/null +++ b/tests/numa-test.c @@ -0,0 +1,302 @@ +/* + * NUMA configuration test cases + * + * Copyright (c) 2017 Red Hat Inc. + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" + +static char *make_cli(const char *generic_cli, const char *test_cli) +{ + return g_strdup_printf("%s %s", generic_cli ? generic_cli : "", test_cli); +} + +static char *hmp_info_numa(void) +{ + QDict *resp; + char *s; + + resp = qmp("{ 'execute': 'human-monitor-command', 'arguments': " + "{ 'command-line': 'info numa '} }"); + g_assert(resp); + g_assert(qdict_haskey(resp, "return")); + s = g_strdup(qdict_get_str(resp, "return")); + g_assert(s); + QDECREF(resp); + return s; +} + +static void test_mon_explicit(const void *data) +{ + char *s; + char *cli; + + cli = make_cli(data, "-smp 8 " + "-numa node,nodeid=0,cpus=0-3 " + "-numa node,nodeid=1,cpus=4-7 "); + qtest_start(cli); + + s = hmp_info_numa(); + g_assert(strstr(s, "node 0 cpus: 0 1 2 3")); + g_assert(strstr(s, "node 1 cpus: 4 5 6 7")); + g_free(s); + + qtest_end(); + g_free(cli); +} + +static void test_mon_default(const void *data) +{ + char *s; + char *cli; + + cli = make_cli(data, "-smp 8 -numa node -numa node"); + qtest_start(cli); + + s = hmp_info_numa(); + g_assert(strstr(s, "node 0 cpus: 0 2 4 6")); + g_assert(strstr(s, "node 1 cpus: 1 3 5 7")); + g_free(s); + + qtest_end(); + g_free(cli); +} + +static void test_mon_partial(const void *data) +{ + char *s; + char *cli; + + cli = make_cli(data, "-smp 8 " + "-numa node,nodeid=0,cpus=0-1 " + "-numa node,nodeid=1,cpus=4-5 "); + qtest_start(cli); + + s = hmp_info_numa(); + g_assert(strstr(s, "node 0 cpus: 0 1 2 3 6 7")); + g_assert(strstr(s, "node 1 cpus: 4 5")); + g_free(s); + + qtest_end(); + g_free(cli); +} + +static QList *get_cpus(QDict **resp) +{ + *resp = qmp("{ 'execute': 
'query-cpus' }"); + g_assert(*resp); + g_assert(qdict_haskey(*resp, "return")); + return qdict_get_qlist(*resp, "return"); +} + +static void test_query_cpus(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t cpu_idx, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "CPU")); + g_assert(qdict_haskey(cpu, "props")); + + cpu_idx = qdict_get_int(cpu, "CPU"); + props = qdict_get_qdict(cpu, "props"); + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + if (cpu_idx >= 0 && cpu_idx < 4) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert_cmpint(node, ==, 1); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + +static void pc_numa_cpu(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 " + "-numa node,nodeid=0 -numa node,nodeid=1 " + "-numa cpu,node-id=1,socket-id=0 " + "-numa cpu,node-id=0,socket-id=1,core-id=0 " + "-numa cpu,node-id=0,socket-id=1,core-id=1,thread-id=0 " + "-numa cpu,node-id=1,socket-id=1,core-id=1,thread-id=1"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t socket, core, thread, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "props")); + props = qdict_get_qdict(cpu, "props"); + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + g_assert(qdict_haskey(props, "socket-id")); + socket = qdict_get_int(props, "socket-id"); + g_assert(qdict_haskey(props, "core-id")); + core = qdict_get_int(props, "core-id"); + g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, "thread-id"); + + if (socket == 0) 
{ + g_assert_cmpint(node, ==, 1); + } else if (socket == 1 && core == 0) { + g_assert_cmpint(node, ==, 0); + } else if (socket == 1 && core == 1 && thread == 0) { + g_assert_cmpint(node, ==, 0); + } else if (socket == 1 && core == 1 && thread == 1) { + g_assert_cmpint(node, ==, 1); + } else { + g_assert(false); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + +static void spapr_numa_cpu(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-smp 4,cores=4 " + "-numa node,nodeid=0 -numa node,nodeid=1 " + "-numa cpu,node-id=0,core-id=0 " + "-numa cpu,node-id=0,core-id=1 " + "-numa cpu,node-id=0,core-id=2 " + "-numa cpu,node-id=1,core-id=3"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t core, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "props")); + props = qdict_get_qdict(cpu, "props"); + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + g_assert(qdict_haskey(props, "core-id")); + core = qdict_get_int(props, "core-id"); + + if (core >= 0 && core < 3) { + g_assert_cmpint(node, ==, 0); + } else if (core == 3) { + g_assert_cmpint(node, ==, 1); + } else { + g_assert(false); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + +static void aarch64_numa_cpu(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-smp 2 " + "-numa node,nodeid=0 -numa node,nodeid=1 " + "-numa cpu,node-id=1,thread-id=0 " + "-numa cpu,node-id=0,thread-id=1"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t thread, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "props")); + props = qdict_get_qdict(cpu, "props"); + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + 
g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, "thread-id"); + + if (thread == 0) { + g_assert_cmpint(node, ==, 1); + } else if (thread == 1) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert(false); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + +int main(int argc, char **argv) +{ + const char *args = NULL; + const char *arch = qtest_get_arch(); + + if (strcmp(arch, "aarch64") == 0) { + args = "-machine virt"; + } + + g_test_init(&argc, &argv, NULL); + + qtest_add_data_func("/numa/mon/default", args, test_mon_default); + qtest_add_data_func("/numa/mon/cpus/explicit", args, test_mon_explicit); + qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial); + qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus); + + if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) { + qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); + } + + if (!strcmp(arch, "ppc64")) { + qtest_add_data_func("/numa/spapr/cpu/explicit", args, spapr_numa_cpu); + } + + if (!strcmp(arch, "aarch64")) { + qtest_add_data_func("/numa/aarch64/cpu/explicit", args, + aarch64_numa_cpu); + } + + return g_test_run(); +} diff --git a/tests/nvme-test.c b/tests/nvme-test.c index c8bece4434..7674a446e4 100644 --- a/tests/nvme-test.c +++ b/tests/nvme-test.c @@ -22,7 +22,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); qtest_add_func("/nvme/nop", nop); - qtest_start("-drive id=drv0,if=none,file=/dev/null,format=raw " + qtest_start("-drive id=drv0,if=none,file=null-co://,format=raw " "-device nvme,drive=drv0,serial=foo"); ret = g_test_run(); diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c index de35a18903..e86f87656a 100644 --- a/tests/postcopy-test.c +++ b/tests/postcopy-test.c @@ -41,7 +41,7 @@ static bool ufd_version_check(void) struct uffdio_api api_struct; uint64_t ioctl_mask; - int ufd = ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); if 
(ufd == -1) { g_test_message("Skipping test: userfaultfd not available"); diff --git a/tests/qemu-iotests/019.out b/tests/qemu-iotests/019.out index 0124264975..17a7c036b9 100644 --- a/tests/qemu-iotests/019.out +++ b/tests/qemu-iotests/019.out @@ -542,8 +542,8 @@ Testing conversion with -B TEST_DIR/t.IMGFMT.base Checking if backing clusters are allocated when they shouldn't -0/128 sectors allocated at offset 1 MiB -0/128 sectors allocated at offset 4.001 GiB +0/65536 bytes allocated at offset 1 MiB +0/65536 bytes allocated at offset 4.001 GiB Reading === IO: pattern 42 @@ -1086,8 +1086,8 @@ Testing conversion with -o backing_file=TEST_DIR/t.IMGFMT.base Checking if backing clusters are allocated when they shouldn't -0/128 sectors allocated at offset 1 MiB -0/128 sectors allocated at offset 4.001 GiB +0/65536 bytes allocated at offset 1 MiB +0/65536 bytes allocated at offset 4.001 GiB Reading === IO: pattern 42 diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 0d472d5f27..e00c11b804 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -63,8 +63,8 @@ class TestSingleDrive(iotests.QMPTestCase): def test_stream_intermediate(self): self.assert_no_active_block_jobs() - self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img), - qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img), + self.assertNotEqual(qemu_io('-f', 'raw', '-rU', '-c', 'map', backing_img), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', mid_img), 'image file map matches backing file before streaming') result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid') @@ -114,7 +114,7 @@ class TestSingleDrive(iotests.QMPTestCase): self.assert_no_active_block_jobs() # The image map is empty before the operation - empty_map = qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img) + empty_map = qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', test_img) # This is a no-op: no data should ever be copied from the base image result = self.vm.qmp('block-stream', 
device='drive0', base=mid_img) @@ -197,8 +197,8 @@ class TestParallelOps(iotests.QMPTestCase): # Check that the maps don't match before the streaming operations for i in range(2, self.num_imgs, 2): - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]), - qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[i]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[i-1]), 'image file map matches backing file before streaming') # Create all streaming jobs @@ -351,8 +351,8 @@ class TestParallelOps(iotests.QMPTestCase): def test_stream_base_node_name(self): self.assert_no_active_block_jobs() - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]), - qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]), + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[4]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[3]), 'image file map matches backing file before streaming') # Error: the base node does not exist @@ -422,8 +422,8 @@ class TestQuorum(iotests.QMPTestCase): if not iotests.supports_quorum(): return - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]), - qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]), + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.backing[0]), 'image file map matches backing file before streaming') self.assert_no_active_block_jobs() diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046 index e528b67cc6..f2ebecf24c 100755 --- a/tests/qemu-iotests/046 +++ b/tests/qemu-iotests/046 @@ -192,7 +192,7 @@ echo "== Verify image content ==" function verify_io() { - if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > /dev/null); then + if ($QEMU_IMG info -U -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > 
/dev/null); then # For v2 images, discarded clusters are read from the backing file # Keep the variable empty so that the backing file value can be used as # the default below diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index aafcd249f6..ba4da65c77 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -458,17 +458,18 @@ class TestDriveCompression(iotests.QMPTestCase): except OSError: pass - def do_prepare_drives(self, fmt, args): + def do_prepare_drives(self, fmt, args, attach_target): self.vm = iotests.VM().add_drive(test_img) qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") + if attach_target: + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() - def do_test_compress_complete(self, cmd, format, **args): - self.do_prepare_drives(format['type'], format['args']) + def do_test_compress_complete(self, cmd, format, attach_target, **args): + self.do_prepare_drives(format['type'], format['args'], attach_target) self.assert_no_active_block_jobs() @@ -484,15 +485,16 @@ class TestDriveCompression(iotests.QMPTestCase): def test_complete_compress_drive_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_complete('drive-backup', format, + self.do_test_compress_complete('drive-backup', format, False, target=blockdev_target_img, mode='existing') def test_complete_compress_blockdev_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_complete('blockdev-backup', format, target='drive1') + self.do_test_compress_complete('blockdev-backup', format, True, + target='drive1') - def do_test_compress_cancel(self, cmd, format, **args): - self.do_prepare_drives(format['type'], format['args']) + def do_test_compress_cancel(self, cmd, format, attach_target, **args): + self.do_prepare_drives(format['type'], 
format['args'], attach_target) self.assert_no_active_block_jobs() @@ -506,15 +508,16 @@ class TestDriveCompression(iotests.QMPTestCase): def test_compress_cancel_drive_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_cancel('drive-backup', format, + self.do_test_compress_cancel('drive-backup', format, False, target=blockdev_target_img, mode='existing') def test_compress_cancel_blockdev_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_cancel('blockdev-backup', format, target='drive1') + self.do_test_compress_cancel('blockdev-backup', format, True, + target='drive1') - def do_test_compress_pause(self, cmd, format, **args): - self.do_prepare_drives(format['type'], format['args']) + def do_test_compress_pause(self, cmd, format, attach_target, **args): + self.do_prepare_drives(format['type'], format['args'], attach_target) self.assert_no_active_block_jobs() @@ -546,12 +549,13 @@ class TestDriveCompression(iotests.QMPTestCase): def test_compress_pause_drive_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_pause('drive-backup', format, + self.do_test_compress_pause('drive-backup', format, False, target=blockdev_target_img, mode='existing') def test_compress_pause_blockdev_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_pause('blockdev-backup', format, target='drive1') + self.do_test_compress_pause('blockdev-backup', format, True, + target='drive1') if __name__ == '__main__': iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 5d40206ef8..9e8f5b9d79 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -135,7 +135,7 @@ qemu-img: Error while amending options: Input/output error Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; 
XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +qcow2: Marking image as corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed read failed: Input/output error === Testing unaligned pre-allocated zero cluster === @@ -166,7 +166,7 @@ discard 65536/65536 bytes at offset 0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Image is corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed +qcow2: Image is corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed read failed: Input/output error read failed: Input/output error @@ -176,7 +176,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed -qcow2: Marking image as corrupt: Data cluster offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +qcow2: Marking image as corrupt: Cluster allocation offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed discard 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read failed: Input/output error diff --git a/tests/qemu-iotests/066 b/tests/qemu-iotests/066 index c2116a3088..8638217736 100755 --- a/tests/qemu-iotests/066 +++ b/tests/qemu-iotests/066 @@ -1,6 +1,6 @@ #!/bin/bash # -# Test case for discarding preallocated zero clusters in 
qcow2 +# Test case for preallocated zero clusters in qcow2 # # Copyright (C) 2013 Red Hat, Inc. # @@ -55,8 +55,134 @@ _make_test_img $IMG_SIZE $QEMU_IO -c "write 0 256k" -c "write -z 0 256k" -c "write 64M 512" \ -c "discard 0 $IMG_SIZE" -c "read -P 0 0 $IMG_SIZE" "$TEST_IMG" \ | _filter_qemu_io + # Check the image (there shouldn't be any leaks) _check_test_img +# Map the image (we want all clusters to be gone) +$QEMU_IMG map "$TEST_IMG" + +_cleanup_test_img + + +echo +echo '=== Writing to preallocated zero clusters ===' +echo + +_make_test_img $IMG_SIZE + +# Create data clusters (not aligned to an L2 table) +$QEMU_IO -c 'write -P 42 1M 256k' "$TEST_IMG" | _filter_qemu_io +orig_map=$($QEMU_IMG map --output=json "$TEST_IMG") + +# Convert the data clusters to preallocated zero clusters +$QEMU_IO -c 'write -z 1M 256k' "$TEST_IMG" | _filter_qemu_io + +# Now write to them (with a COW needed for the head and tail) +$QEMU_IO -c "write -P 23 $(((1024 + 32) * 1024)) 192k" "$TEST_IMG" \ + | _filter_qemu_io + +# Check metadata correctness +_check_test_img + +# Check data correctness +$QEMU_IO -c "read -P 0 $(( 1024 * 1024)) 32k" \ + -c "read -P 23 $(((1024 + 32) * 1024)) 192k" \ + -c "read -P 0 $(((1024 + 32 + 192) * 1024)) 32k" \ + "$TEST_IMG" \ + | _filter_qemu_io + +# Check that we have actually reused the original area +new_map=$($QEMU_IMG map --output=json "$TEST_IMG") +if [ "$new_map" = "$orig_map" ]; then + echo 'Successfully reused original clusters.' +else + echo 'Failed to reuse original clusters.' 
+ echo 'Original map:' + echo "$orig_map" + echo 'New map:' + echo "$new_map" +fi + +_cleanup_test_img + + +echo +echo '=== Writing to a snapshotted preallocated zero cluster ===' +echo + +_make_test_img 64k + +# Create a preallocated zero cluster +$QEMU_IO -c 'write -P 42 0 64k' -c 'write -z 0 64k' "$TEST_IMG" \ + | _filter_qemu_io + +# Snapshot it +$QEMU_IMG snapshot -c foo "$TEST_IMG" + +# Write to the cluster +$QEMU_IO -c 'write -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io + +# Check metadata correctness +_check_test_img + +# Check data correctness +$QEMU_IO -c 'read -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG snapshot -a foo "$TEST_IMG" +$QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io + +_cleanup_test_img + + +echo +echo '=== Consecutive write to a preallocated zero cluster ===' +echo + +_make_test_img 192k + +# Create three normal clusters +$QEMU_IO -c 'write -P 42 0 192k' "$TEST_IMG" | _filter_qemu_io +orig_map=$($QEMU_IMG map --output=json "$TEST_IMG") + +# Make the middle cluster a preallocated zero cluster +$QEMU_IO -c 'write -z 64k 64k' "$TEST_IMG" | _filter_qemu_io + +# Try to overwrite everything: This should reuse the whole range. To test that +# this only issues a single continuous write request, use blkdebug. +$QEMU_IO -c 'write -P 42 0 192k' \ + "json:{ + 'driver': '$IMGFMT', + 'file': { + 'driver': 'blkdebug', + 'image.filename': '$TEST_IMG', + 'set-state': [{ + 'event': 'write_aio', + 'new_state': 2 + }], + 'inject-error': [{ + 'event': 'write_aio', + 'state': 2 + }] + } + }" \ + | _filter_qemu_io + +# Check metadata correctness +_check_test_img + +# Check that we have actually reused the original area +new_map=$($QEMU_IMG map --output=json "$TEST_IMG") +if [ "$new_map" = "$orig_map" ]; then + echo 'Successfully reused original clusters.' +else + echo 'Failed to reuse original clusters.' 
+ echo 'Original map:' + echo "$orig_map" + echo 'New map:' + echo "$new_map" +fi + +_cleanup_test_img + # success, all done echo "*** done" diff --git a/tests/qemu-iotests/066.out b/tests/qemu-iotests/066.out index 7c1f31a1b1..3d9da9bd0b 100644 --- a/tests/qemu-iotests/066.out +++ b/tests/qemu-iotests/066.out @@ -14,4 +14,50 @@ discard 67109376/67109376 bytes at offset 0 read 67109376/67109376 bytes at offset 0 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. +Offset Length Mapped to File + +=== Writing to preallocated zero clusters === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67109376 +wrote 262144/262144 bytes at offset 1048576 +256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 262144/262144 bytes at offset 1048576 +256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 196608/196608 bytes at offset 1081344 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +read 32768/32768 bytes at offset 1048576 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 196608/196608 bytes at offset 1081344 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 32768/32768 bytes at offset 1277952 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Successfully reused original clusters. + +=== Writing to a snapshotted preallocated zero cluster === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. 
+read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Consecutive write to a preallocated zero cluster === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=196608 +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +Successfully reused original clusters. *** done diff --git a/tests/qemu-iotests/085 b/tests/qemu-iotests/085 index c53e97f067..b97adcd8db 100755 --- a/tests/qemu-iotests/085 +++ b/tests/qemu-iotests/085 @@ -45,7 +45,7 @@ _cleanup() rm -f "${TEST_DIR}/${i}-${snapshot_virt0}" rm -f "${TEST_DIR}/${i}-${snapshot_virt1}" done - rm -f "${TEST_IMG}.1" "${TEST_IMG}.2" + rm -f "${TEST_IMG}" "${TEST_IMG}.1" "${TEST_IMG}.2" "${TEST_IMG}.base" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -87,24 +87,26 @@ function create_group_snapshot() } # ${1}: unique identifier for the snapshot filename -# ${2}: true: open backing images; false: don't open them (default) +# ${2}: extra_params to the blockdev-add command +# ${3}: filename +function do_blockdev_add() +{ + cmd="{ 'execute': 'blockdev-add', 'arguments': + { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${2} + 'file': + { 'driver': 'file', 'filename': '${3}', + 'node-name': 'file_${1}' } } }" + _send_qemu_cmd $h "${cmd}" "return" +} + +# ${1}: unique identifier for the snapshot filename function add_snapshot_image() { - if [ "${2}" = "true" ]; then - extra_params="" - else - extra_params="'backing': '', " - fi base_image="${TEST_DIR}/$((${1}-1))-${snapshot_virt0}" snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}" _make_test_img -b "${base_image}" "$size" mv "${TEST_IMG}" "${snapshot_file}" - cmd="{ 
'execute': 'blockdev-add', 'arguments': - { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${extra_params} - 'file': - { 'driver': 'file', 'filename': '${snapshot_file}', - 'node-name': 'file_${1}' } } }" - _send_qemu_cmd $h "${cmd}" "return" + do_blockdev_add "$1" "'backing': '', " "${snapshot_file}" } # ${1}: unique identifier for the snapshot filename @@ -222,7 +224,10 @@ echo === Invalid command - snapshot node has a backing image === echo SNAPSHOTS=$((${SNAPSHOTS}+1)) -add_snapshot_image ${SNAPSHOTS} true + +TEST_IMG="$TEST_IMG.base" _make_test_img "$size" +_make_test_img -b "${TEST_IMG}.base" "$size" +do_blockdev_add ${SNAPSHOTS} "" "${TEST_IMG}" blockdev_snapshot ${SNAPSHOTS} error echo diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 182acb42cf..a5d4cc3494 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -78,7 +78,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === Invalid command - snapshot node has a backing image === -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/12-snapshot-v0.IMGFMT +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base {"return": {}} {"error": {"class": "GenericError", "desc": "The snapshot already has a backing image"}} diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index 9de57ddf6d..6d52f7d1b7 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -82,8 +82,7 @@ run_qemu -drive driver=$IMGFMT,id=disk,node-name=test-node,file="$TEST_IMG" <<EO "driver": "$IMGFMT", "node-name": "disk", "file": { - "driver": "file", - "filename": "$TEST_IMG" + "driver": "null-co" } } } @@ -92,8 +91,7 @@ run_qemu -drive driver=$IMGFMT,id=disk,node-name=test-node,file="$TEST_IMG" <<EO "driver": "$IMGFMT", "node-name": "test-node", "file": { - "driver": "file", - "filename": "$TEST_IMG" + "driver": 
"null-co" } } } diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091 index 32bbd56975..10ac4a8d73 100755 --- a/tests/qemu-iotests/091 +++ b/tests/qemu-iotests/091 @@ -95,7 +95,9 @@ echo "vm2: qemu process running successfully" echo "vm2: flush io, and quit" _send_qemu_cmd $h2 'qemu-io disk flush' "(qemu)" _send_qemu_cmd $h2 'quit' "" +_send_qemu_cmd $h1 'quit' "" +wait echo "Check image pattern" ${QEMU_IO} -c "read -P 0x22 0 4M" "${TEST_IMG}" | _filter_testdir | _filter_qemu_io diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out index eecde16ad5..ccf172abd9 100644 --- a/tests/qemu-iotests/102.out +++ b/tests/qemu-iotests/102.out @@ -6,7 +6,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image resized. -[ 0] 128/ 128 sectors allocated at offset 0 bytes (1) +64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0) Offset Length Mapped to File === Testing map on an image file truncated outside of qemu === @@ -17,5 +17,5 @@ wrote 65536/65536 bytes at offset 0 Image resized. 
QEMU X.Y.Z monitor - type 'help' for more information (qemu) qemu-io drv0 map -[ 0] 128/ 128 sectors allocated at offset 0 bytes (1) +64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0) *** done diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out index 9317d801ad..47d8656db8 100644 --- a/tests/qemu-iotests/122.out +++ b/tests/qemu-iotests/122.out @@ -112,7 +112,7 @@ read 3145728/3145728 bytes at offset 0 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 63963136/63963136 bytes at offset 3145728 61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}] +[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] convert -c -S 0: read 3145728/3145728 bytes at offset 0 @@ -134,7 +134,7 @@ read 30408704/30408704 bytes at offset 3145728 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 33554432/33554432 bytes at offset 33554432 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}] +[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] convert -c -S 0 with source backing file: read 3145728/3145728 bytes at offset 0 @@ -152,7 +152,7 @@ read 30408704/30408704 bytes at offset 3145728 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 33554432/33554432 bytes at offset 33554432 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}] +[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] convert -c -S 0 -B ... 
read 3145728/3145728 bytes at offset 0 @@ -176,11 +176,11 @@ wrote 1024/1024 bytes at offset 17408 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) convert -S 4k -[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 8192}, +[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false}, -{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 9216}, +{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false}, -{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 10240}, +{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}] convert -c -S 4k @@ -192,9 +192,9 @@ convert -c -S 4k { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}] convert -S 8k -[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": 8192}, +[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false}, -{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 17408}, +{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}] convert -c -S 8k diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out index 4f334d86bc..db6b296b9e 100644 --- a/tests/qemu-iotests/146.out +++ b/tests/qemu-iotests/146.out @@ -2,39 +2,39 @@ QA output created by 146 === Testing VPC Autodetect === -[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) +126.998 GiB (0x1fbfe04000) bytes not 
allocated at offset 0 bytes (0x0) === Testing VPC with current_size force === -[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) +127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0) === Testing VPC with chs force === -[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) +126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0) === Testing Hyper-V Autodetect === -[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) +127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0) === Testing Hyper-V with current_size force === -[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) +127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0) === Testing Hyper-V with chs force === -[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) +126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0) === Testing d2v Autodetect === -[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) +251.250 MiB (0xfb40000) bytes allocated at offset 0 bytes (0x0) === Testing d2v with current_size force === -[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) +251.250 MiB (0xfb40000) bytes allocated at offset 0 bytes (0x0) === Testing d2v with chs force === -[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) +251.250 MiB (0xfb40000) bytes allocated at offset 0 bytes (0x0) === Testing Image create, default === @@ -42,15 +42,15 @@ Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 === Read created image, default opts ==== -[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) +4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=chs ==== -[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) +4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=current_size ==== -[ 0] 8389584/ 8389584 
sectors not allocated at offset 0 bytes (0) +4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0) === Testing Image create, force_size === @@ -58,13 +58,13 @@ Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 forc === Read created image, default opts ==== -[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=chs ==== -[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=current_size ==== -[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0) *** done diff --git a/tests/qemu-iotests/153 b/tests/qemu-iotests/153 new file mode 100755 index 0000000000..0b45d78ea3 --- /dev/null +++ b/tests/qemu-iotests/153 @@ -0,0 +1,233 @@ +#!/bin/bash +# +# Test image locking +# +# Copyright 2016, 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=famz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! 
+ +_cleanup() +{ + _cleanup_test_img + rm -f "${TEST_IMG}.base" + rm -f "${TEST_IMG}.convert" + rm -f "${TEST_IMG}.a" + rm -f "${TEST_IMG}.b" + rm -f "${TEST_IMG}.lnk" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +size=32M + +_check_ofd() +{ + _make_test_img $size >/dev/null + if $QEMU_IMG_PROG info --image-opts "driver=file,locking=on,filename=$TEST_IMG" 2>&1 | + grep -q 'falling back to POSIX file'; then + return 1 + else + return 0 + fi +} + +_check_ofd || _notrun "OFD lock not available" + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +_run_cmd() +{ + echo + (echo "$@"; "$@" 2>&1 1>/dev/null) | _filter_testdir +} + +function _do_run_qemu() +{ + ( + if ! test -t 0; then + while read cmd; do + echo $cmd + done + fi + echo quit + ) | $QEMU -nographic -monitor stdio -serial none "$@" 1>/dev/null +} + +function _run_qemu_with_images() +{ + _do_run_qemu \ + $(for i in $@; do echo "-drive if=none,file=$i"; done) 2>&1 \ + | _filter_testdir | _filter_qemu +} + +echo "== readonly=off,force-share=on should be rejected ==" +_run_qemu_with_images null-co://,readonly=off,force-share=on + +for opts1 in "" "read-only=on" "read-only=on,force-share=on"; do + echo + echo "== Creating base image ==" + TEST_IMG="${TEST_IMG}.base" _make_test_img $size + + echo + echo "== Creating test image ==" + $QEMU_IMG create -f $IMGFMT "${TEST_IMG}" -b ${TEST_IMG}.base | _filter_img_create + + echo + echo "== Launching QEMU, opts: '$opts1' ==" + _launch_qemu -drive file="${TEST_IMG}",if=none,$opts1 + h=$QEMU_HANDLE + + for opts2 in "" "read-only=on" "read-only=on,force-share=on"; do + echo + echo "== Launching another QEMU, opts: '$opts2' ==" + echo "quit" | \ + $QEMU -nographic -monitor stdio \ + -drive file="${TEST_IMG}",if=none,$opts2 2>&1 1>/dev/null | \ + _filter_testdir | _filter_qemu + done + + for L in "" "-U"; do + + echo + echo "== Running utility commands $L 
==" + _run_cmd $QEMU_IO $L -c "read 0 512" "${TEST_IMG}" + _run_cmd $QEMU_IO $L -r -c "read 0 512" "${TEST_IMG}" + _run_cmd $QEMU_IO -c "open $L ${TEST_IMG}" -c "read 0 512" + _run_cmd $QEMU_IO -c "open -r $L ${TEST_IMG}" -c "read 0 512" + _run_cmd $QEMU_IMG info $L "${TEST_IMG}" + _run_cmd $QEMU_IMG check $L "${TEST_IMG}" + _run_cmd $QEMU_IMG compare $L "${TEST_IMG}" "${TEST_IMG}" + _run_cmd $QEMU_IMG map $L "${TEST_IMG}" + _run_cmd $QEMU_IMG amend -o "" $L "${TEST_IMG}" + _run_cmd $QEMU_IMG commit $L "${TEST_IMG}" + _run_cmd $QEMU_IMG resize $L "${TEST_IMG}" $size + _run_cmd $QEMU_IMG rebase $L "${TEST_IMG}" -b "${TEST_IMG}.base" + _run_cmd $QEMU_IMG snapshot -l $L "${TEST_IMG}" + _run_cmd $QEMU_IMG convert $L "${TEST_IMG}" "${TEST_IMG}.convert" + _run_cmd $QEMU_IMG dd $L if="${TEST_IMG}" of="${TEST_IMG}.convert" bs=512 count=1 + _run_cmd $QEMU_IMG bench $L -c 1 "${TEST_IMG}" + _run_cmd $QEMU_IMG bench $L -w -c 1 "${TEST_IMG}" + done + _send_qemu_cmd $h "{ 'execute': 'quit', }" "" + echo + echo "Round done" + _cleanup_qemu +done + +for opt1 in $test_opts; do + for opt2 in $test_opts; do + echo + echo "== Two devices with the same image ($opt1 - $opt2) ==" + _run_qemu_with_images "${TEST_IMG},$opt1" "${TEST_IMG},$opt2" + done +done + +echo "== Creating ${TEST_IMG}.[abc] ==" | _filter_testdir +( + $QEMU_IMG create -f qcow2 "${TEST_IMG}.a" -b "${TEST_IMG}" + $QEMU_IMG create -f qcow2 "${TEST_IMG}.b" -b "${TEST_IMG}" + $QEMU_IMG create -f qcow2 "${TEST_IMG}.c" -b "${TEST_IMG}.b" +) | _filter_img_create + +echo +echo "== Two devices sharing the same file in backing chain ==" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}.b" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}.c" + +echo +echo "== Backing image also as an active device ==" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}" + +echo +echo "== Backing image also as an active device (ro) ==" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG},readonly=on" + +echo +echo "== Symbolic link ==" +rm -f 
"${TEST_IMG}.lnk" &>/dev/null +ln -s ${TEST_IMG} "${TEST_IMG}.lnk" || echo "Failed to create link" +_run_qemu_with_images "${TEST_IMG}.lnk" "${TEST_IMG}" + +echo +echo "== Closing an image should unlock it ==" +_launch_qemu + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'qmp_capabilities' }" \ + 'return' + +echo "Adding drive" +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_add 0 if=none,id=d0,file=${TEST_IMG}' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +echo "Closing drive" +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_del d0' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +echo "Adding two and closing one" +for d in d0 d1; do + _send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_add 0 if=none,id=$d,file=${TEST_IMG},readonly=on' } }" \ + "" +done + +_run_cmd $QEMU_IMG info "${TEST_IMG}" + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_del d0' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +echo "Closing the other" +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_del d1' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +_cleanup_qemu + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out new file mode 100644 index 0000000000..5ba0b63867 --- /dev/null +++ b/tests/qemu-iotests/153.out @@ -0,0 +1,390 @@ +QA output created by 153 +== readonly=off,force-share=on should be rejected == +QEMU_PROG: -drive if=none,file=null-co://,readonly=off,force-share=on: force-share=on can only be used with read-only images + +== Creating base image == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 + 
+== Creating test image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base + +== Launching QEMU, opts: '' == + +== Launching another QEMU, opts: '' == +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock +Is another process using the image? + +== Launching another QEMU, opts: 'read-only=on' == +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,read-only=on: Failed to get shared "write" lock +Is another process using the image? + +== Launching another QEMU, opts: 'read-only=on,force-share=on' == + +== Running utility commands == + +_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock +Is another process using the image? + +_qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper info TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper check TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper map TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? 
+ +_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper commit TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? 
+ +== Running utility commands -U == + +_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper check -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper commit -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? 
+ +_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images + +Round done + +== Creating base image == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 + +== Creating test image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base + +== Launching QEMU, opts: 'read-only=on' == + +== Launching another QEMU, opts: '' == +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock +Is another process using the image? + +== Launching another QEMU, opts: 'read-only=on' == + +== Launching another QEMU, opts: 'read-only=on,force-share=on' == + +== Running utility commands == + +_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info TEST_DIR/t.qcow2 + +_qemu_img_wrapper check TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper commit TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? 
+ +_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +== Running utility commands -U == + +_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper check -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper commit -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? 
+ +_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images + +Round done + +== Creating base image == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 + +== Creating test image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base + +== Launching QEMU, opts: 'read-only=on,force-share=on' == + +== Launching another QEMU, opts: '' == + +== Launching another QEMU, opts: 'read-only=on' == + +== Launching another QEMU, opts: 'read-only=on,force-share=on' == + +== Running utility commands == + +_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info TEST_DIR/t.qcow2 + +_qemu_img_wrapper check TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 + +_qemu_img_wrapper commit TEST_DIR/t.qcow2 + +_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M + +_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base + +_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 + +== Running utility commands -U == + +_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: force-share=on can 
only be used with read-only images + +_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper check -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper commit -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base + +_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images + +Round done +== Creating TEST_DIR/t.qcow2.[abc] == +Formatting 'TEST_DIR/t.IMGFMT.a', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT +Formatting 'TEST_DIR/t.IMGFMT.b', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT +Formatting 'TEST_DIR/t.IMGFMT.c', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.b + +== Two devices sharing the same file in backing chain == + +== Backing image also as an active device == +QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? 
+ +== Backing image also as an active device (ro) == + +== Symbolic link == +QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +== Closing an image should unlock it == +{"return": {}} +Adding drive + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? +Closing drive + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +Adding two and closing one + +_qemu_img_wrapper info TEST_DIR/t.qcow2 + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? +Closing the other + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +*** done diff --git a/tests/qemu-iotests/154 b/tests/qemu-iotests/154 index 7ca7219f08..dd8a426dad 100755 --- a/tests/qemu-iotests/154 +++ b/tests/qemu-iotests/154 @@ -2,7 +2,7 @@ # # qcow2 specific bdrv_pwrite_zeroes tests with backing files (complements 034) # -# Copyright (C) 2016 Red Hat, Inc. +# Copyright (C) 2016-2017 Red Hat, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -42,7 +42,10 @@ _supported_proto file _supported_os Linux CLUSTER_SIZE=4k -size=128M +size=$((128 * 1024 * 1024)) + +# This test requires zero clusters, added in v3 images +_unsupported_imgopts compat=0.10 echo echo == backing file contains zeros == @@ -299,6 +302,159 @@ $QEMU_IO -c "read -P 0 75k 1k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map +echo +echo == unaligned image tail cluster, no allocation needed == + +# With no backing file, write to all or part of unallocated partial cluster +# will mark the cluster as zero, but does not allocate. +# Re-create the image each time to get back to unallocated clusters. + +# Write at the front: sector-wise, the request is: 128m... 
| 00 -- -- -- +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at the back: sector-wise, the request is: 128m... | -- -- -- 00 +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at middle: sector-wise, the request is: 128m... | -- 00 00 -- +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write entire cluster: sector-wise, the request is: 128m... | 00 00 00 00 +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Repeat with backing file holding unallocated cluster. +# TODO: Note that this forces an allocation, because we aren't yet able to +# quickly detect that reads beyond EOF of the backing file are always zero +CLUSTER_SIZE=2048 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) + +# Write at the front: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | 00 -- -- -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at the back: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... 
| -- -- -- 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at middle: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | -- 00 00 -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write entire cluster: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | 00 00 00 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Repeat with backing file holding zero'd cluster +# TODO: Note that this forces an allocation, because we aren't yet able to +# quickly detect that reads beyond EOF of the backing file are always zero +$QEMU_IO -c "write -z $size 512" "$TEST_IMG.base" | _filter_qemu_io + +# Write at the front: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | 00 -- -- -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at the back: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... 
| -- -- -- 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at middle: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | -- 00 00 -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write entire cluster: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | 00 00 00 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# A preallocated cluster maintains its allocation, whether it stays as +# data due to a partial write: +# Convert 128m... | XX XX => ... | XX 00 +_make_test_img $((size + 1024)) +$QEMU_IO -c "write -P 1 $((size)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# or because it is the entire cluster and can use the zero flag: +# Convert 128m... | XX XX => ... 
| 00 00 +$QEMU_IO -c "write -z $((size)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $size 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +echo +echo == unaligned image tail cluster, allocation required == + +# Write beyond backing file must COW +# Backing file: 128m... | XX -- +# Active layer: 128m... | -- -- 00 -- +CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -P 1 $((size)) 512" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z $((size + 1024)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 512)) 1536" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Writes at boundaries of (partial) cluster must not lose mid-cluster data +# Backing file: 128m: ... | -- XX +# Active layer: 128m: ... 
| 00 -- -- 00 +CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -P 1 $((size + 512)) 512" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 1024)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 1024)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/154.out b/tests/qemu-iotests/154.out index da9eabdda8..d8485eeff2 100644 --- a/tests/qemu-iotests/154.out +++ b/tests/qemu-iotests/154.out @@ -42,9 +42,9 @@ read 1024/1024 bytes at offset 65536 read 2048/2048 bytes at offset 67584 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, -{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 36864, "length": 28672, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 134148096, "depth": 1, "zero": true, "data": false}] == backing file contains non-zero data after write_zeroes == @@ -69,9 +69,9 @@ read 1024/1024 bytes at offset 44032 read 3072/3072 bytes at 
offset 40960 3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, -{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 36864, "length": 4096, "depth": 1, "zero": true, "data": false}, -{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 45056, "length": 134172672, "depth": 1, "zero": true, "data": false}] == write_zeroes covers non-zero data == @@ -143,13 +143,13 @@ read 1024/1024 bytes at offset 67584 read 5120/5120 bytes at offset 68608 5 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, -{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 36864, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false}, -{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 53248, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}] @@ -186,13 +186,13 @@ read 
1024/1024 bytes at offset 72704 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, { "start": 32768, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false}, { "start": 49152, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false}, { "start": 65536, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672}, +{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}] == spanning two clusters, partially overwriting backing file == @@ -212,7 +212,7 @@ read 1024/1024 bytes at offset 5120 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 2048/2048 bytes at offset 6144 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 20480}, +[{ "start": 0, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 8192, "length": 134209536, "depth": 1, "zero": true, "data": false}] == spanning multiple clusters, non-zero in first cluster == @@ -227,7 +227,7 @@ read 2048/2048 bytes at offset 65536 read 10240/10240 bytes at offset 67584 10 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": 
false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 8192, "depth": 0, "zero": true, "data": false}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] @@ -257,7 +257,7 @@ read 2048/2048 bytes at offset 75776 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false}, { "start": 65536, "length": 8192, "depth": 0, "zero": true, "data": false}, -{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] == spanning multiple clusters, partially overwriting backing file == @@ -278,8 +278,136 @@ read 2048/2048 bytes at offset 74752 read 1024/1024 bytes at offset 76800 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] + +== unaligned image tail cluster, no allocation needed == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, 
"length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 1024/1024 bytes at offset 134218240 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 2048/2048 bytes at offset 134217728 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 1024/1024 bytes at offset 134218240 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX 
ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 2048/2048 bytes at offset 134217728 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 1024/1024 bytes at offset 134218240 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 
'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 2048/2048 bytes at offset 134217728 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134218752 +wrote 1024/1024 bytes at offset 134217728 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +1024/1024 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 0, "zero": true, "data": false}, +{ "start": 134217728, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +wrote 1024/1024 bytes at offset 134217728 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +1024/1024 bytes allocated at offset 128 MiB +read 1024/1024 bytes at offset 134217728 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 134217728, "depth": 0, "zero": true, "data": false}, +{ "start": 134217728, "length": 1024, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + +== unaligned image tail cluster, allocation required == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134218752 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX 
ops/sec) +read 1536/1536 bytes at offset 134218240 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 134218752 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 134218752 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] *** done diff --git a/tests/qemu-iotests/172 b/tests/qemu-iotests/172 index 1b7d3a194d..826d6fecd3 100755 --- a/tests/qemu-iotests/172 +++ b/tests/qemu-iotests/172 @@ -30,6 +30,8 @@ status=1 # failure is the default! 
_cleanup() { _cleanup_test_img + rm -f "$TEST_IMG.2" + rm -f "$TEST_IMG.3" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -86,6 +88,9 @@ size=720k _make_test_img $size +TEST_IMG="$TEST_IMG.2" _make_test_img $size +TEST_IMG="$TEST_IMG.3" _make_test_img $size + # Default drive semantics: # # By default you get a single empty floppy drive. You can override it with @@ -105,7 +110,7 @@ echo === Using -fda/-fdb options === check_floppy_qtree -fda "$TEST_IMG" check_floppy_qtree -fdb "$TEST_IMG" -check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG" +check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG.2" echo @@ -114,7 +119,7 @@ echo === Using -drive options === check_floppy_qtree -drive if=floppy,file="$TEST_IMG" check_floppy_qtree -drive if=floppy,file="$TEST_IMG",index=1 -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG",index=1 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG.2",index=1 echo echo @@ -122,7 +127,7 @@ echo === Using -drive if=none and -global === check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 echo @@ -131,7 +136,7 @@ echo === Using -drive if=none and -device === check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -device floppy,drive=none0 -device floppy,drive=none1,unit=1 echo @@ -139,58 +144,58 @@ echo echo === Mixing -fdX and -global === # Working 
-check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveB=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveA=none0 # Conflicting (-fdX wins) -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveA=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveB=none0 echo echo echo === Mixing -fdX and -device === # Working -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0 # Conflicting -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device 
floppy,drive=none0,unit=0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1 echo echo echo === Mixing -drive and -device === # Working -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1 # Conflicting -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0 echo echo echo === Mixing -global and -device === # Working -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -device floppy,drive=none1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveB=none0 -device floppy,drive=none1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 # Conflicting -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive 
if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 echo @@ -199,8 +204,8 @@ echo === Too many floppy drives === # Working check_floppy_qtree -drive if=floppy,file="$TEST_IMG" \ - -drive if=none,file="$TEST_IMG" \ - -drive if=none,file="$TEST_IMG" \ + -drive if=none,file="$TEST_IMG.2" \ + -drive if=none,file="$TEST_IMG.3" \ -global isa-fdc.driveB=none0 \ -device floppy,drive=none1 diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 54b53293d7..2732966166 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -1,5 +1,7 @@ QA output created by 172 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=737280 +Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=737280 +Formatting 'TEST_DIR/t.IMGFMT.3', fmt=IMGFMT size=737280 === Default === @@ -99,7 +101,7 @@ Testing: -fdb TEST_DIR/t.qcow2 share-rw = false drive-type = "288" -Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 +Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -205,7 +207,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 share-rw = false drive-type = "288" -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2.2,index=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -300,7 +302,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global 
isa-fdc.driveB=none1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -395,7 +397,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -436,7 +438,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco === Mixing -fdX and -global === -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -474,7 +476,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -512,7 +514,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- share-rw = false drive-type = "144" -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -539,7 +541,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive 
if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -569,7 +571,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- === Mixing -fdX and -device === -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -607,7 +609,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -645,7 +647,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -683,7 +685,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -721,18 +723,18 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +Testing: -fda TEST_DIR/t.qcow2 -drive 
if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0 QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed. -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1 QEMU_PROG: -device floppy,drive=none0,unit=1: Floppy unit 1 is in use QEMU_PROG: -device floppy,drive=none0,unit=1: Device initialization failed. === Mixing -drive and -device === -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -770,7 +772,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q share-rw = false drive-type = "144" -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -808,14 +810,14 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q share-rw = false drive-type = "144" -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0 QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed. 
=== Mixing -global and -device === -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -853,7 +855,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -891,7 +893,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -929,7 +931,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -967,18 +969,18 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 
-global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 QEMU_PROG: -device floppy,drive=none1,unit=0: Floppy unit 0 is in use QEMU_PROG: -device floppy,drive=none1,unit=0: Device initialization failed. -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 QEMU_PROG: -device floppy,drive=none1,unit=1: Floppy unit 1 is in use QEMU_PROG: -device floppy,drive=none1,unit=1: Device initialization failed. === Too many floppy drives === -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -drive if=none,file=TEST_DIR/t.qcow2.3 -global isa-fdc.driveB=none0 -device floppy,drive=none1 QEMU_PROG: -device floppy,drive=none1: Can't create floppy unit 2, bus supports only 2 units QEMU_PROG: -device floppy,drive=none1: Device initialization failed. diff --git a/tests/qemu-iotests/177 b/tests/qemu-iotests/177 new file mode 100755 index 0000000000..2005c174f2 --- /dev/null +++ b/tests/qemu-iotests/177 @@ -0,0 +1,114 @@ +#!/bin/bash +# +# Test corner cases with unusual block geometries +# +# Copyright (C) 2016-2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=eblake@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file + +CLUSTER_SIZE=1M +size=128M +options=driver=blkdebug,image.driver=qcow2 + +echo +echo "== setting up files ==" + +TEST_IMG="$TEST_IMG.base" _make_test_img $size +$QEMU_IO -c "write -P 11 0 $size" "$TEST_IMG.base" | _filter_qemu_io +_make_test_img -b "$TEST_IMG.base" +$QEMU_IO -c "write -P 22 0 $size" "$TEST_IMG" | _filter_qemu_io + +# Limited to 64k max-transfer +echo +echo "== constrained alignment and max-transfer ==" +limits=align=4k,max-transfer=64k +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "write -P 33 1000 128k" -c "read -P 33 1000 128k" | _filter_qemu_io + +echo +echo "== write zero with constrained max-transfer ==" +limits=align=512,max-transfer=64k,opt-write-zero=$CLUSTER_SIZE +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "write -z 8003584 2093056" | _filter_qemu_io + +# non-power-of-2 write-zero/discard alignments +echo +echo "== non-power-of-2 write zeroes limits ==" + +limits=align=512,opt-write-zero=15M,max-write-zero=15M,opt-discard=15M,max-discard=15M +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "write -z 32M 32M" | _filter_qemu_io + +echo +echo "== non-power-of-2 discard limits ==" + 
+limits=align=512,opt-write-zero=15M,max-write-zero=15M,opt-discard=15M,max-discard=15M +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "discard 80000001 30M" | _filter_qemu_io + +echo +echo "== verify image content ==" + +function verify_io() +{ + if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | + grep "compat: 0.10" > /dev/null); then + # For v2 images, discarded clusters are read from the backing file + discarded=11 + else + # Discarded clusters are zeroed for v3 or later + discarded=0 + fi + + echo read -P 22 0 1000 + echo read -P 33 1000 128k + echo read -P 22 132072 7871512 + echo read -P 0 8003584 2093056 + echo read -P 22 10096640 23457792 + echo read -P 0 32M 32M + echo read -P 22 64M 13M + echo read -P $discarded 77M 29M + echo read -P 22 106M 22M +} + +verify_io | $QEMU_IO -r "$TEST_IMG" | _filter_qemu_io + +_check_test_img + +# success, all done +echo "*** done" +status=0 diff --git a/tests/qemu-iotests/177.out b/tests/qemu-iotests/177.out new file mode 100644 index 0000000000..e887542678 --- /dev/null +++ b/tests/qemu-iotests/177.out @@ -0,0 +1,49 @@ +QA output created by 177 + +== setting up files == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728 +wrote 134217728/134217728 bytes at offset 0 +128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base +wrote 134217728/134217728 bytes at offset 0 +128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== constrained alignment and max-transfer == +wrote 131072/131072 bytes at offset 1000 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 1000 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== write zero with constrained max-transfer == +wrote 2093056/2093056 bytes at offset 8003584 +1.996 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== non-power-of-2 write zeroes limits == +wrote 33554432/33554432 bytes at 
offset 33554432 +32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== non-power-of-2 discard limits == +discard 31457280/31457280 bytes at offset 80000001 +30 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify image content == +read 1000/1000 bytes at offset 0 +1000 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 1000 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 7871512/7871512 bytes at offset 132072 +7.507 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2093056/2093056 bytes at offset 8003584 +1.996 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 23457792/23457792 bytes at offset 10096640 +22.371 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 33554432/33554432 bytes at offset 33554432 +32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 13631488/13631488 bytes at offset 67108864 +13 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 30408704/30408704 bytes at offset 80740352 +29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 23068672/23068672 bytes at offset 111149056 +22 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/179 b/tests/qemu-iotests/179 new file mode 100755 index 0000000000..7bc8db8fe0 --- /dev/null +++ b/tests/qemu-iotests/179 @@ -0,0 +1,130 @@ +#!/bin/bash +# +# Test case for write zeroes with unmap +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=eblake@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +# v2 images can't mark clusters as zero +_unsupported_imgopts compat=0.10 + +echo +echo '=== Testing write zeroes with unmap ===' +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +_make_test_img -b "$TEST_IMG.base" + +# Offsets chosen at or near 2M boundaries so test works at all cluster sizes +# 8k and larger (smaller clusters fail due to non-contiguous allocations) + +# Aligned writes to unallocated cluster should not allocate mapping, but must +# mark cluster as zero, whether or not unmap was requested +$QEMU_IO -c "write -z -u 2M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 6M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Unaligned writes need not allocate mapping if the cluster already reads +# as zero, but must mark cluster as zero, whether or not unmap was requested +$QEMU_IO -c "write -z -u 10485761 2097150" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 14680065 2097150" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Requesting unmap of normal data must deallocate; omitting unmap should +# preserve the mapping +$QEMU_IO -c "write 18M 14M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z -u 20M 2M" 
"$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 24M 6M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Likewise when writing on already-mapped zero data +$QEMU_IO -c "write -z -u 26M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 28M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Writing on unmapped zeroes does not allocate +$QEMU_IO -c "write -z 32M 8M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z -u 34M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 36M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Writing zero overrides a backing file, regardless of backing cluster type +$QEMU_IO -c "write -z 40M 8M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write 48M 8M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z -u 42M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 44M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z -u 50M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 52M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z -u 58M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 60M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Final check that mappings are correct and images are still sane +TEST_IMG="$TEST_IMG.base" _check_test_img +_check_test_img + +echo +echo '=== Testing cache optimization ===' +echo + +BLKDBG_TEST_IMG="blkdebug:$TEST_DIR/blkdebug.conf:$TEST_IMG.base" + +cat > "$TEST_DIR/blkdebug.conf" <<EOF +[inject-error] +event = "l2_update" +errno = "5" +immediately = "on" +once = "off" +EOF + +# 
None of the following writes should trigger an L2 update, because the +# cluster already reads as zero, and we don't have to change allocation +$QEMU_IO -c "w -z -u 20M 2M" "$BLKDBG_TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "w -z 20M 2M" "$BLKDBG_TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "w -z 28M 2M" "$BLKDBG_TEST_IMG" | _filter_qemu_io + +# success, all done +echo '*** done' +status=0 diff --git a/tests/qemu-iotests/179.out b/tests/qemu-iotests/179.out new file mode 100644 index 0000000000..80722b2289 --- /dev/null +++ b/tests/qemu-iotests/179.out @@ -0,0 +1,156 @@ +QA output created by 179 + +=== Testing write zeroes with unmap === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base +wrote 2097152/2097152 bytes at offset 2097152 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 6291456 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0) +2 MiB (0x200000) bytes allocated at offset 2 MiB (0x200000) +2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000) +2 MiB (0x200000) bytes allocated at offset 6 MiB (0x600000) +56 MiB (0x3800000) bytes not allocated at offset 8 MiB (0x800000) +[{ "start": 0, "length": 67108864, "depth": 0, "zero": true, "data": false}] +wrote 2097150/2097150 bytes at offset 10485761 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097150/2097150 bytes at offset 14680065 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0) +2 MiB (0x200000) bytes allocated at offset 2 MiB (0x200000) +2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000) +2 MiB (0x200000) bytes allocated at offset 6 MiB (0x600000) +2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000) +2 MiB (0x200000) bytes allocated at offset 10 MiB (0xa00000) +2 MiB 
(0x200000) bytes not allocated at offset 12 MiB (0xc00000) +2 MiB (0x200000) bytes allocated at offset 14 MiB (0xe00000) +48 MiB (0x3000000) bytes not allocated at offset 16 MiB (0x1000000) +[{ "start": 0, "length": 67108864, "depth": 0, "zero": true, "data": false}] +wrote 14680064/14680064 bytes at offset 18874368 +14 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 20971520 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 6291456/6291456 bytes at offset 25165824 +6 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0) +2 MiB (0x200000) bytes allocated at offset 2 MiB (0x200000) +2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000) +2 MiB (0x200000) bytes allocated at offset 6 MiB (0x600000) +2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000) +2 MiB (0x200000) bytes allocated at offset 10 MiB (0xa00000) +2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000) +2 MiB (0x200000) bytes allocated at offset 14 MiB (0xe00000) +2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000) +14 MiB (0xe00000) bytes allocated at offset 18 MiB (0x1200000) +32 MiB (0x2000000) bytes not allocated at offset 32 MiB (0x2000000) +[{ "start": 0, "length": 18874368, "depth": 0, "zero": true, "data": false}, +{ "start": 18874368, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 20971520, "length": 2097152, "depth": 0, "zero": true, "data": false}, +{ "start": 23068672, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 25165824, "length": 6291456, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 31457280, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 33554432, "length": 33554432, "depth": 0, "zero": true, "data": false}] +wrote 2097152/2097152 bytes at offset 27262976 +2 MiB, X ops; 
XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 29360128 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0) +2 MiB (0x200000) bytes allocated at offset 2 MiB (0x200000) +2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000) +2 MiB (0x200000) bytes allocated at offset 6 MiB (0x600000) +2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000) +2 MiB (0x200000) bytes allocated at offset 10 MiB (0xa00000) +2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000) +2 MiB (0x200000) bytes allocated at offset 14 MiB (0xe00000) +2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000) +14 MiB (0xe00000) bytes allocated at offset 18 MiB (0x1200000) +32 MiB (0x2000000) bytes not allocated at offset 32 MiB (0x2000000) +[{ "start": 0, "length": 18874368, "depth": 0, "zero": true, "data": false}, +{ "start": 18874368, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 20971520, "length": 2097152, "depth": 0, "zero": true, "data": false}, +{ "start": 23068672, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 25165824, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 27262976, "length": 2097152, "depth": 0, "zero": true, "data": false}, +{ "start": 29360128, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 31457280, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 33554432, "length": 33554432, "depth": 0, "zero": true, "data": false}] +wrote 8388608/8388608 bytes at offset 33554432 +8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 35651584 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 37748736 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2 
MiB (0x200000) bytes not allocated at offset 0 bytes (0x0) +2 MiB (0x200000) bytes allocated at offset 2 MiB (0x200000) +2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000) +2 MiB (0x200000) bytes allocated at offset 6 MiB (0x600000) +2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000) +2 MiB (0x200000) bytes allocated at offset 10 MiB (0xa00000) +2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000) +2 MiB (0x200000) bytes allocated at offset 14 MiB (0xe00000) +2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000) +22 MiB (0x1600000) bytes allocated at offset 18 MiB (0x1200000) +24 MiB (0x1800000) bytes not allocated at offset 40 MiB (0x2800000) +[{ "start": 0, "length": 18874368, "depth": 0, "zero": true, "data": false}, +{ "start": 18874368, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 20971520, "length": 2097152, "depth": 0, "zero": true, "data": false}, +{ "start": 23068672, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 25165824, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 27262976, "length": 2097152, "depth": 0, "zero": true, "data": false}, +{ "start": 29360128, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 31457280, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 33554432, "length": 33554432, "depth": 0, "zero": true, "data": false}] +wrote 8388608/8388608 bytes at offset 41943040 +8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 8388608/8388608 bytes at offset 50331648 +8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 44040192 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 46137344 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 52428800 
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 54525952 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 60817408 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 62914560 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +42 MiB (0x2a00000) bytes not allocated at offset 0 bytes (0x0) +4 MiB (0x400000) bytes allocated at offset 42 MiB (0x2a00000) +4 MiB (0x400000) bytes not allocated at offset 46 MiB (0x2e00000) +4 MiB (0x400000) bytes allocated at offset 50 MiB (0x3200000) +4 MiB (0x400000) bytes not allocated at offset 54 MiB (0x3600000) +4 MiB (0x400000) bytes allocated at offset 58 MiB (0x3a00000) +2 MiB (0x200000) bytes not allocated at offset 62 MiB (0x3e00000) +[{ "start": 0, "length": 18874368, "depth": 1, "zero": true, "data": false}, +{ "start": 18874368, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 20971520, "length": 2097152, "depth": 1, "zero": true, "data": false}, +{ "start": 23068672, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 25165824, "length": 2097152, "depth": 1, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 27262976, "length": 2097152, "depth": 1, "zero": true, "data": false}, +{ "start": 29360128, "length": 2097152, "depth": 1, "zero": true, "data": false, "offset": OFFSET}, +{ "start": 31457280, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 33554432, "length": 10485760, "depth": 1, "zero": true, "data": false}, +{ "start": 44040192, "length": 4194304, "depth": 0, "zero": true, "data": false}, +{ "start": 48234496, "length": 2097152, "depth": 1, "zero": true, "data": false}, +{ "start": 50331648, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 52428800, "length": 4194304, "depth": 0, "zero": true, "data": 
false}, +{ "start": 56623104, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 58720256, "length": 2097152, "depth": 1, "zero": true, "data": false}, +{ "start": 60817408, "length": 4194304, "depth": 0, "zero": true, "data": false}, +{ "start": 65011712, "length": 2097152, "depth": 1, "zero": true, "data": false}] +No errors were found on the image. +No errors were found on the image. + +=== Testing cache optimization === + +wrote 2097152/2097152 bytes at offset 20971520 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 20971520 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2097152/2097152 bytes at offset 29360128 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182 new file mode 100755 index 0000000000..7ecbb22604 --- /dev/null +++ b/tests/qemu-iotests/182 @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Test image locking for POSIX locks +# +# Copyright 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=famz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. 
./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +size=32M + +_make_test_img $size + +echo "Starting QEMU" +_launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ + -device virtio-blk-pci,drive=drive0 + +echo +echo "Starting a second QEMU using the same image should fail" +echo 'quit' | $QEMU -monitor stdio \ + -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ + -device virtio-blk-pci,drive=drive0 2>&1 | _filter_testdir 2>&1 | + _filter_qemu | + sed -e '/falling back to POSIX file/d' \ + -e '/locks can be lost unexpectedly/d' + +_cleanup_qemu + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out new file mode 100644 index 0000000000..23a4dbf809 --- /dev/null +++ b/tests/qemu-iotests/182.out @@ -0,0 +1,8 @@ +QA output created by 182 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 +Starting QEMU + +Starting a second QEMU using the same image should fail +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,id=drive0,file.locking=on: Failed to get "write" lock +Is another process using the image? 
+*** done diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index f58548dc44..2f595b2ce2 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -152,10 +152,12 @@ _filter_img_info() -e "/log_size: [0-9]\\+/d" } -# filter out offsets and file names from qemu-img map +# filter out offsets and file names from qemu-img map; good for both +# human and json output _filter_qemu_img_map() { sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ + -e 's/"offset": [0-9]\+/"offset": OFFSET/g' \ -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt } diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern index ddfbca1b76..34f4a8dc9b 100644 --- a/tests/qemu-iotests/common.pattern +++ b/tests/qemu-iotests/common.pattern @@ -18,7 +18,7 @@ function do_is_allocated() { local start=$1 - local size=$(( $2 / 512)) + local size=$2 local step=$3 local count=$4 diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 893962d41e..5c8ea0f95c 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -154,6 +154,7 @@ 149 rw auto sudo 150 rw auto quick 152 rw auto quick +153 rw auto quick 154 rw auto backing quick 155 rw auto 156 rw auto quick @@ -169,4 +170,7 @@ 174 auto 175 auto quick 176 rw auto backing +177 rw auto quick +179 rw auto quick 181 rw auto migration +182 rw auto quick diff --git a/tests/test-crypto-block.c b/tests/test-crypto-block.c index 85e6603d59..95c4bd5da3 100644 --- a/tests/test-crypto-block.c +++ b/tests/test-crypto-block.c @@ -187,10 +187,10 @@ static struct QCryptoBlockTestData { static ssize_t test_block_read_func(QCryptoBlock *block, - void *opaque, size_t offset, uint8_t *buf, size_t buflen, + void *opaque, Error **errp) { Buffer *header = opaque; @@ -204,8 +204,8 @@ static ssize_t test_block_read_func(QCryptoBlock *block, static ssize_t test_block_init_func(QCryptoBlock *block, - void *opaque, size_t headerlen, + void *opaque, Error 
**errp) { Buffer *header = opaque; @@ -219,10 +219,10 @@ static ssize_t test_block_init_func(QCryptoBlock *block, static ssize_t test_block_write_func(QCryptoBlock *block, - void *opaque, size_t offset, const uint8_t *buf, size_t buflen, + void *opaque, Error **errp) { Buffer *header = opaque; diff --git a/tests/test-replication.c b/tests/test-replication.c index 3016c6f2e0..cebeb793b0 100644 --- a/tests/test-replication.c +++ b/tests/test-replication.c @@ -179,7 +179,8 @@ static BlockBackend *start_primary(void) char *cmdline; cmdline = g_strdup_printf("driver=replication,mode=primary,node-name=xxx," - "file.driver=qcow2,file.file.filename=%s" + "file.driver=qcow2,file.file.filename=%s," + "file.file.locking=off" , p_local_disk); opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); g_free(cmdline); @@ -310,7 +311,9 @@ static BlockBackend *start_secondary(void) Error *local_err = NULL; /* add s_local_disk and forge S_LOCAL_DISK_ID */ - cmdline = g_strdup_printf("file.filename=%s,driver=qcow2", s_local_disk); + cmdline = g_strdup_printf("file.filename=%s,driver=qcow2," + "file.locking=off", + s_local_disk); opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); g_free(cmdline); @@ -331,8 +334,10 @@ static BlockBackend *start_secondary(void) /* add S_(ACTIVE/HIDDEN)_DISK and forge S_ID */ cmdline = g_strdup_printf("driver=replication,mode=secondary,top-id=%s," "file.driver=qcow2,file.file.filename=%s," + "file.file.locking=off," "file.backing.driver=qcow2," "file.backing.file.filename=%s," + "file.backing.file.locking=off," "file.backing.backing=%s" , S_ID, s_active_disk, s_hidden_disk , S_LOCAL_DISK_ID); diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c index 79a2e69a28..6c71e46391 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qapi/qmp/qlist.h" +#include "qapi/qmp/qstring.h" #include "qapi/qmp/qdict.h" #include 
"qapi/qmp/qint.h" #include "qapi/qmp/qbool.h" @@ -78,6 +79,90 @@ static void add_cpuid_test(const char *name, const char *cmdline, qtest_add_data_func(name, args, test_cpuid_prop); } + +/* Parameters to a add_feature_test() test case */ +typedef struct FeatureTestArgs { + /* cmdline to start QEMU */ + const char *cmdline; + /* + * cpuid-input-eax and cpuid-input-ecx values to look for, + * in "feature-words" and "filtered-features" properties. + */ + uint32_t in_eax, in_ecx; + /* The register name to look for, in the X86CPUFeatureWordInfo array */ + const char *reg; + /* The bit to check in X86CPUFeatureWordInfo.features */ + int bitnr; + /* The expected value for the bit in (X86CPUFeatureWordInfo.features) */ + bool expected_value; +} FeatureTestArgs; + +/* Get the value for a feature word in a X86CPUFeatureWordInfo list */ +static uint32_t get_feature_word(QList *features, uint32_t eax, uint32_t ecx, + const char *reg) +{ + const QListEntry *e; + + for (e = qlist_first(features); e; e = qlist_next(e)) { + QDict *w = qobject_to_qdict(qlist_entry_obj(e)); + const char *rreg = qdict_get_str(w, "cpuid-register"); + uint32_t reax = qdict_get_int(w, "cpuid-input-eax"); + bool has_ecx = qdict_haskey(w, "cpuid-input-ecx"); + uint32_t recx = 0; + + if (has_ecx) { + recx = qdict_get_int(w, "cpuid-input-ecx"); + } + if (eax == reax && (!has_ecx || ecx == recx) && !strcmp(rreg, reg)) { + return qint_get_int(qobject_to_qint(qdict_get(w, "features"))); + } + } + return 0; +} + +static void test_feature_flag(const void *data) +{ + const FeatureTestArgs *args = data; + char *path; + QList *present, *filtered; + uint32_t value; + + qtest_start(args->cmdline); + path = get_cpu0_qom_path(); + present = qobject_to_qlist(qom_get(path, "feature-words")); + filtered = qobject_to_qlist(qom_get(path, "filtered-features")); + value = get_feature_word(present, args->in_eax, args->in_ecx, args->reg); + value |= get_feature_word(filtered, args->in_eax, args->in_ecx, args->reg); + 
qtest_end(); + + g_assert(!!(value & (1U << args->bitnr)) == args->expected_value); + + QDECREF(present); + QDECREF(filtered); + g_free(path); +} + +/* + * Add test case to ensure that a given feature flag is set in + * either "feature-words" or "filtered-features", when running QEMU + * using cmdline + */ +static FeatureTestArgs *add_feature_test(const char *name, const char *cmdline, + uint32_t eax, uint32_t ecx, + const char *reg, int bitnr, + bool expected_value) +{ + FeatureTestArgs *args = g_new0(FeatureTestArgs, 1); + args->cmdline = cmdline; + args->in_eax = eax; + args->in_ecx = ecx; + args->reg = reg; + args->bitnr = bitnr; + args->expected_value = expected_value; + qtest_add_data_func(name, args, test_feature_flag); + return args; +} + #ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS static void test_plus_minus_subprocess(void) { @@ -229,5 +314,31 @@ int main(int argc, char **argv) "-machine pc-i440fx-2.7 -cpu 486,+xstore", "xlevel2", 0); + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", + "-cpu 486,+arat", + 6, 0, "EAX", 2, true); + add_feature_test("x86/cpuid/features/minus", + "-cpu pentium,-mmx", + 1, 0, "EDX", 23, false); + add_feature_test("x86/cpuid/features/on", + "-cpu 486,arat=on", + 6, 0, "EAX", 2, true); + add_feature_test("x86/cpuid/features/off", + "-cpu pentium,mmx=off", + 1, 0, "EDX", 23, false); + add_feature_test("x86/cpuid/features/max-plus-invtsc", + "-cpu max,+invtsc", + 0x80000007, 0, "EDX", 8, true); + add_feature_test("x86/cpuid/features/max-invtsc-on", + "-cpu max,invtsc=on", + 0x80000007, 0, "EDX", 8, true); + add_feature_test("x86/cpuid/features/max-minus-mmx", + "-cpu max,-mmx", + 1, 0, "EDX", 23, false); + add_feature_test("x86/cpuid/features/max-invtsc-on,mmx=off", + "-cpu max,mmx=off", + 1, 0, "EDX", 23, false); + return g_test_run(); } diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c index f25bae5e6c..5b500fedb0 100644 --- a/tests/usb-hcd-uhci-test.c +++ b/tests/usb-hcd-uhci-test.c @@ 
-79,7 +79,7 @@ int main(int argc, char **argv) { const char *arch = qtest_get_arch(); const char *cmd = "-device piix3-usb-uhci,id=uhci,addr=1d.0" - " -drive id=drive0,if=none,file=/dev/null,format=raw" + " -drive id=drive0,if=none,file=null-co://,format=raw" " -device usb-tablet,bus=uhci.0,port=1"; int ret; diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c index 22513e9eb5..031764da6d 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -89,7 +89,7 @@ int main(int argc, char **argv) qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); qtest_start("-device nec-usb-xhci,id=xhci" - " -drive id=drive0,if=none,file=/dev/null,format=raw"); + " -drive id=drive0,if=none,file=null-co://,format=raw"); ret = g_test_run(); qtest_end(); diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 1eee95df49..fd2078c9da 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -63,7 +63,7 @@ static QOSState *pci_test_start(void) const char *arch = qtest_get_arch(); char *tmp_path; const char *cmd = "-drive if=none,id=drive0,file=%s,format=raw " - "-drive if=none,id=drive1,file=/dev/null,format=raw " + "-drive if=none,id=drive1,file=null-co://,format=raw " "-device virtio-blk-pci,id=drv0,drive=drive0," "addr=%x.%x"; diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index 0eabd56fd9..8b0f77a63e 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -35,7 +35,7 @@ typedef struct { static QOSState *qvirtio_scsi_start(const char *extra_opts) { const char *arch = qtest_get_arch(); - const char *cmd = "-drive id=drv0,if=none,file=/dev/null,format=raw " + const char *cmd = "-drive id=drv0,if=none,file=null-co://,format=raw " "-device virtio-scsi-pci,id=vs0 " "-device scsi-hd,bus=vs0.0,drive=drv0 %s"; @@ -195,7 +195,8 @@ static void hotplug(void) QDict *response; QOSState *qs; - qs = qvirtio_scsi_start("-drive id=drv1,if=none,file=/dev/null,format=raw"); + qs = qvirtio_scsi_start( + 
"-drive id=drv1,if=none,file=null-co://,format=raw"); response = qmp("{\"execute\": \"device_add\"," " \"arguments\": {" " \"driver\": \"scsi-hd\"," diff --git a/ui/Makefile.objs b/ui/Makefile.objs index 27566b32f1..aac6ae8bef 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -33,6 +33,7 @@ common-obj-y += shader.o common-obj-y += console-gl.o common-obj-y += egl-helpers.o common-obj-y += egl-context.o +common-obj-y += egl-headless.o ifeq ($(CONFIG_GTK_GL),y) common-obj-$(CONFIG_GTK) += gtk-gl-area.o else diff --git a/ui/cocoa.m b/ui/cocoa.m index 207555edf7..3a9bc4da5f 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -749,8 +749,8 @@ QemuCocoaView *cocoaView; * clicks in the titlebar. */ if ([self screenContainsPoint:p]) { - qemu_input_queue_abs(dcl->con, INPUT_AXIS_X, p.x, screen.width); - qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, screen.height - p.y, screen.height); + qemu_input_queue_abs(dcl->con, INPUT_AXIS_X, p.x, 0, screen.width); + qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, screen.height - p.y, 0, screen.height); } } else { qemu_input_queue_rel(dcl->con, INPUT_AXIS_X, (int)[event deltaX]); diff --git a/ui/egl-context.c b/ui/egl-context.c index 3a02b68d1a..2161969abe 100644 --- a/ui/egl-context.c +++ b/ui/egl-context.c @@ -7,9 +7,10 @@ QEMUGLContext qemu_egl_create_context(DisplayChangeListener *dcl, { EGLContext ctx; EGLint ctx_att[] = { - EGL_CONTEXT_CLIENT_VERSION, params->major_ver, - EGL_CONTEXT_MINOR_VERSION_KHR, params->minor_ver, - EGL_NONE + EGL_CONTEXT_OPENGL_PROFILE_MASK, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT, + EGL_CONTEXT_CLIENT_VERSION, params->major_ver, + EGL_CONTEXT_MINOR_VERSION_KHR, params->minor_ver, + EGL_NONE }; ctx = eglCreateContext(qemu_egl_display, qemu_egl_config, diff --git a/ui/egl-headless.c b/ui/egl-headless.c new file mode 100644 index 0000000000..d8d800f8a6 --- /dev/null +++ b/ui/egl-headless.c @@ -0,0 +1,158 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "sysemu/sysemu.h" +#include "ui/console.h" 
+#include "ui/egl-helpers.h" +#include "ui/egl-context.h" + +typedef struct egl_dpy { + DisplayChangeListener dcl; + DisplaySurface *ds; + int width, height; + GLuint texture; + GLuint framebuffer; + GLuint blit_texture; + GLuint blit_framebuffer; + bool y_0_top; +} egl_dpy; + +static void egl_refresh(DisplayChangeListener *dcl) +{ + graphic_hw_update(dcl->con); +} + +static void egl_gfx_update(DisplayChangeListener *dcl, + int x, int y, int w, int h) +{ +} + +static void egl_gfx_switch(DisplayChangeListener *dcl, + struct DisplaySurface *new_surface) +{ + egl_dpy *edpy = container_of(dcl, egl_dpy, dcl); + + edpy->ds = new_surface; +} + +static void egl_scanout_disable(DisplayChangeListener *dcl) +{ + egl_dpy *edpy = container_of(dcl, egl_dpy, dcl); + + edpy->texture = 0; + /* XXX: delete framebuffers here ??? */ +} + +static void egl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h) +{ + egl_dpy *edpy = container_of(dcl, egl_dpy, dcl); + + edpy->texture = backing_id; + edpy->y_0_top = backing_y_0_top; + + /* source framebuffer */ + if (!edpy->framebuffer) { + glGenFramebuffers(1, &edpy->framebuffer); + } + glBindFramebuffer(GL_FRAMEBUFFER_EXT, edpy->framebuffer); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, edpy->texture, 0); + + /* dest framebuffer */ + if (!edpy->blit_framebuffer) { + glGenFramebuffers(1, &edpy->blit_framebuffer); + glGenTextures(1, &edpy->blit_texture); + edpy->width = 0; + edpy->height = 0; + } + if (edpy->width != backing_width || edpy->height != backing_height) { + edpy->width = backing_width; + edpy->height = backing_height; + glBindTexture(GL_TEXTURE_2D, edpy->blit_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, + edpy->width, edpy->height, + 0, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glBindFramebuffer(GL_FRAMEBUFFER_EXT, edpy->blit_framebuffer); + 
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, edpy->blit_texture, 0); + } +} + +static void egl_scanout_flush(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h) +{ + egl_dpy *edpy = container_of(dcl, egl_dpy, dcl); + GLuint y1, y2; + + if (!edpy->texture || !edpy->ds) { + return; + } + assert(surface_width(edpy->ds) == edpy->width); + assert(surface_height(edpy->ds) == edpy->height); + assert(surface_format(edpy->ds) == PIXMAN_x8r8g8b8); + + /* blit framebuffer, flip if needed */ + glBindFramebuffer(GL_READ_FRAMEBUFFER, edpy->framebuffer); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, edpy->blit_framebuffer); + glViewport(0, 0, edpy->width, edpy->height); + y1 = edpy->y_0_top ? edpy->height : 0; + y2 = edpy->y_0_top ? 0 : edpy->height; + glBlitFramebuffer(0, y1, edpy->width, y2, + 0, 0, edpy->width, edpy->height, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + + /* read pixels to surface */ + glBindFramebuffer(GL_READ_FRAMEBUFFER, edpy->blit_framebuffer); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + glReadPixels(0, 0, edpy->width, edpy->height, + GL_BGRA, GL_UNSIGNED_BYTE, surface_data(edpy->ds)); + + /* notify about updates */ + dpy_gfx_update(edpy->dcl.con, x, y, w, h); +} + +static const DisplayChangeListenerOps egl_ops = { + .dpy_name = "egl-headless", + .dpy_refresh = egl_refresh, + .dpy_gfx_update = egl_gfx_update, + .dpy_gfx_switch = egl_gfx_switch, + + .dpy_gl_ctx_create = qemu_egl_create_context, + .dpy_gl_ctx_destroy = qemu_egl_destroy_context, + .dpy_gl_ctx_make_current = qemu_egl_make_context_current, + .dpy_gl_ctx_get_current = qemu_egl_get_current_context, + + .dpy_gl_scanout_disable = egl_scanout_disable, + .dpy_gl_scanout_texture = egl_scanout_texture, + .dpy_gl_update = egl_scanout_flush, +}; + +void egl_headless_init(void) +{ + QemuConsole *con; + egl_dpy *edpy; + int idx; + + if (egl_rendernode_init(NULL) < 0) { + error_report("egl: render node init failed"); + exit(1); + } + + for (idx = 
0;; idx++) { + con = qemu_console_lookup_by_index(idx); + if (!con || !qemu_console_is_graphic(con)) { + break; + } + + edpy = g_new0(egl_dpy, 1); + edpy->dcl.con = con; + edpy->dcl.ops = &egl_ops; + register_displaychangelistener(&edpy->dcl); + } +} diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c index b7b6b2e3cc..4a4d3370ee 100644 --- a/ui/egl-helpers.c +++ b/ui/egl-helpers.c @@ -26,18 +26,6 @@ EGLConfig qemu_egl_config; /* ---------------------------------------------------------------------- */ -static bool egl_gles; -static int egl_debug; - -#define egl_dbg(_x ...) \ - do { \ - if (egl_debug) { \ - fprintf(stderr, "egl: " _x); \ - } \ - } while (0); - -/* ---------------------------------------------------------------------- */ - #ifdef CONFIG_OPENGL_DMABUF int qemu_egl_rn_fd; @@ -92,6 +80,7 @@ static int qemu_egl_rendernode_open(const char *rendernode) int egl_rendernode_init(const char *rendernode) { qemu_egl_rn_fd = -1; + int rc; qemu_egl_rn_fd = qemu_egl_rendernode_open(rendernode); if (qemu_egl_rn_fd == -1) { @@ -105,7 +94,11 @@ int egl_rendernode_init(const char *rendernode) goto err; } - qemu_egl_init_dpy((EGLNativeDisplayType)qemu_egl_rn_gbm_dev, false, false); + rc = qemu_egl_init_dpy_mesa((EGLNativeDisplayType)qemu_egl_rn_gbm_dev); + if (rc != 0) { + /* qemu_egl_init_dpy_mesa reports error */ + goto err; + } if (!epoxy_has_egl_extension(qemu_egl_display, "EGL_KHR_surfaceless_context")) { @@ -171,8 +164,6 @@ EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win) EGLSurface esurface; EGLBoolean b; - egl_dbg("eglCreateWindowSurface (x11 win id 0x%lx) ...\n", - (unsigned long) win); esurface = eglCreateWindowSurface(qemu_egl_display, qemu_egl_config, (EGLNativeWindowType)win, NULL); @@ -220,20 +211,19 @@ EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win) * platform extensions (EGL_KHR_platform_gbm and friends) yet it doesn't seem * like mesa will be able to advertise these (even though it can do EGL 1.5). 
*/ -static EGLDisplay qemu_egl_get_display(void *native) +static EGLDisplay qemu_egl_get_display(EGLNativeDisplayType native, + EGLenum platform) { EGLDisplay dpy = EGL_NO_DISPLAY; -#ifdef EGL_MESA_platform_gbm /* In practise any EGL 1.5 implementation would support the EXT extension */ if (epoxy_has_egl_extension(NULL, "EGL_EXT_platform_base")) { PFNEGLGETPLATFORMDISPLAYEXTPROC getPlatformDisplayEXT = (void *) eglGetProcAddress("eglGetPlatformDisplayEXT"); - if (getPlatformDisplayEXT) { - dpy = getPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA, native, NULL); + if (getPlatformDisplayEXT && platform != 0) { + dpy = getPlatformDisplayEXT(platform, native, NULL); } } -#endif if (dpy == EGL_NO_DISPLAY) { /* fallback */ @@ -242,7 +232,8 @@ static EGLDisplay qemu_egl_get_display(void *native) return dpy; } -int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug) +static int qemu_egl_init_dpy(EGLNativeDisplayType dpy, + EGLenum platform) { static const EGLint conf_att_gl[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, @@ -253,75 +244,66 @@ int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug) EGL_ALPHA_SIZE, 0, EGL_NONE, }; - static const EGLint conf_att_gles[] = { - EGL_SURFACE_TYPE, EGL_WINDOW_BIT, - EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, - EGL_RED_SIZE, 5, - EGL_GREEN_SIZE, 5, - EGL_BLUE_SIZE, 5, - EGL_ALPHA_SIZE, 0, - EGL_NONE, - }; EGLint major, minor; EGLBoolean b; EGLint n; - if (debug) { - egl_debug = 1; - setenv("EGL_LOG_LEVEL", "debug", true); - setenv("LIBGL_DEBUG", "verbose", true); - } - - egl_dbg("qemu_egl_get_display (dpy %p) ...\n", dpy); - qemu_egl_display = qemu_egl_get_display(dpy); + qemu_egl_display = qemu_egl_get_display(dpy, platform); if (qemu_egl_display == EGL_NO_DISPLAY) { error_report("egl: eglGetDisplay failed"); return -1; } - egl_dbg("eglInitialize ...\n"); b = eglInitialize(qemu_egl_display, &major, &minor); if (b == EGL_FALSE) { error_report("egl: eglInitialize failed"); return -1; } - egl_dbg("eglBindAPI ...\n"); - b 
= eglBindAPI(gles ? EGL_OPENGL_ES_API : EGL_OPENGL_API); + b = eglBindAPI(EGL_OPENGL_API); if (b == EGL_FALSE) { error_report("egl: eglBindAPI failed"); return -1; } - egl_dbg("eglChooseConfig ...\n"); - b = eglChooseConfig(qemu_egl_display, - gles ? conf_att_gles : conf_att_gl, + b = eglChooseConfig(qemu_egl_display, conf_att_gl, &qemu_egl_config, 1, &n); if (b == EGL_FALSE || n != 1) { error_report("egl: eglChooseConfig failed"); return -1; } - - egl_gles = gles; return 0; } +int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy) +{ +#ifdef EGL_KHR_platform_x11 + return qemu_egl_init_dpy(dpy, EGL_PLATFORM_X11_KHR); +#else + return qemu_egl_init_dpy(dpy, 0); +#endif +} + +int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy) +{ +#ifdef EGL_MESA_platform_gbm + return qemu_egl_init_dpy(dpy, EGL_PLATFORM_GBM_MESA); +#else + return qemu_egl_init_dpy(dpy, 0); +#endif +} + EGLContext qemu_egl_init_ctx(void) { static const EGLint ctx_att_gl[] = { + EGL_CONTEXT_OPENGL_PROFILE_MASK, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT, EGL_NONE }; - static const EGLint ctx_att_gles[] = { - EGL_CONTEXT_CLIENT_VERSION, 2, - EGL_NONE - }; - EGLContext ectx; EGLBoolean b; - egl_dbg("eglCreateContext ...\n"); ectx = eglCreateContext(qemu_egl_display, qemu_egl_config, EGL_NO_CONTEXT, - egl_gles ? 
ctx_att_gles : ctx_att_gl); + ctx_att_gl); if (ectx == EGL_NO_CONTEXT) { error_report("egl: eglCreateContext failed"); return NULL; diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index d53288f027..cf48cca259 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -246,7 +246,7 @@ void gtk_egl_init(void) GdkDisplay *gdk_display = gdk_display_get_default(); Display *x11_display = gdk_x11_display_get_xdisplay(gdk_display); - if (qemu_egl_init_dpy(x11_display, false, false) < 0) { + if (qemu_egl_init_dpy_x11(x11_display) < 0) { return; } @@ -912,9 +912,9 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, return TRUE; } qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_X, x, - surface_width(vc->gfx.ds)); + 0, surface_width(vc->gfx.ds)); qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_Y, y, - surface_height(vc->gfx.ds)); + 0, surface_height(vc->gfx.ds)); qemu_input_event_sync(); } else if (s->last_set && s->ptr_owner == vc) { qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_X, x - s->last_x); diff --git a/ui/input-linux.c b/ui/input-linux.c index dc0613ca1f..49d52a69cc 100644 --- a/ui/input-linux.c +++ b/ui/input-linux.c @@ -169,6 +169,10 @@ struct InputLinux { bool has_abs_x; int num_keys; int num_btns; + int abs_x_min; + int abs_x_max; + int abs_y_min; + int abs_y_max; struct input_event event; int read_offset; @@ -314,6 +318,18 @@ static void input_linux_handle_mouse(InputLinux *il, struct input_event *event) break; } break; + case EV_ABS: + switch (event->code) { + case ABS_X: + qemu_input_queue_abs(NULL, INPUT_AXIS_X, event->value, + il->abs_x_min, il->abs_x_max); + break; + case ABS_Y: + qemu_input_queue_abs(NULL, INPUT_AXIS_Y, event->value, + il->abs_y_min, il->abs_y_max); + break; + } + break; case EV_SYN: qemu_input_event_sync(); if (il->wheel != 0) { @@ -351,7 +367,7 @@ static void input_linux_event(void *opaque) if (il->num_keys) { input_linux_handle_keyboard(il, &il->event); } - if (il->has_rel_x && il->num_btns) { + if ((il->has_rel_x || 
il->has_abs_x) && il->num_btns) { input_linux_handle_mouse(il, &il->event); } } @@ -364,6 +380,7 @@ static void input_linux_complete(UserCreatable *uc, Error **errp) uint8_t keymap[KEY_CNT / 8], keystate[KEY_CNT / 8]; unsigned int i; int rc, ver; + struct input_absinfo absinfo; if (!il->evdev) { error_setg(errp, "no input device specified"); @@ -402,6 +419,12 @@ static void input_linux_complete(UserCreatable *uc, Error **errp) rc = ioctl(il->fd, EVIOCGBIT(EV_ABS, sizeof(absmap)), &absmap); if (absmap & (1 << ABS_X)) { il->has_abs_x = true; + rc = ioctl(il->fd, EVIOCGABS(ABS_X), &absinfo); + il->abs_x_min = absinfo.minimum; + il->abs_x_max = absinfo.maximum; + rc = ioctl(il->fd, EVIOCGABS(ABS_Y), &absinfo); + il->abs_y_min = absinfo.minimum; + il->abs_y_max = absinfo.maximum; } } diff --git a/ui/input.c b/ui/input.c index 830f912f99..290ca9f54d 100644 --- a/ui/input.c +++ b/ui/input.c @@ -166,6 +166,11 @@ void qmp_input_send_event(bool has_device, const char *device, qemu_input_event_sync(); } +static int qemu_input_transform_invert_abs_value(int value) +{ + return (int64_t)INPUT_EVENT_ABS_MAX - value + INPUT_EVENT_ABS_MIN; +} + static void qemu_input_transform_abs_rotate(InputEvent *evt) { InputMoveEvent *move = evt->u.abs.data; @@ -175,16 +180,16 @@ static void qemu_input_transform_abs_rotate(InputEvent *evt) move->axis = INPUT_AXIS_Y; } else if (move->axis == INPUT_AXIS_Y) { move->axis = INPUT_AXIS_X; - move->value = INPUT_EVENT_ABS_SIZE - 1 - move->value; + move->value = qemu_input_transform_invert_abs_value(move->value); } break; case 180: - move->value = INPUT_EVENT_ABS_SIZE - 1 - move->value; + move->value = qemu_input_transform_invert_abs_value(move->value); break; case 270: if (move->axis == INPUT_AXIS_X) { move->axis = INPUT_AXIS_Y; - move->value = INPUT_EVENT_ABS_SIZE - 1 - move->value; + move->value = qemu_input_transform_invert_abs_value(move->value); } else if (move->axis == INPUT_AXIS_Y) { move->axis = INPUT_AXIS_X; } @@ -467,12 +472,17 @@ bool 
qemu_input_is_absolute(void) return (s != NULL) && (s->handler->mask & INPUT_EVENT_MASK_ABS); } -int qemu_input_scale_axis(int value, int size_in, int size_out) +int qemu_input_scale_axis(int value, + int min_in, int max_in, + int min_out, int max_out) { - if (size_in < 2) { - return size_out / 2; + int64_t range_in = (int64_t)max_in - min_in; + int64_t range_out = (int64_t)max_out - min_out; + + if (range_in < 1) { + return min_out + range_out / 2; } - return (int64_t)value * (size_out - 1) / (size_in - 1); + return ((int64_t)value - min_in) * range_out / range_in + min_out; } InputEvent *qemu_input_event_new_move(InputEventKind kind, @@ -496,10 +506,13 @@ void qemu_input_queue_rel(QemuConsole *src, InputAxis axis, int value) qapi_free_InputEvent(evt); } -void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value, int size) +void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value, + int min_in, int max_in) { InputEvent *evt; - int scaled = qemu_input_scale_axis(value, size, INPUT_EVENT_ABS_SIZE); + int scaled = qemu_input_scale_axis(value, min_in, max_in, + INPUT_EVENT_ABS_MIN, + INPUT_EVENT_ABS_MAX); evt = qemu_input_event_new_move(INPUT_EVENT_KIND_ABS, axis, scaled); qemu_input_event_send(src, evt); qapi_free_InputEvent(evt); @@ -490,9 +490,9 @@ static void sdl_send_mouse_event(int dx, int dy, int x, int y, int state) if (qemu_input_is_absolute()) { qemu_input_queue_abs(dcl->con, INPUT_AXIS_X, x, - real_screen->w); + 0, real_screen->w); qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, y, - real_screen->h); + 0, real_screen->h); } else { if (guest_cursor) { x -= guest_x; @@ -298,8 +298,8 @@ static void sdl_send_mouse_event(struct sdl2_console *scon, int dx, int dy, } } } - qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_X, off_x + x, max_w); - qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_Y, off_y + y, max_h); + qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_X, off_x + x, 0, max_w); + qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_Y, off_y + 
y, 0, max_h); } else { if (guest_cursor) { x -= guest_x; diff --git a/ui/spice-input.c b/ui/spice-input.c index 8eeebdbb2e..86293dd2ce 100644 --- a/ui/spice-input.c +++ b/ui/spice-input.c @@ -172,8 +172,8 @@ static void tablet_position(SpiceTabletInstance* sin, int x, int y, QemuSpicePointer *pointer = container_of(sin, QemuSpicePointer, tablet); spice_update_buttons(pointer, 0, buttons_state); - qemu_input_queue_abs(NULL, INPUT_AXIS_X, x, pointer->width); - qemu_input_queue_abs(NULL, INPUT_AXIS_Y, y, pointer->height); + qemu_input_queue_abs(NULL, INPUT_AXIS_X, x, 0, pointer->width); + qemu_input_queue_abs(NULL, INPUT_AXIS_Y, y, 0, pointer->height); qemu_input_event_sync(); } diff --git a/ui/vnc-enc-zrle.c b/ui/vnc-enc-zrle.c index 5489870e70..fd63d4f688 100644 --- a/ui/vnc-enc-zrle.c +++ b/ui/vnc-enc-zrle.c @@ -163,7 +163,6 @@ static void zrle_choose_palette_rle(VncState *vs, int w, int h, if (packed_bytes < estimated_bytes) { *use_rle = false; *use_palette = true; - estimated_bytes = packed_bytes; } } } @@ -1556,8 +1556,8 @@ static void pointer_event(VncState *vs, int button_mask, int x, int y) } if (vs->absolute) { - qemu_input_queue_abs(con, INPUT_AXIS_X, x, width); - qemu_input_queue_abs(con, INPUT_AXIS_Y, y, height); + qemu_input_queue_abs(con, INPUT_AXIS_X, x, 0, width); + qemu_input_queue_abs(con, INPUT_AXIS_Y, y, 0, height); } else if (vnc_has_feature(vs, VNC_FEATURE_POINTER_TYPE_CHANGE)) { qemu_input_queue_rel(con, INPUT_AXIS_X, x - 0x7FFF); qemu_input_queue_rel(con, INPUT_AXIS_Y, y - 0x7FFF); @@ -2061,15 +2061,15 @@ static void set_pixel_format(VncState *vs, int bits_per_pixel, } vs->client_pf.rmax = red_max ? red_max : 0xFF; - vs->client_pf.rbits = hweight_long(red_max); + vs->client_pf.rbits = ctpopl(red_max); vs->client_pf.rshift = red_shift; vs->client_pf.rmask = red_max << red_shift; vs->client_pf.gmax = green_max ? 
green_max : 0xFF; - vs->client_pf.gbits = hweight_long(green_max); + vs->client_pf.gbits = ctpopl(green_max); vs->client_pf.gshift = green_shift; vs->client_pf.gmask = green_max << green_shift; vs->client_pf.bmax = blue_max ? blue_max : 0xFF; - vs->client_pf.bbits = hweight_long(blue_max); + vs->client_pf.bbits = ctpopl(blue_max); vs->client_pf.bshift = blue_shift; vs->client_pf.bmask = blue_max << blue_shift; vs->client_pf.bits_per_pixel = bits_per_pixel; diff --git a/util/coroutine-gthread.c b/util/coroutine-gthread.c deleted file mode 100644 index 62bfb4015d..0000000000 --- a/util/coroutine-gthread.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * GThread coroutine initialization code - * - * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> - * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.0 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
- */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qemu/coroutine_int.h" - -typedef struct { - Coroutine base; - GThread *thread; - bool runnable; - bool free_on_thread_exit; - CoroutineAction action; -} CoroutineGThread; - -static CompatGMutex coroutine_lock; -static CompatGCond coroutine_cond; - -/* GLib 2.31 and beyond deprecated various parts of the thread API, - * but the new interfaces are not available in older GLib versions - * so we have to cope with both. - */ -#if GLIB_CHECK_VERSION(2, 31, 0) -/* Awkwardly, the GPrivate API doesn't provide a way to update the - * GDestroyNotify handler for the coroutine key dynamically. So instead - * we track whether or not the CoroutineGThread should be freed on - * thread exit / coroutine key update using the free_on_thread_exit - * field. - */ -static void coroutine_destroy_notify(gpointer data) -{ - CoroutineGThread *co = data; - if (co && co->free_on_thread_exit) { - g_free(co); - } -} - -static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify); - -static inline CoroutineGThread *get_coroutine_key(void) -{ - return g_private_get(&coroutine_key); -} - -static inline void set_coroutine_key(CoroutineGThread *co, - bool free_on_thread_exit) -{ - /* Unlike g_static_private_set() this does not call the GDestroyNotify - * if the previous value of the key was NULL. Fortunately we only need - * the GDestroyNotify in the non-NULL key case. 
- */ - co->free_on_thread_exit = free_on_thread_exit; - g_private_replace(&coroutine_key, co); -} - -static inline GThread *create_thread(GThreadFunc func, gpointer data) -{ - return g_thread_new("coroutine", func, data); -} - -#else - -/* Handle older GLib versions */ - -static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT; - -static inline CoroutineGThread *get_coroutine_key(void) -{ - return g_static_private_get(&coroutine_key); -} - -static inline void set_coroutine_key(CoroutineGThread *co, - bool free_on_thread_exit) -{ - g_static_private_set(&coroutine_key, co, - free_on_thread_exit ? (GDestroyNotify)g_free : NULL); -} - -static inline GThread *create_thread(GThreadFunc func, gpointer data) -{ - return g_thread_create_full(func, data, 0, TRUE, TRUE, - G_THREAD_PRIORITY_NORMAL, NULL); -} - -#endif - - -static void __attribute__((constructor)) coroutine_init(void) -{ -#if !GLIB_CHECK_VERSION(2, 31, 0) - if (!g_thread_supported()) { - g_thread_init(NULL); - } -#endif -} - -static void coroutine_wait_runnable_locked(CoroutineGThread *co) -{ - while (!co->runnable) { - g_cond_wait(&coroutine_cond, &coroutine_lock); - } -} - -static void coroutine_wait_runnable(CoroutineGThread *co) -{ - g_mutex_lock(&coroutine_lock); - coroutine_wait_runnable_locked(co); - g_mutex_unlock(&coroutine_lock); -} - -static gpointer coroutine_thread(gpointer opaque) -{ - CoroutineGThread *co = opaque; - - set_coroutine_key(co, false); - coroutine_wait_runnable(co); - co->base.entry(co->base.entry_arg); - qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE); - return NULL; -} - -Coroutine *qemu_coroutine_new(void) -{ - CoroutineGThread *co; - - co = g_malloc0(sizeof(*co)); - co->thread = create_thread(coroutine_thread, co); - if (!co->thread) { - g_free(co); - return NULL; - } - return &co->base; -} - -void qemu_coroutine_delete(Coroutine *co_) -{ - CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_); - - g_thread_join(co->thread); - g_free(co); -} - 
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, - Coroutine *to_, - CoroutineAction action) -{ - CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_); - CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_); - - g_mutex_lock(&coroutine_lock); - from->runnable = false; - from->action = action; - to->runnable = true; - to->action = action; - g_cond_broadcast(&coroutine_cond); - - if (action != COROUTINE_TERMINATE) { - coroutine_wait_runnable_locked(from); - } - g_mutex_unlock(&coroutine_lock); - return from->action; -} - -Coroutine *qemu_coroutine_self(void) -{ - CoroutineGThread *co = get_coroutine_key(); - if (!co) { - co = g_malloc0(sizeof(*co)); - co->runnable = true; - set_coroutine_key(co, true); - } - - return &co->base; -} - -bool qemu_in_coroutine(void) -{ - CoroutineGThread *co = get_coroutine_key(); - - return co && co->base.caller; -} diff --git a/util/envlist.c b/util/envlist.c index e86857e70a..1eeb7fca87 100644 --- a/util/envlist.c +++ b/util/envlist.c @@ -17,16 +17,14 @@ static int envlist_parse(envlist_t *envlist, const char *env, int (*)(envlist_t *, const char *)); /* - * Allocates new envlist and returns pointer to that or - * NULL in case of error. + * Allocates new envlist and returns pointer to it. 
*/ envlist_t * envlist_create(void) { envlist_t *envlist; - if ((envlist = malloc(sizeof (*envlist))) == NULL) - return (NULL); + envlist = g_malloc(sizeof(*envlist)); QLIST_INIT(&envlist->el_entries); envlist->el_count = 0; @@ -48,10 +46,10 @@ envlist_free(envlist_t *envlist) entry = envlist->el_entries.lh_first; QLIST_REMOVE(entry, ev_link); - free((char *)entry->ev_var); - free(entry); + g_free((char *)entry->ev_var); + g_free(entry); } - free(envlist); + g_free(envlist); } /* @@ -101,8 +99,7 @@ envlist_parse(envlist_t *envlist, const char *env, if ((envlist == NULL) || (env == NULL)) return (EINVAL); - if ((tmpenv = strdup(env)) == NULL) - return (errno); + tmpenv = g_strdup(env); envsave = tmpenv; do { @@ -117,7 +114,7 @@ envlist_parse(envlist_t *envlist, const char *env, tmpenv = envvar + 1; } while (envvar != NULL); - free(envsave); + g_free(envsave); return ret; } @@ -155,18 +152,14 @@ envlist_setenv(envlist_t *envlist, const char *env) if (entry != NULL) { QLIST_REMOVE(entry, ev_link); - free((char *)entry->ev_var); - free(entry); + g_free((char *)entry->ev_var); + g_free(entry); } else { envlist->el_count++; } - if ((entry = malloc(sizeof (*entry))) == NULL) - return (errno); - if ((entry->ev_var = strdup(env)) == NULL) { - free(entry); - return (errno); - } + entry = g_malloc(sizeof(*entry)); + entry->ev_var = g_strdup(env); QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link); return (0); @@ -201,8 +194,8 @@ envlist_unsetenv(envlist_t *envlist, const char *env) } if (entry != NULL) { QLIST_REMOVE(entry, ev_link); - free((char *)entry->ev_var); - free(entry); + g_free((char *)entry->ev_var); + g_free(entry); envlist->el_count--; } @@ -212,12 +205,12 @@ envlist_unsetenv(envlist_t *envlist, const char *env) /* * Returns given envlist as array of strings (in same form that * global variable environ is). Caller must free returned memory - * by calling free(3) for each element and for the array. 
Returned - * array and given envlist are not related (no common references). + * by calling g_free for each element and the array. + * Returned array and given envlist are not related (no common + * references). * * If caller provides count pointer, number of items in array is - * stored there. In case of error, NULL is returned and no memory - * is allocated. + * stored there. */ char ** envlist_to_environ(const envlist_t *envlist, size_t *count) @@ -225,13 +218,11 @@ envlist_to_environ(const envlist_t *envlist, size_t *count) struct envlist_entry *entry; char **env, **penv; - penv = env = malloc((envlist->el_count + 1) * sizeof (char *)); - if (env == NULL) - return (NULL); + penv = env = g_malloc((envlist->el_count + 1) * sizeof(char *)); for (entry = envlist->el_entries.lh_first; entry != NULL; entry = entry->ev_link.le_next) { - *(penv++) = strdup(entry->ev_var); + *(penv++) = g_strdup(entry->ev_var); } *penv = NULL; /* NULL terminate the list */ diff --git a/util/osdep.c b/util/osdep.c index 06fb1cfda6..a2863c8e53 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -38,6 +38,14 @@ extern int madvise(caddr_t, size_t, int); #include "qemu/error-report.h" #include "monitor/monitor.h" +#ifdef F_OFD_SETLK +#define QEMU_SETLK F_OFD_SETLK +#define QEMU_GETLK F_OFD_GETLK +#else +#define QEMU_SETLK F_SETLK +#define QEMU_GETLK F_GETLK +#endif + static bool fips_enabled = false; static const char *hw_version = QEMU_HW_VERSION; @@ -140,6 +148,46 @@ static int qemu_parse_fdset(const char *param) { return qemu_parse_fd(param); } + +static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) +{ + int ret; + struct flock fl = { + .l_whence = SEEK_SET, + .l_start = start, + .l_len = len, + .l_type = fl_type, + }; + ret = fcntl(fd, QEMU_SETLK, &fl); + return ret == -1 ? -errno : 0; +} + +int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive) +{ + return qemu_lock_fcntl(fd, start, len, exclusive ? 
F_WRLCK : F_RDLCK); +} + +int qemu_unlock_fd(int fd, int64_t start, int64_t len) +{ + return qemu_lock_fcntl(fd, start, len, F_UNLCK); +} + +int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) +{ + int ret; + struct flock fl = { + .l_whence = SEEK_SET, + .l_start = start, + .l_len = len, + .l_type = exclusive ? F_WRLCK : F_RDLCK, + }; + ret = fcntl(fd, QEMU_GETLK, &fl); + if (ret == -1) { + return -errno; + } else { + return fl.l_type == F_UNLCK ? 0 : -EAGAIN; + } +} #endif /* @@ -2050,6 +2050,7 @@ typedef enum DisplayType { DT_SDL, DT_COCOA, DT_GTK, + DT_EGL, DT_NONE, } DisplayType; @@ -2127,6 +2128,15 @@ static DisplayType select_display(const char *p) error_report("VNC requires a display argument vnc=<display>"); exit(1); } + } else if (strstart(p, "egl-headless", &opts)) { +#ifdef CONFIG_OPENGL + request_opengl = 1; + display_opengl = 1; + display = DT_EGL; +#else + fprintf(stderr, "egl support is disabled\n"); + exit(1); +#endif } else if (strstart(p, "curses", &opts)) { #ifdef CONFIG_CURSES display = DT_CURSES; @@ -3524,10 +3534,11 @@ int main(int argc, char **argv, char **envp) exit(1); } fsdev = qemu_opts_create(qemu_find_opts("fsdev"), + qemu_opts_id(opts) ?: qemu_opt_get(opts, "mount_tag"), 1, NULL); if (!fsdev) { - error_report("duplicate fsdev id: %s", + error_report("duplicate or invalid fsdev id: %s", qemu_opt_get(opts, "mount_tag")); exit(1); } @@ -3565,7 +3576,7 @@ int main(int argc, char **argv, char **envp) &error_abort); qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort); qemu_opt_set(device, "fsdev", - qemu_opt_get(opts, "mount_tag"), &error_abort); + qemu_opts_id(fsdev), &error_abort); qemu_opt_set(device, "mount_tag", qemu_opt_get(opts, "mount_tag"), &error_abort); break; @@ -4503,7 +4514,7 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - parse_numa_opts(machine_class); + 
parse_numa_opts(current_machine); if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, NULL)) { @@ -4559,7 +4570,7 @@ int main(int argc, char **argv, char **envp) current_machine->boot_order = boot_order; current_machine->cpu_model = cpu_model; - machine_class->init(current_machine); + machine_run_board_init(current_machine); realtime_init(); @@ -4592,8 +4603,6 @@ int main(int argc, char **argv, char **envp) cpu_synchronize_all_post_init(); - numa_post_machine_init(); - rom_reset_order_override(); /* @@ -4659,6 +4668,12 @@ int main(int argc, char **argv, char **envp) qemu_spice_display_init(); } +#ifdef CONFIG_OPENGL + if (display_type == DT_EGL) { + egl_headless_init(); + } +#endif + if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) { exit(1); } |