diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2016-10-04 14:25:08 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2016-10-04 14:25:08 +0100 |
commit | bbc4c3f4f3c624e2de64fdcb79f4dd8c1a508e9d (patch) | |
tree | acd2201a7f374badd8c9f16c9917a6e93af40285 | |
parent | 6e11eb2d2b96790e647aa4c744ed2ed03a77fbbd (diff) | |
parent | 7d992e4d5a95d0b21c6c33bd32cef8671805e39b (diff) |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches
# gpg: Signature made Thu 29 Sep 2016 14:11:30 BST
# gpg: using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream:
oslib-posix: add a configure switch to debug stack usage
coroutine-sigaltstack: use helper for allocating stack memory
coroutine-ucontext: use helper for allocating stack memory
coroutine: add a macro for the coroutine stack size
coroutine-sigaltstack: rename coroutine struct appropriately
oslib-posix: add helpers for stack alloc and free
block: Remove qemu_root_bds_opts
block: Move 'discard' option to bdrv_open_common()
block: Use 'detect-zeroes' option for 'blockdev-change-medium'
block: Parse 'detect-zeroes' in bdrv_open_common()
block/qapi: Move 'aio' option to file driver
block/qapi: Use separate options type for curl driver
block: Drop aio/cache consistency check from qmp_blockdev_add()
block: Fix error path in qmp_blockdev_change_medium()
block-backend: remove blk_flush_all
qemu: use bdrv_flush_all for vm_stop et al
block: reintroduce bdrv_flush_all
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block.c | 50 | ||||
-rw-r--r-- | block/block-backend.c | 31 | ||||
-rw-r--r-- | block/io.c | 25 | ||||
-rw-r--r-- | block/raw-posix.c | 44 | ||||
-rw-r--r-- | block/raw-win32.c | 56 | ||||
-rw-r--r-- | blockdev.c | 112 | ||||
-rwxr-xr-x | configure | 19 | ||||
-rw-r--r-- | cpus.c | 4 | ||||
-rw-r--r-- | hw/i386/xen/xen_platform.c | 2 | ||||
-rw-r--r-- | hw/ide/piix.c | 4 | ||||
-rw-r--r-- | include/block/block.h | 2 | ||||
-rw-r--r-- | include/qemu/coroutine_int.h | 2 | ||||
-rw-r--r-- | include/sysemu/block-backend.h | 3 | ||||
-rw-r--r-- | include/sysemu/os-posix.h | 27 | ||||
-rw-r--r-- | qapi/block-core.json | 31 | ||||
-rwxr-xr-x | tests/qemu-iotests/087 | 4 | ||||
-rw-r--r-- | tests/qemu-iotests/087.out | 2 | ||||
-rw-r--r-- | util/coroutine-sigaltstack.c | 25 | ||||
-rw-r--r-- | util/coroutine-ucontext.c | 11 | ||||
-rw-r--r-- | util/coroutine-win32.c | 2 | ||||
-rw-r--r-- | util/oslib-posix.c | 77 |
21 files changed, 342 insertions, 191 deletions
@@ -42,6 +42,7 @@ #include "qapi-event.h" #include "qemu/cutils.h" #include "qemu/id.h" +#include "qapi/util.h" #ifdef CONFIG_BSD #include <sys/ioctl.h> @@ -764,7 +765,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, /* Our block drivers take care to send flushes and respect unmap policy, * so we can default to enable both on lower layers regardless of the * corresponding parent options. */ - flags |= BDRV_O_UNMAP; + qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); /* Clear flags that only apply to the top layer */ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ | @@ -954,6 +955,16 @@ static QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_BOOL, .help = "Node is opened in read-only mode", }, + { + .name = "detect-zeroes", + .type = QEMU_OPT_STRING, + .help = "try to optimize zero writes (off, on, unmap)", + }, + { + .name = "discard", + .type = QEMU_OPT_STRING, + .help = "discard operation (ignore/off, unmap/on)", + }, { /* end of list */ } }, }; @@ -970,6 +981,8 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, const char *filename; const char *driver_name = NULL; const char *node_name = NULL; + const char *discard; + const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; @@ -1038,6 +1051,41 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } } + discard = qemu_opt_get(opts, "discard"); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto fail_opts; + } + } + + detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); + if (detect_zeroes) { + BlockdevDetectZeroesOptions value = + qapi_enum_parse(BlockdevDetectZeroesOptions_lookup, + detect_zeroes, + BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX, + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; + } + + if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + !(bs->open_flags & BDRV_O_UNMAP)) + { + error_setg(errp, "setting detect-zeroes to unmap is not allowed " + "without setting discard operation to unmap"); + ret = -EINVAL; + goto fail_opts; + } + + bs->detect_zeroes = value; + } + if (filename != NULL) { pstrcpy(bs->filename, sizeof(bs->filename), filename); } else { diff --git a/block/block-backend.c b/block/block-backend.c index 0bd19abdfb..639294b8e6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1592,13 +1592,12 @@ void blk_update_root_state(BlockBackend *blk) } /* - * Applies the information in the root state to the given BlockDriverState. This - * does not include the flags which have to be specified for bdrv_open(), use - * blk_get_open_flags_from_root_state() to inquire them. + * Returns the detect-zeroes setting to be used for bdrv_open() of a + * BlockDriverState which is supposed to inherit the root state. */ -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs) +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { - bs->detect_zeroes = blk->root_state.detect_zeroes; + return blk->root_state.detect_zeroes; } /* @@ -1640,28 +1639,6 @@ int blk_commit_all(void) return 0; } -int blk_flush_all(void) -{ - BlockBackend *blk = NULL; - int result = 0; - - while ((blk = blk_all_next(blk)) != NULL) { - AioContext *aio_context = blk_get_aio_context(blk); - int ret; - - aio_context_acquire(aio_context); - if (blk_is_inserted(blk)) { - ret = blk_flush(blk); - if (ret < 0 && !result) { - result = ret; - } - } - aio_context_release(aio_context); - } - - return result; -} - /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) diff --git a/block/io.c b/block/io.c index fdf70807b0..57a2eeb512 100644 --- a/block/io.c +++ b/block/io.c @@ -1619,6 +1619,31 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, BDRV_REQ_ZERO_WRITE | flags); } +/* + * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not. + */ +int bdrv_flush_all(void) +{ + BdrvNextIterator it; + BlockDriverState *bs = NULL; + int result = 0; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_flush(bs); + if (ret < 0 && !result) { + result = ret; + } + aio_context_release(aio_context); + } + + return result; +} + + typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; diff --git a/block/raw-posix.c b/block/raw-posix.c index 6ed7547392..166e9d1ad5 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -143,6 +143,7 @@ typedef struct BDRVRawState { bool has_discard:1; bool has_write_zeroes:1; bool discard_zeroes:1; + bool use_linux_aio:1; bool has_fallocate; bool needs_alignment; } BDRVRawState; @@ -367,18 +368,6 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags) } } -#ifdef CONFIG_LINUX_AIO -static bool raw_use_aio(int bdrv_flags) -{ - /* - * Currently Linux do AIO only for files opened with O_DIRECT - * specified so check NOCACHE flag too - */ - return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO); -} -#endif - static void raw_parse_filename(const char *filename, QDict *options, Error **errp) { @@ -399,6 +388,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "File name of the image", }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, { /* end of list */ } }, }; @@ -410,6 +404,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, QemuOpts *opts; Error *local_err = NULL; const char *filename = NULL; + BlockdevAioOptions aio, aio_default; int fd, ret; struct stat st; @@ -429,6 +424,18 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } + aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) + ? BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); + s->open_flags = open_flags; raw_parse_flags(bdrv_flags, &s->open_flags); @@ -444,14 +451,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->fd = fd; #ifdef CONFIG_LINUX_AIO - if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) { + /* Currently Linux does AIO only for files opened with O_DIRECT */ + if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { error_setg(errp, "aio=native was specified, but it requires " "cache.direct=on, which was not specified."); ret = -EINVAL; goto fail; } #else - if (bdrv_flags & BDRV_O_NATIVE_AIO) { + if (s->use_linux_aio) { error_setg(errp, "aio=native was specified, but is not supported " "in this build."); ret = -EINVAL; @@ -1256,7 +1264,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, if (!bdrv_qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO - } else if (bs->open_flags & BDRV_O_NATIVE_AIO) { + } else if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); assert(qiov->size == bytes); return laio_co_submit(bs, aio, s->fd, offset, qiov, type); @@ -1285,7 +1293,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, static void raw_aio_plug(BlockDriverState *bs) { #ifdef CONFIG_LINUX_AIO - if (bs->open_flags & BDRV_O_NATIVE_AIO) { + BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_plug(bs, aio); } @@ -1295,7 +1304,8 @@ static void raw_aio_plug(BlockDriverState *bs) static void raw_aio_unplug(BlockDriverState *bs) { #ifdef CONFIG_LINUX_AIO - if (bs->open_flags & BDRV_O_NATIVE_AIO) { + BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_unplug(bs, aio); } diff --git a/block/raw-win32.c b/block/raw-win32.c index 56f45fea9e..734bb105bd 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -32,6 +32,7 @@ #include "block/thread-pool.h" #include "qemu/iov.h" #include "qapi/qmp/qstring.h" +#include "qapi/util.h" #include <windows.h> #include <winioctl.h> @@ -252,7 +253,8 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp) } } -static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) +static void raw_parse_flags(int flags, bool use_aio, int *access_flags, + DWORD *overlapped) { assert(access_flags != NULL); assert(overlapped != NULL); @@ -264,7 +266,7 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) } *overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NATIVE_AIO) { + if (use_aio) { *overlapped |= FILE_FLAG_OVERLAPPED; } if (flags & BDRV_O_NOCACHE) { @@ -292,10 +294,35 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "File name of the image", }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, { /* end of list */ } }, }; +static bool get_aio_option(QemuOpts *opts, int flags, Error **errp) +{ + BlockdevAioOptions aio, aio_default; + + aio_default = (flags & BDRV_O_NATIVE_AIO) ? BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + BLOCKDEV_AIO_OPTIONS__MAX, aio_default, errp); + + switch (aio) { + case BLOCKDEV_AIO_OPTIONS_NATIVE: + return true; + case BLOCKDEV_AIO_OPTIONS_THREADS: + return false; + default: + error_setg(errp, "Invalid AIO option"); + } + return false; +} + static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -305,6 +332,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; const char *filename; + bool use_aio; int ret; s->type = FTYPE_FILE; @@ -319,7 +347,14 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); - raw_parse_flags(flags, &access_flags, &overlapped); + use_aio = get_aio_option(opts, flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); if (filename[0] && filename[1] == ':') { snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); @@ -346,7 +381,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - if (flags & BDRV_O_NATIVE_AIO) { + if (use_aio) { s->aio = win32_aio_init(); if (s->aio == NULL) { CloseHandle(s->hfile); @@ -647,6 +682,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; const char *filename; + bool use_aio; QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); @@ -659,6 +695,16 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); + use_aio = get_aio_option(opts, flags, &local_err); + if (!local_err && use_aio) { + error_setg(&local_err, "AIO is not supported on Windows host devices"); + } + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto done; + } + if (strstart(filename, "/dev/cdrom", NULL)) { if (find_cdrom(device_name, sizeof(device_name)) < 0) { error_setg(errp, "Could not open CD-ROM drive"); @@ -677,7 +723,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, } s->type = find_device_type(bs, filename); - raw_parse_flags(flags, &access_flags, &overlapped); + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); create_flags = OPEN_EXISTING; diff --git a/blockdev.c b/blockdev.c index 29c6561fd8..07ec733905 100644 --- a/blockdev.c +++ b/blockdev.c @@ -356,7 +356,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, const char **throttling_group, ThrottleConfig *throttle_cfg, BlockdevDetectZeroesOptions *detect_zeroes, Error **errp) { - const char *discard; Error *local_error = NULL; const char *aio; @@ -365,13 +364,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, *bdrv_flags |= BDRV_O_COPY_ON_READ; } - if ((discard = qemu_opt_get(opts, "discard")) != NULL) { - if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) { - error_setg(errp, "Invalid discard option"); - return; - } - } - if ((aio = qemu_opt_get(opts, "aio")) != NULL) { if (!strcmp(aio, "native")) { *bdrv_flags |= BDRV_O_NATIVE_AIO; @@ -449,15 +441,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, error_propagate(errp, local_error); return; } - - if (bdrv_flags && - *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - !(*bdrv_flags & BDRV_O_UNMAP)) - { - error_setg(errp, "setting detect-zeroes to unmap is not allowed " - "without setting discard operation to unmap"); - return; - } } } @@ -650,35 +633,11 @@ err_no_opts: return NULL; } -static QemuOptsList qemu_root_bds_opts; - /* Takes the ownership of bs_opts */ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) { - BlockDriverState *bs; - QemuOpts *opts; - Error *local_error = NULL; - BlockdevDetectZeroesOptions detect_zeroes; int bdrv_flags = 0; - opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp); - if (!opts) { - goto fail; - } - - qemu_opts_absorb_qdict(opts, bs_opts, &local_error); - if (local_error) { - error_propagate(errp, local_error); - goto fail; - } - - extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL, - &detect_zeroes, &local_error); - if (local_error) { - error_propagate(errp, local_error); - goto fail; - } - /* bdrv_open() defaults to the values in bdrv_flags (for compatibility * with other callers) rather than what we want as the real defaults. * Apply the defaults here instead. */ @@ -690,21 +649,7 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) bdrv_flags |= BDRV_O_INACTIVE; } - bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); - if (!bs) { - goto fail_no_bs_opts; - } - - bs->detect_zeroes = detect_zeroes; - -fail_no_bs_opts: - qemu_opts_del(opts); - return bs; - -fail: - qemu_opts_del(opts); - QDECREF(bs_opts); - return NULL; + return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); } void blockdev_close_all_bdrv_states(void) @@ -2549,6 +2494,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, BlockBackend *blk; BlockDriverState *medium_bs = NULL; int bdrv_flags; + bool detect_zeroes; int rc; QDict *options = NULL; Error *err = NULL; @@ -2588,8 +2534,12 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, abort(); } + options = qdict_new(); + detect_zeroes = blk_get_detect_zeroes_from_root_state(blk); + qdict_put(options, "detect-zeroes", + qstring_from_str(detect_zeroes ? "on" : "off")); + if (has_format) { - options = qdict_new(); qdict_put(options, "driver", qstring_from_str(format)); } @@ -2614,7 +2564,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, error_free(err); err = NULL; - qmp_x_blockdev_remove_medium(has_device, device, has_id, id, errp); + qmp_x_blockdev_remove_medium(has_device, device, has_id, id, &err); if (err) { error_propagate(errp, err); goto fail; @@ -2626,8 +2576,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, goto fail; } - blk_apply_root_state(blk, medium_bs); - qmp_blockdev_close_tray(has_device, device, has_id, id, errp); fail: @@ -3832,21 +3780,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) QDict *qdict; Error *local_err = NULL; - /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with - * cache.direct=false instead of silently switching to aio=threads, except - * when called from drive_new(). - * - * For now, simply forbidding the combination for all drivers will do. */ - if (options->has_aio && options->aio == BLOCKDEV_AIO_OPTIONS_NATIVE) { - bool direct = options->has_cache && - options->cache->has_direct && - options->cache->direct; - if (!direct) { - error_setg(errp, "aio=native requires cache.direct=true"); - goto fail; - } - } - visit_type_BlockdevOptions(v, NULL, &options, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -4005,10 +3938,6 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_BOOL, .help = "enable/disable snapshot mode", },{ - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", - },{ .name = "aio", .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native)", @@ -4135,31 +4064,6 @@ QemuOptsList qemu_common_drive_opts = { }, }; -static QemuOptsList qemu_root_bds_opts = { - .name = "root-bds", - .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head), - .desc = { - { - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", - },{ - .name = "aio", - .type = QEMU_OPT_STRING, - .help = "host AIO implementation (threads, native)", - },{ - .name = "copy-on-read", - .type = QEMU_OPT_BOOL, - .help = "copy read data from backing file into image file", - },{ - .name = "detect-zeroes", - .type = QEMU_OPT_STRING, - .help = "try to optimize zero writes (off, on, unmap)", - }, - { /* end of list */ } - }, -}; - QemuOptsList qemu_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), @@ -296,6 +296,7 @@ libiscsi="" libnfs="" coroutine="" coroutine_pool="" +debug_stack_usage="no" seccomp="" glusterfs="" glusterfs_xlator_opt="no" @@ -1004,6 +1005,8 @@ for opt do ;; --enable-coroutine-pool) coroutine_pool="yes" ;; + --enable-debug-stack-usage) debug_stack_usage="yes" + ;; --disable-docs) docs="no" ;; --enable-docs) docs="yes" @@ -4331,6 +4334,17 @@ if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)" fi +if test "$debug_stack_usage" = "yes"; then + if test "$cpu" = "ia64" -o "$cpu" = "hppa"; then + error_exit "stack usage debugging is not supported for $cpu" + fi + if test "$coroutine_pool" = "yes"; then + echo "WARN: disabling coroutine pool for stack usage debugging" + coroutine_pool=no + fi +fi + + ########################################## # check if we have open_by_handle_at @@ -4916,6 +4930,7 @@ echo "QGA MSI support $guest_agent_msi" echo "seccomp support $seccomp" echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" +echo "debug stack usage $debug_stack_usage" echo "GlusterFS support $glusterfs" echo "Archipelago support $archipelago" echo "gcov $gcov_tool" @@ -5384,6 +5399,10 @@ else echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak fi +if test "$debug_stack_usage" = "yes" ; then + echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak +fi + if test "$open_by_handle_at" = "yes" ; then echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak fi @@ -751,7 +751,7 @@ static int do_vm_stop(RunState state) bdrv_drain_all(); replay_disable_events(); - ret = blk_flush_all(); + ret = bdrv_flush_all(); return ret; } @@ -1408,7 +1408,7 @@ int vm_stop_force_state(RunState state) bdrv_drain_all(); /* Make sure to return an error if the flush in a previous vm_stop() * failed. */ - return blk_flush_all(); + return bdrv_flush_all(); } } diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index aa7839324c..f85635cc9a 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -134,8 +134,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v devices, and bit 2 the non-primary-master IDE devices. */ if (val & UNPLUG_ALL_IDE_DISKS) { DPRINTF("unplug disks\n"); - blk_drain_all(); - blk_flush_all(); pci_unplug_disks(pci_dev->bus); } if (val & UNPLUG_ALL_NICS) { diff --git a/hw/ide/piix.c b/hw/ide/piix.c index c190fcaa3c..d5777fd0b3 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -179,6 +179,10 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev) if (di != NULL && !di->media_cd) { BlockBackend *blk = blk_by_legacy_dinfo(di); DeviceState *ds = blk_get_attached_dev(blk); + + blk_drain(blk); + blk_flush(blk); + if (ds) { blk_detach_dev(blk, ds); } diff --git a/include/block/block.h b/include/block/block.h index e18233afe0..107c603605 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -108,6 +108,7 @@ typedef struct HDGeometry { #define BDRV_OPT_CACHE_DIRECT "cache.direct" #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" #define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_DISCARD "discard" #define BDRV_SECTOR_BITS 9 @@ -333,6 +334,7 @@ int bdrv_inactivate_all(void); /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain(BlockDriverState *bs); void coroutine_fn bdrv_co_drain(BlockDriverState *bs); diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index 6df9d33352..14d4f1d1f2 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -28,6 +28,8 @@ #include "qemu/queue.h" #include "qemu/coroutine.h" +#define COROUTINE_STACK_SIZE (1 << 20) + typedef enum { COROUTINE_YIELD = 1, COROUTINE_TERMINATE = 2, diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 3b29317349..a7993afcda 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -152,7 +152,6 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count); int blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); -int blk_flush_all(void); int blk_commit_all(void); void blk_drain(BlockBackend *blk); void blk_drain_all(void); @@ -199,7 +198,7 @@ void blk_io_unplug(BlockBackend *blk); BlockAcctStats *blk_get_stats(BlockBackend *blk); BlockBackendRootState *blk_get_root_state(BlockBackend *blk); void blk_update_root_state(BlockBackend *blk); -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 9c7dfdfbec..3cfedbc28b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -60,4 +60,31 @@ int qemu_utimens(const char *path, const qemu_timespec *times); bool is_daemonized(void); +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + #endif diff --git a/qapi/block-core.json b/qapi/block-core.json index 92193ab0a1..9d797b8fe0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1721,15 +1721,16 @@ ## # @BlockdevOptionsFile # -# Driver specific block device options for the file backend and similar -# protocols. +# Driver specific block device options for the file backend. # # @filename: path to the image file +# @aio: #optional AIO backend (default: threads) (since: 2.8) # # Since: 1.7 ## { 'struct': 'BlockdevOptionsFile', - 'data': { 'filename': 'str' } } + 'data': { 'filename': 'str', + '*aio': 'BlockdevAioOptions' } } ## # @BlockdevOptionsNull @@ -2211,6 +2212,18 @@ '*top-id': 'str' } } ## +# @BlockdevOptionsCurl +# +# Driver specific block device options for the curl backend. +# +# @filename: path to the image file +# +# Since: 1.7 +## +{ 'struct': 'BlockdevOptionsCurl', + 'data': { 'filename': 'str' } } + +## # @BlockdevOptions # # Options for creating a block device. Many options are available for all @@ -2221,7 +2234,6 @@ # This option is required on the top level of blockdev-add. # @discard: #optional discard-related options (default: ignore) # @cache: #optional cache-related options -# @aio: #optional AIO backend (default: threads) # @read-only: #optional whether the block device should be read-only # (default: false) # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) @@ -2236,7 +2248,6 @@ '*node-name': 'str', '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', - '*aio': 'BlockdevAioOptions', '*read-only': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions' }, 'discriminator': 'driver', @@ -2248,13 +2259,13 @@ 'cloop': 'BlockdevOptionsGenericFormat', 'dmg': 'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', - 'ftp': 'BlockdevOptionsFile', - 'ftps': 'BlockdevOptionsFile', + 'ftp': 'BlockdevOptionsCurl', + 'ftps': 'BlockdevOptionsCurl', 'gluster': 'BlockdevOptionsGluster', 'host_cdrom': 'BlockdevOptionsFile', 'host_device':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsFile', - 'https': 'BlockdevOptionsFile', + 'http': 'BlockdevOptionsCurl', + 'https': 'BlockdevOptionsCurl', # TODO iscsi: Wait for structured options 'luks': 'BlockdevOptionsLUKS', # TODO nbd: Should take InetSocketAddress for 'host'? @@ -2271,7 +2282,7 @@ 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options # TODO ssh: Should take InetSocketAddress for 'host'? - 'tftp': 'BlockdevOptionsFile', + 'tftp': 'BlockdevOptionsCurl', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', 'vmdk': 'BlockdevOptionsGenericCOWFormat', diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index 5c04577b36..b1ac71f2b8 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -117,10 +117,10 @@ run_qemu <<EOF "options": { "driver": "$IMGFMT", "node-name": "disk", - "aio": "native", "file": { "driver": "file", - "filename": "$TEST_IMG" + "filename": "$TEST_IMG", + "aio": "native" } } } diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index bef68626c8..cd02eaed4c 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -27,7 +27,7 @@ QMP_VERSION Testing: QMP_VERSION {"return": {}} -{"error": {"class": "GenericError", "desc": "aio=native requires cache.direct=true"}} +{"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index a7c3366553..f6fc49a0e5 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -33,8 +33,9 @@ typedef struct { Coroutine base; void *stack; + size_t stack_size; sigjmp_buf env; -} CoroutineUContext; +} CoroutineSigAltStack; /** * Per-thread coroutine bookkeeping @@ -44,7 +45,7 @@ typedef struct { Coroutine *current; /** The default coroutine */ - CoroutineUContext leader; + CoroutineSigAltStack leader; /** Information for the signal handler (trampoline) */ sigjmp_buf tr_reenter; @@ -89,7 +90,7 @@ static void __attribute__((constructor)) coroutine_init(void) * (from the signal handler when it is not signal handling, read ahead * for more information). */ -static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +static void coroutine_bootstrap(CoroutineSigAltStack *self, Coroutine *co) { /* Initialize longjmp environment and switch back the caller */ if (!sigsetjmp(self->env, 0)) { @@ -109,7 +110,7 @@ static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) */ static void coroutine_trampoline(int signal) { - CoroutineUContext *self; + CoroutineSigAltStack *self; Coroutine *co; CoroutineThreadState *coTS; @@ -143,8 +144,7 @@ static void coroutine_trampoline(int signal) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; - CoroutineUContext *co; + CoroutineSigAltStack *co; CoroutineThreadState *coTS; struct sigaction sa; struct sigaction osa; @@ -164,7 +164,8 @@ Coroutine *qemu_coroutine_new(void) */ co = g_malloc0(sizeof(*co)); - co->stack = g_malloc(stack_size); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); co->base.entry_arg = &old_env; /* stash away our jmp_buf */ coTS = coroutine_get_thread_state(); @@ -189,7 +190,7 @@ Coroutine *qemu_coroutine_new(void) * Set the new stack. */ ss.ss_sp = co->stack; - ss.ss_size = stack_size; + ss.ss_size = co->stack_size; ss.ss_flags = 0; if (sigaltstack(&ss, &oss) < 0) { abort(); @@ -251,17 +252,17 @@ Coroutine *qemu_coroutine_new(void) void qemu_coroutine_delete(Coroutine *co_) { - CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_); - g_free(co->stack); + qemu_free_stack(co->stack, co->stack_size); g_free(co); } CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineAction action) { - CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); - CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineSigAltStack *from = DO_UPCAST(CoroutineSigAltStack, base, from_); + CoroutineSigAltStack *to = DO_UPCAST(CoroutineSigAltStack, base, to_); CoroutineThreadState *s = coroutine_get_thread_state(); int ret; diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 2bb7e10d4b..6621f3f692 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -34,6 +34,7 @@ typedef struct { Coroutine base; void *stack; + size_t stack_size; sigjmp_buf env; #ifdef CONFIG_VALGRIND_H @@ -82,7 +83,6 @@ static void coroutine_trampoline(int i0, int i1) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; CoroutineUContext *co; ucontext_t old_uc, uc; sigjmp_buf old_env; @@ -101,17 +101,18 @@ Coroutine *qemu_coroutine_new(void) } co = g_malloc0(sizeof(*co)); - co->stack = g_malloc(stack_size); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); co->base.entry_arg = &old_env; /* stash away our jmp_buf */ uc.uc_link = &old_uc; uc.uc_stack.ss_sp = co->stack; - uc.uc_stack.ss_size = stack_size; + uc.uc_stack.ss_size = co->stack_size; uc.uc_stack.ss_flags = 0; #ifdef CONFIG_VALGRIND_H co->valgrind_stack_id = - VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size); + VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size); #endif arg.p = co; @@ -149,7 +150,7 @@ void qemu_coroutine_delete(Coroutine *co_) valgrind_stack_deregister(co); #endif - g_free(co->stack); + qemu_free_stack(co->stack, co->stack_size); g_free(co); } diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c index 02e28e825f..de6bd4fd3e 100644 --- a/util/coroutine-win32.c +++ b/util/coroutine-win32.c @@ -71,7 +71,7 @@ static void CALLBACK coroutine_trampoline(void *co_) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineWin32 *co; co = g_malloc0(sizeof(*co)); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f2d4e9e592..aaec1891f5 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -50,6 +50,10 @@ #include "qemu/mmap-alloc.h" +#ifdef CONFIG_DEBUG_STACK_USAGE +#include "qemu/error-report.h" +#endif + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -499,3 +503,76 @@ pid_t qemu_fork(Error **errp) } return pid; } + +void *qemu_alloc_stack(size_t *sz) +{ + void *ptr, *guardpage; +#ifdef CONFIG_DEBUG_STACK_USAGE + void *ptr2; +#endif + size_t pagesz = getpagesize(); +#ifdef _SC_THREAD_STACK_MIN + /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ + long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN); + *sz = MAX(MAX(min_stack_sz, 0), *sz); +#endif + /* adjust stack size to a multiple of the page size */ + *sz = ROUND_UP(*sz, pagesz); + /* allocate one extra page for the guard page */ + *sz += pagesz; + + ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + abort(); + } + +#if defined(HOST_IA64) + /* separate register stack */ + guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz); +#elif defined(HOST_HPPA) + /* stack grows up */ + guardpage = ptr + *sz - pagesz; +#else + /* stack grows down */ + guardpage = ptr; +#endif + if (mprotect(guardpage, pagesz, PROT_NONE) != 0) { + abort(); + } + +#ifdef CONFIG_DEBUG_STACK_USAGE + for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) { + *(uint32_t *)ptr2 = 0xdeadbeaf; + } +#endif + + return ptr; +} + +#ifdef CONFIG_DEBUG_STACK_USAGE +static __thread unsigned int max_stack_usage; +#endif + +void qemu_free_stack(void *stack, size_t sz) +{ +#ifdef CONFIG_DEBUG_STACK_USAGE + unsigned int usage; + void *ptr; + + for (ptr = stack + getpagesize(); ptr < stack + sz; + ptr += sizeof(uint32_t)) { + if (*(uint32_t *)ptr != 0xdeadbeaf) { + break; + } + } + usage = sz - (uintptr_t) (ptr - stack); + if (usage > max_stack_usage) { + error_report("thread %d max stack usage increased from %u to %u", + qemu_get_thread_id(), max_stack_usage, usage); + max_stack_usage = usage; + } +#endif + + munmap(stack, sz); +} |