diff options
-rw-r--r-- | blockjob.c | 7 | ||||
-rw-r--r-- | include/block/blockjob_int.h | 3 | ||||
-rw-r--r-- | include/qemu/coroutine_int.h | 13 | ||||
-rwxr-xr-x | tests/qemu-iotests/200 | 99 | ||||
-rw-r--r-- | tests/qemu-iotests/200.out | 14 | ||||
-rw-r--r-- | tests/qemu-iotests/common.qemu | 8 | ||||
-rw-r--r-- | tests/qemu-iotests/group | 1 | ||||
-rw-r--r-- | util/async.c | 13 | ||||
-rw-r--r-- | util/qemu-coroutine-sleep.c | 12 | ||||
-rw-r--r-- | util/qemu-coroutine.c | 14 |
10 files changed, 177 insertions, 7 deletions
diff --git a/blockjob.c b/blockjob.c index 3a0c49137e..ff9a614531 100644 --- a/blockjob.c +++ b/blockjob.c @@ -797,11 +797,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) return; } - job->busy = false; + /* We need to leave job->busy set here, because when we have + * put a coroutine to 'sleep', we have scheduled it to run in + * the future. We cannot enter that same coroutine again before + * it wakes and runs, otherwise we risk double-entry or entry after + * completion. */ if (!block_job_should_pause(job)) { co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); } - job->busy = true; block_job_pause_point(job); } diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index f13ad05c0d..43f3be2965 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -143,7 +143,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, * @ns: How many nanoseconds to stop for. * * Put the job to sleep (assuming that it wasn't canceled) for @ns - * nanoseconds. Canceling the job will interrupt the wait immediately. + * nanoseconds. Canceling the job will not interrupt the wait, so the + * cancel will not process until the coroutine wakes up. */ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index cb98892bba..59e8406398 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -46,14 +46,21 @@ struct Coroutine { size_t locks_held; + /* Only used when the coroutine has yielded. */ + AioContext *ctx; + + /* Used to catch and abort on illegal co-routine entry. + * Will contain the name of the function that had first + * scheduled the coroutine. */ + const char *scheduled; + + QSIMPLEQ_ENTRY(Coroutine) co_queue_next; + /* Coroutines that should be woken up when we yield or terminate. * Only used when the coroutine is running. */ QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup; - /* Only used when the coroutine has yielded. */ - AioContext *ctx; - QSIMPLEQ_ENTRY(Coroutine) co_queue_next; QSLIST_ENTRY(Coroutine) co_scheduled_next; }; diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200 new file mode 100755 index 0000000000..d8787ddb46 --- /dev/null +++ b/tests/qemu-iotests/200 @@ -0,0 +1,99 @@ +#!/bin/bash +# +# Block job co-routine race condition test. +# +# See: https://bugzilla.redhat.com/show_bug.cgi?id=1508708 +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + rm -f "${TEST_IMG}" "${BACKING_IMG}" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 qed +_supported_proto file +_supported_os Linux + +BACKING_IMG="${TEST_DIR}/backing.img" +TEST_IMG="${TEST_DIR}/test.img" + +${QEMU_IMG} create -f $IMGFMT "${BACKING_IMG}" 512M | _filter_img_create +${QEMU_IMG} create -f $IMGFMT -F $IMGFMT "${TEST_IMG}" -b "${BACKING_IMG}" 512M | _filter_img_create + +${QEMU_IO} -c "write -P 0xa5 512 300M" "${BACKING_IMG}" | _filter_qemu_io + +echo +echo === Starting QEMU VM === +echo +qemu_comm_method="qmp" +_launch_qemu -device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 \ + -object iothread,id=iothread0 \ + -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0 \ + -drive file="${TEST_IMG}",media=disk,if=none,cache=none,id=drive_sysdisk,aio=native,format=$IMGFMT \ + -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0 +h1=$QEMU_HANDLE + +_send_qemu_cmd $h1 "{ 'execute': 'qmp_capabilities' }" 'return' + +echo +echo === Sending stream/cancel, checking for SIGSEGV only === +echo +for (( i=1;i<500;i++ )) +do + mismatch_only='y' qemu_error_no_exit='n' _send_qemu_cmd $h1 \ + "{ + 'execute': 'block-stream', + 'arguments': { + 'device': 'drive_sysdisk', + 'speed': 10000000, + 'on-error': 'report', + 'job-id': 'job-$i' + } + } + { + 'execute': 'block-job-cancel', + 'arguments': { + 'device': 'job-$i' + } + }" \ + "{.*{.*}.*}" # should match all well-formed QMP responses +done + +silent='y' _send_qemu_cmd $h1 "{ 'execute': 'quit' }" 'return' + +echo "$i iterations performed" + +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/200.out b/tests/qemu-iotests/200.out new file mode 100644 index 0000000000..af6a809e30 --- /dev/null +++ b/tests/qemu-iotests/200.out @@ -0,0 +1,14 @@ +QA output created by 200 +Formatting 'TEST_DIR/backing.img', fmt=IMGFMT size=536870912 +Formatting 'TEST_DIR/test.img', fmt=IMGFMT size=536870912 backing_file=TEST_DIR/backing.img backing_fmt=IMGFMT +wrote 314572800/314572800 bytes at offset 512 +300 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Starting QEMU VM === + +{"return": {}} + +=== Sending stream/cancel, checking for SIGSEGV only === + +500 iterations performed +*** done diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 7b3052dc79..85f66b852c 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -50,6 +50,8 @@ _in_fd=4 # # If $silent is set to anything but an empty string, then # response is not echoed out. +# If $mismatch_only is set, only non-matching responses will +# be echoed. function _timed_wait_for() { local h=${1} @@ -58,14 +60,18 @@ function _timed_wait_for() QEMU_STATUS[$h]=0 while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]} do - if [ -z "${silent}" ]; then + if [ -z "${silent}" ] && [ -z "${mismatch_only}" ]; then echo "${resp}" | _filter_testdir | _filter_qemu \ | _filter_qemu_io | _filter_qmp | _filter_hmp fi grep -q "${*}" < <(echo "${resp}") if [ $? -eq 0 ]; then return + elif [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then + echo "${resp}" | _filter_testdir | _filter_qemu \ + | _filter_qemu_io | _filter_qmp | _filter_hmp fi + done QEMU_STATUS[$h]=-1 if [ -z "${qemu_error_no_exit}" ]; then diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 1fad602152..3e688678dd 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -196,3 +196,4 @@ 196 rw auto quick 197 rw auto quick 198 rw auto +200 rw auto diff --git a/util/async.c b/util/async.c index 0e1bd8780a..4dd9d95a9e 100644 --- a/util/async.c +++ b/util/async.c @@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque) QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); trace_aio_co_schedule_bh_cb(ctx, co); aio_context_acquire(ctx); + + /* Protected by write barrier in qemu_aio_coroutine_enter */ + atomic_set(&co->scheduled, NULL); qemu_coroutine_enter(co); aio_context_release(ctx); } @@ -438,6 +441,16 @@ fail: void aio_co_schedule(AioContext *ctx, Coroutine *co) { trace_aio_co_schedule(ctx, co); + const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, + __func__); + + if (scheduled) { + fprintf(stderr, + "%s: Co-routine was already scheduled in '%s'\n", + __func__, scheduled); + abort(); + } + QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, co, co_scheduled_next); qemu_bh_schedule(ctx->co_schedule_bh); diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c index 9c5655041b..254349cdbb 100644 --- a/util/qemu-coroutine-sleep.c +++ b/util/qemu-coroutine-sleep.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/coroutine.h" +#include "qemu/coroutine_int.h" #include "qemu/timer.h" #include "block/aio.h" @@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque) { CoSleepCB *sleep_cb = opaque; + /* Write of schedule protected by barrier write in aio_co_schedule */ + atomic_set(&sleep_cb->co->scheduled, NULL); aio_co_wake(sleep_cb->co); } @@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, CoSleepCB sleep_cb = { .co = qemu_coroutine_self(), }; + + const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL, + __func__); + if (scheduled) { + fprintf(stderr, + "%s: Co-routine was already scheduled in '%s'\n", + __func__, scheduled); + abort(); + } sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); qemu_coroutine_yield(); diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index d6095c1d5a..9eff7fd450 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) Coroutine *self = qemu_coroutine_self(); CoroutineAction ret; + /* Cannot rely on the read barrier for co in aio_co_wake(), as there are + * callers outside of aio_co_wake() */ + const char *scheduled = atomic_mb_read(&co->scheduled); + trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg); + /* if the Coroutine has already been scheduled, entering it again will + * cause us to enter it twice, potentially even after the coroutine has + * been deleted */ + if (scheduled) { + fprintf(stderr, + "%s: Co-routine was already scheduled in '%s'\n", + __func__, scheduled); + abort(); + } + if (co->caller) { fprintf(stderr, "Co-routine re-entered recursively\n"); abort(); |