author     Peter Maydell <peter.maydell@linaro.org>  2014-08-29 18:40:04 +0100
committer  Peter Maydell <peter.maydell@linaro.org>  2014-08-29 18:40:04 +0100
commit     988f46361465db0d4fce50e71fa0ff8f9d20483e
tree       fa637b2f366d47581d9fc6dc01a0ef722e906d9a
parent     8b3030114a449e66c68450acaac4b66f26d91416
parent     8df3abfceef557551f00adac1618ddd6fe46f85c
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Block pull request
# gpg: Signature made Fri 29 Aug 2014 17:25:58 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>"
* remotes/stefanha/tags/block-pull-request: (35 commits)
quorum: Fix leak of opts in quorum_open
blkverify: Fix leak of opts in blkverify_open
nfs: Fix leak of opts in nfs_file_open
curl: Don't deref NULL pointer in call to aio_poll.
curl: Allow a cookie or cookies to be sent with http/https requests.
virtio-blk: allow drive_del with dataplane
block: acquire AioContext in do_drive_del()
linux-aio: avoid deadlock in nested aio_poll() calls
qemu-iotests: add multiwrite test cases
block: fix overlapping multiwrite requests
nbd: Follow the BDS' AIO context
block: Add AIO context notifiers
nbd: Drop nbd_can_read()
sheepdog: fix a core dump while do auto-reconnecting
aio-win32: add support for sockets
qemu-coroutine-io: fix for Win32
AioContext: introduce aio_prepare
aio-win32: add aio_set_dispatching optimization
test-aio: test timers on Windows too
AioContext: export and use aio_dispatch
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
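
The common thread in the AioContext commits above is finishing the conversion of aio_poll() into a glib-style prepare/poll/dispatch loop (the comment deleted in the aio-posix.c hunk below described exactly that restructuring as a TODO). As a rough stand-alone illustration of the model only — nothing here is QEMU code; the names are invented and poll(2) on a pipe stands in for the AioContext machinery:

    #include <poll.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Stand-in for aio_compute_timeout(): -1 means block indefinitely,
     * 0 means "work is already pending, do not block". */
    static int compute_timeout(bool work_pending)
    {
        return work_pending ? 0 : -1;
    }

    int main(void)
    {
        int fds[2];
        bool quit = false;

        if (pipe(fds) < 0) {
            return 1;
        }
        (void)write(fds[1], "x", 1);        /* make the read end ready */

        while (!quit) {
            struct pollfd pfd = { .fd = fds[0], .events = POLLIN };

            /* prepare: decide how long we may block */
            int timeout = compute_timeout(false);

            /* poll: wait for events */
            int ret = poll(&pfd, 1, timeout);

            /* dispatch: run handlers for ready fds (in QEMU, also
             * bottom halves and timers) */
            if (ret > 0 && (pfd.revents & POLLIN)) {
                char c;
                (void)read(fds[0], &c, 1);
                printf("dispatched one read event\n");
                quit = true;
            }
        }
        return 0;
    }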
-rw-r--r--  aio-posix.c                      |  58
-rw-r--r--  aio-win32.c                      | 262
-rw-r--r--  async.c                          |  39
-rw-r--r--  block.c                          |  62
-rw-r--r--  block/Makefile.objs              |   2
-rw-r--r--  block/blkverify.c                |   1
-rw-r--r--  block/curl.c                     |  37
-rw-r--r--  block/linux-aio.c                |  71
-rw-r--r--  block/nfs.c                      |  10
-rw-r--r--  block/quorum.c                   | 180
-rw-r--r--  block/sheepdog.c                 |  12
-rw-r--r--  blockdev.c                       |  12
-rw-r--r--  blockjob.c                       |   2
-rw-r--r--  hw/block/dataplane/virtio-blk.c  |   1
-rw-r--r--  hw/ide/qdev.c                    |   2
-rw-r--r--  include/block/aio.h              |  25
-rw-r--r--  include/block/block_int.h        |  41
-rw-r--r--  include/block/coroutine.h        |   8
-rw-r--r--  nbd.c                            | 105
-rw-r--r--  qapi/block-core.json             |  20
-rw-r--r--  qemu-coroutine-io.c              |   4
-rw-r--r--  qemu-coroutine-sleep.c           |  12
-rw-r--r--  qemu-img.c                       |  23
-rw-r--r--  qemu-options.hx                  |  15
-rwxr-xr-x  tests/qemu-iotests/100           | 134
-rw-r--r--  tests/qemu-iotests/100.out       |  89
-rw-r--r--  tests/qemu-iotests/group         |   1
-rw-r--r--  tests/test-aio.c                 |  48
28 files changed, 979 insertions(+), 297 deletions(-)
diff --git a/aio-posix.c b/aio-posix.c
index 2eada2e049..d3ac06e238 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -100,6 +100,11 @@ void aio_set_event_notifier(AioContext *ctx,
                            (IOHandler *)io_read, NULL, notifier);
 }
 
+bool aio_prepare(AioContext *ctx)
+{
+    return false;
+}
+
 bool aio_pending(AioContext *ctx)
 {
     AioHandler *node;
@@ -119,12 +124,21 @@ bool aio_pending(AioContext *ctx)
     return false;
 }
 
-static bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
     AioHandler *node;
     bool progress = false;
 
     /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        progress = true;
+    }
+
+    /*
      * We have to walk very carefully in case aio_set_fd_handler is
      * called while we're walking.
      */
@@ -184,22 +198,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* aio_notify can avoid the expensive event_notifier_set if
      * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This happens
-     * in two cases:
-     *
-     * 1) when aio_poll is called with blocking == false
-     *
-     * 2) when we are called after poll().  If we are called before
-     *    poll(), bottom halves will not be re-evaluated and we need
-     *    aio_notify() if blocking == true.
-     *
-     * The first aio_dispatch() only does something when AioContext is
-     * running as a GSource, and in that case aio_poll is used only
-     * with blocking == false, so this optimization is already quite
-     * effective.  However, the code is ugly and should be restructured
-     * to have a single aio_dispatch() call.  To do this, we need to
-     * reorganize aio_poll into a prepare/poll/dispatch model like
-     * glib's.
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns.
      *
      * If we're in a nested event loop, ctx->dispatching might be true.
      * In that case we can restore it just before returning, but we
@@ -207,26 +208,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
      */
     aio_set_dispatching(ctx, !blocking);
 
-    /*
-     * If there are callbacks left that have been queued, we need to call them.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        blocking = false;
-        progress = true;
-    }
-
-    /* Re-evaluate condition (1) above. */
-    aio_set_dispatching(ctx, !blocking);
-    if (aio_dispatch(ctx)) {
-        progress = true;
-    }
-
-    if (progress && !blocking) {
-        goto out;
-    }
-
     ctx->walking_handlers++;
 
     g_array_set_size(ctx->pollfds, 0);
@@ -249,7 +230,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* wait until next event */
     ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
                        ctx->pollfds->len,
-                       blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
+                       blocking ? aio_compute_timeout(ctx) : 0);
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
@@ -268,7 +249,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress = true;
     }
 
-out:
     aio_set_dispatching(ctx, was_dispatching);
     return progress;
 }
aio_compute_timeout(ctx) : 0); /* if we have any readable fds, dispatch event */ if (ret > 0) { @@ -268,7 +249,6 @@ bool aio_poll(AioContext *ctx, bool blocking) progress = true; } -out: aio_set_dispatching(ctx, was_dispatching); return progress; } diff --git a/aio-win32.c b/aio-win32.c index c12f61e97d..61e3d2ddfe 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -22,12 +22,80 @@ struct AioHandler { EventNotifier *e; + IOHandler *io_read; + IOHandler *io_write; EventNotifierHandler *io_notify; GPollFD pfd; int deleted; + void *opaque; QLIST_ENTRY(AioHandler) node; }; +void aio_set_fd_handler(AioContext *ctx, + int fd, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + /* fd is a SOCKET in our case */ + AioHandler *node; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + if (node->pfd.fd == fd && !node->deleted) { + break; + } + } + + /* Are we deleting the fd handler? */ + if (!io_read && !io_write) { + if (node) { + /* If the lock is held, just mark the node as deleted */ + if (ctx->walking_handlers) { + node->deleted = 1; + node->pfd.revents = 0; + } else { + /* Otherwise, delete it for real. We can't just mark it as + * deleted because deleted nodes are only cleaned up after + * releasing the walking_handlers lock. + */ + QLIST_REMOVE(node, node); + g_free(node); + } + } + } else { + HANDLE event; + + if (node == NULL) { + /* Alloc and insert if it's not already there */ + node = g_malloc0(sizeof(AioHandler)); + node->pfd.fd = fd; + QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); + } + + node->pfd.events = 0; + if (node->io_read) { + node->pfd.events |= G_IO_IN; + } + if (node->io_write) { + node->pfd.events |= G_IO_OUT; + } + + node->e = &ctx->notifier; + + /* Update handler with latest information */ + node->opaque = opaque; + node->io_read = io_read; + node->io_write = io_write; + + event = event_notifier_get_handle(&ctx->notifier); + WSAEventSelect(node->pfd.fd, event, + FD_READ | FD_ACCEPT | FD_CLOSE | + FD_CONNECT | FD_WRITE | FD_OOB); + } + + aio_notify(ctx); +} + void aio_set_event_notifier(AioContext *ctx, EventNotifier *e, EventNotifierHandler *io_notify) @@ -76,55 +144,82 @@ void aio_set_event_notifier(AioContext *ctx, aio_notify(ctx); } -bool aio_pending(AioContext *ctx) +bool aio_prepare(AioContext *ctx) { + static struct timeval tv0; AioHandler *node; + bool have_select_revents = false; + fd_set rfds, wfds; + /* fill fd sets */ + FD_ZERO(&rfds); + FD_ZERO(&wfds); QLIST_FOREACH(node, &ctx->aio_handlers, node) { - if (node->pfd.revents && node->io_notify) { - return true; + if (node->io_read) { + FD_SET ((SOCKET)node->pfd.fd, &rfds); + } + if (node->io_write) { + FD_SET ((SOCKET)node->pfd.fd, &wfds); } } - return false; + if (select(0, &rfds, &wfds, NULL, &tv0) > 0) { + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + node->pfd.revents = 0; + if (FD_ISSET(node->pfd.fd, &rfds)) { + node->pfd.revents |= G_IO_IN; + have_select_revents = true; + } + + if (FD_ISSET(node->pfd.fd, &wfds)) { + node->pfd.revents |= G_IO_OUT; + have_select_revents = true; + } + } + } + + return have_select_revents; } -bool aio_poll(AioContext *ctx, bool blocking) +bool aio_pending(AioContext *ctx) { AioHandler *node; - HANDLE events[MAXIMUM_WAIT_OBJECTS + 1]; - bool progress; - int count; - int timeout; - progress = false; + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + if (node->pfd.revents && node->io_notify) { + return true; + } - /* - * If there are callbacks left that have been queued, we need to call then. 
- * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for aio_poll loops). - */ - if (aio_bh_poll(ctx)) { - blocking = false; - progress = true; + if ((node->pfd.revents & G_IO_IN) && node->io_read) { + return true; + } + if ((node->pfd.revents & G_IO_OUT) && node->io_write) { + return true; + } } - /* Run timers */ - progress |= timerlistgroup_run_timers(&ctx->tlg); + return false; +} + +static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event) +{ + AioHandler *node; + bool progress = false; /* - * Then dispatch any pending callbacks from the GSource. - * * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. */ node = QLIST_FIRST(&ctx->aio_handlers); while (node) { AioHandler *tmp; + int revents = node->pfd.revents; ctx->walking_handlers++; - if (node->pfd.revents && node->io_notify) { + if (!node->deleted && + (revents || event_notifier_get_handle(node->e) == event) && + node->io_notify) { node->pfd.revents = 0; node->io_notify(node->e); @@ -134,6 +229,28 @@ bool aio_poll(AioContext *ctx, bool blocking) } } + if (!node->deleted && + (node->io_read || node->io_write)) { + node->pfd.revents = 0; + if ((revents & G_IO_IN) && node->io_read) { + node->io_read(node->opaque); + progress = true; + } + if ((revents & G_IO_OUT) && node->io_write) { + node->io_write(node->opaque); + progress = true; + } + + /* if the next select() will return an event, we have progressed */ + if (event == event_notifier_get_handle(&ctx->notifier)) { + WSANETWORKEVENTS ev; + WSAEnumNetworkEvents(node->pfd.fd, event, &ev); + if (ev.lNetworkEvents) { + progress = true; + } + } + } + tmp = node; node = QLIST_NEXT(node, node); @@ -145,10 +262,47 @@ bool aio_poll(AioContext *ctx, bool blocking) } } - if (progress && !blocking) { - return true; + return progress; +} + +bool aio_dispatch(AioContext *ctx) +{ + bool progress; + + progress = aio_bh_poll(ctx); + progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE); + progress |= timerlistgroup_run_timers(&ctx->tlg); + return progress; +} + +bool aio_poll(AioContext *ctx, bool blocking) +{ + AioHandler *node; + HANDLE events[MAXIMUM_WAIT_OBJECTS + 1]; + bool was_dispatching, progress, have_select_revents, first; + int count; + int timeout; + + if (aio_prepare(ctx)) { + blocking = false; + have_select_revents = true; } + was_dispatching = ctx->dispatching; + progress = false; + + /* aio_notify can avoid the expensive event_notifier_set if + * everything (file descriptors, bottom halves, timers) will + * be re-evaluated before the next blocking poll(). This is + * already true when aio_poll is called with blocking == false; + * if blocking == true, it is only true after poll() returns. + * + * If we're in a nested event loop, ctx->dispatching might be true. + * In that case we can restore it just before returning, but we + * have to clear it now. + */ + aio_set_dispatching(ctx, !blocking); + ctx->walking_handlers++; /* fill fd sets */ @@ -160,64 +314,42 @@ bool aio_poll(AioContext *ctx, bool blocking) } ctx->walking_handlers--; + first = true; /* wait until next event */ while (count > 0) { + HANDLE event; int ret; - timeout = blocking ? - qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0; + timeout = blocking + ? 
qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0; ret = WaitForMultipleObjects(count, events, FALSE, timeout); + aio_set_dispatching(ctx, true); + + if (first && aio_bh_poll(ctx)) { + progress = true; + } + first = false; /* if we have any signaled events, dispatch event */ - if ((DWORD) (ret - WAIT_OBJECT_0) >= count) { + event = NULL; + if ((DWORD) (ret - WAIT_OBJECT_0) < count) { + event = events[ret - WAIT_OBJECT_0]; + } else if (!have_select_revents) { break; } + have_select_revents = false; blocking = false; - /* we have to walk very carefully in case - * aio_set_fd_handler is called while we're walking */ - node = QLIST_FIRST(&ctx->aio_handlers); - while (node) { - AioHandler *tmp; - - ctx->walking_handlers++; - - if (!node->deleted && - event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] && - node->io_notify) { - node->io_notify(node->e); - - /* aio_notify() does not count as progress */ - if (node->e != &ctx->notifier) { - progress = true; - } - } - - tmp = node; - node = QLIST_NEXT(node, node); - - ctx->walking_handlers--; - - if (!ctx->walking_handlers && tmp->deleted) { - QLIST_REMOVE(tmp, node); - g_free(tmp); - } - } + progress |= aio_dispatch_handlers(ctx, event); /* Try again, but only call each handler once. */ events[ret - WAIT_OBJECT_0] = events[--count]; } - if (blocking) { - /* Run the timers a second time. We do this because otherwise aio_wait - * will not note progress - and will stop a drain early - if we have - * a timer that was not ready to run entering g_poll but is ready - * after g_poll. This will only do anything if a timer has expired. - */ - progress |= timerlistgroup_run_timers(&ctx->tlg); - } + progress |= timerlistgroup_run_timers(&ctx->tlg); + aio_set_dispatching(ctx, was_dispatching); return progress; } @@ -152,39 +152,48 @@ void qemu_bh_delete(QEMUBH *bh) bh->deleted = 1; } -static gboolean -aio_ctx_prepare(GSource *source, gint *timeout) +int64_t +aio_compute_timeout(AioContext *ctx) { - AioContext *ctx = (AioContext *) source; + int64_t deadline; + int timeout = -1; QEMUBH *bh; - int deadline; - /* We assume there is no timeout already supplied */ - *timeout = -1; for (bh = ctx->first_bh; bh; bh = bh->next) { if (!bh->deleted && bh->scheduled) { if (bh->idle) { /* idle bottom halves will be polled at least * every 10ms */ - *timeout = 10; + timeout = 10000000; } else { /* non-idle bottom halves will be executed * immediately */ - *timeout = 0; - return true; + return 0; } } } - deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)); + deadline = timerlistgroup_deadline_ns(&ctx->tlg); if (deadline == 0) { - *timeout = 0; - return true; + return 0; } else { - *timeout = qemu_soonest_timeout(*timeout, deadline); + return qemu_soonest_timeout(timeout, deadline); + } +} + +static gboolean +aio_ctx_prepare(GSource *source, gint *timeout) +{ + AioContext *ctx = (AioContext *) source; + + /* We assume there is no timeout already supplied */ + *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)); + + if (aio_prepare(ctx)) { + *timeout = 0; } - return false; + return *timeout == 0; } static gboolean @@ -209,7 +218,7 @@ aio_ctx_dispatch(GSource *source, AioContext *ctx = (AioContext *) source; assert(callback == NULL); - aio_poll(ctx, false); + aio_dispatch(ctx); return true; } @@ -1819,6 +1819,8 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state) void bdrv_close(BlockDriverState *bs) { + BdrvAioNotifier *ban, *ban_next; + if (bs->job) { block_job_cancel_sync(bs->job); } @@ -1863,6 +1865,11 @@ void 
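
The async.c hunk factors the GSource timeout computation out into aio_compute_timeout(), which now works in nanoseconds (hence 10 becoming 10000000 for idle bottom halves). A stand-alone model of the resulting policy; the helper names here are stand-ins, not QEMU's:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define IDLE_BH_POLL_NS 10000000    /* idle bottom halves: every 10 ms */

    /* -1 means "no deadline", mirroring qemu_soonest_timeout() */
    static int64_t soonest(int64_t a, int64_t b)
    {
        return (a < 0) ? b : (b < 0) ? a : (a < b ? a : b);
    }

    /* Nanoseconds the loop may block: 0 if a non-idle bottom half is ready,
     * at most 10 ms if only idle ones are, else the timer deadline. */
    static int64_t compute_timeout_ns(bool ready_bh, bool idle_bh,
                                      int64_t timer_deadline_ns)
    {
        int64_t timeout = -1;

        if (ready_bh) {
            return 0;
        }
        if (idle_bh) {
            timeout = IDLE_BH_POLL_NS;
        }
        return soonest(timeout, timer_deadline_ns);
    }

    int main(void)
    {
        assert(compute_timeout_ns(true, false, -1) == 0);
        assert(compute_timeout_ns(false, true, -1) == IDLE_BH_POLL_NS);
        assert(compute_timeout_ns(false, true, 5000000) == 5000000);
        return 0;
    }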
diff --git a/block.c b/block.c
@@ -1819,6 +1819,8 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 
 void bdrv_close(BlockDriverState *bs)
 {
+    BdrvAioNotifier *ban, *ban_next;
+
     if (bs->job) {
         block_job_cancel_sync(bs->job);
     }
@@ -1863,6 +1865,11 @@ void bdrv_close(BlockDriverState *bs)
     if (bs->io_limits_enabled) {
         bdrv_io_limits_disable(bs);
     }
+
+    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+        g_free(ban);
+    }
+    QLIST_INIT(&bs->aio_notifiers);
 }
 
 void bdrv_close_all(void)
@@ -4546,6 +4553,12 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
             // Add the second request
             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
 
+            // Add tail of first request, if necessary
+            if (qiov->size < reqs[outidx].qiov->size) {
+                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
+                                  reqs[outidx].qiov->size - qiov->size);
+            }
+
             reqs[outidx].nb_sectors = qiov->size >> 9;
             reqs[outidx].qiov = qiov;
 
@@ -5729,10 +5742,16 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 
 void bdrv_detach_aio_context(BlockDriverState *bs)
 {
+    BdrvAioNotifier *baf;
+
     if (!bs->drv) {
         return;
     }
 
+    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
+        baf->detach_aio_context(baf->opaque);
+    }
+
     if (bs->io_limits_enabled) {
         throttle_detach_aio_context(&bs->throttle_state);
     }
@@ -5752,6 +5771,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
 void bdrv_attach_aio_context(BlockDriverState *bs,
                              AioContext *new_context)
 {
+    BdrvAioNotifier *ban;
+
     if (!bs->drv) {
         return;
     }
@@ -5770,6 +5791,10 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
     if (bs->io_limits_enabled) {
         throttle_attach_aio_context(&bs->throttle_state, new_context);
     }
+
+    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
+        ban->attached_aio_context(new_context, ban->opaque);
+    }
 }
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
@@ -5786,6 +5811,43 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     aio_context_release(new_context);
 }
 
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque)
+{
+    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
+    *ban = (BdrvAioNotifier){
+        .attached_aio_context = attached_aio_context,
+        .detach_aio_context   = detach_aio_context,
+        .opaque               = opaque
+    };
+
+    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
+}
+
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*attached_aio_context)(AioContext *,
+                                                                   void *),
+                                      void (*detach_aio_context)(void *),
+                                      void *opaque)
+{
+    BdrvAioNotifier *ban, *ban_next;
+
+    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+        if (ban->attached_aio_context == attached_aio_context &&
+            ban->detach_aio_context   == detach_aio_context   &&
+            ban->opaque               == opaque)
+        {
+            QLIST_REMOVE(ban, list);
+            g_free(ban);
+
+            return;
+        }
+    }
+
+    abort();
+}
+
 void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                     NotifierWithReturn *notifier)
 {
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 858d2b387b..f45f9399aa 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -10,7 +10,6 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
-ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
@@ -19,7 +18,6 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-endif
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/blkverify.c b/block/blkverify.c
index 7c78ca41a5..163064cf6b 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -158,6 +158,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
 
     ret = 0;
 fail:
+    qemu_opts_del(opts);
     return ret;
 }
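
The BdrvAioNotifier machinery added to block.c above is what nbd.c uses later in this series. A sketch of how a hypothetical long-running user would subscribe — my_server_* is invented for illustration; only the bdrv_*_aio_context_notifier() signatures come from the patch, and note that removal with mismatched arguments abort()s:

    #include "block/block_int.h"    /* bdrv_add_aio_context_notifier() */

    /* Hypothetical long-running user that must follow the BDS's context. */
    typedef struct MyServer {
        BlockDriverState *bs;
        AioContext *ctx;
    } MyServer;

    static void my_server_attached(AioContext *new_context, void *opaque)
    {
        MyServer *s = opaque;
        s->ctx = new_context;   /* re-register fd handlers/timers here */
    }

    static void my_server_detach(void *opaque)
    {
        MyServer *s = opaque;
        s->ctx = NULL;          /* unregister fd handlers/timers here */
    }

    static void my_server_start(MyServer *s)
    {
        s->ctx = bdrv_get_aio_context(s->bs);
        bdrv_add_aio_context_notifier(s->bs, my_server_attached,
                                      my_server_detach, s);
    }

    static void my_server_stop(MyServer *s)
    {
        /* must pass the exact same callbacks and opaque, or abort() */
        bdrv_remove_aio_context_notifier(s->bs, my_server_attached,
                                         my_server_detach, s);
    }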
diff --git a/block/curl.c b/block/curl.c
index d4b85d20a5..025833994c 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -63,6 +63,7 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_NUM_ACB    8
 #define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
+#define CURL_TIMEOUT_DEFAULT 5
 
 #define FIND_RET_NONE   0
 #define FIND_RET_OK     1
@@ -71,6 +72,8 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
+#define CURL_BLOCK_OPT_TIMEOUT "timeout"
+#define CURL_BLOCK_OPT_COOKIE    "cookie"
 
 struct BDRVCURLState;
 
@@ -109,6 +112,8 @@ typedef struct BDRVCURLState {
     char *url;
     size_t readahead_size;
     bool sslverify;
+    int timeout;
+    char *cookie;
     bool accept_range;
     AioContext *aio_context;
 } BDRVCURLState;
@@ -352,7 +357,7 @@ static void curl_multi_timeout_do(void *arg)
 #endif
 }
 
-static CURLState *curl_init_state(BDRVCURLState *s)
+static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
     CURLState *state = NULL;
     int i, j;
@@ -370,7 +375,7 @@ static CURLState *curl_init_state(BDRVCURLState *s)
             break;
         }
         if (!state) {
-            aio_poll(state->s->aio_context, true);
+            aio_poll(bdrv_get_aio_context(bs), true);
         }
     } while(!state);
 
@@ -382,7 +387,10 @@ static CURLState *curl_init_state(BDRVCURLState *s)
         curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
         curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
                          (long) s->sslverify);
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
+        if (s->cookie) {
+            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
+        }
+        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, s->timeout);
         curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
                          (void *)curl_read_cb);
         curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
@@ -489,6 +497,16 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Verify SSL certificate"
         },
+        {
+            .name = CURL_BLOCK_OPT_TIMEOUT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Curl timeout"
+        },
+        {
+            .name = CURL_BLOCK_OPT_COOKIE,
+            .type = QEMU_OPT_STRING,
+            .help = "Pass the cookie or list of cookies with each request"
+        },
         { /* end of list */ }
     },
 };
@@ -501,6 +519,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     QemuOpts *opts;
     Error *local_err = NULL;
     const char *file;
+    const char *cookie;
     double d;
 
     static int inited = 0;
@@ -525,8 +544,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
         goto out_noclean;
     }
 
+    s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
+                                     CURL_TIMEOUT_DEFAULT);
+
     s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
 
+    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
+    s->cookie = g_strdup(cookie);
+
     file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
     if (file == NULL) {
         error_setg(errp, "curl block driver requires an 'url' option");
@@ -541,7 +566,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     DPRINTF("CURL: Opening %s\n", file);
     s->aio_context = bdrv_get_aio_context(bs);
     s->url = g_strdup(file);
-    state = curl_init_state(s);
+    state = curl_init_state(bs, s);
     if (!state)
         goto out_noclean;
 
@@ -582,6 +607,7 @@ out:
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 out_noclean:
+    g_free(s->cookie);
     g_free(s->url);
     qemu_opts_del(opts);
     return -EINVAL;
@@ -625,7 +651,7 @@ static void curl_readv_bh_cb(void *p)
     }
 
     // No cache found, so let's start a new request
-    state = curl_init_state(s);
+    state = curl_init_state(acb->common.bs, s);
     if (!state) {
         acb->common.cb(acb->common.opaque, -EIO);
         qemu_aio_release(acb);
@@ -684,6 +710,7 @@ static void curl_close(BlockDriverState *bs)
     DPRINTF("CURL: Close\n");
     curl_detach_aio_context(bs);
 
+    g_free(s->cookie);
     g_free(s->url);
 }
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 7ac7e8c99c..9aca758b10 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -51,6 +51,12 @@ struct qemu_laio_state {
 
     /* io queue for submit at batch */
     LaioQueue io_q;
+
+    /* I/O completion processing */
+    QEMUBH *completion_bh;
+    struct io_event events[MAX_EVENTS];
+    int event_idx;
+    int event_max;
 };
 
 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -86,27 +92,58 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
     qemu_aio_release(laiocb);
 }
 
-static void qemu_laio_completion_cb(EventNotifier *e)
+/* The completion BH fetches completed I/O requests and invokes their
+ * callbacks.
+ *
+ * The function is somewhat tricky because it supports nested event loops, for
+ * example when a request callback invokes aio_poll().  In order to do this,
+ * the completion events array and index are kept in qemu_laio_state.  The BH
+ * reschedules itself as long as there are completions pending so it will
+ * either be called again in a nested event loop or will be called after all
+ * events have been completed.  When there are no events left to complete, the
+ * BH returns without rescheduling.
+ */
+static void qemu_laio_completion_bh(void *opaque)
 {
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
-    while (event_notifier_test_and_clear(&s->e)) {
-        struct io_event events[MAX_EVENTS];
-        struct timespec ts = { 0 };
-        int nevents, i;
+    struct qemu_laio_state *s = opaque;
 
+    /* Fetch more completion events when empty */
+    if (s->event_idx == s->event_max) {
         do {
-            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
-        } while (nevents == -EINTR);
+            struct timespec ts = { 0 };
+            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
+                                        s->events, &ts);
+        } while (s->event_max == -EINTR);
+
+        s->event_idx = 0;
+        if (s->event_max <= 0) {
+            s->event_max = 0;
+            return; /* no more events */
+        }
+    }
 
-        for (i = 0; i < nevents; i++) {
-            struct iocb *iocb = events[i].obj;
-            struct qemu_laiocb *laiocb =
-                    container_of(iocb, struct qemu_laiocb, iocb);
+    /* Reschedule so nested event loops see currently pending completions */
+    qemu_bh_schedule(s->completion_bh);
 
-            laiocb->ret = io_event_ret(&events[i]);
-            qemu_laio_process_completion(s, laiocb);
-        }
+    /* Process completion events */
+    while (s->event_idx < s->event_max) {
+        struct iocb *iocb = s->events[s->event_idx].obj;
+        struct qemu_laiocb *laiocb =
+                container_of(iocb, struct qemu_laiocb, iocb);
+
+        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
+        s->event_idx++;
+
+        qemu_laio_process_completion(s, laiocb);
+    }
+}
+
+static void qemu_laio_completion_cb(EventNotifier *e)
+{
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+
+    if (event_notifier_test_and_clear(&s->e)) {
+        qemu_bh_schedule(s->completion_bh);
     }
 }
 
@@ -272,12 +309,14 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
     struct qemu_laio_state *s = s_;
 
     aio_set_event_notifier(old_context, &s->e, NULL);
+    qemu_bh_delete(s->completion_bh);
 }
 
 void laio_attach_aio_context(void *s_, AioContext *new_context)
 {
     struct qemu_laio_state *s = s_;
 
+    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
     aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
 }
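
The linux-aio conversion above replaces direct completion processing with a bottom half that reschedules itself before running callbacks, so nested aio_poll() calls still observe pending completions. A stand-alone model of just that rescheduling discipline — the bottom-half API is faked here; in QEMU it is aio_bh_new()/qemu_bh_schedule():

    #include <stdbool.h>
    #include <stdio.h>

    /* Minimal stand-ins for QEMU's bottom-half API, for this sketch only. */
    static bool bh_scheduled;
    static void bh_schedule(void) { bh_scheduled = true; }

    static int events[4] = { 1, 2, 3, 4 };
    static int event_idx, event_max = 4;

    /* Models qemu_laio_completion_bh(): reschedule before processing so a
     * nested event loop would see the remaining completions. */
    static void completion_bh(void)
    {
        if (event_idx == event_max) {
            return;                     /* nothing left to complete */
        }
        bh_schedule();                  /* reschedule first */
        while (event_idx < event_max) {
            printf("completing request %d\n", events[event_idx++]);
        }
    }

    int main(void)
    {
        do {                            /* crude event-loop stand-in */
            bh_scheduled = false;
            completion_bh();
        } while (bh_scheduled);
        return 0;
    }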
diff --git a/block/nfs.c b/block/nfs.c
index 93d87f3256..194f301501 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -393,16 +393,20 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
                           (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
                           errp);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
     bs->total_sectors = ret;
-    return 0;
+    ret = 0;
+out:
+    qemu_opts_del(opts);
+    return ret;
 }
 
 static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
diff --git a/block/quorum.c b/block/quorum.c
index 0de07bb036..093382e8f5 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -29,6 +29,7 @@
 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
 #define QUORUM_OPT_REWRITE        "rewrite-corrupted"
+#define QUORUM_OPT_READ_PATTERN   "read-pattern"
 
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -79,6 +80,8 @@ typedef struct BDRVQuorumState {
     bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
                             * block if Quorum is reached.
                             */
+
+    QuorumReadPattern read_pattern;
 } BDRVQuorumState;
 
 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -122,6 +125,7 @@ struct QuorumAIOCB {
 
     bool is_read;
     int vote_ret;
+    int child_iter;             /* which child to read in fifo pattern */
 };
 
 static bool quorum_vote(QuorumAIOCB *acb);
@@ -148,7 +152,6 @@ static AIOCBInfo quorum_aiocb_info = {
 
 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
     int i, ret = 0;
 
     if (acb->vote_ret) {
@@ -158,7 +161,8 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
     acb->common.cb(acb->common.opaque, ret);
 
     if (acb->is_read) {
-        for (i = 0; i < s->num_children; i++) {
+        /* on the quorum case acb->child_iter == s->num_children - 1 */
+        for (i = 0; i <= acb->child_iter; i++) {
             qemu_vfree(acb->qcrs[i].buf);
             qemu_iovec_destroy(&acb->qcrs[i].qiov);
         }
@@ -261,6 +265,21 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret)
     quorum_aio_finalize(acb);
 }
 
+static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb);
+
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int i;
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+    for (i = 0; i < source->niov; i++) {
+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
+        memcpy(dest->iov[i].iov_base,
+               source->iov[i].iov_base,
+               source->iov[i].iov_len);
+    }
+}
+
 static void quorum_aio_cb(void *opaque, int ret)
 {
     QuorumChildRequest *sacb = opaque;
@@ -268,6 +287,21 @@ static void quorum_aio_cb(void *opaque, int ret)
     BDRVQuorumState *s = acb->common.bs->opaque;
     bool rewrite = false;
 
+    if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
+        /* We try to read next child in FIFO order if we fail to read */
+        if (ret < 0 && ++acb->child_iter < s->num_children) {
+            read_fifo_child(acb);
+            return;
+        }
+
+        if (ret == 0) {
+            quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov);
+        }
+        acb->vote_ret = ret;
+        quorum_aio_finalize(acb);
+        return;
+    }
+
     sacb->ret = ret;
     acb->count++;
     if (ret == 0) {
@@ -348,19 +382,6 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
     return count;
 }
 
-static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
-{
-    int i;
-    assert(dest->niov == source->niov);
-    assert(dest->size == source->size);
-    for (i = 0; i < source->niov; i++) {
-        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
-        memcpy(dest->iov[i].iov_base,
-               source->iov[i].iov_base,
-               source->iov[i].iov_len);
-    }
-}
-
 static void quorum_count_vote(QuorumVotes *votes,
                               QuorumVoteValue *value,
                               int index)
@@ -620,34 +641,62 @@ free_exit:
     return rewrite;
 }
 
-static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                          int64_t sector_num,
-                                          QEMUIOVector *qiov,
-                                          int nb_sectors,
-                                          BlockDriverCompletionFunc *cb,
-                                          void *opaque)
+static BlockDriverAIOCB *read_quorum_children(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
-                                      nb_sectors, cb, opaque);
+    BDRVQuorumState *s = acb->common.bs->opaque;
     int i;
 
-    acb->is_read = true;
-
     for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
-        qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
-        qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
+        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+        qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
+        qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
     }
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
-                       quorum_aio_cb, &acb->qcrs[i]);
+        bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
+                       acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
     }
 
     return &acb->common;
 }
 
+static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
+                                                     acb->qiov->size);
+    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
+    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
+                     acb->qcrs[acb->child_iter].buf);
+    bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
+                   &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+                   quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          QEMUIOVector *qiov,
+                                          int nb_sectors,
+                                          BlockDriverCompletionFunc *cb,
+                                          void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
+    acb->is_read = true;
+
+    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
+        acb->child_iter = s->num_children - 1;
+        return read_quorum_children(acb);
+    }
+
+    acb->child_iter = 0;
+    return read_fifo_child(acb);
+}
+
 static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
@@ -787,16 +836,39 @@ static QemuOptsList quorum_runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Rewrite corrupted block on read quorum",
         },
+        {
+            .name = QUORUM_OPT_READ_PATTERN,
+            .type = QEMU_OPT_STRING,
+            .help = "Allowed pattern: quorum, fifo. Quorum is default",
+        },
         { /* end of list */ }
     },
 };
 
+static int parse_read_pattern(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        /* Set quorum as default */
+        return QUORUM_READ_PATTERN_QUORUM;
+    }
+
+    for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
+        if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
+            return i;
+        }
+    }
+
+    return -EINVAL;
+}
+
 static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                        Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
     Error *local_err = NULL;
-    QemuOpts *opts;
+    QemuOpts *opts = NULL;
     bool *opened;
     QDict *sub = NULL;
     QList *list = NULL;
@@ -832,28 +904,37 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
-
-    /* and validate it against s->num_children */
-    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+    ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
     if (ret < 0) {
+        error_setg(&local_err, "Please set read-pattern as fifo or quorum");
         goto exit;
     }
+    s->read_pattern = ret;
 
-    /* is the driver in blkverify mode */
-    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
-        s->num_children == 2 && s->threshold == 2) {
-        s->is_blkverify = true;
-    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
-        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
-                "and using two files with vote_threshold=2\n");
-    }
+    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
+        /* and validate it against s->num_children */
+        ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+        if (ret < 0) {
+            goto exit;
+        }
 
-    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
-    if (s->rewrite_corrupted && s->is_blkverify) {
-        error_setg(&local_err,
-                   "rewrite-corrupted=on cannot be used with blkverify=on");
-        ret = -EINVAL;
-        goto exit;
+        /* is the driver in blkverify mode */
+        if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
+            s->num_children == 2 && s->threshold == 2) {
+            s->is_blkverify = true;
+        } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
+            fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+                    "and using two files with vote_threshold=2\n");
+        }
+
+        s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE,
+                                                 false);
+        if (s->rewrite_corrupted && s->is_blkverify) {
+            error_setg(&local_err,
+                       "rewrite-corrupted=on cannot be used with blkverify=on");
+            ret = -EINVAL;
+            goto exit;
+        }
     }
 
     /* allocate the children BlockDriverState array */
@@ -908,6 +989,7 @@ close_exit:
     g_free(s->bs);
     g_free(opened);
 exit:
+    qemu_opts_del(opts);
     /* propagate error */
     if (local_err) {
         error_propagate(errp, local_err);
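
Quorum's new fifo mode sends a read to one child at a time and only falls through to the next child on error, instead of reading every child and voting. A stand-alone model of that control flow; child_read() fakes per-child results and every name here is invented:

    #include <errno.h>
    #include <stdio.h>

    #define NUM_CHILDREN 3

    /* Fake children: the first two fail, the third succeeds. */
    static int child_read(int child)
    {
        return child < 2 ? -EIO : 0;
    }

    /* Models quorum_aio_cb() in fifo mode: advance child_iter on failure. */
    static int fifo_read(void)
    {
        int child_iter = 0;
        int ret = child_read(child_iter);

        while (ret < 0 && ++child_iter < NUM_CHILDREN) {
            ret = child_read(child_iter);
        }
        return ret;
    }

    int main(void)
    {
        printf("fifo read returned %d\n", fifo_read());   /* prints 0 */
        return 0;
    }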
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 12cbd9dcb4..f91afc3a5b 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -103,6 +103,9 @@
 #define SD_INODE_SIZE (sizeof(SheepdogInode))
 #define CURRENT_VDI_ID 0
 
+#define LOCK_TYPE_NORMAL 0
+#define LOCK_TYPE_SHARED 1      /* for iSCSI multipath */
+
 typedef struct SheepdogReq {
     uint8_t proto_ver;
     uint8_t opcode;
@@ -166,7 +169,8 @@ typedef struct SheepdogVdiReq {
     uint8_t copy_policy;
     uint8_t reserved[2];
     uint32_t snapid;
-    uint32_t pad[3];
+    uint32_t type;
+    uint32_t pad[2];
 } SheepdogVdiReq;
 
 typedef struct SheepdogVdiRsp {
@@ -712,7 +716,6 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
 
 static coroutine_fn void reconnect_to_sdog(void *opaque)
 {
-    Error *local_err = NULL;
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
@@ -727,6 +730,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
 
     /* Try to reconnect the sheepdog server every one second. */
     while (s->fd < 0) {
+        Error *local_err = NULL;
         s->fd = get_sheep_fd(s, &local_err);
         if (s->fd < 0) {
             DPRINTF("Wait for connection to be established\n");
@@ -1090,6 +1094,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
     memset(&hdr, 0, sizeof(hdr));
     if (lock) {
         hdr.opcode = SD_OP_LOCK_VDI;
+        hdr.type = LOCK_TYPE_NORMAL;
     } else {
         hdr.opcode = SD_OP_GET_VDI_INFO;
     }
@@ -1110,6 +1115,8 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
                      sd_strerror(rsp->result), filename, snapid, tag);
         if (rsp->result == SD_RES_NO_VDI) {
             ret = -ENOENT;
+        } else if (rsp->result == SD_RES_VDI_LOCKED) {
+            ret = -EBUSY;
         } else {
             ret = -EIO;
         }
@@ -1793,6 +1800,7 @@ static void sd_close(BlockDriverState *bs)
 
     memset(&hdr, 0, sizeof(hdr));
     hdr.opcode = SD_OP_RELEASE_VDI;
+    hdr.type = LOCK_TYPE_NORMAL;
     hdr.base_vdi_id = s->inode.vdi_id;
 
     wlen = strlen(s->name) + 1;
     hdr.data_length = wlen;
diff --git a/blockdev.c b/blockdev.c
index 6a204c662d..e37b068e9e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1757,6 +1757,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     const char *id = qdict_get_str(qdict, "id");
     BlockDriverState *bs;
+    AioContext *aio_context;
     Error *local_err = NULL;
 
     bs = bdrv_find(id);
@@ -1764,9 +1765,14 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
         error_report("Device '%s' not found", id);
         return -1;
     }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
         error_report("%s", error_get_pretty(local_err));
         error_free(local_err);
+        aio_context_release(aio_context);
         return -1;
     }
 
@@ -1790,6 +1796,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
         drive_del(drive_get_by_blockdev(bs));
     }
 
+    aio_context_release(aio_context);
     return 0;
 }
 
@@ -2179,11 +2186,12 @@ void qmp_drive_mirror(const char *device, const char *target,
     }
 
     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
-        error_set(errp, QERR_INVALID_PARAMETER, device);
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
+                  "a value in range [512B, 64MB]");
         return;
     }
     if (granularity & (granularity - 1)) {
-        error_set(errp, QERR_INVALID_PARAMETER, device);
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
+                  "power of 2");
         return;
     }
diff --git a/blockjob.c b/blockjob.c
index ca0b4e25d0..0689fdd2b5 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -205,7 +205,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
     if (block_job_is_paused(job)) {
         qemu_coroutine_yield();
     } else {
-        co_sleep_ns(type, ns);
+        co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
     }
     job->busy = true;
 }
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c07adc6e4f..b55188cb82 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -194,6 +194,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
     error_setg(&s->blocker, "block device is in use by data plane");
     bdrv_op_block_all(blk->conf.bs, s->blocker);
     bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_RESIZE, s->blocker);
+    bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
 
     *dataplane = s;
 }
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index b4a467116e..efab95b320 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -59,7 +59,7 @@ static char *idebus_get_fw_dev_path(DeviceState *dev)
 {
     char path[30];
 
-    snprintf(path, sizeof(path), "%s@%d", qdev_fw_name(dev),
+    snprintf(path, sizeof(path), "%s@%x", qdev_fw_name(dev),
              ((IDEBus*)dev->parent_bus)->bus_id);
 
     return g_strdup(path);
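
The do_drive_del() hunk above illustrates the locking rule that makes drive_del safe with dataplane: take the BDS's AioContext around any access and release it on every exit path. In outline (a fragment against QEMU's aio_context_acquire()/aio_context_release(), not a stand-alone program):

    AioContext *aio_context = bdrv_get_aio_context(bs);

    aio_context_acquire(aio_context);
    /* ... operate on bs; the dataplane thread cannot run it concurrently ... */
    aio_context_release(aio_context);   /* also on every error return */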
diff --git a/include/block/aio.h b/include/block/aio.h
index c23de3cd1f..4603c0f066 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -205,12 +205,25 @@ void qemu_bh_cancel(QEMUBH *bh);
 void qemu_bh_delete(QEMUBH *bh);
 
 /* Return whether there are any pending callbacks from the GSource
- * attached to the AioContext.
+ * attached to the AioContext, before g_poll is invoked.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_prepare(AioContext *ctx);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext, after g_poll is invoked.
  *
  * This is used internally in the implementation of the GSource.
  */
 bool aio_pending(AioContext *ctx);
 
+/* Dispatch any pending callbacks from the GSource attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_dispatch(AioContext *ctx);
+
 /* Progress in completing AIO work to occur.  This can issue new pending
  * aio as a result of executing I/O completion or bh callbacks.
  *
@@ -226,7 +239,6 @@ bool aio_pending(AioContext *ctx);
  */
 bool aio_poll(AioContext *ctx, bool blocking);
 
-#ifdef CONFIG_POSIX
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
  * be invoked when using aio_poll().
@@ -239,7 +251,6 @@ void aio_set_fd_handler(AioContext *ctx,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque);
-#endif
 
 /* Register an event notifier and associated callbacks.  Behaves very similarly
  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
@@ -303,4 +314,12 @@ static inline void aio_timer_init(AioContext *ctx,
     timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
 }
 
+/**
+ * aio_compute_timeout:
+ * @ctx: the aio context
+ *
+ * Compute the timeout that a blocking aio_poll should use.
+ */
+int64_t aio_compute_timeout(AioContext *ctx);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 233489547e..8a61215ac0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -294,6 +294,15 @@ typedef struct BlockLimits {
 
 typedef struct BdrvOpBlocker BdrvOpBlocker;
 
+typedef struct BdrvAioNotifier {
+    void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    void (*detach_aio_context)(void *opaque);
+
+    void *opaque;
+
+    QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
@@ -320,6 +329,10 @@ struct BlockDriverState {
     void *dev_opaque;
 
     AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+    /* long-running tasks intended to always use the same AioContext as this
+     * BDS may register themselves in this list to be notified of changes
+     * regarding this BDS's context */
+    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
 
     char filename[1024];
     char backing_file[1024]; /* if non zero, the image is a diff of
@@ -437,6 +450,34 @@ void bdrv_detach_aio_context(BlockDriverState *bs);
 void bdrv_attach_aio_context(BlockDriverState *bs,
                              AioContext *new_context);
 
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe of change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*aio_context_attached)(AioContext *,
+                                                                   void *),
+                                      void (*aio_context_detached)(void *),
+                                      void *opaque);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b9b7f488c9..793df0ef8b 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -203,14 +203,6 @@ void qemu_co_rwlock_unlock(CoRwlock *lock);
 /**
  * Yield the coroutine for a given duration
  *
- * Note this function uses timers and hence only works when a main loop is in
- * use.  See main-loop.h and do not use from qemu-tool programs.
- */
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
-
-/**
- * Yield the coroutine for a given duration
- *
 * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
 * resumed when using aio_poll().
 */
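
With co_sleep_ns() removed from coroutine.h, sleeping coroutine code must name the AioContext it runs in, as the blockjob.c hunk earlier in this pull does. The idiom, as a fragment (not a stand-alone program):

    /* Sleep this coroutine for ns nanoseconds in the BDS's own context,
     * so it wakes up even when the BDS runs in a dataplane thread. */
    co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME, ns);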
diff --git a/nbd.c b/nbd.c
@@ -18,6 +18,7 @@
 
 #include "block/nbd.h"
 #include "block/block.h"
+#include "block/block_int.h"
 
 #include "block/coroutine.h"
 
@@ -107,6 +108,8 @@ struct NBDExport {
     uint32_t nbdflags;
     QTAILQ_HEAD(, NBDClient) clients;
     QTAILQ_ENTRY(NBDExport) next;
+
+    AioContext *ctx;
 };
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -123,6 +126,8 @@ struct NBDClient {
     CoMutex send_lock;
     Coroutine *send_coroutine;
 
+    bool can_read;
+
     QTAILQ_ENTRY(NBDClient) next;
     int nb_requests;
     bool closing;
@@ -130,6 +135,10 @@ struct NBDClient {
 
 /* That's all folks */
 
+static void nbd_set_handlers(NBDClient *client);
+static void nbd_unset_handlers(NBDClient *client);
+static void nbd_update_can_read(NBDClient *client);
+
 ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
 {
     size_t offset = 0;
@@ -156,7 +165,7 @@ ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
         err = socket_error();
 
         /* recoverable error */
-        if (err == EINTR || (offset > 0 && err == EAGAIN)) {
+        if (err == EINTR || (offset > 0 && (err == EAGAIN || err == EWOULDBLOCK))) {
             continue;
         }
 
@@ -862,7 +871,7 @@ void nbd_client_put(NBDClient *client)
          */
         assert(client->closing);
 
-        qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
+        nbd_unset_handlers(client);
         close(client->sock);
         client->sock = -1;
         if (client->exp) {
@@ -898,6 +907,7 @@ static NBDRequest *nbd_request_get(NBDClient *client)
 
     assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
     client->nb_requests++;
+    nbd_update_can_read(client);
 
     req = g_slice_new0(NBDRequest);
     nbd_client_get(client);
@@ -914,12 +924,39 @@ static void nbd_request_put(NBDRequest *req)
     }
     g_slice_free(NBDRequest, req);
 
-    if (client->nb_requests-- == MAX_NBD_REQUESTS) {
-        qemu_notify_event();
-    }
+    client->nb_requests--;
+    nbd_update_can_read(client);
+
     nbd_client_put(client);
 }
 
+static void bs_aio_attached(AioContext *ctx, void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
+
+    exp->ctx = ctx;
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        nbd_set_handlers(client);
+    }
+}
+
+static void bs_aio_detach(void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        nbd_unset_handlers(client);
+    }
+
+    exp->ctx = NULL;
+}
+
 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
                           off_t size, uint32_t nbdflags,
                           void (*close)(NBDExport *))
@@ -932,7 +969,9 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
     exp->nbdflags = nbdflags;
     exp->size = size == -1 ? bdrv_getlength(bs) : size;
     exp->close = close;
+    exp->ctx = bdrv_get_aio_context(bs);
     bdrv_ref(bs);
+    bdrv_add_aio_context_notifier(bs, bs_aio_attached, bs_aio_detach, exp);
     return exp;
 }
 
@@ -980,6 +1019,8 @@ void nbd_export_close(NBDExport *exp)
     nbd_export_set_name(exp, NULL);
     nbd_export_put(exp);
     if (exp->bs) {
+        bdrv_remove_aio_context_notifier(exp->bs, bs_aio_attached,
+                                         bs_aio_detach, exp);
         bdrv_unref(exp->bs);
         exp->bs = NULL;
     }
@@ -1023,10 +1064,6 @@ void nbd_export_close_all(void)
     }
 }
 
-static int nbd_can_read(void *opaque);
-static void nbd_read(void *opaque);
-static void nbd_restart_write(void *opaque);
-
 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
                                  int len)
 {
@@ -1035,9 +1072,8 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
     ssize_t rc, ret;
 
     qemu_co_mutex_lock(&client->send_lock);
-    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read,
-                         nbd_restart_write, client);
     client->send_coroutine = qemu_coroutine_self();
+    nbd_set_handlers(client);
 
     if (!len) {
         rc = nbd_send_reply(csock, reply);
@@ -1054,7 +1090,7 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
     }
 
     client->send_coroutine = NULL;
-    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+    nbd_set_handlers(client);
     qemu_co_mutex_unlock(&client->send_lock);
     return rc;
 }
@@ -1067,6 +1103,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
     ssize_t rc;
 
     client->recv_coroutine = qemu_coroutine_self();
+    nbd_update_can_read(client);
+
     rc = nbd_receive_request(csock, request);
     if (rc < 0) {
         if (rc != -EAGAIN) {
@@ -1108,6 +1146,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
 
 out:
     client->recv_coroutine = NULL;
+    nbd_update_can_read(client);
+
     return rc;
 }
 
@@ -1259,13 +1299,6 @@ out:
     nbd_client_close(client);
 }
 
-static int nbd_can_read(void *opaque)
-{
-    NBDClient *client = opaque;
-
-    return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
-}
-
 static void nbd_read(void *opaque)
 {
     NBDClient *client = opaque;
@@ -1284,6 +1317,37 @@ static void nbd_restart_write(void *opaque)
     qemu_coroutine_enter(client->send_coroutine, NULL);
 }
 
+static void nbd_set_handlers(NBDClient *client)
+{
+    if (client->exp && client->exp->ctx) {
+        aio_set_fd_handler(client->exp->ctx, client->sock,
+                           client->can_read ? nbd_read : NULL,
+                           client->send_coroutine ? nbd_restart_write : NULL,
+                           client);
+    }
+}
+
+static void nbd_unset_handlers(NBDClient *client)
+{
+    if (client->exp && client->exp->ctx) {
+        aio_set_fd_handler(client->exp->ctx, client->sock, NULL, NULL, NULL);
+    }
+}
+
+static void nbd_update_can_read(NBDClient *client)
+{
+    bool can_read = client->recv_coroutine ||
+                    client->nb_requests < MAX_NBD_REQUESTS;
+
+    if (can_read != client->can_read) {
+        client->can_read = can_read;
+        nbd_set_handlers(client);
+
+        /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
+         * in nbd_set_handlers() will have taken care of that */
+    }
+}
+
 NBDClient *nbd_client_new(NBDExport *exp, int csock,
                           void (*close)(NBDClient *))
 {
@@ -1292,13 +1356,14 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock,
     client->refcount = 1;
     client->exp = exp;
     client->sock = csock;
+    client->can_read = true;
     if (nbd_send_negotiate(client)) {
         g_free(client);
         return NULL;
     }
     client->close = close;
     qemu_co_mutex_init(&client->send_lock);
-    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+    nbd_set_handlers(client);
 
     if (exp) {
         QTAILQ_INSERT_TAIL(&exp->clients, client, next);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fb74c56e32..a685d02728 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1419,6 +1419,19 @@
             'raw': 'BlockdevRef' } }
 
 ##
+# @QuorumReadPattern
+#
+# An enumeration of quorum read patterns.
+#
+# @quorum: read all the children and do a quorum vote on reads
+#
+# @fifo: read only from the first child that has not failed
+#
+# Since: 2.2
+##
+{ 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] }
+
+##
 # @BlockdevOptionsQuorum
 #
 # Driver specific block device options for Quorum
@@ -1433,12 +1446,17 @@
 # @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
 #                     (Since 2.1)
 #
+# @read-pattern: #optional choose read pattern and set to quorum by default
+#                (Since 2.2)
+#
 # Since: 2.0
 ##
 { 'type': 'BlockdevOptionsQuorum',
   'data': { '*blkverify': 'bool',
             'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
+            'vote-threshold': 'int',
+            '*rewrite-corrupted': 'bool',
+            '*read-pattern': 'QuorumReadPattern' } }
 
 ##
 # @BlockdevOptions
diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c
index 054ca70627..d4049260da 100644
--- a/qemu-coroutine-io.c
+++ b/qemu-coroutine-io.c
@@ -34,13 +34,15 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
 {
     size_t done = 0;
     ssize_t ret;
+    int err;
     while (done < bytes) {
         ret = iov_send_recv(sockfd, iov, iov_cnt,
                             offset + done, bytes - done, do_send);
         if (ret > 0) {
             done += ret;
         } else if (ret < 0) {
-            if (errno == EAGAIN) {
+            err = socket_error();
+            if (err == EAGAIN || err == EWOULDBLOCK) {
                 qemu_coroutine_yield();
             } else if (done == 0) {
                 return -1;
diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c
index ad78fbaa2a..9abb7fdf31 100644
--- a/qemu-coroutine-sleep.c
+++ b/qemu-coroutine-sleep.c
@@ -27,18 +27,6 @@ static void co_sleep_cb(void *opaque)
     qemu_coroutine_enter(sleep_cb->co, NULL);
 }
 
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns)
-{
-    CoSleepCB sleep_cb = {
-        .co = qemu_coroutine_self(),
-    };
-    sleep_cb.ts = timer_new(type, SCALE_NS, co_sleep_cb, &sleep_cb);
-    timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
-    qemu_coroutine_yield();
-    timer_del(sleep_cb.ts);
-    timer_free(sleep_cb.ts);
-}
-
 void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
                                   int64_t ns)
 {
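
nbd_update_can_read() above implements backpressure: the socket's read handler stays installed only while the client may start another request. A stand-alone model of the toggle logic; the printf stands in for the aio_set_fd_handler() call and all names are invented:

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_NBD_REQUESTS 16

    typedef struct Client {
        bool recv_coroutine_active;
        int nb_requests;
        bool can_read;                  /* mirrors the installed handler */
    } Client;

    /* Models nbd_update_can_read(): (un)install the read handler only on
     * transitions, which throttles a client that floods us with requests. */
    static void update_can_read(Client *c)
    {
        bool can_read = c->recv_coroutine_active ||
                        c->nb_requests < MAX_NBD_REQUESTS;

        if (can_read != c->can_read) {
            c->can_read = can_read;
            /* nbd.c calls aio_set_fd_handler() here */
            printf("read handler %s\n", can_read ? "installed" : "removed");
        }
    }

    int main(void)
    {
        Client c = { .can_read = true };

        for (int i = 0; i < MAX_NBD_REQUESTS; i++) {
            c.nb_requests++;
            update_can_read(&c);        /* removed at the 16th request */
        }
        c.nb_requests--;
        update_can_read(&c);            /* installed again */
        return 0;
    }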
a/qemu-img.c +++ b/qemu-img.c @@ -752,7 +752,7 @@ static int img_commit(int argc, char **argv) ret = bdrv_parse_cache_flags(cache, &flags); if (ret < 0) { error_report("Invalid cache option: %s", cache); - return -1; + return 1; } bs = bdrv_new_open("image", filename, fmt, flags, true, quiet); @@ -999,6 +999,9 @@ static int img_compare(int argc, char **argv) filename1 = argv[optind++]; filename2 = argv[optind++]; + /* Initialize before goto out */ + qemu_progress_init(progress, 2.0); + flags = BDRV_O_FLAGS; ret = bdrv_parse_cache_flags(cache, &flags); if (ret < 0) { @@ -1007,9 +1010,6 @@ static int img_compare(int argc, char **argv) goto out3; } - /* Initialize before goto out */ - qemu_progress_init(progress, 2.0); - bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet); if (!bs1) { error_report("Can't open file %s", filename1); @@ -2304,7 +2304,7 @@ static int img_snapshot(int argc, char **argv) static int img_rebase(int argc, char **argv) { - BlockDriverState *bs, *bs_old_backing = NULL, *bs_new_backing = NULL; + BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL; BlockDriver *old_backing_drv, *new_backing_drv; char *filename; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; @@ -2376,14 +2376,14 @@ static int img_rebase(int argc, char **argv) ret = bdrv_parse_cache_flags(cache, &flags); if (ret < 0) { error_report("Invalid cache option: %s", cache); - return -1; + goto out; } src_flags = BDRV_O_FLAGS; ret = bdrv_parse_cache_flags(src_cache, &src_flags); if (ret < 0) { error_report("Invalid source cache option: %s", src_cache); - return -1; + goto out; } /* @@ -2394,7 +2394,8 @@ static int img_rebase(int argc, char **argv) */ bs = bdrv_new_open("image", filename, fmt, flags, true, quiet); if (!bs) { - return 1; + ret = -1; + goto out; } /* Find the right drivers for the backing files */ @@ -2420,11 +2421,7 @@ static int img_rebase(int argc, char **argv) } /* For safe rebasing we need to compare old and new backing file */ - if (unsafe) { - /* Make the compiler happy */ - bs_old_backing = NULL; - bs_new_backing = NULL; - } else { + if (!unsafe) { char backing_name[1024]; bs_old_backing = bdrv_new("old_backing", &error_abort); diff --git a/qemu-options.hx b/qemu-options.hx index c573dd8893..5479cf54f4 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2351,6 +2351,16 @@ multiple of 512 bytes. It defaults to 256k. @item sslverify Whether to verify the remote server's certificate when connecting over SSL. It can have the value 'on' or 'off'. It defaults to 'on'. + +@item cookie +Send this cookie (it can also be a list of cookies separated by ';') with +each outgoing request. Only supported when using protocols such as HTTP +which support cookies, otherwise ignored. + +@item timeout +Set the timeout in seconds of the CURL connection. This timeout is the time +that CURL waits for a response from the remote server to get the size of the +image to be downloaded. If not set, the default timeout of 5 seconds is used. @end table Note that when passing options to qemu explicitly, @option{driver} is the value @@ -2372,9 +2382,10 @@ qemu-system-x86_64 -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-o @end example Example: boot from an image stored on a VMware vSphere server with a self-signed -certificate using a local overlay for writes and a readahead of 64k +certificate using a local overlay for writes, a readahead of 64k and a timeout +of 10 seconds. 
 
 Example: boot from an image stored on a VMware vSphere server with a self-signed
-certificate using a local overlay for writes and a readahead of 64k
+certificate using a local overlay for writes, a readahead of 64k, and a timeout
+of 10 seconds.
 @example
-qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k"@}' /tmp/test.qcow2
+qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2
 qemu-system-x86_64 -drive file=/tmp/test.qcow2
 @end example
diff --git a/tests/qemu-iotests/100 b/tests/qemu-iotests/100
new file mode 100755
index 0000000000..9124aba760
--- /dev/null
+++ b/tests/qemu-iotests/100
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Test multiwrite requests, including overlapping requests
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+
+size=128M
+
+echo
+echo "== Single request =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Sequential requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 4k 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 8k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Superset overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 1k 2k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed so we cannot verify
+# [1k, 3k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xcd 0 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xcd 3k 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Subset overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 1k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed so we cannot verify
+# [1k, 3k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 0 1k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0xce 3k 1k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== Head overlapping requests ==" +_make_test_img $size +$QEMU_IO -c "multiwrite 0k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== verify pattern ==" +# Order of overlapping in-flight requests is not guaranteed so we cannot verify +# [0k, 2k) since it could have either pattern 0xcd or 0xce. +$QEMU_IO -c "read -P 0xce 2k 2k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== Tail overlapping requests ==" +_make_test_img $size +$QEMU_IO -c "multiwrite 2k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== verify pattern ==" +# Order of overlapping in-flight requests is not guaranteed so we cannot verify +# [2k, 4k) since it could have either pattern 0xcd or 0xce. +$QEMU_IO -c "read -P 0xce 0k 2k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== Disjoint requests ==" +_make_test_img $size +$QEMU_IO -c "multiwrite 0 4k ; 64k 4k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== verify pattern ==" +$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 4k 60k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0xce 64k 4k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 68k 4k" "$TEST_IMG" | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/100.out b/tests/qemu-iotests/100.out new file mode 100644 index 0000000000..2d6e9f0a7d --- /dev/null +++ b/tests/qemu-iotests/100.out @@ -0,0 +1,89 @@ +QA output created by 100 + +== Single request == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +read 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 4096 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Sequential requests == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +wrote 8192/8192 bytes at offset 0 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +read 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 4096 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 8192 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Superset overlapping requests == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +wrote 6144/6144 bytes at offset 0 +6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +read 1024/1024 bytes at offset 0 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 3072 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 4096 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Subset overlapping requests == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +wrote 6144/6144 bytes at offset 1024 +6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +read 1024/1024 bytes at offset 0 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 3072 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Head overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 6144/6144 bytes at offset 0
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 2048/2048 bytes at offset 2048
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Tail overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 6144/6144 bytes at offset 2048
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Disjoint requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 8192/8192 bytes at offset 0
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 61440/61440 bytes at offset 4096
+60 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 65536
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 69632
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 2803d68d62..0920b28db4 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -101,5 +101,6 @@
 092 rw auto quick
 095 rw auto quick
 099 rw auto quick
+100 rw auto quick
 101 rw auto quick
 103 rw auto quick
diff --git a/tests/test-aio.c b/tests/test-aio.c
index f12b6e0ae8..c6a8713e83 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -57,8 +57,6 @@ static void bh_test_cb(void *opaque)
     }
 }
 
-#if !defined(_WIN32)
-
 static void timer_test_cb(void *opaque)
 {
     TimerTestData *data = opaque;
@@ -68,12 +66,10 @@ static void timer_test_cb(void *opaque)
     }
 }
 
-static void dummy_io_handler_read(void *opaque)
+static void dummy_io_handler_read(EventNotifier *e)
 {
 }
 
-#endif /* !_WIN32 */
-
 static void bh_delete_cb(void *opaque)
 {
     BHTestData *data = opaque;
@@ -428,24 +424,18 @@ static void test_wait_event_notifier_noflush(void)
     event_notifier_cleanup(&data.e);
 }
 
-#if !defined(_WIN32)
-
 static void test_timer_schedule(void)
 {
     TimerTestData data = { .n = 0, .ctx = ctx, .ns = SCALE_MS * 750LL,
                            .max = 2,
                            .clock_type = QEMU_CLOCK_VIRTUAL };
-    int pipefd[2];
+    EventNotifier e;
 
     /* aio_poll will not block to wait for timers to complete unless it has
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
-    g_assert(!qemu_pipe(pipefd));
-    qemu_set_nonblock(pipefd[0]);
-    qemu_set_nonblock(pipefd[1]);
-
-    aio_set_fd_handler(ctx, pipefd[0],
-                       dummy_io_handler_read, NULL, NULL);
+    event_notifier_init(&e, false);
+    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
     aio_poll(ctx, false);
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -484,15 +474,12 @@ static void test_timer_schedule(void)
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
-    close(pipefd[0]);
-    close(pipefd[1]);
+    aio_set_event_notifier(ctx, &e, NULL);
+    event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
 }
 
-#endif /* !_WIN32 */
-
 /* Now the same tests, using the context as a GSource.  They are
  * very similar to the ones above, with g_main_context_iteration
  * replacing aio_poll.  However:
@@ -775,25 +762,19 @@ static void test_source_wait_event_notifier_noflush(void)
     event_notifier_cleanup(&data.e);
 }
 
-#if !defined(_WIN32)
-
 static void test_source_timer_schedule(void)
 {
     TimerTestData data = { .n = 0, .ctx = ctx, .ns = SCALE_MS * 750LL,
                            .max = 2,
                            .clock_type = QEMU_CLOCK_VIRTUAL };
-    int pipefd[2];
+    EventNotifier e;
     int64_t expiry;
 
     /* aio_poll will not block to wait for timers to complete unless it has
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
-    g_assert(!qemu_pipe(pipefd));
-    qemu_set_nonblock(pipefd[0]);
-    qemu_set_nonblock(pipefd[1]);
-
-    aio_set_fd_handler(ctx, pipefd[0],
-                       dummy_io_handler_read, NULL, NULL);
+    event_notifier_init(&e, false);
+    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
     do {} while (g_main_context_iteration(NULL, false));
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -818,15 +799,12 @@ static void test_source_timer_schedule(void)
     g_assert_cmpint(data.n, ==, 2);
     g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
 
-    aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
-    close(pipefd[0]);
-    close(pipefd[1]);
+    aio_set_event_notifier(ctx, &e, NULL);
+    event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
 }
 
-#endif /* !_WIN32 */
-
 /* End of tests.  */
 
@@ -857,9 +835,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio/event/wait", test_wait_event_notifier);
     g_test_add_func("/aio/event/wait/no-flush-cb", test_wait_event_notifier_noflush);
     g_test_add_func("/aio/event/flush", test_flush_event_notifier);
-#if !defined(_WIN32)
     g_test_add_func("/aio/timer/schedule", test_timer_schedule);
-#endif
 
     g_test_add_func("/aio-gsource/notify", test_source_notify);
     g_test_add_func("/aio-gsource/flush", test_source_flush);
@@ -874,8 +850,6 @@ int main(int argc, char **argv)
     g_test_add_func("/aio-gsource/event/wait", test_source_wait_event_notifier);
     g_test_add_func("/aio-gsource/event/wait/no-flush-cb", test_source_wait_event_notifier_noflush);
     g_test_add_func("/aio-gsource/event/flush", test_source_flush_event_notifier);
-#if !defined(_WIN32)
     g_test_add_func("/aio-gsource/timer/schedule", test_source_timer_schedule);
-#endif
 
     return g_test_run();
 }
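Distilled from the test-aio.c changes above, the portable pattern that replaces
the nonblocking pipe is an EventNotifier registered with the AioContext; it
works on both POSIX and Win32, which is why the !_WIN32 guards can be dropped.
A sketch (run_timer_test_with_notifier() is an illustrative wrapper, not a
function from the patch):

    static void dummy_io_handler_read(EventNotifier *e)
    {
        /* Never fires; the notifier only gives aio_poll() an fd to wait on. */
    }

    static void run_timer_test_with_notifier(AioContext *ctx)
    {
        EventNotifier e;

        event_notifier_init(&e, false);
        aio_set_event_notifier(ctx, &e, dummy_io_handler_read);

        /* ... arm timers and drive aio_poll(ctx, true) here ... */

        aio_set_event_notifier(ctx, &e, NULL);
        event_notifier_cleanup(&e);
    }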