-rw-r--r--  aio-posix.c                      |  58
-rw-r--r--  aio-win32.c                      | 262
-rw-r--r--  async.c                          |  39
-rw-r--r--  block.c                          |  62
-rw-r--r--  block/Makefile.objs              |   2
-rw-r--r--  block/blkverify.c                |   1
-rw-r--r--  block/curl.c                     |  37
-rw-r--r--  block/linux-aio.c                |  71
-rw-r--r--  block/nfs.c                      |  10
-rw-r--r--  block/quorum.c                   | 180
-rw-r--r--  block/sheepdog.c                 |  12
-rw-r--r--  blockdev.c                       |  12
-rw-r--r--  blockjob.c                       |   2
-rw-r--r--  hw/block/dataplane/virtio-blk.c  |   1
-rw-r--r--  hw/ide/qdev.c                    |   2
-rw-r--r--  include/block/aio.h              |  25
-rw-r--r--  include/block/block_int.h        |  41
-rw-r--r--  include/block/coroutine.h        |   8
-rw-r--r--  nbd.c                            | 105
-rw-r--r--  qapi/block-core.json             |  20
-rw-r--r--  qemu-coroutine-io.c              |   4
-rw-r--r--  qemu-coroutine-sleep.c           |  12
-rw-r--r--  qemu-img.c                       |  23
-rw-r--r--  qemu-options.hx                  |  15
-rwxr-xr-x  tests/qemu-iotests/100           | 134
-rw-r--r--  tests/qemu-iotests/100.out       |  89
-rw-r--r--  tests/qemu-iotests/group         |   1
-rw-r--r--  tests/test-aio.c                 |  48
28 files changed, 979 insertions(+), 297 deletions(-)
diff --git a/aio-posix.c b/aio-posix.c
index 2eada2e049..d3ac06e238 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -100,6 +100,11 @@ void aio_set_event_notifier(AioContext *ctx,
(IOHandler *)io_read, NULL, notifier);
}
+bool aio_prepare(AioContext *ctx)
+{
+ return false;
+}
+
bool aio_pending(AioContext *ctx)
{
AioHandler *node;
@@ -119,12 +124,21 @@ bool aio_pending(AioContext *ctx)
return false;
}
-static bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
{
AioHandler *node;
bool progress = false;
/*
+ * If there are callbacks left that have been queued, we need to call them.
+ * Do not call select in this case, because it is possible that the caller
+ * does not need a complete flush (as is the case for aio_poll loops).
+ */
+ if (aio_bh_poll(ctx)) {
+ progress = true;
+ }
+
+ /*
* We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking.
*/
@@ -184,22 +198,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* aio_notify can avoid the expensive event_notifier_set if
* everything (file descriptors, bottom halves, timers) will
- * be re-evaluated before the next blocking poll(). This happens
- * in two cases:
- *
- * 1) when aio_poll is called with blocking == false
- *
- * 2) when we are called after poll(). If we are called before
- * poll(), bottom halves will not be re-evaluated and we need
- * aio_notify() if blocking == true.
- *
- * The first aio_dispatch() only does something when AioContext is
- * running as a GSource, and in that case aio_poll is used only
- * with blocking == false, so this optimization is already quite
- * effective. However, the code is ugly and should be restructured
- * to have a single aio_dispatch() call. To do this, we need to
- * reorganize aio_poll into a prepare/poll/dispatch model like
- * glib's.
+ * be re-evaluated before the next blocking poll(). This is
+ * already true when aio_poll is called with blocking == false;
+ * if blocking == true, it is only true after poll() returns.
*
* If we're in a nested event loop, ctx->dispatching might be true.
* In that case we can restore it just before returning, but we
@@ -207,26 +208,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
*/
aio_set_dispatching(ctx, !blocking);
- /*
- * If there are callbacks left that have been queued, we need to call them.
- * Do not call select in this case, because it is possible that the caller
- * does not need a complete flush (as is the case for aio_poll loops).
- */
- if (aio_bh_poll(ctx)) {
- blocking = false;
- progress = true;
- }
-
- /* Re-evaluate condition (1) above. */
- aio_set_dispatching(ctx, !blocking);
- if (aio_dispatch(ctx)) {
- progress = true;
- }
-
- if (progress && !blocking) {
- goto out;
- }
-
ctx->walking_handlers++;
g_array_set_size(ctx->pollfds, 0);
@@ -249,7 +230,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* wait until next event */
ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
ctx->pollfds->len,
- blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
+ blocking ? aio_compute_timeout(ctx) : 0);
/* if we have any readable fds, dispatch event */
if (ret > 0) {
@@ -268,7 +249,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
progress = true;
}
-out:
aio_set_dispatching(ctx, was_dispatching);
return progress;
}
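The restructured aio_poll() now follows the same prepare/poll/dispatch shape as glib's main loop, which the deleted comment had asked for. A minimal standalone sketch of that shape in plain POSIX C (illustration only, not QEMU code; the real aio_poll() additionally tracks ctx->dispatching and walking_handlers):

#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool dispatch(struct pollfd *pfd)
{
    bool progress = false;
    if (pfd->revents & POLLIN) {
        char buf[64];
        progress = read(pfd->fd, buf, sizeof(buf)) > 0;
    }
    return progress;
}

int main(void)
{
    struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };

    /* prepare: compute the blocking timeout up front
     * (cf. aio_compute_timeout() in the async.c hunk below) */
    int timeout_ms = 1000;

    /* poll: one blocking wait; no dispatch pass happens before this */
    int ret = poll(&pfd, 1, timeout_ms);

    /* dispatch: handlers run only after the wait returns */
    bool progress = ret > 0 && dispatch(&pfd);
    printf("progress=%s\n", progress ? "true" : "false");
    return 0;
}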
diff --git a/aio-win32.c b/aio-win32.c
index c12f61e97d..61e3d2ddfe 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -22,12 +22,80 @@
struct AioHandler {
EventNotifier *e;
+ IOHandler *io_read;
+ IOHandler *io_write;
EventNotifierHandler *io_notify;
GPollFD pfd;
int deleted;
+ void *opaque;
QLIST_ENTRY(AioHandler) node;
};
+void aio_set_fd_handler(AioContext *ctx,
+ int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ void *opaque)
+{
+ /* fd is a SOCKET in our case */
+ AioHandler *node;
+
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+ if (node->pfd.fd == fd && !node->deleted) {
+ break;
+ }
+ }
+
+ /* Are we deleting the fd handler? */
+ if (!io_read && !io_write) {
+ if (node) {
+ /* If the lock is held, just mark the node as deleted */
+ if (ctx->walking_handlers) {
+ node->deleted = 1;
+ node->pfd.revents = 0;
+ } else {
+ /* Otherwise, delete it for real. We can't just mark it as
+ * deleted because deleted nodes are only cleaned up after
+ * releasing the walking_handlers lock.
+ */
+ QLIST_REMOVE(node, node);
+ g_free(node);
+ }
+ }
+ } else {
+ HANDLE event;
+
+ if (node == NULL) {
+ /* Alloc and insert if it's not already there */
+ node = g_malloc0(sizeof(AioHandler));
+ node->pfd.fd = fd;
+ QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+ }
+
+ node->pfd.events = 0;
+ if (node->io_read) {
+ node->pfd.events |= G_IO_IN;
+ }
+ if (node->io_write) {
+ node->pfd.events |= G_IO_OUT;
+ }
+
+ node->e = &ctx->notifier;
+
+ /* Update handler with latest information */
+ node->opaque = opaque;
+ node->io_read = io_read;
+ node->io_write = io_write;
+
+ event = event_notifier_get_handle(&ctx->notifier);
+ WSAEventSelect(node->pfd.fd, event,
+ FD_READ | FD_ACCEPT | FD_CLOSE |
+ FD_CONNECT | FD_WRITE | FD_OOB);
+ }
+
+ aio_notify(ctx);
+}
+
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *e,
EventNotifierHandler *io_notify)
@@ -76,55 +144,82 @@ void aio_set_event_notifier(AioContext *ctx,
aio_notify(ctx);
}
-bool aio_pending(AioContext *ctx)
+bool aio_prepare(AioContext *ctx)
{
+ static struct timeval tv0;
AioHandler *node;
+ bool have_select_revents = false;
+ fd_set rfds, wfds;
+ /* fill fd sets */
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- if (node->pfd.revents && node->io_notify) {
- return true;
+ if (node->io_read) {
+ FD_SET ((SOCKET)node->pfd.fd, &rfds);
+ }
+ if (node->io_write) {
+ FD_SET ((SOCKET)node->pfd.fd, &wfds);
}
}
- return false;
+ if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+ node->pfd.revents = 0;
+ if (FD_ISSET(node->pfd.fd, &rfds)) {
+ node->pfd.revents |= G_IO_IN;
+ have_select_revents = true;
+ }
+
+ if (FD_ISSET(node->pfd.fd, &wfds)) {
+ node->pfd.revents |= G_IO_OUT;
+ have_select_revents = true;
+ }
+ }
+ }
+
+ return have_select_revents;
}
-bool aio_poll(AioContext *ctx, bool blocking)
+bool aio_pending(AioContext *ctx)
{
AioHandler *node;
- HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
- bool progress;
- int count;
- int timeout;
- progress = false;
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+ if (node->pfd.revents && node->io_notify) {
+ return true;
+ }
- /*
- * If there are callbacks left that have been queued, we need to call then.
- * Do not call select in this case, because it is possible that the caller
- * does not need a complete flush (as is the case for aio_poll loops).
- */
- if (aio_bh_poll(ctx)) {
- blocking = false;
- progress = true;
+ if ((node->pfd.revents & G_IO_IN) && node->io_read) {
+ return true;
+ }
+ if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
+ return true;
+ }
}
- /* Run timers */
- progress |= timerlistgroup_run_timers(&ctx->tlg);
+ return false;
+}
+
+static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
+{
+ AioHandler *node;
+ bool progress = false;
/*
- * Then dispatch any pending callbacks from the GSource.
- *
* We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking.
*/
node = QLIST_FIRST(&ctx->aio_handlers);
while (node) {
AioHandler *tmp;
+ int revents = node->pfd.revents;
ctx->walking_handlers++;
- if (node->pfd.revents && node->io_notify) {
+ if (!node->deleted &&
+ (revents || event_notifier_get_handle(node->e) == event) &&
+ node->io_notify) {
node->pfd.revents = 0;
node->io_notify(node->e);
@@ -134,6 +229,28 @@ bool aio_poll(AioContext *ctx, bool blocking)
}
}
+ if (!node->deleted &&
+ (node->io_read || node->io_write)) {
+ node->pfd.revents = 0;
+ if ((revents & G_IO_IN) && node->io_read) {
+ node->io_read(node->opaque);
+ progress = true;
+ }
+ if ((revents & G_IO_OUT) && node->io_write) {
+ node->io_write(node->opaque);
+ progress = true;
+ }
+
+ /* if the next select() will return an event, we have progressed */
+ if (event == event_notifier_get_handle(&ctx->notifier)) {
+ WSANETWORKEVENTS ev;
+ WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
+ if (ev.lNetworkEvents) {
+ progress = true;
+ }
+ }
+ }
+
tmp = node;
node = QLIST_NEXT(node, node);
@@ -145,10 +262,47 @@ bool aio_poll(AioContext *ctx, bool blocking)
}
}
- if (progress && !blocking) {
- return true;
+ return progress;
+}
+
+bool aio_dispatch(AioContext *ctx)
+{
+ bool progress;
+
+ progress = aio_bh_poll(ctx);
+ progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+ progress |= timerlistgroup_run_timers(&ctx->tlg);
+ return progress;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+ AioHandler *node;
+ HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+ bool was_dispatching, progress, have_select_revents, first;
+ int count;
+ int timeout;
+
+ if (aio_prepare(ctx)) {
+ blocking = false;
+ have_select_revents = true;
}
+ was_dispatching = ctx->dispatching;
+ progress = false;
+
+ /* aio_notify can avoid the expensive event_notifier_set if
+ * everything (file descriptors, bottom halves, timers) will
+ * be re-evaluated before the next blocking poll(). This is
+ * already true when aio_poll is called with blocking == false;
+ * if blocking == true, it is only true after poll() returns.
+ *
+ * If we're in a nested event loop, ctx->dispatching might be true.
+ * In that case we can restore it just before returning, but we
+ * have to clear it now.
+ */
+ aio_set_dispatching(ctx, !blocking);
+
ctx->walking_handlers++;
/* fill fd sets */
@@ -160,64 +314,42 @@ bool aio_poll(AioContext *ctx, bool blocking)
}
ctx->walking_handlers--;
+ first = true;
/* wait until next event */
while (count > 0) {
+ HANDLE event;
int ret;
- timeout = blocking ?
- qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
+ timeout = blocking
+ ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+ aio_set_dispatching(ctx, true);
+
+ if (first && aio_bh_poll(ctx)) {
+ progress = true;
+ }
+ first = false;
/* if we have any signaled events, dispatch event */
- if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
+ event = NULL;
+ if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
+ event = events[ret - WAIT_OBJECT_0];
+ } else if (!have_select_revents) {
break;
}
+ have_select_revents = false;
blocking = false;
- /* we have to walk very carefully in case
- * aio_set_fd_handler is called while we're walking */
- node = QLIST_FIRST(&ctx->aio_handlers);
- while (node) {
- AioHandler *tmp;
-
- ctx->walking_handlers++;
-
- if (!node->deleted &&
- event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
- node->io_notify) {
- node->io_notify(node->e);
-
- /* aio_notify() does not count as progress */
- if (node->e != &ctx->notifier) {
- progress = true;
- }
- }
-
- tmp = node;
- node = QLIST_NEXT(node, node);
-
- ctx->walking_handlers--;
-
- if (!ctx->walking_handlers && tmp->deleted) {
- QLIST_REMOVE(tmp, node);
- g_free(tmp);
- }
- }
+ progress |= aio_dispatch_handlers(ctx, event);
/* Try again, but only call each handler once. */
events[ret - WAIT_OBJECT_0] = events[--count];
}
- if (blocking) {
- /* Run the timers a second time. We do this because otherwise aio_wait
- * will not note progress - and will stop a drain early - if we have
- * a timer that was not ready to run entering g_poll but is ready
- * after g_poll. This will only do anything if a timer has expired.
- */
- progress |= timerlistgroup_run_timers(&ctx->tlg);
- }
+ progress |= timerlistgroup_run_timers(&ctx->tlg);
+ aio_set_dispatching(ctx, was_dispatching);
return progress;
}
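For reference, a standalone illustration of the zero-timeout select() probe that the new aio_prepare() performs before blocking (plain POSIX C; on Windows the first argument of select() is ignored, which is why the patch can pass 0 there):

#include <stdio.h>
#include <sys/select.h>
#include <unistd.h>

int main(void)
{
    fd_set rfds;
    struct timeval tv0 = { 0, 0 };   /* zero timeout: probe, don't block */

    FD_ZERO(&rfds);
    FD_SET(STDIN_FILENO, &rfds);

    /* a positive return means revents can be recorded before dispatch */
    if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv0) > 0 &&
        FD_ISSET(STDIN_FILENO, &rfds)) {
        printf("readable: aio_prepare() would set a G_IO_IN revent\n");
    } else {
        printf("nothing pending: have_select_revents stays false\n");
    }
    return 0;
}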
diff --git a/async.c b/async.c
index 34af0b25ca..a99e7f639a 100644
--- a/async.c
+++ b/async.c
@@ -152,39 +152,48 @@ void qemu_bh_delete(QEMUBH *bh)
bh->deleted = 1;
}
-static gboolean
-aio_ctx_prepare(GSource *source, gint *timeout)
+int64_t
+aio_compute_timeout(AioContext *ctx)
{
- AioContext *ctx = (AioContext *) source;
+ int64_t deadline;
+ int timeout = -1;
QEMUBH *bh;
- int deadline;
- /* We assume there is no timeout already supplied */
- *timeout = -1;
for (bh = ctx->first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
if (bh->idle) {
/* idle bottom halves will be polled at least
* every 10ms */
- *timeout = 10;
+ timeout = 10000000;
} else {
/* non-idle bottom halves will be executed
* immediately */
- *timeout = 0;
- return true;
+ return 0;
}
}
}
- deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
+ deadline = timerlistgroup_deadline_ns(&ctx->tlg);
if (deadline == 0) {
- *timeout = 0;
- return true;
+ return 0;
} else {
- *timeout = qemu_soonest_timeout(*timeout, deadline);
+ return qemu_soonest_timeout(timeout, deadline);
+ }
+}
+
+static gboolean
+aio_ctx_prepare(GSource *source, gint *timeout)
+{
+ AioContext *ctx = (AioContext *) source;
+
+ /* We assume there is no timeout already supplied */
+ *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+
+ if (aio_prepare(ctx)) {
+ *timeout = 0;
}
- return false;
+ return *timeout == 0;
}
static gboolean
@@ -209,7 +218,7 @@ aio_ctx_dispatch(GSource *source,
AioContext *ctx = (AioContext *) source;
assert(callback == NULL);
- aio_poll(ctx, false);
+ aio_dispatch(ctx);
return true;
}
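The deadline merging in aio_compute_timeout() can be exercised in isolation. A standalone sketch, where soonest() mirrors qemu_soonest_timeout() and -1 means "wait forever":

#include <stdint.h>
#include <stdio.h>

static int64_t soonest(int64_t a, int64_t b)
{
    /* the unsigned compare makes -1 lose to any finite deadline */
    return (uint64_t)a < (uint64_t)b ? a : b;
}

int main(void)
{
    int64_t timeout = -1;                  /* nothing scheduled yet */
    timeout = soonest(timeout, 10000000);  /* idle BH: poll every 10 ms */
    timeout = soonest(timeout, 3000000);   /* timer due in 3 ms */
    printf("block for %lld ns\n", (long long)timeout);  /* 3000000 */
    return 0;
}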
diff --git a/block.c b/block.c
index 1df13ac1c7..cb670fd54d 100644
--- a/block.c
+++ b/block.c
@@ -1819,6 +1819,8 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
void bdrv_close(BlockDriverState *bs)
{
+ BdrvAioNotifier *ban, *ban_next;
+
if (bs->job) {
block_job_cancel_sync(bs->job);
}
@@ -1863,6 +1865,11 @@ void bdrv_close(BlockDriverState *bs)
if (bs->io_limits_enabled) {
bdrv_io_limits_disable(bs);
}
+
+ QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+ g_free(ban);
+ }
+ QLIST_INIT(&bs->aio_notifiers);
}
void bdrv_close_all(void)
@@ -4546,6 +4553,12 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
// Add the second request
qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
+ // Add tail of first request, if necessary
+ if (qiov->size < reqs[outidx].qiov->size) {
+ qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
+ reqs[outidx].qiov->size - qiov->size);
+ }
+
reqs[outidx].nb_sectors = qiov->size >> 9;
reqs[outidx].qiov = qiov;
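A worked example of the tail concatenation added above, assuming 512-byte sectors: if reqs[outidx] covers bytes [0, 4096) and reqs[i] covers [1024, 3072), the merge concatenates the head [0, 1024) of the first request plus all 2048 bytes of the second, so qiov->size is 3072 while reqs[outidx].qiov->size is 4096. Without the new block the merged request would silently shrink to 6 sectors and drop the tail, so the remaining [3072, 4096) of the first request must be appended. This is the case exercised by the "Subset overlapping requests" test in qemu-iotests 100 below.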
@@ -5729,10 +5742,16 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
void bdrv_detach_aio_context(BlockDriverState *bs)
{
+ BdrvAioNotifier *baf;
+
if (!bs->drv) {
return;
}
+ QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
+ baf->detach_aio_context(baf->opaque);
+ }
+
if (bs->io_limits_enabled) {
throttle_detach_aio_context(&bs->throttle_state);
}
@@ -5752,6 +5771,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
void bdrv_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
+ BdrvAioNotifier *ban;
+
if (!bs->drv) {
return;
}
@@ -5770,6 +5791,10 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
if (bs->io_limits_enabled) {
throttle_attach_aio_context(&bs->throttle_state, new_context);
}
+
+ QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
+ ban->attached_aio_context(new_context, ban->opaque);
+ }
}
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
@@ -5786,6 +5811,43 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
aio_context_release(new_context);
}
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque)
+{
+ BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
+ *ban = (BdrvAioNotifier){
+ .attached_aio_context = attached_aio_context,
+ .detach_aio_context = detach_aio_context,
+ .opaque = opaque
+ };
+
+ QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
+}
+
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *,
+ void *),
+ void (*detach_aio_context)(void *),
+ void *opaque)
+{
+ BdrvAioNotifier *ban, *ban_next;
+
+ QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+ if (ban->attached_aio_context == attached_aio_context &&
+ ban->detach_aio_context == detach_aio_context &&
+ ban->opaque == opaque)
+ {
+ QLIST_REMOVE(ban, list);
+ g_free(ban);
+
+ return;
+ }
+ }
+
+ abort();
+}
+
void bdrv_add_before_write_notifier(BlockDriverState *bs,
NotifierWithReturn *notifier)
{
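An illustrative caller of the new notifier pair (MyJob and the handler bodies are hypothetical; the nbd.c hunks later in this series follow this same shape):

typedef struct MyJob {
    BlockDriverState *bs;
    AioContext *ctx;
} MyJob;

static void my_job_attached(AioContext *new_context, void *opaque)
{
    MyJob *job = opaque;
    job->ctx = new_context;      /* re-register fd handlers/timers here */
}

static void my_job_detach(void *opaque)
{
    MyJob *job = opaque;
    job->ctx = NULL;             /* unregister fd handlers/timers here */
}

static void my_job_start(MyJob *job)
{
    bdrv_add_aio_context_notifier(job->bs, my_job_attached,
                                  my_job_detach, job);
}

static void my_job_stop(MyJob *job)
{
    /* must pass the same triple; an unmatched removal aborts */
    bdrv_remove_aio_context_notifier(job->bs, my_job_attached,
                                     my_job_detach, job);
}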
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 858d2b387b..f45f9399aa 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -10,7 +10,6 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-ifeq ($(CONFIG_POSIX),y)
block-obj-y += nbd.o nbd-client.o sheepdog.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
block-obj-$(CONFIG_LIBNFS) += nfs.o
@@ -19,7 +18,6 @@ block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
-endif
common-obj-y += stream.o
common-obj-y += commit.o
diff --git a/block/blkverify.c b/block/blkverify.c
index 7c78ca41a5..163064cf6b 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -158,6 +158,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
ret = 0;
fail:
+ qemu_opts_del(opts);
return ret;
}
diff --git a/block/curl.c b/block/curl.c
index d4b85d20a5..025833994c 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -63,6 +63,7 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
#define CURL_NUM_ACB 8
#define SECTOR_SIZE 512
#define READ_AHEAD_DEFAULT (256 * 1024)
+#define CURL_TIMEOUT_DEFAULT 5
#define FIND_RET_NONE 0
#define FIND_RET_OK 1
@@ -71,6 +72,8 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
#define CURL_BLOCK_OPT_URL "url"
#define CURL_BLOCK_OPT_READAHEAD "readahead"
#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
+#define CURL_BLOCK_OPT_TIMEOUT "timeout"
+#define CURL_BLOCK_OPT_COOKIE "cookie"
struct BDRVCURLState;
@@ -109,6 +112,8 @@ typedef struct BDRVCURLState {
char *url;
size_t readahead_size;
bool sslverify;
+ int timeout;
+ char *cookie;
bool accept_range;
AioContext *aio_context;
} BDRVCURLState;
@@ -352,7 +357,7 @@ static void curl_multi_timeout_do(void *arg)
#endif
}
-static CURLState *curl_init_state(BDRVCURLState *s)
+static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
{
CURLState *state = NULL;
int i, j;
@@ -370,7 +375,7 @@ static CURLState *curl_init_state(BDRVCURLState *s)
break;
}
if (!state) {
- aio_poll(state->s->aio_context, true);
+ aio_poll(bdrv_get_aio_context(bs), true);
}
} while(!state);
@@ -382,7 +387,10 @@ static CURLState *curl_init_state(BDRVCURLState *s)
curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
(long) s->sslverify);
- curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
+ if (s->cookie) {
+ curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
+ }
+ curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, s->timeout);
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
(void *)curl_read_cb);
curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
@@ -489,6 +497,16 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Verify SSL certificate"
},
+ {
+ .name = CURL_BLOCK_OPT_TIMEOUT,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Curl timeout"
+ },
+ {
+ .name = CURL_BLOCK_OPT_COOKIE,
+ .type = QEMU_OPT_STRING,
+ .help = "Pass the cookie or list of cookies with each request"
+ },
{ /* end of list */ }
},
};
@@ -501,6 +519,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
QemuOpts *opts;
Error *local_err = NULL;
const char *file;
+ const char *cookie;
double d;
static int inited = 0;
@@ -525,8 +544,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
goto out_noclean;
}
+ s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
+ CURL_TIMEOUT_DEFAULT);
+
s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
+ cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
+ s->cookie = g_strdup(cookie);
+
file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
if (file == NULL) {
error_setg(errp, "curl block driver requires an 'url' option");
@@ -541,7 +566,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
DPRINTF("CURL: Opening %s\n", file);
s->aio_context = bdrv_get_aio_context(bs);
s->url = g_strdup(file);
- state = curl_init_state(s);
+ state = curl_init_state(bs, s);
if (!state)
goto out_noclean;
@@ -582,6 +607,7 @@ out:
curl_easy_cleanup(state->curl);
state->curl = NULL;
out_noclean:
+ g_free(s->cookie);
g_free(s->url);
qemu_opts_del(opts);
return -EINVAL;
@@ -625,7 +651,7 @@ static void curl_readv_bh_cb(void *p)
}
// No cache found, so let's start a new request
- state = curl_init_state(s);
+ state = curl_init_state(acb->common.bs, s);
if (!state) {
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_release(acb);
@@ -684,6 +710,7 @@ static void curl_close(BlockDriverState *bs)
DPRINTF("CURL: Close\n");
curl_detach_aio_context(bs);
+ g_free(s->cookie);
g_free(s->url);
}
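An illustrative invocation exercising both new options, in the same json: syntax as the qemu-options.hx example below (URL and cookie value hypothetical; ,, escapes a comma inside -drive):

qemu-system-x86_64 -drive file='json:{"file.driver":"https",, "file.url":"https://example.com/disk.img",, "file.timeout":30,, "file.cookie":"session=abc123"}'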
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 7ac7e8c99c..9aca758b10 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -51,6 +51,12 @@ struct qemu_laio_state {
/* io queue for submit at batch */
LaioQueue io_q;
+
+ /* I/O completion processing */
+ QEMUBH *completion_bh;
+ struct io_event events[MAX_EVENTS];
+ int event_idx;
+ int event_max;
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -86,27 +92,58 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
qemu_aio_release(laiocb);
}
-static void qemu_laio_completion_cb(EventNotifier *e)
+/* The completion BH fetches completed I/O requests and invokes their
+ * callbacks.
+ *
+ * The function is somewhat tricky because it supports nested event loops, for
+ * example when a request callback invokes aio_poll(). In order to do this,
+ * the completion events array and index are kept in qemu_laio_state. The BH
+ * reschedules itself as long as there are completions pending so it will
+ * either be called again in a nested event loop or will be called after all
+ * events have been completed. When there are no events left to complete, the
+ * BH returns without rescheduling.
+ */
+static void qemu_laio_completion_bh(void *opaque)
{
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
- while (event_notifier_test_and_clear(&s->e)) {
- struct io_event events[MAX_EVENTS];
- struct timespec ts = { 0 };
- int nevents, i;
+ struct qemu_laio_state *s = opaque;
+ /* Fetch more completion events when empty */
+ if (s->event_idx == s->event_max) {
do {
- nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
- } while (nevents == -EINTR);
+ struct timespec ts = { 0 };
+ s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
+ s->events, &ts);
+ } while (s->event_max == -EINTR);
+
+ s->event_idx = 0;
+ if (s->event_max <= 0) {
+ s->event_max = 0;
+ return; /* no more events */
+ }
+ }
- for (i = 0; i < nevents; i++) {
- struct iocb *iocb = events[i].obj;
- struct qemu_laiocb *laiocb =
- container_of(iocb, struct qemu_laiocb, iocb);
+ /* Reschedule so nested event loops see currently pending completions */
+ qemu_bh_schedule(s->completion_bh);
- laiocb->ret = io_event_ret(&events[i]);
- qemu_laio_process_completion(s, laiocb);
- }
+ /* Process completion events */
+ while (s->event_idx < s->event_max) {
+ struct iocb *iocb = s->events[s->event_idx].obj;
+ struct qemu_laiocb *laiocb =
+ container_of(iocb, struct qemu_laiocb, iocb);
+
+ laiocb->ret = io_event_ret(&s->events[s->event_idx]);
+ s->event_idx++;
+
+ qemu_laio_process_completion(s, laiocb);
+ }
+}
+
+static void qemu_laio_completion_cb(EventNotifier *e)
+{
+ struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+
+ if (event_notifier_test_and_clear(&s->e)) {
+ qemu_bh_schedule(s->completion_bh);
}
}
@@ -272,12 +309,14 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
struct qemu_laio_state *s = s_;
aio_set_event_notifier(old_context, &s->e, NULL);
+ qemu_bh_delete(s->completion_bh);
}
void laio_attach_aio_context(void *s_, AioContext *new_context)
{
struct qemu_laio_state *s = s_;
+ s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
}
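The re-arm-before-processing trick described in the comment above can be shown standalone. In this sketch schedule_bh() stands in for qemu_bh_schedule() and the io_getevents() result is faked:

#include <stdbool.h>
#include <stdio.h>

static int event_idx, event_max;
static bool bh_scheduled;

static void schedule_bh(void) { bh_scheduled = true; }

static void completion_bh(void)
{
    if (event_idx == event_max) {       /* fetch more events when empty */
        event_max = 3;                  /* pretend io_getevents() found 3 */
        event_idx = 0;
    }

    /* Re-arm first: if a completion below enters a nested event loop,
     * the nested loop re-runs this BH and resumes from event_idx. */
    schedule_bh();

    while (event_idx < event_max) {
        int i = event_idx++;            /* advance before the callback */
        printf("completing request %d\n", i);
    }
}

int main(void)
{
    completion_bh();
    printf("BH left scheduled: %s\n", bh_scheduled ? "yes" : "no");
    return 0;
}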
diff --git a/block/nfs.c b/block/nfs.c
index 93d87f3256..194f301501 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -393,16 +393,20 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
(flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
errp);
if (ret < 0) {
- return ret;
+ goto out;
}
bs->total_sectors = ret;
- return 0;
+ ret = 0;
+out:
+ qemu_opts_del(opts);
+ return ret;
}
static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
diff --git a/block/quorum.c b/block/quorum.c
index 0de07bb036..093382e8f5 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -29,6 +29,7 @@
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
#define QUORUM_OPT_REWRITE "rewrite-corrupted"
+#define QUORUM_OPT_READ_PATTERN "read-pattern"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
@@ -79,6 +80,8 @@ typedef struct BDRVQuorumState {
bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
* block if Quorum is reached.
*/
+
+ QuorumReadPattern read_pattern;
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
@@ -122,6 +125,7 @@ struct QuorumAIOCB {
bool is_read;
int vote_ret;
+ int child_iter; /* which child to read in fifo pattern */
};
static bool quorum_vote(QuorumAIOCB *acb);
@@ -148,7 +152,6 @@ static AIOCBInfo quorum_aiocb_info = {
static void quorum_aio_finalize(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
int i, ret = 0;
if (acb->vote_ret) {
@@ -158,7 +161,8 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
acb->common.cb(acb->common.opaque, ret);
if (acb->is_read) {
- for (i = 0; i < s->num_children; i++) {
+        /* in the quorum case acb->child_iter == s->num_children - 1 */
+ for (i = 0; i <= acb->child_iter; i++) {
qemu_vfree(acb->qcrs[i].buf);
qemu_iovec_destroy(&acb->qcrs[i].qiov);
}
@@ -261,6 +265,21 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret)
quorum_aio_finalize(acb);
}
+static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb);
+
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+ int i;
+ assert(dest->niov == source->niov);
+ assert(dest->size == source->size);
+ for (i = 0; i < source->niov; i++) {
+ assert(dest->iov[i].iov_len == source->iov[i].iov_len);
+ memcpy(dest->iov[i].iov_base,
+ source->iov[i].iov_base,
+ source->iov[i].iov_len);
+ }
+}
+
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
@@ -268,6 +287,21 @@ static void quorum_aio_cb(void *opaque, int ret)
BDRVQuorumState *s = acb->common.bs->opaque;
bool rewrite = false;
+ if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
+        /* We try to read the next child in FIFO order if a read fails */
+ if (ret < 0 && ++acb->child_iter < s->num_children) {
+ read_fifo_child(acb);
+ return;
+ }
+
+ if (ret == 0) {
+ quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov);
+ }
+ acb->vote_ret = ret;
+ quorum_aio_finalize(acb);
+ return;
+ }
+
sacb->ret = ret;
acb->count++;
if (ret == 0) {
@@ -348,19 +382,6 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
return count;
}
-static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
-{
- int i;
- assert(dest->niov == source->niov);
- assert(dest->size == source->size);
- for (i = 0; i < source->niov; i++) {
- assert(dest->iov[i].iov_len == source->iov[i].iov_len);
- memcpy(dest->iov[i].iov_base,
- source->iov[i].iov_base,
- source->iov[i].iov_len);
- }
-}
-
static void quorum_count_vote(QuorumVotes *votes,
QuorumVoteValue *value,
int index)
@@ -620,34 +641,62 @@ free_exit:
return rewrite;
}
-static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
+static BlockDriverAIOCB *read_quorum_children(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
- nb_sectors, cb, opaque);
+ BDRVQuorumState *s = acb->common.bs->opaque;
int i;
- acb->is_read = true;
-
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
- qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
- qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
+ acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+ qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
+ qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
}
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
- quorum_aio_cb, &acb->qcrs[i]);
+ bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
+ acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
}
+static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb)
+{
+ BDRVQuorumState *s = acb->common.bs->opaque;
+
+ acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
+ acb->qiov->size);
+ qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
+ qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
+ acb->qcrs[acb->child_iter].buf);
+ bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
+ &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVQuorumState *s = bs->opaque;
+ QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+ nb_sectors, cb, opaque);
+ acb->is_read = true;
+
+ if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
+ acb->child_iter = s->num_children - 1;
+ return read_quorum_children(acb);
+ }
+
+ acb->child_iter = 0;
+ return read_fifo_child(acb);
+}
+
static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -787,16 +836,39 @@ static QemuOptsList quorum_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Rewrite corrupted block on read quorum",
},
+ {
+ .name = QUORUM_OPT_READ_PATTERN,
+ .type = QEMU_OPT_STRING,
+ .help = "Allowed pattern: quorum, fifo. Quorum is default",
+ },
{ /* end of list */ }
},
};
+static int parse_read_pattern(const char *opt)
+{
+ int i;
+
+ if (!opt) {
+ /* Set quorum as default */
+ return QUORUM_READ_PATTERN_QUORUM;
+ }
+
+ for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
+ if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
+ return i;
+ }
+ }
+
+ return -EINVAL;
+}
+
static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQuorumState *s = bs->opaque;
Error *local_err = NULL;
- QemuOpts *opts;
+ QemuOpts *opts = NULL;
bool *opened;
QDict *sub = NULL;
QList *list = NULL;
@@ -832,28 +904,37 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
-
- /* and validate it against s->num_children */
- ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+ ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
if (ret < 0) {
+ error_setg(&local_err, "Please set read-pattern as fifo or quorum");
goto exit;
}
+ s->read_pattern = ret;
- /* is the driver in blkverify mode */
- if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
- s->num_children == 2 && s->threshold == 2) {
- s->is_blkverify = true;
- } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
- fprintf(stderr, "blkverify mode is set by setting blkverify=on "
- "and using two files with vote_threshold=2\n");
- }
+ if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
+ /* and validate it against s->num_children */
+ ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+ if (ret < 0) {
+ goto exit;
+ }
- s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
- if (s->rewrite_corrupted && s->is_blkverify) {
- error_setg(&local_err,
- "rewrite-corrupted=on cannot be used with blkverify=on");
- ret = -EINVAL;
- goto exit;
+ /* is the driver in blkverify mode */
+ if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
+ s->num_children == 2 && s->threshold == 2) {
+ s->is_blkverify = true;
+ } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
+ fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+ "and using two files with vote_threshold=2\n");
+ }
+
+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE,
+ false);
+ if (s->rewrite_corrupted && s->is_blkverify) {
+ error_setg(&local_err,
+ "rewrite-corrupted=on cannot be used with blkverify=on");
+ ret = -EINVAL;
+ goto exit;
+ }
}
/* allocate the children BlockDriverState array */
@@ -908,6 +989,7 @@ close_exit:
g_free(s->bs);
g_free(opened);
exit:
+ qemu_opts_del(opts);
/* propagate error */
if (local_err) {
error_propagate(errp, local_err);
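An illustrative command line selecting the new pattern (filenames hypothetical; vote-threshold is still accepted but is only validated for the quorum pattern):

qemu-system-x86_64 -drive driver=quorum,read-pattern=fifo,vote-threshold=1,children.0.file.filename=disk0.qcow2,children.1.file.filename=disk1.qcow2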
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 12cbd9dcb4..f91afc3a5b 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -103,6 +103,9 @@
#define SD_INODE_SIZE (sizeof(SheepdogInode))
#define CURRENT_VDI_ID 0
+#define LOCK_TYPE_NORMAL 0
+#define LOCK_TYPE_SHARED 1 /* for iSCSI multipath */
+
typedef struct SheepdogReq {
uint8_t proto_ver;
uint8_t opcode;
@@ -166,7 +169,8 @@ typedef struct SheepdogVdiReq {
uint8_t copy_policy;
uint8_t reserved[2];
uint32_t snapid;
- uint32_t pad[3];
+ uint32_t type;
+ uint32_t pad[2];
} SheepdogVdiReq;
typedef struct SheepdogVdiRsp {
@@ -712,7 +716,6 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
static coroutine_fn void reconnect_to_sdog(void *opaque)
{
- Error *local_err = NULL;
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
@@ -727,6 +730,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
/* Try to reconnect the sheepdog server every one second. */
while (s->fd < 0) {
+ Error *local_err = NULL;
s->fd = get_sheep_fd(s, &local_err);
if (s->fd < 0) {
DPRINTF("Wait for connection to be established\n");
@@ -1090,6 +1094,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
memset(&hdr, 0, sizeof(hdr));
if (lock) {
hdr.opcode = SD_OP_LOCK_VDI;
+ hdr.type = LOCK_TYPE_NORMAL;
} else {
hdr.opcode = SD_OP_GET_VDI_INFO;
}
@@ -1110,6 +1115,8 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
sd_strerror(rsp->result), filename, snapid, tag);
if (rsp->result == SD_RES_NO_VDI) {
ret = -ENOENT;
+ } else if (rsp->result == SD_RES_VDI_LOCKED) {
+ ret = -EBUSY;
} else {
ret = -EIO;
}
@@ -1793,6 +1800,7 @@ static void sd_close(BlockDriverState *bs)
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_RELEASE_VDI;
+ hdr.type = LOCK_TYPE_NORMAL;
hdr.base_vdi_id = s->inode.vdi_id;
wlen = strlen(s->name) + 1;
hdr.data_length = wlen;
diff --git a/blockdev.c b/blockdev.c
index 6a204c662d..e37b068e9e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1757,6 +1757,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
{
const char *id = qdict_get_str(qdict, "id");
BlockDriverState *bs;
+ AioContext *aio_context;
Error *local_err = NULL;
bs = bdrv_find(id);
@@ -1764,9 +1765,14 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
error_report("Device '%s' not found", id);
return -1;
}
+
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
error_report("%s", error_get_pretty(local_err));
error_free(local_err);
+ aio_context_release(aio_context);
return -1;
}
@@ -1790,6 +1796,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
drive_del(drive_get_by_blockdev(bs));
}
+ aio_context_release(aio_context);
return 0;
}
@@ -2179,11 +2186,12 @@ void qmp_drive_mirror(const char *device, const char *target,
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
- error_set(errp, QERR_INVALID_PARAMETER, device);
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
+ "a value in range [512B, 64MB]");
return;
}
if (granularity & (granularity - 1)) {
- error_set(errp, QERR_INVALID_PARAMETER, device);
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", "power of 2");
return;
}
diff --git a/blockjob.c b/blockjob.c
index ca0b4e25d0..0689fdd2b5 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -205,7 +205,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
if (block_job_is_paused(job)) {
qemu_coroutine_yield();
} else {
- co_sleep_ns(type, ns);
+ co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
}
job->busy = true;
}
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c07adc6e4f..b55188cb82 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -194,6 +194,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
error_setg(&s->blocker, "block device is in use by data plane");
bdrv_op_block_all(blk->conf.bs, s->blocker);
bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_RESIZE, s->blocker);
+ bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
*dataplane = s;
}
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index b4a467116e..efab95b320 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -59,7 +59,7 @@ static char *idebus_get_fw_dev_path(DeviceState *dev)
{
char path[30];
- snprintf(path, sizeof(path), "%s@%d", qdev_fw_name(dev),
+ snprintf(path, sizeof(path), "%s@%x", qdev_fw_name(dev),
((IDEBus*)dev->parent_bus)->bus_id);
return g_strdup(path);
diff --git a/include/block/aio.h b/include/block/aio.h
index c23de3cd1f..4603c0f066 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -205,12 +205,25 @@ void qemu_bh_cancel(QEMUBH *bh);
void qemu_bh_delete(QEMUBH *bh);
/* Return whether there are any pending callbacks from the GSource
- * attached to the AioContext.
+ * attached to the AioContext, before g_poll is invoked.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_prepare(AioContext *ctx);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext, after g_poll is invoked.
*
* This is used internally in the implementation of the GSource.
*/
bool aio_pending(AioContext *ctx);
+/* Dispatch any pending callbacks from the GSource attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_dispatch(AioContext *ctx);
+
/* Progress in completing AIO work to occur. This can issue new pending
* aio as a result of executing I/O completion or bh callbacks.
*
@@ -226,7 +239,6 @@ bool aio_pending(AioContext *ctx);
*/
bool aio_poll(AioContext *ctx, bool blocking);
-#ifdef CONFIG_POSIX
/* Register a file descriptor and associated callbacks. Behaves very similarly
* to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
* be invoked when using aio_poll().
@@ -239,7 +251,6 @@ void aio_set_fd_handler(AioContext *ctx,
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
-#endif
/* Register an event notifier and associated callbacks. Behaves very similarly
* to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
@@ -303,4 +314,12 @@ static inline void aio_timer_init(AioContext *ctx,
timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
}
+/**
+ * aio_compute_timeout:
+ * @ctx: the aio context
+ *
+ * Compute the timeout that a blocking aio_poll should use.
+ */
+int64_t aio_compute_timeout(AioContext *ctx);
+
#endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 233489547e..8a61215ac0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -294,6 +294,15 @@ typedef struct BlockLimits {
typedef struct BdrvOpBlocker BdrvOpBlocker;
+typedef struct BdrvAioNotifier {
+ void (*attached_aio_context)(AioContext *new_context, void *opaque);
+ void (*detach_aio_context)(void *opaque);
+
+ void *opaque;
+
+ QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
/*
* Note: the function bdrv_append() copies and swaps contents of
* BlockDriverStates, so if you add new fields to this struct, please
@@ -320,6 +329,10 @@ struct BlockDriverState {
void *dev_opaque;
AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+ /* long-running tasks intended to always use the same AioContext as this
+ * BDS may register themselves in this list to be notified of changes
+ * regarding this BDS's context */
+ QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
char filename[1024];
char backing_file[1024]; /* if non zero, the image is a diff of
@@ -437,6 +450,34 @@ void bdrv_detach_aio_context(BlockDriverState *bs);
void bdrv_attach_aio_context(BlockDriverState *bs,
AioContext *new_context);
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe from change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+ void (*aio_context_attached)(AioContext *,
+ void *),
+ void (*aio_context_detached)(void *),
+ void *opaque);
+
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b9b7f488c9..793df0ef8b 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -203,14 +203,6 @@ void qemu_co_rwlock_unlock(CoRwlock *lock);
/**
* Yield the coroutine for a given duration
*
- * Note this function uses timers and hence only works when a main loop is in
- * use. See main-loop.h and do not use from qemu-tool programs.
- */
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
-
-/**
- * Yield the coroutine for a given duration
- *
* Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
* resumed when using aio_poll().
*/
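For callers the migration away from the removed co_sleep_ns() is mechanical; a before/after sketch (job is hypothetical, mirroring the blockjob.c hunk above):

    /* old: tied to the main loop's timers */
    co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000000);

    /* new: sleeps on the BDS's own AioContext, so it also wakes up when
     * that context is driven by aio_poll() */
    co_aio_sleep_ns(bdrv_get_aio_context(job->bs), QEMU_CLOCK_REALTIME,
                    1000000000);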
diff --git a/nbd.c b/nbd.c
index e7d1ceec43..e9b539be58 100644
--- a/nbd.c
+++ b/nbd.c
@@ -18,6 +18,7 @@
#include "block/nbd.h"
#include "block/block.h"
+#include "block/block_int.h"
#include "block/coroutine.h"
@@ -107,6 +108,8 @@ struct NBDExport {
uint32_t nbdflags;
QTAILQ_HEAD(, NBDClient) clients;
QTAILQ_ENTRY(NBDExport) next;
+
+ AioContext *ctx;
};
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -123,6 +126,8 @@ struct NBDClient {
CoMutex send_lock;
Coroutine *send_coroutine;
+ bool can_read;
+
QTAILQ_ENTRY(NBDClient) next;
int nb_requests;
bool closing;
@@ -130,6 +135,10 @@ struct NBDClient {
/* That's all folks */
+static void nbd_set_handlers(NBDClient *client);
+static void nbd_unset_handlers(NBDClient *client);
+static void nbd_update_can_read(NBDClient *client);
+
ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
size_t offset = 0;
@@ -156,7 +165,7 @@ ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
err = socket_error();
/* recoverable error */
- if (err == EINTR || (offset > 0 && err == EAGAIN)) {
+ if (err == EINTR || (offset > 0 && (err == EAGAIN || err == EWOULDBLOCK))) {
continue;
}
@@ -862,7 +871,7 @@ void nbd_client_put(NBDClient *client)
*/
assert(client->closing);
- qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
+ nbd_unset_handlers(client);
close(client->sock);
client->sock = -1;
if (client->exp) {
@@ -898,6 +907,7 @@ static NBDRequest *nbd_request_get(NBDClient *client)
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
client->nb_requests++;
+ nbd_update_can_read(client);
req = g_slice_new0(NBDRequest);
nbd_client_get(client);
@@ -914,12 +924,39 @@ static void nbd_request_put(NBDRequest *req)
}
g_slice_free(NBDRequest, req);
- if (client->nb_requests-- == MAX_NBD_REQUESTS) {
- qemu_notify_event();
- }
+ client->nb_requests--;
+ nbd_update_can_read(client);
nbd_client_put(client);
}
+static void bs_aio_attached(AioContext *ctx, void *opaque)
+{
+ NBDExport *exp = opaque;
+ NBDClient *client;
+
+ TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
+
+ exp->ctx = ctx;
+
+ QTAILQ_FOREACH(client, &exp->clients, next) {
+ nbd_set_handlers(client);
+ }
+}
+
+static void bs_aio_detach(void *opaque)
+{
+ NBDExport *exp = opaque;
+ NBDClient *client;
+
+ TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
+
+ QTAILQ_FOREACH(client, &exp->clients, next) {
+ nbd_unset_handlers(client);
+ }
+
+ exp->ctx = NULL;
+}
+
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
off_t size, uint32_t nbdflags,
void (*close)(NBDExport *))
@@ -932,7 +969,9 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
exp->nbdflags = nbdflags;
exp->size = size == -1 ? bdrv_getlength(bs) : size;
exp->close = close;
+ exp->ctx = bdrv_get_aio_context(bs);
bdrv_ref(bs);
+ bdrv_add_aio_context_notifier(bs, bs_aio_attached, bs_aio_detach, exp);
return exp;
}
@@ -980,6 +1019,8 @@ void nbd_export_close(NBDExport *exp)
nbd_export_set_name(exp, NULL);
nbd_export_put(exp);
if (exp->bs) {
+ bdrv_remove_aio_context_notifier(exp->bs, bs_aio_attached,
+ bs_aio_detach, exp);
bdrv_unref(exp->bs);
exp->bs = NULL;
}
@@ -1023,10 +1064,6 @@ void nbd_export_close_all(void)
}
}
-static int nbd_can_read(void *opaque);
-static void nbd_read(void *opaque);
-static void nbd_restart_write(void *opaque);
-
static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
int len)
{
@@ -1035,9 +1072,8 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
ssize_t rc, ret;
qemu_co_mutex_lock(&client->send_lock);
- qemu_set_fd_handler2(csock, nbd_can_read, nbd_read,
- nbd_restart_write, client);
client->send_coroutine = qemu_coroutine_self();
+ nbd_set_handlers(client);
if (!len) {
rc = nbd_send_reply(csock, reply);
@@ -1054,7 +1090,7 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
}
client->send_coroutine = NULL;
- qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+ nbd_set_handlers(client);
qemu_co_mutex_unlock(&client->send_lock);
return rc;
}
@@ -1067,6 +1103,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
ssize_t rc;
client->recv_coroutine = qemu_coroutine_self();
+ nbd_update_can_read(client);
+
rc = nbd_receive_request(csock, request);
if (rc < 0) {
if (rc != -EAGAIN) {
@@ -1108,6 +1146,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
out:
client->recv_coroutine = NULL;
+ nbd_update_can_read(client);
+
return rc;
}
@@ -1259,13 +1299,6 @@ out:
nbd_client_close(client);
}
-static int nbd_can_read(void *opaque)
-{
- NBDClient *client = opaque;
-
- return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
-}
-
static void nbd_read(void *opaque)
{
NBDClient *client = opaque;
@@ -1284,6 +1317,37 @@ static void nbd_restart_write(void *opaque)
qemu_coroutine_enter(client->send_coroutine, NULL);
}
+static void nbd_set_handlers(NBDClient *client)
+{
+ if (client->exp && client->exp->ctx) {
+ aio_set_fd_handler(client->exp->ctx, client->sock,
+ client->can_read ? nbd_read : NULL,
+ client->send_coroutine ? nbd_restart_write : NULL,
+ client);
+ }
+}
+
+static void nbd_unset_handlers(NBDClient *client)
+{
+ if (client->exp && client->exp->ctx) {
+ aio_set_fd_handler(client->exp->ctx, client->sock, NULL, NULL, NULL);
+ }
+}
+
+static void nbd_update_can_read(NBDClient *client)
+{
+ bool can_read = client->recv_coroutine ||
+ client->nb_requests < MAX_NBD_REQUESTS;
+
+ if (can_read != client->can_read) {
+ client->can_read = can_read;
+ nbd_set_handlers(client);
+
+ /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
+ * in nbd_set_handlers() will have taken care of that */
+ }
+}
+
NBDClient *nbd_client_new(NBDExport *exp, int csock,
void (*close)(NBDClient *))
{
@@ -1292,13 +1356,14 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock,
client->refcount = 1;
client->exp = exp;
client->sock = csock;
+ client->can_read = true;
if (nbd_send_negotiate(client)) {
g_free(client);
return NULL;
}
client->close = close;
qemu_co_mutex_init(&client->send_lock);
- qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+ nbd_set_handlers(client);
if (exp) {
QTAILQ_INSERT_TAIL(&exp->clients, client, next);
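The can_read toggle amounts to a small backpressure idiom; a condensed sketch combining nbd_update_can_read() and nbd_set_handlers() (Client, client_read and client_restart_write are hypothetical stand-ins):

typedef struct Client {
    AioContext *ctx;
    int sock;
    Coroutine *recv_coroutine, *send_coroutine;
    int nb_requests;
    bool can_read;
} Client;

static void update_can_read(Client *c)
{
    bool can_read = c->recv_coroutine ||
                    c->nb_requests < MAX_NBD_REQUESTS;

    if (can_read != c->can_read) {
        c->can_read = can_read;
        /* Passing io_read == NULL stops read notifications, so a busy
         * server simply stops watching the socket until a request slot
         * frees up; no extra aio_notify() is needed beyond what
         * aio_set_fd_handler() already does. */
        aio_set_fd_handler(c->ctx, c->sock,
                           can_read ? client_read : NULL,
                           c->send_coroutine ? client_restart_write : NULL,
                           c);
    }
}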
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fb74c56e32..a685d02728 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1419,6 +1419,19 @@
'raw': 'BlockdevRef' } }
##
+# @QuorumReadPattern
+#
+# An enumeration of quorum read patterns.
+#
+# @quorum: read all the children and do a quorum vote on reads
+#
+# @fifo: read only from the first child that has not failed
+#
+# Since: 2.2
+##
+{ 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] }
+
+##
# @BlockdevOptionsQuorum
#
# Driver specific block device options for Quorum
@@ -1433,12 +1446,17 @@
# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
# (Since 2.1)
#
+# @read-pattern: #optional choose the read pattern; defaults to quorum
+# (Since 2.2)
+#
# Since: 2.0
##
{ 'type': 'BlockdevOptionsQuorum',
'data': { '*blkverify': 'bool',
'children': [ 'BlockdevRef' ],
- 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
+ 'vote-threshold': 'int',
+ '*rewrite-corrupted': 'bool',
+ '*read-pattern': 'QuorumReadPattern' } }
##
# @BlockdevOptions
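An illustrative options object using the new member, e.g. for blockdev-add (filenames hypothetical):

{ "driver": "quorum",
  "read-pattern": "fifo",
  "vote-threshold": 1,
  "children": [ { "driver": "file", "filename": "disk0.img" },
                { "driver": "file", "filename": "disk1.img" } ] }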
diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c
index 054ca70627..d4049260da 100644
--- a/qemu-coroutine-io.c
+++ b/qemu-coroutine-io.c
@@ -34,13 +34,15 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
{
size_t done = 0;
ssize_t ret;
+ int err;
while (done < bytes) {
ret = iov_send_recv(sockfd, iov, iov_cnt,
offset + done, bytes - done, do_send);
if (ret > 0) {
done += ret;
} else if (ret < 0) {
- if (errno == EAGAIN) {
+ err = socket_error();
+ if (err == EAGAIN || err == EWOULDBLOCK) {
qemu_coroutine_yield();
} else if (done == 0) {
return -1;
diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c
index ad78fbaa2a..9abb7fdf31 100644
--- a/qemu-coroutine-sleep.c
+++ b/qemu-coroutine-sleep.c
@@ -27,18 +27,6 @@ static void co_sleep_cb(void *opaque)
qemu_coroutine_enter(sleep_cb->co, NULL);
}
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns)
-{
- CoSleepCB sleep_cb = {
- .co = qemu_coroutine_self(),
- };
- sleep_cb.ts = timer_new(type, SCALE_NS, co_sleep_cb, &sleep_cb);
- timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
- qemu_coroutine_yield();
- timer_del(sleep_cb.ts);
- timer_free(sleep_cb.ts);
-}
-
void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
int64_t ns)
{
diff --git a/qemu-img.c b/qemu-img.c
index 2052b14b84..ff29ed1c67 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -752,7 +752,7 @@ static int img_commit(int argc, char **argv)
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
- return -1;
+ return 1;
}
bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
@@ -999,6 +999,9 @@ static int img_compare(int argc, char **argv)
filename1 = argv[optind++];
filename2 = argv[optind++];
+ /* Initialize before goto out */
+ qemu_progress_init(progress, 2.0);
+
flags = BDRV_O_FLAGS;
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
@@ -1007,9 +1010,6 @@ static int img_compare(int argc, char **argv)
goto out3;
}
- /* Initialize before goto out */
- qemu_progress_init(progress, 2.0);
-
bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
if (!bs1) {
error_report("Can't open file %s", filename1);
@@ -2304,7 +2304,7 @@ static int img_snapshot(int argc, char **argv)
static int img_rebase(int argc, char **argv)
{
- BlockDriverState *bs, *bs_old_backing = NULL, *bs_new_backing = NULL;
+ BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL;
BlockDriver *old_backing_drv, *new_backing_drv;
char *filename;
const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
@@ -2376,14 +2376,14 @@ static int img_rebase(int argc, char **argv)
ret = bdrv_parse_cache_flags(cache, &flags);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
- return -1;
+ goto out;
}
src_flags = BDRV_O_FLAGS;
ret = bdrv_parse_cache_flags(src_cache, &src_flags);
if (ret < 0) {
error_report("Invalid source cache option: %s", src_cache);
- return -1;
+ goto out;
}
/*
@@ -2394,7 +2394,8 @@ static int img_rebase(int argc, char **argv)
*/
bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
if (!bs) {
- return 1;
+ ret = -1;
+ goto out;
}
/* Find the right drivers for the backing files */
@@ -2420,11 +2421,7 @@ static int img_rebase(int argc, char **argv)
}
/* For safe rebasing we need to compare old and new backing file */
- if (unsafe) {
- /* Make the compiler happy */
- bs_old_backing = NULL;
- bs_new_backing = NULL;
- } else {
+ if (!unsafe) {
char backing_name[1024];
bs_old_backing = bdrv_new("old_backing", &error_abort);
diff --git a/qemu-options.hx b/qemu-options.hx
index c573dd8893..5479cf54f4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2351,6 +2351,16 @@ multiple of 512 bytes. It defaults to 256k.
@item sslverify
Whether to verify the remote server's certificate when connecting over SSL. It
can have the value 'on' or 'off'. It defaults to 'on'.
+
+@item cookie
+Send this cookie (it can also be a list of cookies separated by ';') with
+each outgoing request. Only supported when using protocols such as HTTP
+which support cookies, otherwise ignored.
+
+@item timeout
+Set the timeout in seconds of the CURL connection. This timeout is the time
+that CURL waits for a response from the remote server to get the size of the
+image to be downloaded. If not set, the default timeout of 5 seconds is used.
@end table
Note that when passing options to qemu explicitly, @option{driver} is the value
@@ -2372,9 +2382,10 @@ qemu-system-x86_64 -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-o
@end example
Example: boot from an image stored on a VMware vSphere server with a self-signed
-certificate using a local overlay for writes and a readahead of 64k
+certificate using a local overlay for writes, a readahead of 64k and a timeout
+of 10 seconds.
@example
-qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k"@}' /tmp/test.qcow2
+qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2
qemu-system-x86_64 -drive file=/tmp/test.qcow2
@end example
diff --git a/tests/qemu-iotests/100 b/tests/qemu-iotests/100
new file mode 100755
index 0000000000..9124aba760
--- /dev/null
+++ b/tests/qemu-iotests/100
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Test multiwrite with sequential, overlapping and disjoint requests
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+
+size=128M
+
+echo
+echo "== Single request =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Sequential requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 4k 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 8k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Superset overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 1k 2k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed, so we cannot
+# verify [1k, 3k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xcd 0 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xcd 3k 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Subset overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 1k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed, so we cannot
+# verify [1k, 3k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 0 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 3k 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Head overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed, so we cannot
+# verify [0k, 2k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 2k 2k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Tail overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 2k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed, so we cannot
+# verify [2k, 4k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 0k 2k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Disjoint requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 64k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 60k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 64k 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 68k 4k" "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
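As with the rest of the suite, the new case can be run on its own from
tests/qemu-iotests, for instance:

    ./check -qcow2 100

The _filter_qemu_io filter scrubs throughput and timing figures (the X/XX
placeholders in the reference output below), so the expected output stays
deterministic across machines.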
diff --git a/tests/qemu-iotests/100.out b/tests/qemu-iotests/100.out
new file mode 100644
index 0000000000..2d6e9f0a7d
--- /dev/null
+++ b/tests/qemu-iotests/100.out
@@ -0,0 +1,89 @@
+QA output created by 100
+
+== Single request ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Sequential requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 8192/8192 bytes at offset 0
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 8192
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Superset overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 6144/6144 bytes at offset 0
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 3072
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Subset overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 6144/6144 bytes at offset 1024
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 3072
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Head overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 6144/6144 bytes at offset 0
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 2048/2048 bytes at offset 2048
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Tail overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 6144/6144 bytes at offset 2048
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Disjoint requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+wrote 8192/8192 bytes at offset 0
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 61440/61440 bytes at offset 4096
+60 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 65536
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 69632
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 2803d68d62..0920b28db4 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -101,5 +101,6 @@
092 rw auto quick
095 rw auto quick
099 rw auto quick
+100 rw auto quick
101 rw auto quick
103 rw auto quick
diff --git a/tests/test-aio.c b/tests/test-aio.c
index f12b6e0ae8..c6a8713e83 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -57,8 +57,6 @@ static void bh_test_cb(void *opaque)
}
}
-#if !defined(_WIN32)
-
static void timer_test_cb(void *opaque)
{
TimerTestData *data = opaque;
@@ -68,12 +66,10 @@ static void timer_test_cb(void *opaque)
}
}
-static void dummy_io_handler_read(void *opaque)
+static void dummy_io_handler_read(EventNotifier *e)
{
}
-#endif /* !_WIN32 */
-
static void bh_delete_cb(void *opaque)
{
BHTestData *data = opaque;
@@ -428,24 +424,18 @@ static void test_wait_event_notifier_noflush(void)
event_notifier_cleanup(&data.e);
}
-#if !defined(_WIN32)
-
static void test_timer_schedule(void)
{
TimerTestData data = { .n = 0, .ctx = ctx, .ns = SCALE_MS * 750LL,
.max = 2,
.clock_type = QEMU_CLOCK_VIRTUAL };
- int pipefd[2];
+ EventNotifier e;
/* aio_poll will not block to wait for timers to complete unless it has
* an fd to wait on. Fixing this breaks other tests. So create a dummy one.
*/
- g_assert(!qemu_pipe(pipefd));
- qemu_set_nonblock(pipefd[0]);
- qemu_set_nonblock(pipefd[1]);
-
- aio_set_fd_handler(ctx, pipefd[0],
- dummy_io_handler_read, NULL, NULL);
+ event_notifier_init(&e, false);
+ aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
aio_poll(ctx, false);
aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -484,15 +474,12 @@ static void test_timer_schedule(void)
g_assert(!aio_poll(ctx, false));
g_assert_cmpint(data.n, ==, 2);
- aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
- close(pipefd[0]);
- close(pipefd[1]);
+ aio_set_event_notifier(ctx, &e, NULL);
+ event_notifier_cleanup(&e);
timer_del(&data.timer);
}
-#endif /* !_WIN32 */
-
/* Now the same tests, using the context as a GSource. They are
* very similar to the ones above, with g_main_context_iteration
* replacing aio_poll. However:
@@ -775,25 +762,19 @@ static void test_source_wait_event_notifier_noflush(void)
event_notifier_cleanup(&data.e);
}
-#if !defined(_WIN32)
-
static void test_source_timer_schedule(void)
{
TimerTestData data = { .n = 0, .ctx = ctx, .ns = SCALE_MS * 750LL,
.max = 2,
.clock_type = QEMU_CLOCK_VIRTUAL };
- int pipefd[2];
+ EventNotifier e;
int64_t expiry;
/* aio_poll will not block to wait for timers to complete unless it has
* an fd to wait on. Fixing this breaks other tests. So create a dummy one.
*/
- g_assert(!qemu_pipe(pipefd));
- qemu_set_nonblock(pipefd[0]);
- qemu_set_nonblock(pipefd[1]);
-
- aio_set_fd_handler(ctx, pipefd[0],
- dummy_io_handler_read, NULL, NULL);
+ event_notifier_init(&e, false);
+ aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
do {} while (g_main_context_iteration(NULL, false));
aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -818,15 +799,12 @@ static void test_source_timer_schedule(void)
g_assert_cmpint(data.n, ==, 2);
g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
- aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
- close(pipefd[0]);
- close(pipefd[1]);
+ aio_set_event_notifier(ctx, &e, NULL);
+ event_notifier_cleanup(&e);
timer_del(&data.timer);
}
-#endif /* !_WIN32 */
-
/* End of tests. */
@@ -857,9 +835,7 @@ int main(int argc, char **argv)
g_test_add_func("/aio/event/wait", test_wait_event_notifier);
g_test_add_func("/aio/event/wait/no-flush-cb", test_wait_event_notifier_noflush);
g_test_add_func("/aio/event/flush", test_flush_event_notifier);
-#if !defined(_WIN32)
g_test_add_func("/aio/timer/schedule", test_timer_schedule);
-#endif
g_test_add_func("/aio-gsource/notify", test_source_notify);
g_test_add_func("/aio-gsource/flush", test_source_flush);
@@ -874,8 +850,6 @@ int main(int argc, char **argv)
g_test_add_func("/aio-gsource/event/wait", test_source_wait_event_notifier);
g_test_add_func("/aio-gsource/event/wait/no-flush-cb", test_source_wait_event_notifier_noflush);
g_test_add_func("/aio-gsource/event/flush", test_source_flush_event_notifier);
-#if !defined(_WIN32)
g_test_add_func("/aio-gsource/timer/schedule", test_source_timer_schedule);
-#endif
return g_test_run();
}
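Taken together, the test-aio.c hunks replace the pipe-specific scaffolding
with one portable pattern: EventNotifier wraps an eventfd (or pipe) on POSIX
hosts and an event object on Windows, which is what lets the !_WIN32 guards
disappear. Condensed from the hunks above, with the timer logic elided:

    EventNotifier e;

    /* Give aio_poll something to wait on; the handler body stays empty. */
    event_notifier_init(&e, false);
    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);

    /* ... run the timer test ... */

    aio_set_event_notifier(ctx, &e, NULL);   /* unregister the notifier */
    event_notifier_cleanup(&e);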