aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile3
-rw-r--r--Makefile.objs11
-rw-r--r--async.c98
-rw-r--r--block.c310
-rw-r--r--block.h7
-rw-r--r--block/qcow.c180
-rw-r--r--block/qcow2-cluster.c26
-rw-r--r--block/qcow2.c240
-rw-r--r--block/qcow2.h5
-rw-r--r--block/qed-table.c14
-rw-r--r--block/qed.c4
-rw-r--r--block/raw-posix.c39
-rw-r--r--block/raw-win32.c35
-rw-r--r--block/raw.c7
-rw-r--r--block/vpc.c8
-rw-r--r--block_int.h10
-rw-r--r--blockdev.c17
-rwxr-xr-xconfigure18
-rw-r--r--coroutine-gthread.c131
-rw-r--r--coroutine-ucontext.c230
-rw-r--r--coroutine-win32.c92
-rw-r--r--hw/scsi-bus.c74
-rw-r--r--hw/scsi-defs.h62
-rw-r--r--hw/scsi-disk.c79
-rw-r--r--hw/scsi-generic.c2
-rw-r--r--linux-aio.c43
-rw-r--r--posix-aio-compat.c30
-rw-r--r--qemu-common.h4
-rw-r--r--qemu-coroutine-int.h49
-rw-r--r--qemu-coroutine-lock.c117
-rw-r--r--qemu-coroutine.c75
-rw-r--r--qemu-coroutine.h159
-rw-r--r--test-coroutine.c192
-rw-r--r--trace-events16
35 files changed, 1721 insertions, 667 deletions
diff --git a/.gitignore b/.gitignore
index 54835bcb97..59c343c414 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ qemu-io
qemu-ga
qemu-monitor.texi
QMP/qmp-commands.txt
+test-coroutine
.gdbinit
*.a
*.aux
diff --git a/Makefile b/Makefile
index 48552512d6..2becedcf88 100644
--- a/Makefile
+++ b/Makefile
@@ -151,7 +151,7 @@ qemu-io$(EXESUF): qemu-io.o cmd.o qemu-tool.o qemu-error.o $(oslib-obj-y) $(trac
qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@")
-check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o: $(GENERATED_HEADERS)
+check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS)
CHECK_PROG_DEPS = qemu-malloc.o $(oslib-obj-y) $(trace-obj-y) qemu-tool.o
@@ -161,6 +161,7 @@ check-qdict: check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o $(C
check-qlist: check-qlist.o qlist.o qint.o $(CHECK_PROG_DEPS)
check-qfloat: check-qfloat.o qfloat.o $(CHECK_PROG_DEPS)
check-qjson: check-qjson.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o qjson.o json-streamer.o json-lexer.o json-parser.o error.o qerror.o qemu-error.o $(CHECK_PROG_DEPS)
+test-coroutine: test-coroutine.o qemu-timer-common.o async.o $(coroutine-obj-y) $(CHECK_PROG_DEPS)
$(qapi-obj-y): $(GENERATED_HEADERS)
qapi-dir := qapi-generated
diff --git a/Makefile.objs b/Makefile.objs
index 6991a9f52a..89ca3611b3 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -11,10 +11,21 @@ oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
#######################################################################
+# coroutines
+coroutine-obj-y = qemu-coroutine.o qemu-coroutine-lock.o
+ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
+coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
+else
+coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o
+endif
+coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
+
+#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o async.o
block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += $(coroutine-obj-y)
block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/async.c b/async.c
index fd313dffb7..3fe70b9deb 100644
--- a/async.c
+++ b/async.c
@@ -25,92 +25,8 @@
#include "qemu-common.h"
#include "qemu-aio.h"
-/*
- * An AsyncContext protects the callbacks of AIO requests and Bottom Halves
- * against interfering with each other. A typical example is qcow2 that accepts
- * asynchronous requests, but relies for manipulation of its metadata on
- * synchronous bdrv_read/write that doesn't trigger any callbacks.
- *
- * However, these functions are often emulated using AIO which means that AIO
- * callbacks must be run - but at the same time we must not run callbacks of
- * other requests as they might start to modify metadata and corrupt the
- * internal state of the caller of bdrv_read/write.
- *
- * To achieve the desired semantics we switch into a new AsyncContext.
- * Callbacks must only be run if they belong to the current AsyncContext.
- * Otherwise they need to be queued until their own context is active again.
- * This is how you can make qemu_aio_wait() wait only for your own callbacks.
- *
- * The AsyncContexts form a stack. When you leave a AsyncContexts, you always
- * return to the old ("parent") context.
- */
-struct AsyncContext {
- /* Consecutive number of the AsyncContext (position in the stack) */
- int id;
-
- /* Anchor of the list of Bottom Halves belonging to the context */
- struct QEMUBH *first_bh;
-
- /* Link to parent context */
- struct AsyncContext *parent;
-};
-
-/* The currently active AsyncContext */
-static struct AsyncContext *async_context = &(struct AsyncContext) { 0 };
-
-/*
- * Enter a new AsyncContext. Already scheduled Bottom Halves and AIO callbacks
- * won't be called until this context is left again.
- */
-void async_context_push(void)
-{
- struct AsyncContext *new = qemu_mallocz(sizeof(*new));
- new->parent = async_context;
- new->id = async_context->id + 1;
- async_context = new;
-}
-
-/* Run queued AIO completions and destroy Bottom Half */
-static void bh_run_aio_completions(void *opaque)
-{
- QEMUBH **bh = opaque;
- qemu_bh_delete(*bh);
- qemu_free(bh);
- qemu_aio_process_queue();
-}
-/*
- * Leave the currently active AsyncContext. All Bottom Halves belonging to the
- * old context are executed before changing the context.
- */
-void async_context_pop(void)
-{
- struct AsyncContext *old = async_context;
- QEMUBH **bh;
-
- /* Flush the bottom halves, we don't want to lose them */
- while (qemu_bh_poll());
-
- /* Switch back to the parent context */
- async_context = async_context->parent;
- qemu_free(old);
-
- if (async_context == NULL) {
- abort();
- }
-
- /* Schedule BH to run any queued AIO completions as soon as possible */
- bh = qemu_malloc(sizeof(*bh));
- *bh = qemu_bh_new(bh_run_aio_completions, bh);
- qemu_bh_schedule(*bh);
-}
-
-/*
- * Returns the ID of the currently active AsyncContext
- */
-int get_async_context_id(void)
-{
- return async_context->id;
-}
+/* Anchor of the list of Bottom Halves belonging to the context */
+static struct QEMUBH *first_bh;
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
@@ -130,8 +46,8 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
bh = qemu_mallocz(sizeof(QEMUBH));
bh->cb = cb;
bh->opaque = opaque;
- bh->next = async_context->first_bh;
- async_context->first_bh = bh;
+ bh->next = first_bh;
+ first_bh = bh;
return bh;
}
@@ -141,7 +57,7 @@ int qemu_bh_poll(void)
int ret;
ret = 0;
- for (bh = async_context->first_bh; bh; bh = next) {
+ for (bh = first_bh; bh; bh = next) {
next = bh->next;
if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0;
@@ -153,7 +69,7 @@ int qemu_bh_poll(void)
}
/* remove deleted bhs */
- bhp = &async_context->first_bh;
+ bhp = &first_bh;
while (*bhp) {
bh = *bhp;
if (bh->deleted) {
@@ -199,7 +115,7 @@ void qemu_bh_update_timeout(int *timeout)
{
QEMUBH *bh;
- for (bh = async_context->first_bh; bh; bh = bh->next) {
+ for (bh = first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
if (bh->idle) {
/* idle bottom halves will be polled at least
diff --git a/block.c b/block.c
index 9549b9eff9..26910ca143 100644
--- a/block.c
+++ b/block.c
@@ -28,6 +28,7 @@
#include "block_int.h"
#include "module.h"
#include "qemu-objects.h"
+#include "qemu-coroutine.h"
#ifdef CONFIG_BSD
#include <sys/types.h>
@@ -57,6 +58,19 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
+static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -169,14 +183,25 @@ void path_combine(char *dest, int dest_size,
void bdrv_register(BlockDriver *bdrv)
{
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- } else if (!bdrv->bdrv_read) {
- /* add synchronous IO emulation layer */
+ if (bdrv->bdrv_co_readv) {
+ /* Emulate AIO by coroutines, and sync by AIO */
+ bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
bdrv->bdrv_read = bdrv_read_em;
bdrv->bdrv_write = bdrv_write_em;
+ } else {
+ bdrv->bdrv_co_readv = bdrv_co_readv_em;
+ bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+ if (!bdrv->bdrv_aio_readv) {
+ /* add AIO emulation layer */
+ bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+ } else if (!bdrv->bdrv_read) {
+ /* add synchronous IO emulation layer */
+ bdrv->bdrv_read = bdrv_read_em;
+ bdrv->bdrv_write = bdrv_write_em;
+ }
}
if (!bdrv->bdrv_aio_flush)
@@ -730,6 +755,8 @@ void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
assert(bs->peer == qdev);
bs->peer = NULL;
+ bs->change_cb = NULL;
+ bs->change_opaque = NULL;
}
DeviceState *bdrv_get_attached(BlockDriverState *bs)
@@ -920,6 +947,17 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
nb_sectors * BDRV_SECTOR_SIZE);
}
+static inline bool bdrv_has_async_rw(BlockDriver *drv)
+{
+ return drv->bdrv_co_readv != bdrv_co_readv_em
+ || drv->bdrv_aio_readv != bdrv_aio_readv_em;
+}
+
+static inline bool bdrv_has_async_flush(BlockDriver *drv)
+{
+ return drv->bdrv_aio_flush != bdrv_aio_flush_em;
+}
+
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
@@ -928,6 +966,18 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
if (!drv)
return -ENOMEDIUM;
+
+ if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
+ }
+
if (bdrv_check_request(bs, sector_num, nb_sectors))
return -EIO;
@@ -972,8 +1022,21 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
BlockDriver *drv = bs->drv;
+
if (!bs->drv)
return -ENOMEDIUM;
+
+ if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+ }
+
if (bs->read_only)
return -EACCES;
if (bdrv_check_request(bs, sector_num, nb_sectors))
@@ -1108,17 +1171,49 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return 0;
}
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BlockDriver *drv = bs->drv;
+
+ trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
- return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
- buf, BDRV_SECTOR_SIZE * nb_sectors);
+ BlockDriver *drv = bs->drv;
+
+ trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+ if (bs->read_only) {
+ return -EACCES;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ if (bs->dirty_bitmap) {
+ set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
+ }
+
+ if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
+ bs->wr_highest_sector = sector_num + nb_sectors - 1;
+ }
+
+ return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
/**
@@ -1591,6 +1686,10 @@ int bdrv_flush(BlockDriverState *bs)
return 0;
}
+ if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
+ return bdrv_co_flush_em(bs);
+ }
+
if (bs->drv && bs->drv->bdrv_flush) {
return bs->drv->bdrv_flush(bs);
}
@@ -2580,6 +2679,89 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
+
+typedef struct BlockDriverAIOCBCoroutine {
+ BlockDriverAIOCB common;
+ BlockRequest req;
+ bool is_write;
+ QEMUBH* bh;
+} BlockDriverAIOCBCoroutine;
+
+static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
+{
+ qemu_aio_flush();
+}
+
+static AIOPool bdrv_em_co_aio_pool = {
+ .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
+ .cancel = bdrv_aio_co_cancel_em,
+};
+
+static void bdrv_co_rw_bh(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+
+ acb->common.cb(acb->common.opaque, acb->req.error);
+ qemu_bh_delete(acb->bh);
+ qemu_aio_release(acb);
+}
+
+static void coroutine_fn bdrv_co_rw(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ if (!acb->is_write) {
+ acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
+ acb->req.nb_sectors, acb->req.qiov);
+ } else {
+ acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
+ acb->req.nb_sectors, acb->req.qiov);
+ }
+
+ acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ bool is_write)
+{
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
+ acb->req.sector = sector_num;
+ acb->req.nb_sectors = nb_sectors;
+ acb->req.qiov = qiov;
+ acb->is_write = is_write;
+
+ co = qemu_coroutine_create(bdrv_co_rw);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
+ false);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
+ true);
+}
+
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
@@ -2636,8 +2818,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
struct iovec iov;
QEMUIOVector qiov;
- async_context_push();
-
async_ret = NOT_DONE;
iov.iov_base = (void *)buf;
iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
@@ -2655,7 +2835,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
fail:
- async_context_pop();
return async_ret;
}
@@ -2667,8 +2846,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
struct iovec iov;
QEMUIOVector qiov;
- async_context_push();
-
async_ret = NOT_DONE;
iov.iov_base = (void *)buf;
iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
@@ -2684,7 +2861,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
}
fail:
- async_context_pop();
return async_ret;
}
@@ -2726,6 +2902,77 @@ void qemu_aio_release(void *p)
}
/**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *iov,
+ bool is_write)
+{
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BlockDriverAIOCB *acb;
+
+ if (is_write) {
+ acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ } else {
+ acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ }
+
+ trace_bdrv_co_io(is_write, acb);
+ if (!acb) {
+ return -EIO;
+ }
+ qemu_coroutine_yield();
+
+ return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
+{
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BlockDriverAIOCB *acb;
+
+ acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+ if (!acb) {
+ return -EIO;
+ }
+ qemu_coroutine_yield();
+ return co.ret;
+}
+
+/**************************************************************/
/* removable device support */
/**
@@ -2768,25 +3015,16 @@ int bdrv_media_changed(BlockDriverState *bs)
int bdrv_eject(BlockDriverState *bs, int eject_flag)
{
BlockDriver *drv = bs->drv;
- int ret;
- if (bs->locked) {
+ if (eject_flag && bs->locked) {
return -EBUSY;
}
- if (!drv || !drv->bdrv_eject) {
- ret = -ENOTSUP;
- } else {
- ret = drv->bdrv_eject(bs, eject_flag);
- }
- if (ret == -ENOTSUP) {
- ret = 0;
+ if (drv && drv->bdrv_eject) {
+ drv->bdrv_eject(bs, eject_flag);
}
- if (ret >= 0) {
- bs->tray_open = eject_flag;
- }
-
- return ret;
+ bs->tray_open = eject_flag;
+ return 0;
}
int bdrv_is_locked(BlockDriverState *bs)
diff --git a/block.h b/block.h
index 59cc410e3b..a3bfaafef0 100644
--- a/block.h
+++ b/block.h
@@ -4,6 +4,7 @@
#include "qemu-aio.h"
#include "qemu-common.h"
#include "qemu-option.h"
+#include "qemu-coroutine.h"
#include "qobject.h"
/* block.c */
@@ -85,8 +86,10 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
const void *buf, int count);
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
const void *buf, int count);
-int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
int bdrv_truncate(BlockDriverState *bs, int64_t offset);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
diff --git a/block/qcow.c b/block/qcow.c
index 227b104e36..6447c2a1c0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -73,6 +73,7 @@ typedef struct BDRVQcowState {
uint32_t crypt_method_header;
AES_KEY aes_encrypt_key;
AES_KEY aes_decrypt_key;
+ CoMutex lock;
} BDRVQcowState;
static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
@@ -517,11 +518,11 @@ static AIOPool qcow_aio_pool = {
static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque, int is_write)
+ int is_write)
{
QCowAIOCB *acb;
- acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
+ acb = qemu_aio_get(&qcow_aio_pool, bs, NULL, NULL);
if (!acb)
return NULL;
acb->hd_aiocb = NULL;
@@ -542,48 +543,15 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
return acb;
}
-static void qcow_aio_read_cb(void *opaque, int ret);
-static void qcow_aio_write_cb(void *opaque, int ret);
-
-static void qcow_aio_rw_bh(void *opaque)
-{
- QCowAIOCB *acb = opaque;
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
-
- if (acb->is_write) {
- qcow_aio_write_cb(opaque, 0);
- } else {
- qcow_aio_read_cb(opaque, 0);
- }
-}
-
-static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
- if (acb->bh) {
- return -EIO;
- }
-
- acb->bh = qemu_bh_new(cb, acb);
- if (!acb->bh) {
- return -EIO;
- }
-
- qemu_bh_schedule(acb->bh);
-
- return 0;
-}
-
-static void qcow_aio_read_cb(void *opaque, int ret)
+static int qcow_aio_read_cb(void *opaque)
{
QCowAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
+ int ret;
acb->hd_aiocb = NULL;
- if (ret < 0)
- goto done;
redo:
/* post process the read buffer */
@@ -605,8 +573,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
if (acb->nb_sectors == 0) {
/* request completed */
- ret = 0;
- goto done;
+ return 0;
}
/* prepare next AIO request */
@@ -623,11 +590,12 @@ static void qcow_aio_read_cb(void *opaque, int ret)
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = acb->n * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
- &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
- if (acb->hd_aiocb == NULL) {
- ret = -EIO;
- goto done;
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
+ acb->n, &acb->hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ return -EIO;
}
} else {
/* Note: in this case, no need to wait */
@@ -637,64 +605,56 @@ static void qcow_aio_read_cb(void *opaque, int ret)
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(bs, acb->cluster_offset) < 0) {
- ret = -EIO;
- goto done;
+ return -EIO;
}
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
goto redo;
} else {
if ((acb->cluster_offset & 511) != 0) {
- ret = -EIO;
- goto done;
+ return -EIO;
}
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = acb->n * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_readv(bs->file,
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
- &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
- if (acb->hd_aiocb == NULL) {
- ret = -EIO;
- goto done;
+ acb->n, &acb->hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ return ret;
}
}
- return;
-
-done:
- if (acb->qiov->niov > 1) {
- qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
- qemu_vfree(acb->orig_buf);
- }
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
+ return 1;
}
-static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
+ BDRVQcowState *s = bs->opaque;
QCowAIOCB *acb;
int ret;
- acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
- if (!acb)
- return NULL;
+ acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
- ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
- if (ret < 0) {
- if (acb->qiov->niov > 1) {
- qemu_vfree(acb->orig_buf);
- }
- qemu_aio_release(acb);
- return NULL;
+ qemu_co_mutex_lock(&s->lock);
+ do {
+ ret = qcow_aio_read_cb(acb);
+ } while (ret > 0);
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (acb->qiov->niov > 1) {
+ qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+ qemu_vfree(acb->orig_buf);
}
+ qemu_aio_release(acb);
- return &acb->common;
+ return ret;
}
-static void qcow_aio_write_cb(void *opaque, int ret)
+static int qcow_aio_write_cb(void *opaque)
{
QCowAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
@@ -702,20 +662,17 @@ static void qcow_aio_write_cb(void *opaque, int ret)
int index_in_cluster;
uint64_t cluster_offset;
const uint8_t *src_buf;
+ int ret;
acb->hd_aiocb = NULL;
- if (ret < 0)
- goto done;
-
acb->nb_sectors -= acb->n;
acb->sector_num += acb->n;
acb->buf += acb->n * 512;
if (acb->nb_sectors == 0) {
/* request completed */
- ret = 0;
- goto done;
+ return 0;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -726,16 +683,11 @@ static void qcow_aio_write_cb(void *opaque, int ret)
index_in_cluster,
index_in_cluster + acb->n);
if (!cluster_offset || (cluster_offset & 511) != 0) {
- ret = -EIO;
- goto done;
+ return -EIO;
}
if (s->crypt_method) {
if (!acb->cluster_data) {
acb->cluster_data = qemu_mallocz(s->cluster_size);
- if (!acb->cluster_data) {
- ret = -ENOMEM;
- goto done;
- }
}
encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
acb->n, 1, &s->aes_encrypt_key);
@@ -747,26 +699,19 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->hd_iov.iov_base = (void *)src_buf;
acb->hd_iov.iov_len = acb->n * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- &acb->hd_qiov, acb->n,
- qcow_aio_write_cb, acb);
- if (acb->hd_aiocb == NULL) {
- ret = -EIO;
- goto done;
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ acb->n, &acb->hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ return ret;
}
- return;
-
-done:
- if (acb->qiov->niov > 1)
- qemu_vfree(acb->orig_buf);
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
+ return 1;
}
-static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
QCowAIOCB *acb;
@@ -774,21 +719,20 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
s->cluster_cache_offset = -1; /* disable compressed cache */
- acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
- if (!acb)
- return NULL;
+ acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
+ qemu_co_mutex_lock(&s->lock);
+ do {
+ ret = qcow_aio_write_cb(acb);
+ } while (ret > 0);
+ qemu_co_mutex_unlock(&s->lock);
- ret = qcow_schedule_bh(qcow_aio_rw_bh, acb);
- if (ret < 0) {
- if (acb->qiov->niov > 1) {
- qemu_vfree(acb->orig_buf);
- }
- qemu_aio_release(acb);
- return NULL;
+ if (acb->qiov->niov > 1) {
+ qemu_vfree(acb->orig_buf);
}
+ qemu_aio_release(acb);
- return &acb->common;
+ return ret;
}
static void qcow_close(BlockDriverState *bs)
@@ -1020,8 +964,8 @@ static BlockDriver bdrv_qcow = {
.bdrv_is_allocated = qcow_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
- .bdrv_aio_readv = qcow_aio_readv,
- .bdrv_aio_writev = qcow_aio_writev,
+ .bdrv_co_readv = qcow_co_readv,
+ .bdrv_co_writev = qcow_co_writev,
.bdrv_aio_flush = qcow_aio_flush,
.bdrv_write_compressed = qcow_write_compressed,
.bdrv_get_info = qcow_get_info,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 882f50a80b..81cf77d83c 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -697,12 +697,12 @@ err:
* m->depends_on is set to NULL and the other fields in m are meaningless.
*
* If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. This may be 0 if the request
- * conflict with another write request in flight; in this case, m->depends_on
- * is set and the remaining fields of m are meaningless.
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
*
- * If m->nb_clusters is non-zero, the other fields of m are valid and contain
- * information about the first allocated cluster.
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
*
* Return 0 on success and -errno in error cases
*/
@@ -721,6 +721,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
return ret;
}
+again:
nb_clusters = size_to_clusters(s, n_end << 9);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
@@ -792,12 +793,12 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
}
if (nb_clusters == 0) {
- /* Set dependency and wait for a callback */
- m->depends_on = old_alloc;
- m->nb_clusters = 0;
- *num = 0;
-
- goto out_wait_dependency;
+ /* Wait for the dependency to complete. We need to recheck
+ * the free/allocated clusters when we continue. */
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_queue_wait(&old_alloc->dependent_requests);
+ qemu_co_mutex_lock(&s->lock);
+ goto again;
}
}
}
@@ -834,9 +835,6 @@ out:
return 0;
-out_wait_dependency:
- return qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
fail_put:
diff --git a/block/qcow2.c b/block/qcow2.c
index 48e1b95689..f07d550a96 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -276,6 +276,9 @@ static int qcow2_open(BlockDriverState *bs, int flags)
goto fail;
}
+ /* Initialise locks */
+ qemu_co_mutex_init(&s->lock);
+
#ifdef DEBUG_ALLOC
qcow2_check_refcounts(bs);
#endif
@@ -379,7 +382,6 @@ typedef struct QCowAIOCB {
uint64_t cluster_offset;
uint8_t *cluster_data;
bool is_write;
- BlockDriverAIOCB *hd_aiocb;
QEMUIOVector hd_qiov;
QEMUBH *bh;
QCowL2Meta l2meta;
@@ -389,8 +391,6 @@ typedef struct QCowAIOCB {
static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
{
QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
- if (acb->hd_aiocb)
- bdrv_aio_cancel(acb->hd_aiocb);
qemu_aio_release(acb);
}
@@ -399,46 +399,16 @@ static AIOPool qcow2_aio_pool = {
.cancel = qcow2_aio_cancel,
};
-static void qcow2_aio_read_cb(void *opaque, int ret);
-static void qcow2_aio_write_cb(void *opaque, int ret);
-
-static void qcow2_aio_rw_bh(void *opaque)
-{
- QCowAIOCB *acb = opaque;
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
-
- if (acb->is_write) {
- qcow2_aio_write_cb(opaque, 0);
- } else {
- qcow2_aio_read_cb(opaque, 0);
- }
-}
-
-static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
- if (acb->bh)
- return -EIO;
-
- acb->bh = qemu_bh_new(cb, acb);
- if (!acb->bh)
- return -EIO;
-
- qemu_bh_schedule(acb->bh);
-
- return 0;
-}
-
-static void qcow2_aio_read_cb(void *opaque, int ret)
+/*
+ * Returns 0 when the request is completed successfully, 1 when there is still
+ * a part left to do and -errno in error cases.
+ */
+static int qcow2_aio_read_cb(QCowAIOCB *acb)
{
- QCowAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n1;
-
- acb->hd_aiocb = NULL;
- if (ret < 0)
- goto done;
+ int ret;
/* post process the read buffer */
if (!acb->cluster_offset) {
@@ -463,8 +433,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
if (acb->remaining_sectors == 0) {
/* request completed */
- ret = 0;
- goto done;
+ return 0;
}
/* prepare next AIO request */
@@ -477,7 +446,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
&acb->cur_nr_sectors, &acb->cluster_offset);
if (ret < 0) {
- goto done;
+ return ret;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -494,42 +463,35 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
acb->sector_num, acb->cur_nr_sectors);
if (n1 > 0) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
- &acb->hd_qiov, n1, qcow2_aio_read_cb, acb);
- if (acb->hd_aiocb == NULL) {
- ret = -EIO;
- goto done;
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, acb->sector_num,
+ n1, &acb->hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ return ret;
}
- } else {
- ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
- if (ret < 0)
- goto done;
}
+ return 1;
} else {
/* Note: in this case, no need to wait */
qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
- ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
- if (ret < 0)
- goto done;
+ return 1;
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
ret = qcow2_decompress_cluster(bs, acb->cluster_offset);
if (ret < 0) {
- goto done;
+ return ret;
}
qemu_iovec_from_buffer(&acb->hd_qiov,
s->cluster_cache + index_in_cluster * 512,
512 * acb->cur_nr_sectors);
- ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
- if (ret < 0)
- goto done;
+ return 1;
} else {
if ((acb->cluster_offset & 511) != 0) {
- ret = -EIO;
- goto done;
+ return -EIO;
}
if (s->crypt_method) {
@@ -550,21 +512,17 @@ static void qcow2_aio_read_cb(void *opaque, int ret)
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- acb->hd_aiocb = bdrv_aio_readv(bs->file,
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
- &acb->hd_qiov, acb->cur_nr_sectors,
- qcow2_aio_read_cb, acb);
- if (acb->hd_aiocb == NULL) {
- ret = -EIO;
- goto done;
+ acb->cur_nr_sectors, &acb->hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ return ret;
}
}
- return;
-done:
- acb->common.cb(acb->common.opaque, ret);
- qemu_iovec_destroy(&acb->hd_qiov);
- qemu_aio_release(acb);
+ return 1;
}
static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
@@ -577,7 +535,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
acb = qemu_aio_get(&qcow2_aio_pool, bs, cb, opaque);
if (!acb)
return NULL;
- acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
acb->qiov = qiov;
acb->is_write = is_write;
@@ -589,70 +546,65 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
acb->cur_nr_sectors = 0;
acb->cluster_offset = 0;
acb->l2meta.nb_clusters = 0;
- QLIST_INIT(&acb->l2meta.dependent_requests);
+ qemu_co_queue_init(&acb->l2meta.dependent_requests);
return acb;
}
-static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
+ BDRVQcowState *s = bs->opaque;
QCowAIOCB *acb;
int ret;
- acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
- if (!acb)
- return NULL;
+ acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0);
- ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
- if (ret < 0) {
- qemu_iovec_destroy(&acb->hd_qiov);
- qemu_aio_release(acb);
- return NULL;
- }
+ qemu_co_mutex_lock(&s->lock);
+ do {
+ ret = qcow2_aio_read_cb(acb);
+ } while (ret > 0);
+ qemu_co_mutex_unlock(&s->lock);
- return &acb->common;
+ qemu_iovec_destroy(&acb->hd_qiov);
+ qemu_aio_release(acb);
+
+ return ret;
}
-static void run_dependent_requests(QCowL2Meta *m)
+static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
{
- QCowAIOCB *req;
- QCowAIOCB *next;
-
/* Take the request off the list of running requests */
if (m->nb_clusters != 0) {
QLIST_REMOVE(m, next_in_flight);
}
/* Restart all dependent requests */
- QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) {
- qcow2_aio_write_cb(req, 0);
+ if (!qemu_co_queue_empty(&m->dependent_requests)) {
+ qemu_co_mutex_unlock(&s->lock);
+ while(qemu_co_queue_next(&m->dependent_requests));
+ qemu_co_mutex_lock(&s->lock);
}
-
- /* Empty the list for the next part of the request */
- QLIST_INIT(&m->dependent_requests);
}
-static void qcow2_aio_write_cb(void *opaque, int ret)
+/*
+ * Returns 0 when the request is completed successfully, 1 when there is still
+ * a part left to do and -errno in error cases.
+ */
+static int qcow2_aio_write_cb(QCowAIOCB *acb)
{
- QCowAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
int n_end;
+ int ret;
- acb->hd_aiocb = NULL;
-
- if (ret >= 0) {
- ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
- }
+ ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
- run_dependent_requests(&acb->l2meta);
+ run_dependent_requests(s, &acb->l2meta);
- if (ret < 0)
- goto done;
+ if (ret < 0) {
+ return ret;
+ }
acb->remaining_sectors -= acb->cur_nr_sectors;
acb->sector_num += acb->cur_nr_sectors;
@@ -660,8 +612,7 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
if (acb->remaining_sectors == 0) {
/* request completed */
- ret = 0;
- goto done;
+ return 0;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -673,18 +624,10 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
if (ret < 0) {
- goto done;
+ return ret;
}
acb->cluster_offset = acb->l2meta.cluster_offset;
-
- /* Need to wait for another request? If so, we are done for now. */
- if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) {
- QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
- acb, next_depend);
- return;
- }
-
assert((acb->cluster_offset & 511) == 0);
qemu_iovec_reset(&acb->hd_qiov);
@@ -709,51 +652,40 @@ static void qcow2_aio_write_cb(void *opaque, int ret)
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- acb->hd_aiocb = bdrv_aio_writev(bs->file,
- (acb->cluster_offset >> 9) + index_in_cluster,
- &acb->hd_qiov, acb->cur_nr_sectors,
- qcow2_aio_write_cb, acb);
- if (acb->hd_aiocb == NULL) {
- ret = -EIO;
- goto fail;
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ (acb->cluster_offset >> 9) + index_in_cluster,
+ acb->cur_nr_sectors, &acb->hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ return ret;
}
- return;
-
-fail:
- if (acb->l2meta.nb_clusters != 0) {
- QLIST_REMOVE(&acb->l2meta, next_in_flight);
- }
-done:
- acb->common.cb(acb->common.opaque, ret);
- qemu_iovec_destroy(&acb->hd_qiov);
- qemu_aio_release(acb);
+ return 1;
}
-static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
+static int qcow2_co_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors,
+ QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
QCowAIOCB *acb;
int ret;
+ acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1);
s->cluster_cache_offset = -1; /* disable compressed cache */
- acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
- if (!acb)
- return NULL;
+ qemu_co_mutex_lock(&s->lock);
+ do {
+ ret = qcow2_aio_write_cb(acb);
+ } while (ret > 0);
+ qemu_co_mutex_unlock(&s->lock);
- ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb);
- if (ret < 0) {
- qemu_iovec_destroy(&acb->hd_qiov);
- qemu_aio_release(acb);
- return NULL;
- }
+ qemu_iovec_destroy(&acb->hd_qiov);
+ qemu_aio_release(acb);
- return &acb->common;
+ return ret;
}
static void qcow2_close(BlockDriverState *bs)
@@ -881,7 +813,7 @@ static int preallocate(BlockDriverState *bs)
nb_sectors = bdrv_getlength(bs) >> 9;
offset = 0;
- QLIST_INIT(&meta.dependent_requests);
+ qemu_co_queue_init(&meta.dependent_requests);
meta.cluster_offset = 0;
while (nb_sectors) {
@@ -899,7 +831,7 @@ static int preallocate(BlockDriverState *bs)
/* There are no dependent requests, but we need to remove our request
* from the list of in-flight requests */
- run_dependent_requests(&meta);
+ run_dependent_requests(bs->opaque, &meta);
/* TODO Preallocate data if requested */
@@ -1387,8 +1319,8 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_set_key = qcow2_set_key,
.bdrv_make_empty = qcow2_make_empty,
- .bdrv_aio_readv = qcow2_aio_readv,
- .bdrv_aio_writev = qcow2_aio_writev,
+ .bdrv_co_readv = qcow2_co_readv,
+ .bdrv_co_writev = qcow2_co_writev,
.bdrv_aio_flush = qcow2_aio_flush,
.bdrv_discard = qcow2_discard,
diff --git a/block/qcow2.h b/block/qcow2.h
index 6a0a21b694..de23abe1a4 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -26,6 +26,7 @@
#define BLOCK_QCOW2_H
#include "aes.h"
+#include "qemu-coroutine.h"
//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2
@@ -114,6 +115,8 @@ typedef struct BDRVQcowState {
int64_t free_cluster_index;
int64_t free_byte_offset;
+ CoMutex lock;
+
uint32_t crypt_method; /* current crypt method, 0 if no key yet */
uint32_t crypt_method_header;
AES_KEY aes_encrypt_key;
@@ -146,7 +149,7 @@ typedef struct QCowL2Meta
int nb_available;
int nb_clusters;
struct QCowL2Meta *depends_on;
- QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests;
+ CoQueue dependent_requests;
QLIST_ENTRY(QCowL2Meta) next_in_flight;
} QCowL2Meta;
diff --git a/block/qed-table.c b/block/qed-table.c
index d38c673547..d96afa81d7 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -179,16 +179,12 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
{
int ret = -EINPROGRESS;
- async_context_push();
-
qed_read_table(s, s->header.l1_table_offset,
s->l1_table, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
qemu_aio_wait();
}
- async_context_pop();
-
return ret;
}
@@ -205,15 +201,11 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
{
int ret = -EINPROGRESS;
- async_context_push();
-
qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
qemu_aio_wait();
}
- async_context_pop();
-
return ret;
}
@@ -282,14 +274,11 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
{
int ret = -EINPROGRESS;
- async_context_push();
-
qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
qemu_aio_wait();
}
- async_context_pop();
return ret;
}
@@ -307,13 +296,10 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
{
int ret = -EINPROGRESS;
- async_context_push();
-
qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
qemu_aio_wait();
}
- async_context_pop();
return ret;
}
diff --git a/block/qed.c b/block/qed.c
index 39703793e9..333f067582 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -680,16 +680,12 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
};
QEDRequest request = { .l2_table = NULL };
- async_context_push();
-
qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
while (cb.is_allocated == -1) {
qemu_aio_wait();
}
- async_context_pop();
-
qed_unref_l2_cache_entry(request.l2_table);
return cb.is_allocated;
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cd89c8312a..c5c99446c0 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -230,13 +230,15 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
}
}
+ /* We're falling back to POSIX AIO in some cases so init always */
+ if (paio_init() < 0) {
+ goto out_free_buf;
+ }
+
#ifdef CONFIG_LINUX_AIO
if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
(BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
- /* We're falling back to POSIX AIO in some cases */
- paio_init();
-
s->aio_ctx = laio_init();
if (!s->aio_ctx) {
goto out_free_buf;
@@ -245,9 +247,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
} else
#endif
{
- if (paio_init() < 0) {
- goto out_free_buf;
- }
#ifdef CONFIG_LINUX_AIO
s->use_aio = 0;
#endif
@@ -587,7 +586,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
/*
* If O_DIRECT is used the buffer needs to be aligned on a sector
- * boundary. Check if this is the case or telll the low-level
+ * boundary. Check if this is the case or tell the low-level
* driver that it needs to copy the buffer.
*/
if (s->aligned_buf) {
@@ -1254,7 +1253,7 @@ static int floppy_media_changed(BlockDriverState *bs)
return ret;
}
-static int floppy_eject(BlockDriverState *bs, int eject_flag)
+static void floppy_eject(BlockDriverState *bs, int eject_flag)
{
BDRVRawState *s = bs->opaque;
int fd;
@@ -1269,8 +1268,6 @@ static int floppy_eject(BlockDriverState *bs, int eject_flag)
perror("FDEJECT");
close(fd);
}
-
- return 0;
}
static BlockDriver bdrv_host_floppy = {
@@ -1348,7 +1345,7 @@ static int cdrom_is_inserted(BlockDriverState *bs)
return 0;
}
-static int cdrom_eject(BlockDriverState *bs, int eject_flag)
+static void cdrom_eject(BlockDriverState *bs, int eject_flag)
{
BDRVRawState *s = bs->opaque;
@@ -1359,11 +1356,9 @@ static int cdrom_eject(BlockDriverState *bs, int eject_flag)
if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
perror("CDROMEJECT");
}
-
- return 0;
}
-static int cdrom_set_locked(BlockDriverState *bs, int locked)
+static void cdrom_set_locked(BlockDriverState *bs, int locked)
{
BDRVRawState *s = bs->opaque;
@@ -1374,8 +1369,6 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked)
*/
/* perror("CDROM_LOCKDOOR"); */
}
-
- return 0;
}
static BlockDriver bdrv_host_cdrom = {
@@ -1464,12 +1457,12 @@ static int cdrom_is_inserted(BlockDriverState *bs)
return raw_getlength(bs) > 0;
}
-static int cdrom_eject(BlockDriverState *bs, int eject_flag)
+static void cdrom_eject(BlockDriverState *bs, int eject_flag)
{
BDRVRawState *s = bs->opaque;
if (s->fd < 0)
- return -ENOTSUP;
+ return;
(void) ioctl(s->fd, CDIOCALLOW);
@@ -1481,17 +1474,15 @@ static int cdrom_eject(BlockDriverState *bs, int eject_flag)
perror("CDIOCCLOSE");
}
- if (cdrom_reopen(bs) < 0)
- return -ENOTSUP;
- return 0;
+ cdrom_reopen(bs);
}
-static int cdrom_set_locked(BlockDriverState *bs, int locked)
+static void cdrom_set_locked(BlockDriverState *bs, int locked)
{
BDRVRawState *s = bs->opaque;
if (s->fd < 0)
- return -ENOTSUP;
+ return;
if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
/*
* Note: an error can happen if the distribution automatically
@@ -1499,8 +1490,6 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked)
*/
/* perror("CDROM_LOCKDOOR"); */
}
-
- return 0;
}
static BlockDriver bdrv_host_cdrom = {
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 91067e7595..e47cfe0f4a 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -393,41 +393,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
return 0;
}
-#if 0
-/***********************************************/
-/* removable device additional commands */
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
- return 1;
-}
-
-static int raw_media_changed(BlockDriverState *bs)
-{
- return -ENOTSUP;
-}
-
-static int raw_eject(BlockDriverState *bs, int eject_flag)
-{
- DWORD ret_count;
-
- if (s->type == FTYPE_FILE)
- return -ENOTSUP;
- if (eject_flag) {
- DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA,
- NULL, 0, NULL, 0, &lpBytesReturned, NULL);
- } else {
- DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA,
- NULL, 0, NULL, 0, &lpBytesReturned, NULL);
- }
-}
-
-static int raw_set_locked(BlockDriverState *bs, int locked)
-{
- return -ENOTSUP;
-}
-#endif
-
static int hdev_has_zero_init(BlockDriverState *bs)
{
return 0;
diff --git a/block/raw.c b/block/raw.c
index b0f72d6a62..cb6203eeca 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -75,15 +75,14 @@ static int raw_is_inserted(BlockDriverState *bs)
return bdrv_is_inserted(bs->file);
}
-static int raw_eject(BlockDriverState *bs, int eject_flag)
+static void raw_eject(BlockDriverState *bs, int eject_flag)
{
- return bdrv_eject(bs->file, eject_flag);
+ bdrv_eject(bs->file, eject_flag);
}
-static int raw_set_locked(BlockDriverState *bs, int locked)
+static void raw_set_locked(BlockDriverState *bs, int locked)
{
bdrv_set_locked(bs->file, locked);
- return 0;
}
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
diff --git a/block/vpc.c b/block/vpc.c
index 56865da5bc..fdd5236892 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -156,6 +156,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
struct vhd_dyndisk_header* dyndisk_header;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
+ int err = -1;
if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
goto fail;
@@ -176,6 +177,11 @@ static int vpc_open(BlockDriverState *bs, int flags)
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
+ if (bs->total_sectors >= 65535 * 16 * 255) {
+ err = -EFBIG;
+ goto fail;
+ }
+
if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
!= HEADER_SIZE)
goto fail;
@@ -222,7 +228,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
return 0;
fail:
- return -1;
+ return err;
}
/*
diff --git a/block_int.h b/block_int.h
index efb68038c4..f6d02b38a7 100644
--- a/block_int.h
+++ b/block_int.h
@@ -27,6 +27,7 @@
#include "block.h"
#include "qemu-option.h"
#include "qemu-queue.h"
+#include "qemu-coroutine.h"
#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
@@ -77,6 +78,11 @@ struct BlockDriver {
int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
int num_reqs);
int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a,
@@ -112,8 +118,8 @@ struct BlockDriver {
/* removable device specific */
int (*bdrv_is_inserted)(BlockDriverState *bs);
int (*bdrv_media_changed)(BlockDriverState *bs);
- int (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
- int (*bdrv_set_locked)(BlockDriverState *bs, int locked);
+ void (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
+ void (*bdrv_set_locked)(BlockDriverState *bs, int locked);
/* to control generic scsi devices */
int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
diff --git a/blockdev.c b/blockdev.c
index 0b8d3a4f83..a25367a9e3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,16 +646,13 @@ out:
static int eject_device(Monitor *mon, BlockDriverState *bs, int force)
{
- if (!force) {
- if (!bdrv_is_removable(bs)) {
- qerror_report(QERR_DEVICE_NOT_REMOVABLE,
- bdrv_get_device_name(bs));
- return -1;
- }
- if (bdrv_is_locked(bs)) {
- qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs));
- return -1;
- }
+ if (!bdrv_is_removable(bs)) {
+ qerror_report(QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs));
+ return -1;
+ }
+ if (!force && bdrv_is_locked(bs)) {
+ qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs));
+ return -1;
}
bdrv_close(bs);
return 0;
diff --git a/configure b/configure
index e41fa0f20e..27f1fa9266 100755
--- a/configure
+++ b/configure
@@ -2557,6 +2557,20 @@ EOF
fi
##########################################
+# check if we have makecontext
+
+ucontext_coroutine=no
+if test "$darwin" != "yes"; then
+ cat > $TMPC << EOF
+#include <ucontext.h>
+int main(void) { makecontext(0, 0, 0); }
+EOF
+ if compile_prog "" "" ; then
+ ucontext_coroutine=yes
+ fi
+fi
+
+##########################################
# End of CC checks
# After here, no more $cc or $ld runs
@@ -3034,6 +3048,10 @@ if test "$rbd" = "yes" ; then
echo "CONFIG_RBD=y" >> $config_host_mak
fi
+if test "$ucontext_coroutine" = "yes" ; then
+ echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak
+fi
+
# USB host support
case "$usb" in
linux)
diff --git a/coroutine-gthread.c b/coroutine-gthread.c
new file mode 100644
index 0000000000..f09877e14f
--- /dev/null
+++ b/coroutine-gthread.c
@@ -0,0 +1,131 @@
+/*
+ * GThread coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+typedef struct {
+ Coroutine base;
+ GThread *thread;
+ bool runnable;
+ CoroutineAction action;
+} CoroutineGThread;
+
+static GCond *coroutine_cond;
+static GStaticMutex coroutine_lock = G_STATIC_MUTEX_INIT;
+static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+ if (!g_thread_supported()) {
+ g_thread_init(NULL);
+ }
+
+ coroutine_cond = g_cond_new();
+}
+
+static void coroutine_wait_runnable_locked(CoroutineGThread *co)
+{
+ while (!co->runnable) {
+ g_cond_wait(coroutine_cond, g_static_mutex_get_mutex(&coroutine_lock));
+ }
+}
+
+static void coroutine_wait_runnable(CoroutineGThread *co)
+{
+ g_static_mutex_lock(&coroutine_lock);
+ coroutine_wait_runnable_locked(co);
+ g_static_mutex_unlock(&coroutine_lock);
+}
+
+static gpointer coroutine_thread(gpointer opaque)
+{
+ CoroutineGThread *co = opaque;
+
+ g_static_private_set(&coroutine_key, co, NULL);
+ coroutine_wait_runnable(co);
+ co->base.entry(co->base.entry_arg);
+ qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
+ return NULL;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ CoroutineGThread *co;
+
+ co = qemu_mallocz(sizeof(*co));
+ co->thread = g_thread_create_full(coroutine_thread, co, 0, TRUE, TRUE,
+ G_THREAD_PRIORITY_NORMAL, NULL);
+ if (!co->thread) {
+ qemu_free(co);
+ return NULL;
+ }
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
+
+ g_thread_join(co->thread);
+ qemu_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_,
+ Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
+ CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
+
+ g_static_mutex_lock(&coroutine_lock);
+ from->runnable = false;
+ from->action = action;
+ to->runnable = true;
+ to->action = action;
+ g_cond_broadcast(coroutine_cond);
+
+ if (action != COROUTINE_TERMINATE) {
+ coroutine_wait_runnable_locked(from);
+ }
+ g_static_mutex_unlock(&coroutine_lock);
+ return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineGThread *co = g_static_private_get(&coroutine_key);
+
+ if (!co) {
+ co = qemu_mallocz(sizeof(*co));
+ co->runnable = true;
+ g_static_private_set(&coroutine_key, co, (GDestroyNotify)qemu_free);
+ }
+
+ return &co->base;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineGThread *co = g_static_private_get(&coroutine_key);
+
+ return co && co->base.caller;
+}
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
new file mode 100644
index 0000000000..41c2379a2a
--- /dev/null
+++ b/coroutine-ucontext.c
@@ -0,0 +1,230 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+enum {
+ /* Maximum free pool size prevents holding too many freed coroutines */
+ POOL_MAX_SIZE = 64,
+};
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ jmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+ /** Currently executing coroutine */
+ Coroutine *current;
+
+ /** Free list to speed up creation */
+ QLIST_HEAD(, Coroutine) pool;
+ unsigned int pool_size;
+
+ /** The default coroutine */
+ CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+ void *p;
+ int i[2];
+};
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ if (!s) {
+ s = qemu_mallocz(sizeof(*s));
+ s->current = &s->leader.base;
+ QLIST_INIT(&s->pool);
+ pthread_setspecific(thread_state_key, s);
+ }
+ return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+ CoroutineThreadState *s = opaque;
+ Coroutine *co;
+ Coroutine *tmp;
+
+ QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) {
+ qemu_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
+ qemu_free(co);
+ }
+ qemu_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+ int ret;
+
+ ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+ if (ret != 0) {
+ fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+ abort();
+ }
+}
+
+static void coroutine_trampoline(int i0, int i1)
+{
+ union cc_arg arg;
+ CoroutineUContext *self;
+ Coroutine *co;
+
+ arg.i[0] = i0;
+ arg.i[1] = i1;
+ self = arg.p;
+ co = &self->base;
+
+ /* Initialize longjmp environment and switch back the caller */
+ if (!setjmp(self->env)) {
+ longjmp(*(jmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+static Coroutine *coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ ucontext_t old_uc, uc;
+ jmp_buf old_env;
+ union cc_arg arg;
+
+ /* The ucontext functions preserve signal masks which incurs a system call
+ * overhead. setjmp()/longjmp() does not preserve signal masks but only
+ * works on the current stack. Since we need a way to create and switch to
+ * a new stack, use the ucontext functions for that but setjmp()/longjmp()
+ * for everything else.
+ */
+
+ if (getcontext(&uc) == -1) {
+ abort();
+ }
+
+ co = qemu_mallocz(sizeof(*co));
+ co->stack = qemu_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ uc.uc_link = &old_uc;
+ uc.uc_stack.ss_sp = co->stack;
+ uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_flags = 0;
+
+ arg.p = co;
+
+ makecontext(&uc, (void (*)(void))coroutine_trampoline,
+ 2, arg.i[0], arg.i[1]);
+
+ /* swapcontext() in, longjmp() back out */
+ if (!setjmp(old_env)) {
+ swapcontext(&old_uc, &uc);
+ }
+ return &co->base;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ Coroutine *co;
+
+ co = QLIST_FIRST(&s->pool);
+ if (co) {
+ QLIST_REMOVE(co, pool_next);
+ s->pool_size--;
+ } else {
+ co = coroutine_new();
+ }
+ return co;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+ if (s->pool_size < POOL_MAX_SIZE) {
+ QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next);
+ co->base.caller = NULL;
+ s->pool_size++;
+ return;
+ }
+
+ qemu_free(co->stack);
+ qemu_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ int ret;
+
+ s->current = to_;
+
+ ret = setjmp(from->env);
+ if (ret == 0) {
+ longjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+
+ return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ return s && s->current->caller;
+}
diff --git a/coroutine-win32.c b/coroutine-win32.c
new file mode 100644
index 0000000000..0e29448473
--- /dev/null
+++ b/coroutine-win32.c
@@ -0,0 +1,92 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+typedef struct
+{
+ Coroutine base;
+
+ LPVOID fiber;
+ CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+ CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+ current = to_;
+
+ to->action = action;
+ SwitchToFiber(to->fiber);
+ return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+ Coroutine *co = co_;
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineWin32 *co;
+
+ co = qemu_mallocz(sizeof(*co));
+ co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+ DeleteFiber(co->fiber);
+ qemu_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ if (!current) {
+ current = &leader.base;
+ leader.fiber = ConvertThreadToFiber(NULL);
+ }
+ return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ return current && current->caller;
+}
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 8b1a412210..0b0344c1fd 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -223,7 +223,7 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
switch(cmd[0]) {
case TEST_UNIT_READY:
- case REZERO_UNIT:
+ case REWIND:
case START_STOP:
case SEEK_6:
case WRITE_FILEMARKS:
@@ -232,24 +232,24 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
case RELEASE:
case ERASE:
case ALLOW_MEDIUM_REMOVAL:
- case VERIFY:
+ case VERIFY_10:
case SEEK_10:
case SYNCHRONIZE_CACHE:
case LOCK_UNLOCK_CACHE:
case LOAD_UNLOAD:
case SET_CD_SPEED:
case SET_LIMITS:
- case WRITE_LONG:
+ case WRITE_LONG_10:
case MOVE_MEDIUM:
case UPDATE_BLOCK:
req->cmd.xfer = 0;
break;
case MODE_SENSE:
break;
- case WRITE_SAME:
+ case WRITE_SAME_10:
req->cmd.xfer = 1;
break;
- case READ_CAPACITY:
+ case READ_CAPACITY_10:
req->cmd.xfer = 8;
break;
case READ_BLOCK_LIMITS:
@@ -265,7 +265,7 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
req->cmd.xfer *= 8;
break;
case WRITE_10:
- case WRITE_VERIFY:
+ case WRITE_VERIFY_10:
case WRITE_6:
case WRITE_12:
case WRITE_VERIFY_12:
@@ -325,7 +325,7 @@ static void scsi_req_xfer_mode(SCSIRequest *req)
switch (req->cmd.buf[0]) {
case WRITE_6:
case WRITE_10:
- case WRITE_VERIFY:
+ case WRITE_VERIFY_10:
case WRITE_12:
case WRITE_VERIFY_12:
case WRITE_16:
@@ -345,15 +345,13 @@ static void scsi_req_xfer_mode(SCSIRequest *req)
case SEARCH_HIGH:
case SEARCH_LOW:
case UPDATE_BLOCK:
- case WRITE_LONG:
- case WRITE_SAME:
+ case WRITE_LONG_10:
+ case WRITE_SAME_10:
case SEARCH_HIGH_12:
case SEARCH_EQUAL_12:
case SEARCH_LOW_12:
- case SET_WINDOW:
case MEDIUM_SCAN:
case SEND_VOLUME_TAG:
- case WRITE_LONG_2:
case PERSISTENT_RESERVE_OUT:
case MAINTENANCE_OUT:
req->cmd.mode = SCSI_XFER_TO_DEV;
@@ -517,8 +515,7 @@ static const char *scsi_command_name(uint8_t cmd)
{
static const char *names[] = {
[ TEST_UNIT_READY ] = "TEST_UNIT_READY",
- [ REZERO_UNIT ] = "REZERO_UNIT",
- /* REWIND and REZERO_UNIT use the same operation code */
+ [ REWIND ] = "REWIND",
[ REQUEST_SENSE ] = "REQUEST_SENSE",
[ FORMAT_UNIT ] = "FORMAT_UNIT",
[ READ_BLOCK_LIMITS ] = "READ_BLOCK_LIMITS",
@@ -543,14 +540,12 @@ static const char *scsi_command_name(uint8_t cmd)
[ RECEIVE_DIAGNOSTIC ] = "RECEIVE_DIAGNOSTIC",
[ SEND_DIAGNOSTIC ] = "SEND_DIAGNOSTIC",
[ ALLOW_MEDIUM_REMOVAL ] = "ALLOW_MEDIUM_REMOVAL",
-
- [ SET_WINDOW ] = "SET_WINDOW",
- [ READ_CAPACITY ] = "READ_CAPACITY",
+ [ READ_CAPACITY_10 ] = "READ_CAPACITY_10",
[ READ_10 ] = "READ_10",
[ WRITE_10 ] = "WRITE_10",
[ SEEK_10 ] = "SEEK_10",
- [ WRITE_VERIFY ] = "WRITE_VERIFY",
- [ VERIFY ] = "VERIFY",
+ [ WRITE_VERIFY_10 ] = "WRITE_VERIFY_10",
+ [ VERIFY_10 ] = "VERIFY_10",
[ SEARCH_HIGH ] = "SEARCH_HIGH",
[ SEARCH_EQUAL ] = "SEARCH_EQUAL",
[ SEARCH_LOW ] = "SEARCH_LOW",
@@ -566,11 +561,14 @@ static const char *scsi_command_name(uint8_t cmd)
[ WRITE_BUFFER ] = "WRITE_BUFFER",
[ READ_BUFFER ] = "READ_BUFFER",
[ UPDATE_BLOCK ] = "UPDATE_BLOCK",
- [ READ_LONG ] = "READ_LONG",
- [ WRITE_LONG ] = "WRITE_LONG",
+ [ READ_LONG_10 ] = "READ_LONG_10",
+ [ WRITE_LONG_10 ] = "WRITE_LONG_10",
[ CHANGE_DEFINITION ] = "CHANGE_DEFINITION",
- [ WRITE_SAME ] = "WRITE_SAME",
+ [ WRITE_SAME_10 ] = "WRITE_SAME_10",
+ [ UNMAP ] = "UNMAP",
[ READ_TOC ] = "READ_TOC",
+ [ REPORT_DENSITY_SUPPORT ] = "REPORT_DENSITY_SUPPORT",
+ [ GET_CONFIGURATION ] = "GET_CONFIGURATION",
[ LOG_SELECT ] = "LOG_SELECT",
[ LOG_SENSE ] = "LOG_SENSE",
[ MODE_SELECT_10 ] = "MODE_SELECT_10",
@@ -579,27 +577,39 @@ static const char *scsi_command_name(uint8_t cmd)
[ MODE_SENSE_10 ] = "MODE_SENSE_10",
[ PERSISTENT_RESERVE_IN ] = "PERSISTENT_RESERVE_IN",
[ PERSISTENT_RESERVE_OUT ] = "PERSISTENT_RESERVE_OUT",
+ [ WRITE_FILEMARKS_16 ] = "WRITE_FILEMARKS_16",
+ [ EXTENDED_COPY ] = "EXTENDED_COPY",
+ [ ATA_PASSTHROUGH ] = "ATA_PASSTHROUGH",
+ [ ACCESS_CONTROL_IN ] = "ACCESS_CONTROL_IN",
+ [ ACCESS_CONTROL_OUT ] = "ACCESS_CONTROL_OUT",
+ [ READ_16 ] = "READ_16",
+ [ COMPARE_AND_WRITE ] = "COMPARE_AND_WRITE",
+ [ WRITE_16 ] = "WRITE_16",
+ [ WRITE_VERIFY_16 ] = "WRITE_VERIFY_16",
+ [ VERIFY_16 ] = "VERIFY_16",
+ [ SYNCHRONIZE_CACHE_16 ] = "SYNCHRONIZE_CACHE_16",
+ [ LOCATE_16 ] = "LOCATE_16",
+ [ WRITE_SAME_16 ] = "WRITE_SAME_16",
+ [ ERASE_16 ] = "ERASE_16",
+ [ SERVICE_ACTION_IN ] = "SERVICE_ACTION_IN",
+ [ WRITE_LONG_16 ] = "WRITE_LONG_16",
+ [ REPORT_LUNS ] = "REPORT_LUNS",
+ [ BLANK ] = "BLANK",
+ [ MAINTENANCE_IN ] = "MAINTENANCE_IN",
+ [ MAINTENANCE_OUT ] = "MAINTENANCE_OUT",
[ MOVE_MEDIUM ] = "MOVE_MEDIUM",
+ [ LOAD_UNLOAD ] = "LOAD_UNLOAD",
[ READ_12 ] = "READ_12",
[ WRITE_12 ] = "WRITE_12",
[ WRITE_VERIFY_12 ] = "WRITE_VERIFY_12",
+ [ VERIFY_12 ] = "VERIFY_12",
[ SEARCH_HIGH_12 ] = "SEARCH_HIGH_12",
[ SEARCH_EQUAL_12 ] = "SEARCH_EQUAL_12",
[ SEARCH_LOW_12 ] = "SEARCH_LOW_12",
[ READ_ELEMENT_STATUS ] = "READ_ELEMENT_STATUS",
[ SEND_VOLUME_TAG ] = "SEND_VOLUME_TAG",
- [ WRITE_LONG_2 ] = "WRITE_LONG_2",
-
- [ REPORT_DENSITY_SUPPORT ] = "REPORT_DENSITY_SUPPORT",
- [ GET_CONFIGURATION ] = "GET_CONFIGURATION",
- [ READ_16 ] = "READ_16",
- [ WRITE_16 ] = "WRITE_16",
- [ WRITE_VERIFY_16 ] = "WRITE_VERIFY_16",
- [ SERVICE_ACTION_IN ] = "SERVICE_ACTION_IN",
- [ REPORT_LUNS ] = "REPORT_LUNS",
- [ LOAD_UNLOAD ] = "LOAD_UNLOAD",
+ [ READ_DEFECT_DATA_12 ] = "READ_DEFECT_DATA_12",
[ SET_CD_SPEED ] = "SET_CD_SPEED",
- [ BLANK ] = "BLANK",
};
if (cmd >= ARRAY_SIZE(names) || names[cmd] == NULL)
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index 413cce07b5..27010b74c0 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -25,7 +25,7 @@
*/
#define TEST_UNIT_READY 0x00
-#define REZERO_UNIT 0x01
+#define REWIND 0x01
#define REQUEST_SENSE 0x03
#define FORMAT_UNIT 0x04
#define READ_BLOCK_LIMITS 0x05
@@ -48,14 +48,13 @@
#define RECEIVE_DIAGNOSTIC 0x1c
#define SEND_DIAGNOSTIC 0x1d
#define ALLOW_MEDIUM_REMOVAL 0x1e
-
-#define SET_WINDOW 0x24
-#define READ_CAPACITY 0x25
+#define READ_CAPACITY_10 0x25
#define READ_10 0x28
#define WRITE_10 0x2a
#define SEEK_10 0x2b
-#define WRITE_VERIFY 0x2e
-#define VERIFY 0x2f
+#define LOCATE_10 0x2b
+#define WRITE_VERIFY_10 0x2e
+#define VERIFY_10 0x2f
#define SEARCH_HIGH 0x30
#define SEARCH_EQUAL 0x31
#define SEARCH_LOW 0x32
@@ -71,11 +70,14 @@
#define WRITE_BUFFER 0x3b
#define READ_BUFFER 0x3c
#define UPDATE_BLOCK 0x3d
-#define READ_LONG 0x3e
-#define WRITE_LONG 0x3f
+#define READ_LONG_10 0x3e
+#define WRITE_LONG_10 0x3f
#define CHANGE_DEFINITION 0x40
-#define WRITE_SAME 0x41
+#define WRITE_SAME_10 0x41
+#define UNMAP 0x42
#define READ_TOC 0x43
+#define REPORT_DENSITY_SUPPORT 0x44
+#define GET_CONFIGURATION 0x46
#define LOG_SELECT 0x4c
#define LOG_SENSE 0x4d
#define MODE_SELECT_10 0x55
@@ -84,32 +86,40 @@
#define MODE_SENSE_10 0x5a
#define PERSISTENT_RESERVE_IN 0x5e
#define PERSISTENT_RESERVE_OUT 0x5f
+#define VARLENGTH_CDB 0x7f
+#define WRITE_FILEMARKS_16 0x80
+#define EXTENDED_COPY 0x83
+#define ATA_PASSTHROUGH 0x85
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT 0x87
+#define READ_16 0x88
+#define COMPARE_AND_WRITE 0x89
+#define WRITE_16 0x8a
+#define WRITE_VERIFY_16 0x8e
+#define VERIFY_16 0x8f
+#define SYNCHRONIZE_CACHE_16 0x91
+#define LOCATE_16 0x92
#define WRITE_SAME_16 0x93
+#define ERASE_16 0x93
+#define SERVICE_ACTION_IN 0x9e
+#define WRITE_LONG_16 0x9f
+#define REPORT_LUNS 0xa0
+#define BLANK 0xa1
#define MAINTENANCE_IN 0xa3
#define MAINTENANCE_OUT 0xa4
#define MOVE_MEDIUM 0xa5
+#define LOAD_UNLOAD 0xa6
#define READ_12 0xa8
#define WRITE_12 0xaa
#define WRITE_VERIFY_12 0xae
+#define VERIFY_12 0xaf
#define SEARCH_HIGH_12 0xb0
#define SEARCH_EQUAL_12 0xb1
#define SEARCH_LOW_12 0xb2
#define READ_ELEMENT_STATUS 0xb8
#define SEND_VOLUME_TAG 0xb6
-#define WRITE_LONG_2 0xea
-
-/* from hw/scsi-generic.c */
-#define REWIND 0x01
-#define REPORT_DENSITY_SUPPORT 0x44
-#define GET_CONFIGURATION 0x46
-#define READ_16 0x88
-#define WRITE_16 0x8a
-#define WRITE_VERIFY_16 0x8e
-#define SERVICE_ACTION_IN 0x9e
-#define REPORT_LUNS 0xa0
-#define LOAD_UNLOAD 0xa6
-#define SET_CD_SPEED 0xbb
-#define BLANK 0xa1
+#define READ_DEFECT_DATA_12 0xb7
+#define SET_CD_SPEED 0xbb
/*
* SAM Status codes
@@ -154,6 +164,7 @@
#define TYPE_DISK 0x00
#define TYPE_TAPE 0x01
+#define TYPE_PRINTER 0x02
#define TYPE_PROCESSOR 0x03 /* HP scanners use this */
#define TYPE_WORM 0x04 /* Treated as ROM by our system */
#define TYPE_ROM 0x05
@@ -161,6 +172,9 @@
#define TYPE_MOD 0x07 /* Magneto-optical disk -
* - treated as TYPE_DISK */
#define TYPE_MEDIUM_CHANGER 0x08
-#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */
+#define TYPE_STORAGE_ARRAY 0x0c /* Storage array device */
+#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */
+#define TYPE_RBC 0x0e /* Simplified Direct-Access Device */
+#define TYPE_OSD 0x11 /* Object-storage Device */
#define TYPE_NO_LUN 0x7f
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index f42a5d1f85..fa198f928c 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -59,8 +59,6 @@ typedef struct SCSIDiskReq {
uint32_t status;
} SCSIDiskReq;
-typedef enum { SCSI_HD, SCSI_CD } SCSIDriveKind;
-
struct SCSIDiskState
{
SCSIDevice qdev;
@@ -74,7 +72,6 @@ struct SCSIDiskState
char *version;
char *serial;
SCSISense sense;
- SCSIDriveKind drive_kind;
};
static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type);
@@ -382,7 +379,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
return -1;
}
- if (s->drive_kind == SCSI_CD) {
+ if (s->qdev.type == TYPE_ROM) {
outbuf[buflen++] = 5;
} else {
outbuf[buflen++] = 0;
@@ -401,7 +398,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
if (s->serial)
outbuf[buflen++] = 0x80; // unit serial number
outbuf[buflen++] = 0x83; // device identification
- if (s->drive_kind == SCSI_HD) {
+ if (s->qdev.type == TYPE_DISK) {
outbuf[buflen++] = 0xb0; // block limits
outbuf[buflen++] = 0xb2; // thin provisioning
}
@@ -460,7 +457,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
unsigned int opt_io_size =
s->qdev.conf.opt_io_size / s->qdev.blocksize;
- if (s->drive_kind == SCSI_CD) {
+ if (s->qdev.type == TYPE_ROM) {
DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
page_code);
return -1;
@@ -526,16 +523,15 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
memset(outbuf, 0, buflen);
if (req->lun) {
- outbuf[0] = 0x7f; /* LUN not supported */
+ outbuf[0] = 0x7f; /* LUN not supported */
return buflen;
}
- if (s->drive_kind == SCSI_CD) {
- outbuf[0] = 5;
+ outbuf[0] = s->qdev.type & 0x1f;
+ if (s->qdev.type == TYPE_ROM) {
outbuf[1] = 0x80;
memcpy(&outbuf[16], "QEMU CD-ROM ", 16);
} else {
- outbuf[0] = 0;
outbuf[1] = s->removable ? 0x80 : 0;
memcpy(&outbuf[16], "QEMU HARDDISK ", 16);
}
@@ -661,7 +657,7 @@ static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p,
return p[1] + 2;
case 0x2a: /* CD Capabilities and Mechanical Status page. */
- if (s->drive_kind != SCSI_CD)
+ if (s->qdev.type != TYPE_ROM)
return 0;
p[0] = 0x2a;
p[1] = 0x14;
@@ -836,7 +832,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
case TEST_UNIT_READY:
if (!bdrv_is_inserted(s->bs))
goto not_ready;
- break;
+ break;
case REQUEST_SENSE:
if (req->cmd.xfer < 4)
goto illegal_request;
@@ -848,7 +844,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
buflen = scsi_disk_emulate_inquiry(req, outbuf);
if (buflen < 0)
goto illegal_request;
- break;
+ break;
case MODE_SENSE:
case MODE_SENSE_10:
buflen = scsi_disk_emulate_mode_sense(req, outbuf);
@@ -877,18 +873,18 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
goto illegal_request;
break;
case START_STOP:
- if (s->drive_kind == SCSI_CD && (req->cmd.buf[4] & 2)) {
+ if (s->qdev.type == TYPE_ROM && (req->cmd.buf[4] & 2)) {
/* load/eject medium */
bdrv_eject(s->bs, !(req->cmd.buf[4] & 1));
}
- break;
+ break;
case ALLOW_MEDIUM_REMOVAL:
bdrv_set_locked(s->bs, req->cmd.buf[4] & 1);
- break;
- case READ_CAPACITY:
+ break;
+ case READ_CAPACITY_10:
/* The normal LEN field for this command is zero. */
- memset(outbuf, 0, 8);
- bdrv_get_geometry(s->bs, &nb_sectors);
+ memset(outbuf, 0, 8);
+ bdrv_get_geometry(s->bs, &nb_sectors);
if (!nb_sectors)
goto not_ready;
nb_sectors /= s->cluster_size;
@@ -908,7 +904,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
outbuf[6] = s->cluster_size * 2;
outbuf[7] = 0;
buflen = 8;
- break;
+ break;
case SYNCHRONIZE_CACHE:
ret = bdrv_flush(s->bs);
if (ret < 0) {
@@ -970,13 +966,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
outbuf[3] = 8;
buflen = 16;
break;
- case VERIFY:
- break;
- case REZERO_UNIT:
- DPRINTF("Rezero Unit\n");
- if (!bdrv_is_inserted(s->bs)) {
- goto not_ready;
- }
+ case VERIFY_10:
break;
default:
scsi_command_complete(r, CHECK_CONDITION, SENSE_CODE(INVALID_OPCODE));
@@ -1052,14 +1042,13 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
case RELEASE_10:
case START_STOP:
case ALLOW_MEDIUM_REMOVAL:
- case READ_CAPACITY:
+ case READ_CAPACITY_10:
case SYNCHRONIZE_CACHE:
case READ_TOC:
case GET_CONFIGURATION:
case SERVICE_ACTION_IN:
case REPORT_LUNS:
- case VERIFY:
- case REZERO_UNIT:
+ case VERIFY_10:
rc = scsi_disk_emulate_command(r, outbuf);
if (rc < 0) {
return 0;
@@ -1082,7 +1071,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
case WRITE_10:
case WRITE_12:
case WRITE_16:
- case WRITE_VERIFY:
+ case WRITE_VERIFY_10:
case WRITE_VERIFY_12:
case WRITE_VERIFY_16:
len = r->req.cmd.xfer / s->qdev.blocksize;
@@ -1190,7 +1179,7 @@ static void scsi_destroy(SCSIDevice *dev)
blockdev_mark_auto_del(s->qdev.conf.bs);
}
-static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind)
+static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
DriveInfo *dinfo;
@@ -1200,9 +1189,8 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind)
return -1;
}
s->bs = s->qdev.conf.bs;
- s->drive_kind = kind;
- if (kind == SCSI_HD && !bdrv_is_inserted(s->bs)) {
+ if (scsi_type == TYPE_DISK && !bdrv_is_inserted(s->bs)) {
error_report("Device needs media, but drive is empty");
return -1;
}
@@ -1224,44 +1212,47 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind)
return -1;
}
- if (kind == SCSI_CD) {
+ if (scsi_type == TYPE_ROM) {
s->qdev.blocksize = 2048;
- } else {
+ } else if (scsi_type == TYPE_DISK) {
s->qdev.blocksize = s->qdev.conf.logical_block_size;
+ } else {
+ error_report("scsi-disk: Unhandled SCSI type %02x", scsi_type);
+ return -1;
}
s->cluster_size = s->qdev.blocksize / 512;
s->bs->buffer_alignment = s->qdev.blocksize;
- s->qdev.type = TYPE_DISK;
+ s->qdev.type = scsi_type;
qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);
- bdrv_set_removable(s->bs, kind == SCSI_CD);
+ bdrv_set_removable(s->bs, scsi_type == TYPE_ROM);
add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, ",0");
return 0;
}
static int scsi_hd_initfn(SCSIDevice *dev)
{
- return scsi_initfn(dev, SCSI_HD);
+ return scsi_initfn(dev, TYPE_DISK);
}
static int scsi_cd_initfn(SCSIDevice *dev)
{
- return scsi_initfn(dev, SCSI_CD);
+ return scsi_initfn(dev, TYPE_ROM);
}
static int scsi_disk_initfn(SCSIDevice *dev)
{
- SCSIDriveKind kind;
DriveInfo *dinfo;
+ uint8_t scsi_type;
if (!dev->conf.bs) {
- kind = SCSI_HD; /* will die in scsi_initfn() */
+ scsi_type = TYPE_DISK; /* will die in scsi_initfn() */
} else {
dinfo = drive_get_by_blockdev(dev->conf.bs);
- kind = dinfo->media_cd ? SCSI_CD : SCSI_HD;
+ scsi_type = dinfo->media_cd ? TYPE_ROM : TYPE_DISK;
}
- return scsi_initfn(dev, kind);
+ return scsi_initfn(dev, scsi_type);
}
#define DEFINE_SCSI_DISK_PROPERTIES() \
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 63361b3542..7b0026eb98 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -406,7 +406,7 @@ static int get_blocksize(BlockDriverState *bdrv)
memset(cmd, 0, sizeof(cmd));
memset(buf, 0, sizeof(buf));
- cmd[0] = READ_CAPACITY;
+ cmd[0] = READ_CAPACITY_10;
memset(&io_header, 0, sizeof(io_header));
io_header.interface_id = 'S';
diff --git a/linux-aio.c b/linux-aio.c
index 68f4b3d757..dc3faf2499 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -31,7 +31,6 @@ struct qemu_laiocb {
struct iocb iocb;
ssize_t ret;
size_t nbytes;
- int async_context_id;
QLIST_ENTRY(qemu_laiocb) node;
};
@@ -39,7 +38,6 @@ struct qemu_laio_state {
io_context_t ctx;
int efd;
int count;
- QLIST_HEAD(, qemu_laiocb) completed_reqs;
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -49,7 +47,6 @@ static inline ssize_t io_event_ret(struct io_event *ev)
/*
* Completes an AIO request (calls the callback and frees the ACB).
- * Be sure to be in the right AsyncContext before calling this function.
*/
static void qemu_laio_process_completion(struct qemu_laio_state *s,
struct qemu_laiocb *laiocb)
@@ -72,42 +69,12 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
}
/*
- * Processes all queued AIO requests, i.e. requests that have return from OS
- * but their callback was not called yet. Requests that cannot have their
- * callback called in the current AsyncContext, remain in the queue.
- *
- * Returns 1 if at least one request could be completed, 0 otherwise.
+ * All requests are directly processed when they complete, so there's nothing
+ * left to do during qemu_aio_wait().
*/
static int qemu_laio_process_requests(void *opaque)
{
- struct qemu_laio_state *s = opaque;
- struct qemu_laiocb *laiocb, *next;
- int res = 0;
-
- QLIST_FOREACH_SAFE (laiocb, &s->completed_reqs, node, next) {
- if (laiocb->async_context_id == get_async_context_id()) {
- qemu_laio_process_completion(s, laiocb);
- QLIST_REMOVE(laiocb, node);
- res = 1;
- }
- }
-
- return res;
-}
-
-/*
- * Puts a request in the completion queue so that its callback is called the
- * next time when it's possible. If we already are in the right AsyncContext,
- * the request is completed immediately instead.
- */
-static void qemu_laio_enqueue_completed(struct qemu_laio_state *s,
- struct qemu_laiocb* laiocb)
-{
- if (laiocb->async_context_id == get_async_context_id()) {
- qemu_laio_process_completion(s, laiocb);
- } else {
- QLIST_INSERT_HEAD(&s->completed_reqs, laiocb, node);
- }
+ return 0;
}
static void qemu_laio_completion_cb(void *opaque)
@@ -141,7 +108,7 @@ static void qemu_laio_completion_cb(void *opaque)
container_of(iocb, struct qemu_laiocb, iocb);
laiocb->ret = io_event_ret(&events[i]);
- qemu_laio_enqueue_completed(s, laiocb);
+ qemu_laio_process_completion(s, laiocb);
}
}
}
@@ -204,7 +171,6 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
laiocb->nbytes = nb_sectors * 512;
laiocb->ctx = s;
laiocb->ret = -EINPROGRESS;
- laiocb->async_context_id = get_async_context_id();
iocbs = &laiocb->iocb;
@@ -239,7 +205,6 @@ void *laio_init(void)
struct qemu_laio_state *s;
s = qemu_mallocz(sizeof(*s));
- QLIST_INIT(&s->completed_reqs);
s->efd = eventfd(0, 0);
if (s->efd == -1)
goto out_free_state;
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index c4116e30f2..8dc00cbb0f 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -49,8 +49,6 @@ struct qemu_paiocb {
ssize_t ret;
int active;
struct qemu_paiocb *next;
-
- int async_context_id;
};
typedef struct PosixAioState {
@@ -200,6 +198,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
return len;
}
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
{
ssize_t offset = 0;
@@ -336,6 +340,19 @@ static void *aio_thread(void *unused)
switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
case QEMU_AIO_READ:
+ ret = handle_aiocb_rw(aiocb);
+ if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
+ /* A short read means that we have reached EOF. Pad the buffer
+ * with zeros for bytes after EOF. */
+ QEMUIOVector qiov;
+
+ qemu_iovec_init_external(&qiov, aiocb->aio_iov,
+ aiocb->aio_niov);
+ qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret);
+
+ ret = aiocb->aio_nbytes;
+ }
+ break;
case QEMU_AIO_WRITE:
ret = handle_aiocb_rw(aiocb);
break;
@@ -420,7 +437,6 @@ static int posix_aio_process_queue(void *opaque)
struct qemu_paiocb *acb, **pacb;
int ret;
int result = 0;
- int async_context_id = get_async_context_id();
for(;;) {
pacb = &s->first_aio;
@@ -429,12 +445,6 @@ static int posix_aio_process_queue(void *opaque)
if (!acb)
return result;
- /* we're only interested in requests in the right context */
- if (acb->async_context_id != async_context_id) {
- pacb = &acb->next;
- continue;
- }
-
ret = qemu_paio_error(acb);
if (ret == ECANCELED) {
/* remove the request */
@@ -575,7 +585,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->ev_signo = SIGUSR2;
- acb->async_context_id = get_async_context_id();
if (qiov) {
acb->aio_iov = qiov->iov;
@@ -604,7 +613,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
acb->aio_type = QEMU_AIO_IOCTL;
acb->aio_fildes = fd;
acb->ev_signo = SIGUSR2;
- acb->async_context_id = get_async_context_id();
acb->aio_offset = 0;
acb->aio_ioctl_buf = buf;
acb->aio_ioctl_cmd = req;
diff --git a/qemu-common.h b/qemu-common.h
index 1e3c66511e..8f21a8cb29 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -115,10 +115,6 @@ int qemu_main(int argc, char **argv, char **envp);
/* bottom halves */
typedef void QEMUBHFunc(void *opaque);
-void async_context_push(void);
-void async_context_pop(void);
-int get_async_context_id(void);
-
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
void qemu_bh_schedule(QEMUBH *bh);
/* Bottom halfs that are scheduled from a bottom half handler are instantly
diff --git a/qemu-coroutine-int.h b/qemu-coroutine-int.h
new file mode 100644
index 0000000000..d495615cf6
--- /dev/null
+++ b/qemu-coroutine-int.h
@@ -0,0 +1,49 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu-queue.h"
+#include "qemu-coroutine.h"
+
+typedef enum {
+ COROUTINE_YIELD = 1,
+ COROUTINE_TERMINATE = 2,
+} CoroutineAction;
+
+struct Coroutine {
+ CoroutineEntry *entry;
+ void *entry_arg;
+ Coroutine *caller;
+ QLIST_ENTRY(Coroutine) pool_next;
+ QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+ CoroutineAction action);
+
+#endif
diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
new file mode 100644
index 0000000000..a80f437c59
--- /dev/null
+++ b/qemu-coroutine-lock.c
@@ -0,0 +1,117 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu-coroutine.h"
+#include "qemu-coroutine-int.h"
+#include "qemu-queue.h"
+#include "trace.h"
+
+static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
+ QTAILQ_HEAD_INITIALIZER(unlock_bh_queue);
+static QEMUBH* unlock_bh;
+
+static void qemu_co_queue_next_bh(void *opaque)
+{
+ Coroutine *next;
+
+ trace_qemu_co_queue_next_bh();
+ while ((next = QTAILQ_FIRST(&unlock_bh_queue))) {
+ QTAILQ_REMOVE(&unlock_bh_queue, next, co_queue_next);
+ qemu_coroutine_enter(next, NULL);
+ }
+}
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+ QTAILQ_INIT(&queue->entries);
+
+ if (!unlock_bh) {
+ unlock_bh = qemu_bh_new(qemu_co_queue_next_bh, NULL);
+ }
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+ Coroutine *self = qemu_coroutine_self();
+ QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+ qemu_coroutine_yield();
+ assert(qemu_in_coroutine());
+}
+
+bool qemu_co_queue_next(CoQueue *queue)
+{
+ Coroutine *next;
+
+ next = QTAILQ_FIRST(&queue->entries);
+ if (next) {
+ QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+ QTAILQ_INSERT_TAIL(&unlock_bh_queue, next, co_queue_next);
+ trace_qemu_co_queue_next(next);
+ qemu_bh_schedule(unlock_bh);
+ }
+
+ return (next != NULL);
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+ return (QTAILQ_FIRST(&queue->entries) == NULL);
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+ memset(mutex, 0, sizeof(*mutex));
+ qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_lock_entry(mutex, self);
+
+ while (mutex->locked) {
+ qemu_co_queue_wait(&mutex->queue);
+ }
+
+ mutex->locked = true;
+
+ trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+ assert(mutex->locked == true);
+ assert(qemu_in_coroutine());
+
+ mutex->locked = false;
+ qemu_co_queue_next(&mutex->queue);
+
+ trace_qemu_co_mutex_unlock_return(mutex, self);
+}
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
new file mode 100644
index 0000000000..600be2643c
--- /dev/null
+++ b/qemu-coroutine.c
@@ -0,0 +1,75 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu-coroutine.h"
+#include "qemu-coroutine-int.h"
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+ Coroutine *co = qemu_coroutine_new();
+ co->entry = entry;
+ return co;
+}
+
+static void coroutine_swap(Coroutine *from, Coroutine *to)
+{
+ CoroutineAction ret;
+
+ ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD);
+
+ switch (ret) {
+ case COROUTINE_YIELD:
+ return;
+ case COROUTINE_TERMINATE:
+ trace_qemu_coroutine_terminate(to);
+ qemu_coroutine_delete(to);
+ return;
+ default:
+ abort();
+ }
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_coroutine_enter(self, co, opaque);
+
+ if (co->caller) {
+ fprintf(stderr, "Co-routine re-entered recursively\n");
+ abort();
+ }
+
+ co->caller = self;
+ co->entry_arg = opaque;
+ coroutine_swap(self, co);
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *to = self->caller;
+
+ trace_qemu_coroutine_yield(self, to);
+
+ if (!to) {
+ fprintf(stderr, "Co-routine is yielding to no one\n");
+ abort();
+ }
+
+ self->caller = NULL;
+ coroutine_swap(self, to);
+}
diff --git a/qemu-coroutine.h b/qemu-coroutine.h
new file mode 100644
index 0000000000..2f2fd95552
--- /dev/null
+++ b/qemu-coroutine.h
@@ -0,0 +1,159 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu-queue.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading. These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions. In the future it would be nice to enable compiler or
+ * static checker support for catching such errors. This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ * static void coroutine_fn foo(void) {
+ * ....
+ * }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time. It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context. Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+ QTAILQ_HEAD(, Coroutine) entries;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+ bool locked;
+ CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+#endif /* QEMU_COROUTINE_H */
diff --git a/test-coroutine.c b/test-coroutine.c
new file mode 100644
index 0000000000..bf9f3e91b5
--- /dev/null
+++ b/test-coroutine.c
@@ -0,0 +1,192 @@
+/*
+ * Coroutine tests
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <glib.h>
+#include "qemu-coroutine.h"
+
+/*
+ * Check that qemu_in_coroutine() works
+ */
+
+static void coroutine_fn verify_in_coroutine(void *opaque)
+{
+ g_assert(qemu_in_coroutine());
+}
+
+static void test_in_coroutine(void)
+{
+ Coroutine *coroutine;
+
+ g_assert(!qemu_in_coroutine());
+
+ coroutine = qemu_coroutine_create(verify_in_coroutine);
+ qemu_coroutine_enter(coroutine, NULL);
+}
+
+/*
+ * Check that qemu_coroutine_self() works
+ */
+
+static void coroutine_fn verify_self(void *opaque)
+{
+ g_assert(qemu_coroutine_self() == opaque);
+}
+
+static void test_self(void)
+{
+ Coroutine *coroutine;
+
+ coroutine = qemu_coroutine_create(verify_self);
+ qemu_coroutine_enter(coroutine, coroutine);
+}
+
+/*
+ * Check that coroutines may nest multiple levels
+ */
+
+typedef struct {
+ unsigned int n_enter; /* num coroutines entered */
+ unsigned int n_return; /* num coroutines returned */
+ unsigned int max; /* maximum level of nesting */
+} NestData;
+
+static void coroutine_fn nest(void *opaque)
+{
+ NestData *nd = opaque;
+
+ nd->n_enter++;
+
+ if (nd->n_enter < nd->max) {
+ Coroutine *child;
+
+ child = qemu_coroutine_create(nest);
+ qemu_coroutine_enter(child, nd);
+ }
+
+ nd->n_return++;
+}
+
+static void test_nesting(void)
+{
+ Coroutine *root;
+ NestData nd = {
+ .n_enter = 0,
+ .n_return = 0,
+ .max = 128,
+ };
+
+ root = qemu_coroutine_create(nest);
+ qemu_coroutine_enter(root, &nd);
+
+ /* Must enter and return from max nesting level */
+ g_assert_cmpint(nd.n_enter, ==, nd.max);
+ g_assert_cmpint(nd.n_return, ==, nd.max);
+}
+
+/*
+ * Check that yield/enter transfer control correctly
+ */
+
+static void coroutine_fn yield_5_times(void *opaque)
+{
+ bool *done = opaque;
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ qemu_coroutine_yield();
+ }
+ *done = true;
+}
+
+static void test_yield(void)
+{
+ Coroutine *coroutine;
+ bool done = false;
+ int i = -1; /* one extra time to return from coroutine */
+
+ coroutine = qemu_coroutine_create(yield_5_times);
+ while (!done) {
+ qemu_coroutine_enter(coroutine, &done);
+ i++;
+ }
+ g_assert_cmpint(i, ==, 5); /* coroutine must yield 5 times */
+}
+
+/*
+ * Check that creation, enter, and return work
+ */
+
+static void coroutine_fn set_and_exit(void *opaque)
+{
+ bool *done = opaque;
+
+ *done = true;
+}
+
+static void test_lifecycle(void)
+{
+ Coroutine *coroutine;
+ bool done = false;
+
+ /* Create, enter, and return from coroutine */
+ coroutine = qemu_coroutine_create(set_and_exit);
+ qemu_coroutine_enter(coroutine, &done);
+ g_assert(done); /* expect done to be true (first time) */
+
+ /* Repeat to check that no state affects this test */
+ done = false;
+ coroutine = qemu_coroutine_create(set_and_exit);
+ qemu_coroutine_enter(coroutine, &done);
+ g_assert(done); /* expect done to be true (second time) */
+}
+
+/*
+ * Lifecycle benchmark
+ */
+
+static void coroutine_fn empty_coroutine(void *opaque)
+{
+ /* Do nothing */
+}
+
+static void perf_lifecycle(void)
+{
+ Coroutine *coroutine;
+ unsigned int i, max;
+ double duration;
+
+ max = 1000000;
+
+ g_test_timer_start();
+ for (i = 0; i < max; i++) {
+ coroutine = qemu_coroutine_create(empty_coroutine);
+ qemu_coroutine_enter(coroutine, NULL);
+ }
+ duration = g_test_timer_elapsed();
+
+ g_test_message("Lifecycle %u iterations: %f s\n", max, duration);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/basic/lifecycle", test_lifecycle);
+ g_test_add_func("/basic/yield", test_yield);
+ g_test_add_func("/basic/nesting", test_nesting);
+ g_test_add_func("/basic/self", test_self);
+ g_test_add_func("/basic/in_coroutine", test_in_coroutine);
+ if (g_test_perf()) {
+ g_test_add_func("/perf/lifecycle", perf_lifecycle);
+ }
+ return g_test_run();
+}
diff --git a/trace-events b/trace-events
index 713f042081..19d31e3541 100644
--- a/trace-events
+++ b/trace-events
@@ -66,6 +66,9 @@ disable bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
disable bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
disable bdrv_set_locked(void *bs, int locked) "bs %p locked %d"
+disable bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+disable bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+disable bdrv_co_io(int is_write, void *acb) "is_write %d acb %p"
# hw/virtio-blk.c
disable virtio_blk_req_complete(void *req, int status) "req %p status %d"
@@ -425,3 +428,16 @@ disable qemu_put_ram_ptr(void* addr) "%p"
# hw/xen_platform.c
disable xen_platform_log(char *s) "xen platform: %s"
+
+# qemu-coroutine.c
+disable qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p"
+disable qemu_coroutine_yield(void *from, void *to) "from %p to %p"
+disable qemu_coroutine_terminate(void *co) "self %p"
+
+# qemu-coroutine-lock.c
+disable qemu_co_queue_next_bh(void) ""
+disable qemu_co_queue_next(void *next) "next %p"
+disable qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
+disable qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p"