diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | Makefile.objs | 11 | ||||
-rw-r--r-- | async.c | 98 | ||||
-rw-r--r-- | block.c | 310 | ||||
-rw-r--r-- | block.h | 7 | ||||
-rw-r--r-- | block/qcow.c | 180 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 26 | ||||
-rw-r--r-- | block/qcow2.c | 240 | ||||
-rw-r--r-- | block/qcow2.h | 5 | ||||
-rw-r--r-- | block/qed-table.c | 14 | ||||
-rw-r--r-- | block/qed.c | 4 | ||||
-rw-r--r-- | block/raw-posix.c | 39 | ||||
-rw-r--r-- | block/raw-win32.c | 35 | ||||
-rw-r--r-- | block/raw.c | 7 | ||||
-rw-r--r-- | block/vpc.c | 8 | ||||
-rw-r--r-- | block_int.h | 10 | ||||
-rw-r--r-- | blockdev.c | 17 | ||||
-rwxr-xr-x | configure | 18 | ||||
-rw-r--r-- | coroutine-gthread.c | 131 | ||||
-rw-r--r-- | coroutine-ucontext.c | 230 | ||||
-rw-r--r-- | coroutine-win32.c | 92 | ||||
-rw-r--r-- | hw/scsi-bus.c | 74 | ||||
-rw-r--r-- | hw/scsi-defs.h | 62 | ||||
-rw-r--r-- | hw/scsi-disk.c | 79 | ||||
-rw-r--r-- | hw/scsi-generic.c | 2 | ||||
-rw-r--r-- | linux-aio.c | 43 | ||||
-rw-r--r-- | posix-aio-compat.c | 30 | ||||
-rw-r--r-- | qemu-common.h | 4 | ||||
-rw-r--r-- | qemu-coroutine-int.h | 49 | ||||
-rw-r--r-- | qemu-coroutine-lock.c | 117 | ||||
-rw-r--r-- | qemu-coroutine.c | 75 | ||||
-rw-r--r-- | qemu-coroutine.h | 159 | ||||
-rw-r--r-- | test-coroutine.c | 192 | ||||
-rw-r--r-- | trace-events | 16 |
35 files changed, 1721 insertions, 667 deletions
diff --git a/.gitignore b/.gitignore index 54835bcb97..59c343c414 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ qemu-io qemu-ga qemu-monitor.texi QMP/qmp-commands.txt +test-coroutine .gdbinit *.a *.aux @@ -151,7 +151,7 @@ qemu-io$(EXESUF): qemu-io.o cmd.o qemu-tool.o qemu-error.o $(oslib-obj-y) $(trac qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@") -check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o: $(GENERATED_HEADERS) +check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS) CHECK_PROG_DEPS = qemu-malloc.o $(oslib-obj-y) $(trace-obj-y) qemu-tool.o @@ -161,6 +161,7 @@ check-qdict: check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o $(C check-qlist: check-qlist.o qlist.o qint.o $(CHECK_PROG_DEPS) check-qfloat: check-qfloat.o qfloat.o $(CHECK_PROG_DEPS) check-qjson: check-qjson.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o qjson.o json-streamer.o json-lexer.o json-parser.o error.o qerror.o qemu-error.o $(CHECK_PROG_DEPS) +test-coroutine: test-coroutine.o qemu-timer-common.o async.o $(coroutine-obj-y) $(CHECK_PROG_DEPS) $(qapi-obj-y): $(GENERATED_HEADERS) qapi-dir := qapi-generated diff --git a/Makefile.objs b/Makefile.objs index 6991a9f52a..89ca3611b3 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -11,10 +11,21 @@ oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o ####################################################################### +# coroutines +coroutine-obj-y = qemu-coroutine.o qemu-coroutine-lock.o +ifeq ($(CONFIG_UCONTEXT_COROUTINE),y) +coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o +else +coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o +endif +coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o + +####################################################################### # block-obj-y is code used by both qemu system emulation and qemu-img block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o async.o block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o +block-obj-y += $(coroutine-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o @@ -25,92 +25,8 @@ #include "qemu-common.h" #include "qemu-aio.h" -/* - * An AsyncContext protects the callbacks of AIO requests and Bottom Halves - * against interfering with each other. A typical example is qcow2 that accepts - * asynchronous requests, but relies for manipulation of its metadata on - * synchronous bdrv_read/write that doesn't trigger any callbacks. - * - * However, these functions are often emulated using AIO which means that AIO - * callbacks must be run - but at the same time we must not run callbacks of - * other requests as they might start to modify metadata and corrupt the - * internal state of the caller of bdrv_read/write. - * - * To achieve the desired semantics we switch into a new AsyncContext. - * Callbacks must only be run if they belong to the current AsyncContext. - * Otherwise they need to be queued until their own context is active again. - * This is how you can make qemu_aio_wait() wait only for your own callbacks. - * - * The AsyncContexts form a stack. When you leave a AsyncContexts, you always - * return to the old ("parent") context. - */ -struct AsyncContext { - /* Consecutive number of the AsyncContext (position in the stack) */ - int id; - - /* Anchor of the list of Bottom Halves belonging to the context */ - struct QEMUBH *first_bh; - - /* Link to parent context */ - struct AsyncContext *parent; -}; - -/* The currently active AsyncContext */ -static struct AsyncContext *async_context = &(struct AsyncContext) { 0 }; - -/* - * Enter a new AsyncContext. Already scheduled Bottom Halves and AIO callbacks - * won't be called until this context is left again. - */ -void async_context_push(void) -{ - struct AsyncContext *new = qemu_mallocz(sizeof(*new)); - new->parent = async_context; - new->id = async_context->id + 1; - async_context = new; -} - -/* Run queued AIO completions and destroy Bottom Half */ -static void bh_run_aio_completions(void *opaque) -{ - QEMUBH **bh = opaque; - qemu_bh_delete(*bh); - qemu_free(bh); - qemu_aio_process_queue(); -} -/* - * Leave the currently active AsyncContext. All Bottom Halves belonging to the - * old context are executed before changing the context. - */ -void async_context_pop(void) -{ - struct AsyncContext *old = async_context; - QEMUBH **bh; - - /* Flush the bottom halves, we don't want to lose them */ - while (qemu_bh_poll()); - - /* Switch back to the parent context */ - async_context = async_context->parent; - qemu_free(old); - - if (async_context == NULL) { - abort(); - } - - /* Schedule BH to run any queued AIO completions as soon as possible */ - bh = qemu_malloc(sizeof(*bh)); - *bh = qemu_bh_new(bh_run_aio_completions, bh); - qemu_bh_schedule(*bh); -} - -/* - * Returns the ID of the currently active AsyncContext - */ -int get_async_context_id(void) -{ - return async_context->id; -} +/* Anchor of the list of Bottom Halves belonging to the context */ +static struct QEMUBH *first_bh; /***********************************************************/ /* bottom halves (can be seen as timers which expire ASAP) */ @@ -130,8 +46,8 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) bh = qemu_mallocz(sizeof(QEMUBH)); bh->cb = cb; bh->opaque = opaque; - bh->next = async_context->first_bh; - async_context->first_bh = bh; + bh->next = first_bh; + first_bh = bh; return bh; } @@ -141,7 +57,7 @@ int qemu_bh_poll(void) int ret; ret = 0; - for (bh = async_context->first_bh; bh; bh = next) { + for (bh = first_bh; bh; bh = next) { next = bh->next; if (!bh->deleted && bh->scheduled) { bh->scheduled = 0; @@ -153,7 +69,7 @@ int qemu_bh_poll(void) } /* remove deleted bhs */ - bhp = &async_context->first_bh; + bhp = &first_bh; while (*bhp) { bh = *bhp; if (bh->deleted) { @@ -199,7 +115,7 @@ void qemu_bh_update_timeout(int *timeout) { QEMUBH *bh; - for (bh = async_context->first_bh; bh; bh = bh->next) { + for (bh = first_bh; bh; bh = bh->next) { if (!bh->deleted && bh->scheduled) { if (bh->idle) { /* idle bottom halves will be polled at least @@ -28,6 +28,7 @@ #include "block_int.h" #include "module.h" #include "qemu-objects.h" +#include "qemu-coroutine.h" #ifdef CONFIG_BSD #include <sys/types.h> @@ -57,6 +58,19 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); +static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov); +static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov); +static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs); static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -169,14 +183,25 @@ void path_combine(char *dest, int dest_size, void bdrv_register(BlockDriver *bdrv) { - if (!bdrv->bdrv_aio_readv) { - /* add AIO emulation layer */ - bdrv->bdrv_aio_readv = bdrv_aio_readv_em; - bdrv->bdrv_aio_writev = bdrv_aio_writev_em; - } else if (!bdrv->bdrv_read) { - /* add synchronous IO emulation layer */ + if (bdrv->bdrv_co_readv) { + /* Emulate AIO by coroutines, and sync by AIO */ + bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em; + bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em; bdrv->bdrv_read = bdrv_read_em; bdrv->bdrv_write = bdrv_write_em; + } else { + bdrv->bdrv_co_readv = bdrv_co_readv_em; + bdrv->bdrv_co_writev = bdrv_co_writev_em; + + if (!bdrv->bdrv_aio_readv) { + /* add AIO emulation layer */ + bdrv->bdrv_aio_readv = bdrv_aio_readv_em; + bdrv->bdrv_aio_writev = bdrv_aio_writev_em; + } else if (!bdrv->bdrv_read) { + /* add synchronous IO emulation layer */ + bdrv->bdrv_read = bdrv_read_em; + bdrv->bdrv_write = bdrv_write_em; + } } if (!bdrv->bdrv_aio_flush) @@ -730,6 +755,8 @@ void bdrv_detach(BlockDriverState *bs, DeviceState *qdev) { assert(bs->peer == qdev); bs->peer = NULL; + bs->change_cb = NULL; + bs->change_opaque = NULL; } DeviceState *bdrv_get_attached(BlockDriverState *bs) @@ -920,6 +947,17 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, nb_sectors * BDRV_SECTOR_SIZE); } +static inline bool bdrv_has_async_rw(BlockDriver *drv) +{ + return drv->bdrv_co_readv != bdrv_co_readv_em + || drv->bdrv_aio_readv != bdrv_aio_readv_em; +} + +static inline bool bdrv_has_async_flush(BlockDriver *drv) +{ + return drv->bdrv_aio_flush != bdrv_aio_flush_em; +} + /* return < 0 if error. See bdrv_write() for the return codes */ int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) @@ -928,6 +966,18 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num, if (!drv) return -ENOMEDIUM; + + if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) { + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + }; + + qemu_iovec_init_external(&qiov, &iov, 1); + return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov); + } + if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; @@ -972,8 +1022,21 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { BlockDriver *drv = bs->drv; + if (!bs->drv) return -ENOMEDIUM; + + if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) { + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + }; + + qemu_iovec_init_external(&qiov, &iov, 1); + return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); + } + if (bs->read_only) return -EACCES; if (bdrv_check_request(bs, sector_num, nb_sectors)) @@ -1108,17 +1171,49 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, return 0; } -/* - * Writes to the file and ensures that no writes are reordered across this - * request (acts as a barrier) - * - * Returns 0 on success, -errno in error cases. - */ -int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BlockDriver *drv = bs->drv; + + trace_bdrv_co_readv(bs, sector_num, nb_sectors); + + if (!drv) { + return -ENOMEDIUM; + } + if (bdrv_check_request(bs, sector_num, nb_sectors)) { + return -EIO; + } + + return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); +} + +int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { - return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num, - buf, BDRV_SECTOR_SIZE * nb_sectors); + BlockDriver *drv = bs->drv; + + trace_bdrv_co_writev(bs, sector_num, nb_sectors); + + if (!bs->drv) { + return -ENOMEDIUM; + } + if (bs->read_only) { + return -EACCES; + } + if (bdrv_check_request(bs, sector_num, nb_sectors)) { + return -EIO; + } + + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + } + + if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { + bs->wr_highest_sector = sector_num + nb_sectors - 1; + } + + return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } /** @@ -1591,6 +1686,10 @@ int bdrv_flush(BlockDriverState *bs) return 0; } + if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) { + return bdrv_co_flush_em(bs); + } + if (bs->drv && bs->drv->bdrv_flush) { return bs->drv->bdrv_flush(bs); } @@ -2580,6 +2679,89 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); } + +typedef struct BlockDriverAIOCBCoroutine { + BlockDriverAIOCB common; + BlockRequest req; + bool is_write; + QEMUBH* bh; +} BlockDriverAIOCBCoroutine; + +static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) +{ + qemu_aio_flush(); +} + +static AIOPool bdrv_em_co_aio_pool = { + .aiocb_size = sizeof(BlockDriverAIOCBCoroutine), + .cancel = bdrv_aio_co_cancel_em, +}; + +static void bdrv_co_rw_bh(void *opaque) +{ + BlockDriverAIOCBCoroutine *acb = opaque; + + acb->common.cb(acb->common.opaque, acb->req.error); + qemu_bh_delete(acb->bh); + qemu_aio_release(acb); +} + +static void coroutine_fn bdrv_co_rw(void *opaque) +{ + BlockDriverAIOCBCoroutine *acb = opaque; + BlockDriverState *bs = acb->common.bs; + + if (!acb->is_write) { + acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector, + acb->req.nb_sectors, acb->req.qiov); + } else { + acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector, + acb->req.nb_sectors, acb->req.qiov); + } + + acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb); + qemu_bh_schedule(acb->bh); +} + +static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque, + bool is_write) +{ + Coroutine *co; + BlockDriverAIOCBCoroutine *acb; + + acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); + acb->req.sector = sector_num; + acb->req.nb_sectors = nb_sectors; + acb->req.qiov = qiov; + acb->is_write = is_write; + + co = qemu_coroutine_create(bdrv_co_rw); + qemu_coroutine_enter(co, acb); + + return &acb->common; +} + +static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, + false); +} + +static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, + true); +} + static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { @@ -2636,8 +2818,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, struct iovec iov; QEMUIOVector qiov; - async_context_push(); - async_ret = NOT_DONE; iov.iov_base = (void *)buf; iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; @@ -2655,7 +2835,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, fail: - async_context_pop(); return async_ret; } @@ -2667,8 +2846,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, struct iovec iov; QEMUIOVector qiov; - async_context_push(); - async_ret = NOT_DONE; iov.iov_base = (void *)buf; iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; @@ -2684,7 +2861,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, } fail: - async_context_pop(); return async_ret; } @@ -2726,6 +2902,77 @@ void qemu_aio_release(void *p) } /**************************************************************/ +/* Coroutine block device emulation */ + +typedef struct CoroutineIOCompletion { + Coroutine *coroutine; + int ret; +} CoroutineIOCompletion; + +static void bdrv_co_io_em_complete(void *opaque, int ret) +{ + CoroutineIOCompletion *co = opaque; + + co->ret = ret; + qemu_coroutine_enter(co->coroutine, NULL); +} + +static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *iov, + bool is_write) +{ + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockDriverAIOCB *acb; + + if (is_write) { + acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors, + bdrv_co_io_em_complete, &co); + } else { + acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors, + bdrv_co_io_em_complete, &co); + } + + trace_bdrv_co_io(is_write, acb); + if (!acb) { + return -EIO; + } + qemu_coroutine_yield(); + + return co.ret; +} + +static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov) +{ + return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); +} + +static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov) +{ + return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); +} + +static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs) +{ + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockDriverAIOCB *acb; + + acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); + if (!acb) { + return -EIO; + } + qemu_coroutine_yield(); + return co.ret; +} + +/**************************************************************/ /* removable device support */ /** @@ -2768,25 +3015,16 @@ int bdrv_media_changed(BlockDriverState *bs) int bdrv_eject(BlockDriverState *bs, int eject_flag) { BlockDriver *drv = bs->drv; - int ret; - if (bs->locked) { + if (eject_flag && bs->locked) { return -EBUSY; } - if (!drv || !drv->bdrv_eject) { - ret = -ENOTSUP; - } else { - ret = drv->bdrv_eject(bs, eject_flag); - } - if (ret == -ENOTSUP) { - ret = 0; + if (drv && drv->bdrv_eject) { + drv->bdrv_eject(bs, eject_flag); } - if (ret >= 0) { - bs->tray_open = eject_flag; - } - - return ret; + bs->tray_open = eject_flag; + return 0; } int bdrv_is_locked(BlockDriverState *bs) @@ -4,6 +4,7 @@ #include "qemu-aio.h" #include "qemu-common.h" #include "qemu-option.h" +#include "qemu-coroutine.h" #include "qobject.h" /* block.c */ @@ -85,8 +86,10 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset, const void *buf, int count); int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, const void *buf, int count); -int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors); +int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); int bdrv_truncate(BlockDriverState *bs, int64_t offset); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); diff --git a/block/qcow.c b/block/qcow.c index 227b104e36..6447c2a1c0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -73,6 +73,7 @@ typedef struct BDRVQcowState { uint32_t crypt_method_header; AES_KEY aes_encrypt_key; AES_KEY aes_decrypt_key; + CoMutex lock; } BDRVQcowState; static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); @@ -517,11 +518,11 @@ static AIOPool qcow_aio_pool = { static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int is_write) + int is_write) { QCowAIOCB *acb; - acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&qcow_aio_pool, bs, NULL, NULL); if (!acb) return NULL; acb->hd_aiocb = NULL; @@ -542,48 +543,15 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, return acb; } -static void qcow_aio_read_cb(void *opaque, int ret); -static void qcow_aio_write_cb(void *opaque, int ret); - -static void qcow_aio_rw_bh(void *opaque) -{ - QCowAIOCB *acb = opaque; - qemu_bh_delete(acb->bh); - acb->bh = NULL; - - if (acb->is_write) { - qcow_aio_write_cb(opaque, 0); - } else { - qcow_aio_read_cb(opaque, 0); - } -} - -static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) -{ - if (acb->bh) { - return -EIO; - } - - acb->bh = qemu_bh_new(cb, acb); - if (!acb->bh) { - return -EIO; - } - - qemu_bh_schedule(acb->bh); - - return 0; -} - -static void qcow_aio_read_cb(void *opaque, int ret) +static int qcow_aio_read_cb(void *opaque) { QCowAIOCB *acb = opaque; BlockDriverState *bs = acb->common.bs; BDRVQcowState *s = bs->opaque; int index_in_cluster; + int ret; acb->hd_aiocb = NULL; - if (ret < 0) - goto done; redo: /* post process the read buffer */ @@ -605,8 +573,7 @@ static void qcow_aio_read_cb(void *opaque, int ret) if (acb->nb_sectors == 0) { /* request completed */ - ret = 0; - goto done; + return 0; } /* prepare next AIO request */ @@ -623,11 +590,12 @@ static void qcow_aio_read_cb(void *opaque, int ret) acb->hd_iov.iov_base = (void *)acb->buf; acb->hd_iov.iov_len = acb->n * 512; qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, - &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) { - ret = -EIO; - goto done; + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->backing_hd, acb->sector_num, + acb->n, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return -EIO; } } else { /* Note: in this case, no need to wait */ @@ -637,64 +605,56 @@ static void qcow_aio_read_cb(void *opaque, int ret) } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { /* add AIO support for compressed blocks ? */ if (decompress_cluster(bs, acb->cluster_offset) < 0) { - ret = -EIO; - goto done; + return -EIO; } memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512, 512 * acb->n); goto redo; } else { if ((acb->cluster_offset & 511) != 0) { - ret = -EIO; - goto done; + return -EIO; } acb->hd_iov.iov_base = (void *)acb->buf; acb->hd_iov.iov_len = acb->n * 512; qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(bs->file, + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->file, (acb->cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) { - ret = -EIO; - goto done; + acb->n, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return ret; } } - return; - -done: - if (acb->qiov->niov > 1) { - qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); - qemu_vfree(acb->orig_buf); - } - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); + return 1; } -static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { + BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; int ret; - acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); - if (!acb) - return NULL; + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0); - ret = qcow_schedule_bh(qcow_aio_rw_bh, acb); - if (ret < 0) { - if (acb->qiov->niov > 1) { - qemu_vfree(acb->orig_buf); - } - qemu_aio_release(acb); - return NULL; + qemu_co_mutex_lock(&s->lock); + do { + ret = qcow_aio_read_cb(acb); + } while (ret > 0); + qemu_co_mutex_unlock(&s->lock); + + if (acb->qiov->niov > 1) { + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); + qemu_vfree(acb->orig_buf); } + qemu_aio_release(acb); - return &acb->common; + return ret; } -static void qcow_aio_write_cb(void *opaque, int ret) +static int qcow_aio_write_cb(void *opaque) { QCowAIOCB *acb = opaque; BlockDriverState *bs = acb->common.bs; @@ -702,20 +662,17 @@ static void qcow_aio_write_cb(void *opaque, int ret) int index_in_cluster; uint64_t cluster_offset; const uint8_t *src_buf; + int ret; acb->hd_aiocb = NULL; - if (ret < 0) - goto done; - acb->nb_sectors -= acb->n; acb->sector_num += acb->n; acb->buf += acb->n * 512; if (acb->nb_sectors == 0) { /* request completed */ - ret = 0; - goto done; + return 0; } index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); @@ -726,16 +683,11 @@ static void qcow_aio_write_cb(void *opaque, int ret) index_in_cluster, index_in_cluster + acb->n); if (!cluster_offset || (cluster_offset & 511) != 0) { - ret = -EIO; - goto done; + return -EIO; } if (s->crypt_method) { if (!acb->cluster_data) { acb->cluster_data = qemu_mallocz(s->cluster_size); - if (!acb->cluster_data) { - ret = -ENOMEM; - goto done; - } } encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, acb->n, 1, &s->aes_encrypt_key); @@ -747,26 +699,19 @@ static void qcow_aio_write_cb(void *opaque, int ret) acb->hd_iov.iov_base = (void *)src_buf; acb->hd_iov.iov_len = acb->n * 512; qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_writev(bs->file, - (cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->n, - qcow_aio_write_cb, acb); - if (acb->hd_aiocb == NULL) { - ret = -EIO; - goto done; + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_writev(bs->file, + (cluster_offset >> 9) + index_in_cluster, + acb->n, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return ret; } - return; - -done: - if (acb->qiov->niov > 1) - qemu_vfree(acb->orig_buf); - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); + return 1; } -static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; @@ -774,21 +719,20 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, s->cluster_cache_offset = -1; /* disable compressed cache */ - acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); - if (!acb) - return NULL; + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1); + qemu_co_mutex_lock(&s->lock); + do { + ret = qcow_aio_write_cb(acb); + } while (ret > 0); + qemu_co_mutex_unlock(&s->lock); - ret = qcow_schedule_bh(qcow_aio_rw_bh, acb); - if (ret < 0) { - if (acb->qiov->niov > 1) { - qemu_vfree(acb->orig_buf); - } - qemu_aio_release(acb); - return NULL; + if (acb->qiov->niov > 1) { + qemu_vfree(acb->orig_buf); } + qemu_aio_release(acb); - return &acb->common; + return ret; } static void qcow_close(BlockDriverState *bs) @@ -1020,8 +964,8 @@ static BlockDriver bdrv_qcow = { .bdrv_is_allocated = qcow_is_allocated, .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, - .bdrv_aio_readv = qcow_aio_readv, - .bdrv_aio_writev = qcow_aio_writev, + .bdrv_co_readv = qcow_co_readv, + .bdrv_co_writev = qcow_co_writev, .bdrv_aio_flush = qcow_aio_flush, .bdrv_write_compressed = qcow_write_compressed, .bdrv_get_info = qcow_get_info, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 882f50a80b..81cf77d83c 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -697,12 +697,12 @@ err: * m->depends_on is set to NULL and the other fields in m are meaningless. * * If the cluster is newly allocated, m->nb_clusters is set to the number of - * contiguous clusters that have been allocated. This may be 0 if the request - * conflict with another write request in flight; in this case, m->depends_on - * is set and the remaining fields of m are meaningless. + * contiguous clusters that have been allocated. In this case, the other + * fields of m are valid and contain information about the first allocated + * cluster. * - * If m->nb_clusters is non-zero, the other fields of m are valid and contain - * information about the first allocated cluster. + * If the request conflicts with another write request in flight, the coroutine + * is queued and will be reentered when the dependency has completed. * * Return 0 on success and -errno in error cases */ @@ -721,6 +721,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, return ret; } +again: nb_clusters = size_to_clusters(s, n_end << 9); nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); @@ -792,12 +793,12 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, } if (nb_clusters == 0) { - /* Set dependency and wait for a callback */ - m->depends_on = old_alloc; - m->nb_clusters = 0; - *num = 0; - - goto out_wait_dependency; + /* Wait for the dependency to complete. We need to recheck + * the free/allocated clusters when we continue. */ + qemu_co_mutex_unlock(&s->lock); + qemu_co_queue_wait(&old_alloc->dependent_requests); + qemu_co_mutex_lock(&s->lock); + goto again; } } } @@ -834,9 +835,6 @@ out: return 0; -out_wait_dependency: - return qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); fail_put: diff --git a/block/qcow2.c b/block/qcow2.c index 48e1b95689..f07d550a96 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -276,6 +276,9 @@ static int qcow2_open(BlockDriverState *bs, int flags) goto fail; } + /* Initialise locks */ + qemu_co_mutex_init(&s->lock); + #ifdef DEBUG_ALLOC qcow2_check_refcounts(bs); #endif @@ -379,7 +382,6 @@ typedef struct QCowAIOCB { uint64_t cluster_offset; uint8_t *cluster_data; bool is_write; - BlockDriverAIOCB *hd_aiocb; QEMUIOVector hd_qiov; QEMUBH *bh; QCowL2Meta l2meta; @@ -389,8 +391,6 @@ typedef struct QCowAIOCB { static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb) { QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common); - if (acb->hd_aiocb) - bdrv_aio_cancel(acb->hd_aiocb); qemu_aio_release(acb); } @@ -399,46 +399,16 @@ static AIOPool qcow2_aio_pool = { .cancel = qcow2_aio_cancel, }; -static void qcow2_aio_read_cb(void *opaque, int ret); -static void qcow2_aio_write_cb(void *opaque, int ret); - -static void qcow2_aio_rw_bh(void *opaque) -{ - QCowAIOCB *acb = opaque; - qemu_bh_delete(acb->bh); - acb->bh = NULL; - - if (acb->is_write) { - qcow2_aio_write_cb(opaque, 0); - } else { - qcow2_aio_read_cb(opaque, 0); - } -} - -static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) -{ - if (acb->bh) - return -EIO; - - acb->bh = qemu_bh_new(cb, acb); - if (!acb->bh) - return -EIO; - - qemu_bh_schedule(acb->bh); - - return 0; -} - -static void qcow2_aio_read_cb(void *opaque, int ret) +/* + * Returns 0 when the request is completed successfully, 1 when there is still + * a part left to do and -errno in error cases. + */ +static int qcow2_aio_read_cb(QCowAIOCB *acb) { - QCowAIOCB *acb = opaque; BlockDriverState *bs = acb->common.bs; BDRVQcowState *s = bs->opaque; int index_in_cluster, n1; - - acb->hd_aiocb = NULL; - if (ret < 0) - goto done; + int ret; /* post process the read buffer */ if (!acb->cluster_offset) { @@ -463,8 +433,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret) if (acb->remaining_sectors == 0) { /* request completed */ - ret = 0; - goto done; + return 0; } /* prepare next AIO request */ @@ -477,7 +446,7 @@ static void qcow2_aio_read_cb(void *opaque, int ret) ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9, &acb->cur_nr_sectors, &acb->cluster_offset); if (ret < 0) { - goto done; + return ret; } index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); @@ -494,42 +463,35 @@ static void qcow2_aio_read_cb(void *opaque, int ret) acb->sector_num, acb->cur_nr_sectors); if (n1 > 0) { BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, - &acb->hd_qiov, n1, qcow2_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) { - ret = -EIO; - goto done; + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->backing_hd, acb->sector_num, + n1, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return ret; } - } else { - ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); - if (ret < 0) - goto done; } + return 1; } else { /* Note: in this case, no need to wait */ qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors); - ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); - if (ret < 0) - goto done; + return 1; } } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { /* add AIO support for compressed blocks ? */ ret = qcow2_decompress_cluster(bs, acb->cluster_offset); if (ret < 0) { - goto done; + return ret; } qemu_iovec_from_buffer(&acb->hd_qiov, s->cluster_cache + index_in_cluster * 512, 512 * acb->cur_nr_sectors); - ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); - if (ret < 0) - goto done; + return 1; } else { if ((acb->cluster_offset & 511) != 0) { - ret = -EIO; - goto done; + return -EIO; } if (s->crypt_method) { @@ -550,21 +512,17 @@ static void qcow2_aio_read_cb(void *opaque, int ret) } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - acb->hd_aiocb = bdrv_aio_readv(bs->file, + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->file, (acb->cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->cur_nr_sectors, - qcow2_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) { - ret = -EIO; - goto done; + acb->cur_nr_sectors, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return ret; } } - return; -done: - acb->common.cb(acb->common.opaque, ret); - qemu_iovec_destroy(&acb->hd_qiov); - qemu_aio_release(acb); + return 1; } static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num, @@ -577,7 +535,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num, acb = qemu_aio_get(&qcow2_aio_pool, bs, cb, opaque); if (!acb) return NULL; - acb->hd_aiocb = NULL; acb->sector_num = sector_num; acb->qiov = qiov; acb->is_write = is_write; @@ -589,70 +546,65 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num, acb->cur_nr_sectors = 0; acb->cluster_offset = 0; acb->l2meta.nb_clusters = 0; - QLIST_INIT(&acb->l2meta.dependent_requests); + qemu_co_queue_init(&acb->l2meta.dependent_requests); return acb; } -static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque) +static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { + BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; int ret; - acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); - if (!acb) - return NULL; + acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0); - ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); - if (ret < 0) { - qemu_iovec_destroy(&acb->hd_qiov); - qemu_aio_release(acb); - return NULL; - } + qemu_co_mutex_lock(&s->lock); + do { + ret = qcow2_aio_read_cb(acb); + } while (ret > 0); + qemu_co_mutex_unlock(&s->lock); - return &acb->common; + qemu_iovec_destroy(&acb->hd_qiov); + qemu_aio_release(acb); + + return ret; } -static void run_dependent_requests(QCowL2Meta *m) +static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) { - QCowAIOCB *req; - QCowAIOCB *next; - /* Take the request off the list of running requests */ if (m->nb_clusters != 0) { QLIST_REMOVE(m, next_in_flight); } /* Restart all dependent requests */ - QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) { - qcow2_aio_write_cb(req, 0); + if (!qemu_co_queue_empty(&m->dependent_requests)) { + qemu_co_mutex_unlock(&s->lock); + while(qemu_co_queue_next(&m->dependent_requests)); + qemu_co_mutex_lock(&s->lock); } - - /* Empty the list for the next part of the request */ - QLIST_INIT(&m->dependent_requests); } -static void qcow2_aio_write_cb(void *opaque, int ret) +/* + * Returns 0 when the request is completed successfully, 1 when there is still + * a part left to do and -errno in error cases. + */ +static int qcow2_aio_write_cb(QCowAIOCB *acb) { - QCowAIOCB *acb = opaque; BlockDriverState *bs = acb->common.bs; BDRVQcowState *s = bs->opaque; int index_in_cluster; int n_end; + int ret; - acb->hd_aiocb = NULL; - - if (ret >= 0) { - ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta); - } + ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta); - run_dependent_requests(&acb->l2meta); + run_dependent_requests(s, &acb->l2meta); - if (ret < 0) - goto done; + if (ret < 0) { + return ret; + } acb->remaining_sectors -= acb->cur_nr_sectors; acb->sector_num += acb->cur_nr_sectors; @@ -660,8 +612,7 @@ static void qcow2_aio_write_cb(void *opaque, int ret) if (acb->remaining_sectors == 0) { /* request completed */ - ret = 0; - goto done; + return 0; } index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); @@ -673,18 +624,10 @@ static void qcow2_aio_write_cb(void *opaque, int ret) ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9, index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta); if (ret < 0) { - goto done; + return ret; } acb->cluster_offset = acb->l2meta.cluster_offset; - - /* Need to wait for another request? If so, we are done for now. */ - if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) { - QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests, - acb, next_depend); - return; - } - assert((acb->cluster_offset & 511) == 0); qemu_iovec_reset(&acb->hd_qiov); @@ -709,51 +652,40 @@ static void qcow2_aio_write_cb(void *opaque, int ret) } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - acb->hd_aiocb = bdrv_aio_writev(bs->file, - (acb->cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->cur_nr_sectors, - qcow2_aio_write_cb, acb); - if (acb->hd_aiocb == NULL) { - ret = -EIO; - goto fail; + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_writev(bs->file, + (acb->cluster_offset >> 9) + index_in_cluster, + acb->cur_nr_sectors, &acb->hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + return ret; } - return; - -fail: - if (acb->l2meta.nb_clusters != 0) { - QLIST_REMOVE(&acb->l2meta, next_in_flight); - } -done: - acb->common.cb(acb->common.opaque, ret); - qemu_iovec_destroy(&acb->hd_qiov); - qemu_aio_release(acb); + return 1; } -static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque) +static int qcow2_co_writev(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; int ret; + acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1); s->cluster_cache_offset = -1; /* disable compressed cache */ - acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); - if (!acb) - return NULL; + qemu_co_mutex_lock(&s->lock); + do { + ret = qcow2_aio_write_cb(acb); + } while (ret > 0); + qemu_co_mutex_unlock(&s->lock); - ret = qcow2_schedule_bh(qcow2_aio_rw_bh, acb); - if (ret < 0) { - qemu_iovec_destroy(&acb->hd_qiov); - qemu_aio_release(acb); - return NULL; - } + qemu_iovec_destroy(&acb->hd_qiov); + qemu_aio_release(acb); - return &acb->common; + return ret; } static void qcow2_close(BlockDriverState *bs) @@ -881,7 +813,7 @@ static int preallocate(BlockDriverState *bs) nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; - QLIST_INIT(&meta.dependent_requests); + qemu_co_queue_init(&meta.dependent_requests); meta.cluster_offset = 0; while (nb_sectors) { @@ -899,7 +831,7 @@ static int preallocate(BlockDriverState *bs) /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ - run_dependent_requests(&meta); + run_dependent_requests(bs->opaque, &meta); /* TODO Preallocate data if requested */ @@ -1387,8 +1319,8 @@ static BlockDriver bdrv_qcow2 = { .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, - .bdrv_aio_readv = qcow2_aio_readv, - .bdrv_aio_writev = qcow2_aio_writev, + .bdrv_co_readv = qcow2_co_readv, + .bdrv_co_writev = qcow2_co_writev, .bdrv_aio_flush = qcow2_aio_flush, .bdrv_discard = qcow2_discard, diff --git a/block/qcow2.h b/block/qcow2.h index 6a0a21b694..de23abe1a4 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -26,6 +26,7 @@ #define BLOCK_QCOW2_H #include "aes.h" +#include "qemu-coroutine.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -114,6 +115,8 @@ typedef struct BDRVQcowState { int64_t free_cluster_index; int64_t free_byte_offset; + CoMutex lock; + uint32_t crypt_method; /* current crypt method, 0 if no key yet */ uint32_t crypt_method_header; AES_KEY aes_encrypt_key; @@ -146,7 +149,7 @@ typedef struct QCowL2Meta int nb_available; int nb_clusters; struct QCowL2Meta *depends_on; - QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests; + CoQueue dependent_requests; QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; diff --git a/block/qed-table.c b/block/qed-table.c index d38c673547..d96afa81d7 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -179,16 +179,12 @@ int qed_read_l1_table_sync(BDRVQEDState *s) { int ret = -EINPROGRESS; - async_context_push(); - qed_read_table(s, s->header.l1_table_offset, s->l1_table, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { qemu_aio_wait(); } - async_context_pop(); - return ret; } @@ -205,15 +201,11 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, { int ret = -EINPROGRESS; - async_context_push(); - qed_write_l1_table(s, index, n, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { qemu_aio_wait(); } - async_context_pop(); - return ret; } @@ -282,14 +274,11 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset { int ret = -EINPROGRESS; - async_context_push(); - qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { qemu_aio_wait(); } - async_context_pop(); return ret; } @@ -307,13 +296,10 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, { int ret = -EINPROGRESS; - async_context_push(); - qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); while (ret == -EINPROGRESS) { qemu_aio_wait(); } - async_context_pop(); return ret; } diff --git a/block/qed.c b/block/qed.c index 39703793e9..333f067582 100644 --- a/block/qed.c +++ b/block/qed.c @@ -680,16 +680,12 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num, }; QEDRequest request = { .l2_table = NULL }; - async_context_push(); - qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb); while (cb.is_allocated == -1) { qemu_aio_wait(); } - async_context_pop(); - qed_unref_l2_cache_entry(request.l2_table); return cb.is_allocated; diff --git a/block/raw-posix.c b/block/raw-posix.c index cd89c8312a..c5c99446c0 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -230,13 +230,15 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, } } + /* We're falling back to POSIX AIO in some cases so init always */ + if (paio_init() < 0) { + goto out_free_buf; + } + #ifdef CONFIG_LINUX_AIO if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { - /* We're falling back to POSIX AIO in some cases */ - paio_init(); - s->aio_ctx = laio_init(); if (!s->aio_ctx) { goto out_free_buf; @@ -245,9 +247,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, } else #endif { - if (paio_init() < 0) { - goto out_free_buf; - } #ifdef CONFIG_LINUX_AIO s->use_aio = 0; #endif @@ -587,7 +586,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, /* * If O_DIRECT is used the buffer needs to be aligned on a sector - * boundary. Check if this is the case or telll the low-level + * boundary. Check if this is the case or tell the low-level * driver that it needs to copy the buffer. */ if (s->aligned_buf) { @@ -1254,7 +1253,7 @@ static int floppy_media_changed(BlockDriverState *bs) return ret; } -static int floppy_eject(BlockDriverState *bs, int eject_flag) +static void floppy_eject(BlockDriverState *bs, int eject_flag) { BDRVRawState *s = bs->opaque; int fd; @@ -1269,8 +1268,6 @@ static int floppy_eject(BlockDriverState *bs, int eject_flag) perror("FDEJECT"); close(fd); } - - return 0; } static BlockDriver bdrv_host_floppy = { @@ -1348,7 +1345,7 @@ static int cdrom_is_inserted(BlockDriverState *bs) return 0; } -static int cdrom_eject(BlockDriverState *bs, int eject_flag) +static void cdrom_eject(BlockDriverState *bs, int eject_flag) { BDRVRawState *s = bs->opaque; @@ -1359,11 +1356,9 @@ static int cdrom_eject(BlockDriverState *bs, int eject_flag) if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0) perror("CDROMEJECT"); } - - return 0; } -static int cdrom_set_locked(BlockDriverState *bs, int locked) +static void cdrom_set_locked(BlockDriverState *bs, int locked) { BDRVRawState *s = bs->opaque; @@ -1374,8 +1369,6 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked) */ /* perror("CDROM_LOCKDOOR"); */ } - - return 0; } static BlockDriver bdrv_host_cdrom = { @@ -1464,12 +1457,12 @@ static int cdrom_is_inserted(BlockDriverState *bs) return raw_getlength(bs) > 0; } -static int cdrom_eject(BlockDriverState *bs, int eject_flag) +static void cdrom_eject(BlockDriverState *bs, int eject_flag) { BDRVRawState *s = bs->opaque; if (s->fd < 0) - return -ENOTSUP; + return; (void) ioctl(s->fd, CDIOCALLOW); @@ -1481,17 +1474,15 @@ static int cdrom_eject(BlockDriverState *bs, int eject_flag) perror("CDIOCCLOSE"); } - if (cdrom_reopen(bs) < 0) - return -ENOTSUP; - return 0; + cdrom_reopen(bs); } -static int cdrom_set_locked(BlockDriverState *bs, int locked) +static void cdrom_set_locked(BlockDriverState *bs, int locked) { BDRVRawState *s = bs->opaque; if (s->fd < 0) - return -ENOTSUP; + return; if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) { /* * Note: an error can happen if the distribution automatically @@ -1499,8 +1490,6 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked) */ /* perror("CDROM_LOCKDOOR"); */ } - - return 0; } static BlockDriver bdrv_host_cdrom = { diff --git a/block/raw-win32.c b/block/raw-win32.c index 91067e7595..e47cfe0f4a 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -393,41 +393,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) return 0; } -#if 0 -/***********************************************/ -/* removable device additional commands */ - -static int raw_is_inserted(BlockDriverState *bs) -{ - return 1; -} - -static int raw_media_changed(BlockDriverState *bs) -{ - return -ENOTSUP; -} - -static int raw_eject(BlockDriverState *bs, int eject_flag) -{ - DWORD ret_count; - - if (s->type == FTYPE_FILE) - return -ENOTSUP; - if (eject_flag) { - DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA, - NULL, 0, NULL, 0, &lpBytesReturned, NULL); - } else { - DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA, - NULL, 0, NULL, 0, &lpBytesReturned, NULL); - } -} - -static int raw_set_locked(BlockDriverState *bs, int locked) -{ - return -ENOTSUP; -} -#endif - static int hdev_has_zero_init(BlockDriverState *bs) { return 0; diff --git a/block/raw.c b/block/raw.c index b0f72d6a62..cb6203eeca 100644 --- a/block/raw.c +++ b/block/raw.c @@ -75,15 +75,14 @@ static int raw_is_inserted(BlockDriverState *bs) return bdrv_is_inserted(bs->file); } -static int raw_eject(BlockDriverState *bs, int eject_flag) +static void raw_eject(BlockDriverState *bs, int eject_flag) { - return bdrv_eject(bs->file, eject_flag); + bdrv_eject(bs->file, eject_flag); } -static int raw_set_locked(BlockDriverState *bs, int locked) +static void raw_set_locked(BlockDriverState *bs, int locked) { bdrv_set_locked(bs->file, locked); - return 0; } static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) diff --git a/block/vpc.c b/block/vpc.c index 56865da5bc..fdd5236892 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -156,6 +156,7 @@ static int vpc_open(BlockDriverState *bs, int flags) struct vhd_dyndisk_header* dyndisk_header; uint8_t buf[HEADER_SIZE]; uint32_t checksum; + int err = -1; if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE) goto fail; @@ -176,6 +177,11 @@ static int vpc_open(BlockDriverState *bs, int flags) bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; + if (bs->total_sectors >= 65535 * 16 * 255) { + err = -EFBIG; + goto fail; + } + if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE) != HEADER_SIZE) goto fail; @@ -222,7 +228,7 @@ static int vpc_open(BlockDriverState *bs, int flags) return 0; fail: - return -1; + return err; } /* diff --git a/block_int.h b/block_int.h index efb68038c4..f6d02b38a7 100644 --- a/block_int.h +++ b/block_int.h @@ -27,6 +27,7 @@ #include "block.h" #include "qemu-option.h" #include "qemu-queue.h" +#include "qemu-coroutine.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 @@ -77,6 +78,11 @@ struct BlockDriver { int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); + int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs, int num_reqs); int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a, @@ -112,8 +118,8 @@ struct BlockDriver { /* removable device specific */ int (*bdrv_is_inserted)(BlockDriverState *bs); int (*bdrv_media_changed)(BlockDriverState *bs); - int (*bdrv_eject)(BlockDriverState *bs, int eject_flag); - int (*bdrv_set_locked)(BlockDriverState *bs, int locked); + void (*bdrv_eject)(BlockDriverState *bs, int eject_flag); + void (*bdrv_set_locked)(BlockDriverState *bs, int locked); /* to control generic scsi devices */ int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf); diff --git a/blockdev.c b/blockdev.c index 0b8d3a4f83..a25367a9e3 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,16 +646,13 @@ out: static int eject_device(Monitor *mon, BlockDriverState *bs, int force) { - if (!force) { - if (!bdrv_is_removable(bs)) { - qerror_report(QERR_DEVICE_NOT_REMOVABLE, - bdrv_get_device_name(bs)); - return -1; - } - if (bdrv_is_locked(bs)) { - qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs)); - return -1; - } + if (!bdrv_is_removable(bs)) { + qerror_report(QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs)); + return -1; + } + if (!force && bdrv_is_locked(bs)) { + qerror_report(QERR_DEVICE_LOCKED, bdrv_get_device_name(bs)); + return -1; } bdrv_close(bs); return 0; @@ -2557,6 +2557,20 @@ EOF fi ########################################## +# check if we have makecontext + +ucontext_coroutine=no +if test "$darwin" != "yes"; then + cat > $TMPC << EOF +#include <ucontext.h> +int main(void) { makecontext(0, 0, 0); } +EOF + if compile_prog "" "" ; then + ucontext_coroutine=yes + fi +fi + +########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -3034,6 +3048,10 @@ if test "$rbd" = "yes" ; then echo "CONFIG_RBD=y" >> $config_host_mak fi +if test "$ucontext_coroutine" = "yes" ; then + echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak +fi + # USB host support case "$usb" in linux) diff --git a/coroutine-gthread.c b/coroutine-gthread.c new file mode 100644 index 0000000000..f09877e14f --- /dev/null +++ b/coroutine-gthread.c @@ -0,0 +1,131 @@ +/* + * GThread coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <glib.h> +#include "qemu-common.h" +#include "qemu-coroutine-int.h" + +typedef struct { + Coroutine base; + GThread *thread; + bool runnable; + CoroutineAction action; +} CoroutineGThread; + +static GCond *coroutine_cond; +static GStaticMutex coroutine_lock = G_STATIC_MUTEX_INIT; +static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT; + +static void __attribute__((constructor)) coroutine_init(void) +{ + if (!g_thread_supported()) { + g_thread_init(NULL); + } + + coroutine_cond = g_cond_new(); +} + +static void coroutine_wait_runnable_locked(CoroutineGThread *co) +{ + while (!co->runnable) { + g_cond_wait(coroutine_cond, g_static_mutex_get_mutex(&coroutine_lock)); + } +} + +static void coroutine_wait_runnable(CoroutineGThread *co) +{ + g_static_mutex_lock(&coroutine_lock); + coroutine_wait_runnable_locked(co); + g_static_mutex_unlock(&coroutine_lock); +} + +static gpointer coroutine_thread(gpointer opaque) +{ + CoroutineGThread *co = opaque; + + g_static_private_set(&coroutine_key, co, NULL); + coroutine_wait_runnable(co); + co->base.entry(co->base.entry_arg); + qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE); + return NULL; +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineGThread *co; + + co = qemu_mallocz(sizeof(*co)); + co->thread = g_thread_create_full(coroutine_thread, co, 0, TRUE, TRUE, + G_THREAD_PRIORITY_NORMAL, NULL); + if (!co->thread) { + qemu_free(co); + return NULL; + } + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_); + + g_thread_join(co->thread); + qemu_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, + Coroutine *to_, + CoroutineAction action) +{ + CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_); + CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_); + + g_static_mutex_lock(&coroutine_lock); + from->runnable = false; + from->action = action; + to->runnable = true; + to->action = action; + g_cond_broadcast(coroutine_cond); + + if (action != COROUTINE_TERMINATE) { + coroutine_wait_runnable_locked(from); + } + g_static_mutex_unlock(&coroutine_lock); + return from->action; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineGThread *co = g_static_private_get(&coroutine_key); + + if (!co) { + co = qemu_mallocz(sizeof(*co)); + co->runnable = true; + g_static_private_set(&coroutine_key, co, (GDestroyNotify)qemu_free); + } + + return &co->base; +} + +bool qemu_in_coroutine(void) +{ + CoroutineGThread *co = g_static_private_get(&coroutine_key); + + return co && co->base.caller; +} diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c new file mode 100644 index 0000000000..41c2379a2a --- /dev/null +++ b/coroutine-ucontext.c @@ -0,0 +1,230 @@ +/* + * ucontext coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include <stdlib.h> +#include <setjmp.h> +#include <stdint.h> +#include <pthread.h> +#include <ucontext.h> +#include "qemu-common.h" +#include "qemu-coroutine-int.h" + +enum { + /* Maximum free pool size prevents holding too many freed coroutines */ + POOL_MAX_SIZE = 64, +}; + +typedef struct { + Coroutine base; + void *stack; + jmp_buf env; +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +typedef struct { + /** Currently executing coroutine */ + Coroutine *current; + + /** Free list to speed up creation */ + QLIST_HEAD(, Coroutine) pool; + unsigned int pool_size; + + /** The default coroutine */ + CoroutineUContext leader; +} CoroutineThreadState; + +static pthread_key_t thread_state_key; + +/* + * va_args to makecontext() must be type 'int', so passing + * the pointer we need may require several int args. This + * union is a quick hack to let us do that + */ +union cc_arg { + void *p; + int i[2]; +}; + +static CoroutineThreadState *coroutine_get_thread_state(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + if (!s) { + s = qemu_mallocz(sizeof(*s)); + s->current = &s->leader.base; + QLIST_INIT(&s->pool); + pthread_setspecific(thread_state_key, s); + } + return s; +} + +static void qemu_coroutine_thread_cleanup(void *opaque) +{ + CoroutineThreadState *s = opaque; + Coroutine *co; + Coroutine *tmp; + + QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) { + qemu_free(DO_UPCAST(CoroutineUContext, base, co)->stack); + qemu_free(co); + } + qemu_free(s); +} + +static void __attribute__((constructor)) coroutine_init(void) +{ + int ret; + + ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); + if (ret != 0) { + fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); + abort(); + } +} + +static void coroutine_trampoline(int i0, int i1) +{ + union cc_arg arg; + CoroutineUContext *self; + Coroutine *co; + + arg.i[0] = i0; + arg.i[1] = i1; + self = arg.p; + co = &self->base; + + /* Initialize longjmp environment and switch back the caller */ + if (!setjmp(self->env)) { + longjmp(*(jmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +static Coroutine *coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + ucontext_t old_uc, uc; + jmp_buf old_env; + union cc_arg arg; + + /* The ucontext functions preserve signal masks which incurs a system call + * overhead. setjmp()/longjmp() does not preserve signal masks but only + * works on the current stack. Since we need a way to create and switch to + * a new stack, use the ucontext functions for that but setjmp()/longjmp() + * for everything else. + */ + + if (getcontext(&uc) == -1) { + abort(); + } + + co = qemu_mallocz(sizeof(*co)); + co->stack = qemu_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + uc.uc_link = &old_uc; + uc.uc_stack.ss_sp = co->stack; + uc.uc_stack.ss_size = stack_size; + uc.uc_stack.ss_flags = 0; + + arg.p = co; + + makecontext(&uc, (void (*)(void))coroutine_trampoline, + 2, arg.i[0], arg.i[1]); + + /* swapcontext() in, longjmp() back out */ + if (!setjmp(old_env)) { + swapcontext(&old_uc, &uc); + } + return &co->base; +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + Coroutine *co; + + co = QLIST_FIRST(&s->pool); + if (co) { + QLIST_REMOVE(co, pool_next); + s->pool_size--; + } else { + co = coroutine_new(); + } + return co; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + + if (s->pool_size < POOL_MAX_SIZE) { + QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next); + co->base.caller = NULL; + s->pool_size++; + return; + } + + qemu_free(co->stack); + qemu_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineThreadState *s = coroutine_get_thread_state(); + int ret; + + s->current = to_; + + ret = setjmp(from->env); + if (ret == 0) { + longjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + + return s->current; +} + +bool qemu_in_coroutine(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + return s && s->current->caller; +} diff --git a/coroutine-win32.c b/coroutine-win32.c new file mode 100644 index 0000000000..0e29448473 --- /dev/null +++ b/coroutine-win32.c @@ -0,0 +1,92 @@ +/* + * Win32 coroutine initialization code + * + * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu-coroutine-int.h" + +typedef struct +{ + Coroutine base; + + LPVOID fiber; + CoroutineAction action; +} CoroutineWin32; + +static __thread CoroutineWin32 leader; +static __thread Coroutine *current; + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); + CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); + + current = to_; + + to->action = action; + SwitchToFiber(to->fiber); + return from->action; +} + +static void CALLBACK coroutine_trampoline(void *co_) +{ + Coroutine *co = co_; + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineWin32 *co; + + co = qemu_mallocz(sizeof(*co)); + co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base); + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_); + + DeleteFiber(co->fiber); + qemu_free(co); +} + +Coroutine *qemu_coroutine_self(void) +{ + if (!current) { + current = &leader.base; + leader.fiber = ConvertThreadToFiber(NULL); + } + return current; +} + +bool qemu_in_coroutine(void) +{ + return current && current->caller; +} diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c index 8b1a412210..0b0344c1fd 100644 --- a/hw/scsi-bus.c +++ b/hw/scsi-bus.c @@ -223,7 +223,7 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd) switch(cmd[0]) { case TEST_UNIT_READY: - case REZERO_UNIT: + case REWIND: case START_STOP: case SEEK_6: case WRITE_FILEMARKS: @@ -232,24 +232,24 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd) case RELEASE: case ERASE: case ALLOW_MEDIUM_REMOVAL: - case VERIFY: + case VERIFY_10: case SEEK_10: case SYNCHRONIZE_CACHE: case LOCK_UNLOCK_CACHE: case LOAD_UNLOAD: case SET_CD_SPEED: case SET_LIMITS: - case WRITE_LONG: + case WRITE_LONG_10: case MOVE_MEDIUM: case UPDATE_BLOCK: req->cmd.xfer = 0; break; case MODE_SENSE: break; - case WRITE_SAME: + case WRITE_SAME_10: req->cmd.xfer = 1; break; - case READ_CAPACITY: + case READ_CAPACITY_10: req->cmd.xfer = 8; break; case READ_BLOCK_LIMITS: @@ -265,7 +265,7 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd) req->cmd.xfer *= 8; break; case WRITE_10: - case WRITE_VERIFY: + case WRITE_VERIFY_10: case WRITE_6: case WRITE_12: case WRITE_VERIFY_12: @@ -325,7 +325,7 @@ static void scsi_req_xfer_mode(SCSIRequest *req) switch (req->cmd.buf[0]) { case WRITE_6: case WRITE_10: - case WRITE_VERIFY: + case WRITE_VERIFY_10: case WRITE_12: case WRITE_VERIFY_12: case WRITE_16: @@ -345,15 +345,13 @@ static void scsi_req_xfer_mode(SCSIRequest *req) case SEARCH_HIGH: case SEARCH_LOW: case UPDATE_BLOCK: - case WRITE_LONG: - case WRITE_SAME: + case WRITE_LONG_10: + case WRITE_SAME_10: case SEARCH_HIGH_12: case SEARCH_EQUAL_12: case SEARCH_LOW_12: - case SET_WINDOW: case MEDIUM_SCAN: case SEND_VOLUME_TAG: - case WRITE_LONG_2: case PERSISTENT_RESERVE_OUT: case MAINTENANCE_OUT: req->cmd.mode = SCSI_XFER_TO_DEV; @@ -517,8 +515,7 @@ static const char *scsi_command_name(uint8_t cmd) { static const char *names[] = { [ TEST_UNIT_READY ] = "TEST_UNIT_READY", - [ REZERO_UNIT ] = "REZERO_UNIT", - /* REWIND and REZERO_UNIT use the same operation code */ + [ REWIND ] = "REWIND", [ REQUEST_SENSE ] = "REQUEST_SENSE", [ FORMAT_UNIT ] = "FORMAT_UNIT", [ READ_BLOCK_LIMITS ] = "READ_BLOCK_LIMITS", @@ -543,14 +540,12 @@ static const char *scsi_command_name(uint8_t cmd) [ RECEIVE_DIAGNOSTIC ] = "RECEIVE_DIAGNOSTIC", [ SEND_DIAGNOSTIC ] = "SEND_DIAGNOSTIC", [ ALLOW_MEDIUM_REMOVAL ] = "ALLOW_MEDIUM_REMOVAL", - - [ SET_WINDOW ] = "SET_WINDOW", - [ READ_CAPACITY ] = "READ_CAPACITY", + [ READ_CAPACITY_10 ] = "READ_CAPACITY_10", [ READ_10 ] = "READ_10", [ WRITE_10 ] = "WRITE_10", [ SEEK_10 ] = "SEEK_10", - [ WRITE_VERIFY ] = "WRITE_VERIFY", - [ VERIFY ] = "VERIFY", + [ WRITE_VERIFY_10 ] = "WRITE_VERIFY_10", + [ VERIFY_10 ] = "VERIFY_10", [ SEARCH_HIGH ] = "SEARCH_HIGH", [ SEARCH_EQUAL ] = "SEARCH_EQUAL", [ SEARCH_LOW ] = "SEARCH_LOW", @@ -566,11 +561,14 @@ static const char *scsi_command_name(uint8_t cmd) [ WRITE_BUFFER ] = "WRITE_BUFFER", [ READ_BUFFER ] = "READ_BUFFER", [ UPDATE_BLOCK ] = "UPDATE_BLOCK", - [ READ_LONG ] = "READ_LONG", - [ WRITE_LONG ] = "WRITE_LONG", + [ READ_LONG_10 ] = "READ_LONG_10", + [ WRITE_LONG_10 ] = "WRITE_LONG_10", [ CHANGE_DEFINITION ] = "CHANGE_DEFINITION", - [ WRITE_SAME ] = "WRITE_SAME", + [ WRITE_SAME_10 ] = "WRITE_SAME_10", + [ UNMAP ] = "UNMAP", [ READ_TOC ] = "READ_TOC", + [ REPORT_DENSITY_SUPPORT ] = "REPORT_DENSITY_SUPPORT", + [ GET_CONFIGURATION ] = "GET_CONFIGURATION", [ LOG_SELECT ] = "LOG_SELECT", [ LOG_SENSE ] = "LOG_SENSE", [ MODE_SELECT_10 ] = "MODE_SELECT_10", @@ -579,27 +577,39 @@ static const char *scsi_command_name(uint8_t cmd) [ MODE_SENSE_10 ] = "MODE_SENSE_10", [ PERSISTENT_RESERVE_IN ] = "PERSISTENT_RESERVE_IN", [ PERSISTENT_RESERVE_OUT ] = "PERSISTENT_RESERVE_OUT", + [ WRITE_FILEMARKS_16 ] = "WRITE_FILEMARKS_16", + [ EXTENDED_COPY ] = "EXTENDED_COPY", + [ ATA_PASSTHROUGH ] = "ATA_PASSTHROUGH", + [ ACCESS_CONTROL_IN ] = "ACCESS_CONTROL_IN", + [ ACCESS_CONTROL_OUT ] = "ACCESS_CONTROL_OUT", + [ READ_16 ] = "READ_16", + [ COMPARE_AND_WRITE ] = "COMPARE_AND_WRITE", + [ WRITE_16 ] = "WRITE_16", + [ WRITE_VERIFY_16 ] = "WRITE_VERIFY_16", + [ VERIFY_16 ] = "VERIFY_16", + [ SYNCHRONIZE_CACHE_16 ] = "SYNCHRONIZE_CACHE_16", + [ LOCATE_16 ] = "LOCATE_16", + [ WRITE_SAME_16 ] = "WRITE_SAME_16", + [ ERASE_16 ] = "ERASE_16", + [ SERVICE_ACTION_IN ] = "SERVICE_ACTION_IN", + [ WRITE_LONG_16 ] = "WRITE_LONG_16", + [ REPORT_LUNS ] = "REPORT_LUNS", + [ BLANK ] = "BLANK", + [ MAINTENANCE_IN ] = "MAINTENANCE_IN", + [ MAINTENANCE_OUT ] = "MAINTENANCE_OUT", [ MOVE_MEDIUM ] = "MOVE_MEDIUM", + [ LOAD_UNLOAD ] = "LOAD_UNLOAD", [ READ_12 ] = "READ_12", [ WRITE_12 ] = "WRITE_12", [ WRITE_VERIFY_12 ] = "WRITE_VERIFY_12", + [ VERIFY_12 ] = "VERIFY_12", [ SEARCH_HIGH_12 ] = "SEARCH_HIGH_12", [ SEARCH_EQUAL_12 ] = "SEARCH_EQUAL_12", [ SEARCH_LOW_12 ] = "SEARCH_LOW_12", [ READ_ELEMENT_STATUS ] = "READ_ELEMENT_STATUS", [ SEND_VOLUME_TAG ] = "SEND_VOLUME_TAG", - [ WRITE_LONG_2 ] = "WRITE_LONG_2", - - [ REPORT_DENSITY_SUPPORT ] = "REPORT_DENSITY_SUPPORT", - [ GET_CONFIGURATION ] = "GET_CONFIGURATION", - [ READ_16 ] = "READ_16", - [ WRITE_16 ] = "WRITE_16", - [ WRITE_VERIFY_16 ] = "WRITE_VERIFY_16", - [ SERVICE_ACTION_IN ] = "SERVICE_ACTION_IN", - [ REPORT_LUNS ] = "REPORT_LUNS", - [ LOAD_UNLOAD ] = "LOAD_UNLOAD", + [ READ_DEFECT_DATA_12 ] = "READ_DEFECT_DATA_12", [ SET_CD_SPEED ] = "SET_CD_SPEED", - [ BLANK ] = "BLANK", }; if (cmd >= ARRAY_SIZE(names) || names[cmd] == NULL) diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h index 413cce07b5..27010b74c0 100644 --- a/hw/scsi-defs.h +++ b/hw/scsi-defs.h @@ -25,7 +25,7 @@ */ #define TEST_UNIT_READY 0x00 -#define REZERO_UNIT 0x01 +#define REWIND 0x01 #define REQUEST_SENSE 0x03 #define FORMAT_UNIT 0x04 #define READ_BLOCK_LIMITS 0x05 @@ -48,14 +48,13 @@ #define RECEIVE_DIAGNOSTIC 0x1c #define SEND_DIAGNOSTIC 0x1d #define ALLOW_MEDIUM_REMOVAL 0x1e - -#define SET_WINDOW 0x24 -#define READ_CAPACITY 0x25 +#define READ_CAPACITY_10 0x25 #define READ_10 0x28 #define WRITE_10 0x2a #define SEEK_10 0x2b -#define WRITE_VERIFY 0x2e -#define VERIFY 0x2f +#define LOCATE_10 0x2b +#define WRITE_VERIFY_10 0x2e +#define VERIFY_10 0x2f #define SEARCH_HIGH 0x30 #define SEARCH_EQUAL 0x31 #define SEARCH_LOW 0x32 @@ -71,11 +70,14 @@ #define WRITE_BUFFER 0x3b #define READ_BUFFER 0x3c #define UPDATE_BLOCK 0x3d -#define READ_LONG 0x3e -#define WRITE_LONG 0x3f +#define READ_LONG_10 0x3e +#define WRITE_LONG_10 0x3f #define CHANGE_DEFINITION 0x40 -#define WRITE_SAME 0x41 +#define WRITE_SAME_10 0x41 +#define UNMAP 0x42 #define READ_TOC 0x43 +#define REPORT_DENSITY_SUPPORT 0x44 +#define GET_CONFIGURATION 0x46 #define LOG_SELECT 0x4c #define LOG_SENSE 0x4d #define MODE_SELECT_10 0x55 @@ -84,32 +86,40 @@ #define MODE_SENSE_10 0x5a #define PERSISTENT_RESERVE_IN 0x5e #define PERSISTENT_RESERVE_OUT 0x5f +#define VARLENGTH_CDB 0x7f +#define WRITE_FILEMARKS_16 0x80 +#define EXTENDED_COPY 0x83 +#define ATA_PASSTHROUGH 0x85 +#define ACCESS_CONTROL_IN 0x86 +#define ACCESS_CONTROL_OUT 0x87 +#define READ_16 0x88 +#define COMPARE_AND_WRITE 0x89 +#define WRITE_16 0x8a +#define WRITE_VERIFY_16 0x8e +#define VERIFY_16 0x8f +#define SYNCHRONIZE_CACHE_16 0x91 +#define LOCATE_16 0x92 #define WRITE_SAME_16 0x93 +#define ERASE_16 0x93 +#define SERVICE_ACTION_IN 0x9e +#define WRITE_LONG_16 0x9f +#define REPORT_LUNS 0xa0 +#define BLANK 0xa1 #define MAINTENANCE_IN 0xa3 #define MAINTENANCE_OUT 0xa4 #define MOVE_MEDIUM 0xa5 +#define LOAD_UNLOAD 0xa6 #define READ_12 0xa8 #define WRITE_12 0xaa #define WRITE_VERIFY_12 0xae +#define VERIFY_12 0xaf #define SEARCH_HIGH_12 0xb0 #define SEARCH_EQUAL_12 0xb1 #define SEARCH_LOW_12 0xb2 #define READ_ELEMENT_STATUS 0xb8 #define SEND_VOLUME_TAG 0xb6 -#define WRITE_LONG_2 0xea - -/* from hw/scsi-generic.c */ -#define REWIND 0x01 -#define REPORT_DENSITY_SUPPORT 0x44 -#define GET_CONFIGURATION 0x46 -#define READ_16 0x88 -#define WRITE_16 0x8a -#define WRITE_VERIFY_16 0x8e -#define SERVICE_ACTION_IN 0x9e -#define REPORT_LUNS 0xa0 -#define LOAD_UNLOAD 0xa6 -#define SET_CD_SPEED 0xbb -#define BLANK 0xa1 +#define READ_DEFECT_DATA_12 0xb7 +#define SET_CD_SPEED 0xbb /* * SAM Status codes @@ -154,6 +164,7 @@ #define TYPE_DISK 0x00 #define TYPE_TAPE 0x01 +#define TYPE_PRINTER 0x02 #define TYPE_PROCESSOR 0x03 /* HP scanners use this */ #define TYPE_WORM 0x04 /* Treated as ROM by our system */ #define TYPE_ROM 0x05 @@ -161,6 +172,9 @@ #define TYPE_MOD 0x07 /* Magneto-optical disk - * - treated as TYPE_DISK */ #define TYPE_MEDIUM_CHANGER 0x08 -#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */ +#define TYPE_STORAGE_ARRAY 0x0c /* Storage array device */ +#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */ +#define TYPE_RBC 0x0e /* Simplified Direct-Access Device */ +#define TYPE_OSD 0x11 /* Object-storage Device */ #define TYPE_NO_LUN 0x7f diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index f42a5d1f85..fa198f928c 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -59,8 +59,6 @@ typedef struct SCSIDiskReq { uint32_t status; } SCSIDiskReq; -typedef enum { SCSI_HD, SCSI_CD } SCSIDriveKind; - struct SCSIDiskState { SCSIDevice qdev; @@ -74,7 +72,6 @@ struct SCSIDiskState char *version; char *serial; SCSISense sense; - SCSIDriveKind drive_kind; }; static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type); @@ -382,7 +379,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) return -1; } - if (s->drive_kind == SCSI_CD) { + if (s->qdev.type == TYPE_ROM) { outbuf[buflen++] = 5; } else { outbuf[buflen++] = 0; @@ -401,7 +398,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) if (s->serial) outbuf[buflen++] = 0x80; // unit serial number outbuf[buflen++] = 0x83; // device identification - if (s->drive_kind == SCSI_HD) { + if (s->qdev.type == TYPE_DISK) { outbuf[buflen++] = 0xb0; // block limits outbuf[buflen++] = 0xb2; // thin provisioning } @@ -460,7 +457,7 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) unsigned int opt_io_size = s->qdev.conf.opt_io_size / s->qdev.blocksize; - if (s->drive_kind == SCSI_CD) { + if (s->qdev.type == TYPE_ROM) { DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n", page_code); return -1; @@ -526,16 +523,15 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) memset(outbuf, 0, buflen); if (req->lun) { - outbuf[0] = 0x7f; /* LUN not supported */ + outbuf[0] = 0x7f; /* LUN not supported */ return buflen; } - if (s->drive_kind == SCSI_CD) { - outbuf[0] = 5; + outbuf[0] = s->qdev.type & 0x1f; + if (s->qdev.type == TYPE_ROM) { outbuf[1] = 0x80; memcpy(&outbuf[16], "QEMU CD-ROM ", 16); } else { - outbuf[0] = 0; outbuf[1] = s->removable ? 0x80 : 0; memcpy(&outbuf[16], "QEMU HARDDISK ", 16); } @@ -661,7 +657,7 @@ static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p, return p[1] + 2; case 0x2a: /* CD Capabilities and Mechanical Status page. */ - if (s->drive_kind != SCSI_CD) + if (s->qdev.type != TYPE_ROM) return 0; p[0] = 0x2a; p[1] = 0x14; @@ -836,7 +832,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) case TEST_UNIT_READY: if (!bdrv_is_inserted(s->bs)) goto not_ready; - break; + break; case REQUEST_SENSE: if (req->cmd.xfer < 4) goto illegal_request; @@ -848,7 +844,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) buflen = scsi_disk_emulate_inquiry(req, outbuf); if (buflen < 0) goto illegal_request; - break; + break; case MODE_SENSE: case MODE_SENSE_10: buflen = scsi_disk_emulate_mode_sense(req, outbuf); @@ -877,18 +873,18 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) goto illegal_request; break; case START_STOP: - if (s->drive_kind == SCSI_CD && (req->cmd.buf[4] & 2)) { + if (s->qdev.type == TYPE_ROM && (req->cmd.buf[4] & 2)) { /* load/eject medium */ bdrv_eject(s->bs, !(req->cmd.buf[4] & 1)); } - break; + break; case ALLOW_MEDIUM_REMOVAL: bdrv_set_locked(s->bs, req->cmd.buf[4] & 1); - break; - case READ_CAPACITY: + break; + case READ_CAPACITY_10: /* The normal LEN field for this command is zero. */ - memset(outbuf, 0, 8); - bdrv_get_geometry(s->bs, &nb_sectors); + memset(outbuf, 0, 8); + bdrv_get_geometry(s->bs, &nb_sectors); if (!nb_sectors) goto not_ready; nb_sectors /= s->cluster_size; @@ -908,7 +904,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) outbuf[6] = s->cluster_size * 2; outbuf[7] = 0; buflen = 8; - break; + break; case SYNCHRONIZE_CACHE: ret = bdrv_flush(s->bs); if (ret < 0) { @@ -970,13 +966,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) outbuf[3] = 8; buflen = 16; break; - case VERIFY: - break; - case REZERO_UNIT: - DPRINTF("Rezero Unit\n"); - if (!bdrv_is_inserted(s->bs)) { - goto not_ready; - } + case VERIFY_10: break; default: scsi_command_complete(r, CHECK_CONDITION, SENSE_CODE(INVALID_OPCODE)); @@ -1052,14 +1042,13 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf) case RELEASE_10: case START_STOP: case ALLOW_MEDIUM_REMOVAL: - case READ_CAPACITY: + case READ_CAPACITY_10: case SYNCHRONIZE_CACHE: case READ_TOC: case GET_CONFIGURATION: case SERVICE_ACTION_IN: case REPORT_LUNS: - case VERIFY: - case REZERO_UNIT: + case VERIFY_10: rc = scsi_disk_emulate_command(r, outbuf); if (rc < 0) { return 0; @@ -1082,7 +1071,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf) case WRITE_10: case WRITE_12: case WRITE_16: - case WRITE_VERIFY: + case WRITE_VERIFY_10: case WRITE_VERIFY_12: case WRITE_VERIFY_16: len = r->req.cmd.xfer / s->qdev.blocksize; @@ -1190,7 +1179,7 @@ static void scsi_destroy(SCSIDevice *dev) blockdev_mark_auto_del(s->qdev.conf.bs); } -static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind) +static int scsi_initfn(SCSIDevice *dev, uint8_t scsi_type) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); DriveInfo *dinfo; @@ -1200,9 +1189,8 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind) return -1; } s->bs = s->qdev.conf.bs; - s->drive_kind = kind; - if (kind == SCSI_HD && !bdrv_is_inserted(s->bs)) { + if (scsi_type == TYPE_DISK && !bdrv_is_inserted(s->bs)) { error_report("Device needs media, but drive is empty"); return -1; } @@ -1224,44 +1212,47 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind) return -1; } - if (kind == SCSI_CD) { + if (scsi_type == TYPE_ROM) { s->qdev.blocksize = 2048; - } else { + } else if (scsi_type == TYPE_DISK) { s->qdev.blocksize = s->qdev.conf.logical_block_size; + } else { + error_report("scsi-disk: Unhandled SCSI type %02x", scsi_type); + return -1; } s->cluster_size = s->qdev.blocksize / 512; s->bs->buffer_alignment = s->qdev.blocksize; - s->qdev.type = TYPE_DISK; + s->qdev.type = scsi_type; qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s); - bdrv_set_removable(s->bs, kind == SCSI_CD); + bdrv_set_removable(s->bs, scsi_type == TYPE_ROM); add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, ",0"); return 0; } static int scsi_hd_initfn(SCSIDevice *dev) { - return scsi_initfn(dev, SCSI_HD); + return scsi_initfn(dev, TYPE_DISK); } static int scsi_cd_initfn(SCSIDevice *dev) { - return scsi_initfn(dev, SCSI_CD); + return scsi_initfn(dev, TYPE_ROM); } static int scsi_disk_initfn(SCSIDevice *dev) { - SCSIDriveKind kind; DriveInfo *dinfo; + uint8_t scsi_type; if (!dev->conf.bs) { - kind = SCSI_HD; /* will die in scsi_initfn() */ + scsi_type = TYPE_DISK; /* will die in scsi_initfn() */ } else { dinfo = drive_get_by_blockdev(dev->conf.bs); - kind = dinfo->media_cd ? SCSI_CD : SCSI_HD; + scsi_type = dinfo->media_cd ? TYPE_ROM : TYPE_DISK; } - return scsi_initfn(dev, kind); + return scsi_initfn(dev, scsi_type); } #define DEFINE_SCSI_DISK_PROPERTIES() \ diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index 63361b3542..7b0026eb98 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -406,7 +406,7 @@ static int get_blocksize(BlockDriverState *bdrv) memset(cmd, 0, sizeof(cmd)); memset(buf, 0, sizeof(buf)); - cmd[0] = READ_CAPACITY; + cmd[0] = READ_CAPACITY_10; memset(&io_header, 0, sizeof(io_header)); io_header.interface_id = 'S'; diff --git a/linux-aio.c b/linux-aio.c index 68f4b3d757..dc3faf2499 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -31,7 +31,6 @@ struct qemu_laiocb { struct iocb iocb; ssize_t ret; size_t nbytes; - int async_context_id; QLIST_ENTRY(qemu_laiocb) node; }; @@ -39,7 +38,6 @@ struct qemu_laio_state { io_context_t ctx; int efd; int count; - QLIST_HEAD(, qemu_laiocb) completed_reqs; }; static inline ssize_t io_event_ret(struct io_event *ev) @@ -49,7 +47,6 @@ static inline ssize_t io_event_ret(struct io_event *ev) /* * Completes an AIO request (calls the callback and frees the ACB). - * Be sure to be in the right AsyncContext before calling this function. */ static void qemu_laio_process_completion(struct qemu_laio_state *s, struct qemu_laiocb *laiocb) @@ -72,42 +69,12 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, } /* - * Processes all queued AIO requests, i.e. requests that have return from OS - * but their callback was not called yet. Requests that cannot have their - * callback called in the current AsyncContext, remain in the queue. - * - * Returns 1 if at least one request could be completed, 0 otherwise. + * All requests are directly processed when they complete, so there's nothing + * left to do during qemu_aio_wait(). */ static int qemu_laio_process_requests(void *opaque) { - struct qemu_laio_state *s = opaque; - struct qemu_laiocb *laiocb, *next; - int res = 0; - - QLIST_FOREACH_SAFE (laiocb, &s->completed_reqs, node, next) { - if (laiocb->async_context_id == get_async_context_id()) { - qemu_laio_process_completion(s, laiocb); - QLIST_REMOVE(laiocb, node); - res = 1; - } - } - - return res; -} - -/* - * Puts a request in the completion queue so that its callback is called the - * next time when it's possible. If we already are in the right AsyncContext, - * the request is completed immediately instead. - */ -static void qemu_laio_enqueue_completed(struct qemu_laio_state *s, - struct qemu_laiocb* laiocb) -{ - if (laiocb->async_context_id == get_async_context_id()) { - qemu_laio_process_completion(s, laiocb); - } else { - QLIST_INSERT_HEAD(&s->completed_reqs, laiocb, node); - } + return 0; } static void qemu_laio_completion_cb(void *opaque) @@ -141,7 +108,7 @@ static void qemu_laio_completion_cb(void *opaque) container_of(iocb, struct qemu_laiocb, iocb); laiocb->ret = io_event_ret(&events[i]); - qemu_laio_enqueue_completed(s, laiocb); + qemu_laio_process_completion(s, laiocb); } } } @@ -204,7 +171,6 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, laiocb->nbytes = nb_sectors * 512; laiocb->ctx = s; laiocb->ret = -EINPROGRESS; - laiocb->async_context_id = get_async_context_id(); iocbs = &laiocb->iocb; @@ -239,7 +205,6 @@ void *laio_init(void) struct qemu_laio_state *s; s = qemu_mallocz(sizeof(*s)); - QLIST_INIT(&s->completed_reqs); s->efd = eventfd(0, 0); if (s->efd == -1) goto out_free_state; diff --git a/posix-aio-compat.c b/posix-aio-compat.c index c4116e30f2..8dc00cbb0f 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -49,8 +49,6 @@ struct qemu_paiocb { ssize_t ret; int active; struct qemu_paiocb *next; - - int async_context_id; }; typedef struct PosixAioState { @@ -200,6 +198,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb) return len; } +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf) { ssize_t offset = 0; @@ -336,6 +340,19 @@ static void *aio_thread(void *unused) switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { case QEMU_AIO_READ: + ret = handle_aiocb_rw(aiocb); + if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) { + /* A short read means that we have reached EOF. Pad the buffer + * with zeros for bytes after EOF. */ + QEMUIOVector qiov; + + qemu_iovec_init_external(&qiov, aiocb->aio_iov, + aiocb->aio_niov); + qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret); + + ret = aiocb->aio_nbytes; + } + break; case QEMU_AIO_WRITE: ret = handle_aiocb_rw(aiocb); break; @@ -420,7 +437,6 @@ static int posix_aio_process_queue(void *opaque) struct qemu_paiocb *acb, **pacb; int ret; int result = 0; - int async_context_id = get_async_context_id(); for(;;) { pacb = &s->first_aio; @@ -429,12 +445,6 @@ static int posix_aio_process_queue(void *opaque) if (!acb) return result; - /* we're only interested in requests in the right context */ - if (acb->async_context_id != async_context_id) { - pacb = &acb->next; - continue; - } - ret = qemu_paio_error(acb); if (ret == ECANCELED) { /* remove the request */ @@ -575,7 +585,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; acb->ev_signo = SIGUSR2; - acb->async_context_id = get_async_context_id(); if (qiov) { acb->aio_iov = qiov->iov; @@ -604,7 +613,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, acb->aio_type = QEMU_AIO_IOCTL; acb->aio_fildes = fd; acb->ev_signo = SIGUSR2; - acb->async_context_id = get_async_context_id(); acb->aio_offset = 0; acb->aio_ioctl_buf = buf; acb->aio_ioctl_cmd = req; diff --git a/qemu-common.h b/qemu-common.h index 1e3c66511e..8f21a8cb29 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -115,10 +115,6 @@ int qemu_main(int argc, char **argv, char **envp); /* bottom halves */ typedef void QEMUBHFunc(void *opaque); -void async_context_push(void); -void async_context_pop(void); -int get_async_context_id(void); - QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); void qemu_bh_schedule(QEMUBH *bh); /* Bottom halfs that are scheduled from a bottom half handler are instantly diff --git a/qemu-coroutine-int.h b/qemu-coroutine-int.h new file mode 100644 index 0000000000..d495615cf6 --- /dev/null +++ b/qemu-coroutine-int.h @@ -0,0 +1,49 @@ +/* + * Coroutine internals + * + * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_COROUTINE_INT_H +#define QEMU_COROUTINE_INT_H + +#include "qemu-queue.h" +#include "qemu-coroutine.h" + +typedef enum { + COROUTINE_YIELD = 1, + COROUTINE_TERMINATE = 2, +} CoroutineAction; + +struct Coroutine { + CoroutineEntry *entry; + void *entry_arg; + Coroutine *caller; + QLIST_ENTRY(Coroutine) pool_next; + QTAILQ_ENTRY(Coroutine) co_queue_next; +}; + +Coroutine *qemu_coroutine_new(void); +void qemu_coroutine_delete(Coroutine *co); +CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, + CoroutineAction action); + +#endif diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c new file mode 100644 index 0000000000..a80f437c59 --- /dev/null +++ b/qemu-coroutine-lock.c @@ -0,0 +1,117 @@ +/* + * coroutine queues and locks + * + * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu-coroutine.h" +#include "qemu-coroutine-int.h" +#include "qemu-queue.h" +#include "trace.h" + +static QTAILQ_HEAD(, Coroutine) unlock_bh_queue = + QTAILQ_HEAD_INITIALIZER(unlock_bh_queue); +static QEMUBH* unlock_bh; + +static void qemu_co_queue_next_bh(void *opaque) +{ + Coroutine *next; + + trace_qemu_co_queue_next_bh(); + while ((next = QTAILQ_FIRST(&unlock_bh_queue))) { + QTAILQ_REMOVE(&unlock_bh_queue, next, co_queue_next); + qemu_coroutine_enter(next, NULL); + } +} + +void qemu_co_queue_init(CoQueue *queue) +{ + QTAILQ_INIT(&queue->entries); + + if (!unlock_bh) { + unlock_bh = qemu_bh_new(qemu_co_queue_next_bh, NULL); + } +} + +void coroutine_fn qemu_co_queue_wait(CoQueue *queue) +{ + Coroutine *self = qemu_coroutine_self(); + QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next); + qemu_coroutine_yield(); + assert(qemu_in_coroutine()); +} + +bool qemu_co_queue_next(CoQueue *queue) +{ + Coroutine *next; + + next = QTAILQ_FIRST(&queue->entries); + if (next) { + QTAILQ_REMOVE(&queue->entries, next, co_queue_next); + QTAILQ_INSERT_TAIL(&unlock_bh_queue, next, co_queue_next); + trace_qemu_co_queue_next(next); + qemu_bh_schedule(unlock_bh); + } + + return (next != NULL); +} + +bool qemu_co_queue_empty(CoQueue *queue) +{ + return (QTAILQ_FIRST(&queue->entries) == NULL); +} + +void qemu_co_mutex_init(CoMutex *mutex) +{ + memset(mutex, 0, sizeof(*mutex)); + qemu_co_queue_init(&mutex->queue); +} + +void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_co_mutex_lock_entry(mutex, self); + + while (mutex->locked) { + qemu_co_queue_wait(&mutex->queue); + } + + mutex->locked = true; + + trace_qemu_co_mutex_lock_return(mutex, self); +} + +void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_co_mutex_unlock_entry(mutex, self); + + assert(mutex->locked == true); + assert(qemu_in_coroutine()); + + mutex->locked = false; + qemu_co_queue_next(&mutex->queue); + + trace_qemu_co_mutex_unlock_return(mutex, self); +} diff --git a/qemu-coroutine.c b/qemu-coroutine.c new file mode 100644 index 0000000000..600be2643c --- /dev/null +++ b/qemu-coroutine.c @@ -0,0 +1,75 @@ +/* + * QEMU coroutines + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Kevin Wolf <kwolf@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "qemu-common.h" +#include "qemu-coroutine.h" +#include "qemu-coroutine-int.h" + +Coroutine *qemu_coroutine_create(CoroutineEntry *entry) +{ + Coroutine *co = qemu_coroutine_new(); + co->entry = entry; + return co; +} + +static void coroutine_swap(Coroutine *from, Coroutine *to) +{ + CoroutineAction ret; + + ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD); + + switch (ret) { + case COROUTINE_YIELD: + return; + case COROUTINE_TERMINATE: + trace_qemu_coroutine_terminate(to); + qemu_coroutine_delete(to); + return; + default: + abort(); + } +} + +void qemu_coroutine_enter(Coroutine *co, void *opaque) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_coroutine_enter(self, co, opaque); + + if (co->caller) { + fprintf(stderr, "Co-routine re-entered recursively\n"); + abort(); + } + + co->caller = self; + co->entry_arg = opaque; + coroutine_swap(self, co); +} + +void coroutine_fn qemu_coroutine_yield(void) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *to = self->caller; + + trace_qemu_coroutine_yield(self, to); + + if (!to) { + fprintf(stderr, "Co-routine is yielding to no one\n"); + abort(); + } + + self->caller = NULL; + coroutine_swap(self, to); +} diff --git a/qemu-coroutine.h b/qemu-coroutine.h new file mode 100644 index 0000000000..2f2fd95552 --- /dev/null +++ b/qemu-coroutine.h @@ -0,0 +1,159 @@ +/* + * QEMU coroutine implementation + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Kevin Wolf <kwolf@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QEMU_COROUTINE_H +#define QEMU_COROUTINE_H + +#include <stdbool.h> +#include "qemu-queue.h" + +/** + * Coroutines are a mechanism for stack switching and can be used for + * cooperative userspace threading. These functions provide a simple but + * useful flavor of coroutines that is suitable for writing sequential code, + * rather than callbacks, for operations that need to give up control while + * waiting for events to complete. + * + * These functions are re-entrant and may be used outside the global mutex. + */ + +/** + * Mark a function that executes in coroutine context + * + * Functions that execute in coroutine context cannot be called directly from + * normal functions. In the future it would be nice to enable compiler or + * static checker support for catching such errors. This annotation might make + * it possible and in the meantime it serves as documentation. + * + * For example: + * + * static void coroutine_fn foo(void) { + * .... + * } + */ +#define coroutine_fn + +typedef struct Coroutine Coroutine; + +/** + * Coroutine entry point + * + * When the coroutine is entered for the first time, opaque is passed in as an + * argument. + * + * When this function returns, the coroutine is destroyed automatically and + * execution continues in the caller who last entered the coroutine. + */ +typedef void coroutine_fn CoroutineEntry(void *opaque); + +/** + * Create a new coroutine + * + * Use qemu_coroutine_enter() to actually transfer control to the coroutine. + */ +Coroutine *qemu_coroutine_create(CoroutineEntry *entry); + +/** + * Transfer control to a coroutine + * + * The opaque argument is passed as the argument to the entry point when + * entering the coroutine for the first time. It is subsequently ignored. + */ +void qemu_coroutine_enter(Coroutine *coroutine, void *opaque); + +/** + * Transfer control back to a coroutine's caller + * + * This function does not return until the coroutine is re-entered using + * qemu_coroutine_enter(). + */ +void coroutine_fn qemu_coroutine_yield(void); + +/** + * Get the currently executing coroutine + */ +Coroutine *coroutine_fn qemu_coroutine_self(void); + +/** + * Return whether or not currently inside a coroutine + * + * This can be used to write functions that work both when in coroutine context + * and when not in coroutine context. Note that such functions cannot use the + * coroutine_fn annotation since they work outside coroutine context. + */ +bool qemu_in_coroutine(void); + + + +/** + * CoQueues are a mechanism to queue coroutines in order to continue executing + * them later. They provide the fundamental primitives on which coroutine locks + * are built. + */ +typedef struct CoQueue { + QTAILQ_HEAD(, Coroutine) entries; +} CoQueue; + +/** + * Initialise a CoQueue. This must be called before any other operation is used + * on the CoQueue. + */ +void qemu_co_queue_init(CoQueue *queue); + +/** + * Adds the current coroutine to the CoQueue and transfers control to the + * caller of the coroutine. + */ +void coroutine_fn qemu_co_queue_wait(CoQueue *queue); + +/** + * Restarts the next coroutine in the CoQueue and removes it from the queue. + * + * Returns true if a coroutine was restarted, false if the queue is empty. + */ +bool qemu_co_queue_next(CoQueue *queue); + +/** + * Checks if the CoQueue is empty. + */ +bool qemu_co_queue_empty(CoQueue *queue); + + +/** + * Provides a mutex that can be used to synchronise coroutines + */ +typedef struct CoMutex { + bool locked; + CoQueue queue; +} CoMutex; + +/** + * Initialises a CoMutex. This must be called before any other operation is used + * on the CoMutex. + */ +void qemu_co_mutex_init(CoMutex *mutex); + +/** + * Locks the mutex. If the lock cannot be taken immediately, control is + * transferred to the caller of the current coroutine. + */ +void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); + +/** + * Unlocks the mutex and schedules the next coroutine that was waiting for this + * lock to be run. + */ +void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); + +#endif /* QEMU_COROUTINE_H */ diff --git a/test-coroutine.c b/test-coroutine.c new file mode 100644 index 0000000000..bf9f3e91b5 --- /dev/null +++ b/test-coroutine.c @@ -0,0 +1,192 @@ +/* + * Coroutine tests + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include <glib.h> +#include "qemu-coroutine.h" + +/* + * Check that qemu_in_coroutine() works + */ + +static void coroutine_fn verify_in_coroutine(void *opaque) +{ + g_assert(qemu_in_coroutine()); +} + +static void test_in_coroutine(void) +{ + Coroutine *coroutine; + + g_assert(!qemu_in_coroutine()); + + coroutine = qemu_coroutine_create(verify_in_coroutine); + qemu_coroutine_enter(coroutine, NULL); +} + +/* + * Check that qemu_coroutine_self() works + */ + +static void coroutine_fn verify_self(void *opaque) +{ + g_assert(qemu_coroutine_self() == opaque); +} + +static void test_self(void) +{ + Coroutine *coroutine; + + coroutine = qemu_coroutine_create(verify_self); + qemu_coroutine_enter(coroutine, coroutine); +} + +/* + * Check that coroutines may nest multiple levels + */ + +typedef struct { + unsigned int n_enter; /* num coroutines entered */ + unsigned int n_return; /* num coroutines returned */ + unsigned int max; /* maximum level of nesting */ +} NestData; + +static void coroutine_fn nest(void *opaque) +{ + NestData *nd = opaque; + + nd->n_enter++; + + if (nd->n_enter < nd->max) { + Coroutine *child; + + child = qemu_coroutine_create(nest); + qemu_coroutine_enter(child, nd); + } + + nd->n_return++; +} + +static void test_nesting(void) +{ + Coroutine *root; + NestData nd = { + .n_enter = 0, + .n_return = 0, + .max = 128, + }; + + root = qemu_coroutine_create(nest); + qemu_coroutine_enter(root, &nd); + + /* Must enter and return from max nesting level */ + g_assert_cmpint(nd.n_enter, ==, nd.max); + g_assert_cmpint(nd.n_return, ==, nd.max); +} + +/* + * Check that yield/enter transfer control correctly + */ + +static void coroutine_fn yield_5_times(void *opaque) +{ + bool *done = opaque; + int i; + + for (i = 0; i < 5; i++) { + qemu_coroutine_yield(); + } + *done = true; +} + +static void test_yield(void) +{ + Coroutine *coroutine; + bool done = false; + int i = -1; /* one extra time to return from coroutine */ + + coroutine = qemu_coroutine_create(yield_5_times); + while (!done) { + qemu_coroutine_enter(coroutine, &done); + i++; + } + g_assert_cmpint(i, ==, 5); /* coroutine must yield 5 times */ +} + +/* + * Check that creation, enter, and return work + */ + +static void coroutine_fn set_and_exit(void *opaque) +{ + bool *done = opaque; + + *done = true; +} + +static void test_lifecycle(void) +{ + Coroutine *coroutine; + bool done = false; + + /* Create, enter, and return from coroutine */ + coroutine = qemu_coroutine_create(set_and_exit); + qemu_coroutine_enter(coroutine, &done); + g_assert(done); /* expect done to be true (first time) */ + + /* Repeat to check that no state affects this test */ + done = false; + coroutine = qemu_coroutine_create(set_and_exit); + qemu_coroutine_enter(coroutine, &done); + g_assert(done); /* expect done to be true (second time) */ +} + +/* + * Lifecycle benchmark + */ + +static void coroutine_fn empty_coroutine(void *opaque) +{ + /* Do nothing */ +} + +static void perf_lifecycle(void) +{ + Coroutine *coroutine; + unsigned int i, max; + double duration; + + max = 1000000; + + g_test_timer_start(); + for (i = 0; i < max; i++) { + coroutine = qemu_coroutine_create(empty_coroutine); + qemu_coroutine_enter(coroutine, NULL); + } + duration = g_test_timer_elapsed(); + + g_test_message("Lifecycle %u iterations: %f s\n", max, duration); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/basic/lifecycle", test_lifecycle); + g_test_add_func("/basic/yield", test_yield); + g_test_add_func("/basic/nesting", test_nesting); + g_test_add_func("/basic/self", test_self); + g_test_add_func("/basic/in_coroutine", test_in_coroutine); + if (g_test_perf()) { + g_test_add_func("/perf/lifecycle", perf_lifecycle); + } + return g_test_run(); +} diff --git a/trace-events b/trace-events index 713f042081..19d31e3541 100644 --- a/trace-events +++ b/trace-events @@ -66,6 +66,9 @@ disable bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" disable bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" disable bdrv_set_locked(void *bs, int locked) "bs %p locked %d" +disable bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" +disable bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" +disable bdrv_co_io(int is_write, void *acb) "is_write %d acb %p" # hw/virtio-blk.c disable virtio_blk_req_complete(void *req, int status) "req %p status %d" @@ -425,3 +428,16 @@ disable qemu_put_ram_ptr(void* addr) "%p" # hw/xen_platform.c disable xen_platform_log(char *s) "xen platform: %s" + +# qemu-coroutine.c +disable qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p" +disable qemu_coroutine_yield(void *from, void *to) "from %p to %p" +disable qemu_coroutine_terminate(void *co) "self %p" + +# qemu-coroutine-lock.c +disable qemu_co_queue_next_bh(void) "" +disable qemu_co_queue_next(void *next) "next %p" +disable qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p" +disable qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p" +disable qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p" +disable qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p" |