aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2012-10-26 11:43:58 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2012-10-31 10:38:13 +0100
commita27365265cc2fed1178bf25a205e8ee02a9c0caf (patch)
treeaa5f89fd139ea99b02c8fac5c488eea6d62a9491 /block
parent10fb6e06825743bd517d4b5bb0e7b9e05e0fe92c (diff)
raw-win32: implement native asynchronous I/O
With the new support for EventNotifiers in the AIO event loop, we can hook a completion port to every opened file and use asynchronous I/O on them. Wine's support is extremely inefficient, also because it really does the I/O synchronously on regular files. (!) But it works, and it is good to keep the Win32 and POSIX ports as similar as possible. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'block')
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/raw-aio.h10
-rw-r--r--block/raw-win32.c42
-rw-r--r--block/win32-aio.c226
4 files changed, 274 insertions, 6 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 771d3414d9..30ef6aec03 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o
+block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/block/raw-aio.h b/block/raw-aio.h
index b3bb07377e..e77f361148 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -35,4 +35,14 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
BlockDriverCompletionFunc *cb, void *opaque, int type);
#endif
+#ifdef _WIN32
+typedef struct QEMUWin32AIOState QEMUWin32AIOState;
+QEMUWin32AIOState *win32_aio_init(void);
+int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+ QEMUWin32AIOState *aio, HANDLE hfile,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int type);
+#endif
+
#endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-win32.c b/block/raw-win32.c
index ffd86e3f38..0c05c58c5a 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -36,6 +36,8 @@
#define FTYPE_CD 1
#define FTYPE_HARDDISK 2
+static QEMUWin32AIOState *aio;
+
typedef struct RawWin32AIOData {
BlockDriverState *bs;
HANDLE hfile;
@@ -50,6 +52,7 @@ typedef struct BDRVRawState {
HANDLE hfile;
int type;
char drive_path[16]; /* format: "d:\" */
+ QEMUWin32AIOState *aio;
} BDRVRawState;
/*
@@ -208,6 +211,9 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
}
*overlapped = FILE_ATTRIBUTE_NORMAL;
+ if (flags & BDRV_O_NATIVE_AIO) {
+ *overlapped |= FILE_FLAG_OVERLAPPED;
+ }
if (flags & BDRV_O_NOCACHE) {
*overlapped |= FILE_FLAG_NO_BUFFERING;
}
@@ -222,6 +228,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
s->type = FTYPE_FILE;
raw_parse_flags(flags, &access_flags, &overlapped);
+
+ if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
+ aio = win32_aio_init();
+ if (aio == NULL) {
+ return -EINVAL;
+ }
+ }
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
@@ -231,7 +244,16 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
if (err == ERROR_ACCESS_DENIED)
return -EACCES;
- return -1;
+ return -EINVAL;
+ }
+
+ if (flags & BDRV_O_NATIVE_AIO) {
+ int ret = win32_aio_attach(aio, s->hfile);
+ if (ret < 0) {
+ CloseHandle(s->hfile);
+ return ret;
+ }
+ s->aio = aio;
}
return 0;
}
@@ -241,8 +263,13 @@ static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
+ if (s->aio) {
+ return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
+ nb_sectors, cb, opaque, QEMU_AIO_READ);
+ } else {
+ return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+ cb, opaque, QEMU_AIO_READ);
+ }
}
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
@@ -250,8 +277,13 @@ static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
+ if (s->aio) {
+ return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
+ nb_sectors, cb, opaque, QEMU_AIO_WRITE);
+ } else {
+ return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+ cb, opaque, QEMU_AIO_WRITE);
+ }
}
static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
diff --git a/block/win32-aio.c b/block/win32-aio.c
new file mode 100644
index 0000000000..c34dc73b6c
--- /dev/null
+++ b/block/win32-aio.c
@@ -0,0 +1,226 @@
+/*
+ * Block driver for RAW files (win32)
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "block_int.h"
+#include "module.h"
+#include "qemu-common.h"
+#include "qemu-aio.h"
+#include "raw-aio.h"
+#include "event_notifier.h"
+#include <windows.h>
+#include <winioctl.h>
+
+#define FTYPE_FILE 0
+#define FTYPE_CD 1
+#define FTYPE_HARDDISK 2
+
+struct QEMUWin32AIOState {
+ HANDLE hIOCP;
+ EventNotifier e;
+ int count;
+};
+
+typedef struct QEMUWin32AIOCB {
+ BlockDriverAIOCB common;
+ struct QEMUWin32AIOState *ctx;
+ int nbytes;
+ OVERLAPPED ov;
+ QEMUIOVector *qiov;
+ void *buf;
+ bool is_read;
+ bool is_linear;
+} QEMUWin32AIOCB;
+
+/*
+ * Completes an AIO request (calls the callback and frees the ACB).
+ */
+static void win32_aio_process_completion(QEMUWin32AIOState *s,
+ QEMUWin32AIOCB *waiocb, DWORD count)
+{
+ int ret;
+ s->count--;
+
+ if (waiocb->ov.Internal != 0) {
+ ret = -EIO;
+ } else {
+ ret = 0;
+ if (count < waiocb->nbytes) {
+ /* Short reads mean EOF, pad with zeros. */
+ if (waiocb->is_read) {
+ qemu_iovec_memset(waiocb->qiov, count, 0,
+ waiocb->qiov->size - count);
+ } else {
+ ret = -EINVAL;
+ }
+ }
+ }
+
+ if (!waiocb->is_linear) {
+ if (ret == 0 && waiocb->is_read) {
+ QEMUIOVector *qiov = waiocb->qiov;
+ char *p = waiocb->buf;
+ int i;
+
+ for (i = 0; i < qiov->niov; ++i) {
+ memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
+ p += qiov->iov[i].iov_len;
+ }
+ g_free(waiocb->buf);
+ }
+ }
+
+
+ waiocb->common.cb(waiocb->common.opaque, ret);
+ qemu_aio_release(waiocb);
+}
+
+static void win32_aio_completion_cb(EventNotifier *e)
+{
+ QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
+ DWORD count;
+ ULONG_PTR key;
+ OVERLAPPED *ov;
+
+ event_notifier_test_and_clear(&s->e);
+ while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) {
+ QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov);
+
+ win32_aio_process_completion(s, waiocb, count);
+ }
+}
+
+static int win32_aio_flush_cb(EventNotifier *e)
+{
+ QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
+
+ return (s->count > 0) ? 1 : 0;
+}
+
+static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
+
+ /*
+ * CancelIoEx is only supported in Vista and newer. For now, just
+ * wait for completion.
+ */
+ while (!HasOverlappedIoCompleted(&waiocb->ov)) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool win32_aio_pool = {
+ .aiocb_size = sizeof(QEMUWin32AIOCB),
+ .cancel = win32_aio_cancel,
+};
+
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+ QEMUWin32AIOState *aio, HANDLE hfile,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+ struct QEMUWin32AIOCB *waiocb;
+ uint64_t offset = sector_num * 512;
+ DWORD rc;
+
+ waiocb = qemu_aio_get(&win32_aio_pool, bs, cb, opaque);
+ waiocb->nbytes = nb_sectors * 512;
+ waiocb->qiov = qiov;
+ waiocb->is_read = (type == QEMU_AIO_READ);
+
+ if (qiov->niov > 1) {
+ waiocb->buf = qemu_blockalign(bs, qiov->size);
+ if (type & QEMU_AIO_WRITE) {
+ char *p = waiocb->buf;
+ int i;
+
+ for (i = 0; i < qiov->niov; ++i) {
+ memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
+ p += qiov->iov[i].iov_len;
+ }
+ }
+ waiocb->is_linear = false;
+ } else {
+ waiocb->buf = qiov->iov[0].iov_base;
+ waiocb->is_linear = true;
+ }
+
+ waiocb->ov = (OVERLAPPED) {
+ .Offset = (DWORD) offset,
+ .OffsetHigh = (DWORD) (offset >> 32),
+ .hEvent = event_notifier_get_handle(&aio->e)
+ };
+ aio->count++;
+
+ if (type & QEMU_AIO_READ) {
+ rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
+ } else {
+ rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
+ }
+ if(rc == 0 && GetLastError() != ERROR_IO_PENDING) {
+ goto out_dec_count;
+ }
+ return &waiocb->common;
+
+out_dec_count:
+ aio->count--;
+ qemu_aio_release(waiocb);
+ return NULL;
+}
+
+int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
+{
+ if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) {
+ return -EINVAL;
+ } else {
+ return 0;
+ }
+}
+
+QEMUWin32AIOState *win32_aio_init(void)
+{
+ QEMUWin32AIOState *s;
+
+ s = g_malloc0(sizeof(*s));
+ if (event_notifier_init(&s->e, false) < 0) {
+ goto out_free_state;
+ }
+
+ s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+ if (s->hIOCP == NULL) {
+ goto out_close_efd;
+ }
+
+ qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
+ win32_aio_flush_cb);
+
+ return s;
+
+out_close_efd:
+ event_notifier_cleanup(&s->e);
+out_free_state:
+ g_free(s);
+ return NULL;
+}