aboutsummaryrefslogtreecommitdiff
path: root/block-raw-posix.c
diff options
context:
space:
mode:
author bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162> 2008-05-28 09:51:09 +0000
committer bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162> 2008-05-28 09:51:09 +0000
commit bed5cc520707ba4382444c4fb2afd428df080e6c (patch)
tree af914f87ab2ee87933bfec370281549eed1971d2 /block-raw-posix.c
parent 0ac087f1f3ae036a477fa4b69f29a08a0f504033 (diff)
Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4599 c046a42c-6fe2-441c-8c8c-71466251a162
Diffstat (limited to 'block-raw-posix.c')
-rw-r--r-- block-raw-posix.c 240
1 files changed, 238 insertions, 2 deletions
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 6b0009e564..fd40dda449 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -70,6 +70,8 @@
#define FTYPE_CD 1
#define FTYPE_FD 2
+#define ALIGNED_BUFFER_SIZE (32 * 512)
+
/* if the FD is not accessed during that time (in ms), we try to
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
@@ -86,6 +88,9 @@ typedef struct BDRVRawState {
int fd_got_error;
int fd_media_changed;
#endif
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ uint8_t* aligned_buf;
+#endif
} BDRVRawState;
static int fd_open(BlockDriverState *bs);
@@ -121,6 +126,17 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
return ret;
}
s->fd = fd;
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ s->aligned_buf = NULL;
+ if (flags & BDRV_O_DIRECT) {
+ s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
+ if (s->aligned_buf == NULL) {
+ ret = -errno;
+ close(fd);
+ return ret;
+ }
+ }
+#endif
return 0;
}
@@ -141,7 +157,14 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
#endif
*/
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -194,7 +217,14 @@ label__raw_read__success:
return ret;
}
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -230,6 +260,164 @@ label__raw_write__success:
return ret;
}
+
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+/*
+ * Read count bytes starting at byte offset into buf.  offset, count and
+ * buf may all be unaligned.
+ *
+ * When the file was opened with O_DIRECT (s->aligned_buf is non-NULL),
+ * unaligned parts of the request are staged through s->aligned_buf so that
+ * raw_pread_aligned only ever sees 512-byte aligned offsets, sizes and
+ * buffers.  Otherwise the request is passed straight through.
+ *
+ * Returns the number of bytes copied into buf, which is smaller than count
+ * when the underlying read comes up short (e.g. at end of file), or the
+ * negative value reported by raw_pread_aligned on failure.  This relies on
+ * raw_pread_aligned returning a byte count or a negative error, which is
+ * how its result is already consumed below.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int size, ret, shift, sum;
+
+    sum = 0;
+
+    if (s->aligned_buf != NULL) {
+
+        if (offset & 0x1ff) {
+            /* align offset on a 512 bytes boundary */
+
+            shift = offset & 0x1ff;
+            size = (shift + count + 0x1ff) & ~0x1ff;
+            if (size > ALIGNED_BUFFER_SIZE)
+                size = ALIGNED_BUFFER_SIZE;
+            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
+            if (ret < 0)
+                return ret;
+
+            /*
+             * Only the bytes the read actually delivered are valid.  If it
+             * stopped at or before the requested offset there is nothing
+             * to hand back.
+             */
+            if (ret <= shift)
+                return sum;
+
+            /* copy up to the end of the first sector, no further */
+            size = 512 - shift;
+            if (size > count)
+                size = count;
+            if (size > ret - shift)
+                size = ret - shift;
+            memcpy(buf, s->aligned_buf + shift, size);
+
+            buf += size;
+            offset += size;
+            count -= size;
+            sum += size;
+
+            /*
+             * Done, or the read was short and offset is still unaligned:
+             * report what we have instead of issuing a misaligned read.
+             */
+            if (count == 0 || (offset & 0x1ff))
+                return sum;
+        }
+        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+            /* read on aligned buffer */
+
+            while (count) {
+
+                size = (count + 0x1ff) & ~0x1ff;
+                if (size > ALIGNED_BUFFER_SIZE)
+                    size = ALIGNED_BUFFER_SIZE;
+
+                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
+                if (ret < 0)
+                    return ret;
+
+                /* no more data: stop instead of looping forever */
+                if (ret == 0)
+                    break;
+
+                size = ret;
+                if (size > count)
+                    size = count;
+
+                memcpy(buf, s->aligned_buf, size);
+
+                buf += size;
+                offset += size;
+                count -= size;
+                sum += size;
+
+                /*
+                 * A read that ended off a sector boundary leaves offset
+                 * unaligned, which raw_pread_aligned does not accept for
+                 * O_DIRECT files; return the short count.
+                 */
+                if (offset & 0x1ff)
+                    break;
+            }
+
+            return sum;
+        }
+    }
+
+    /* everything left is aligned (or O_DIRECT is not in effect) */
+    ret = raw_pread_aligned(bs, offset, buf, count);
+    if (ret < 0)
+        return ret;     /* do not fold 'sum' into an error code */
+
+    return ret + sum;
+}
+
+/*
+ * Write count bytes from buf starting at byte offset.  offset, count and
+ * buf may all be unaligned.
+ *
+ * When the file was opened with O_DIRECT (s->aligned_buf is non-NULL),
+ * partial sectors at the head and tail are handled by read-modify-write
+ * through s->aligned_buf, and an unaligned source buffer is copied through
+ * s->aligned_buf in chunks of at most ALIGNED_BUFFER_SIZE bytes, so that
+ * raw_pread_aligned / raw_pwrite_aligned only see 512-byte aligned
+ * requests.  Otherwise the request is passed straight through.
+ *
+ * Returns the number of bytes of buf that were written, or the negative
+ * value reported by the aligned helpers on failure.  This relies on those
+ * helpers returning a byte count or a negative error, which is how their
+ * results are already consumed below.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int size, ret, shift, sum;
+
+    sum = 0;
+
+    if (s->aligned_buf != NULL) {
+
+        if (offset & 0x1ff) {
+            /* align offset on a 512 bytes boundary */
+            shift = offset & 0x1ff;
+            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
+            if (ret < 0)
+                return ret;
+            /*
+             * A short read leaves the rest of the bounce sector holding
+             * data from an earlier request; clear it so stale bytes are
+             * not written back to the file.
+             */
+            if (ret < 512)
+                memset(s->aligned_buf + ret, 0, 512 - ret);
+
+            size = 512 - shift;
+            if (size > count)
+                size = count;
+            memcpy(s->aligned_buf + shift, buf, size);
+
+            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
+            if (ret < 0)
+                return ret;
+
+            buf += size;
+            offset += size;
+            count -= size;
+            sum += size;
+
+            if (count == 0)
+                return sum;
+        }
+        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+            /* write whole sectors through the aligned bounce buffer */
+            while ((size = (count & ~0x1ff)) != 0) {
+
+                if (size > ALIGNED_BUFFER_SIZE)
+                    size = ALIGNED_BUFFER_SIZE;
+
+                memcpy(s->aligned_buf, buf, size);
+
+                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
+                if (ret < 0)
+                    return ret;
+
+                /* no progress: stop instead of looping forever */
+                if (ret == 0)
+                    return sum;
+
+                buf += ret;
+                offset += ret;
+                count -= ret;
+                sum += ret;
+
+                /*
+                 * A write that ended off a sector boundary leaves offset
+                 * unaligned, which the aligned helpers do not accept for
+                 * O_DIRECT files; return the short count.
+                 */
+                if (offset & 0x1ff)
+                    return sum;
+            }
+            /* here, count < 512 because (count & ~0x1ff) == 0 */
+            if (count) {
+                /* read-modify-write of the trailing partial sector */
+                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
+                if (ret < 0)
+                    return ret;
+                if (ret < 512)
+                    memset(s->aligned_buf + ret, 0, 512 - ret);
+                memcpy(s->aligned_buf, buf, count);
+
+                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
+                if (ret < 0)
+                    return ret;
+                /* only 'count' bytes of the sector came from the caller */
+                if (count < ret)
+                    ret = count;
+
+                sum += ret;
+            }
+            return sum;
+        }
+    }
+
+    /* everything left is aligned (or O_DIRECT is not in effect) */
+    ret = raw_pwrite_aligned(bs, offset, buf, count);
+    if (ret < 0)
+        return ret;     /* do not fold 'sum' into an error code */
+
+    return ret + sum;
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
/***********************************************************/
/* Unix AIO using POSIX AIO */
@@ -237,6 +425,7 @@ typedef struct RawAIOCB {
BlockDriverAIOCB common;
struct aiocb aiocb;
struct RawAIOCB *next;
+ int ret;
} RawAIOCB;
static int aio_sig_num = SIGUSR2;
@@ -397,12 +586,38 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
return acb;
}
+#ifndef QEMU_IMG
+/*
+ * Deferred completion for requests that were served synchronously.
+ *
+ * opaque is the RawAIOCB whose 'ret' field already holds the result of
+ * the synchronous transfer.  The caller's completion callback is invoked
+ * with that result and the control block is then released.
+ *
+ * NOTE(review): the QEMUBH that schedules this function is not deleted
+ * here; confirm whether it is freed elsewhere.
+ */
+static void raw_aio_em_cb(void *opaque)
+{
+    RawAIOCB *request = opaque;
+
+    request->common.cb(request->common.opaque, request->ret);
+    qemu_aio_release(request);
+}
+#endif
+
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
+ QEMUBH *bh;
+ acb = qemu_aio_get(bs, cb, opaque);
+ acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ bh = qemu_bh_new(raw_aio_em_cb, acb);
+ qemu_bh_schedule(bh);
+ return &acb->common;
+ }
+#endif
+
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
@@ -419,6 +634,23 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
{
RawAIOCB *acb;
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
+ QEMUBH *bh;
+ acb = qemu_aio_get(bs, cb, opaque);
+ acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ bh = qemu_bh_new(raw_aio_em_cb, acb);
+ qemu_bh_schedule(bh);
+ return &acb->common;
+ }
+#endif
+
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
@@ -462,6 +694,10 @@ static void raw_close(BlockDriverState *bs)
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+ if (s->aligned_buf != NULL)
+ qemu_free(s->aligned_buf);
+#endif
}
}