aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2013-01-14 16:26:55 +0100
committerStefan Hajnoczi <stefanha@redhat.com>2013-01-15 10:03:47 +0100
commit8238010b265886249f9f3d45e890788319b7736e (patch)
treeb526fdc8a5ec57a5986d2cd64bf0f4217a9f3391
parentfcd9d4555252c47a337357dfce0806e5dde99d96 (diff)
block: make discard asynchronous
This is easy with the thread pool, because we can use s->is_xfs and s->has_discard from the worker function. QEMU has a widespread assumption that each I/O operation writes less than 2^32 bytes. This patch doesn't fix it throughout of course, but it starts correcting struct RawPosixAIOData so that there is no regression with respect to the synchronous discard implementation. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r--block/raw-aio.h5
-rw-r--r--block/raw-posix.c164
2 files changed, 88 insertions, 81 deletions
diff --git a/block/raw-aio.h b/block/raw-aio.h
index e77f361148..c61f1595d9 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -20,11 +20,14 @@
#define QEMU_AIO_WRITE 0x0002
#define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_FLUSH 0x0008
+#define QEMU_AIO_DISCARD 0x0010
#define QEMU_AIO_TYPE_MASK \
- (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
+ (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
+ QEMU_AIO_DISCARD)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000
+#define QEMU_AIO_BLKDEV 0x2000
/* linux-aio.c - Linux native implementation */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 1d32139c9b..679fcc5113 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -163,7 +163,7 @@ typedef struct RawPosixAIOData {
void *aio_ioctl_buf;
};
int aio_niov;
- size_t aio_nbytes;
+ uint64_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
off_t aio_offset;
int aio_type;
@@ -623,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
return nbytes;
}
+#ifdef CONFIG_XFS
+static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
+{
+ struct xfs_flock64 fl;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.l_whence = SEEK_SET;
+ fl.l_start = offset;
+ fl.l_len = bytes;
+
+ if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+ DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
+ return -errno;
+ }
+
+ return 0;
+}
+#endif
+
+static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
+{
+ int ret = -EOPNOTSUPP;
+ BDRVRawState *s = aiocb->bs->opaque;
+
+ if (s->has_discard == 0) {
+ return 0;
+ }
+
+ if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+#ifdef BLKDISCARD
+ do {
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+ if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
+
+ ret = -errno;
+#endif
+ } else {
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
+ }
+#endif
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ do {
+ if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
+
+ ret = -errno;
+#endif
+ }
+
+ if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+ ret == -ENOTTY) {
+ s->has_discard = 0;
+ ret = 0;
+ }
+ return ret;
+}
+
static int aio_worker(void *arg)
{
RawPosixAIOData *aiocb = arg;
@@ -657,6 +723,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_IOCTL:
ret = handle_aiocb_ioctl(aiocb);
break;
+ case QEMU_AIO_DISCARD:
+ ret = handle_aiocb_discard(aiocb);
+ break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
@@ -1057,57 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
}
}
-#ifdef CONFIG_XFS
-static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
-{
- struct xfs_flock64 fl;
-
- memset(&fl, 0, sizeof(fl));
- fl.l_whence = SEEK_SET;
- fl.l_start = sector_num << 9;
- fl.l_len = (int64_t)nb_sectors << 9;
-
- if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
- DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
- return -errno;
- }
-
- return 0;
-}
-#endif
-
-static coroutine_fn int raw_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
{
- int ret = -EOPNOTSUPP;
BDRVRawState *s = bs->opaque;
- if (!s->has_discard) {
- return 0;
- }
-
-#ifdef CONFIG_XFS
- if (s->is_xfs) {
- return xfs_discard(s, sector_num, nb_sectors);
- }
-#endif
-
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
- do {
- if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- sector_num << BDRV_SECTOR_BITS,
- (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
- return 0;
- }
- } while (errno == EINTR);
-
- ret = -errno;
-#endif
-
- if (ret == -EOPNOTSUPP) {
- return 0;
- }
- return ret;
+ return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+ cb, opaque, QEMU_AIO_DISCARD);
}
static QEMUOptionParameter raw_create_options[] = {
@@ -1130,12 +1156,12 @@ static BlockDriver bdrv_file = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
- .bdrv_co_discard = raw_co_discard,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_aio_discard = raw_aio_discard,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1345,38 +1371,17 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
-static coroutine_fn int hdev_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- int ret;
-
- if (s->has_discard == 0) {
- return 0;
- }
- ret = fd_open(bs);
- if (ret < 0) {
- return ret;
- }
- ret = -EOPNOTSUPP;
-#ifdef BLKDISCARD
- do {
- uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 };
- if (ioctl(s->fd, BLKDISCARD, range) == 0) {
- return 0;
- }
- } while (errno == EINTR);
-
- ret = -errno;
-#endif
- if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
- ret == -ENOTTY) {
- s->has_discard = 0;
- ret = 0;
+ if (fd_open(bs) < 0) {
+ return NULL;
}
- return ret;
-
+ return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+ cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1447,11 +1452,10 @@ static BlockDriver bdrv_host_device = {
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
- .bdrv_co_discard = hdev_co_discard,
-
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_aio_discard = hdev_aio_discard,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,