diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2013-01-14 16:26:55 +0100 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2013-01-15 10:03:47 +0100 |
commit | 8238010b265886249f9f3d45e890788319b7736e (patch) | |
tree | b526fdc8a5ec57a5986d2cd64bf0f4217a9f3391 | |
parent | fcd9d4555252c47a337357dfce0806e5dde99d96 (diff) |
block: make discard asynchronous
This is easy with the thread pool, because we can use s->is_xfs and
s->has_discard from the worker function.
QEMU has a widespread assumption that each I/O operation writes fewer
than 2^32 bytes. This patch does not fix that assumption everywhere, of
course, but it starts correcting struct RawPosixAIOData (aio_nbytes is
widened from size_t to uint64_t) so that there is no regression with
respect to the synchronous discard implementation.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | block/raw-aio.h | 5 | ||||
-rw-r--r-- | block/raw-posix.c | 164 |
2 files changed, 88 insertions, 81 deletions
diff --git a/block/raw-aio.h b/block/raw-aio.h index e77f361148..c61f1595d9 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -20,11 +20,14 @@ #define QEMU_AIO_WRITE 0x0002 #define QEMU_AIO_IOCTL 0x0004 #define QEMU_AIO_FLUSH 0x0008 +#define QEMU_AIO_DISCARD 0x0010 #define QEMU_AIO_TYPE_MASK \ - (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH) + (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \ + QEMU_AIO_DISCARD) /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 +#define QEMU_AIO_BLKDEV 0x2000 /* linux-aio.c - Linux native implementation */ diff --git a/block/raw-posix.c b/block/raw-posix.c index 1d32139c9b..679fcc5113 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -163,7 +163,7 @@ typedef struct RawPosixAIOData { void *aio_ioctl_buf; }; int aio_niov; - size_t aio_nbytes; + uint64_t aio_nbytes; #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ off_t aio_offset; int aio_type; @@ -623,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) return nbytes; } +#ifdef CONFIG_XFS +static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) +{ + struct xfs_flock64 fl; + + memset(&fl, 0, sizeof(fl)); + fl.l_whence = SEEK_SET; + fl.l_start = offset; + fl.l_len = bytes; + + if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { + DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); + return -errno; + } + + return 0; +} +#endif + +static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) +{ + int ret = -EOPNOTSUPP; + BDRVRawState *s = aiocb->bs->opaque; + + if (s->has_discard == 0) { + return 0; + } + + if (aiocb->aio_type & QEMU_AIO_BLKDEV) { +#ifdef BLKDISCARD + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } else { +#ifdef CONFIG_XFS + if (s->is_xfs) { + return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); + } +#endif + +#ifdef 
CONFIG_FALLOCATE_PUNCH_HOLE + do { + if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } + + if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP || + ret == -ENOTTY) { + s->has_discard = 0; + ret = 0; + } + return ret; +} + static int aio_worker(void *arg) { RawPosixAIOData *aiocb = arg; @@ -657,6 +723,9 @@ static int aio_worker(void *arg) case QEMU_AIO_IOCTL: ret = handle_aiocb_ioctl(aiocb); break; + case QEMU_AIO_DISCARD: + ret = handle_aiocb_discard(aiocb); + break; default: fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); ret = -EINVAL; @@ -1057,57 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, } } -#ifdef CONFIG_XFS -static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors) -{ - struct xfs_flock64 fl; - - memset(&fl, 0, sizeof(fl)); - fl.l_whence = SEEK_SET; - fl.l_start = sector_num << 9; - fl.l_len = (int64_t)nb_sectors << 9; - - if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { - DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); - return -errno; - } - - return 0; -} -#endif - -static coroutine_fn int raw_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { - int ret = -EOPNOTSUPP; BDRVRawState *s = bs->opaque; - if (!s->has_discard) { - return 0; - } - -#ifdef CONFIG_XFS - if (s->is_xfs) { - return xfs_discard(s, sector_num, nb_sectors); - } -#endif - -#ifdef CONFIG_FALLOCATE_PUNCH_HOLE - do { - if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - sector_num << BDRV_SECTOR_BITS, - (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) { - return 0; - } - } while (errno == EINTR); - - ret = -errno; -#endif - - if (ret == -EOPNOTSUPP) { - return 0; - 
} - return ret; + return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + cb, opaque, QEMU_AIO_DISCARD); } static QEMUOptionParameter raw_create_options[] = { @@ -1130,12 +1156,12 @@ static BlockDriver bdrv_file = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_co_discard = raw_co_discard, .bdrv_co_is_allocated = raw_co_is_allocated, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_aio_discard = raw_aio_discard, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1345,38 +1371,17 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } -static coroutine_fn int hdev_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - int ret; - - if (s->has_discard == 0) { - return 0; - } - ret = fd_open(bs); - if (ret < 0) { - return ret; - } - ret = -EOPNOTSUPP; -#ifdef BLKDISCARD - do { - uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 }; - if (ioctl(s->fd, BLKDISCARD, range) == 0) { - return 0; - } - } while (errno == EINTR); - - ret = -errno; -#endif - if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP || - ret == -ENOTTY) { - s->has_discard = 0; - ret = 0; + if (fd_open(bs) < 0) { + return NULL; } - return ret; - + return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) @@ -1447,11 +1452,10 @@ static BlockDriver bdrv_host_device = { .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_co_discard = hdev_co_discard, - .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, 
.bdrv_aio_flush = raw_aio_flush, + .bdrv_aio_discard = hdev_aio_discard, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, |