diff options
-rw-r--r-- | block/backup.c | 1 | ||||
-rw-r--r-- | block/gluster.c | 230 | ||||
-rw-r--r-- | block/mirror.c | 14 | ||||
-rw-r--r-- | block/nfs.c | 55 | ||||
-rw-r--r-- | blockjob.c | 1 | ||||
-rwxr-xr-x | tests/qemu-iotests/041 | 30 | ||||
-rw-r--r-- | tests/qemu-iotests/041.out | 4 |
7 files changed, 317 insertions, 18 deletions
diff --git a/block/backup.c b/block/backup.c index 581269b29a..f87f8d539b 100644 --- a/block/backup.c +++ b/block/backup.c @@ -489,7 +489,6 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, assert(bs); assert(target); - assert(cb); if (bs == target) { error_setg(errp, "Source and target cannot be the same"); diff --git a/block/gluster.c b/block/gluster.c index d361d8e847..16f7778a50 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -24,6 +24,8 @@ typedef struct GlusterAIOCB { typedef struct BDRVGlusterState { struct glfs *glfs; struct glfs_fd *fd; + bool supports_seek_data; + int debug_level; } BDRVGlusterState; typedef struct GlusterConf { @@ -32,6 +34,7 @@ typedef struct GlusterConf { char *volname; char *image; char *transport; + int debug_level; } GlusterConf; static void qemu_gluster_gconf_free(GlusterConf *gconf) @@ -194,11 +197,7 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, goto out; } - /* - * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when - * GlusterFS makes GF_LOG_* macros available to libgfapi users. - */ - ret = glfs_set_logging(glfs, "-", 4); + ret = glfs_set_logging(glfs, "-", gconf->debug_level); if (ret < 0) { goto out; } @@ -256,16 +255,26 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) qemu_bh_schedule(acb->bh); } +#define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEBUG_DEFAULT 4 +#define GLUSTER_DEBUG_MAX 9 + /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "gluster", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = GLUSTER_OPT_FILENAME, .type = QEMU_OPT_STRING, .help = "URL to the gluster image", }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, { /* end of list */ } }, }; @@ -287,6 +296,28 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) } } +/* + * Do SEEK_DATA/HOLE to detect if it is functional. Older broken versions of + * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is used. + * - Corrected versions return -1 and set errno to EINVAL. + * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and set + * errno to ENXIO when SEEK_DATA is called with a position of EOF. + */ +static bool qemu_gluster_test_seek(struct glfs_fd *fd) +{ + off_t ret, eof; + + eof = glfs_lseek(fd, 0, SEEK_END); + if (eof < 0) { + /* this should never occur */ + return false; + } + + /* this should always fail with ENXIO if SEEK_DATA is supported */ + ret = glfs_lseek(fd, eof, SEEK_DATA); + return (ret < 0) && (errno == ENXIO); +} + static int qemu_gluster_open(BlockDriverState *bs, QDict *options, int bdrv_flags, Error **errp) { @@ -306,8 +337,17 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, goto out; } - filename = qemu_opt_get(opts, "filename"); + filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME); + s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (s->debug_level < 0) { + s->debug_level = 0; + } else if (s->debug_level > GLUSTER_DEBUG_MAX) { + s->debug_level = GLUSTER_DEBUG_MAX; + } + + gconf->debug_level = s->debug_level; s->glfs = qemu_gluster_init(gconf, filename, errp); if (!s->glfs) { ret = -errno; @@ -338,6 +378,8 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, ret = -errno; } + s->supports_seek_data = qemu_gluster_test_seek(s->fd); + out: qemu_opts_del(opts); qemu_gluster_gconf_free(gconf); @@ -363,6 +405,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { int ret = 0; + BDRVGlusterState *s; BDRVGlusterReopenState *reop_s; GlusterConf *gconf = NULL; int open_flags = 0; @@ -370,6 +413,8 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, assert(state != NULL); assert(state->bs != NULL); + s = state->bs->opaque; + state->opaque = g_new0(BDRVGlusterReopenState, 1); reop_s = state->opaque; @@ -377,6 +422,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, gconf = g_new0(GlusterConf, 1); + gconf->debug_level = s->debug_level; reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp); if (reop_s->glfs == NULL) { ret = -errno; @@ -510,6 +556,14 @@ static int qemu_gluster_create(const char *filename, char *tmp = NULL; GlusterConf *gconf = g_new0(GlusterConf, 1); + gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (gconf->debug_level < 0) { + gconf->debug_level = 0; + } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) { + gconf->debug_level = GLUSTER_DEBUG_MAX; + } + glfs = qemu_gluster_init(gconf, filename, errp); if (!glfs) { ret = -errno; @@ -727,6 +781,159 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs) return 0; } +/* + * Find allocation range in @bs around offset @start. + * May change underlying file descriptor's file offset. + * If @start is not in a hole, store @start in @data, and the + * beginning of the next hole in @hole, and return 0. + * If @start is in a non-trailing hole, store @start in @hole and the + * beginning of the next non-hole in @data, and return 0. + * If @start is in a trailing hole or beyond EOF, return -ENXIO. + * If we can't find out, return a negative errno other than -ENXIO. + * + * (Shamefully copied from raw-posix.c, only miniscule adaptions.) + */ +static int find_allocation(BlockDriverState *bs, off_t start, + off_t *data, off_t *hole) +{ + BDRVGlusterState *s = bs->opaque; + off_t offs; + + if (!s->supports_seek_data) { + return -ENOTSUP; + } + + /* + * SEEK_DATA cases: + * D1. offs == start: start is in data + * D2. offs > start: start is in a hole, next data at offs + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole + * or start is beyond EOF + * If the latter happens, the file has been truncated behind + * our back since we opened it. All bets are off then. + * Treating like a trailing hole is simplest. + * D4. offs < 0, errno != ENXIO: we learned nothing + */ + offs = glfs_lseek(s->fd, start, SEEK_DATA); + if (offs < 0) { + return -errno; /* D3 or D4 */ + } + assert(offs >= start); + + if (offs > start) { + /* D2: in hole, next data at offs */ + *hole = start; + *data = offs; + return 0; + } + + /* D1: in data, end not yet known */ + + /* + * SEEK_HOLE cases: + * H1. offs == start: start is in a hole + * If this happens here, a hole has been dug behind our back + * since the previous lseek(). + * H2. offs > start: either start is in data, next hole at offs, + * or start is in trailing hole, EOF at offs + * Linux treats trailing holes like any other hole: offs == + * start. Solaris seeks to EOF instead: offs > start (blech). + * If that happens here, a hole has been dug behind our back + * since the previous lseek(). + * H3. offs < 0, errno = ENXIO: start is beyond EOF + * If this happens, the file has been truncated behind our + * back since we opened it. Treat it like a trailing hole. + * H4. offs < 0, errno != ENXIO: we learned nothing + * Pretend we know nothing at all, i.e. "forget" about D1. + */ + offs = glfs_lseek(s->fd, start, SEEK_HOLE); + if (offs < 0) { + return -errno; /* D1 and (H3 or H4) */ + } + assert(offs >= start); + + if (offs > start) { + /* + * D1 and H2: either in data, next hole at offs, or it was in + * data but is now in a trailing hole. In the latter case, + * all bets are off. Treating it as if it there was data all + * the way to EOF is safe, so simply do that. + */ + *data = start; + *hole = offs; + return 0; + } + + /* D1 and H1 */ + return -EBUSY; +} + +/* + * Returns the allocation status of the specified sectors. + * + * If 'sector_num' is beyond the end of the disk image the return value is 0 + * and 'pnum' is set to 0. + * + * 'pnum' is set to the number of sectors (including and immediately following + * the specified sector) that are known to be in the same + * allocated/unallocated state. + * + * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes + * beyond the end of the disk image it will be clamped. + * + * (Based on raw_co_get_block_status() from raw-posix.c.) + */ +static int64_t coroutine_fn qemu_gluster_co_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + BDRVGlusterState *s = bs->opaque; + off_t start, data = 0, hole = 0; + int64_t total_size; + int ret = -EINVAL; + + if (!s->fd) { + return ret; + } + + start = sector_num * BDRV_SECTOR_SIZE; + total_size = bdrv_getlength(bs); + if (total_size < 0) { + return total_size; + } else if (start >= total_size) { + *pnum = 0; + return 0; + } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { + nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + } + + ret = find_allocation(bs, start, &data, &hole); + if (ret == -ENXIO) { + /* Trailing hole */ + *pnum = nb_sectors; + ret = BDRV_BLOCK_ZERO; + } else if (ret < 0) { + /* No info available, so pretend there are no holes */ + *pnum = nb_sectors; + ret = BDRV_BLOCK_DATA; + } else if (data == start) { + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute sectors to the beginning of the next extent. */ + assert(hole == start); + *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); + ret = BDRV_BLOCK_ZERO; + } + + *file = bs; + + return ret | BDRV_BLOCK_OFFSET_VALID | start; +} + + static QemuOptsList qemu_gluster_create_opts = { .name = "qemu-gluster-create-opts", .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), @@ -741,6 +948,11 @@ static QemuOptsList qemu_gluster_create_opts = { .type = QEMU_OPT_STRING, .help = "Preallocation mode (allowed values: off, full)" }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, { /* end of list */ } } }; @@ -769,6 +981,7 @@ static BlockDriver bdrv_gluster = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -796,6 +1009,7 @@ static BlockDriver bdrv_gluster_tcp = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -823,6 +1037,7 @@ static BlockDriver bdrv_gluster_unix = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -850,6 +1065,7 @@ static BlockDriver bdrv_gluster_rdma = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; diff --git a/block/mirror.c b/block/mirror.c index a04ed9c7a4..8d96049555 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -218,7 +218,9 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) } /* Submit async read while handling COW. - * Returns: nb_sectors if no alignment is necessary, or + * Returns: The number of sectors copied after and including sector_num, + * excluding any sectors copied prior to sector_num due to alignment. + * This will be nb_sectors if no alignment is necessary, or * (new_end - sector_num) if tail is rounded up or down due to * alignment or buffer limit. */ @@ -227,14 +229,18 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, { BlockBackend *source = s->common.blk; int sectors_per_chunk, nb_chunks; - int ret = nb_sectors; + int ret; MirrorOp *op; + int max_sectors; sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + max_sectors = sectors_per_chunk * s->max_iov; /* We can only handle as much as buf_size at a time. */ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); + nb_sectors = MIN(max_sectors, nb_sectors); assert(nb_sectors); + ret = nb_sectors; if (s->cow_bitmap) { ret += mirror_cow_align(s, §or_num, &nb_sectors); @@ -327,7 +333,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) first_chunk = sector_num / sectors_per_chunk; while (test_bit(first_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, first_chunk, s->in_flight); + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); mirror_wait_for_io(s); } @@ -769,7 +775,7 @@ static void mirror_complete(BlockJob *job, Error **errp) } } - /* check the target bs is not blocked and block all operations on it */ + /* block all operations on to_replace bs */ if (s->replaces) { AioContext *replace_aio_context; diff --git a/block/nfs.c b/block/nfs.c index 9f51cc3f10..15d6832c4c 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -1,7 +1,7 @@ /* * QEMU Block driver for native access to files on NFS shares * - * Copyright (c) 2014 Peter Lieven <pl@kamp.de> + * Copyright (c) 2014-2016 Peter Lieven <pl@kamp.de> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -38,6 +38,7 @@ #include <nfsc/libnfs.h> #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 +#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE) #define QEMU_NFS_MAX_DEBUG_LEVEL 2 typedef struct NFSClient { @@ -47,6 +48,7 @@ typedef struct NFSClient { bool has_zero_init; AioContext *aio_context; blkcnt_t st_blocks; + bool cache_used; } NFSClient; typedef struct NFSRPC { @@ -278,7 +280,7 @@ static void nfs_file_close(BlockDriverState *bs) } static int64_t nfs_client_open(NFSClient *client, const char *filename, - int flags, Error **errp) + int flags, Error **errp, int open_flags) { int ret = -EINVAL, i; struct stat st; @@ -330,12 +332,38 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, nfs_set_tcp_syncnt(client->context, val); #ifdef LIBNFS_FEATURE_READAHEAD } else if (!strcmp(qp->p[i].name, "readahead")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS readahead " + "if cache.direct = on"); + goto fail; + } if (val > QEMU_NFS_MAX_READAHEAD_SIZE) { error_report("NFS Warning: Truncating NFS readahead" " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); val = QEMU_NFS_MAX_READAHEAD_SIZE; } nfs_set_readahead(client->context, val); +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); +#endif + client->cache_used = true; +#endif +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); + } else if (!strcmp(qp->p[i].name, "pagecache")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS pagecache " + "if cache.direct = on"); + goto fail; + } + if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) { + error_report("NFS Warning: Truncating NFS pagecache" + " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); + val = QEMU_NFS_MAX_PAGECACHE_SIZE; + } + nfs_set_pagecache(client->context, val); + nfs_set_pagecache_ttl(client->context, 0); + client->cache_used = true; #endif #ifdef LIBNFS_FEATURE_DEBUG } else if (!strcmp(qp->p[i].name, "debug")) { @@ -418,7 +446,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, } ret = nfs_client_open(client, qemu_opt_get(opts, "filename"), (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY, - errp); + errp, bs->open_flags); if (ret < 0) { goto out; } @@ -454,7 +482,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); - ret = nfs_client_open(client, url, O_CREAT, errp); + ret = nfs_client_open(client, url, O_CREAT, errp, 0); if (ret < 0) { goto out; } @@ -516,6 +544,12 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return -EACCES; } + if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) { + error_setg(errp, "Cannot disable cache if libnfs readahead or" + " pagecache is enabled"); + return -EINVAL; + } + /* Update cache for read-only reopens */ if (!(state->flags & BDRV_O_RDWR)) { ret = nfs_fstat(client->context, client->fh, &st); @@ -530,6 +564,15 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return 0; } +#ifdef LIBNFS_FEATURE_PAGECACHE +static void nfs_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + NFSClient *client = bs->opaque; + nfs_pagecache_invalidate(client->context, client->fh); +} +#endif + static BlockDriver bdrv_nfs = { .format_name = "nfs", .protocol_name = "nfs", @@ -553,6 +596,10 @@ static BlockDriver bdrv_nfs = { .bdrv_detach_aio_context = nfs_detach_aio_context, .bdrv_attach_aio_context = nfs_attach_aio_context, + +#ifdef LIBNFS_FEATURE_PAGECACHE + .bdrv_invalidate_cache = nfs_invalidate_cache, +#endif }; static void nfs_block_init(void) diff --git a/blockjob.c b/blockjob.c index 90c4e262b0..205da9df4e 100644 --- a/blockjob.c +++ b/blockjob.c @@ -110,6 +110,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, BlockBackend *blk; BlockJob *job; + assert(cb); if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index ed1d9d464c..cbf5e0ba5c 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -727,6 +727,36 @@ class TestUnbackedSource(iotests.QMPTestCase): self.complete_and_wait() self.assert_no_active_block_jobs() +class TestGranularity(iotests.QMPTestCase): + image_len = 10 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, + str(TestGranularity.image_len)) + qemu_io('-c', 'write 0 %d' % (self.image_len), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + os.remove(test_img) + os.remove(target_img) + + def test_granularity(self): + self.assert_no_active_block_jobs() + result = self.vm.qmp('drive-mirror', device='drive0', + sync='full', target=target_img, + mode='absolute-paths', granularity=8192) + self.assert_qmp(result, 'return', {}) + event = self.vm.get_qmp_event(wait=60.0) + # Failures will manifest as COMPLETED/ERROR. + self.assert_qmp(event, 'event', 'BLOCK_JOB_READY') + self.complete_and_wait(drive='drive0', wait_ready=False) + self.assert_no_active_block_jobs() + class TestRepairQuorum(iotests.QMPTestCase): """ This class test quorum file repair using drive-mirror. It's mostly a fork of TestSingleDrive """ diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index b0cadc8245..b67d0504a6 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -........................................................................... +............................................................................ ---------------------------------------------------------------------- -Ran 75 tests +Ran 76 tests OK |