diff options
author | Anthony Liguori <aliguori@amazon.com> | 2013-12-06 12:59:58 -0800 |
---|---|---|
committer | Anthony Liguori <aliguori@amazon.com> | 2013-12-06 12:59:58 -0800 |
commit | 0a0ee0b93bdd6e1ef628283d00bb979e27655ebb (patch) | |
tree | a8e1d34e3a187afc96d8697dfe121032a0268912 /block | |
parent | 9ed5dacbfa0f3f74238854776385f150b68e78b9 (diff) | |
parent | 981cbf59b5360647e908186e7306ee9013a58c88 (diff) |
Merge remote-tracking branch 'kwolf/tags/for-anthony' into staging
Block patches for 2.0 (flushing block-next)
# gpg: Signature made Fri 29 Nov 2013 08:43:18 AM PST using RSA key ID C88F2FD6
# gpg: Can't check signature: public key not found
# By Peter Lieven (17) and others
# Via Kevin Wolf
* kwolf/tags/for-anthony: (41 commits)
qemu-iotests: Add sample image and test for VMDK version 3
vmdk: Allow read only open of VMDK version 3
qemu-iotests: Filter out 'qemu-io> ' prompt
qemu-iotests: Filter qemu-io output in 025
block: Use BDRV_O_NO_BACKING where appropriate
qemu-iotests: Test snapshot mode
block: Enable BDRV_O_SNAPSHOT with driver-specific options
qemu-iotests: Make test case 030, 040 and 055 deterministic
qemu-iotest: Add pause_drive and resume_drive methods
blkdebug: add "remove_break" command
qemu-iotests: Drop local version of cancel_and_wait from 040
sheepdog: support user-defined redundancy option
sheepdog: refactor do_sd_create()
qdict: Optimise qdict_do_flatten()
qdict: Fix memory leak in qdict_do_flatten()
MAINTAINERS: add sheepdog development mailing list
COW: Extend checking allocated bits to beyond one sector
COW: Speed up writes
qapi: Change BlockDirtyInfo to list
block: per caller dirty bitmap
...
Message-id: 1385743555-27888-1-git-send-email-kwolf@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/backup.c | 3 | ||||
-rw-r--r-- | block/blkdebug.c | 27 | ||||
-rw-r--r-- | block/cow.c | 124 | ||||
-rw-r--r-- | block/iscsi.c | 150 | ||||
-rw-r--r-- | block/mirror.c | 23 | ||||
-rw-r--r-- | block/qapi.c | 9 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 2 | ||||
-rw-r--r-- | block/qcow2.c | 5 | ||||
-rw-r--r-- | block/qed.c | 3 | ||||
-rw-r--r-- | block/raw_bsd.c | 6 | ||||
-rw-r--r-- | block/sheepdog.c | 130 | ||||
-rw-r--r-- | block/stream.c | 5 | ||||
-rw-r--r-- | block/vmdk.c | 14 |
13 files changed, 364 insertions, 137 deletions
diff --git a/block/backup.c b/block/backup.c index cad14c90b2..0198514043 100644 --- a/block/backup.c +++ b/block/backup.c @@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = bdrv_co_write_zeroes(job->target, - start * BACKUP_SECTORS_PER_CLUSTER, n); + start * BACKUP_SECTORS_PER_CLUSTER, + n, BDRV_REQ_MAY_UNMAP); } else { ret = bdrv_co_writev(job->target, start * BACKUP_SECTORS_PER_CLUSTER, n, diff --git a/block/blkdebug.c b/block/blkdebug.c index 16d2b91ac9..37cf028545 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -605,6 +605,31 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) return -ENOENT; } +static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + BlkdebugRule *rule, *next; + int i, ret = -ENOENT; + + for (i = 0; i < BLKDBG_EVENT_MAX; i++) { + QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { + if (rule->action == ACTION_SUSPEND && + !strcmp(rule->options.suspend.tag, tag)) { + remove_rule(rule); + ret = 0; + } + } + } + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co, NULL); + ret = 0; + } + } + return ret; +} static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) { @@ -639,6 +664,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + .bdrv_debug_remove_breakpoint + = blkdebug_debug_remove_breakpoint, .bdrv_debug_resume = blkdebug_debug_resume, .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, }; diff --git a/block/cow.c b/block/cow.c index 909c3e7182..dc15e46b6c 100644 --- a/block/cow.c +++ b/block/cow.c @@ -103,40 +103,18 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags, return ret; } -/* - * XXX(hch): right now these functions are extremely inefficient. - * We should just read the whole bitmap we'll need in one go instead. - */ -static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first) +static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors) { - uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8; - uint8_t bitmap; - int ret; - - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; - } - - if (bitmap & (1 << (bitnum % 8))) { - return 0; - } - - if (*first) { - ret = bdrv_flush(bs->file); - if (ret < 0) { - return ret; + int64_t bitnum = start, last = start + nb_sectors; + while (bitnum < last) { + if ((bitnum & 7) == 0 && bitnum + 8 <= last) { + bitmap[bitnum / 8] = 0xFF; + bitnum += 8; + continue; } - *first = false; + bitmap[bitnum/8] |= (1 << (bitnum % 8)); + bitnum++; } - - bitmap |= (1 << (bitnum % 8)); - - ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; - } - return 0; } #define BITS_PER_BITMAP_SECTOR (512 * 8) @@ -174,18 +152,34 @@ static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs, { int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8; uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE; - uint8_t bitmap[BDRV_SECTOR_SIZE]; - int ret; - int changed; + bool first = true; + int changed = 0, same = 0; - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; - } + do { + int ret; + uint8_t bitmap[BDRV_SECTOR_SIZE]; + + bitnum &= BITS_PER_BITMAP_SECTOR - 1; + int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum); + + ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); + if (ret < 0) { + return ret; + } + + if (first) { + changed = cow_test_bit(bitnum, bitmap); + first = false; + } + + same += cow_find_streak(bitmap, changed, bitnum, nb_sectors); + + bitnum += sector_bits; + nb_sectors -= sector_bits; + offset += BDRV_SECTOR_SIZE; + } while (nb_sectors); - bitnum &= BITS_PER_BITMAP_SECTOR - 1; - changed = cow_test_bit(bitnum, bitmap); - *num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors); + *num_same = same; return changed; } @@ -204,18 +198,52 @@ static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs, static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - int error = 0; - int i; + int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8; + uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE; bool first = true; + int sector_bits; + + for ( ; nb_sectors; + bitnum += sector_bits, + nb_sectors -= sector_bits, + offset += BDRV_SECTOR_SIZE) { + int ret, set; + uint8_t bitmap[BDRV_SECTOR_SIZE]; + + bitnum &= BITS_PER_BITMAP_SECTOR - 1; + sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum); - for (i = 0; i < nb_sectors; i++) { - error = cow_set_bit(bs, sector_num + i, &first); - if (error) { - break; + ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); + if (ret < 0) { + return ret; + } + + /* Skip over any already set bits */ + set = cow_find_streak(bitmap, 1, bitnum, sector_bits); + bitnum += set; + sector_bits -= set; + nb_sectors -= set; + if (!sector_bits) { + continue; + } + + if (first) { + ret = bdrv_flush(bs->file); + if (ret < 0) { + return ret; + } + first = false; + } + + cow_set_bits(bitmap, bitnum, sector_bits); + + ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap)); + if (ret < 0) { + return ret; } } - return error; + return 0; } static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num, diff --git a/block/iscsi.c b/block/iscsi.c index a2d578c0a7..b7b52381d6 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -56,6 +56,7 @@ typedef struct IscsiLun { uint8_t lbprz; struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; + unsigned char *zeroblock; } IscsiLun; typedef struct IscsiTask { @@ -87,7 +88,6 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES 5 -#define ISCSI_MAX_UNMAP 131072 static void iscsi_bh_cb(void *p) @@ -912,8 +912,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; struct unmap_list list; - uint32_t nb_blocks; - uint32_t max_unmap; if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; @@ -925,57 +923,102 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, } list.lba = sector_qemu2lun(sector_num, iscsilun); - nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); + list.num = sector_qemu2lun(nb_sectors, iscsilun); - max_unmap = iscsilun->bl.max_unmap; - if (max_unmap == 0xffffffff) { - max_unmap = ISCSI_MAX_UNMAP; + iscsi_co_init_iscsitask(iscsilun, &iTask); +retry: + if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, + iscsi_co_generic_cb, &iTask) == NULL) { + return -EIO; } - while (nb_blocks > 0) { - iscsi_co_init_iscsitask(iscsilun, &iTask); - list.num = nb_blocks; - if (list.num > max_unmap) { - list.num = max_unmap; - } -retry: - if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, - iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; - } + while (!iTask.complete) { + iscsi_set_events(iscsilun); + qemu_coroutine_yield(); + } - while (!iTask.complete) { - iscsi_set_events(iscsilun); - qemu_coroutine_yield(); - } + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } - if (iTask.task != NULL) { - scsi_free_scsi_task(iTask.task); - iTask.task = NULL; - } + if (iTask.do_retry) { + goto retry; + } - if (iTask.do_retry) { - goto retry; - } + if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { + /* the target might fail with a check condition if it + is not happy with the alignment of the UNMAP request + we silently fail in this case */ + return 0; + } - if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { - /* the target might fail with a check condition if it - is not happy with the alignment of the UNMAP request - we silently fail in this case */ - return 0; - } + if (iTask.status != SCSI_STATUS_GOOD) { + return -EIO; + } - if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; - } + return 0; +} + +#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) + +static int +coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) +{ + IscsiLun *iscsilun = bs->opaque; + struct IscsiTask iTask; + uint64_t lba; + uint32_t nb_blocks; - list.lba += list.num; - nb_blocks -= list.num; + if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { + return -EINVAL; + } + + if (!iscsilun->lbp.lbpws) { + /* WRITE SAME is not supported by the target */ + return -ENOTSUP; + } + + lba = sector_qemu2lun(sector_num, iscsilun); + nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); + + if (iscsilun->zeroblock == NULL) { + iscsilun->zeroblock = g_malloc0(iscsilun->block_size); + } + + iscsi_co_init_iscsitask(iscsilun, &iTask); +retry: + if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, + iscsilun->zeroblock, iscsilun->block_size, + nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { + return -EIO; + } + + while (!iTask.complete) { + iscsi_set_events(iscsilun); + qemu_coroutine_yield(); + } + + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + + if (iTask.do_retry) { + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + return -EIO; } return 0; } +#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */ + static int parse_chap(struct iscsi_context *iscsi, const char *target) { QemuOptsList *list; @@ -1384,6 +1427,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(task); task = NULL; + + if (iscsilun->bl.max_unmap < 0xffffffff) { + bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, + iscsilun); + } + bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); + + if (iscsilun->bl.max_ws_len < 0xffffffff) { + bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, + iscsilun); + } + bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); } #if defined(LIBISCSI_FEATURE_NOP_COUNTER) @@ -1424,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs) } qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); iscsi_destroy_context(iscsi); + g_free(iscsilun->zeroblock); memset(iscsilun, 0, sizeof(IscsiLun)); } @@ -1506,6 +1564,14 @@ out: return ret; } +static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + IscsiLun *iscsilun = bs->opaque; + bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz; + bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws; + return 0; +} + static QEMUOptionParameter iscsi_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1527,12 +1593,16 @@ static BlockDriver bdrv_iscsi = { .create_options = iscsi_create_options, .bdrv_getlength = iscsi_getlength, + .bdrv_get_info = iscsi_get_info, .bdrv_truncate = iscsi_truncate, #if defined(LIBISCSI_FEATURE_IOVECTOR) .bdrv_co_get_block_status = iscsi_co_get_block_status, #endif .bdrv_co_discard = iscsi_co_discard, +#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) + .bdrv_co_write_zeroes = iscsi_co_write_zeroes, +#endif .bdrv_aio_readv = iscsi_aio_readv, .bdrv_aio_writev = iscsi_aio_writev, diff --git a/block/mirror.c b/block/mirror.c index 7b95acf88c..6dc27ad35d 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -39,6 +39,7 @@ typedef struct MirrorBlockJob { int64_t granularity; size_t buf_size; unsigned long *cow_bitmap; + BdrvDirtyBitmap *dirty_bitmap; HBitmapIter hbi; uint8_t *buf; QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; @@ -145,9 +146,10 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) s->sector_num = hbitmap_iter_next(&s->hbi); if (s->sector_num < 0) { - bdrv_dirty_iter_init(source, &s->hbi); + bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi); s->sector_num = hbitmap_iter_next(&s->hbi); - trace_mirror_restart_iter(s, bdrv_get_dirty_count(source)); + trace_mirror_restart_iter(s, + bdrv_get_dirty_count(source, s->dirty_bitmap)); assert(s->sector_num >= 0); } @@ -183,7 +185,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) do { int added_sectors, added_chunks; - if (!bdrv_get_dirty(source, next_sector) || + if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) || test_bit(next_chunk, s->in_flight_bitmap)) { assert(nb_sectors > 0); break; @@ -249,7 +251,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Advance the HBitmapIter in parallel, so that we do not examine * the same sector twice. */ - if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) { + if (next_sector > hbitmap_next_sector + && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) { hbitmap_next_sector = hbitmap_iter_next(&s->hbi); } @@ -355,7 +358,7 @@ static void coroutine_fn mirror_run(void *opaque) } } - bdrv_dirty_iter_init(bs, &s->hbi); + bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi); last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); for (;;) { uint64_t delay_ns; @@ -367,7 +370,7 @@ static void coroutine_fn mirror_run(void *opaque) goto immediate_exit; } - cnt = bdrv_get_dirty_count(bs); + cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that qemu_aio_flush() returns. @@ -409,7 +412,7 @@ static void coroutine_fn mirror_run(void *opaque) should_complete = s->should_complete || block_job_is_cancelled(&s->common); - cnt = bdrv_get_dirty_count(bs); + cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } } @@ -424,7 +427,7 @@ static void coroutine_fn mirror_run(void *opaque) */ trace_mirror_before_drain(s, cnt); bdrv_drain_all(); - cnt = bdrv_get_dirty_count(bs); + cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } ret = 0; @@ -471,7 +474,7 @@ immediate_exit: qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); - bdrv_set_dirty_tracking(bs, 0); + bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { @@ -575,7 +578,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, s->granularity = granularity; s->buf_size = MAX(buf_size, granularity); - bdrv_set_dirty_tracking(bs, granularity); + s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity); bdrv_set_enable_write_cache(s->target, true); bdrv_set_on_error(s->target, on_target_error, on_target_error); bdrv_iostatus_enable(s->target); diff --git a/block/qapi.c b/block/qapi.c index 5880b3e42b..a32cb79db8 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -204,12 +204,9 @@ void bdrv_query_info(BlockDriverState *bs, info->io_status = bs->iostatus; } - if (bs->dirty_bitmap) { - info->has_dirty = true; - info->dirty = g_malloc0(sizeof(*info->dirty)); - info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE; - info->dirty->granularity = - ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap)); + if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { + info->has_dirty_bitmaps = true; + info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); } if (bs->drv) { diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 791083a0ef..11f9c50aa7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1613,7 +1613,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE, - s->cluster_sectors); + s->cluster_sectors, 0); if (ret < 0) { if (!preallocated) { qcow2_free_clusters(bs, offset, s->cluster_size, diff --git a/block/qcow2.c b/block/qcow2.c index 6e5d98dc48..8e2b6c7548 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1588,7 +1588,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ ret = bdrv_open(bs, filename, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, &local_err); + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, + drv, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); goto out; @@ -1696,7 +1697,7 @@ static int qcow2_make_empty(BlockDriverState *bs) } static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; BDRVQcowState *s = bs->opaque; diff --git a/block/qed.c b/block/qed.c index 6c0cba04f3..adc2736dd7 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1397,7 +1397,8 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret) static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) + int nb_sectors, + BdrvRequestFlags flags) { BlockDriverAIOCB *blockacb; BDRVQEDState *s = bs->opaque; diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 2265dcc03f..978ae7a102 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -68,9 +68,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, } static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, + BdrvRequestFlags flags) { - return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors); + return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags); } static int coroutine_fn raw_co_discard(BlockDriverState *bs, @@ -149,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { bs->sg = bs->file->sg; + bs->bl = bs->file->bl; return 0; } diff --git a/block/sheepdog.c b/block/sheepdog.c index ef387de71f..b4ae50f44d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -91,6 +91,14 @@ #define SD_NR_VDIS (1U << 24) #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) +/* + * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and + * (SD_EC_MAX_STRIP - 1) for parity strips + * + * SD_MAX_COPIES is sum of number of data strips and parity strips. + */ +#define SD_EC_MAX_STRIP 16 +#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1) #define SD_INODE_SIZE (sizeof(SheepdogInode)) #define CURRENT_VDI_ID 0 @@ -1464,9 +1472,7 @@ out: return ret; } -static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, - uint32_t base_vid, uint32_t *vdi_id, int snapshot, - uint8_t copy_policy) +static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot) { SheepdogVdiReq hdr; SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; @@ -1483,11 +1489,11 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, * does not fit in buf? For now, just truncate and avoid buffer overrun. */ memset(buf, 0, sizeof(buf)); - pstrcpy(buf, sizeof(buf), filename); + pstrcpy(buf, sizeof(buf), s->name); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; - hdr.vdi_id = base_vid; + hdr.vdi_id = s->inode.vdi_id; wlen = SD_MAX_VDI_LEN; @@ -1495,8 +1501,9 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, hdr.snapid = snapshot; hdr.data_length = wlen; - hdr.vdi_size = vdi_size; - hdr.copy_policy = copy_policy; + hdr.vdi_size = s->inode.vdi_size; + hdr.copy_policy = s->inode.copy_policy; + hdr.copies = s->inode.nr_copies; ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); @@ -1507,7 +1514,7 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, } if (rsp->result != SD_RES_SUCCESS) { - error_report("%s, %s", sd_strerror(rsp->result), filename); + error_report("%s, %s", sd_strerror(rsp->result), s->inode.name); return -EIO; } @@ -1564,27 +1571,79 @@ out: return ret; } +/* + * Sheepdog support two kinds of redundancy, full replication and erasure + * coding. + * + * # create a fully replicated vdi with x copies + * -o redundancy=x (1 <= x <= SD_MAX_COPIES) + * + * # create a erasure coded vdi with x data strips and y parity strips + * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP) + */ +static int parse_redundancy(BDRVSheepdogState *s, const char *opt) +{ + struct SheepdogInode *inode = &s->inode; + const char *n1, *n2; + long copy, parity; + char p[10]; + + pstrcpy(p, sizeof(p), opt); + n1 = strtok(p, ":"); + n2 = strtok(NULL, ":"); + + if (!n1) { + return -EINVAL; + } + + copy = strtol(n1, NULL, 10); + if (copy > SD_MAX_COPIES || copy < 1) { + return -EINVAL; + } + if (!n2) { + inode->copy_policy = 0; + inode->nr_copies = copy; + return 0; + } + + if (copy != 2 && copy != 4 && copy != 8 && copy != 16) { + return -EINVAL; + } + + parity = strtol(n2, NULL, 10); + if (parity >= SD_EC_MAX_STRIP || parity < 1) { + return -EINVAL; + } + + /* + * 4 bits for parity and 4 bits for data. + * We have to compress upper data bits because it can't represent 16 + */ + inode->copy_policy = ((copy / 2) << 4) + parity; + inode->nr_copies = copy + parity; + + return 0; +} + static int sd_create(const char *filename, QEMUOptionParameter *options, Error **errp) { int ret = 0; - uint32_t vid = 0, base_vid = 0; - int64_t vdi_size = 0; + uint32_t vid = 0; char *backing_file = NULL; BDRVSheepdogState *s; - char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; + char tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; bool prealloc = false; Error *local_err = NULL; s = g_malloc0(sizeof(BDRVSheepdogState)); - memset(vdi, 0, sizeof(vdi)); memset(tag, 0, sizeof(tag)); if (strstr(filename, "://")) { - ret = sd_parse_uri(s, filename, vdi, &snapid, tag); + ret = sd_parse_uri(s, filename, s->name, &snapid, tag); } else { - ret = parse_vdiname(s, filename, vdi, &snapid, tag); + ret = parse_vdiname(s, filename, s->name, &snapid, tag); } if (ret < 0) { goto out; @@ -1592,7 +1651,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, while (options && options->name) { if (!strcmp(options->name, BLOCK_OPT_SIZE)) { - vdi_size = options->value.n; + s->inode.vdi_size = options->value.n; } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { backing_file = options->value.s; } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { @@ -1606,11 +1665,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, ret = -EINVAL; goto out; } + } else if (!strcmp(options->name, BLOCK_OPT_REDUNDANCY)) { + ret = parse_redundancy(s, options->value.s); + if (ret < 0) { + goto out; + } } options++; } - if (vdi_size > SD_MAX_VDI_SIZE) { + if (s->inode.vdi_size > SD_MAX_VDI_SIZE) { error_report("too big image size"); ret = -EINVAL; goto out; @@ -1645,12 +1709,10 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, goto out; } - base_vid = s->inode.vdi_id; bdrv_unref(bs); } - /* TODO: allow users to specify copy number */ - ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0, 0); + ret = do_sd_create(s, &vid, 0); if (!prealloc || ret) { goto out; } @@ -1833,8 +1895,7 @@ static int sd_create_branch(BDRVSheepdogState *s) * false bail out. */ deleted = sd_delete(s); - ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, - !deleted, s->inode.copy_policy); + ret = do_sd_create(s, &vid, !deleted); if (ret) { goto out; } @@ -2097,8 +2158,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, - 1, s->inode.copy_policy); + ret = do_sd_create(s, &new_vid, 1); if (ret < 0) { error_report("failed to create inode for snapshot. %s", strerror(errno)); @@ -2407,6 +2467,22 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, return ret; } +static int64_t sd_get_allocated_file_size(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + SheepdogInode *inode = &s->inode; + unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE); + uint64_t size = 0; + + for (i = 0; i < last; i++) { + if (inode->data_vdi_id[i] == 0) { + continue; + } + size += SD_DATA_OBJ_SIZE; + } + return size; +} + static QEMUOptionParameter sd_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -2423,6 +2499,11 @@ static QEMUOptionParameter sd_create_options[] = { .type = OPT_STRING, .help = "Preallocation mode (allowed values: off, full)" }, + { + .name = BLOCK_OPT_REDUNDANCY, + .type = OPT_STRING, + .help = "Redundancy of the image" + }, { NULL } }; @@ -2436,6 +2517,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, + .bdrv_get_allocated_file_size = sd_get_allocated_file_size, .bdrv_truncate = sd_truncate, .bdrv_co_readv = sd_co_readv, @@ -2465,6 +2547,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, + .bdrv_get_allocated_file_size = sd_get_allocated_file_size, .bdrv_truncate = sd_truncate, .bdrv_co_readv = sd_co_readv, @@ -2494,6 +2577,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, + .bdrv_get_allocated_file_size = sd_get_allocated_file_size, .bdrv_truncate = sd_truncate, .bdrv_co_readv = sd_co_readv, diff --git a/block/stream.c b/block/stream.c index 694fd42e41..46bec7d379 100644 --- a/block/stream.c +++ b/block/stream.c @@ -88,6 +88,11 @@ static void coroutine_fn stream_run(void *opaque) int n = 0; void *buf; + if (!bs->backing_hd) { + block_job_completed(&s->common, 0); + return; + } + s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { block_job_completed(&s->common, s->common.len); diff --git a/block/vmdk.c b/block/vmdk.c index a7ebd0f125..88d09e3e16 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -605,13 +605,20 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, header = footer.header; } - if (le32_to_cpu(header.version) >= 3) { + if (le32_to_cpu(header.version) > 3) { char buf[64]; snprintf(buf, sizeof(buf), "VMDK version %d", le32_to_cpu(header.version)); qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "vmdk", buf); return -ENOTSUP; + } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) { + /* VMware KB 2064959 explains that version 3 added support for + * persistent changed block tracking (CBT), and backup software can + * read it as version=1 if it doesn't care about the changed area + * information. So we are safe to enable read only. */ + error_setg(errp, "VMDK version 3 must be read only"); + return -EINVAL; } if (le32_to_cpu(header.num_gtes_per_gt) > 512) { @@ -1419,7 +1426,8 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) + int nb_sectors, + BdrvRequestFlags flags) { int ret; BDRVVmdkState *s = bs->opaque; @@ -1689,7 +1697,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options, } if (backing_file) { BlockDriverState *bs = bdrv_new(""); - ret = bdrv_open(bs, backing_file, NULL, 0, NULL, errp); + ret = bdrv_open(bs, backing_file, NULL, BDRV_O_NO_BACKING, NULL, errp); if (ret != 0) { bdrv_unref(bs); return ret; |