aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
author Anthony Liguori <aliguori@amazon.com> 2013-12-06 12:59:58 -0800
committer Anthony Liguori <aliguori@amazon.com> 2013-12-06 12:59:58 -0800
commit 0a0ee0b93bdd6e1ef628283d00bb979e27655ebb (patch)
tree a8e1d34e3a187afc96d8697dfe121032a0268912 /block
parent 9ed5dacbfa0f3f74238854776385f150b68e78b9 (diff)
parent 981cbf59b5360647e908186e7306ee9013a58c88 (diff)
Merge remote-tracking branch 'kwolf/tags/for-anthony' into staging
Block patches for 2.0 (flushing block-next)

# gpg: Signature made Fri 29 Nov 2013 08:43:18 AM PST using RSA key ID C88F2FD6
# gpg: Can't check signature: public key not found

# By Peter Lieven (17) and others
# Via Kevin Wolf
* kwolf/tags/for-anthony: (41 commits)
  qemu-iotests: Add sample image and test for VMDK version 3
  vmdk: Allow read only open of VMDK version 3
  qemu-iotests: Filter out 'qemu-io> ' prompt
  qemu-iotests: Filter qemu-io output in 025
  block: Use BDRV_O_NO_BACKING where appropriate
  qemu-iotests: Test snapshot mode
  block: Enable BDRV_O_SNAPSHOT with driver-specific options
  qemu-iotests: Make test case 030, 040 and 055 deterministic
  qemu-iotest: Add pause_drive and resume_drive methods
  blkdebug: add "remove_break" command
  qemu-iotests: Drop local version of cancel_and_wait from 040
  sheepdog: support user-defined redundancy option
  sheepdog: refactor do_sd_create()
  qdict: Optimise qdict_do_flatten()
  qdict: Fix memory leak in qdict_do_flatten()
  MAINTAINERS: add sheepdog development mailing list
  COW: Extend checking allocated bits to beyond one sector
  COW: Speed up writes
  qapi: Change BlockDirtyInfo to list
  block: per caller dirty bitmap
  ...

Message-id: 1385743555-27888-1-git-send-email-kwolf@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
Diffstat (limited to 'block')
-rw-r--r-- block/backup.c 3
-rw-r--r-- block/blkdebug.c 27
-rw-r--r-- block/cow.c 124
-rw-r--r-- block/iscsi.c 150
-rw-r--r-- block/mirror.c 23
-rw-r--r-- block/qapi.c 9
-rw-r--r-- block/qcow2-cluster.c 2
-rw-r--r-- block/qcow2.c 5
-rw-r--r-- block/qed.c 3
-rw-r--r-- block/raw_bsd.c 6
-rw-r--r-- block/sheepdog.c 130
-rw-r--r-- block/stream.c 5
-rw-r--r-- block/vmdk.c 14
13 files changed, 364 insertions, 137 deletions
diff --git a/block/backup.c b/block/backup.c
index cad14c90b2..0198514043 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER, n);
+ start * BACKUP_SECTORS_PER_CLUSTER,
+ n, BDRV_REQ_MAY_UNMAP);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 16d2b91ac9..37cf028545 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -605,6 +605,31 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
return -ENOENT;
}
+/*
+ * Drop every ACTION_SUSPEND rule whose tag equals @tag and resume every
+ * suspended request that carries the same tag.
+ *
+ * Returns 0 if at least one rule was removed or one request was resumed,
+ * -ENOENT if nothing matched @tag.
+ */
+static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
+                                            const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq *r, *r_next;
+    BlkdebugRule *rule, *next;
+    int i, ret = -ENOENT;
+
+    for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
+            if (rule->action == ACTION_SUSPEND &&
+                !strcmp(rule->options.suspend.tag, tag)) {
+                remove_rule(rule);
+                ret = 0;
+            }
+        }
+    }
+    /*
+     * Entering the coroutine lets the resumed request run before control
+     * comes back here; it is expected to unlink its own list entry while
+     * doing so.  Fetch the successor up front with the _SAFE iterator so
+     * the walk never reads the 'next' link of an entry that is already gone.
+     */
+    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
+        if (!strcmp(r->tag, tag)) {
+            qemu_coroutine_enter(r->co, NULL);
+            ret = 0;
+        }
+    }
+    return ret;
+}
static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
@@ -639,6 +664,8 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
+ .bdrv_debug_remove_breakpoint
+ = blkdebug_debug_remove_breakpoint,
.bdrv_debug_resume = blkdebug_debug_resume,
.bdrv_debug_is_suspended = blkdebug_debug_is_suspended,
};
diff --git a/block/cow.c b/block/cow.c
index 909c3e7182..dc15e46b6c 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -103,40 +103,18 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
-/*
- * XXX(hch): right now these functions are extremely inefficient.
- * We should just read the whole bitmap we'll need in one go instead.
- */
-static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first)
+static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
{
- uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
- uint8_t bitmap;
- int ret;
-
- ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
- }
-
- if (bitmap & (1 << (bitnum % 8))) {
- return 0;
- }
-
- if (*first) {
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- return ret;
+ int64_t bitnum = start, last = start + nb_sectors;
+ while (bitnum < last) {
+ if ((bitnum & 7) == 0 && bitnum + 8 <= last) {
+ bitmap[bitnum / 8] = 0xFF;
+ bitnum += 8;
+ continue;
}
- *first = false;
+ bitmap[bitnum/8] |= (1 << (bitnum % 8));
+ bitnum++;
}
-
- bitmap |= (1 << (bitnum % 8));
-
- ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
- }
- return 0;
}
#define BITS_PER_BITMAP_SECTOR (512 * 8)
@@ -174,18 +152,34 @@ static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
{
int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
- uint8_t bitmap[BDRV_SECTOR_SIZE];
- int ret;
- int changed;
+ bool first = true;
+ int changed = 0, same = 0;
- ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
- }
+ do {
+ int ret;
+ uint8_t bitmap[BDRV_SECTOR_SIZE];
+
+ bitnum &= BITS_PER_BITMAP_SECTOR - 1;
+ int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
+
+ ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (first) {
+ changed = cow_test_bit(bitnum, bitmap);
+ first = false;
+ }
+
+ same += cow_find_streak(bitmap, changed, bitnum, nb_sectors);
+
+ bitnum += sector_bits;
+ nb_sectors -= sector_bits;
+ offset += BDRV_SECTOR_SIZE;
+ } while (nb_sectors);
- bitnum &= BITS_PER_BITMAP_SECTOR - 1;
- changed = cow_test_bit(bitnum, bitmap);
- *num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors);
+ *num_same = same;
return changed;
}
@@ -204,18 +198,52 @@ static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
- int error = 0;
- int i;
+ int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
+ uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
bool first = true;
+ int sector_bits;
+
+ for ( ; nb_sectors;
+ bitnum += sector_bits,
+ nb_sectors -= sector_bits,
+ offset += BDRV_SECTOR_SIZE) {
+ int ret, set;
+ uint8_t bitmap[BDRV_SECTOR_SIZE];
+
+ bitnum &= BITS_PER_BITMAP_SECTOR - 1;
+ sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
- for (i = 0; i < nb_sectors; i++) {
- error = cow_set_bit(bs, sector_num + i, &first);
- if (error) {
- break;
+ ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Skip over any already set bits */
+ set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
+ bitnum += set;
+ sector_bits -= set;
+ nb_sectors -= set;
+ if (!sector_bits) {
+ continue;
+ }
+
+ if (first) {
+ ret = bdrv_flush(bs->file);
+ if (ret < 0) {
+ return ret;
+ }
+ first = false;
+ }
+
+ cow_set_bits(bitmap, bitnum, sector_bits);
+
+ ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
}
}
- return error;
+ return 0;
}
static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
diff --git a/block/iscsi.c b/block/iscsi.c
index a2d578c0a7..b7b52381d6 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,7 @@ typedef struct IscsiLun {
uint8_t lbprz;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
+ unsigned char *zeroblock;
} IscsiLun;
typedef struct IscsiTask {
@@ -87,7 +88,6 @@ typedef struct IscsiAIOCB {
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES 5
-#define ISCSI_MAX_UNMAP 131072
static void
iscsi_bh_cb(void *p)
@@ -912,8 +912,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
- uint32_t nb_blocks;
- uint32_t max_unmap;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
@@ -925,57 +923,102 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
}
list.lba = sector_qemu2lun(sector_num, iscsilun);
- nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+ list.num = sector_qemu2lun(nb_sectors, iscsilun);
- max_unmap = iscsilun->bl.max_unmap;
- if (max_unmap == 0xffffffff) {
- max_unmap = ISCSI_MAX_UNMAP;
+ iscsi_co_init_iscsitask(iscsilun, &iTask);
+retry:
+ if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
+ iscsi_co_generic_cb, &iTask) == NULL) {
+ return -EIO;
}
- while (nb_blocks > 0) {
- iscsi_co_init_iscsitask(iscsilun, &iTask);
- list.num = nb_blocks;
- if (list.num > max_unmap) {
- list.num = max_unmap;
- }
-retry:
- if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
- iscsi_co_generic_cb, &iTask) == NULL) {
- return -EIO;
- }
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ qemu_coroutine_yield();
+ }
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_coroutine_yield();
- }
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ iTask.task = NULL;
+ }
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
- }
+ if (iTask.do_retry) {
+ goto retry;
+ }
- if (iTask.do_retry) {
- goto retry;
- }
+ if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
+ /* the target might fail with a check condition if it
+ is not happy with the alignment of the UNMAP request
+ we silently fail in this case */
+ return 0;
+ }
- if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
- /* the target might fail with a check condition if it
- is not happy with the alignment of the UNMAP request
- we silently fail in this case */
- return 0;
- }
+ if (iTask.status != SCSI_STATUS_GOOD) {
+ return -EIO;
+ }
- if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
- }
+ return 0;
+}
+
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+
+static int
+coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ struct IscsiTask iTask;
+ uint64_t lba;
+ uint32_t nb_blocks;
- list.lba += list.num;
- nb_blocks -= list.num;
+ if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ return -EINVAL;
+ }
+
+ if (!iscsilun->lbp.lbpws) {
+ /* WRITE SAME is not supported by the target */
+ return -ENOTSUP;
+ }
+
+ lba = sector_qemu2lun(sector_num, iscsilun);
+ nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+
+ if (iscsilun->zeroblock == NULL) {
+ iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
+ }
+
+ iscsi_co_init_iscsitask(iscsilun, &iTask);
+retry:
+ if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iscsilun->zeroblock, iscsilun->block_size,
+ nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
+ 0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
+ return -EIO;
+ }
+
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ qemu_coroutine_yield();
+ }
+
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ iTask.task = NULL;
+ }
+
+ if (iTask.do_retry) {
+ goto retry;
+ }
+
+ if (iTask.status != SCSI_STATUS_GOOD) {
+ return -EIO;
}
return 0;
}
+#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
+
static int parse_chap(struct iscsi_context *iscsi, const char *target)
{
QemuOptsList *list;
@@ -1384,6 +1427,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
sizeof(struct scsi_inquiry_block_limits));
scsi_free_scsi_task(task);
task = NULL;
+
+ if (iscsilun->bl.max_unmap < 0xffffffff) {
+ bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
+ iscsilun);
+ }
+ bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+ iscsilun);
+
+ if (iscsilun->bl.max_ws_len < 0xffffffff) {
+ bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
+ iscsilun);
+ }
+ bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+ iscsilun);
}
#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
@@ -1424,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs)
}
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
+ g_free(iscsilun->zeroblock);
memset(iscsilun, 0, sizeof(IscsiLun));
}
@@ -1506,6 +1564,14 @@ out:
return ret;
}
+/*
+ * Report the zero-related capabilities of the LUN in @bdi, derived from the
+ * lbprz and lbp.lbpws fields cached in the IscsiLun state.
+ *
+ * Always returns 0.
+ */
+static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    IscsiLun *lun = bs->opaque;
+    bool reads_zero = lun->lbprz != 0;
+    bool has_lbpws = lun->lbp.lbpws != 0;
+
+    bdi->unallocated_blocks_are_zero = reads_zero;
+    /* only advertised when both lbprz and lbpws are set */
+    bdi->can_write_zeroes_with_unmap = reads_zero && has_lbpws;
+    return 0;
+}
+
static QEMUOptionParameter iscsi_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1527,12 +1593,16 @@ static BlockDriver bdrv_iscsi = {
.create_options = iscsi_create_options,
.bdrv_getlength = iscsi_getlength,
+ .bdrv_get_info = iscsi_get_info,
.bdrv_truncate = iscsi_truncate,
#if defined(LIBISCSI_FEATURE_IOVECTOR)
.bdrv_co_get_block_status = iscsi_co_get_block_status,
#endif
.bdrv_co_discard = iscsi_co_discard,
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+ .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
+#endif
.bdrv_aio_readv = iscsi_aio_readv,
.bdrv_aio_writev = iscsi_aio_writev,
diff --git a/block/mirror.c b/block/mirror.c
index 7b95acf88c..6dc27ad35d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -39,6 +39,7 @@ typedef struct MirrorBlockJob {
int64_t granularity;
size_t buf_size;
unsigned long *cow_bitmap;
+ BdrvDirtyBitmap *dirty_bitmap;
HBitmapIter hbi;
uint8_t *buf;
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
@@ -145,9 +146,10 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
s->sector_num = hbitmap_iter_next(&s->hbi);
if (s->sector_num < 0) {
- bdrv_dirty_iter_init(source, &s->hbi);
+ bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
s->sector_num = hbitmap_iter_next(&s->hbi);
- trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
+ trace_mirror_restart_iter(s,
+ bdrv_get_dirty_count(source, s->dirty_bitmap));
assert(s->sector_num >= 0);
}
@@ -183,7 +185,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
do {
int added_sectors, added_chunks;
- if (!bdrv_get_dirty(source, next_sector) ||
+ if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
test_bit(next_chunk, s->in_flight_bitmap)) {
assert(nb_sectors > 0);
break;
@@ -249,7 +251,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Advance the HBitmapIter in parallel, so that we do not examine
* the same sector twice.
*/
- if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
+ if (next_sector > hbitmap_next_sector
+ && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
}
@@ -355,7 +358,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
}
- bdrv_dirty_iter_init(bs, &s->hbi);
+ bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
for (;;) {
uint64_t delay_ns;
@@ -367,7 +370,7 @@ static void coroutine_fn mirror_run(void *opaque)
goto immediate_exit;
}
- cnt = bdrv_get_dirty_count(bs);
+ cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that qemu_aio_flush() returns.
@@ -409,7 +412,7 @@ static void coroutine_fn mirror_run(void *opaque)
should_complete = s->should_complete ||
block_job_is_cancelled(&s->common);
- cnt = bdrv_get_dirty_count(bs);
+ cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
}
}
@@ -424,7 +427,7 @@ static void coroutine_fn mirror_run(void *opaque)
*/
trace_mirror_before_drain(s, cnt);
bdrv_drain_all();
- cnt = bdrv_get_dirty_count(bs);
+ cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
}
ret = 0;
@@ -471,7 +474,7 @@ immediate_exit:
qemu_vfree(s->buf);
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
- bdrv_set_dirty_tracking(bs, 0);
+ bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
@@ -575,7 +578,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
s->granularity = granularity;
s->buf_size = MAX(buf_size, granularity);
- bdrv_set_dirty_tracking(bs, granularity);
+ s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
diff --git a/block/qapi.c b/block/qapi.c
index 5880b3e42b..a32cb79db8 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -204,12 +204,9 @@ void bdrv_query_info(BlockDriverState *bs,
info->io_status = bs->iostatus;
}
- if (bs->dirty_bitmap) {
- info->has_dirty = true;
- info->dirty = g_malloc0(sizeof(*info->dirty));
- info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
- info->dirty->granularity =
- ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
+ if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+ info->has_dirty_bitmaps = true;
+ info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
}
if (bs->drv) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 791083a0ef..11f9c50aa7 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1613,7 +1613,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
}
ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
- s->cluster_sectors);
+ s->cluster_sectors, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
diff --git a/block/qcow2.c b/block/qcow2.c
index 6e5d98dc48..8e2b6c7548 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1588,7 +1588,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
ret = bdrv_open(bs, filename, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, &local_err);
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
+ drv, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto out;
@@ -1696,7 +1697,7 @@ static int qcow2_make_empty(BlockDriverState *bs)
}
static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
BDRVQcowState *s = bs->opaque;
diff --git a/block/qed.c b/block/qed.c
index 6c0cba04f3..adc2736dd7 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1397,7 +1397,8 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors)
+ int nb_sectors,
+ BdrvRequestFlags flags)
{
BlockDriverAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 2265dcc03f..978ae7a102 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -68,9 +68,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors,
+ BdrvRequestFlags flags)
{
- return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
+ return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
@@ -149,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->sg;
+ bs->bl = bs->file->bl;
return 0;
}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index ef387de71f..b4ae50f44d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -91,6 +91,14 @@
#define SD_NR_VDIS (1U << 24)
#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
+/*
+ * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
+ * (SD_EC_MAX_STRIP - 1) for parity strips
+ *
+ * SD_MAX_COPIES is sum of number of data strips and parity strips.
+ */
+#define SD_EC_MAX_STRIP 16
+#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1)
#define SD_INODE_SIZE (sizeof(SheepdogInode))
#define CURRENT_VDI_ID 0
@@ -1464,9 +1472,7 @@ out:
return ret;
}
-static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
- uint32_t base_vid, uint32_t *vdi_id, int snapshot,
- uint8_t copy_policy)
+static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
{
SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1483,11 +1489,11 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
* does not fit in buf? For now, just truncate and avoid buffer overrun.
*/
memset(buf, 0, sizeof(buf));
- pstrcpy(buf, sizeof(buf), filename);
+ pstrcpy(buf, sizeof(buf), s->name);
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_NEW_VDI;
- hdr.vdi_id = base_vid;
+ hdr.vdi_id = s->inode.vdi_id;
wlen = SD_MAX_VDI_LEN;
@@ -1495,8 +1501,9 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
hdr.snapid = snapshot;
hdr.data_length = wlen;
- hdr.vdi_size = vdi_size;
- hdr.copy_policy = copy_policy;
+ hdr.vdi_size = s->inode.vdi_size;
+ hdr.copy_policy = s->inode.copy_policy;
+ hdr.copies = s->inode.nr_copies;
ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
@@ -1507,7 +1514,7 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
}
if (rsp->result != SD_RES_SUCCESS) {
- error_report("%s, %s", sd_strerror(rsp->result), filename);
+ error_report("%s, %s", sd_strerror(rsp->result), s->inode.name);
return -EIO;
}
@@ -1564,27 +1571,79 @@ out:
return ret;
}
+/*
+ * Sheepdog supports two kinds of redundancy, full replication and erasure
+ * coding.
+ *
+ * # create a fully replicated vdi with x copies
+ * -o redundancy=x (1 <= x <= SD_MAX_COPIES)
+ *
+ * # create an erasure coded vdi with x data strips and y parity strips
+ * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)
+ *
+ * On success the result is stored in s->inode.copy_policy and
+ * s->inode.nr_copies and 0 is returned.  Any string that is not exactly
+ * "x" or "x:y" with in-range decimal numbers yields -EINVAL and leaves the
+ * inode untouched.
+ */
+static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
+{
+    struct SheepdogInode *inode = &s->inode;
+    char *end;
+    long copy, parity;
+
+    if (!opt) {
+        /*
+         * NOTE(review): presumably the option value is NULL when the user
+         * did not pass -o redundancy; keep whatever the inode already holds
+         * instead of dereferencing NULL -- confirm against the caller.
+         */
+        return 0;
+    }
+
+    /*
+     * Parse in place: no bounded scratch copy that could silently truncate
+     * the input, and no strtok() with its hidden static state.
+     * strtol() leaves end == opt when it consumed no digits at all.
+     */
+    copy = strtol(opt, &end, 10);
+    if (end == opt || copy > SD_MAX_COPIES || copy < 1) {
+        return -EINVAL;
+    }
+    if (*end == '\0') {
+        /* plain "x": full replication */
+        inode->copy_policy = 0;
+        inode->nr_copies = copy;
+        return 0;
+    }
+    if (*end != ':') {
+        /* trailing garbage after the copy count */
+        return -EINVAL;
+    }
+
+    if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
+        return -EINVAL;
+    }
+
+    opt = end + 1;
+    parity = strtol(opt, &end, 10);
+    if (end == opt || *end != '\0' ||
+        parity >= SD_EC_MAX_STRIP || parity < 1) {
+        return -EINVAL;
+    }
+
+    /*
+     * 4 bits for parity and 4 bits for data.
+     * We have to compress upper data bits because it can't represent 16
+     */
+    inode->copy_policy = ((copy / 2) << 4) + parity;
+    inode->nr_copies = copy + parity;
+
+    return 0;
+}
+
static int sd_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
{
int ret = 0;
- uint32_t vid = 0, base_vid = 0;
- int64_t vdi_size = 0;
+ uint32_t vid = 0;
char *backing_file = NULL;
BDRVSheepdogState *s;
- char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
+ char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
bool prealloc = false;
Error *local_err = NULL;
s = g_malloc0(sizeof(BDRVSheepdogState));
- memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
if (strstr(filename, "://")) {
- ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
+ ret = sd_parse_uri(s, filename, s->name, &snapid, tag);
} else {
- ret = parse_vdiname(s, filename, vdi, &snapid, tag);
+ ret = parse_vdiname(s, filename, s->name, &snapid, tag);
}
if (ret < 0) {
goto out;
@@ -1592,7 +1651,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- vdi_size = options->value.n;
+ s->inode.vdi_size = options->value.n;
} else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
backing_file = options->value.s;
} else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
@@ -1606,11 +1665,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
ret = -EINVAL;
goto out;
}
+ } else if (!strcmp(options->name, BLOCK_OPT_REDUNDANCY)) {
+ ret = parse_redundancy(s, options->value.s);
+ if (ret < 0) {
+ goto out;
+ }
}
options++;
}
- if (vdi_size > SD_MAX_VDI_SIZE) {
+ if (s->inode.vdi_size > SD_MAX_VDI_SIZE) {
error_report("too big image size");
ret = -EINVAL;
goto out;
@@ -1645,12 +1709,10 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
goto out;
}
- base_vid = s->inode.vdi_id;
bdrv_unref(bs);
}
- /* TODO: allow users to specify copy number */
- ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0, 0);
+ ret = do_sd_create(s, &vid, 0);
if (!prealloc || ret) {
goto out;
}
@@ -1833,8 +1895,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
* false bail out.
*/
deleted = sd_delete(s);
- ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid,
- !deleted, s->inode.copy_policy);
+ ret = do_sd_create(s, &vid, !deleted);
if (ret) {
goto out;
}
@@ -2097,8 +2158,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto cleanup;
}
- ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
- 1, s->inode.copy_policy);
+ ret = do_sd_create(s, &new_vid, 1);
if (ret < 0) {
error_report("failed to create inode for snapshot. %s",
strerror(errno));
@@ -2407,6 +2467,22 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
return ret;
}
+/*
+ * Return the number of bytes backed by data objects: SD_DATA_OBJ_SIZE for
+ * every slot of inode->data_vdi_id[] that is non-zero, looking only at the
+ * slots needed to cover inode->vdi_size.
+ */
+static int64_t sd_get_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVSheepdogState *s = bs->opaque;
+    SheepdogInode *inode = &s->inode;
+    unsigned long nr_objs = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE);
+    unsigned long idx = 0;
+    uint64_t allocated = 0;
+
+    while (idx < nr_objs) {
+        /* a zero id marks a slot that contributes nothing to the total */
+        if (inode->data_vdi_id[idx] != 0) {
+            allocated += SD_DATA_OBJ_SIZE;
+        }
+        idx++;
+    }
+    return allocated;
+}
+
static QEMUOptionParameter sd_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -2423,6 +2499,11 @@ static QEMUOptionParameter sd_create_options[] = {
.type = OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
+ {
+ .name = BLOCK_OPT_REDUNDANCY,
+ .type = OPT_STRING,
+ .help = "Redundancy of the image"
+ },
{ NULL }
};
@@ -2436,6 +2517,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
+ .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
@@ -2465,6 +2547,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
+ .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
@@ -2494,6 +2577,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
+ .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
diff --git a/block/stream.c b/block/stream.c
index 694fd42e41..46bec7d379 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -88,6 +88,11 @@ static void coroutine_fn stream_run(void *opaque)
int n = 0;
void *buf;
+ if (!bs->backing_hd) {
+ block_job_completed(&s->common, 0);
+ return;
+ }
+
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
block_job_completed(&s->common, s->common.len);
diff --git a/block/vmdk.c b/block/vmdk.c
index a7ebd0f125..88d09e3e16 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -605,13 +605,20 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
header = footer.header;
}
- if (le32_to_cpu(header.version) >= 3) {
+ if (le32_to_cpu(header.version) > 3) {
char buf[64];
snprintf(buf, sizeof(buf), "VMDK version %d",
le32_to_cpu(header.version));
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "vmdk", buf);
return -ENOTSUP;
+ } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
+ /* VMware KB 2064959 explains that version 3 added support for
+ * persistent changed block tracking (CBT), and backup software can
+ * read it as version=1 if it doesn't care about the changed area
+ * information. So we are safe to enable read only. */
+ error_setg(errp, "VMDK version 3 must be read only");
+ return -EINVAL;
}
if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
@@ -1419,7 +1426,8 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors)
+ int nb_sectors,
+ BdrvRequestFlags flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
@@ -1689,7 +1697,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
}
if (backing_file) {
BlockDriverState *bs = bdrv_new("");
- ret = bdrv_open(bs, backing_file, NULL, 0, NULL, errp);
+ ret = bdrv_open(bs, backing_file, NULL, BDRV_O_NO_BACKING, NULL, errp);
if (ret != 0) {
bdrv_unref(bs);
return ret;