aboutsummaryrefslogtreecommitdiff
path: root/block.c
diff options
context:
space:
mode:
Diffstat (limited to 'block.c')
-rw-r--r--block.c364
1 files changed, 272 insertions, 92 deletions
diff --git a/block.c b/block.c
index 382ea71f4b..3d78581c63 100644
--- a/block.c
+++ b/block.c
@@ -49,12 +49,12 @@
#include <windows.h>
#endif
-#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+struct BdrvDirtyBitmap {
+ HBitmap *bitmap;
+ QLIST_ENTRY(BdrvDirtyBitmap) list;
+};
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-} BdrvRequestFlags;
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
@@ -84,7 +84,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -323,6 +323,7 @@ BlockDriverState *bdrv_new(const char *device_name)
BlockDriverState *bs;
bs = g_malloc0(sizeof(BlockDriverState));
+ QLIST_INIT(&bs->dirty_bitmaps);
pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
if (device_name[0] != '\0') {
QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
@@ -1052,21 +1053,16 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
int64_t total_size;
BlockDriver *bdrv_qcow2;
QEMUOptionParameter *create_options;
- char backing_filename[PATH_MAX];
-
- if (qdict_size(options) != 0) {
- error_setg(errp, "Can't use snapshot=on with driver-specific options");
- ret = -EINVAL;
- goto fail;
- }
- assert(filename != NULL);
+ QDict *snapshot_options;
/* if snapshot, we create a temporary backing file and open it
instead of opening 'filename' directly */
- /* if there is a backing file, use it */
+ /* Get the required size from the image */
bs1 = bdrv_new("");
- ret = bdrv_open(bs1, filename, NULL, 0, drv, &local_err);
+ QINCREF(options);
+ ret = bdrv_open(bs1, filename, options, BDRV_O_NO_BACKING,
+ drv, &local_err);
if (ret < 0) {
bdrv_unref(bs1);
goto fail;
@@ -1075,33 +1071,18 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
bdrv_unref(bs1);
+ /* Create the temporary image */
ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not get temporary filename");
goto fail;
}
- /* Real path is meaningless for protocols */
- if (path_has_protocol(filename)) {
- snprintf(backing_filename, sizeof(backing_filename),
- "%s", filename);
- } else if (!realpath(filename, backing_filename)) {
- ret = -errno;
- error_setg_errno(errp, errno, "Could not resolve path '%s'", filename);
- goto fail;
- }
-
bdrv_qcow2 = bdrv_find_format("qcow2");
create_options = parse_option_parameters("", bdrv_qcow2->create_options,
NULL);
set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
- set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
- backing_filename);
- if (drv) {
- set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
- drv->format_name);
- }
ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
free_option_parameters(create_options);
@@ -1114,6 +1095,22 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
goto fail;
}
+ /* Prepare a new options QDict for the temporary file, where user
+ * options refer to the backing file */
+ if (filename) {
+ qdict_put(options, "file.filename", qstring_from_str(filename));
+ }
+ if (drv) {
+ qdict_put(options, "driver", qstring_from_str(drv->format_name));
+ }
+
+ snapshot_options = qdict_new();
+ qdict_put(snapshot_options, "backing", options);
+ qdict_flatten(snapshot_options);
+
+ bs->options = snapshot_options;
+ options = qdict_clone_shallow(bs->options);
+
filename = tmp_filename;
drv = bdrv_qcow2;
bs->is_temporary = 1;
@@ -1622,7 +1619,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
bs_dest->iostatus = bs_src->iostatus;
/* dirty bitmap */
- bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
+ bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
/* reference count */
bs_dest->refcnt = bs_src->refcnt;
@@ -1655,7 +1652,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
/* bs_new must be anonymous and shouldn't have anything fancy enabled */
assert(bs_new->device_name[0] == '\0');
- assert(bs_new->dirty_bitmap == NULL);
+ assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
assert(bs_new->job == NULL);
assert(bs_new->dev == NULL);
assert(bs_new->in_use == 0);
@@ -1716,6 +1713,7 @@ static void bdrv_delete(BlockDriverState *bs)
assert(!bs->job);
assert(!bs->in_use);
assert(!bs->refcnt);
+ assert(QLIST_EMPTY(&bs->dirty_bitmaps));
bdrv_close(bs);
@@ -2397,10 +2395,48 @@ int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
}
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags)
{
return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
+}
+
+/*
+ * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+{
+ int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+ int64_t ret, nb_sectors, sector_num = 0;
+ int n;
+
+ for (;;) {
+ nb_sectors = target_size - sector_num;
+ if (nb_sectors <= 0) {
+ return 0;
+ }
+ if (nb_sectors > INT_MAX) {
+ nb_sectors = INT_MAX;
+ }
+ ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
+ if (ret & BDRV_BLOCK_ZERO) {
+ sector_num += n;
+ continue;
+ }
+ ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+ if (ret < 0) {
+ error_report("error writing zeroes at sector %" PRId64 ": %s",
+ sector_num, strerror(-ret));
+ return ret;
+ }
+ sector_num += n;
+ }
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2582,7 +2618,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
if (drv->bdrv_co_write_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors);
+ cluster_nb_sectors, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
@@ -2715,32 +2751,65 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
BDRV_REQ_COPY_ON_READ);
}
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_WRITE_ZEROES_DEFAULT 32768
+
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
- struct iovec iov;
- int ret;
+ struct iovec iov = {0};
+ int ret = 0;
- /* TODO Emulate only part of misaligned requests instead of letting block
- * drivers return -ENOTSUP and emulate everything */
+ int max_write_zeroes = bs->bl.max_write_zeroes ?
+ bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
- /* First try the efficient write zeroes operation */
- if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
- if (ret != -ENOTSUP) {
- return ret;
+ while (nb_sectors > 0 && !ret) {
+ int num = nb_sectors;
+
+ /* align request */
+ if (bs->bl.write_zeroes_alignment &&
+ num >= bs->bl.write_zeroes_alignment &&
+ sector_num % bs->bl.write_zeroes_alignment) {
+ if (num > bs->bl.write_zeroes_alignment) {
+ num = bs->bl.write_zeroes_alignment;
+ }
+ num -= sector_num % bs->bl.write_zeroes_alignment;
}
- }
- /* Fall back to bounce buffer if write zeroes is unsupported */
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
- memset(iov.iov_base, 0, iov.iov_len);
- qemu_iovec_init_external(&qiov, &iov, 1);
+ /* limit request size */
+ if (num > max_write_zeroes) {
+ num = max_write_zeroes;
+ }
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+ ret = -ENOTSUP;
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_write_zeroes) {
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+ }
+
+ if (ret == -ENOTSUP) {
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ iov.iov_len = num * BDRV_SECTOR_SIZE;
+ if (iov.iov_base == NULL) {
+ /* allocate bounce buffer only once and ensure that it
+ * is big enough for this and all future requests.
+ */
+ size_t bufsize = num <= nb_sectors ? num : max_write_zeroes;
+ iov.iov_base = qemu_blockalign(bs, bufsize * BDRV_SECTOR_SIZE);
+ memset(iov.iov_base, 0, bufsize * BDRV_SECTOR_SIZE);
+ }
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+ }
+
+ sector_num += num;
+ nb_sectors -= num;
+ }
qemu_vfree(iov.iov_base);
return ret;
@@ -2783,7 +2852,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
} else {
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
@@ -2792,9 +2861,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
ret = bdrv_co_flush(bs);
}
- if (bs->dirty_bitmap) {
- bdrv_set_dirty(bs, sector_num, nb_sectors);
- }
+ bdrv_set_dirty(bs, sector_num, nb_sectors);
if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
bs->wr_highest_sector = sector_num + nb_sectors - 1;
@@ -2817,12 +2884,17 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
}
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+ int64_t sector_num, int nb_sectors,
+ BdrvRequestFlags flags)
{
trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ flags &= ~BDRV_REQ_MAY_UNMAP;
+ }
+
return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
}
/**
@@ -3102,6 +3174,36 @@ int bdrv_has_zero_init(BlockDriverState *bs)
return 0;
}
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+
+ if (bs->backing_hd) {
+ return false;
+ }
+
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ return bdi.unallocated_blocks_are_zero;
+ }
+
+ return false;
+}
+
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+
+ if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
+ return false;
+ }
+
+ if (bdrv_get_info(bs, &bdi) == 0) {
+ return bdi.can_write_zeroes_with_unmap;
+ }
+
+ return false;
+}
+
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
@@ -3171,8 +3273,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
*pnum, pnum);
}
- if (!(ret & BDRV_BLOCK_DATA)) {
- if (bdrv_has_zero_init(bs)) {
+ if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
+ if (bdrv_unallocated_blocks_are_zero(bs)) {
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing_hd) {
BlockDriverState *bs2 = bs->backing_hd;
@@ -3330,7 +3432,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (bdrv_check_request(bs, sector_num, nb_sectors))
return -EIO;
- assert(!bs->dirty_bitmap);
+ assert(QLIST_EMPTY(&bs->dirty_bitmaps));
return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
@@ -3419,6 +3521,19 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
return -ENOTSUP;
}
+int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
+ return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
+ }
+
+ return -ENOTSUP;
+}
+
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
@@ -4179,6 +4294,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_DISCARD_DEFAULT 32768
+
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
@@ -4190,9 +4310,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return -EROFS;
}
- if (bs->dirty_bitmap) {
- bdrv_reset_dirty(bs, sector_num, nb_sectors);
- }
+ bdrv_reset_dirty(bs, sector_num, nb_sectors);
/* Do nothing if disabled. */
if (!(bs->open_flags & BDRV_O_UNMAP)) {
@@ -4200,7 +4318,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
}
if (bs->drv->bdrv_co_discard) {
- return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
+ int max_discard = bs->bl.max_discard ?
+ bs->bl.max_discard : MAX_DISCARD_DEFAULT;
+
+ while (nb_sectors > 0) {
+ int ret;
+ int num = nb_sectors;
+
+ /* align request */
+ if (bs->bl.discard_alignment &&
+ num >= bs->bl.discard_alignment &&
+ sector_num % bs->bl.discard_alignment) {
+ if (num > bs->bl.discard_alignment) {
+ num = bs->bl.discard_alignment;
+ }
+ num -= sector_num % bs->bl.discard_alignment;
+ }
+
+ /* limit request size */
+ if (num > max_discard) {
+ num = max_discard;
+ }
+
+ ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+ if (ret) {
+ return ret;
+ }
+
+ sector_num += num;
+ nb_sectors -= num;
+ }
+ return 0;
} else if (bs->drv->bdrv_aio_discard) {
BlockDriverAIOCB *acb;
CoroutineIOCompletion co = {
@@ -4354,58 +4502,90 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
return true;
}
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
{
int64_t bitmap_size;
+ BdrvDirtyBitmap *bitmap;
assert((granularity & (granularity - 1)) == 0);
- if (granularity) {
- granularity >>= BDRV_SECTOR_BITS;
- assert(!bs->dirty_bitmap);
- bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
- bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
- } else {
- if (bs->dirty_bitmap) {
- hbitmap_free(bs->dirty_bitmap);
- bs->dirty_bitmap = NULL;
+ granularity >>= BDRV_SECTOR_BITS;
+ assert(granularity);
+ bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+ bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
+ bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
+ QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+ return bitmap;
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+ BdrvDirtyBitmap *bm, *next;
+ QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+ if (bm == bitmap) {
+ QLIST_REMOVE(bitmap, list);
+ hbitmap_free(bitmap->bitmap);
+ g_free(bitmap);
+ return;
}
}
}
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDirtyInfoList *list = NULL;
+ BlockDirtyInfoList **plist = &list;
+
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
+ BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
+ info->count = bdrv_get_dirty_count(bs, bm);
+ info->granularity =
+ ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
+ entry->value = info;
+ *plist = entry;
+ plist = &entry->next;
+ }
+
+ return list;
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
{
- if (bs->dirty_bitmap) {
- return hbitmap_get(bs->dirty_bitmap, sector);
+ if (bitmap) {
+ return hbitmap_get(bitmap->bitmap, sector);
} else {
return 0;
}
}
-void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
+void bdrv_dirty_iter_init(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
- hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
+ hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
- hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
+ BdrvDirtyBitmap *bitmap;
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ }
}
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
- int nr_sectors)
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
{
- hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
+ BdrvDirtyBitmap *bitmap;
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+ }
}
-int64_t bdrv_get_dirty_count(BlockDriverState *bs)
+int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
- if (bs->dirty_bitmap) {
- return hbitmap_count(bs->dirty_bitmap);
- } else {
- return 0;
- }
+ return hbitmap_count(bitmap->bitmap);
}
/* Get a reference to bs */