diff options
53 files changed, 1639 insertions, 379 deletions
@@ -519,6 +519,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) return; } bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; + bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; } else { bs->bl.opt_mem_alignment = 512; @@ -533,6 +534,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_transfer_length = MAX(bs->bl.opt_transfer_length, bs->backing_hd->bl.opt_transfer_length); + bs->bl.max_transfer_length = + MIN_NON_ZERO(bs->bl.max_transfer_length, + bs->backing_hd->bl.max_transfer_length); bs->bl.opt_mem_alignment = MAX(bs->bl.opt_mem_alignment, bs->backing_hd->bl.opt_mem_alignment); @@ -1900,6 +1904,34 @@ static bool bdrv_requests_pending(BlockDriverState *bs) return false; } +static bool bdrv_drain_one(BlockDriverState *bs) +{ + bool bs_busy; + + bdrv_flush_io_queue(bs); + bdrv_start_throttled_reqs(bs); + bs_busy = bdrv_requests_pending(bs); + bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); + return bs_busy; +} + +/* + * Wait for pending requests to complete on a single BlockDriverState subtree + * + * See the warning in bdrv_drain_all(). This function can only be called if + * you are sure nothing can generate I/O because you have op blockers + * installed. + * + * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState + * AioContext. + */ +void bdrv_drain(BlockDriverState *bs) +{ + while (bdrv_drain_one(bs)) { + /* Keep iterating */ + } +} + /* * Wait for pending requests to complete across all BlockDriverStates * @@ -1923,16 +1955,10 @@ void bdrv_drain_all(void) QTAILQ_FOREACH(bs, &bdrv_states, device_list) { AioContext *aio_context = bdrv_get_aio_context(bs); - bool bs_busy; aio_context_acquire(aio_context); - bdrv_flush_io_queue(bs); - bdrv_start_throttled_reqs(bs); - bs_busy = bdrv_requests_pending(bs); - bs_busy |= aio_poll(aio_context, bs_busy); + busy |= bdrv_drain_one(bs); aio_context_release(aio_context); - - busy |= bs_busy; } } } @@ -3540,10 +3566,10 @@ static void send_qmp_error_event(BlockDriverState *bs, BlockErrorAction action, bool is_read, int error) { - BlockErrorAction ac; + IoOperationType optype; - ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; - qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action, + optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; + qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, bdrv_iostatus_is_enabled(bs), error == ENOSPC, strerror(error), &error_abort); @@ -4442,6 +4468,11 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, merge = 0; } + if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors + + reqs[i].nb_sectors > bs->bl.max_transfer_length) { + merge = 0; + } + if (merge) { size_t size; QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); @@ -5750,12 +5781,13 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs, notifier_with_return_list_add(&bs->before_write_notifiers, notifier); } -int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts) +int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb) { if (!bs->drv->bdrv_amend_options) { return -ENOTSUP; } - return bs->drv->bdrv_amend_options(bs, opts); + return bs->drv->bdrv_amend_options(bs, opts, status_cb); } /* This function will be called by the bdrv_recurse_is_first_non_filter method @@ -5818,13 +5850,19 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) { BlockDriverState *to_replace_bs = bdrv_find_node(node_name); + AioContext *aio_context; + if (!to_replace_bs) { error_setg(errp, "Node name '%s' not found", node_name); return NULL; } + aio_context = bdrv_get_aio_context(to_replace_bs); + aio_context_acquire(aio_context); + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { - return NULL; + to_replace_bs = NULL; + goto out; } /* We don't want arbitrary node of the BDS chain to be replaced only the top @@ -5834,9 +5872,12 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) */ if (!bdrv_is_first_non_filter(to_replace_bs)) { error_setg(errp, "Only top most non filter can be replaced"); - return NULL; + to_replace_bs = NULL; + goto out; } +out: + aio_context_release(aio_context); return to_replace_bs; } diff --git a/block/Makefile.objs b/block/Makefile.objs index 27911b6b88..04b0e43eb1 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o -block-obj-y += null.o +block-obj-y += null.o mirror.o block-obj-y += nbd.o nbd-client.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o @@ -23,7 +23,6 @@ block-obj-y += accounting.o common-obj-y += stream.o common-obj-y += commit.o -common-obj-y += mirror.o common-obj-y += backup.o iscsi.o-cflags := $(LIBISCSI_CFLAGS) diff --git a/block/backup.c b/block/backup.c index e334740161..792e65514b 100644 --- a/block/backup.c +++ b/block/backup.c @@ -227,9 +227,25 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job, } } +typedef struct { + int ret; +} BackupCompleteData; + +static void backup_complete(BlockJob *job, void *opaque) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + BackupCompleteData *data = opaque; + + bdrv_unref(s->target); + + block_job_completed(job, data->ret); + g_free(data); +} + static void coroutine_fn backup_run(void *opaque) { BackupBlockJob *job = opaque; + BackupCompleteData *data; BlockDriverState *bs = job->common.bs; BlockDriverState *target = job->target; BlockdevOnError on_target_error = job->on_target_error; @@ -344,9 +360,10 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); - bdrv_unref(target); - block_job_completed(&job->common, ret); + data = g_malloc(sizeof(*data)); + data->ret = ret; + block_job_defer_to_main_loop(&job->common, backup_complete, data); } void backup_start(BlockDriverState *bs, BlockDriverState *target, diff --git a/block/blkdebug.c b/block/blkdebug.c index e046b920fb..862d93b59b 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -195,6 +195,8 @@ static const char *event_names[BLKDBG_EVENT_MAX] = { [BLKDBG_PWRITEV] = "pwritev", [BLKDBG_PWRITEV_ZERO] = "pwritev_zero", [BLKDBG_PWRITEV_DONE] = "pwritev_done", + + [BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare", }; static int get_event_by_name(const char *name, BlkDebugEvent *event) diff --git a/block/commit.c b/block/commit.c index 60a2accf04..cfa2bbebc2 100644 --- a/block/commit.c +++ b/block/commit.c @@ -60,17 +60,50 @@ static int coroutine_fn commit_populate(BlockDriverState *bs, return 0; } -static void coroutine_fn commit_run(void *opaque) +typedef struct { + int ret; +} CommitCompleteData; + +static void commit_complete(BlockJob *job, void *opaque) { - CommitBlockJob *s = opaque; + CommitBlockJob *s = container_of(job, CommitBlockJob, common); + CommitCompleteData *data = opaque; BlockDriverState *active = s->active; BlockDriverState *top = s->top; BlockDriverState *base = s->base; BlockDriverState *overlay_bs; + int ret = data->ret; + + if (!block_job_is_cancelled(&s->common) && ret == 0) { + /* success */ + ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); + } + + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_flags != bdrv_get_flags(base)) { + bdrv_reopen(base, s->base_flags, NULL); + } + overlay_bs = bdrv_find_overlay(active, top); + if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { + bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); + } + g_free(s->backing_file_str); + block_job_completed(&s->common, ret); + g_free(data); +} + +static void coroutine_fn commit_run(void *opaque) +{ + CommitBlockJob *s = opaque; + CommitCompleteData *data; + BlockDriverState *top = s->top; + BlockDriverState *base = s->base; int64_t sector_num, end; int ret = 0; int n = 0; - void *buf; + void *buf = NULL; int bytes_written = 0; int64_t base_len; @@ -78,18 +111,18 @@ static void coroutine_fn commit_run(void *opaque) if (s->common.len < 0) { - goto exit_restore_reopen; + goto out; } ret = base_len = bdrv_getlength(base); if (base_len < 0) { - goto exit_restore_reopen; + goto out; } if (base_len < s->common.len) { ret = bdrv_truncate(base, s->common.len); if (ret) { - goto exit_restore_reopen; + goto out; } } @@ -128,7 +161,7 @@ wait: if (s->on_error == BLOCKDEV_ON_ERROR_STOP || s->on_error == BLOCKDEV_ON_ERROR_REPORT|| (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) { - goto exit_free_buf; + goto out; } else { n = 0; continue; @@ -140,27 +173,12 @@ wait: ret = 0; - if (!block_job_is_cancelled(&s->common) && sector_num == end) { - /* success */ - ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); - } - -exit_free_buf: +out: qemu_vfree(buf); -exit_restore_reopen: - /* restore base open flags here if appropriate (e.g., change the base back - * to r/o). These reopens do not need to be atomic, since we won't abort - * even on failure here */ - if (s->base_flags != bdrv_get_flags(base)) { - bdrv_reopen(base, s->base_flags, NULL); - } - overlay_bs = bdrv_find_overlay(active, top); - if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { - bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); - } - g_free(s->backing_file_str); - block_job_completed(&s->common, ret); + data = g_malloc(sizeof(*data)); + data->ret = ret; + block_job_defer_to_main_loop(&s->common, commit_complete, data); } static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/block/curl.c b/block/curl.c index b4157cc8b3..bbee3ca179 100644 --- a/block/curl.c +++ b/block/curl.c @@ -64,6 +64,7 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle, #define SECTOR_SIZE 512 #define READ_AHEAD_DEFAULT (256 * 1024) #define CURL_TIMEOUT_DEFAULT 5 +#define CURL_TIMEOUT_MAX 10000 #define FIND_RET_NONE 0 #define FIND_RET_OK 1 @@ -112,7 +113,7 @@ typedef struct BDRVCURLState { char *url; size_t readahead_size; bool sslverify; - int timeout; + uint64_t timeout; char *cookie; bool accept_range; AioContext *aio_context; @@ -390,7 +391,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s) if (s->cookie) { curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie); } - curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, s->timeout); + curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout); curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb); curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state); @@ -546,6 +547,10 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT, CURL_TIMEOUT_DEFAULT); + if (s->timeout > CURL_TIMEOUT_MAX) { + error_setg(errp, "timeout parameter is too large or negative"); + goto out_noclean; + } s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true); diff --git a/block/iscsi.c b/block/iscsi.c index 3485d622ec..ed375fc30e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -362,6 +362,12 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, return -EINVAL; } + if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) { + error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len " + "of %d sectors", nb_sectors, bs->bl.max_transfer_length); + return -EINVAL; + } + lba = sector_qemu2lun(sector_num, iscsilun); num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); @@ -529,6 +535,12 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, return -EINVAL; } + if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) { + error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len " + "of %d sectors", nb_sectors, bs->bl.max_transfer_length); + return -EINVAL; + } + if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES && !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) { int64_t ret; @@ -1489,31 +1501,44 @@ static void iscsi_close(BlockDriverState *bs) memset(iscsilun, 0, sizeof(IscsiLun)); } -static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) +static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun) { - IscsiLun *iscsilun = bs->opaque; + return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1); +} +static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) +{ /* We don't actually refresh here, but just return data queried in * iscsi_open(): iscsi targets don't change their limits. */ + + IscsiLun *iscsilun = bs->opaque; + uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff; + + if (iscsilun->bl.max_xfer_len) { + max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len); + } + + bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun); + if (iscsilun->lbp.lbpu) { if (iscsilun->bl.max_unmap < 0xffffffff) { - bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, - iscsilun); + bs->bl.max_discard = + sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun); } - bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, - iscsilun); + bs->bl.discard_alignment = + sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun); } if (iscsilun->bl.max_ws_len < 0xffffffff) { - bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, - iscsilun); + bs->bl.max_write_zeroes = + sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun); } if (iscsilun->lbp.lbpws) { - bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, - iscsilun); + bs->bl.write_zeroes_alignment = + sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun); } - bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len, - iscsilun); + bs->bl.opt_transfer_length = + sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun); } /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in diff --git a/block/mirror.c b/block/mirror.c index e8a43eb39e..2c6dd2a4c1 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -45,6 +45,7 @@ typedef struct MirrorBlockJob { int64_t sector_num; int64_t granularity; size_t buf_size; + int64_t bdev_length; unsigned long *cow_bitmap; BdrvDirtyBitmap *dirty_bitmap; HBitmapIter hbi; @@ -54,6 +55,7 @@ typedef struct MirrorBlockJob { unsigned long *in_flight_bitmap; int in_flight; + int sectors_in_flight; int ret; } MirrorBlockJob; @@ -87,6 +89,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret); s->in_flight--; + s->sectors_in_flight -= op->nb_sectors; iov = op->qiov.iov; for (i = 0; i < op->qiov.niov; i++) { MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base; @@ -98,8 +101,11 @@ static void mirror_iteration_done(MirrorOp *op, int ret) chunk_num = op->sector_num / sectors_per_chunk; nb_chunks = op->nb_sectors / sectors_per_chunk; bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); - if (s->cow_bitmap && ret >= 0) { - bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); + if (ret >= 0) { + if (s->cow_bitmap) { + bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); + } + s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE; } qemu_iovec_destroy(&op->qiov); @@ -172,7 +178,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) hbitmap_next_sector = s->sector_num; sector_num = s->sector_num; sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; - end = s->common.len >> BDRV_SECTOR_BITS; + end = s->bdev_length / BDRV_SECTOR_SIZE; /* Extend the QEMUIOVector to include all adjacent blocks that will * be copied in this operation. @@ -284,6 +290,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Copy the dirty cluster. */ s->in_flight++; + s->sectors_in_flight += nb_sectors; trace_mirror_one_iteration(s, sector_num, nb_sectors); bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, mirror_read_complete, op); @@ -314,9 +321,56 @@ static void mirror_drain(MirrorBlockJob *s) } } +typedef struct { + int ret; +} MirrorExitData; + +static void mirror_exit(BlockJob *job, void *opaque) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + MirrorExitData *data = opaque; + AioContext *replace_aio_context = NULL; + + if (s->to_replace) { + replace_aio_context = bdrv_get_aio_context(s->to_replace); + aio_context_acquire(replace_aio_context); + } + + if (s->should_complete && data->ret == 0) { + BlockDriverState *to_replace = s->common.bs; + if (s->to_replace) { + to_replace = s->to_replace; + } + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); + } + bdrv_swap(s->target, to_replace); + if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { + /* drop the bs loop chain formed by the swap: break the loop then + * trigger the unref from the top one */ + BlockDriverState *p = s->base->backing_hd; + bdrv_set_backing_hd(s->base, NULL); + bdrv_unref(p); + } + } + if (s->to_replace) { + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } + if (replace_aio_context) { + aio_context_release(replace_aio_context); + } + g_free(s->replaces); + bdrv_unref(s->target); + block_job_completed(&s->common, data->ret); + g_free(data); +} + static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; + MirrorExitData *data; BlockDriverState *bs = s->common.bs; int64_t sector_num, end, sectors_per_chunk, length; uint64_t last_pause_ns; @@ -329,11 +383,11 @@ static void coroutine_fn mirror_run(void *opaque) goto immediate_exit; } - s->common.len = bdrv_getlength(bs); - if (s->common.len < 0) { - ret = s->common.len; + s->bdev_length = bdrv_getlength(bs); + if (s->bdev_length < 0) { + ret = s->bdev_length; goto immediate_exit; - } else if (s->common.len == 0) { + } else if (s->bdev_length == 0) { /* Report BLOCK_JOB_READY and wait for complete. */ block_job_event_ready(&s->common); s->synced = true; @@ -344,7 +398,7 @@ static void coroutine_fn mirror_run(void *opaque) goto immediate_exit; } - length = DIV_ROUND_UP(s->common.len, s->granularity); + length = DIV_ROUND_UP(s->bdev_length, s->granularity); s->in_flight_bitmap = bitmap_new(length); /* If we have no backing file yet in the destination, we cannot let @@ -364,7 +418,7 @@ static void coroutine_fn mirror_run(void *opaque) } } - end = s->common.len >> BDRV_SECTOR_BITS; + end = s->bdev_length / BDRV_SECTOR_SIZE; s->buf = qemu_try_blockalign(bs, s->buf_size); if (s->buf == NULL) { ret = -ENOMEM; @@ -409,6 +463,12 @@ static void coroutine_fn mirror_run(void *opaque) } cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); + /* s->common.offset contains the number of bytes already processed so + * far, cnt is the number of dirty sectors remaining and + * s->sectors_in_flight is the number of sectors currently being + * processed; together those are the current total operation length */ + s->common.len = s->common.offset + + (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE; /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that qemu_aio_flush() returns. @@ -445,7 +505,6 @@ static void coroutine_fn mirror_run(void *opaque) * report completion. This way, block-job-cancel will leave * the target in a consistent state. */ - s->common.offset = end * BDRV_SECTOR_SIZE; if (!s->synced) { block_job_event_ready(&s->common); s->synced = true; @@ -467,15 +526,13 @@ static void coroutine_fn mirror_run(void *opaque) * mirror_populate runs. */ trace_mirror_before_drain(s, cnt); - bdrv_drain_all(); + bdrv_drain(bs); cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } ret = 0; trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); if (!s->synced) { - /* Publish progress */ - s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { break; @@ -510,31 +567,10 @@ immediate_exit: g_free(s->in_flight_bitmap); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); - if (s->should_complete && ret == 0) { - BlockDriverState *to_replace = s->common.bs; - if (s->to_replace) { - to_replace = s->to_replace; - } - if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { - bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); - } - bdrv_swap(s->target, to_replace); - if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { - /* drop the bs loop chain formed by the swap: break the loop then - * trigger the unref from the top one */ - BlockDriverState *p = s->base->backing_hd; - bdrv_set_backing_hd(s->base, NULL); - bdrv_unref(p); - } - } - if (s->to_replace) { - bdrv_op_unblock_all(s->to_replace, s->replace_blocker); - error_free(s->replace_blocker); - bdrv_unref(s->to_replace); - } - g_free(s->replaces); - bdrv_unref(s->target); - block_job_completed(&s->common, ret); + + data = g_malloc(sizeof(*data)); + data->ret = ret; + block_job_defer_to_main_loop(&s->common, mirror_exit, data); } static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -574,16 +610,23 @@ static void mirror_complete(BlockJob *job, Error **errp) /* check the target bs is not blocked and block all operations on it */ if (s->replaces) { + AioContext *replace_aio_context; + s->to_replace = check_to_replace_node(s->replaces, &local_err); if (!s->to_replace) { error_propagate(errp, local_err); return; } + replace_aio_context = bdrv_get_aio_context(s->to_replace); + aio_context_acquire(replace_aio_context); + error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); bdrv_ref(s->to_replace); + + aio_context_release(replace_aio_context); } s->should_complete = true; diff --git a/block/parallels.c b/block/parallels.c index 2a814f3db4..4f9cd8dd23 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -155,7 +155,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) offset = sector_num % s->tracks; /* not allocated */ - if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0)) + if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0)) return -1; return ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 4d888c785f..df0b2c9cec 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1414,7 +1414,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) * clusters. */ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, - unsigned int nb_clusters, enum qcow2_discard_type type) + unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard) { BDRVQcowState *s = bs->opaque; uint64_t *l2_table; @@ -1436,23 +1436,30 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); /* - * Make sure that a discarded area reads back as zeroes for v3 images - * (we cannot do it for v2 without actually writing a zero-filled - * buffer). We can skip the operation if the cluster is already marked - * as zero, or if it's unallocated and we don't have a backing file. + * If full_discard is false, make sure that a discarded area reads back + * as zeroes for v3 images (we cannot do it for v2 without actually + * writing a zero-filled buffer). We can skip the operation if the + * cluster is already marked as zero, or if it's unallocated and we + * don't have a backing file. * * TODO We might want to use bdrv_get_block_status(bs) here, but we're * holding s->lock, so that doesn't work today. + * + * If full_discard is true, the sector should not read back as zeroes, + * but rather fall through to the backing file. */ switch (qcow2_get_cluster_type(old_l2_entry)) { case QCOW2_CLUSTER_UNALLOCATED: - if (!bs->backing_hd) { + if (full_discard || !bs->backing_hd) { continue; } break; case QCOW2_CLUSTER_ZERO: - continue; + if (!full_discard) { + continue; + } + break; case QCOW2_CLUSTER_NORMAL: case QCOW2_CLUSTER_COMPRESSED: @@ -1464,7 +1471,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, /* First remove L2 entries */ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); - if (s->qcow_version >= 3) { + if (!full_discard && s->qcow_version >= 3) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); } else { l2_table[l2_index + i] = cpu_to_be64(0); @@ -1483,7 +1490,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, } int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, - int nb_sectors, enum qcow2_discard_type type) + int nb_sectors, enum qcow2_discard_type type, bool full_discard) { BDRVQcowState *s = bs->opaque; uint64_t end_offset; @@ -1506,7 +1513,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, /* Each L2 table is handled by its own loop iteration */ while (nb_clusters > 0) { - ret = discard_single_l2(bs, offset, nb_clusters, type); + ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard); if (ret < 0) { goto fail; } @@ -1606,15 +1613,14 @@ fail: * Expands all zero clusters in a specific L1 table (or deallocates them, for * non-backed non-pre-allocated zero clusters). * - * expanded_clusters is a bitmap where every bit corresponds to one cluster in - * the image file; a bit gets set if the corresponding cluster has been used for - * zero expansion (i.e., has been filled with zeroes and is referenced from an - * L2 table). nb_clusters contains the total cluster count of the image file, - * i.e., the number of bits in expanded_clusters. + * l1_entries and *visited_l1_entries are used to keep track of progress for + * status_cb(). l1_entries contains the total number of L1 entries and + * *visited_l1_entries counts all visited L1 entries. */ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, - int l1_size, uint8_t **expanded_clusters, - uint64_t *nb_clusters) + int l1_size, int64_t *visited_l1_entries, + int64_t l1_entries, + BlockDriverAmendStatusCB *status_cb) { BDRVQcowState *s = bs->opaque; bool is_active_l1 = (l1_table == s->l1_table); @@ -1634,9 +1640,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, for (i = 0; i < l1_size; i++) { uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; bool l2_dirty = false; + int l2_refcount; if (!l2_offset) { /* unallocated */ + (*visited_l1_entries)++; + if (status_cb) { + status_cb(bs, *visited_l1_entries, l1_entries); + } continue; } @@ -1653,33 +1664,19 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } + l2_refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); + if (l2_refcount < 0) { + ret = l2_refcount; + goto fail; + } + for (j = 0; j < s->l2_size; j++) { uint64_t l2_entry = be64_to_cpu(l2_table[j]); - int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index; + int64_t offset = l2_entry & L2E_OFFSET_MASK; int cluster_type = qcow2_get_cluster_type(l2_entry); bool preallocated = offset != 0; - if (cluster_type == QCOW2_CLUSTER_NORMAL) { - cluster_index = offset >> s->cluster_bits; - assert((cluster_index >= 0) && (cluster_index < *nb_clusters)); - if ((*expanded_clusters)[cluster_index / 8] & - (1 << (cluster_index % 8))) { - /* Probably a shared L2 table; this cluster was a zero - * cluster which has been expanded, its refcount - * therefore most likely requires an update. */ - ret = qcow2_update_cluster_refcount(bs, cluster_index, 1, - QCOW2_DISCARD_NEVER); - if (ret < 0) { - goto fail; - } - /* Since we just increased the refcount, the COPIED flag may - * no longer be set. */ - l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED); - l2_dirty = true; - } - continue; - } - else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) { + if (cluster_type != QCOW2_CLUSTER_ZERO) { continue; } @@ -1697,6 +1694,19 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = offset; goto fail; } + + if (l2_refcount > 1) { + /* For shared L2 tables, set the refcount accordingly (it is + * already 1 and needs to be l2_refcount) */ + ret = qcow2_update_cluster_refcount(bs, + offset >> s->cluster_bits, l2_refcount - 1, + QCOW2_DISCARD_OTHER); + if (ret < 0) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + goto fail; + } + } } ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); @@ -1718,29 +1728,12 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } - l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); - l2_dirty = true; - - cluster_index = offset >> s->cluster_bits; - - if (cluster_index >= *nb_clusters) { - uint64_t old_bitmap_size = (*nb_clusters + 7) / 8; - uint64_t new_bitmap_size; - /* The offset may lie beyond the old end of the underlying image - * file for growable files only */ - assert(bs->file->growable); - *nb_clusters = size_to_clusters(s, bs->file->total_sectors * - BDRV_SECTOR_SIZE); - new_bitmap_size = (*nb_clusters + 7) / 8; - *expanded_clusters = g_realloc(*expanded_clusters, - new_bitmap_size); - /* clear the newly allocated space */ - memset(&(*expanded_clusters)[old_bitmap_size], 0, - new_bitmap_size - old_bitmap_size); + if (l2_refcount == 1) { + l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + } else { + l2_table[j] = cpu_to_be64(offset); } - - assert((cluster_index >= 0) && (cluster_index < *nb_clusters)); - (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8); + l2_dirty = true; } if (is_active_l1) { @@ -1769,6 +1762,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } } } + + (*visited_l1_entries)++; + if (status_cb) { + status_cb(bs, *visited_l1_entries, l1_entries); + } } ret = 0; @@ -1795,25 +1793,25 @@ fail: * allocation for pre-allocated ones). This is important for downgrading to a * qcow2 version which doesn't yet support metadata zero clusters. */ -int qcow2_expand_zero_clusters(BlockDriverState *bs) +int qcow2_expand_zero_clusters(BlockDriverState *bs, + BlockDriverAmendStatusCB *status_cb) { BDRVQcowState *s = bs->opaque; uint64_t *l1_table = NULL; - uint64_t nb_clusters; - uint8_t *expanded_clusters; + int64_t l1_entries = 0, visited_l1_entries = 0; int ret; int i, j; - nb_clusters = size_to_clusters(s, bs->file->total_sectors * - BDRV_SECTOR_SIZE); - expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8); - if (expanded_clusters == NULL) { - ret = -ENOMEM; - goto fail; + if (status_cb) { + l1_entries = s->l1_size; + for (i = 0; i < s->nb_snapshots; i++) { + l1_entries += s->snapshots[i].l1_size; + } } ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, - &expanded_clusters, &nb_clusters); + &visited_l1_entries, l1_entries, + status_cb); if (ret < 0) { goto fail; } @@ -1847,7 +1845,8 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs) } ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, - &expanded_clusters, &nb_clusters); + &visited_l1_entries, l1_entries, + status_cb); if (ret < 0) { goto fail; } @@ -1856,7 +1855,6 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs) ret = 0; fail: - g_free(expanded_clusters); g_free(l1_table); return ret; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 1477031206..9afdb40b40 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -91,7 +91,7 @@ static int load_refcount_block(BlockDriverState *bs, * return value is the refcount of the cluster, negative values are -errno * and indicate an error. */ -static int get_refcount(BlockDriverState *bs, int64_t cluster_index) +int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index) { BDRVQcowState *s = bs->opaque; uint64_t refcount_table_index, block_index; @@ -629,8 +629,7 @@ fail: } /* - * Increases or decreases the refcount of a given cluster by one. - * addend must be 1 or -1. + * Increases or decreases the refcount of a given cluster. * * If the return value is non-negative, it is the new refcount of the cluster. * If it is negative, it is -errno and indicates an error. @@ -649,7 +648,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, return ret; } - return get_refcount(bs, cluster_index); + return qcow2_get_refcount(bs, cluster_index); } @@ -670,7 +669,7 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size) retry: for(i = 0; i < nb_clusters; i++) { uint64_t next_cluster_index = s->free_cluster_index++; - refcount = get_refcount(bs, next_cluster_index); + refcount = qcow2_get_refcount(bs, next_cluster_index); if (refcount < 0) { return refcount; @@ -734,7 +733,7 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, /* Check how many clusters there are free */ cluster_index = offset >> s->cluster_bits; for(i = 0; i < nb_clusters; i++) { - refcount = get_refcount(bs, cluster_index++); + refcount = qcow2_get_refcount(bs, cluster_index++); if (refcount < 0) { return refcount; @@ -981,7 +980,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, cluster_index, addend, QCOW2_DISCARD_SNAPSHOT); } else { - refcount = get_refcount(bs, cluster_index); + refcount = qcow2_get_refcount(bs, cluster_index); } if (refcount < 0) { @@ -1021,7 +1020,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, refcount = qcow2_update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT); } else { - refcount = get_refcount(bs, l2_offset >> s->cluster_bits); + refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); } if (refcount < 0) { ret = refcount; @@ -1332,8 +1331,8 @@ fail: * Checks the OFLAG_COPIED flag for all L1 and L2 entries. * * This function does not print an error message nor does it increment - * check_errors if get_refcount fails (this is because such an error will have - * been already detected and sufficiently signaled by the calling function + * check_errors if qcow2_get_refcount fails (this is because such an error will + * have been already detected and sufficiently signaled by the calling function * (qcow2_check_refcounts) by the time this function is called). */ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, @@ -1354,7 +1353,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, continue; } - refcount = get_refcount(bs, l2_offset >> s->cluster_bits); + refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); if (refcount < 0) { /* don't print message nor increment check_errors */ continue; @@ -1396,7 +1395,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, if ((cluster_type == QCOW2_CLUSTER_NORMAL) || ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) { - refcount = get_refcount(bs, data_offset >> s->cluster_bits); + refcount = qcow2_get_refcount(bs, + data_offset >> s->cluster_bits); if (refcount < 0) { /* don't print message nor increment check_errors */ continue; @@ -1632,7 +1632,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, int refcount1, refcount2, ret; for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) { - refcount1 = get_refcount(bs, i); + refcount1 = qcow2_get_refcount(bs, i); if (refcount1 < 0) { fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", i, strerror(-refcount1)); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index f52d7fdd22..5b3903cf22 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -441,7 +441,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) qcow2_discard_clusters(bs, qcow2_vm_state_offset(s), align_offset(sn->vm_state_size, s->cluster_size) >> BDRV_SECTOR_BITS, - QCOW2_DISCARD_NEVER); + QCOW2_DISCARD_NEVER, false); #ifdef DEBUG_ALLOC { diff --git a/block/qcow2.c b/block/qcow2.c index d031515838..d12049451a 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2089,7 +2089,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors, QCOW2_DISCARD_REQUEST); + nb_sectors, QCOW2_DISCARD_REQUEST, false); qemu_co_mutex_unlock(&s->lock); return ret; } @@ -2230,6 +2230,195 @@ fail: return ret; } +static int make_completely_empty(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + int ret, l1_clusters; + int64_t offset; + uint64_t *new_reftable = NULL; + uint64_t rt_entry, l1_size2; + struct { + uint64_t l1_offset; + uint64_t reftable_offset; + uint32_t reftable_clusters; + } QEMU_PACKED l1_ofs_rt_ofs_cls; + + ret = qcow2_cache_empty(bs, s->l2_table_cache); + if (ret < 0) { + goto fail; + } + + ret = qcow2_cache_empty(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* Refcounts will be broken utterly */ + ret = qcow2_mark_dirty(bs); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + + l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); + l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); + + /* After this call, neither the in-memory nor the on-disk refcount + * information accurately describe the actual references */ + + ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE, + l1_clusters * s->cluster_sectors, 0); + if (ret < 0) { + goto fail_broken_refcounts; + } + memset(s->l1_table, 0, l1_size2); + + BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); + + /* Overwrite enough clusters at the beginning of the sectors to place + * the refcount table, a refcount block and the L1 table in; this may + * overwrite parts of the existing refcount and L1 table, which is not + * an issue because the dirty flag is set, complete data loss is in fact + * desired and partial data loss is consequently fine as well */ + ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE, + (2 + l1_clusters) * s->cluster_size / + BDRV_SECTOR_SIZE, 0); + /* This call (even if it failed overall) may have overwritten on-disk + * refcount structures; in that case, the in-memory refcount information + * will probably differ from the on-disk information which makes the BDS + * unusable */ + if (ret < 0) { + goto fail_broken_refcounts; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); + + /* "Create" an empty reftable (one cluster) directly after the image + * header and an empty L1 table three clusters after the image header; + * the cluster between those two will be used as the first refblock */ + cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size); + cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size); + cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), + &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); + if (ret < 0) { + goto fail_broken_refcounts; + } + + s->l1_table_offset = 3 * s->cluster_size; + + new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); + if (!new_reftable) { + ret = -ENOMEM; + goto fail_broken_refcounts; + } + + s->refcount_table_offset = s->cluster_size; + s->refcount_table_size = s->cluster_size / sizeof(uint64_t); + + g_free(s->refcount_table); + s->refcount_table = new_reftable; + new_reftable = NULL; + + /* Now the in-memory refcount information again corresponds to the on-disk + * information (reftable is empty and no refblocks (the refblock cache is + * empty)); however, this means some clusters (e.g. the image header) are + * referenced, but not refcounted, but the normal qcow2 code assumes that + * the in-memory information is always correct */ + + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); + + /* Enter the first refblock into the reftable */ + rt_entry = cpu_to_be64(2 * s->cluster_size); + ret = bdrv_pwrite_sync(bs->file, s->cluster_size, + &rt_entry, sizeof(rt_entry)); + if (ret < 0) { + goto fail_broken_refcounts; + } + s->refcount_table[0] = 2 * s->cluster_size; + + s->free_cluster_index = 0; + assert(3 + l1_clusters <= s->refcount_block_size); + offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); + if (offset < 0) { + ret = offset; + goto fail_broken_refcounts; + } else if (offset > 0) { + error_report("First cluster in emptied image is in use"); + abort(); + } + + /* Now finally the in-memory information corresponds to the on-disk + * structures and is correct */ + ret = qcow2_mark_clean(bs); + if (ret < 0) { + goto fail; + } + + ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size); + if (ret < 0) { + goto fail; + } + + return 0; + +fail_broken_refcounts: + /* The BDS is unusable at this point. If we wanted to make it usable, we + * would have to call qcow2_refcount_close(), qcow2_refcount_init(), + * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() + * again. However, because the functions which could have caused this error + * path to be taken are used by those functions as well, it's very likely + * that that sequence will fail as well. Therefore, just eject the BDS. */ + bs->drv = NULL; + +fail: + g_free(new_reftable); + return ret; +} + +static int qcow2_make_empty(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + uint64_t start_sector; + int sector_step = INT_MAX / BDRV_SECTOR_SIZE; + int l1_clusters, ret = 0; + + l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); + + if (s->qcow_version >= 3 && !s->snapshots && + 3 + l1_clusters <= s->refcount_block_size) { + /* The following function only works for qcow2 v3 images (it requires + * the dirty flag) and only as long as there are no snapshots (because + * it completely empties the image). Furthermore, the L1 table and three + * additional clusters (image header, refcount table, one refcount + * block) have to fit inside one refcount block. */ + return make_completely_empty(bs); + } + + /* This fallback code simply discards every active cluster; this is slow, + * but works in all cases */ + for (start_sector = 0; start_sector < bs->total_sectors; + start_sector += sector_step) + { + /* As this function is generally used after committing an external + * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the + * default action for this kind of discard is to pass the discard, + * which will ideally result in an actually smaller image file, as + * is probably desired. */ + ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE, + MIN(sector_step, + bs->total_sectors - start_sector), + QCOW2_DISCARD_SNAPSHOT, true); + if (ret < 0) { + break; + } + } + + return ret; +} + static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -2361,7 +2550,8 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, * Downgrades an image's version. To achieve this, any incompatible features * have to be removed. */ -static int qcow2_downgrade(BlockDriverState *bs, int target_version) +static int qcow2_downgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb) { BDRVQcowState *s = bs->opaque; int current_version = s->qcow_version; @@ -2410,7 +2600,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version) /* clearing autoclear features is trivial */ s->autoclear_features = 0; - ret = qcow2_expand_zero_clusters(bs); + ret = qcow2_expand_zero_clusters(bs, status_cb); if (ret < 0) { return ret; } @@ -2424,7 +2614,8 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version) return 0; } -static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts) +static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb) { BDRVQcowState *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; @@ -2502,7 +2693,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts) return ret; } } else { - ret = qcow2_downgrade(bs, new_version); + ret = qcow2_downgrade(bs, new_version, status_cb); if (ret < 0) { return ret; } @@ -2676,6 +2867,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_co_discard = qcow2_co_discard, .bdrv_truncate = qcow2_truncate, .bdrv_write_compressed = qcow2_write_compressed, + .bdrv_make_empty = qcow2_make_empty, .bdrv_snapshot_create = qcow2_snapshot_create, .bdrv_snapshot_goto = qcow2_snapshot_goto, diff --git a/block/qcow2.h b/block/qcow2.h index 577ccd1bf0..6e39a1b639 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -487,6 +487,8 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); +int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index); + int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, int addend, enum qcow2_discard_type type); @@ -534,10 +536,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, - int nb_sectors, enum qcow2_discard_type type); + int nb_sectors, enum qcow2_discard_type type, bool full_discard); int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); -int qcow2_expand_zero_clusters(BlockDriverState *bs); +int qcow2_expand_zero_clusters(BlockDriverState *bs, + BlockDriverAmendStatusCB *status_cb); /* qcow2-snapshot.c functions */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); diff --git a/block/raw-posix.c b/block/raw-posix.c index 475cf74655..e100ae2046 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1481,12 +1481,12 @@ out: return result; } -static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data, - off_t *hole, int nb_sectors, int *pnum) +static int try_fiemap(BlockDriverState *bs, off_t start, off_t *data, + off_t *hole, int nb_sectors) { #ifdef CONFIG_FIEMAP BDRVRawState *s = bs->opaque; - int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + int ret = 0; struct { struct fiemap fm; struct fiemap_extent fe; @@ -1527,18 +1527,14 @@ static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data, #endif } -static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, - off_t *hole, int *pnum) +static int try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, + off_t *hole) { #if defined SEEK_HOLE && defined SEEK_DATA BDRVRawState *s = bs->opaque; *hole = lseek(s->fd, start, SEEK_HOLE); if (*hole == -1) { - /* -ENXIO indicates that sector_num was past the end of the file. - * There is a virtual hole there. */ - assert(errno != -ENXIO); - return -errno; } @@ -1552,7 +1548,7 @@ static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, } } - return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + return 0; #else return -ENOTSUP; #endif @@ -1578,7 +1574,8 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, int nb_sectors, int *pnum) { off_t start, data = 0, hole = 0; - int64_t ret; + int64_t total_size; + int ret; ret = fd_open(bs); if (ret < 0) { @@ -1586,29 +1583,38 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, } start = sector_num * BDRV_SECTOR_SIZE; + total_size = bdrv_getlength(bs); + if (total_size < 0) { + return total_size; + } else if (start >= total_size) { + *pnum = 0; + return 0; + } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { + nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + } - ret = try_seek_hole(bs, start, &data, &hole, pnum); + ret = try_seek_hole(bs, start, &data, &hole); if (ret < 0) { - ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum); + ret = try_fiemap(bs, start, &data, &hole, nb_sectors); if (ret < 0) { /* Assume everything is allocated. */ data = 0; hole = start + nb_sectors * BDRV_SECTOR_SIZE; - ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + ret = 0; } } + assert(ret >= 0); + if (data <= start) { /* On a data extent, compute sectors to the end of the extent. */ *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); + return ret | BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; } else { /* On a hole, compute sectors to the beginning of the next extent. */ *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); - ret &= ~BDRV_BLOCK_DATA; - ret |= BDRV_BLOCK_ZERO; + return ret | BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID | start; } - - return ret; } static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs, diff --git a/block/rbd.c b/block/rbd.c index 47cab8be94..5b5a64a27a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -887,6 +887,18 @@ static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs, } #endif +#ifdef LIBRBD_SUPPORTS_INVALIDATE +static void qemu_rbd_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + BDRVRBDState *s = bs->opaque; + int r = rbd_invalidate_cache(s->image); + if (r < 0) { + error_setg_errno(errp, -r, "Failed to invalidate the cache"); + } +} +#endif + static QemuOptsList qemu_rbd_create_opts = { .name = "rbd-create-opts", .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head), @@ -936,6 +948,9 @@ static BlockDriver bdrv_rbd = { .bdrv_snapshot_delete = qemu_rbd_snap_remove, .bdrv_snapshot_list = qemu_rbd_snap_list, .bdrv_snapshot_goto = qemu_rbd_snap_rollback, +#ifdef LIBRBD_SUPPORTS_INVALIDATE + .bdrv_invalidate_cache = qemu_rbd_invalidate_cache, +#endif }; static void bdrv_rbd_init(void) diff --git a/block/snapshot.c b/block/snapshot.c index 85c52ff455..698e1a1d58 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -236,6 +236,10 @@ int bdrv_snapshot_delete(BlockDriverState *bs, error_setg(errp, "snapshot_id and name are both NULL"); return -EINVAL; } + + /* drain all pending i/o before deleting snapshot */ + bdrv_drain_all(); + if (drv->bdrv_snapshot_delete) { return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); } diff --git a/block/stream.c b/block/stream.c index a1dc8da484..a628901f69 100644 --- a/block/stream.c +++ b/block/stream.c @@ -79,9 +79,39 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base, bdrv_refresh_limits(top, NULL); } +typedef struct { + int ret; + bool reached_end; +} StreamCompleteData; + +static void stream_complete(BlockJob *job, void *opaque) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common); + StreamCompleteData *data = opaque; + BlockDriverState *base = s->base; + + if (!block_job_is_cancelled(&s->common) && data->reached_end && + data->ret == 0) { + const char *base_id = NULL, *base_fmt = NULL; + if (base) { + base_id = s->backing_file_str; + if (base->drv) { + base_fmt = base->drv->format_name; + } + } + data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt); + close_unused_images(job->bs, base, base_id); + } + + g_free(s->backing_file_str); + block_job_completed(&s->common, data->ret); + g_free(data); +} + static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; + StreamCompleteData *data; BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; int64_t sector_num, end; @@ -183,21 +213,13 @@ wait: /* Do not remove the backing file if an error was there but ignored. */ ret = error; - if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { - const char *base_id = NULL, *base_fmt = NULL; - if (base) { - base_id = s->backing_file_str; - if (base->drv) { - base_fmt = base->drv->format_name; - } - } - ret = bdrv_change_backing_file(bs, base_id, base_fmt); - close_unused_images(bs, base, base_id); - } - qemu_vfree(buf); - g_free(s->backing_file_str); - block_job_completed(&s->common, ret); + + /* Modify backing chain and close BDSes in main loop */ + data = g_malloc(sizeof(*data)); + data->ret = ret; + data->reached_end = sector_num == end; + block_job_defer_to_main_loop(&s->common, stream_complete, data); } static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/blockdev.c b/blockdev.c index eb743a956c..57910b82c7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -115,14 +115,21 @@ void blockdev_mark_auto_del(BlockBackend *blk) { DriveInfo *dinfo = blk_legacy_dinfo(blk); BlockDriverState *bs = blk_bs(blk); + AioContext *aio_context; if (!dinfo) { return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + if (bs->job) { block_job_cancel(bs->job); } + + aio_context_release(aio_context); + dinfo->auto_del = 1; } @@ -1922,6 +1929,11 @@ out: static void block_job_cb(void *opaque, int ret) { + /* Note that this function may be executed from another AioContext besides + * the QEMU main loop. If you need to access anything that assumes the + * QEMU global mutex, use a BH or introduce a mutex. + */ + BlockDriverState *bs = opaque; const char *msg = NULL; @@ -1951,6 +1963,7 @@ void qmp_block_stream(const char *device, { BlockDriverState *bs; BlockDriverState *base_bs = NULL; + AioContext *aio_context; Error *local_err = NULL; const char *base_name = NULL; @@ -1964,16 +1977,20 @@ void qmp_block_stream(const char *device, return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) { - return; + goto out; } if (has_base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { error_set(errp, QERR_BASE_NOT_FOUND, base); - return; + goto out; } + assert(bdrv_get_aio_context(base_bs) == aio_context); base_name = base; } @@ -1982,7 +1999,7 @@ void qmp_block_stream(const char *device, if (base_bs == NULL && has_backing_file) { error_setg(errp, "backing file specified, but streaming the " "entire chain"); - return; + goto out; } /* backing_file string overrides base bs filename */ @@ -1992,10 +2009,13 @@ void qmp_block_stream(const char *device, on_error, block_job_cb, bs, &local_err); if (local_err) { error_propagate(errp, local_err); - return; + goto out; } trace_qmp_block_stream(bs, bs->job); + +out: + aio_context_release(aio_context); } void qmp_block_commit(const char *device, @@ -2007,6 +2027,7 @@ void qmp_block_commit(const char *device, { BlockDriverState *bs; BlockDriverState *base_bs, *top_bs; + AioContext *aio_context; Error *local_err = NULL; /* This will be part of the QMP command, if/when the * BlockdevOnError change for blkmirror makes it in @@ -2017,9 +2038,6 @@ void qmp_block_commit(const char *device, speed = 0; } - /* drain all i/o before commits */ - bdrv_drain_all(); - /* Important Note: * libvirt relies on the DeviceNotFound error class in order to probe for * live commit feature versions; for this to work, we must make sure to @@ -2031,8 +2049,14 @@ void qmp_block_commit(const char *device, return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + /* drain all i/o before commits */ + bdrv_drain_all(); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) { - return; + goto out; } /* default top_bs is the active layer */ @@ -2046,9 +2070,11 @@ void qmp_block_commit(const char *device, if (top_bs == NULL) { error_setg(errp, "Top image file %s not found", top ? top : "NULL"); - return; + goto out; } + assert(bdrv_get_aio_context(top_bs) == aio_context); + if (has_base && base) { base_bs = bdrv_find_backing_image(top_bs, base); } else { @@ -2057,20 +2083,22 @@ void qmp_block_commit(const char *device, if (base_bs == NULL) { error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); - return; + goto out; } + assert(bdrv_get_aio_context(base_bs) == aio_context); + /* Do not allow attempts to commit an image into itself */ if (top_bs == base_bs) { error_setg(errp, "cannot commit an image into itself"); - return; + goto out; } if (top_bs == bs) { if (has_backing_file) { error_setg(errp, "'backing-file' specified," " but 'top' is the active layer"); - return; + goto out; } commit_active_start(bs, base_bs, speed, on_error, block_job_cb, bs, &local_err); @@ -2080,8 +2108,11 @@ void qmp_block_commit(const char *device, } if (local_err != NULL) { error_propagate(errp, local_err); - return; + goto out; } + +out: + aio_context_release(aio_context); } void qmp_drive_backup(const char *device, const char *target, @@ -2096,6 +2127,7 @@ void qmp_drive_backup(const char *device, const char *target, BlockDriverState *bs; BlockDriverState *target_bs; BlockDriverState *source = NULL; + AioContext *aio_context; BlockDriver *drv = NULL; Error *local_err = NULL; int flags; @@ -2121,9 +2153,12 @@ void qmp_drive_backup(const char *device, const char *target, return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); - return; + goto out; } if (!has_format) { @@ -2133,12 +2168,12 @@ void qmp_drive_backup(const char *device, const char *target, drv = bdrv_find_format(format); if (!drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; + goto out; } } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { - return; + goto out; } flags = bs->open_flags | BDRV_O_RDWR; @@ -2158,7 +2193,7 @@ void qmp_drive_backup(const char *device, const char *target, size = bdrv_getlength(bs); if (size < 0) { error_setg_errno(errp, -size, "bdrv_getlength failed"); - return; + goto out; } if (mode != NEW_IMAGE_MODE_EXISTING) { @@ -2175,23 +2210,28 @@ void qmp_drive_backup(const char *device, const char *target, if (local_err) { error_propagate(errp, local_err); - return; + goto out; } target_bs = NULL; ret = bdrv_open(&target_bs, target, NULL, NULL, flags, drv, &local_err); if (ret < 0) { error_propagate(errp, local_err); - return; + goto out; } + bdrv_set_aio_context(target_bs, aio_context); + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_unref(target_bs); error_propagate(errp, local_err); - return; + goto out; } + +out: + aio_context_release(aio_context); } BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) @@ -2216,6 +2256,7 @@ void qmp_drive_mirror(const char *device, const char *target, { BlockDriverState *bs; BlockDriverState *source, *target_bs; + AioContext *aio_context; BlockDriver *drv = NULL; Error *local_err = NULL; QDict *options = NULL; @@ -2258,9 +2299,12 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); - return; + goto out; } if (!has_format) { @@ -2270,12 +2314,12 @@ void qmp_drive_mirror(const char *device, const char *target, drv = bdrv_find_format(format); if (!drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; + goto out; } } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR, errp)) { - return; + goto out; } flags = bs->open_flags | BDRV_O_RDWR; @@ -2290,29 +2334,36 @@ void qmp_drive_mirror(const char *device, const char *target, size = bdrv_getlength(bs); if (size < 0) { error_setg_errno(errp, -size, "bdrv_getlength failed"); - return; + goto out; } if (has_replaces) { BlockDriverState *to_replace_bs; + AioContext *replace_aio_context; + int64_t replace_size; if (!has_node_name) { error_setg(errp, "a node-name must be provided when replacing a" " named node of the graph"); - return; + goto out; } to_replace_bs = check_to_replace_node(replaces, &local_err); if (!to_replace_bs) { error_propagate(errp, local_err); - return; + goto out; } - if (size != bdrv_getlength(to_replace_bs)) { + replace_aio_context = bdrv_get_aio_context(to_replace_bs); + aio_context_acquire(replace_aio_context); + replace_size = bdrv_getlength(to_replace_bs); + aio_context_release(replace_aio_context); + + if (size != replace_size) { error_setg(errp, "cannot replace image with a mirror image of " "different size"); - return; + goto out; } } @@ -2341,7 +2392,7 @@ void qmp_drive_mirror(const char *device, const char *target, if (local_err) { error_propagate(errp, local_err); - return; + goto out; } if (has_node_name) { @@ -2357,9 +2408,11 @@ void qmp_drive_mirror(const char *device, const char *target, flags | BDRV_O_NO_BACKING, drv, &local_err); if (ret < 0) { error_propagate(errp, local_err); - return; + goto out; } + bdrv_set_aio_context(target_bs, aio_context); + /* pass the node name to replace to mirror start since it's loose coupling * and will allow to check whether the node still exist at mirror completion */ @@ -2371,24 +2424,42 @@ void qmp_drive_mirror(const char *device, const char *target, if (local_err != NULL) { bdrv_unref(target_bs); error_propagate(errp, local_err); - return; + goto out; } + +out: + aio_context_release(aio_context); } -static BlockJob *find_block_job(const char *device) +/* Get the block job for a given device name and acquire its AioContext */ +static BlockJob *find_block_job(const char *device, AioContext **aio_context) { BlockDriverState *bs; bs = bdrv_find(device); - if (!bs || !bs->job) { - return NULL; + if (!bs) { + goto notfound; } + + *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(*aio_context); + + if (!bs->job) { + aio_context_release(*aio_context); + goto notfound; + } + return bs->job; + +notfound: + *aio_context = NULL; + return NULL; } void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) { - BlockJob *job = find_block_job(device); + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context); if (!job) { error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); @@ -2396,34 +2467,40 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) } block_job_set_speed(job, speed, errp); + aio_context_release(aio_context); } void qmp_block_job_cancel(const char *device, bool has_force, bool force, Error **errp) { - BlockJob *job = find_block_job(device); - - if (!has_force) { - force = false; - } + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context); if (!job) { error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } + + if (!has_force) { + force = false; + } + if (job->paused && !force) { error_setg(errp, "The block job for device '%s' is currently paused", device); - return; + goto out; } trace_qmp_block_job_cancel(job); block_job_cancel(job); +out: + aio_context_release(aio_context); } void qmp_block_job_pause(const char *device, Error **errp) { - BlockJob *job = find_block_job(device); + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context); if (!job) { error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); @@ -2432,11 +2509,13 @@ void qmp_block_job_pause(const char *device, Error **errp) trace_qmp_block_job_pause(job); block_job_pause(job); + aio_context_release(aio_context); } void qmp_block_job_resume(const char *device, Error **errp) { - BlockJob *job = find_block_job(device); + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context); if (!job) { error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); @@ -2445,11 +2524,13 @@ void qmp_block_job_resume(const char *device, Error **errp) trace_qmp_block_job_resume(job); block_job_resume(job); + aio_context_release(aio_context); } void qmp_block_job_complete(const char *device, Error **errp) { - BlockJob *job = find_block_job(device); + AioContext *aio_context; + BlockJob *job = find_block_job(device, &aio_context); if (!job) { error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); @@ -2458,6 +2539,7 @@ void qmp_block_job_complete(const char *device, Error **errp) trace_qmp_block_job_complete(job); block_job_complete(job, errp); + aio_context_release(aio_context); } void qmp_change_backing_file(const char *device, @@ -2602,12 +2684,18 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) BlockDriverState *bs; for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); + if (bs->job) { BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); elem->value = block_job_query(bs->job); *p_next = elem; p_next = &elem->next; } + + aio_context_release(aio_context); } return head; diff --git a/blockjob.c b/blockjob.c index ff0908a653..ba2255d91f 100644 --- a/blockjob.c +++ b/blockjob.c @@ -50,6 +50,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); bdrv_op_block_all(bs, job->blocker); + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; job->bs = bs; @@ -153,7 +154,7 @@ void block_job_iostatus_reset(BlockJob *job) } } -struct BlockCancelData { +struct BlockFinishData { BlockJob *job; BlockCompletionFunc *cb; void *opaque; @@ -161,19 +162,22 @@ struct BlockCancelData { int ret; }; -static void block_job_cancel_cb(void *opaque, int ret) +static void block_job_finish_cb(void *opaque, int ret) { - struct BlockCancelData *data = opaque; + struct BlockFinishData *data = opaque; data->cancelled = block_job_is_cancelled(data->job); data->ret = ret; data->cb(data->opaque, ret); } -int block_job_cancel_sync(BlockJob *job) +static int block_job_finish_sync(BlockJob *job, + void (*finish)(BlockJob *, Error **errp), + Error **errp) { - struct BlockCancelData data; + struct BlockFinishData data; BlockDriverState *bs = job->bs; + Error *local_err = NULL; assert(bs->job == job); @@ -184,15 +188,37 @@ int block_job_cancel_sync(BlockJob *job) data.cb = job->cb; data.opaque = job->opaque; data.ret = -EINPROGRESS; - job->cb = block_job_cancel_cb; + job->cb = block_job_finish_cb; job->opaque = &data; - block_job_cancel(job); + finish(job, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EBUSY; + } while (data.ret == -EINPROGRESS) { aio_poll(bdrv_get_aio_context(bs), true); } return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; } +/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be + * used with block_job_finish_sync() without the need for (rather nasty) + * function pointer casts there. */ +static void block_job_cancel_err(BlockJob *job, Error **errp) +{ + block_job_cancel(job); +} + +int block_job_cancel_sync(BlockJob *job) +{ + return block_job_finish_sync(job, &block_job_cancel_err, NULL); +} + +int block_job_complete_sync(BlockJob *job, Error **errp) +{ + return block_job_finish_sync(job, &block_job_complete, errp); +} + void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) { assert(job->busy); @@ -236,6 +262,7 @@ BlockJobInfo *block_job_query(BlockJob *job) info->offset = job->offset; info->speed = job->speed; info->io_status = job->iostatus; + info->ready = job->ready; return info; } @@ -271,6 +298,8 @@ void block_job_event_completed(BlockJob *job, const char *msg) void block_job_event_ready(BlockJob *job) { + job->ready = true; + qapi_event_send_block_job_ready(job->driver->job_type, bdrv_get_device_name(job->bs), job->len, @@ -314,3 +343,48 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, } return action; } + +typedef struct { + BlockJob *job; + QEMUBH *bh; + AioContext *aio_context; + BlockJobDeferToMainLoopFn *fn; + void *opaque; +} BlockJobDeferToMainLoopData; + +static void block_job_defer_to_main_loop_bh(void *opaque) +{ + BlockJobDeferToMainLoopData *data = opaque; + AioContext *aio_context; + + qemu_bh_delete(data->bh); + + /* Prevent race with block_job_defer_to_main_loop() */ + aio_context_acquire(data->aio_context); + + /* Fetch BDS AioContext again, in case it has changed */ + aio_context = bdrv_get_aio_context(data->job->bs); + aio_context_acquire(aio_context); + + data->fn(data->job, data->opaque); + + aio_context_release(aio_context); + + aio_context_release(data->aio_context); + + g_free(data); +} + +void block_job_defer_to_main_loop(BlockJob *job, + BlockJobDeferToMainLoopFn *fn, + void *opaque) +{ + BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data)); + data->job = job; + data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data); + data->aio_context = bdrv_get_aio_context(job->bs); + data->fn = fn; + data->opaque = opaque; + + qemu_bh_schedule(data->bh); +} diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 45b1164c3e..1222a37f4f 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -196,6 +196,11 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, blk_op_block_all(conf->conf.blk, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_RESIZE, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_MIRROR, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_STREAM, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_REPLACE, s->blocker); *dataplane = s; } diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 70958e3655..61dbed1b97 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -49,6 +49,9 @@ static int handle_cmd(AHCIState *s,int port,int slot); static void ahci_reset_port(AHCIState *s, int port); static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis); static void ahci_init_d2h(AHCIDevice *ad); +static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write); +static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes); + static uint32_t ahci_port_read(AHCIState *s, int port, int offset) { @@ -567,24 +570,24 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished) AHCIDevice *ad = &s->dev[port]; AHCIPortRegs *pr = &ad->port_regs; IDEState *ide_state; - uint8_t *sdb_fis; + SDBFIS *sdb_fis; if (!s->dev[port].res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) { return; } - sdb_fis = &ad->res_fis[RES_FIS_SDBFIS]; + sdb_fis = (SDBFIS *)&ad->res_fis[RES_FIS_SDBFIS]; ide_state = &ad->port.ifs[0]; - /* clear memory */ - *(uint32_t*)sdb_fis = 0; - - /* write values */ - sdb_fis[0] = ide_state->error; - sdb_fis[2] = ide_state->status & 0x77; + sdb_fis->type = 0xA1; + /* Interrupt pending & Notification bit */ + sdb_fis->flags = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0); + sdb_fis->status = ide_state->status & 0x77; + sdb_fis->error = ide_state->error; + /* update SAct field in SDB_FIS */ s->dev[port].finished |= finished; - *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(ad->finished); + sdb_fis->payload = cpu_to_le32(ad->finished); /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */ pr->tfdata = (ad->port.ifs[0].error << 8) | @@ -600,6 +603,7 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len) uint8_t *pio_fis, *cmd_fis; uint64_t tbl_addr; dma_addr_t cmd_len = 0x80; + IDEState *s = &ad->port.ifs[0]; if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) { return; @@ -629,21 +633,21 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len) pio_fis[0] = 0x5f; pio_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0); - pio_fis[2] = ad->port.ifs[0].status; - pio_fis[3] = ad->port.ifs[0].error; - - pio_fis[4] = cmd_fis[4]; - pio_fis[5] = cmd_fis[5]; - pio_fis[6] = cmd_fis[6]; - pio_fis[7] = cmd_fis[7]; - pio_fis[8] = cmd_fis[8]; - pio_fis[9] = cmd_fis[9]; - pio_fis[10] = cmd_fis[10]; - pio_fis[11] = cmd_fis[11]; + pio_fis[2] = s->status; + pio_fis[3] = s->error; + + pio_fis[4] = s->sector; + pio_fis[5] = s->lcyl; + pio_fis[6] = s->hcyl; + pio_fis[7] = s->select; + pio_fis[8] = s->hob_sector; + pio_fis[9] = s->hob_lcyl; + pio_fis[10] = s->hob_hcyl; + pio_fis[11] = 0; pio_fis[12] = cmd_fis[12]; pio_fis[13] = cmd_fis[13]; pio_fis[14] = 0; - pio_fis[15] = ad->port.ifs[0].status; + pio_fis[15] = s->status; pio_fis[16] = len & 255; pio_fis[17] = len >> 8; pio_fis[18] = 0; @@ -670,6 +674,7 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis) int i; dma_addr_t cmd_len = 0x80; int cmd_mapped = 0; + IDEState *s = &ad->port.ifs[0]; if (!ad->res_fis || !(pr->cmd & PORT_CMD_FIS_RX)) { return; @@ -687,17 +692,17 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis) d2h_fis[0] = 0x34; d2h_fis[1] = (ad->hba->control_regs.irqstatus ? (1 << 6) : 0); - d2h_fis[2] = ad->port.ifs[0].status; - d2h_fis[3] = ad->port.ifs[0].error; - - d2h_fis[4] = cmd_fis[4]; - d2h_fis[5] = cmd_fis[5]; - d2h_fis[6] = cmd_fis[6]; - d2h_fis[7] = cmd_fis[7]; - d2h_fis[8] = cmd_fis[8]; - d2h_fis[9] = cmd_fis[9]; - d2h_fis[10] = cmd_fis[10]; - d2h_fis[11] = cmd_fis[11]; + d2h_fis[2] = s->status; + d2h_fis[3] = s->error; + + d2h_fis[4] = s->sector; + d2h_fis[5] = s->lcyl; + d2h_fis[6] = s->hcyl; + d2h_fis[7] = s->select; + d2h_fis[8] = s->hob_sector; + d2h_fis[9] = s->hob_lcyl; + d2h_fis[10] = s->hob_hcyl; + d2h_fis[11] = 0; d2h_fis[12] = cmd_fis[12]; d2h_fis[13] = cmd_fis[13]; for (i = 14; i < 20; i++) { @@ -1103,16 +1108,12 @@ static void ahci_start_transfer(IDEDMA *dma) } } - /* update number of transferred bytes */ - ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + size); - out: /* declare that we processed everything */ s->data_ptr = s->data_end; - if (has_sglist) { - qemu_sglist_destroy(&s->sg); - } + /* Update number of transferred bytes, destroy sglist */ + ahci_commit_buf(dma, size); s->end_transfer_func(s); @@ -1133,6 +1134,11 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s, dma_cb(s, 0); } +/** + * Called in DMA R/W chains to read the PRDT, utilizing ahci_populate_sglist. + * Not currently invoked by PIO R/W chains, + * which invoke ahci_populate_sglist via ahci_start_transfer. + */ static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1145,6 +1151,24 @@ static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write) return s->io_buffer_size != 0; } +/** + * Destroys the scatter-gather list, + * and updates the command header with a bytes-read value. + * called explicitly via ahci_dma_rw_buf (ATAPI DMA), + * and ahci_start_transfer (PIO R/W), + * and called via callback from ide_dma_cb for DMA R/W paths. + */ +static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) +{ + AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); + IDEState *s = &ad->port.ifs[0]; + + tx_bytes += le32_to_cpu(ad->cur_cmd->status); + ad->cur_cmd->status = cpu_to_le32(tx_bytes); + + qemu_sglist_destroy(&s->sg); +} + static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1162,11 +1186,9 @@ static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) dma_buf_write(p, l, &s->sg); } - /* free sglist that was created in ahci_populate_sglist() */ - qemu_sglist_destroy(&s->sg); + /* free sglist, update byte count */ + ahci_commit_buf(dma, l); - /* update number of transferred bytes */ - ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + l); s->io_buffer_index += l; s->io_buffer_offset += l; @@ -1209,6 +1231,7 @@ static const IDEDMAOps ahci_dma_ops = { .start_dma = ahci_start_dma, .start_transfer = ahci_start_transfer, .prepare_buf = ahci_dma_prepare_buf, + .commit_buf = ahci_commit_buf, .rw_buf = ahci_dma_rw_buf, .set_unit = ahci_dma_set_unit, .cmd_done = ahci_cmd_done, diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index e8ea34a1df..b12323730b 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -327,6 +327,14 @@ typedef struct NCQFrame { uint8_t reserved10; } QEMU_PACKED NCQFrame; +typedef struct SDBFIS { + uint8_t type; + uint8_t flags; + uint8_t status; + uint8_t error; + uint32_t payload; +} QEMU_PACKED SDBFIS; + void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports); void ahci_uninit(AHCIState *s); diff --git a/hw/ide/core.c b/hw/ide/core.c index 44e3d502d4..d316ccf961 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -634,8 +634,11 @@ void ide_sector_read(IDEState *s) ide_sector_read_cb, s); } -static void dma_buf_commit(IDEState *s) +static void dma_buf_commit(IDEState *s, uint32_t tx_bytes) { + if (s->bus->dma->ops->commit_buf) { + s->bus->dma->ops->commit_buf(s->bus->dma, tx_bytes); + } qemu_sglist_destroy(&s->sg); } @@ -650,6 +653,7 @@ void ide_set_inactive(IDEState *s, bool more) void ide_dma_error(IDEState *s) { + dma_buf_commit(s, 0); ide_abort_command(s); ide_set_inactive(s, false); ide_set_irq(s->bus); @@ -665,7 +669,6 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) s->bus->error_status = op; } else if (action == BLOCK_ERROR_ACTION_REPORT) { if (op & IDE_RETRY_DMA) { - dma_buf_commit(s); ide_dma_error(s); } else { ide_rw_error(s); @@ -709,7 +712,8 @@ void ide_dma_cb(void *opaque, int ret) sector_num = ide_get_sector(s); if (n > 0) { - dma_buf_commit(s); + assert(s->io_buffer_size == s->sg.size); + dma_buf_commit(s, s->io_buffer_size); sector_num += n; ide_set_sector(s, sector_num); s->nsector -= n; @@ -740,7 +744,6 @@ void ide_dma_cb(void *opaque, int ret) if ((s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) && !ide_sect_range_ok(s, sector_num, n)) { - dma_buf_commit(s); ide_dma_error(s); return; } diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 68ac610d6f..907493d0f8 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -322,6 +322,7 @@ typedef void EndTransferFunc(IDEState *); typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *); typedef void DMAVoidFunc(IDEDMA *); typedef int DMAIntFunc(IDEDMA *, int); +typedef void DMAu32Func(IDEDMA *, uint32_t); typedef void DMAStopFunc(IDEDMA *, bool); typedef void DMARestartFunc(void *, int, RunState); @@ -430,6 +431,7 @@ struct IDEDMAOps { DMAStartFunc *start_dma; DMAVoidFunc *start_transfer; DMAIntFunc *prepare_buf; + DMAu32Func *commit_buf; DMAIntFunc *rw_buf; DMAIntFunc *set_unit; DMAStopFunc *set_inactive; diff --git a/include/block/block.h b/include/block/block.h index 341054dcf7..13e453736c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -268,7 +268,13 @@ typedef enum { int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); -int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts); +/* The units of offset and total_work_size may be chosen arbitrarily by the + * block driver; total_work_size may change during the course of the amendment + * operation */ +typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, + int64_t total_work_size); +int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb); /* external snapshots */ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, @@ -328,6 +334,7 @@ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); int bdrv_flush_all(void); void bdrv_close_all(void); +void bdrv_drain(BlockDriverState *bs); void bdrv_drain_all(void); int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); @@ -498,6 +505,8 @@ typedef enum { BLKDBG_PWRITEV_ZERO, BLKDBG_PWRITEV_DONE, + BLKDBG_EMPTY_IMAGE_PREPARE, + BLKDBG_EVENT_MAX, } BlkDebugEvent; diff --git a/include/block/block_int.h b/include/block/block_int.h index 8898c6c51b..a1c17b9578 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -232,7 +232,8 @@ struct BlockDriver { int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result, BdrvCheckMode fix); - int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts); + int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb); void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); @@ -289,6 +290,9 @@ typedef struct BlockLimits { /* optimal transfer length in sectors */ int opt_transfer_length; + /* maximal transfer length in sectors */ + int max_transfer_length; + /* memory alignment so that no bounce buffer is needed */ size_t opt_mem_alignment; } BlockLimits; diff --git a/include/block/blockjob.h b/include/block/blockjob.h index acb399f87d..b6d4ebbe03 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -91,6 +91,11 @@ struct BlockJob { */ bool busy; + /** + * Set to true when the job is ready to be completed. + */ + bool ready; + /** Status that is published by the query-block-jobs QMP API */ BlockDeviceIoStatus iostatus; @@ -273,6 +278,21 @@ bool block_job_is_paused(BlockJob *job); int block_job_cancel_sync(BlockJob *job); /** + * block_job_complete_sync: + * @job: The job to be completed. + * @errp: Error object which may be set by block_job_complete(); this is not + * necessarily set on every error, the job return value has to be + * checked as well. + * + * Synchronously complete the job. The completion callback is called before the + * function returns, unless it is NULL (which is permissible when using this + * function). + * + * Returns the return value from the job. + */ +int block_job_complete_sync(BlockJob *job, Error **errp); + +/** * block_job_iostatus_reset: * @job: The job whose I/O status should be reset. * @@ -295,4 +315,23 @@ void block_job_iostatus_reset(BlockJob *job); BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, BlockdevOnError on_err, int is_read, int error); + +typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); + +/** + * block_job_defer_to_main_loop: + * @job: The job + * @fn: The function to run in the main loop + * @opaque: The opaque value that is passed to @fn + * + * Execute a given function in the main loop with the BlockDriverState + * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and + * anything that uses bdrv_drain_all() in the main loop. + * + * The @job AioContext is held while @fn executes. + */ +void block_job_defer_to_main_loop(BlockJob *job, + BlockJobDeferToMainLoopFn *fn, + void *opaque); + #endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 1565404f7e..c0324344d5 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -68,6 +68,12 @@ typedef signed int int_fast16_t; #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #endif +/* Minimum function that returns zero only iff both values are zero. + * Intended for use with unsigned values only. */ +#ifndef MIN_NON_ZERO +#define MIN_NON_ZERO(a, b) (((a) != 0 && (a) < (b)) ? (a) : (b)) +#endif + #ifndef ROUND_UP #define ROUND_UP(n,d) (((n) + (d) - 1) & -(d)) #endif diff --git a/qapi/block-core.json b/qapi/block-core.json index 8f7089e054..77a0cfbd82 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -514,12 +514,14 @@ # # @io-status: the status of the job (since 1.3) # +# @ready: true if the job may be completed (since 2.2) +# # Since: 1.1 ## { 'type': 'BlockJobInfo', 'data': {'type': 'str', 'device': 'str', 'len': 'int', 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int', - 'io-status': 'BlockDeviceIoStatus'} } + 'io-status': 'BlockDeviceIoStatus', 'ready': 'bool'} } ## # @query-block-jobs: diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 55aec6bded..95677745f9 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -22,9 +22,9 @@ STEXI ETEXI DEF("commit", img_commit, - "commit [-q] [-f fmt] [-t cache] filename") + "commit [-q] [-f fmt] [-t cache] [-b base] [-d] [-p] filename") STEXI -@item commit [-q] [-f @var{fmt}] [-t @var{cache}] @var{filename} +@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename} ETEXI DEF("compare", img_compare, @@ -70,8 +70,8 @@ STEXI ETEXI DEF("amend", img_amend, - "amend [-q] [-f fmt] [-t cache] -o options filename") + "amend [-p] [-q] [-f fmt] [-t cache] -o options filename") STEXI -@item amend [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename} +@item amend [-p] [-q] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename} @end table ETEXI diff --git a/qemu-img.c b/qemu-img.c index 731502c2ce..66a7eb4045 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -31,6 +31,7 @@ #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" #include "block/block_int.h" +#include "block/blockjob.h" #include "block/qapi.h" #include <getopt.h> @@ -722,18 +723,54 @@ fail: return ret; } +typedef struct CommonBlockJobCBInfo { + BlockDriverState *bs; + Error **errp; +} CommonBlockJobCBInfo; + +static void common_block_job_cb(void *opaque, int ret) +{ + CommonBlockJobCBInfo *cbi = opaque; + + if (ret < 0) { + error_setg_errno(cbi->errp, -ret, "Block job failed"); + } + + /* Drop this block job's reference */ + bdrv_unref(cbi->bs); +} + +static void run_block_job(BlockJob *job, Error **errp) +{ + AioContext *aio_context = bdrv_get_aio_context(job->bs); + + do { + aio_poll(aio_context, true); + qemu_progress_print((float)job->offset / job->len * 100.f, 0); + } while (!job->ready); + + block_job_complete_sync(job, errp); + + /* A block job may finish instantaneously without publishing any progress, + * so just signal completion here */ + qemu_progress_print(100.f, 0); +} + static int img_commit(int argc, char **argv) { int c, ret, flags; - const char *filename, *fmt, *cache; + const char *filename, *fmt, *cache, *base; BlockBackend *blk; - BlockDriverState *bs; - bool quiet = false; + BlockDriverState *bs, *base_bs; + bool progress = false, quiet = false, drop = false; + Error *local_err = NULL; + CommonBlockJobCBInfo cbi; fmt = NULL; cache = BDRV_DEFAULT_CACHE; + base = NULL; for(;;) { - c = getopt(argc, argv, "f:ht:q"); + c = getopt(argc, argv, "f:ht:b:dpq"); if (c == -1) { break; } @@ -748,17 +785,34 @@ static int img_commit(int argc, char **argv) case 't': cache = optarg; break; + case 'b': + base = optarg; + /* -b implies -d */ + drop = true; + break; + case 'd': + drop = true; + break; + case 'p': + progress = true; + break; case 'q': quiet = true; break; } } + + /* Progress is not shown in Quiet mode */ + if (quiet) { + progress = false; + } + if (optind != argc - 1) { error_exit("Expecting one image file name"); } filename = argv[optind++]; - flags = BDRV_O_RDWR; + flags = BDRV_O_RDWR | BDRV_O_UNMAP; ret = bdrv_parse_cache_flags(cache, &flags); if (ret < 0) { error_report("Invalid cache option: %s", cache); @@ -771,29 +825,76 @@ static int img_commit(int argc, char **argv) } bs = blk_bs(blk); - ret = bdrv_commit(bs); - switch(ret) { - case 0: - qprintf(quiet, "Image committed.\n"); - break; - case -ENOENT: - error_report("No disk inserted"); - break; - case -EACCES: - error_report("Image is read-only"); - break; - case -ENOTSUP: - error_report("Image is already committed"); - break; - default: - error_report("Error while committing image"); - break; + qemu_progress_init(progress, 1.f); + qemu_progress_print(0.f, 100); + + if (base) { + base_bs = bdrv_find_backing_image(bs, base); + if (!base_bs) { + error_set(&local_err, QERR_BASE_NOT_FOUND, base); + goto done; + } + } else { + /* This is different from QMP, which by default uses the deepest file in + * the backing chain (i.e., the very base); however, the traditional + * behavior of qemu-img commit is using the immediate backing file. */ + base_bs = bs->backing_hd; + if (!base_bs) { + error_setg(&local_err, "Image does not have a backing file"); + goto done; + } } + cbi = (CommonBlockJobCBInfo){ + .errp = &local_err, + .bs = bs, + }; + + commit_active_start(bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT, + common_block_job_cb, &cbi, &local_err); + if (local_err) { + goto done; + } + + /* The block job will swap base_bs and bs (which is not what we really want + * here, but okay) and unref base_bs (after the swap, i.e., the old top + * image). In order to still be able to empty that top image afterwards, + * increment the reference counter here preemptively. */ + if (!drop) { + bdrv_ref(base_bs); + } + + run_block_job(bs->job, &local_err); + if (local_err) { + goto unref_backing; + } + + if (!drop && base_bs->drv->bdrv_make_empty) { + ret = base_bs->drv->bdrv_make_empty(base_bs); + if (ret) { + error_setg_errno(&local_err, -ret, "Could not empty %s", + filename); + goto unref_backing; + } + } + +unref_backing: + if (!drop) { + bdrv_unref(base_bs); + } + +done: + qemu_progress_end(); + blk_unref(blk); - if (ret) { + + if (local_err) { + qerror_report_err(local_err); + error_free(local_err); return 1; } + + qprintf(quiet, "Image committed.\n"); return 0; } @@ -2770,6 +2871,12 @@ out: return 0; } +static void amend_status_cb(BlockDriverState *bs, + int64_t offset, int64_t total_work_size) +{ + qemu_progress_print(100.f * offset / total_work_size, 0); +} + static int img_amend(int argc, char **argv) { int c, ret = 0; @@ -2778,13 +2885,13 @@ static int img_amend(int argc, char **argv) QemuOpts *opts = NULL; const char *fmt = NULL, *filename, *cache; int flags; - bool quiet = false; + bool quiet = false, progress = false; BlockBackend *blk = NULL; BlockDriverState *bs = NULL; cache = BDRV_DEFAULT_CACHE; for (;;) { - c = getopt(argc, argv, "ho:f:t:q"); + c = getopt(argc, argv, "ho:f:t:pq"); if (c == -1) { break; } @@ -2814,6 +2921,9 @@ static int img_amend(int argc, char **argv) case 't': cache = optarg; break; + case 'p': + progress = true; + break; case 'q': quiet = true; break; @@ -2824,6 +2934,11 @@ static int img_amend(int argc, char **argv) error_exit("Must specify options (-o)"); } + if (quiet) { + progress = false; + } + qemu_progress_init(progress, 1.0); + filename = (optind == argc - 1) ? argv[argc - 1] : NULL; if (fmt && has_help_option(options)) { /* If a format is explicitly specified (and possibly no filename is @@ -2833,7 +2948,9 @@ static int img_amend(int argc, char **argv) } if (optind != argc - 1) { - error_exit("Expecting one image file name"); + error_report("Expecting one image file name"); + ret = -1; + goto out; } flags = BDRV_O_FLAGS | BDRV_O_RDWR; @@ -2867,13 +2984,18 @@ static int img_amend(int argc, char **argv) goto out; } - ret = bdrv_amend_options(bs, opts); + /* In case the driver does not call amend_status_cb() */ + qemu_progress_print(0.f, 0); + ret = bdrv_amend_options(bs, opts, &amend_status_cb); + qemu_progress_print(100.f, 0); if (ret < 0) { error_report("Error while amending options: %s", strerror(-ret)); goto out; } out: + qemu_progress_end(); + blk_unref(blk); qemu_opts_del(opts); qemu_opts_free(create_opts); diff --git a/qemu-img.texi b/qemu-img.texi index e3ef4a00e9..0a1ab35989 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -167,7 +167,7 @@ this case. @var{backing_file} will never be modified unless you use the The size can also be specified using the @var{size} option with @code{-o}, it doesn't need to be specified separately in this case. -@item commit [-f @var{fmt}] [-t @var{cache}] @var{filename} +@item commit [-q] [-f @var{fmt}] [-t @var{cache}] [-b @var{base}] [-d] [-p] @var{filename} Commit the changes recorded in @var{filename} in its base image or backing file. If the backing file is smaller than the snapshot, then the backing file will be @@ -176,6 +176,20 @@ the backing file, the backing file will not be truncated. If you want the backing file to match the size of the smaller snapshot, you can safely truncate it yourself once the commit operation successfully completes. +The image @var{filename} is emptied after the operation has succeeded. If you do +not need @var{filename} afterwards and intend to drop it, you may skip emptying +@var{filename} by specifying the @code{-d} flag. + +If the backing chain of the given image file @var{filename} has more than one +layer, the backing file into which the changes will be committed may be +specified as @var{base} (which has to be part of @var{filename}'s backing +chain). If @var{base} is not specified, the immediate backing file of the top +image (which is @var{filename}) will be used. For reasons of consistency, +explicitly specifying @var{base} will always imply @code{-d} (since emptying an +image after committing to an indirect backing file would lead to different data +being read from the image due to content in the intermediate backing chain +overruling the commit target). + @item compare [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-s] [-q] @var{filename1} @var{filename2} Check if two images have the same content. You can compare images with @@ -398,7 +412,7 @@ After using this command to grow a disk image, you must use file system and partitioning tools inside the VM to actually begin using the new space on the device. -@item amend [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename} +@item amend [-p] [-f @var{fmt}] [-t @var{cache}] -o @var{options} @var{filename} Amends the image format specific @var{options} for the image file @var{filename}. Not all file formats support this operation. @@ -1246,7 +1246,7 @@ int load_vmstate(const char *name) void do_delvm(Monitor *mon, const QDict *qdict) { BlockDriverState *bs; - Error *err = NULL; + Error *err; const char *name = qdict_get_str(qdict, "name"); if (!find_vmstate_bs()) { @@ -1257,6 +1257,7 @@ void do_delvm(Monitor *mon, const QDict *qdict) bs = NULL; while ((bs = bdrv_next(bs))) { if (bdrv_can_snapshot(bs)) { + err = NULL; bdrv_snapshot_delete_by_id_or_name(bs, name, &err); if (err) { monitor_printf(mon, diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index f1e16c11c7..2b432ad7a1 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -43,8 +43,7 @@ class ImageCommitTestCase(iotests.QMPTestCase): if event['event'] == 'BLOCK_JOB_COMPLETED': self.assert_qmp(event, 'data/type', 'commit') self.assert_qmp(event, 'data/device', 'drive0') - self.assert_qmp(event, 'data/offset', self.image_len) - self.assert_qmp(event, 'data/len', self.image_len) + self.assert_qmp(event, 'data/offset', event['data']['len']) if need_ready: self.assertTrue(ready, "Expecting BLOCK_JOB_COMPLETED event") completed = True @@ -52,7 +51,6 @@ class ImageCommitTestCase(iotests.QMPTestCase): ready = True self.assert_qmp(event, 'data/type', 'commit') self.assert_qmp(event, 'data/device', 'drive0') - self.assert_qmp(event, 'data/len', self.image_len) self.vm.qmp('block-job-complete', device='drive0') self.assert_no_active_block_jobs() diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 5dbd4ee91b..59a8f733f7 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -52,8 +52,7 @@ class ImageMirroringTestCase(iotests.QMPTestCase): event = self.cancel_and_wait(drive=drive) self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED') self.assert_qmp(event, 'data/type', 'mirror') - self.assert_qmp(event, 'data/offset', self.image_len) - self.assert_qmp(event, 'data/len', self.image_len) + self.assert_qmp(event, 'data/offset', event['data']['len']) def complete_and_wait(self, drive='drive0', wait_ready=True): '''Complete a block job and wait for it to finish''' @@ -417,7 +416,6 @@ new_state = "1" self.assert_qmp(event, 'data/type', 'mirror') self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/error', 'Input/output error') - self.assert_qmp(event, 'data/len', self.image_len) completed = True self.assert_no_active_block_jobs() @@ -568,7 +566,6 @@ new_state = "1" self.assert_qmp(event, 'data/type', 'mirror') self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/error', 'Input/output error') - self.assert_qmp(event, 'data/len', self.image_len) completed = True self.assert_no_active_block_jobs() diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index ab98def6d4..8d37f8a65c 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -209,6 +209,31 @@ $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" _check_test_img $QEMU_IO -c "read -P 0 0 64M" "$TEST_IMG" | _filter_qemu_io +echo +echo "=== Testing progress report without snapshot ===" +echo +IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 4G +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 4G +$QEMU_IO -c "write -z 0 64k" \ + -c "write -z 1G 64k" \ + -c "write -z 2G 64k" \ + -c "write -z 3G 64k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG amend -p -o "compat=0.10" "$TEST_IMG" +_check_test_img + +echo +echo "=== Testing progress report with snapshot ===" +echo +IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 4G +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 4G +$QEMU_IO -c "write -z 0 64k" \ + -c "write -z 1G 64k" \ + -c "write -z 2G 64k" \ + -c "write -z 3G 64k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG snapshot -c foo "$TEST_IMG" +$QEMU_IMG amend -p -o "compat=0.10" "$TEST_IMG" +_check_test_img + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index 4ae6472c73..9045544df2 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -390,4 +390,34 @@ wrote 67108864/67108864 bytes at offset 0 No errors were found on the image. read 67108864/67108864 bytes at offset 0 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing progress report without snapshot === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=4294967296 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 backing_file='TEST_DIR/t.IMGFMT.base' +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 1073741824 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 2147483648 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 3221225472 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + (0.00/100%)
(12.50/100%)
(25.00/100%)
(37.50/100%)
(50.00/100%)
(62.50/100%)
(75.00/100%)
(87.50/100%)
(100.00/100%)
(100.00/100%) +No errors were found on the image. + +=== Testing progress report with snapshot === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=4294967296 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 backing_file='TEST_DIR/t.IMGFMT.base' +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 1073741824 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 2147483648 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 3221225472 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + (0.00/100%)
(6.25/100%)
(12.50/100%)
(18.75/100%)
(25.00/100%)
(31.25/100%)
(37.50/100%)
(43.75/100%)
(50.00/100%)
(56.25/100%)
(62.50/100%)
(68.75/100%)
(75.00/100%)
(81.25/100%)
(87.50/100%)
(93.75/100%)
(100.00/100%)
(100.00/100%) +No errors were found on the image. *** done diff --git a/tests/qemu-iotests/076 b/tests/qemu-iotests/076 index bc47457a85..ed2be3581e 100755 --- a/tests/qemu-iotests/076 +++ b/tests/qemu-iotests/076 @@ -47,29 +47,34 @@ catalog_entries_offset=$((0x20)) nb_sectors_offset=$((0x24)) echo -echo "== Read from a valid (enough) image ==" -_use_sample_img fake.parallels.bz2 +echo "== Read from a valid v1 image ==" +_use_sample_img parallels-v1.bz2 { $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Negative catalog size ==" -_use_sample_img fake.parallels.bz2 +_use_sample_img parallels-v1.bz2 poke_file "$TEST_IMG" "$catalog_entries_offset" "\xff\xff\xff\xff" { $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Overflow in catalog allocation ==" -_use_sample_img fake.parallels.bz2 +_use_sample_img parallels-v1.bz2 poke_file "$TEST_IMG" "$nb_sectors_offset" "\xff\xff\xff\xff" poke_file "$TEST_IMG" "$catalog_entries_offset" "\x01\x00\x00\x40" { $QEMU_IO -c "read 64M 64M" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Zero sectors per track ==" -_use_sample_img fake.parallels.bz2 +_use_sample_img parallels-v1.bz2 poke_file "$TEST_IMG" "$tracks_offset" "\x00\x00\x00\x00" { $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir +echo +echo "== Read from a valid v2 image ==" +_use_sample_img parallels-v2.bz2 +{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/076.out b/tests/qemu-iotests/076.out index f7745d8b0d..32ade08565 100644 --- a/tests/qemu-iotests/076.out +++ b/tests/qemu-iotests/076.out @@ -1,18 +1,22 @@ QA output created by 076 -== Read from a valid (enough) image == +== Read from a valid v1 image == read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == Negative catalog size == -qemu-io: can't open device TEST_DIR/fake.parallels: Catalog too large +qemu-io: can't open device TEST_DIR/parallels-v1: Catalog too large no file open, try 'help open' == Overflow in catalog allocation == -qemu-io: can't open device TEST_DIR/fake.parallels: Catalog too large +qemu-io: can't open device TEST_DIR/parallels-v1: Catalog too large no file open, try 'help open' == Zero sectors per track == -qemu-io: can't open device TEST_DIR/fake.parallels: Invalid image: Zero sectors per track +qemu-io: can't open device TEST_DIR/parallels-v1: Invalid image: Zero sectors per track no file open, try 'help open' + +== Read from a valid v2 image == +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/097 b/tests/qemu-iotests/097 new file mode 100755 index 0000000000..c7a613b7ee --- /dev/null +++ b/tests/qemu-iotests/097 @@ -0,0 +1,122 @@ +#!/bin/bash +# +# Commit changes into backing chains and empty the top image if the +# backing image is not explicitly specified +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + _rm_test_img "$TEST_IMG.itmd" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# Any format supporting backing files and bdrv_make_empty +_supported_fmt qcow qcow2 +_supported_proto file +_supported_os Linux + + +# Four passes: +# 0: Two-layer backing chain, commit to upper backing file (implicitly) +# (in this case, the top image will be emptied) +# 1: Two-layer backing chain, commit to upper backing file (explicitly) +# (in this case, the top image will implicitly stay unchanged) +# 2: Two-layer backing chain, commit to upper backing file (implicitly with -d) +# (in this case, the top image will explicitly stay unchanged) +# 3: Two-layer backing chain, commit to lower backing file +# (in this case, the top image will implicitly stay unchanged) +# +# 020 already tests committing, so this only tests whether image chains are +# working properly and that all images above the base are emptied; therefore, +# no complicated patterns are necessary +for i in 0 1 2 3; do + +echo +echo "=== Test pass $i ===" +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" 64M +_make_test_img -b "$TEST_IMG.itmd" 64M + +$QEMU_IO -c 'write -P 1 0 192k' "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c 'write -P 2 64k 128k' "$TEST_IMG.itmd" | _filter_qemu_io +$QEMU_IO -c 'write -P 3 128k 64k' "$TEST_IMG" | _filter_qemu_io + +if [ $i -lt 3 ]; then + if [ $i == 0 ]; then + # -b "$TEST_IMG.itmd" should be the default (that is, committing to the + # first backing file in the chain) + $QEMU_IMG commit "$TEST_IMG" + elif [ $i == 1 ]; then + # explicitly specify the commit target (this should imply -d) + $QEMU_IMG commit -b "$TEST_IMG.itmd" "$TEST_IMG" + else + # do not explicitly specify the commit target, but use -d to leave the + # top image unchanged + $QEMU_IMG commit -d "$TEST_IMG" + fi + + # Bottom should be unchanged + $QEMU_IO -c 'read -P 1 0 192k' "$TEST_IMG.base" | _filter_qemu_io + + # Intermediate should contain changes from top + $QEMU_IO -c 'read -P 1 0 64k' "$TEST_IMG.itmd" | _filter_qemu_io + $QEMU_IO -c 'read -P 2 64k 64k' "$TEST_IMG.itmd" | _filter_qemu_io + $QEMU_IO -c 'read -P 3 128k 64k' "$TEST_IMG.itmd" | _filter_qemu_io + + # And in pass 0, the top image should be empty, whereas in both other passes + # it should be unchanged (which is both checked by qemu-img map) +else + $QEMU_IMG commit -b "$TEST_IMG.base" "$TEST_IMG" + + # Bottom should contain all changes + $QEMU_IO -c 'read -P 1 0 64k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c 'read -P 2 64k 64k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c 'read -P 3 128k 64k' "$TEST_IMG.base" | _filter_qemu_io + + # Both top and intermediate should be unchanged +fi + +$QEMU_IMG map "$TEST_IMG.base" | _filter_qemu_img_map +$QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map +$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map + +done + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/097.out b/tests/qemu-iotests/097.out new file mode 100644 index 0000000000..1cb7764fe8 --- /dev/null +++ b/tests/qemu-iotests/097.out @@ -0,0 +1,119 @@ +QA output created by 097 + +=== Test pass 0 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd' +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 65536 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. +read 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd + +=== Test pass 1 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd' +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 65536 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. +read 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x10000 TEST_DIR/t.IMGFMT.itmd +0x20000 0x10000 TEST_DIR/t.IMGFMT + +=== Test pass 2 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd' +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 65536 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. +read 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x10000 TEST_DIR/t.IMGFMT.itmd +0x20000 0x10000 TEST_DIR/t.IMGFMT + +=== Test pass 3 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.itmd' +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 65536 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x20000 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT.base +0x10000 0x10000 TEST_DIR/t.IMGFMT.itmd +0x20000 0x10000 TEST_DIR/t.IMGFMT +*** done diff --git a/tests/qemu-iotests/098 b/tests/qemu-iotests/098 new file mode 100755 index 0000000000..e2230ad60c --- /dev/null +++ b/tests/qemu-iotests/098 @@ -0,0 +1,82 @@ +#!/bin/bash +# +# Test qcow2's bdrv_make_empty for images without internal snapshots +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_DIR/blkdebug.conf" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +IMGOPTS="compat=1.1" + +for event in l1_update empty_image_prepare reftable_update refblock_alloc; do + +echo +echo "=== $event ===" +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +_make_test_img -b "$TEST_IMG.base" 64M + +# Some data that can be leaked when emptying the top image +$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io + +cat > "$TEST_DIR/blkdebug.conf" <<EOF +[inject-error] +event = "$event" +EOF + +$QEMU_IMG commit "blkdebug:$TEST_DIR/blkdebug.conf:$TEST_IMG" 2>&1 \ + | _filter_testdir | _filter_imgfmt + +# There may be errors, but they should be fixed by opening the image +$QEMU_IO -c close "$TEST_IMG" + +_check_test_img + +done + + +rm -f "$TEST_DIR/blkdebug.conf" + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/098.out b/tests/qemu-iotests/098.out new file mode 100644 index 0000000000..586dfc809f --- /dev/null +++ b/tests/qemu-iotests/098.out @@ -0,0 +1,52 @@ +QA output created by 098 + +=== l1_update === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error +No errors were found on the image. + +=== empty_image_prepare === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error +Leaked cluster 4 refcount=1 reference=0 +Leaked cluster 5 refcount=1 reference=0 +Repairing cluster 4 refcount=1 reference=0 +Repairing cluster 5 refcount=1 reference=0 +No errors were found on the image. + +=== reftable_update === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error +ERROR cluster 0 refcount=0 reference=1 +ERROR cluster 1 refcount=0 reference=1 +ERROR cluster 3 refcount=0 reference=1 +Rebuilding refcount structure +Repairing cluster 1 refcount=1 reference=0 +No errors were found on the image. + +=== refblock_alloc === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base' +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not empty blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error +ERROR cluster 0 refcount=0 reference=1 +ERROR cluster 1 refcount=0 reference=1 +ERROR cluster 3 refcount=0 reference=1 +Rebuilding refcount structure +Repairing cluster 1 refcount=1 reference=0 +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102 index 34b363f78f..161b1974ce 100755 --- a/tests/qemu-iotests/102 +++ b/tests/qemu-iotests/102 @@ -34,9 +34,10 @@ _cleanup() } trap "_cleanup; exit \$status" 0 1 2 3 15 -# get standard environment, filters and checks +# get standard environment, filters and qemu instance handling . ./common.rc . ./common.filter +. ./common.qemu _supported_fmt qcow2 _supported_proto file @@ -53,11 +54,27 @@ _make_test_img $IMG_SIZE $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io # Remove data cluster from image (first cluster: image header, second: reftable, # third: refblock, fourth: L1 table, fifth: L2 table) -truncate -s $((5 * 64 * 1024)) "$TEST_IMG" +$QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024)) $QEMU_IO -c map "$TEST_IMG" $QEMU_IMG map "$TEST_IMG" +echo +echo '=== Testing map on an image file truncated outside of qemu ===' +echo + +# Same as above, only now we concurrently truncate and map the image +_make_test_img $IMG_SIZE +$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io + +qemu_comm_method=monitor _launch_qemu -drive if=none,file="$TEST_IMG",id=drv0 + +$QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024)) + +_send_qemu_cmd $QEMU_HANDLE 'qemu-io drv0 map' 'allocated' \ + | sed -e 's/^(qemu).*qemu-io drv0 map...$/(qemu) qemu-io drv0 map/' +_send_qemu_cmd $QEMU_HANDLE 'quit' '' + # success, all done echo '*** done' rm -f $seq.full diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out index e0e9cdc541..eecde16ad5 100644 --- a/tests/qemu-iotests/102.out +++ b/tests/qemu-iotests/102.out @@ -5,6 +5,17 @@ QA output created by 102 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image resized. [ 0] 128/ 128 sectors allocated at offset 0 bytes (1) Offset Length Mapped to File + +=== Testing map on an image file truncated outside of qemu === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image resized. +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) qemu-io drv0 map +[ 0] 128/ 128 sectors allocated at offset 0 bytes (1) *** done diff --git a/tests/qemu-iotests/107 b/tests/qemu-iotests/107 index cad1cf98a0..9862030469 100755 --- a/tests/qemu-iotests/107 +++ b/tests/qemu-iotests/107 @@ -39,7 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.filter _supported_fmt qcow2 -_supported_proto file +_supported_proto file nfs _supported_os Linux diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index f69cb6b916..3acdb307f4 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -213,5 +213,12 @@ _filter_img_info() -e "s/archipelago:a/TEST_DIR\//g" } +# filter out offsets and file names from qemu-img map +_filter_qemu_img_map() +{ + sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ + -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt +} + # make sure this script returns success /bin/true diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 9bbd5d3a17..7b2c66676b 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -67,7 +67,7 @@ 058 rw auto quick 059 rw auto quick 060 rw auto quick -061 rw auto quick +061 rw auto 062 rw auto quick 063 rw auto quick 064 rw auto quick @@ -100,6 +100,8 @@ 091 rw auto quick 092 rw auto quick 095 rw auto quick +097 rw auto backing +098 rw auto backing quick 099 rw auto quick 100 rw auto quick 101 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 39a4cfcf4d..f57f1548ac 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -267,8 +267,7 @@ class QMPTestCase(unittest.TestCase): self.assert_qmp(event, 'data/device', drive) self.assert_qmp_absent(event, 'data/error') if check_offset: - self.assert_qmp(event, 'data/offset', self.image_len) - self.assert_qmp(event, 'data/len', self.image_len) + self.assert_qmp(event, 'data/offset', event['data']['len']) completed = True self.assert_no_active_block_jobs() diff --git a/tests/qemu-iotests/sample_images/fake.parallels.bz2 b/tests/qemu-iotests/sample_images/fake.parallels.bz2 Binary files differdeleted file mode 100644 index ffb5f13bac..0000000000 --- a/tests/qemu-iotests/sample_images/fake.parallels.bz2 +++ /dev/null diff --git a/tests/qemu-iotests/sample_images/parallels-v1.bz2 b/tests/qemu-iotests/sample_images/parallels-v1.bz2 Binary files differnew file mode 100644 index 0000000000..d1ef142054 --- /dev/null +++ b/tests/qemu-iotests/sample_images/parallels-v1.bz2 diff --git a/tests/qemu-iotests/sample_images/parallels-v2.bz2 b/tests/qemu-iotests/sample_images/parallels-v2.bz2 Binary files differnew file mode 100644 index 0000000000..fd8614d061 --- /dev/null +++ b/tests/qemu-iotests/sample_images/parallels-v2.bz2 |