diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blkdebug.c | 128 | ||||
-rw-r--r-- | block/commit.c | 2 | ||||
-rw-r--r-- | block/mirror.c | 2 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 191 | ||||
-rw-r--r-- | block/qcow2.c | 87 | ||||
-rw-r--r-- | block/qcow2.h | 49 | ||||
-rw-r--r-- | block/raw-posix.c | 27 | ||||
-rw-r--r-- | block/raw-win32.c | 17 | ||||
-rw-r--r-- | block/rbd.c | 20 | ||||
-rw-r--r-- | block/stream.c | 2 | ||||
-rw-r--r-- | block/vpc.c | 24 |
11 files changed, 382 insertions, 167 deletions
diff --git a/block/blkdebug.c b/block/blkdebug.c index d61ece86a9..294e983306 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -29,8 +29,10 @@ typedef struct BDRVBlkdebugState { int state; int new_state; + QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; + QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; } BDRVBlkdebugState; typedef struct BlkdebugAIOCB { @@ -39,6 +41,12 @@ typedef struct BlkdebugAIOCB { int ret; } BlkdebugAIOCB; +typedef struct BlkdebugSuspendedReq { + Coroutine *co; + char *tag; + QLIST_ENTRY(BlkdebugSuspendedReq) next; +} BlkdebugSuspendedReq; + static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb); static const AIOCBInfo blkdebug_aiocb_info = { @@ -49,6 +57,7 @@ static const AIOCBInfo blkdebug_aiocb_info = { enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, + ACTION_SUSPEND, }; typedef struct BlkdebugRule { @@ -65,6 +74,9 @@ typedef struct BlkdebugRule { struct { int new_state; } set_state; + struct { + char *tag; + } suspend; } options; QLIST_ENTRY(BlkdebugRule) next; QSIMPLEQ_ENTRY(BlkdebugRule) active_next; @@ -226,6 +238,11 @@ static int add_rule(QemuOpts *opts, void *opaque) rule->options.set_state.new_state = qemu_opt_get_number(opts, "new_state", 0); break; + + case ACTION_SUSPEND: + rule->options.suspend.tag = + g_strdup(qemu_opt_get(opts, "tag")); + break; }; /* Add the rule */ @@ -234,12 +251,32 @@ static int add_rule(QemuOpts *opts, void *opaque) return 0; } +static void remove_rule(BlkdebugRule *rule) +{ + switch (rule->action) { + case ACTION_INJECT_ERROR: + case ACTION_SET_STATE: + break; + case ACTION_SUSPEND: + g_free(rule->options.suspend.tag); + break; + } + + QLIST_REMOVE(rule, next); + g_free(rule); +} + static int read_config(BDRVBlkdebugState *s, const char *filename) { FILE *f; int ret; struct add_rule_data d; + /* Allow usage without config file */ + if (!*filename) { + return 0; + } + f = fopen(filename, "r"); if (f == NULL) { return -errno; @@ -389,6 +426,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs, return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque); } + static void blkdebug_close(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; @@ -397,12 +435,32 @@ static void blkdebug_close(BlockDriverState *bs) for (i = 0; i < BLKDBG_EVENT_MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { - QLIST_REMOVE(rule, next); - g_free(rule); + remove_rule(rule); } } } +static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq r; + + r = (BlkdebugSuspendedReq) { + .co = qemu_coroutine_self(), + .tag = g_strdup(rule->options.suspend.tag), + }; + + remove_rule(rule); + QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + + printf("blkdebug: Suspended request '%s'\n", r.tag); + qemu_coroutine_yield(); + printf("blkdebug: Resuming request '%s'\n", r.tag); + + QLIST_REMOVE(&r, next); + g_free(r.tag); +} + static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, bool injected) { @@ -426,6 +484,10 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, case ACTION_SET_STATE: s->new_state = rule->options.set_state.new_state; break; + + case ACTION_SUSPEND: + suspend_request(bs, rule); + break; } return injected; } @@ -433,19 +495,72 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; - struct BlkdebugRule *rule; + struct BlkdebugRule *rule, *next; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; s->new_state = s->state; - QLIST_FOREACH(rule, &s->rules[event], next) { + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { injected = process_rule(bs, rule, injected); } s->state = s->new_state; } +static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + struct BlkdebugRule *rule; + BlkDebugEvent blkdebug_event; + + if (get_event_by_name(event, &blkdebug_event) < 0) { + return -ENOENT; + } + + + rule = g_malloc(sizeof(*rule)); + *rule = (struct BlkdebugRule) { + .event = blkdebug_event, + .action = ACTION_SUSPEND, + .state = 0, + .options.suspend.tag = g_strdup(tag), + }; + + QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + + return 0; +} + +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co, NULL); + return 0; + } + } + return -ENOENT; +} + + +static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + return true; + } + } + return false; +} + static int64_t blkdebug_getlength(BlockDriverState *bs) { return bdrv_getlength(bs->file); @@ -464,7 +579,10 @@ static BlockDriver bdrv_blkdebug = { .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev = blkdebug_aio_writev, - .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + .bdrv_debug_resume = blkdebug_debug_resume, + .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, }; static void bdrv_blkdebug_init(void) diff --git a/block/commit.c b/block/commit.c index fae79582d4..e2bb1e241b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -103,7 +103,7 @@ static void coroutine_fn commit_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/mirror.c b/block/mirror.c index d6618a4b34..b1f5d4fa22 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -205,7 +205,7 @@ static void coroutine_fn mirror_run(void *opaque) } /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e179211c57..468ef1be56 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -615,57 +615,67 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) { BDRVQcowState *s = bs->opaque; - int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, *l2_table; - uint64_t cluster_offset = m->alloc_offset; - bool cow = false; - - trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + int ret; - if (m->nb_clusters == 0) + if (r->nb_sectors == 0) { return 0; + } - old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + qemu_co_mutex_unlock(&s->lock); + ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, + r->offset / BDRV_SECTOR_SIZE, + r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); + qemu_co_mutex_lock(&s->lock); - /* copy content of unmodified sectors */ - start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; - if (m->n_start) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; - } - - if (m->nb_available & (s->cluster_sectors - 1)) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available, - align_offset(m->nb_available, s->cluster_sectors)); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; + if (ret < 0) { + return ret; } /* - * Update L2 table. - * * Before we update the L2 table to actually point to the new cluster, we * need to be sure that the refcounts have been increased and COW was * handled. */ - if (cow) { - qcow2_cache_depends_on_flush(s->l2_table_cache); + qcow2_cache_depends_on_flush(s->l2_table_cache); + + return 0; +} + +int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int i, j = 0, l2_index, ret; + uint64_t *old_cluster, *l2_table; + uint64_t cluster_offset = m->alloc_offset; + + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + assert(m->nb_clusters > 0); + + old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + + /* copy content of unmodified sectors */ + ret = perform_cow(bs, m, &m->cow_start); + if (ret < 0) { + goto err; + } + + ret = perform_cow(bs, m, &m->cow_end); + if (ret < 0) { + goto err; } + /* Update L2 table. */ + if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) { + qcow2_mark_dirty(bs); + } if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } + ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); if (ret < 0) { goto err; @@ -743,38 +753,16 @@ out: } /* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. - * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. - * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, unsigned int *nb_clusters) +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, + unsigned int *nb_clusters) { BDRVQcowState *s = bs->opaque; QCowL2Meta *old_alloc; - trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, - *host_offset, *nb_clusters); - - /* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { uint64_t start = guest_offset >> s->cluster_bits; @@ -807,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, abort(); } + return 0; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + int ret; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + + ret = handle_dependencies(bs, guest_offset, nb_clusters); + if (ret < 0) { + return ret; + } + /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -818,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { - int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); + ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret < 0) { return ret; } @@ -847,7 +871,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; int l2_index, ret, sectors; @@ -919,12 +943,6 @@ again: } /* If there is something left to allocate, do that now */ - *m = (QCowL2Meta) { - .cluster_offset = cluster_offset, - .nb_clusters = 0, - }; - qemu_co_queue_init(&m->dependent_requests); - if (nb_clusters > 0) { uint64_t alloc_offset; uint64_t alloc_cluster_offset; @@ -957,22 +975,40 @@ again: * * avail_sectors: Number of sectors from the start of the first * newly allocated to the end of the last newly allocated cluster. + * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the aread that the write + * request actually writes to (excluding COW at the end) */ int requested_sectors = n_end - keep_clusters * s->cluster_sectors; int avail_sectors = nb_clusters << (s->cluster_bits - BDRV_SECTOR_BITS); + int alloc_n_start = keep_clusters == 0 ? n_start : 0; + int nb_sectors = MIN(requested_sectors, avail_sectors); + + if (keep_clusters == 0) { + cluster_offset = alloc_cluster_offset; + } + + *m = g_malloc0(sizeof(**m)); - *m = (QCowL2Meta) { - .cluster_offset = keep_clusters == 0 ? - alloc_cluster_offset : cluster_offset, + **m = (QCowL2Meta) { .alloc_offset = alloc_cluster_offset, - .offset = alloc_offset, - .n_start = keep_clusters == 0 ? n_start : 0, + .offset = alloc_offset & ~(s->cluster_size - 1), .nb_clusters = nb_clusters, - .nb_available = MIN(requested_sectors, avail_sectors), + .nb_available = nb_sectors, + + .cow_start = { + .offset = 0, + .nb_sectors = alloc_n_start, + }, + .cow_end = { + .offset = nb_sectors * BDRV_SECTOR_SIZE, + .nb_sectors = avail_sectors - nb_sectors, + }, }; - qemu_co_queue_init(&m->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); } } @@ -984,12 +1020,13 @@ again: assert(sectors > n_start); *num = sectors - n_start; + *host_offset = cluster_offset; return 0; fail: - if (m->nb_clusters > 0) { - QLIST_REMOVE(m, next_in_flight); + if (*m && (*m)->nb_clusters > 0) { + QLIST_REMOVE(*m, next_in_flight); } return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index c1ff31f482..8520bda21a 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -222,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs, * updated successfully. Therefore it is not required to check the return * value of this function. */ -static int qcow2_mark_dirty(BlockDriverState *bs) +int qcow2_mark_dirty(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; uint64_t val; @@ -745,21 +745,6 @@ fail: return ret; } -static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) -{ - /* Take the request off the list of running requests */ - if (m->nb_clusters != 0) { - QLIST_REMOVE(m, next_in_flight); - } - - /* Restart all dependent requests */ - if (!qemu_co_queue_empty(&m->dependent_requests)) { - qemu_co_mutex_unlock(&s->lock); - qemu_co_queue_restart_all(&m->dependent_requests); - qemu_co_mutex_lock(&s->lock); - } -} - static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, @@ -774,15 +759,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; - QCowL2Meta l2meta = { - .nb_clusters = 0, - }; + QCowL2Meta *l2meta; trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, remaining_sectors); - qemu_co_queue_init(&l2meta.dependent_requests); - qemu_iovec_init(&hd_qiov, qiov->niov); s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -791,6 +772,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + l2meta = NULL; + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; @@ -800,17 +783,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, n_end, &cur_nr_sectors, &l2meta); + index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta); if (ret < 0) { goto fail; } - if (l2meta.nb_clusters > 0 && - (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) { - qcow2_mark_dirty(bs); - } - - cluster_offset = l2meta.cluster_offset; assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); @@ -835,8 +812,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, cur_nr_sectors * 512); } - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, @@ -847,12 +824,24 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - ret = qcow2_alloc_cluster_link_l2(bs, &l2meta); - if (ret < 0) { - goto fail; - } + if (l2meta != NULL) { + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); + if (ret < 0) { + goto fail; + } + + /* Take the request off the list of running requests */ + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + + qemu_co_mutex_unlock(&s->lock); + qemu_co_queue_restart_all(&l2meta->dependent_requests); + qemu_co_mutex_lock(&s->lock); - run_dependent_requests(s, &l2meta); + g_free(l2meta); + l2meta = NULL; + } remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -862,10 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: - run_dependent_requests(s, &l2meta); - qemu_co_mutex_unlock(&s->lock); + if (l2meta != NULL) { + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + qemu_co_queue_restart_all(&l2meta->dependent_requests); + g_free(l2meta); + } + qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); @@ -1128,31 +1123,33 @@ static int preallocate(BlockDriverState *bs) { uint64_t nb_sectors; uint64_t offset; + uint64_t host_offset = 0; int num; int ret; - QCowL2Meta meta; + QCowL2Meta *meta; nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; - qemu_co_queue_init(&meta.dependent_requests); - meta.cluster_offset = 0; while (nb_sectors) { num = MIN(nb_sectors, INT_MAX >> 9); - ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta); + ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, + &host_offset, &meta); if (ret < 0) { return ret; } - ret = qcow2_alloc_cluster_link_l2(bs, &meta); + ret = qcow2_alloc_cluster_link_l2(bs, meta); if (ret < 0) { - qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); + qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters); return ret; } /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ - run_dependent_requests(bs->opaque, &meta); + if (meta != NULL) { + QLIST_REMOVE(meta, next_in_flight); + } /* TODO Preallocate data if requested */ @@ -1165,10 +1162,10 @@ static int preallocate(BlockDriverState *bs) * all of the allocated clusters (otherwise we get failing reads after * EOF). Extend the image to the last allocated sector. */ - if (meta.cluster_offset != 0) { + if (host_offset != 0) { uint8_t buf[512]; memset(buf, 0, 512); - ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1); + ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1); if (ret < 0) { return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index b4eb65470e..a60fcb429a 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -196,17 +196,56 @@ typedef struct QCowCreateState { struct QCowAIOCB; -/* XXX This could be private for qcow2-cluster.c */ +typedef struct Qcow2COWRegion { + /** + * Offset of the COW region in bytes from the start of the first cluster + * touched by the request. + */ + uint64_t offset; + + /** Number of sectors to copy */ + int nb_sectors; +} Qcow2COWRegion; + +/** + * Describes an in-flight (part of a) write request that writes to clusters + * that are not referenced in their L2 table yet. + */ typedef struct QCowL2Meta { + /** Guest offset of the first newly allocated cluster */ uint64_t offset; - uint64_t cluster_offset; + + /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; - int n_start; + + /** + * Number of sectors from the start of the first allocated cluster to + * the end of the (possibly shortened) request + */ int nb_available; + + /** Number of newly allocated clusters */ int nb_clusters; + + /** + * Requests that overlap with this allocation and wait to be restarted + * when the allocating request has completed. + */ CoQueue dependent_requests; + /** + * The COW Region between the start of the first allocated cluster and the + * area the guest actually writes to. + */ + Qcow2COWRegion cow_start; + + /** + * The COW Region between the area the guest actually writes to and the + * end of the last allocated cluster. + */ + Qcow2COWRegion cow_end; + QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; @@ -264,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); + +int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ @@ -297,7 +338,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m); + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size); diff --git a/block/raw-posix.c b/block/raw-posix.c index 550c81f22b..abfedbea73 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -708,22 +708,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } -static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); - - acb->bs = bs; - acb->aio_type = QEMU_AIO_IOCTL; - acb->aio_fildes = fd; - acb->aio_offset = 0; - acb->aio_ioctl_buf = buf; - acb->aio_ioctl_cmd = req; - - return thread_pool_submit_aio(aio_worker, acb, cb, opaque); -} - static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type) @@ -1346,10 +1330,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; + RawPosixAIOData *acb; if (fd_open(bs) < 0) return NULL; - return paio_ioctl(bs, s->fd, req, buf, cb, opaque); + + acb = g_slice_new(RawPosixAIOData); + acb->bs = bs; + acb->aio_type = QEMU_AIO_IOCTL; + acb->aio_fildes = s->fd; + acb->aio_offset = 0; + acb->aio_ioctl_buf = buf; + acb->aio_ioctl_cmd = req; + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) diff --git a/block/raw-win32.c b/block/raw-win32.c index 0c05c58c5a..ce207a3109 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -303,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset) { BDRVRawState *s = bs->opaque; LONG low, high; + DWORD dwPtrLow; low = offset; high = offset >> 32; - if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN)) - return -EIO; - if (!SetEndOfFile(s->hfile)) + + /* + * An error has occurred if the return value is INVALID_SET_FILE_POINTER + * and GetLastError doesn't return NO_ERROR. + */ + dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN); + if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + fprintf(stderr, "SetFilePointer error: %d\n", GetLastError()); + return -EIO; + } + if (SetEndOfFile(s->hfile) == 0) { + fprintf(stderr, "SetEndOfFile error: %d\n", GetLastError()); return -EIO; + } return 0; } diff --git a/block/rbd.c b/block/rbd.c index f3becc7a8b..737bab16cc 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -77,6 +77,7 @@ typedef struct RBDAIOCB { int error; struct BDRVRBDState *s; int cancelled; + int status; } RBDAIOCB; typedef struct RADOSCB { @@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) RBDAIOCB *acb = rcb->acb; int64_t r; - if (acb->cancelled) { - qemu_vfree(acb->bounce); - qemu_aio_release(acb); - goto done; - } - r = rcb->ret; if (acb->cmd == RBD_AIO_WRITE || @@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) /* Note that acb->bh can be NULL in case where the aio was cancelled */ acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); qemu_bh_schedule(acb->bh); -done: g_free(rcb); } @@ -568,6 +562,12 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) { RBDAIOCB *acb = (RBDAIOCB *) blockacb; acb->cancelled = 1; + + while (acb->status == -EINPROGRESS) { + qemu_aio_wait(); + } + + qemu_aio_release(acb); } static const AIOCBInfo rbd_aiocb_info = { @@ -639,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque) acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); qemu_bh_delete(acb->bh); acb->bh = NULL; + acb->status = 0; - qemu_aio_release(acb); + if (!acb->cancelled) { + qemu_aio_release(acb); + } } static int rbd_aio_discard_wrapper(rbd_image_t image, @@ -685,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->s = s; acb->cancelled = 0; acb->bh = NULL; + acb->status = -EINPROGRESS; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); diff --git a/block/stream.c b/block/stream.c index 0c0fc7a13b..0dcd286035 100644 --- a/block/stream.c +++ b/block/stream.c @@ -108,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/vpc.c b/block/vpc.c index b6bf52f140..566e9a3b37 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -26,6 +26,9 @@ #include "block_int.h" #include "module.h" #include "migration.h" +#if defined(CONFIG_UUID) +#include <uuid/uuid.h> +#endif /**************************************************************/ @@ -198,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags) bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - if (bs->total_sectors >= 65535 * 16 * 255) { + /* Allow a maximum disk size of approximately 2 TB */ + if (bs->total_sectors >= 65535LL * 255 * 255) { err = -EFBIG; goto fail; } @@ -524,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, * Note that the geometry doesn't always exactly match total_sectors but * may round it down. * - * Returns 0 on success, -EFBIG if the size is larger than 127 GB + * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override + * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) + * and instead allow up to 255 heads. */ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, uint8_t* heads, uint8_t* secs_per_cyl) { uint32_t cyls_times_heads; - if (total_sectors > 65535 * 16 * 255) + /* Allow a maximum disk size of approximately 2 TB */ + if (total_sectors > 65535LL * 255 * 255) { return -EFBIG; + } if (total_sectors > 65535 * 16 * 63) { *secs_per_cyl = 255; - *heads = 16; + if (total_sectors > 65535 * 16 * 255) { + *heads = 255; + } else { + *heads = 16; + } cyls_times_heads = total_sectors / *secs_per_cyl; } else { *secs_per_cyl = 17; @@ -739,7 +751,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) footer->type = be32_to_cpu(disk_type); - /* TODO uuid is missing */ +#if defined(CONFIG_UUID) + uuid_generate(footer->uuid); +#endif footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); |