aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blkdebug.c128
-rw-r--r--block/commit.c2
-rw-r--r--block/mirror.c2
-rw-r--r--block/qcow2-cluster.c191
-rw-r--r--block/qcow2.c87
-rw-r--r--block/qcow2.h49
-rw-r--r--block/raw-posix.c27
-rw-r--r--block/raw-win32.c17
-rw-r--r--block/rbd.c20
-rw-r--r--block/stream.c2
-rw-r--r--block/vpc.c24
11 files changed, 382 insertions, 167 deletions
diff --git a/block/blkdebug.c b/block/blkdebug.c
index d61ece86a9..294e983306 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -29,8 +29,10 @@
typedef struct BDRVBlkdebugState {
int state;
int new_state;
+
QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
+ QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
} BDRVBlkdebugState;
typedef struct BlkdebugAIOCB {
@@ -39,6 +41,12 @@ typedef struct BlkdebugAIOCB {
int ret;
} BlkdebugAIOCB;
+typedef struct BlkdebugSuspendedReq {
+ Coroutine *co;
+ char *tag;
+ QLIST_ENTRY(BlkdebugSuspendedReq) next;
+} BlkdebugSuspendedReq;
+
static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
static const AIOCBInfo blkdebug_aiocb_info = {
@@ -49,6 +57,7 @@ static const AIOCBInfo blkdebug_aiocb_info = {
enum {
ACTION_INJECT_ERROR,
ACTION_SET_STATE,
+ ACTION_SUSPEND,
};
typedef struct BlkdebugRule {
@@ -65,6 +74,9 @@ typedef struct BlkdebugRule {
struct {
int new_state;
} set_state;
+ struct {
+ char *tag;
+ } suspend;
} options;
QLIST_ENTRY(BlkdebugRule) next;
QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
@@ -226,6 +238,11 @@ static int add_rule(QemuOpts *opts, void *opaque)
rule->options.set_state.new_state =
qemu_opt_get_number(opts, "new_state", 0);
break;
+
+ case ACTION_SUSPEND:
+ rule->options.suspend.tag =
+ g_strdup(qemu_opt_get(opts, "tag"));
+ break;
};
/* Add the rule */
@@ -234,12 +251,32 @@ static int add_rule(QemuOpts *opts, void *opaque)
return 0;
}
+static void remove_rule(BlkdebugRule *rule)
+{
+ switch (rule->action) {
+ case ACTION_INJECT_ERROR:
+ case ACTION_SET_STATE:
+ break;
+ case ACTION_SUSPEND:
+ g_free(rule->options.suspend.tag);
+ break;
+ }
+
+ QLIST_REMOVE(rule, next);
+ g_free(rule);
+}
+
static int read_config(BDRVBlkdebugState *s, const char *filename)
{
FILE *f;
int ret;
struct add_rule_data d;
+ /* Allow usage without config file */
+ if (!*filename) {
+ return 0;
+ }
+
f = fopen(filename, "r");
if (f == NULL) {
return -errno;
@@ -389,6 +426,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
}
+
static void blkdebug_close(BlockDriverState *bs)
{
BDRVBlkdebugState *s = bs->opaque;
@@ -397,12 +435,32 @@ static void blkdebug_close(BlockDriverState *bs)
for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
- QLIST_REMOVE(rule, next);
- g_free(rule);
+ remove_rule(rule);
}
}
}
+static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
+{
+ BDRVBlkdebugState *s = bs->opaque;
+ BlkdebugSuspendedReq r;
+
+ r = (BlkdebugSuspendedReq) {
+ .co = qemu_coroutine_self(),
+ .tag = g_strdup(rule->options.suspend.tag),
+ };
+
+ remove_rule(rule);
+ QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
+
+ printf("blkdebug: Suspended request '%s'\n", r.tag);
+ qemu_coroutine_yield();
+ printf("blkdebug: Resuming request '%s'\n", r.tag);
+
+ QLIST_REMOVE(&r, next);
+ g_free(r.tag);
+}
+
static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
bool injected)
{
@@ -426,6 +484,10 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
case ACTION_SET_STATE:
s->new_state = rule->options.set_state.new_state;
break;
+
+ case ACTION_SUSPEND:
+ suspend_request(bs, rule);
+ break;
}
return injected;
}
@@ -433,19 +495,72 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
BDRVBlkdebugState *s = bs->opaque;
- struct BlkdebugRule *rule;
+ struct BlkdebugRule *rule, *next;
bool injected;
assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
injected = false;
s->new_state = s->state;
- QLIST_FOREACH(rule, &s->rules[event], next) {
+ QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
injected = process_rule(bs, rule, injected);
}
s->state = s->new_state;
}
+static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag)
+{
+ BDRVBlkdebugState *s = bs->opaque;
+ struct BlkdebugRule *rule;
+ BlkDebugEvent blkdebug_event;
+
+ if (get_event_by_name(event, &blkdebug_event) < 0) {
+ return -ENOENT;
+ }
+
+
+ rule = g_malloc(sizeof(*rule));
+ *rule = (struct BlkdebugRule) {
+ .event = blkdebug_event,
+ .action = ACTION_SUSPEND,
+ .state = 0,
+ .options.suspend.tag = g_strdup(tag),
+ };
+
+ QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
+
+ return 0;
+}
+
+static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+{
+ BDRVBlkdebugState *s = bs->opaque;
+ BlkdebugSuspendedReq *r;
+
+ QLIST_FOREACH(r, &s->suspended_reqs, next) {
+ if (!strcmp(r->tag, tag)) {
+ qemu_coroutine_enter(r->co, NULL);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+
+static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+ BDRVBlkdebugState *s = bs->opaque;
+ BlkdebugSuspendedReq *r;
+
+ QLIST_FOREACH(r, &s->suspended_reqs, next) {
+ if (!strcmp(r->tag, tag)) {
+ return true;
+ }
+ }
+ return false;
+}
+
static int64_t blkdebug_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@@ -464,7 +579,10 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
- .bdrv_debug_event = blkdebug_debug_event,
+ .bdrv_debug_event = blkdebug_debug_event,
+ .bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
+ .bdrv_debug_resume = blkdebug_debug_resume,
+ .bdrv_debug_is_suspended = blkdebug_debug_is_suspended,
};
static void bdrv_blkdebug_init(void)
diff --git a/block/commit.c b/block/commit.c
index fae79582d4..e2bb1e241b 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -103,7 +103,7 @@ static void coroutine_fn commit_run(void *opaque)
wait:
/* Note that even when no rate limit is applied we need to yield
- * with no pending I/O here so that qemu_aio_flush() returns.
+ * with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
diff --git a/block/mirror.c b/block/mirror.c
index d6618a4b34..b1f5d4fa22 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -205,7 +205,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
/* Note that even when no rate limit is applied we need to yield
- * with no pending I/O here so that qemu_aio_flush() returns.
+ * with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e179211c57..468ef1be56 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -615,57 +615,67 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
BDRVQcowState *s = bs->opaque;
- int i, j = 0, l2_index, ret;
- uint64_t *old_cluster, start_sect, *l2_table;
- uint64_t cluster_offset = m->alloc_offset;
- bool cow = false;
-
- trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+ int ret;
- if (m->nb_clusters == 0)
+ if (r->nb_sectors == 0) {
return 0;
+ }
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+ qemu_co_mutex_unlock(&s->lock);
+ ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
+ r->offset / BDRV_SECTOR_SIZE,
+ r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+ qemu_co_mutex_lock(&s->lock);
- /* copy content of unmodified sectors */
- start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
- if (m->n_start) {
- cow = true;
- qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0)
- goto err;
- }
-
- if (m->nb_available & (s->cluster_sectors - 1)) {
- cow = true;
- qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available,
- align_offset(m->nb_available, s->cluster_sectors));
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ return ret;
}
/*
- * Update L2 table.
- *
* Before we update the L2 table to actually point to the new cluster, we
* need to be sure that the refcounts have been increased and COW was
* handled.
*/
- if (cow) {
- qcow2_cache_depends_on_flush(s->l2_table_cache);
+ qcow2_cache_depends_on_flush(s->l2_table_cache);
+
+ return 0;
+}
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, j = 0, l2_index, ret;
+ uint64_t *old_cluster, *l2_table;
+ uint64_t cluster_offset = m->alloc_offset;
+
+ trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+ assert(m->nb_clusters > 0);
+
+ old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+
+ /* copy content of unmodified sectors */
+ ret = perform_cow(bs, m, &m->cow_start);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = perform_cow(bs, m, &m->cow_end);
+ if (ret < 0) {
+ goto err;
}
+ /* Update L2 table. */
+ if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) {
+ qcow2_mark_dirty(bs);
+ }
if (qcow2_need_accurate_refcounts(s)) {
qcow2_cache_set_dependency(bs, s->l2_table_cache,
s->refcount_block_cache);
}
+
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
if (ret < 0) {
goto err;
@@ -743,38 +753,16 @@ out:
}
/*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
*/
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, unsigned int *nb_clusters)
+static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
+ unsigned int *nb_clusters)
{
BDRVQcowState *s = bs->opaque;
QCowL2Meta *old_alloc;
- trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
- *host_offset, *nb_clusters);
-
- /*
- * Check if there already is an AIO write request in flight which allocates
- * the same cluster. In this case we need to wait until the previous
- * request has completed and updated the L2 table accordingly.
- */
QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
uint64_t start = guest_offset >> s->cluster_bits;
@@ -807,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
abort();
}
+ return 0;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, unsigned int *nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+ *host_offset, *nb_clusters);
+
+ ret = handle_dependencies(bs, guest_offset, nb_clusters);
+ if (ret < 0) {
+ return ret;
+ }
+
/* Allocate new clusters */
trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
if (*host_offset == 0) {
@@ -818,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
*host_offset = cluster_offset;
return 0;
} else {
- int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+ ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
if (ret < 0) {
return ret;
}
@@ -847,7 +871,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
* Return 0 on success and -errno in error cases
*/
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, QCowL2Meta *m)
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret, sectors;
@@ -919,12 +943,6 @@ again:
}
/* If there is something left to allocate, do that now */
- *m = (QCowL2Meta) {
- .cluster_offset = cluster_offset,
- .nb_clusters = 0,
- };
- qemu_co_queue_init(&m->dependent_requests);
-
if (nb_clusters > 0) {
uint64_t alloc_offset;
uint64_t alloc_cluster_offset;
@@ -957,22 +975,40 @@ again:
*
* avail_sectors: Number of sectors from the start of the first
* newly allocated to the end of the last newly allocated cluster.
+ *
+ * nb_sectors: The number of sectors from the start of the first
+ * newly allocated cluster to the end of the aread that the write
+ * request actually writes to (excluding COW at the end)
*/
int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
int avail_sectors = nb_clusters
<< (s->cluster_bits - BDRV_SECTOR_BITS);
+ int alloc_n_start = keep_clusters == 0 ? n_start : 0;
+ int nb_sectors = MIN(requested_sectors, avail_sectors);
+
+ if (keep_clusters == 0) {
+ cluster_offset = alloc_cluster_offset;
+ }
+
+ *m = g_malloc0(sizeof(**m));
- *m = (QCowL2Meta) {
- .cluster_offset = keep_clusters == 0 ?
- alloc_cluster_offset : cluster_offset,
+ **m = (QCowL2Meta) {
.alloc_offset = alloc_cluster_offset,
- .offset = alloc_offset,
- .n_start = keep_clusters == 0 ? n_start : 0,
+ .offset = alloc_offset & ~(s->cluster_size - 1),
.nb_clusters = nb_clusters,
- .nb_available = MIN(requested_sectors, avail_sectors),
+ .nb_available = nb_sectors,
+
+ .cow_start = {
+ .offset = 0,
+ .nb_sectors = alloc_n_start,
+ },
+ .cow_end = {
+ .offset = nb_sectors * BDRV_SECTOR_SIZE,
+ .nb_sectors = avail_sectors - nb_sectors,
+ },
};
- qemu_co_queue_init(&m->dependent_requests);
- QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+ qemu_co_queue_init(&(*m)->dependent_requests);
+ QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
}
}
@@ -984,12 +1020,13 @@ again:
assert(sectors > n_start);
*num = sectors - n_start;
+ *host_offset = cluster_offset;
return 0;
fail:
- if (m->nb_clusters > 0) {
- QLIST_REMOVE(m, next_in_flight);
+ if (*m && (*m)->nb_clusters > 0) {
+ QLIST_REMOVE(*m, next_in_flight);
}
return ret;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index c1ff31f482..8520bda21a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -222,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
* updated successfully. Therefore it is not required to check the return
* value of this function.
*/
-static int qcow2_mark_dirty(BlockDriverState *bs)
+int qcow2_mark_dirty(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
uint64_t val;
@@ -745,21 +745,6 @@ fail:
return ret;
}
-static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
-{
- /* Take the request off the list of running requests */
- if (m->nb_clusters != 0) {
- QLIST_REMOVE(m, next_in_flight);
- }
-
- /* Restart all dependent requests */
- if (!qemu_co_queue_empty(&m->dependent_requests)) {
- qemu_co_mutex_unlock(&s->lock);
- qemu_co_queue_restart_all(&m->dependent_requests);
- qemu_co_mutex_lock(&s->lock);
- }
-}
-
static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int64_t sector_num,
int remaining_sectors,
@@ -774,15 +759,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
QEMUIOVector hd_qiov;
uint64_t bytes_done = 0;
uint8_t *cluster_data = NULL;
- QCowL2Meta l2meta = {
- .nb_clusters = 0,
- };
+ QCowL2Meta *l2meta;
trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
remaining_sectors);
- qemu_co_queue_init(&l2meta.dependent_requests);
-
qemu_iovec_init(&hd_qiov, qiov->niov);
s->cluster_cache_offset = -1; /* disable compressed cache */
@@ -791,6 +772,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
while (remaining_sectors != 0) {
+ l2meta = NULL;
+
trace_qcow2_writev_start_part(qemu_coroutine_self());
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n_end = index_in_cluster + remaining_sectors;
@@ -800,17 +783,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
}
ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- index_in_cluster, n_end, &cur_nr_sectors, &l2meta);
+ index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
if (ret < 0) {
goto fail;
}
- if (l2meta.nb_clusters > 0 &&
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
- qcow2_mark_dirty(bs);
- }
-
- cluster_offset = l2meta.cluster_offset;
assert((cluster_offset & 511) == 0);
qemu_iovec_reset(&hd_qiov);
@@ -835,8 +812,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
cur_nr_sectors * 512);
}
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
qemu_co_mutex_unlock(&s->lock);
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
(cluster_offset >> 9) + index_in_cluster);
ret = bdrv_co_writev(bs->file,
@@ -847,12 +824,24 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
goto fail;
}
- ret = qcow2_alloc_cluster_link_l2(bs, &l2meta);
- if (ret < 0) {
- goto fail;
- }
+ if (l2meta != NULL) {
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Take the request off the list of running requests */
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+ qemu_co_mutex_lock(&s->lock);
- run_dependent_requests(s, &l2meta);
+ g_free(l2meta);
+ l2meta = NULL;
+ }
remaining_sectors -= cur_nr_sectors;
sector_num += cur_nr_sectors;
@@ -862,10 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
ret = 0;
fail:
- run_dependent_requests(s, &l2meta);
-
qemu_co_mutex_unlock(&s->lock);
+ if (l2meta != NULL) {
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+ g_free(l2meta);
+ }
+
qemu_iovec_destroy(&hd_qiov);
qemu_vfree(cluster_data);
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
@@ -1128,31 +1123,33 @@ static int preallocate(BlockDriverState *bs)
{
uint64_t nb_sectors;
uint64_t offset;
+ uint64_t host_offset = 0;
int num;
int ret;
- QCowL2Meta meta;
+ QCowL2Meta *meta;
nb_sectors = bdrv_getlength(bs) >> 9;
offset = 0;
- qemu_co_queue_init(&meta.dependent_requests);
- meta.cluster_offset = 0;
while (nb_sectors) {
num = MIN(nb_sectors, INT_MAX >> 9);
- ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta);
+ ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+ &host_offset, &meta);
if (ret < 0) {
return ret;
}
- ret = qcow2_alloc_cluster_link_l2(bs, &meta);
+ ret = qcow2_alloc_cluster_link_l2(bs, meta);
if (ret < 0) {
- qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters);
+ qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters);
return ret;
}
/* There are no dependent requests, but we need to remove our request
* from the list of in-flight requests */
- run_dependent_requests(bs->opaque, &meta);
+ if (meta != NULL) {
+ QLIST_REMOVE(meta, next_in_flight);
+ }
/* TODO Preallocate data if requested */
@@ -1165,10 +1162,10 @@ static int preallocate(BlockDriverState *bs)
* all of the allocated clusters (otherwise we get failing reads after
* EOF). Extend the image to the last allocated sector.
*/
- if (meta.cluster_offset != 0) {
+ if (host_offset != 0) {
uint8_t buf[512];
memset(buf, 0, 512);
- ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1);
+ ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
if (ret < 0) {
return ret;
}
diff --git a/block/qcow2.h b/block/qcow2.h
index b4eb65470e..a60fcb429a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -196,17 +196,56 @@ typedef struct QCowCreateState {
struct QCowAIOCB;
-/* XXX This could be private for qcow2-cluster.c */
+typedef struct Qcow2COWRegion {
+ /**
+ * Offset of the COW region in bytes from the start of the first cluster
+ * touched by the request.
+ */
+ uint64_t offset;
+
+ /** Number of sectors to copy */
+ int nb_sectors;
+} Qcow2COWRegion;
+
+/**
+ * Describes an in-flight (part of a) write request that writes to clusters
+ * that are not referenced in their L2 table yet.
+ */
typedef struct QCowL2Meta
{
+ /** Guest offset of the first newly allocated cluster */
uint64_t offset;
- uint64_t cluster_offset;
+
+ /** Host offset of the first newly allocated cluster */
uint64_t alloc_offset;
- int n_start;
+
+ /**
+ * Number of sectors from the start of the first allocated cluster to
+ * the end of the (possibly shortened) request
+ */
int nb_available;
+
+ /** Number of newly allocated clusters */
int nb_clusters;
+
+ /**
+ * Requests that overlap with this allocation and wait to be restarted
+ * when the allocating request has completed.
+ */
CoQueue dependent_requests;
+ /**
+ * The COW Region between the start of the first allocated cluster and the
+ * area the guest actually writes to.
+ */
+ Qcow2COWRegion cow_start;
+
+ /**
+ * The COW Region between the area the guest actually writes to and the
+ * end of the last allocated cluster.
+ */
+ Qcow2COWRegion cow_end;
+
QLIST_ENTRY(QCowL2Meta) next_in_flight;
} QCowL2Meta;
@@ -264,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
/* qcow2.c functions */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
+
+int qcow2_mark_dirty(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
/* qcow2-refcount.c functions */
@@ -297,7 +338,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, QCowL2Meta *m);
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 550c81f22b..abfedbea73 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -708,22 +708,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
-static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
- unsigned long int req, void *buf,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
-
- acb->bs = bs;
- acb->aio_type = QEMU_AIO_IOCTL;
- acb->aio_fildes = fd;
- acb->aio_offset = 0;
- acb->aio_ioctl_buf = buf;
- acb->aio_ioctl_cmd = req;
-
- return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}
-
static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -1346,10 +1330,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData *acb;
if (fd_open(bs) < 0)
return NULL;
- return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
+
+ acb = g_slice_new(RawPosixAIOData);
+ acb->bs = bs;
+ acb->aio_type = QEMU_AIO_IOCTL;
+ acb->aio_fildes = s->fd;
+ acb->aio_offset = 0;
+ acb->aio_ioctl_buf = buf;
+ acb->aio_ioctl_cmd = req;
+ return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 0c05c58c5a..ce207a3109 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -303,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
BDRVRawState *s = bs->opaque;
LONG low, high;
+ DWORD dwPtrLow;
low = offset;
high = offset >> 32;
- if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN))
- return -EIO;
- if (!SetEndOfFile(s->hfile))
+
+ /*
+ * An error has occurred if the return value is INVALID_SET_FILE_POINTER
+ * and GetLastError doesn't return NO_ERROR.
+ */
+ dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
+ if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
+ fprintf(stderr, "SetFilePointer error: %d\n", GetLastError());
+ return -EIO;
+ }
+ if (SetEndOfFile(s->hfile) == 0) {
+ fprintf(stderr, "SetEndOfFile error: %d\n", GetLastError());
return -EIO;
+ }
return 0;
}
diff --git a/block/rbd.c b/block/rbd.c
index f3becc7a8b..737bab16cc 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -77,6 +77,7 @@ typedef struct RBDAIOCB {
int error;
struct BDRVRBDState *s;
int cancelled;
+ int status;
} RBDAIOCB;
typedef struct RADOSCB {
@@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
RBDAIOCB *acb = rcb->acb;
int64_t r;
- if (acb->cancelled) {
- qemu_vfree(acb->bounce);
- qemu_aio_release(acb);
- goto done;
- }
-
r = rcb->ret;
if (acb->cmd == RBD_AIO_WRITE ||
@@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
/* Note that acb->bh can be NULL in case where the aio was cancelled */
acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
qemu_bh_schedule(acb->bh);
-done:
g_free(rcb);
}
@@ -568,6 +562,12 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
{
RBDAIOCB *acb = (RBDAIOCB *) blockacb;
acb->cancelled = 1;
+
+ while (acb->status == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ qemu_aio_release(acb);
}
static const AIOCBInfo rbd_aiocb_info = {
@@ -639,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque)
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_bh_delete(acb->bh);
acb->bh = NULL;
+ acb->status = 0;
- qemu_aio_release(acb);
+ if (!acb->cancelled) {
+ qemu_aio_release(acb);
+ }
}
static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -685,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->s = s;
acb->cancelled = 0;
acb->bh = NULL;
+ acb->status = -EINPROGRESS;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
diff --git a/block/stream.c b/block/stream.c
index 0c0fc7a13b..0dcd286035 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -108,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque)
wait:
/* Note that even when no rate limit is applied we need to yield
- * with no pending I/O here so that qemu_aio_flush() returns.
+ * with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
diff --git a/block/vpc.c b/block/vpc.c
index b6bf52f140..566e9a3b37 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -26,6 +26,9 @@
#include "block_int.h"
#include "module.h"
#include "migration.h"
+#if defined(CONFIG_UUID)
+#include <uuid/uuid.h>
+#endif
/**************************************************************/
@@ -198,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags)
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
- if (bs->total_sectors >= 65535 * 16 * 255) {
+ /* Allow a maximum disk size of approximately 2 TB */
+ if (bs->total_sectors >= 65535LL * 255 * 255) {
err = -EFBIG;
goto fail;
}
@@ -524,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
* Note that the geometry doesn't always exactly match total_sectors but
* may round it down.
*
- * Returns 0 on success, -EFBIG if the size is larger than 127 GB
+ * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
+ * and instead allow up to 255 heads.
*/
static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
uint8_t* heads, uint8_t* secs_per_cyl)
{
uint32_t cyls_times_heads;
- if (total_sectors > 65535 * 16 * 255)
+ /* Allow a maximum disk size of approximately 2 TB */
+ if (total_sectors > 65535LL * 255 * 255) {
return -EFBIG;
+ }
if (total_sectors > 65535 * 16 * 63) {
*secs_per_cyl = 255;
- *heads = 16;
+ if (total_sectors > 65535 * 16 * 255) {
+ *heads = 255;
+ } else {
+ *heads = 16;
+ }
cyls_times_heads = total_sectors / *secs_per_cyl;
} else {
*secs_per_cyl = 17;
@@ -739,7 +751,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
footer->type = be32_to_cpu(disk_type);
- /* TODO uuid is missing */
+#if defined(CONFIG_UUID)
+ uuid_generate(footer->uuid);
+#endif
footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));