aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c88
-rw-r--r--block.h21
-rw-r--r--block/blkdebug.c7
-rw-r--r--block/cow.c6
-rw-r--r--block/qcow.c6
-rw-r--r--block/qcow2-cluster.c18
-rw-r--r--block/qcow2.c6
-rw-r--r--block/qed-check.c9
-rw-r--r--block/qed.c33
-rw-r--r--block/qed.h2
-rw-r--r--block/raw.c6
-rw-r--r--block/sheepdog.c144
-rw-r--r--block/stream.c41
-rw-r--r--block/vdi.c429
-rw-r--r--block/vmdk.c4
-rw-r--r--block/vpc.c9
-rw-r--r--block_int.h135
-rw-r--r--blockdev.c12
-rw-r--r--dma-helpers.c21
-rw-r--r--dma.h12
-rw-r--r--hw/ide/core.c60
-rw-r--r--hw/ide/internal.h7
-rw-r--r--hw/ide/macio.c3
-rw-r--r--hw/ide/qdev.c7
-rw-r--r--hw/lsi53c895a.c1
-rw-r--r--hw/qdev-properties.c46
-rw-r--r--hw/qdev.h3
-rw-r--r--linux-aio.c1
-rw-r--r--migration.c1
-rw-r--r--qemu-aio.h21
-rw-r--r--qemu-img.c12
-rw-r--r--qemu-io.c10
-rw-r--r--qerror.c7
-rw-r--r--qerror.h4
-rwxr-xr-xscripts/tracetool4
-rwxr-xr-xtests/qemu-iotests/0094
-rwxr-xr-xtests/qemu-iotests/0106
-rwxr-xr-xtests/qemu-iotests/0112
-rwxr-xr-xtests/qemu-iotests/03172
-rw-r--r--tests/qemu-iotests/031.out76
-rw-r--r--tests/qemu-iotests/common.rc9
-rw-r--r--tests/qemu-iotests/group1
-rwxr-xr-xtests/qemu-iotests/qcow2.py207
-rw-r--r--trace-events2
44 files changed, 1081 insertions, 494 deletions
diff --git a/block.c b/block.c
index b88ee90817..c0c90f061b 100644
--- a/block.c
+++ b/block.c
@@ -813,6 +813,9 @@ unlink_and_fail:
void bdrv_close(BlockDriverState *bs)
{
if (bs->drv) {
+ if (bs->job) {
+ block_job_cancel_sync(bs->job);
+ }
if (bs == bs_snapshots) {
bs_snapshots = NULL;
}
@@ -889,14 +892,16 @@ void bdrv_make_anon(BlockDriverState *bs)
* This will modify the BlockDriverState fields, and swap contents
* between bs_new and bs_top. Both bs_new and bs_top are modified.
*
+ * bs_new is required to be anonymous.
+ *
* This function does not create any image files.
*/
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
BlockDriverState tmp;
- /* the new bs must not be in bdrv_states */
- bdrv_make_anon(bs_new);
+ /* bs_new must be anonymous */
+ assert(bs_new->device_name[0] == '\0');
tmp = *bs_new;
@@ -941,11 +946,18 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
* swapping bs_new and bs_top contents. */
tmp.backing_hd = bs_new;
pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
+ bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
/* swap contents of the fixed new bs and the current top */
*bs_new = *bs_top;
*bs_top = tmp;
+ /* device_name[] was carried over from the old bs_top. bs_new
+ * shouldn't be in bdrv_states, so we need to make device_name[]
+ * reflect the anonymity of bs_new
+ */
+ bs_new->device_name[0] = '\0';
+
/* clear the copied fields in the new backing file */
bdrv_detach_dev(bs_new, bs_new->dev);
@@ -966,6 +978,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
void bdrv_delete(BlockDriverState *bs)
{
assert(!bs->dev);
+ assert(!bs->job);
+ assert(!bs->in_use);
/* remove from list, if necessary */
bdrv_make_anon(bs);
@@ -1463,6 +1477,17 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
qemu_iovec_init_external(&qiov, &iov, 1);
+ /**
+ * In sync call context, when the vcpu is blocked, this throttling timer
+ * will not fire; so the I/O throttling function has to be disabled here
+ * if it has been enabled.
+ */
+ if (bs->io_limits_enabled) {
+ fprintf(stderr, "Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.\n", bdrv_get_device_name(bs));
+ bdrv_io_limits_disable(bs);
+ }
+
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
@@ -1969,10 +1994,19 @@ static int guess_disk_lchs(BlockDriverState *bs,
struct partition *p;
uint32_t nr_sects;
uint64_t nb_sectors;
+ bool enabled;
bdrv_get_geometry(bs, &nb_sectors);
+ /**
+ * The function will be invoked during startup not only in sync I/O mode,
+ * but also in async I/O mode. So the I/O throttling function has to
+ * be disabled temporarily here, not permanently.
+ */
+ enabled = bs->io_limits_enabled;
+ bs->io_limits_enabled = false;
ret = bdrv_read(bs, 0, buf, 1);
+ bs->io_limits_enabled = enabled;
if (ret < 0)
return -1;
/* test msdos magic */
@@ -2331,9 +2365,7 @@ void bdrv_flush_all(void)
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
- if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
- bdrv_flush(bs);
- }
+ bdrv_flush(bs);
}
}
@@ -3520,7 +3552,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
int ret;
- if (!bs->drv) {
+ if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
@@ -3538,7 +3570,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
}
if (bs->drv->bdrv_co_flush_to_disk) {
- return bs->drv->bdrv_co_flush_to_disk(bs);
+ ret = bs->drv->bdrv_co_flush_to_disk(bs);
} else if (bs->drv->bdrv_aio_flush) {
BlockDriverAIOCB *acb;
CoroutineIOCompletion co = {
@@ -3547,10 +3579,10 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
if (acb == NULL) {
- return -EIO;
+ ret = -EIO;
} else {
qemu_coroutine_yield();
- return co.ret;
+ ret = co.ret;
}
} else {
/*
@@ -3564,8 +3596,16 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
*
* Let's hope the user knows what he's doing.
*/
- return 0;
+ ret = 0;
+ }
+ if (ret < 0) {
+ return ret;
}
+
+ /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
+ * in the case of cache=unsafe, so there are no useless flushes.
+ */
+ return bdrv_co_flush(bs->file);
}
void bdrv_invalidate_cache(BlockDriverState *bs)
@@ -3584,6 +3624,15 @@ void bdrv_invalidate_cache_all(void)
}
}
+void bdrv_clear_incoming_migration_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+ }
+}
+
int bdrv_flush(BlockDriverState *bs)
{
Coroutine *co;
@@ -4054,10 +4103,16 @@ void block_job_complete(BlockJob *job, int ret)
int block_job_set_speed(BlockJob *job, int64_t value)
{
+ int rc;
+
if (!job->job_type->set_speed) {
return -ENOTSUP;
}
- return job->job_type->set_speed(job, value);
+ rc = job->job_type->set_speed(job, value);
+ if (rc == 0) {
+ job->speed = value;
+ }
+ return rc;
}
void block_job_cancel(BlockJob *job)
@@ -4069,3 +4124,14 @@ bool block_job_is_cancelled(BlockJob *job)
{
return job->cancelled;
}
+
+void block_job_cancel_sync(BlockJob *job)
+{
+ BlockDriverState *bs = job->bs;
+
+ assert(bs->job == job);
+ block_job_cancel(job);
+ while (bs->job != NULL && bs->job->busy) {
+ qemu_aio_wait();
+ }
+}
diff --git a/block.h b/block.h
index 415bb17c73..f163e54b5f 100644
--- a/block.h
+++ b/block.h
@@ -15,8 +15,15 @@ typedef struct BlockDriverInfo {
int cluster_size;
/* offset at which the VM state can be saved (0 if not possible) */
int64_t vm_state_offset;
+ bool is_dirty;
} BlockDriverInfo;
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+} BlockFragInfo;
+
typedef struct QEMUSnapshotInfo {
char id_str[128]; /* unique snapshot id */
/* the following fields are informative. They are not needed for
@@ -71,6 +78,7 @@ typedef struct BlockDevOps {
#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
@@ -175,13 +183,12 @@ typedef struct BdrvCheckResult {
int corruptions;
int leaks;
int check_errors;
+ BlockFragInfo bfi;
} BdrvCheckResult;
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res);
/* async block I/O */
-typedef struct BlockDriverAIOCB BlockDriverAIOCB;
-typedef void BlockDriverCompletionFunc(void *opaque, int ret);
typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
int sector_num);
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
@@ -222,6 +229,8 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
void bdrv_invalidate_cache(BlockDriverState *bs);
void bdrv_invalidate_cache_all(void);
+void bdrv_clear_incoming_migration_all(void);
+
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
@@ -437,10 +446,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \
DEFINE_PROP_DRIVE("drive", _state, _conf.bs), \
- DEFINE_PROP_UINT16("logical_block_size", _state, \
- _conf.logical_block_size, 512), \
- DEFINE_PROP_UINT16("physical_block_size", _state, \
- _conf.physical_block_size, 512), \
+ DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \
+ _conf.logical_block_size, 512), \
+ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \
+ _conf.physical_block_size, 512), \
DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), \
DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
DEFINE_PROP_INT32("bootindex", _state, _conf.bootindex, -1), \
diff --git a/block/blkdebug.c b/block/blkdebug.c
index a251802ad4..e56e37da51 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -397,12 +397,6 @@ static void blkdebug_close(BlockDriverState *bs)
}
}
-static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_flush(bs->file, cb, opaque);
-}
-
static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
BlkdebugVars *old_vars)
{
@@ -452,7 +446,6 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
- .bdrv_aio_flush = blkdebug_aio_flush,
.bdrv_debug_event = blkdebug_debug_event,
};
diff --git a/block/cow.c b/block/cow.c
index bb5927c6aa..8d3c9f873c 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -318,11 +318,6 @@ exit:
return ret;
}
-static coroutine_fn int cow_co_flush(BlockDriverState *bs)
-{
- return bdrv_co_flush(bs->file);
-}
-
static QEMUOptionParameter cow_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -348,7 +343,6 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
- .bdrv_co_flush_to_disk = cow_co_flush,
.bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,
diff --git a/block/qcow.c b/block/qcow.c
index b1cfe1f696..35dff497ae 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -835,11 +835,6 @@ fail:
return ret;
}
-static coroutine_fn int qcow_co_flush(BlockDriverState *bs)
-{
- return bdrv_co_flush(bs->file);
-}
-
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVQcowState *s = bs->opaque;
@@ -877,7 +872,6 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
- .bdrv_co_flush_to_disk = qcow_co_flush,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e0fb90792f..cbd224dc46 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -466,7 +466,6 @@ out:
*/
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
uint64_t **new_l2_table,
- uint64_t *new_l2_offset,
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
@@ -514,7 +513,6 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*new_l2_table = l2_table;
- *new_l2_offset = l2_offset;
*new_l2_index = l2_index;
return 0;
@@ -539,11 +537,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
- uint64_t l2_offset, *l2_table;
+ uint64_t *l2_table;
int64_t cluster_offset;
int nb_csectors;
- ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return 0;
}
@@ -588,7 +586,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcowState *s = bs->opaque;
int i, j = 0, l2_index, ret;
- uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
+ uint64_t *old_cluster, start_sect, *l2_table;
uint64_t cluster_offset = m->alloc_offset;
bool cow = false;
@@ -633,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
}
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
- ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
+ ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
if (ret < 0) {
goto err;
}
@@ -817,7 +815,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret, sectors;
- uint64_t l2_offset, *l2_table;
+ uint64_t *l2_table;
unsigned int nb_clusters, keep_clusters;
uint64_t cluster_offset;
@@ -825,7 +823,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
n_start, n_end);
/* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
@@ -1000,12 +998,12 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
unsigned int nb_clusters)
{
BDRVQcowState *s = bs->opaque;
- uint64_t l2_offset, *l2_table;
+ uint64_t *l2_table;
int l2_index;
int ret;
int i;
- ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 7aece65406..70d3141dd1 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1253,11 +1253,6 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
return 0;
}
-static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs)
-{
- return bdrv_co_flush(bs->file);
-}
-
static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
@@ -1377,7 +1372,6 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_co_readv = qcow2_co_readv,
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
- .bdrv_co_flush_to_disk = qcow2_co_flush_to_disk,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_truncate = qcow2_truncate,
diff --git a/block/qed-check.c b/block/qed-check.c
index e4a49ce72c..94327ff5b3 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -68,6 +68,7 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
{
BDRVQEDState *s = check->s;
unsigned int i, num_invalid = 0;
+ uint64_t last_offset = 0;
for (i = 0; i < s->table_nelems; i++) {
uint64_t offset = table->offsets[i];
@@ -76,6 +77,11 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
qed_offset_is_zero_cluster(offset)) {
continue;
}
+ check->result->bfi.allocated_clusters++;
+ if (last_offset && (last_offset + s->header.cluster_size != offset)) {
+ check->result->bfi.fragmented_clusters++;
+ }
+ last_offset = offset;
/* Detect invalid cluster offset */
if (!qed_check_cluster_offset(s, offset)) {
@@ -200,6 +206,9 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
sizeof(check.used_clusters[0]));
+ check.result->bfi.total_clusters =
+ (s->header.image_size + s->header.cluster_size - 1) /
+ s->header.cluster_size;
ret = qed_check_l1_table(&check, s->l1_table);
if (ret == 0) {
/* Only check for leaks if entire image was scanned successfully */
diff --git a/block/qed.c b/block/qed.c
index a041d31e66..366cde7ad8 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -450,7 +450,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file)) {
+ !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@@ -477,7 +477,8 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
* potentially inconsistent images to be opened read-only. This can
* aid data recovery from an otherwise inconsistent image.
*/
- if (!bdrv_is_read_only(bs->file)) {
+ if (!bdrv_is_read_only(bs->file) &&
+ !(flags & BDRV_O_INCOMING)) {
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
@@ -497,12 +498,6 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
s->need_check_timer = qemu_new_timer_ns(vm_clock,
qed_need_check_timer_cb, s);
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "qed", bs->device_name, "live migration");
- migrate_add_blocker(s->migration_blocker);
-
-
out:
if (ret) {
qed_free_l2_cache(&s->l2_cache);
@@ -515,9 +510,6 @@ static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
- migrate_del_blocker(s->migration_blocker);
- error_free(s->migration_blocker);
-
qed_cancel_need_check_timer(s);
qemu_free_timer(s->need_check_timer);
@@ -1350,13 +1342,6 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
opaque, QED_AIOCB_WRITE);
}
-static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return bdrv_aio_flush(bs->file, cb, opaque);
-}
-
typedef struct {
Coroutine *co;
int ret;
@@ -1441,6 +1426,7 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
memset(bdi, 0, sizeof(*bdi));
bdi->cluster_size = s->header.cluster_size;
+ bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
return 0;
}
@@ -1516,6 +1502,15 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
return ret;
}
+static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ bdrv_qed_close(bs);
+ memset(s, 0, sizeof(BDRVQEDState));
+ bdrv_qed_open(bs, bs->open_flags);
+}
+
static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result)
{
BDRVQEDState *s = bs->opaque;
@@ -1562,12 +1557,12 @@ static BlockDriver bdrv_qed = {
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
- .bdrv_aio_flush = bdrv_qed_aio_flush,
.bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
+ .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,
};
diff --git a/block/qed.h b/block/qed.h
index 62624a1f34..c716772ad7 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -169,8 +169,6 @@ typedef struct {
/* Periodic flush and clear need check flag */
QEMUTimer *need_check_timer;
-
- Error *migration_blocker;
} BDRVQEDState;
enum {
diff --git a/block/raw.c b/block/raw.c
index 1cdac0ccdc..7086e314a6 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -25,11 +25,6 @@ static void raw_close(BlockDriverState *bs)
{
}
-static int coroutine_fn raw_co_flush(BlockDriverState *bs)
-{
- return bdrv_co_flush(bs->file);
-}
-
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@@ -113,7 +108,6 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
- .bdrv_co_flush_to_disk = raw_co_flush,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 00276f6f46..3eaf625e98 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -32,9 +32,11 @@
#define SD_OP_RELEASE_VDI 0x13
#define SD_OP_GET_VDI_INFO 0x14
#define SD_OP_READ_VDIS 0x15
+#define SD_OP_FLUSH_VDI 0x16
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
+#define SD_FLAG_CMD_CACHE 0x04
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
@@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
int is_snapshot;
+ uint8_t cache_enabled;
char *addr;
char *port;
int fd;
+ int flush_fd;
CoMutex lock;
Coroutine *co_send;
@@ -506,6 +510,7 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_send_full(sockfd, hdr, sizeof(*hdr), 0);
if (ret < sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
+ return ret;
}
ret = qemu_send_full(sockfd, data, *wlen, 0);
@@ -516,6 +521,24 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
+static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
+ unsigned int *wlen)
+{
+ int ret;
+
+ ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
+ if (ret < sizeof(*hdr)) {
+ error_report("failed to send a req, %s", strerror(errno));
+ return ret;
+ }
+
+ ret = qemu_co_send(sockfd, data, *wlen);
+ if (ret < *wlen) {
+ error_report("failed to send a req, %s", strerror(errno));
+ }
+
+ return ret;
+}
static int do_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen, unsigned int *rlen)
{
@@ -550,6 +573,40 @@ out:
return ret;
}
+static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
+ unsigned int *wlen, unsigned int *rlen)
+{
+ int ret;
+
+ socket_set_block(sockfd);
+ ret = send_co_req(sockfd, hdr, data, wlen);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
+ if (ret < sizeof(*hdr)) {
+ error_report("failed to get a rsp, %s", strerror(errno));
+ goto out;
+ }
+
+ if (*rlen > hdr->data_length) {
+ *rlen = hdr->data_length;
+ }
+
+ if (*rlen) {
+ ret = qemu_co_recv(sockfd, data, *rlen);
+ if (ret < *rlen) {
+ error_report("failed to get the data, %s", strerror(errno));
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ socket_set_nonblock(sockfd);
+ return ret;
+}
+
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, int create,
enum AIOCBState aiocb_type);
@@ -900,6 +957,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
}
+ if (s->cache_enabled) {
+ hdr.flags |= SD_FLAG_CMD_CACHE;
+ }
+
hdr.oid = oid;
hdr.cow_oid = old_oid;
hdr.copies = s->inode.nr_copies;
@@ -942,7 +1003,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset,
- int write, int create)
+ int write, int create, uint8_t cache)
{
SheepdogObjReq hdr;
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -965,6 +1026,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
rlen = datalen;
hdr.opcode = SD_OP_READ_OBJ;
}
+
+ if (cache) {
+ hdr.flags |= SD_FLAG_CMD_CACHE;
+ }
+
hdr.oid = oid;
hdr.data_length = datalen;
hdr.offset = offset;
@@ -986,15 +1052,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
static int read_object(int fd, char *buf, uint64_t oid, int copies,
- unsigned int datalen, uint64_t offset)
+ unsigned int datalen, uint64_t offset, uint8_t cache)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
+ return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
+ cache);
}
static int write_object(int fd, char *buf, uint64_t oid, int copies,
- unsigned int datalen, uint64_t offset, int create)
+ unsigned int datalen, uint64_t offset, int create,
+ uint8_t cache)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
+ return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
+ cache);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1026,6 +1095,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
+ if (flags & BDRV_O_CACHE_WB) {
+ s->cache_enabled = 1;
+ s->flush_fd = connect_to_sdog(s->addr, s->port);
+ if (s->flush_fd < 0) {
+ error_report("failed to connect");
+ goto out;
+ }
+ }
+
if (snapid) {
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = 1;
@@ -1038,7 +1116,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
}
buf = g_malloc(SD_INODE_SIZE);
- ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
+ ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
+ s->cache_enabled);
closesocket(fd);
@@ -1272,6 +1351,9 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
closesocket(s->fd);
+ if (s->cache_enabled) {
+ closesocket(s->flush_fd);
+ }
g_free(s->addr);
}
@@ -1305,7 +1387,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, 0);
+ s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
close(fd);
if (ret < 0) {
@@ -1387,7 +1469,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
}
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0);
+ SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@@ -1575,6 +1657,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
+static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
+{
+ BDRVSheepdogState *s = bs->opaque;
+ SheepdogObjReq hdr = { 0 };
+ SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
+ SheepdogInode *inode = &s->inode;
+ int ret;
+ unsigned int wlen = 0, rlen = 0;
+
+ if (!s->cache_enabled) {
+ return 0;
+ }
+
+ hdr.opcode = SD_OP_FLUSH_VDI;
+ hdr.oid = vid_to_vdi_oid(inode->vdi_id);
+
+ ret = do_co_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
+ if (ret) {
+ error_report("failed to send a request to the sheep");
+ return ret;
+ }
+
+ if (rsp->result != SD_RES_SUCCESS) {
+ error_report("%s", sd_strerror(rsp->result));
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVSheepdogState *s = bs->opaque;
@@ -1610,7 +1722,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, 0);
+ s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
ret = -EIO;
@@ -1629,7 +1741,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
inode = (SheepdogInode *)g_malloc(datalen);
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
- s->inode.nr_copies, datalen, 0);
+ s->inode.nr_copies, datalen, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@@ -1684,7 +1796,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0);
+ SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@@ -1779,7 +1891,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
/* we don't need to read entire object */
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
- 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
+ 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
+ s->cache_enabled);
if (ret) {
continue;
@@ -1835,10 +1948,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
create = (offset == 0);
if (load) {
ret = read_object(fd, (char *)data, vmstate_oid,
- s->inode.nr_copies, data_len, offset);
+ s->inode.nr_copies, data_len, offset,
+ s->cache_enabled);
} else {
ret = write_object(fd, (char *)data, vmstate_oid,
- s->inode.nr_copies, data_len, offset, create);
+ s->inode.nr_copies, data_len, offset, create,
+ s->cache_enabled);
}
if (ret < 0) {
@@ -1904,6 +2019,7 @@ BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
+ .bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
diff --git a/block/stream.c b/block/stream.c
index d1b3986a8a..0efe1adfd5 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -76,6 +76,39 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
}
+static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
+ const char *base_id)
+{
+ BlockDriverState *intermediate;
+ intermediate = top->backing_hd;
+
+ while (intermediate) {
+ BlockDriverState *unused;
+
+ /* reached base */
+ if (intermediate == base) {
+ break;
+ }
+
+ unused = intermediate;
+ intermediate = intermediate->backing_hd;
+ unused->backing_hd = NULL;
+ bdrv_delete(unused);
+ }
+ top->backing_hd = base;
+
+ pstrcpy(top->backing_file, sizeof(top->backing_file), "");
+ pstrcpy(top->backing_format, sizeof(top->backing_format), "");
+ if (base_id) {
+ pstrcpy(top->backing_file, sizeof(top->backing_file), base_id);
+ if (base->drv) {
+ pstrcpy(top->backing_format, sizeof(top->backing_format),
+ base->drv->format_name);
+ }
+ }
+
+}
+
/*
* Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
@@ -175,7 +208,7 @@ retry:
break;
}
-
+ s->common.busy = true;
if (base) {
ret = is_allocated_base(bs, base, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
@@ -189,6 +222,7 @@ retry:
if (s->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
+ s->common.busy = false;
co_sleep_ns(rt_clock, delay_ns);
/* Recheck cancellation and that sectors are unallocated */
@@ -208,6 +242,7 @@ retry:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that qemu_aio_flush() returns.
*/
+ s->common.busy = false;
co_sleep_ns(rt_clock, 0);
}
@@ -215,12 +250,13 @@ retry:
bdrv_disable_copy_on_read(bs);
}
- if (sector_num == end && ret == 0) {
+ if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
const char *base_id = NULL;
if (base) {
base_id = s->backing_file_id;
}
ret = bdrv_change_backing_file(bs, base_id, NULL);
+ close_unused_images(bs, base, base_id);
}
qemu_vfree(buf);
@@ -234,7 +270,6 @@ static int stream_set_speed(BlockJob *job, int64_t value)
if (value < 0) {
return -EINVAL;
}
- job->speed = value;
ratelimit_set_speed(&s->limit, value / BDRV_SECTOR_SIZE);
return 0;
}
diff --git a/block/vdi.c b/block/vdi.c
index 6a0011fbcc..119d3c74da 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -144,29 +144,6 @@ void uuid_unparse(const uuid_t uu, char *out)
#endif
typedef struct {
- BlockDriverAIOCB common;
- int64_t sector_num;
- QEMUIOVector *qiov;
- uint8_t *buf;
- /* Total number of sectors. */
- int nb_sectors;
- /* Number of sectors for current AIO. */
- int n_sectors;
- /* New allocated block map entry. */
- uint32_t bmap_first;
- uint32_t bmap_last;
- /* Buffer for new allocated block. */
- void *block_buffer;
- void *orig_buf;
- bool is_write;
- int header_modified;
- BlockDriverAIOCB *hd_aiocb;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- QEMUBH *bh;
-} VdiAIOCB;
-
-typedef struct {
char text[0x40];
uint32_t signature;
uint32_t version;
@@ -489,332 +466,150 @@ static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
return VDI_IS_ALLOCATED(bmap_entry);
}
-static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- /* TODO: This code is untested. How can I get it executed? */
- VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
- logout("\n");
- if (acb->hd_aiocb) {
- bdrv_aio_cancel(acb->hd_aiocb);
- }
- qemu_aio_release(acb);
-}
-
-static AIOPool vdi_aio_pool = {
- .aiocb_size = sizeof(VdiAIOCB),
- .cancel = vdi_aio_cancel,
-};
-
-static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque, int is_write)
-{
- VdiAIOCB *acb;
-
- logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n",
- bs, sector_num, qiov, nb_sectors, cb, opaque, is_write);
-
- acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque);
- acb->hd_aiocb = NULL;
- acb->sector_num = sector_num;
- acb->qiov = qiov;
- acb->is_write = is_write;
-
- if (qiov->niov > 1) {
- acb->buf = qemu_blockalign(bs, qiov->size);
- acb->orig_buf = acb->buf;
- if (is_write) {
- qemu_iovec_to_buffer(qiov, acb->buf);
- }
- } else {
- acb->buf = (uint8_t *)qiov->iov->iov_base;
- }
- acb->nb_sectors = nb_sectors;
- acb->n_sectors = 0;
- acb->bmap_first = VDI_UNALLOCATED;
- acb->bmap_last = VDI_UNALLOCATED;
- acb->block_buffer = NULL;
- acb->header_modified = 0;
- return acb;
-}
-
-static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb)
-{
- logout("\n");
-
- if (acb->bh) {
- return -EIO;
- }
-
- acb->bh = qemu_bh_new(cb, acb);
- if (!acb->bh) {
- return -EIO;
- }
-
- qemu_bh_schedule(acb->bh);
-
- return 0;
-}
-
-static void vdi_aio_read_cb(void *opaque, int ret);
-static void vdi_aio_write_cb(void *opaque, int ret);
-
-static void vdi_aio_rw_bh(void *opaque)
+static int vdi_co_read(BlockDriverState *bs,
+ int64_t sector_num, uint8_t *buf, int nb_sectors)
{
- VdiAIOCB *acb = opaque;
- logout("\n");
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
-
- if (acb->is_write) {
- vdi_aio_write_cb(opaque, 0);
- } else {
- vdi_aio_read_cb(opaque, 0);
- }
-}
-
-static void vdi_aio_read_cb(void *opaque, int ret)
-{
- VdiAIOCB *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
BDRVVdiState *s = bs->opaque;
uint32_t bmap_entry;
uint32_t block_index;
uint32_t sector_in_block;
uint32_t n_sectors;
+ int ret = 0;
- logout("%u sectors read\n", acb->n_sectors);
-
- acb->hd_aiocb = NULL;
-
- if (ret < 0) {
- goto done;
- }
-
- acb->nb_sectors -= acb->n_sectors;
-
- if (acb->nb_sectors == 0) {
- /* request completed */
- ret = 0;
- goto done;
- }
-
- acb->sector_num += acb->n_sectors;
- acb->buf += acb->n_sectors * SECTOR_SIZE;
-
- block_index = acb->sector_num / s->block_sectors;
- sector_in_block = acb->sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > acb->nb_sectors) {
- n_sectors = acb->nb_sectors;
- }
-
- logout("will read %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, acb->sector_num);
+ logout("\n");
- /* prepare next AIO request */
- acb->n_sectors = n_sectors;
- bmap_entry = le32_to_cpu(s->bmap[block_index]);
- if (!VDI_IS_ALLOCATED(bmap_entry)) {
- /* Block not allocated, return zeros, no need to wait. */
- memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
- ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
- if (ret < 0) {
- goto done;
+ while (ret >= 0 && nb_sectors > 0) {
+ block_index = sector_num / s->block_sectors;
+ sector_in_block = sector_num % s->block_sectors;
+ n_sectors = s->block_sectors - sector_in_block;
+ if (n_sectors > nb_sectors) {
+ n_sectors = nb_sectors;
}
- } else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov,
- n_sectors, vdi_aio_read_cb, acb);
- }
- return;
-done:
- if (acb->qiov->niov > 1) {
- qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
- qemu_vfree(acb->orig_buf);
- }
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
-}
-static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- VdiAIOCB *acb;
- int ret;
+ logout("will read %u sectors starting at sector %" PRIu64 "\n",
+ n_sectors, sector_num);
- logout("\n");
- acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
- ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
- if (ret < 0) {
- if (acb->qiov->niov > 1) {
- qemu_vfree(acb->orig_buf);
+ /* prepare next AIO request */
+ bmap_entry = le32_to_cpu(s->bmap[block_index]);
+ if (!VDI_IS_ALLOCATED(bmap_entry)) {
+ /* Block not allocated, return zeros, no need to wait. */
+ memset(buf, 0, n_sectors * SECTOR_SIZE);
+ ret = 0;
+ } else {
+ uint64_t offset = s->header.offset_data / SECTOR_SIZE +
+ (uint64_t)bmap_entry * s->block_sectors +
+ sector_in_block;
+ ret = bdrv_read(bs->file, offset, buf, n_sectors);
}
- qemu_aio_release(acb);
- return NULL;
+ logout("%u sectors read\n", n_sectors);
+
+ nb_sectors -= n_sectors;
+ sector_num += n_sectors;
+ buf += n_sectors * SECTOR_SIZE;
}
- return &acb->common;
+ return ret;
}
-static void vdi_aio_write_cb(void *opaque, int ret)
+static int vdi_co_write(BlockDriverState *bs,
+ int64_t sector_num, const uint8_t *buf, int nb_sectors)
{
- VdiAIOCB *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
BDRVVdiState *s = bs->opaque;
uint32_t bmap_entry;
uint32_t block_index;
uint32_t sector_in_block;
uint32_t n_sectors;
+ uint32_t bmap_first = VDI_UNALLOCATED;
+ uint32_t bmap_last = VDI_UNALLOCATED;
+ uint8_t *block = NULL;
+ int ret = 0;
- acb->hd_aiocb = NULL;
+ logout("\n");
- if (ret < 0) {
- goto done;
- }
+ while (ret >= 0 && nb_sectors > 0) {
+ block_index = sector_num / s->block_sectors;
+ sector_in_block = sector_num % s->block_sectors;
+ n_sectors = s->block_sectors - sector_in_block;
+ if (n_sectors > nb_sectors) {
+ n_sectors = nb_sectors;
+ }
+
+ logout("will write %u sectors starting at sector %" PRIu64 "\n",
+ n_sectors, sector_num);
- acb->nb_sectors -= acb->n_sectors;
- acb->sector_num += acb->n_sectors;
- acb->buf += acb->n_sectors * SECTOR_SIZE;
-
- if (acb->nb_sectors == 0) {
- logout("finished data write\n");
- acb->n_sectors = 0;
- if (acb->header_modified) {
- VdiHeader *header = acb->block_buffer;
- logout("now writing modified header\n");
- assert(VDI_IS_ALLOCATED(acb->bmap_first));
- *header = s->header;
- vdi_header_to_le(header);
- acb->header_modified = 0;
- acb->hd_iov.iov_base = acb->block_buffer;
- acb->hd_iov.iov_len = SECTOR_SIZE;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1,
- vdi_aio_write_cb, acb);
- return;
- } else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
- /* One or more new blocks were allocated. */
+ /* prepare next AIO request */
+ bmap_entry = le32_to_cpu(s->bmap[block_index]);
+ if (!VDI_IS_ALLOCATED(bmap_entry)) {
+ /* Allocate new block and write to it. */
uint64_t offset;
- uint32_t bmap_first;
- uint32_t bmap_last;
- g_free(acb->block_buffer);
- acb->block_buffer = NULL;
- bmap_first = acb->bmap_first;
- bmap_last = acb->bmap_last;
- logout("now writing modified block map entry %u...%u\n",
- bmap_first, bmap_last);
- /* Write modified sectors from block map. */
- bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
- bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
- n_sectors = bmap_last - bmap_first + 1;
- offset = s->bmap_sector + bmap_first;
- acb->bmap_first = VDI_UNALLOCATED;
- acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] +
- bmap_first * SECTOR_SIZE);
- acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- logout("will write %u block map sectors starting from entry %u\n",
- n_sectors, bmap_first);
- acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
- n_sectors, vdi_aio_write_cb, acb);
- return;
+ bmap_entry = s->header.blocks_allocated;
+ s->bmap[block_index] = cpu_to_le32(bmap_entry);
+ s->header.blocks_allocated++;
+ offset = s->header.offset_data / SECTOR_SIZE +
+ (uint64_t)bmap_entry * s->block_sectors;
+ if (block == NULL) {
+ block = g_malloc(s->block_size);
+ bmap_first = block_index;
+ }
+ bmap_last = block_index;
+ /* Copy data to be written to new block and zero unused parts. */
+ memset(block, 0, sector_in_block * SECTOR_SIZE);
+ memcpy(block + sector_in_block * SECTOR_SIZE,
+ buf, n_sectors * SECTOR_SIZE);
+ memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
+ (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
+ ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+ } else {
+ uint64_t offset = s->header.offset_data / SECTOR_SIZE +
+ (uint64_t)bmap_entry * s->block_sectors +
+ sector_in_block;
+ ret = bdrv_write(bs->file, offset, buf, n_sectors);
}
- ret = 0;
- goto done;
- }
- logout("%u sectors written\n", acb->n_sectors);
+ nb_sectors -= n_sectors;
+ sector_num += n_sectors;
+ buf += n_sectors * SECTOR_SIZE;
- block_index = acb->sector_num / s->block_sectors;
- sector_in_block = acb->sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > acb->nb_sectors) {
- n_sectors = acb->nb_sectors;
+ logout("%u sectors written\n", n_sectors);
}
- logout("will write %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, acb->sector_num);
-
- /* prepare next AIO request */
- acb->n_sectors = n_sectors;
- bmap_entry = le32_to_cpu(s->bmap[block_index]);
- if (!VDI_IS_ALLOCATED(bmap_entry)) {
- /* Allocate new block and write to it. */
- uint64_t offset;
- uint8_t *block;
- bmap_entry = s->header.blocks_allocated;
- s->bmap[block_index] = cpu_to_le32(bmap_entry);
- s->header.blocks_allocated++;
- offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors;
- block = acb->block_buffer;
- if (block == NULL) {
- block = g_malloc(s->block_size);
- acb->block_buffer = block;
- acb->bmap_first = block_index;
- assert(!acb->header_modified);
- acb->header_modified = 1;
- }
- acb->bmap_last = block_index;
- /* Copy data to be written to new block and zero unused parts. */
- memset(block, 0, sector_in_block * SECTOR_SIZE);
- memcpy(block + sector_in_block * SECTOR_SIZE,
- acb->buf, n_sectors * SECTOR_SIZE);
- memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
- (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
- acb->hd_iov.iov_base = (void *)block;
- acb->hd_iov.iov_len = s->block_size;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_writev(bs->file, offset,
- &acb->hd_qiov, s->block_sectors,
- vdi_aio_write_cb, acb);
- } else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
- n_sectors, vdi_aio_write_cb, acb);
+ logout("finished data write\n");
+ if (ret < 0) {
+ return ret;
}
- return;
-
-done:
- if (acb->qiov->niov > 1) {
- qemu_vfree(acb->orig_buf);
- }
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
-}
+ if (block) {
+ /* One or more new blocks were allocated. */
+ VdiHeader *header = (VdiHeader *) block;
+ uint8_t *base;
+ uint64_t offset;
-static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- VdiAIOCB *acb;
- int ret;
+ logout("now writing modified header\n");
+ assert(VDI_IS_ALLOCATED(bmap_first));
+ *header = s->header;
+ vdi_header_to_le(header);
+ ret = bdrv_write(bs->file, 0, block, 1);
+ g_free(block);
+ block = NULL;
- logout("\n");
- acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
- ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
- if (ret < 0) {
- if (acb->qiov->niov > 1) {
- qemu_vfree(acb->orig_buf);
+ if (ret < 0) {
+ return ret;
}
- qemu_aio_release(acb);
- return NULL;
+
+ logout("now writing modified block map entry %u...%u\n",
+ bmap_first, bmap_last);
+ /* Write modified sectors from block map. */
+ bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
+ bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
+ n_sectors = bmap_last - bmap_first + 1;
+ offset = s->bmap_sector + bmap_first;
+ base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
+ logout("will write %u block map sectors starting from entry %u\n",
+ n_sectors, bmap_first);
+ ret = bdrv_write(bs->file, offset, base, n_sectors);
}
- return &acb->common;
+ return ret;
}
static int vdi_create(const char *filename, QEMUOptionParameter *options)
@@ -930,13 +725,6 @@ static void vdi_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static coroutine_fn int vdi_co_flush(BlockDriverState *bs)
-{
- logout("\n");
- return bdrv_co_flush(bs->file);
-}
-
-
static QEMUOptionParameter vdi_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -969,13 +757,12 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_create = vdi_create,
- .bdrv_co_flush_to_disk = vdi_co_flush,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
- .bdrv_aio_readv = vdi_aio_readv,
+ .bdrv_read = vdi_co_read,
#if defined(CONFIG_VDI_WRITE)
- .bdrv_aio_writev = vdi_aio_writev,
+ .bdrv_write = vdi_co_write,
#endif
.bdrv_get_info = vdi_get_info,
diff --git a/block/vmdk.c b/block/vmdk.c
index 45c003a0f1..18e9b4caf6 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1525,10 +1525,10 @@ static void vmdk_close(BlockDriverState *bs)
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
- int i, ret, err;
BDRVVmdkState *s = bs->opaque;
+ int i, err;
+ int ret = 0;
- ret = bdrv_co_flush(bs->file);
for (i = 0; i < s->num_extents; i++) {
err = bdrv_co_flush(s->extents[i].file);
if (err < 0) {
diff --git a/block/vpc.c b/block/vpc.c
index 6b4816f563..5cd13d17a1 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -189,6 +189,9 @@ static int vpc_open(BlockDriverState *bs, int flags)
fprintf(stderr, "block-vpc: The header checksum of '%s' is "
"incorrect.\n", bs->filename);
+ /* Write 'checksum' back to footer, or else will leave it with zero. */
+ footer->checksum = be32_to_cpu(checksum);
+
// The visible size of a image in Virtual PC depends on the geometry
// rather than on the size stored in the footer (the size in the footer
// is too large usually)
@@ -507,11 +510,6 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
-static coroutine_fn int vpc_co_flush(BlockDriverState *bs)
-{
- return bdrv_co_flush(bs->file);
-}
-
/*
* Calculates the number of cylinders, heads and sectors per cylinder
* based on a given number of sectors. This is the algorithm described
@@ -789,7 +787,6 @@ static BlockDriver bdrv_vpc = {
.bdrv_read = vpc_co_read,
.bdrv_write = vpc_co_write,
- .bdrv_co_flush_to_disk = vpc_co_flush,
.create_options = vpc_create_options,
};
diff --git a/block_int.h b/block_int.h
index b460c369ca..0e5a032e77 100644
--- a/block_int.h
+++ b/block_int.h
@@ -53,12 +53,6 @@
typedef struct BdrvTrackedRequest BdrvTrackedRequest;
-typedef struct AIOPool {
- void (*cancel)(BlockDriverAIOCB *acb);
- int aiocb_size;
- BlockDriverAIOCB *free_aiocb;
-} AIOPool;
-
typedef struct BlockIOLimit {
int64_t bps[3];
int64_t iops[3];
@@ -69,8 +63,13 @@ typedef struct BlockIOBaseValue {
uint64_t ios[2];
} BlockIOBaseValue;
-typedef void BlockJobCancelFunc(void *opaque);
typedef struct BlockJob BlockJob;
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
typedef struct BlockJobType {
/** Derived BlockJob struct size */
size_t instance_size;
@@ -83,19 +82,48 @@ typedef struct BlockJobType {
} BlockJobType;
/**
- * Long-running operation on a BlockDriverState
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
*/
struct BlockJob {
+ /** The job type, including the job vtable. */
const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
BlockDriverState *bs;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has detected that the cancelled flag is
+ * true, it should not anymore issue any I/O operation to the
+ * block device.
+ */
bool cancelled;
- /* These fields are published by the query-block-jobs QMP API */
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and cancellation can be processed without
+ * issuing new I/O. The busy flag must be set to false when the
+ * job goes to sleep on any condition that is not detected by
+ * #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Offset that is published by the query-block-jobs QMP API */
int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
int64_t speed;
+ /** The completion function that will be called when the job completes. */
BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
void *opaque;
};
@@ -302,20 +330,8 @@ struct BlockDriverState {
BlockJob *job;
};
-struct BlockDriverAIOCB {
- AIOPool *pool;
- BlockDriverState *bs;
- BlockDriverCompletionFunc *cb;
- void *opaque;
- BlockDriverAIOCB *next;
-};
-
void get_tmp_filename(char *filename, int size);
-void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qemu_aio_release(void *p);
-
void bdrv_set_io_limits(BlockDriverState *bs,
BlockIOLimit *io_limits);
@@ -323,13 +339,90 @@ void bdrv_set_io_limits(BlockDriverState *bs,
int is_windows_drive(const char *filename);
#endif
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed at the time the @cb it is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
+
+/**
+ * block_job_complete:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
void block_job_complete(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
int block_job_set_speed(BlockJob *job, int64_t value);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
bool block_job_is_cancelled(BlockJob *job);
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the job and wait for it to reach a quiescent
+ * state. Note that the completion callback will still be called
+ * asynchronously, hence it is *not* valid to call #bdrv_delete
+ * immediately after #block_job_cancel_sync. Users of block jobs
+ * will usually protect the BlockDriverState objects with a reference
+ * count, should this be a concern.
+ */
+void block_job_cancel_sync(BlockJob *job);
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
int stream_start(BlockDriverState *bs, BlockDriverState *base,
const char *base_id, BlockDriverCompletionFunc *cb,
void *opaque);
diff --git a/blockdev.c b/blockdev.c
index 1a500b830d..0c2440e249 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -64,6 +64,9 @@ void blockdev_mark_auto_del(BlockDriverState *bs)
{
DriveInfo *dinfo = drive_get_by_blockdev(bs);
+ if (bs->job) {
+ block_job_cancel(bs->job);
+ }
if (dinfo) {
dinfo->auto_del = 1;
}
@@ -532,8 +535,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
dinfo->unit = unit_id;
dinfo->opts = opts;
dinfo->refcount = 1;
- if (serial)
- strncpy(dinfo->serial, serial, sizeof(dinfo->serial) - 1);
+ if (serial) {
+ pstrcpy(dinfo->serial, sizeof(dinfo->serial), serial);
+ }
QTAILQ_INSERT_TAIL(&drives, dinfo, next);
bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
@@ -591,6 +595,10 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
bdrv_flags |= BDRV_O_COPY_ON_READ;
}
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ bdrv_flags |= BDRV_O_INCOMING;
+ }
+
if (media == MEDIA_CDROM) {
/* CDROM is fine for any interface, don't check. */
ro = 1;
diff --git a/dma-helpers.c b/dma-helpers.c
index c29ea6d3ab..7971a89c14 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -8,7 +8,6 @@
*/
#include "dma.h"
-#include "block_int.h"
#include "trace.h"
void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
@@ -42,7 +41,7 @@ typedef struct {
BlockDriverAIOCB *acb;
QEMUSGList *sg;
uint64_t sector_num;
- bool to_dev;
+ DMADirection dir;
bool in_cancel;
int sg_cur_index;
dma_addr_t sg_cur_byte;
@@ -76,7 +75,8 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
for (i = 0; i < dbs->iov.niov; ++i) {
cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
- dbs->iov.iov[i].iov_len, !dbs->to_dev,
+ dbs->iov.iov[i].iov_len,
+ dbs->dir != DMA_DIRECTION_TO_DEVICE,
dbs->iov.iov[i].iov_len);
}
qemu_iovec_reset(&dbs->iov);
@@ -123,7 +123,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
- mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->to_dev);
+ mem = cpu_physical_memory_map(cur_addr, &cur_len,
+ dbs->dir != DMA_DIRECTION_TO_DEVICE);
if (!mem)
break;
qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -170,11 +171,11 @@ static AIOPool dma_aio_pool = {
BlockDriverAIOCB *dma_bdrv_io(
BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
- void *opaque, bool to_dev)
+ void *opaque, DMADirection dir)
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
- trace_dma_bdrv_io(dbs, bs, sector_num, to_dev);
+ trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
dbs->acb = NULL;
dbs->bs = bs;
@@ -182,7 +183,7 @@ BlockDriverAIOCB *dma_bdrv_io(
dbs->sector_num = sector_num;
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
- dbs->to_dev = to_dev;
+ dbs->dir = dir;
dbs->io_func = io_func;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
@@ -195,14 +196,16 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
- return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, false);
+ return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
+ DMA_DIRECTION_FROM_DEVICE);
}
BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
- return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, true);
+ return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
+ DMA_DIRECTION_TO_DEVICE);
}
diff --git a/dma.h b/dma.h
index 5bd1fc836f..8c1ec8f0d2 100644
--- a/dma.h
+++ b/dma.h
@@ -16,6 +16,11 @@
typedef struct ScatterGatherEntry ScatterGatherEntry;
+typedef enum {
+ DMA_DIRECTION_TO_DEVICE = 0,
+ DMA_DIRECTION_FROM_DEVICE = 1,
+} DMADirection;
+
struct QEMUSGList {
ScatterGatherEntry *sg;
int nsg;
@@ -28,11 +33,6 @@ typedef target_phys_addr_t dma_addr_t;
#define DMA_ADDR_FMT TARGET_FMT_plx
-typedef enum {
- DMA_DIRECTION_TO_DEVICE = 0,
- DMA_DIRECTION_FROM_DEVICE = 1,
-} DMADirection;
-
struct ScatterGatherEntry {
dma_addr_t base;
dma_addr_t len;
@@ -50,7 +50,7 @@ typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
- void *opaque, bool to_dev);
+ void *opaque, DMADirection dir);
BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
BlockDriverCompletionFunc *cb, void *opaque);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 4d568acc9c..35723fd800 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -31,7 +31,6 @@
#include "sysemu.h"
#include "dma.h"
#include "blockdev.h"
-#include "block_int.h"
#include <hw/ide/internal.h>
@@ -101,7 +100,7 @@ static void ide_identify(IDEState *s)
put_le16(p + 21, 512); /* cache size in sectors */
put_le16(p + 22, 4); /* ecc bytes */
padstr((char *)(p + 23), s->version, 8); /* firmware version */
- padstr((char *)(p + 27), "QEMU HARDDISK", 40); /* model */
+ padstr((char *)(p + 27), s->drive_model_str, 40); /* model */
#if MAX_MULT_SECTORS > 1
put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS);
#endif
@@ -143,17 +142,25 @@ static void ide_identify(IDEState *s)
put_le16(p + 82, (1 << 14) | (1 << 5) | 1);
/* 13=flush_cache_ext,12=flush_cache,10=lba48 */
put_le16(p + 83, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10));
- /* 14=set to 1, 1=SMART self test, 0=SMART error logging */
- put_le16(p + 84, (1 << 14) | 0);
+ /* 14=set to 1, 8=has WWN, 1=SMART self test, 0=SMART error logging */
+ if (s->wwn) {
+ put_le16(p + 84, (1 << 14) | (1 << 8) | 0);
+ } else {
+ put_le16(p + 84, (1 << 14) | 0);
+ }
/* 14 = NOP supported, 5=WCACHE enabled, 0=SMART feature set enabled */
if (bdrv_enable_write_cache(s->bs))
put_le16(p + 85, (1 << 14) | (1 << 5) | 1);
else
put_le16(p + 85, (1 << 14) | 1);
/* 13=flush_cache_ext,12=flush_cache,10=lba48 */
- put_le16(p + 86, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10));
- /* 14=set to 1, 1=smart self test, 0=smart error logging */
- put_le16(p + 87, (1 << 14) | 0);
+ put_le16(p + 86, (1 << 13) | (1 <<12) | (1 << 10));
+ /* 14=set to 1, 8=has WWN, 1=SMART self test, 0=SMART error logging */
+ if (s->wwn) {
+ put_le16(p + 87, (1 << 14) | (1 << 8) | 0);
+ } else {
+ put_le16(p + 87, (1 << 14) | 0);
+ }
put_le16(p + 88, 0x3f | (1 << 13)); /* udma5 set and supported */
put_le16(p + 93, 1 | (1 << 14) | 0x2000);
put_le16(p + 100, s->nb_sectors);
@@ -163,6 +170,13 @@ static void ide_identify(IDEState *s)
if (dev && dev->conf.physical_block_size)
put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
+ if (s->wwn) {
+ /* LE 16-bit words 111-108 contain 64-bit World Wide Name */
+ put_le16(p + 108, s->wwn >> 48);
+ put_le16(p + 109, s->wwn >> 32);
+ put_le16(p + 110, s->wwn >> 16);
+ put_le16(p + 111, s->wwn);
+ }
if (dev && dev->conf.discard_granularity) {
put_le16(p + 169, 1); /* TRIM support */
}
@@ -189,7 +203,7 @@ static void ide_atapi_identify(IDEState *s)
put_le16(p + 21, 512); /* cache size in sectors */
put_le16(p + 22, 4); /* ecc bytes */
padstr((char *)(p + 23), s->version, 8); /* firmware version */
- padstr((char *)(p + 27), "QEMU DVD-ROM", 40); /* model */
+ padstr((char *)(p + 27), s->drive_model_str, 40); /* model */
put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */
#ifdef USE_DMA_CDROM
put_le16(p + 49, 1 << 9 | 1 << 8); /* DMA and LBA supported */
@@ -246,7 +260,7 @@ static void ide_cfata_identify(IDEState *s)
padstr((char *)(p + 10), s->drive_serial_str, 20); /* serial number */
put_le16(p + 22, 0x0004); /* ECC bytes */
padstr((char *) (p + 23), s->version, 8); /* Firmware Revision */
- padstr((char *) (p + 27), "QEMU MICRODRIVE", 40);/* Model number */
+ padstr((char *) (p + 27), s->drive_model_str, 40);/* Model number */
#if MAX_MULT_SECTORS > 1
put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS);
#else
@@ -604,7 +618,8 @@ void ide_dma_cb(void *opaque, int ret)
break;
case IDE_DMA_TRIM:
s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
- ide_issue_trim, ide_dma_cb, s, true);
+ ide_issue_trim, ide_dma_cb, s,
+ DMA_DIRECTION_TO_DEVICE);
break;
}
return;
@@ -1834,7 +1849,8 @@ static const BlockDevOps ide_cd_block_ops = {
};
int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
- const char *version, const char *serial)
+ const char *version, const char *serial, const char *model,
+ uint64_t wwn)
{
int cylinders, heads, secs;
uint64_t nb_sectors;
@@ -1860,6 +1876,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
s->heads = heads;
s->sectors = secs;
s->nb_sectors = nb_sectors;
+ s->wwn = wwn;
/* The SMART values should be preserved across power cycles
but they aren't. */
s->smart_enabled = 1;
@@ -1880,11 +1897,27 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
}
}
if (serial) {
- strncpy(s->drive_serial_str, serial, sizeof(s->drive_serial_str));
+ pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), serial);
} else {
snprintf(s->drive_serial_str, sizeof(s->drive_serial_str),
"QM%05d", s->drive_serial);
}
+ if (model) {
+ pstrcpy(s->drive_model_str, sizeof(s->drive_model_str), model);
+ } else {
+ switch (kind) {
+ case IDE_CD:
+ strcpy(s->drive_model_str, "QEMU DVD-ROM");
+ break;
+ case IDE_CFATA:
+ strcpy(s->drive_model_str, "QEMU MICRODRIVE");
+ break;
+ default:
+ strcpy(s->drive_model_str, "QEMU HARDDISK");
+ break;
+ }
+ }
+
if (version) {
pstrcpy(s->version, sizeof(s->version), version);
} else {
@@ -1977,7 +2010,8 @@ void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
if (dinfo) {
if (ide_init_drive(&bus->ifs[i], dinfo->bdrv,
dinfo->media_cd ? IDE_CD : IDE_HD, NULL,
- *dinfo->serial ? dinfo->serial : NULL) < 0) {
+ *dinfo->serial ? dinfo->serial : NULL,
+ NULL, 0) < 0) {
error_report("Can't set up IDE drive %s", dinfo->id);
exit(1);
}
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index c808a0ddf8..100efd3076 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -348,6 +348,8 @@ struct IDEState {
uint8_t identify_data[512];
int drive_serial;
char drive_serial_str[21];
+ char drive_model_str[41];
+ uint64_t wwn;
/* ide regs */
uint8_t feature;
uint8_t error;
@@ -468,6 +470,8 @@ struct IDEDevice {
BlockConf conf;
char *version;
char *serial;
+ char *model;
+ uint64_t wwn;
};
#define BM_STATUS_DMAING 0x01
@@ -534,7 +538,8 @@ void ide_data_writel(void *opaque, uint32_t addr, uint32_t val);
uint32_t ide_data_readl(void *opaque, uint32_t addr);
int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
- const char *version, const char *serial);
+ const char *version, const char *serial, const char *model,
+ uint64_t wwn);
void ide_init2(IDEBus *bus, qemu_irq irq);
void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
DriveInfo *hd1, qemu_irq irq);
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index a4df24406a..7b38d9e683 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -149,7 +149,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
break;
case IDE_DMA_TRIM:
m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
- ide_issue_trim, pmac_ide_transfer_cb, s, true);
+ ide_issue_trim, pmac_ide_transfer_cb, s,
+ DMA_DIRECTION_TO_DEVICE);
break;
}
return;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index f6a48961c5..a46578d685 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -136,7 +136,8 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
}
}
- if (ide_init_drive(s, dev->conf.bs, kind, dev->version, serial) < 0) {
+ if (ide_init_drive(s, dev->conf.bs, kind,
+ dev->version, serial, dev->model, dev->wwn) < 0) {
return -1;
}
@@ -173,7 +174,9 @@ static int ide_drive_initfn(IDEDevice *dev)
#define DEFINE_IDE_DEV_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
DEFINE_PROP_STRING("ver", IDEDrive, dev.version), \
- DEFINE_PROP_STRING("serial", IDEDrive, dev.serial)
+ DEFINE_PROP_HEX64("wwn", IDEDrive, dev.wwn, 0), \
+ DEFINE_PROP_STRING("serial", IDEDrive, dev.serial),\
+ DEFINE_PROP_STRING("model", IDEDrive, dev.model)
static Property ide_hd_properties[] = {
DEFINE_IDE_DEV_PROPERTIES(),
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index edc09b7307..f022a02447 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -15,7 +15,6 @@
#include "hw.h"
#include "pci.h"
#include "scsi.h"
-#include "block_int.h"
#include "dma.h"
//#define DEBUG_LSI
diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index bff9152df5..98dd06aeba 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -877,6 +877,52 @@ PropertyInfo qdev_prop_pci_devfn = {
.max = 0xFFFFFFFFULL,
};
+/* --- blocksize --- */
+
+static void set_blocksize(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ int16_t *ptr = qdev_get_prop_ptr(dev, prop);
+ Error *local_err = NULL;
+ int64_t value;
+
+ if (dev->state != DEV_STATE_CREATED) {
+ error_set(errp, QERR_PERMISSION_DENIED);
+ return;
+ }
+
+ visit_type_int(v, &value, name, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ if (value < prop->info->min || value > prop->info->max) {
+ error_set(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
+ dev->id?:"", name, value, prop->info->min,
+ prop->info->max);
+ return;
+ }
+
+ /* We rely on power-of-2 blocksizes for bitmasks */
+ if ((value & (value - 1)) != 0) {
+ error_set(errp, QERR_PROPERTY_VALUE_NOT_POWER_OF_2,
+ dev->id?:"", name, value);
+ return;
+ }
+
+ *ptr = value;
+}
+
+PropertyInfo qdev_prop_blocksize = {
+ .name = "blocksize",
+ .get = get_int16,
+ .set = set_blocksize,
+ .min = 512,
+ .max = 65024,
+};
+
/* --- public helpers --- */
static Property *qdev_prop_walk(Property *props, const char *name)
diff --git a/hw/qdev.h b/hw/qdev.h
index a8df42f6fa..1c109607b4 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -223,6 +223,7 @@ extern PropertyInfo qdev_prop_drive;
extern PropertyInfo qdev_prop_netdev;
extern PropertyInfo qdev_prop_vlan;
extern PropertyInfo qdev_prop_pci_devfn;
+extern PropertyInfo qdev_prop_blocksize;
#define DEFINE_PROP(_name, _state, _field, _prop, _type) { \
.name = (_name), \
@@ -284,6 +285,8 @@ extern PropertyInfo qdev_prop_pci_devfn;
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
LostTickPolicy)
+#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f, _d) \
+ DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_blocksize, uint16_t)
#define DEFINE_PROP_END_OF_LIST() \
{}
diff --git a/linux-aio.c b/linux-aio.c
index d2fc2e7d02..15261ece3d 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -9,7 +9,6 @@
*/
#include "qemu-common.h"
#include "qemu-aio.h"
-#include "block_int.h"
#include "block/raw-posix-aio.h"
#include <sys/eventfd.h>
diff --git a/migration.c b/migration.c
index 8c119ba8ff..94f7839e8b 100644
--- a/migration.c
+++ b/migration.c
@@ -91,6 +91,7 @@ void process_incoming_migration(QEMUFile *f)
qemu_announce_self();
DPRINTF("successfully loaded vm state\n");
+ bdrv_clear_incoming_migration_all();
/* Make sure all file formats flush their mutable metadata */
bdrv_invalidate_cache_all();
diff --git a/qemu-aio.h b/qemu-aio.h
index 3bdd749f80..230c2f79a0 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -17,6 +17,27 @@
#include "qemu-common.h"
#include "qemu-char.h"
+typedef struct BlockDriverAIOCB BlockDriverAIOCB;
+typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+
+typedef struct AIOPool {
+ void (*cancel)(BlockDriverAIOCB *acb);
+ int aiocb_size;
+ BlockDriverAIOCB *free_aiocb;
+} AIOPool;
+
+struct BlockDriverAIOCB {
+ AIOPool *pool;
+ BlockDriverState *bs;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+ BlockDriverAIOCB *next;
+};
+
+void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qemu_aio_release(void *p);
+
/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
typedef int (AioFlushHandler)(void *opaque);
diff --git a/qemu-img.c b/qemu-img.c
index 0e48b35296..6a61ca8d06 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -428,6 +428,13 @@ static int img_check(int argc, char **argv)
}
}
+ if (result.bfi.total_clusters != 0 && result.bfi.allocated_clusters != 0) {
+ printf("%" PRId64 "/%" PRId64 "= %0.2f%% allocated, %0.2f%% fragmented\n",
+ result.bfi.allocated_clusters, result.bfi.total_clusters,
+ result.bfi.allocated_clusters * 100.0 / result.bfi.total_clusters,
+ result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters);
+ }
+
bdrv_delete(bs);
if (ret < 0 || result.check_errors) {
@@ -716,7 +723,7 @@ static int img_convert(int argc, char **argv)
ret = -1;
goto out;
}
-
+
qemu_progress_init(progress, 2.0);
qemu_progress_print(0, 100);
@@ -1125,6 +1132,9 @@ static int img_info(int argc, char **argv)
if (bdi.cluster_size != 0) {
printf("cluster_size: %d\n", bdi.cluster_size);
}
+ if (bdi.is_dirty) {
+ printf("cleanly shut down: no\n");
+ }
}
bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename));
if (backing_filename[0] != '\0') {
diff --git a/qemu-io.c b/qemu-io.c
index 31895305f1..e6fcd7719e 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -17,6 +17,7 @@
#include "qemu-common.h"
#include "block_int.h"
#include "cmd.h"
+#include "trace/control.h"
#define VERSION "0.0.1"
@@ -1783,6 +1784,7 @@ static void usage(const char *name)
" -g, --growable allow file to grow (only applies to protocols)\n"
" -m, --misalign misalign allocations for O_DIRECT\n"
" -k, --native-aio use kernel AIO implementation (on Linux only)\n"
+" -T, --trace FILE enable trace events listed in the given file\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
"\n",
@@ -1794,7 +1796,7 @@ int main(int argc, char **argv)
{
int readonly = 0;
int growable = 0;
- const char *sopt = "hVc:rsnmgk";
+ const char *sopt = "hVc:rsnmgkT:";
const struct option lopt[] = {
{ "help", 0, NULL, 'h' },
{ "version", 0, NULL, 'V' },
@@ -1806,6 +1808,7 @@ int main(int argc, char **argv)
{ "misalign", 0, NULL, 'm' },
{ "growable", 0, NULL, 'g' },
{ "native-aio", 0, NULL, 'k' },
+ { "trace", 1, NULL, 'T' },
{ NULL, 0, NULL, 0 }
};
int c;
@@ -1837,6 +1840,11 @@ int main(int argc, char **argv)
case 'k':
flags |= BDRV_O_NATIVE_AIO;
break;
+ case 'T':
+ if (!trace_backend_init(optarg, NULL)) {
+ exit(1); /* error message will have been printed */
+ }
+ break;
case 'V':
printf("%s version %s\n", progname, VERSION);
exit(0);
diff --git a/qerror.c b/qerror.c
index 41c729a01f..96fbe71ae5 100644
--- a/qerror.c
+++ b/qerror.c
@@ -241,9 +241,14 @@ static const QErrorStringTable qerror_table[] = {
.desc = "Property '%(device).%(property)' can't find value '%(value)'",
},
{
+ .error_fmt = QERR_PROPERTY_VALUE_NOT_POWER_OF_2,
+ .desc = "Property '%(device).%(property)' doesn't take "
+ "value '%(value)', it's not a power of 2",
+ },
+ {
.error_fmt = QERR_PROPERTY_VALUE_OUT_OF_RANGE,
.desc = "Property '%(device).%(property)' doesn't take "
- "value %(value) (minimum: %(min), maximum: %(max)'",
+ "value %(value) (minimum: %(min), maximum: %(max))",
},
{
.error_fmt = QERR_QGA_COMMAND_FAILED,
diff --git a/qerror.h b/qerror.h
index e16f9c27e1..5c23c1ff20 100644
--- a/qerror.h
+++ b/qerror.h
@@ -202,6 +202,10 @@ QError *qobject_to_qerror(const QObject *obj);
#define QERR_PROPERTY_VALUE_NOT_FOUND \
"{ 'class': 'PropertyValueNotFound', 'data': { 'device': %s, 'property': %s, 'value': %s } }"
+#define QERR_PROPERTY_VALUE_NOT_POWER_OF_2 \
+ "{ 'class': 'PropertyValueNotPowerOf2', 'data': { " \
+ "'device': %s, 'property': %s, 'value': %"PRId64" } }"
+
#define QERR_PROPERTY_VALUE_OUT_OF_RANGE \
"{ 'class': 'PropertyValueOutOfRange', 'data': { 'device': %s, 'property': %s, 'value': %"PRId64", 'min': %"PRId64", 'max': %"PRId64" } }"
diff --git a/scripts/tracetool b/scripts/tracetool
index 47389b62ea..7b1c142b67 100755
--- a/scripts/tracetool
+++ b/scripts/tracetool
@@ -81,6 +81,10 @@ get_args()
args=${1#*\(}
args=${args%%\)*}
echo "$args"
+
+ if (echo "$args" | grep "[ *]next\($\|[, ]\)" > /dev/null 2>&1); then
+ echo -e "\n#error 'next' is a bad argument name (clash with systemtap keyword)\n "
+ fi
}
# Get the argument name list of a trace event
diff --git a/tests/qemu-iotests/009 b/tests/qemu-iotests/009
index f7262b57bf..25368c819b 100755
--- a/tests/qemu-iotests/009
+++ b/tests/qemu-iotests/009
@@ -53,10 +53,10 @@ _make_test_img $size
echo
echo "creating pattern"
$QEMU_IO \
- -c "write 2048k 4k -P 65" \
+ -c "write -P 65 2048k 4k" \
-c "write 4k 4k" \
-c "write 9M 4k" \
- -c "read 2044k 8k -P 65 -s 4k -l 4k" \
+ -c "read -P 65 -s 4k -l 4k 2044k 8k" \
$TEST_IMG | _filter_qemu_io
echo
diff --git a/tests/qemu-iotests/010 b/tests/qemu-iotests/010
index e3205aa206..7b5792934a 100755
--- a/tests/qemu-iotests/010
+++ b/tests/qemu-iotests/010
@@ -53,11 +53,11 @@ _make_test_img $size
echo
echo "creating pattern"
$QEMU_IO \
- -c "write 2048k 4k -P 165" \
+ -c "write -P 165 2048k 4k" \
-c "write 64k 4k" \
-c "write 9M 4k" \
- -c "write 2044k 4k -P 165" \
- -c "write 8M 4k -P 99" \
+ -c "write -P 165 2044k 4k" \
+ -c "write -P 99 8M 4k" \
-c "read -P 165 2044k 8k" \
$TEST_IMG | _filter_qemu_io
diff --git a/tests/qemu-iotests/011 b/tests/qemu-iotests/011
index 59df1ae48e..b03df6887d 100755
--- a/tests/qemu-iotests/011
+++ b/tests/qemu-iotests/011
@@ -60,7 +60,7 @@ for i in `seq 1 10`; do
# Note that we filter away the actual offset. That's because qemu
# may re-order the two aio requests. We only want to make sure the
# filesystem isn't corrupted afterwards anyway.
- $QEMU_IO $TEST_IMG -c "aio_write $off1 1M" -c "aio_write $off2 1M" | \
+ $QEMU_IO -c "aio_write $off1 1M" -c "aio_write $off2 1M" $TEST_IMG | \
_filter_qemu_io | \
sed -e 's/bytes at offset [0-9]*/bytes at offset XXX/g'
done
diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031
new file mode 100755
index 0000000000..6365f287e0
--- /dev/null
+++ b/tests/qemu-iotests/031
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Test that all qcow2 header extensions survive a header rewrite
+#
+# Copyright (C) 2011 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.pattern
+
+# This tests qcow2-specific low-level functionality
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+CLUSTER_SIZE=65536
+echo
+echo === Create image with unknown header extension ===
+echo
+_make_test_img 64M
+./qcow2.py $TEST_IMG add-header-ext 0x12345678 "This is a test header extension"
+./qcow2.py $TEST_IMG dump-header
+_check_test_img
+
+echo
+echo === Rewrite header with no backing file ===
+echo
+$QEMU_IMG rebase -u -b "" $TEST_IMG
+./qcow2.py $TEST_IMG dump-header
+_check_test_img
+
+echo
+echo === Add a backing file and format ===
+echo
+$QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device $TEST_IMG
+./qcow2.py $TEST_IMG dump-header
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out
new file mode 100644
index 0000000000..0f1bf685f3
--- /dev/null
+++ b/tests/qemu-iotests/031.out
@@ -0,0 +1,76 @@
+QA output created by 031
+
+=== Create image with unknown header extension ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 cluster_size=65536
+magic 0x514649fb
+version 2
+backing_file_offset 0x0
+backing_file_size 0x0
+cluster_bits 16
+size 67108864
+crypt_method 0
+l1_size 1
+l1_table_offset 0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters 1
+nb_snapshots 0
+snapshot_offset 0x0
+
+Header extension:
+magic 0x12345678
+length 31
+data 'This is a test header extension'
+
+No errors were found on the image.
+
+=== Rewrite header with no backing file ===
+
+magic 0x514649fb
+version 2
+backing_file_offset 0x0
+backing_file_size 0x0
+cluster_bits 16
+size 67108864
+crypt_method 0
+l1_size 1
+l1_table_offset 0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters 1
+nb_snapshots 0
+snapshot_offset 0x0
+
+Header extension:
+magic 0x12345678
+length 31
+data 'This is a test header extension'
+
+No errors were found on the image.
+
+=== Add a backing file and format ===
+
+magic 0x514649fb
+version 2
+backing_file_offset 0x90
+backing_file_size 0x17
+cluster_bits 16
+size 67108864
+crypt_method 0
+l1_size 1
+l1_table_offset 0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters 1
+nb_snapshots 0
+snapshot_offset 0x0
+
+Header extension:
+magic 0xe2792aca
+length 11
+data 'host_device'
+
+Header extension:
+magic 0x12345678
+length 31
+data 'This is a test header extension'
+
+*** done
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 26811ca660..4cb8dae8c6 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -57,16 +57,21 @@ _make_test_img()
{
# extra qemu-img options can be added by tests
# at least one argument (the image size) needs to be added
- local extra_img_options=$*
+ local extra_img_options=""
local cluster_size_filter="s# cluster_size=[0-9]\\+##g"
+ local image_size=$*
+ if [ "$1" = "-b" ]; then
+ extra_img_options="$1 $2"
+ image_size=$3
+ fi
if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then
extra_img_options="-o cluster_size=$CLUSTER_SIZE $extra_img_options"
cluster_size_filter=""
fi
# XXX(hch): have global image options?
- $QEMU_IMG create -f $IMGFMT $TEST_IMG $extra_img_options | \
+ $QEMU_IMG create -f $IMGFMT $extra_img_options $TEST_IMG $image_size | \
sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" | \
sed -e "s#$TEST_DIR#TEST_DIR#g" | \
sed -e "s#$IMGFMT#IMGFMT#g" | \
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index b549f10f17..1742ede180 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -37,3 +37,4 @@
028 rw backing auto
029 rw auto quick
030 rw auto
+031 rw auto quick
diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py
new file mode 100755
index 0000000000..bfb47e88fc
--- /dev/null
+++ b/tests/qemu-iotests/qcow2.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+
+import sys
+import struct
+import string
+
+class QcowHeaderExtension:
+
+ def __init__(self, magic, length, data):
+ self.magic = magic
+ self.length = length
+ self.data = data
+
+ @classmethod
+ def create(cls, magic, data):
+ return QcowHeaderExtension(magic, len(data), data)
+
+class QcowHeader:
+
+ uint32_t = 'I'
+ uint64_t = 'Q'
+
+ fields = [
+ # Version 2 header fields
+ [ uint32_t, '%#x', 'magic' ],
+ [ uint32_t, '%d', 'version' ],
+ [ uint64_t, '%#x', 'backing_file_offset' ],
+ [ uint32_t, '%#x', 'backing_file_size' ],
+ [ uint32_t, '%d', 'cluster_bits' ],
+ [ uint64_t, '%d', 'size' ],
+ [ uint32_t, '%d', 'crypt_method' ],
+ [ uint32_t, '%d', 'l1_size' ],
+ [ uint64_t, '%#x', 'l1_table_offset' ],
+ [ uint64_t, '%#x', 'refcount_table_offset' ],
+ [ uint32_t, '%d', 'refcount_table_clusters' ],
+ [ uint32_t, '%d', 'nb_snapshots' ],
+ [ uint64_t, '%#x', 'snapshot_offset' ],
+ ];
+
+ fmt = '>' + ''.join(field[0] for field in fields)
+
+ def __init__(self, fd):
+
+ buf_size = struct.calcsize(QcowHeader.fmt)
+
+ fd.seek(0)
+ buf = fd.read(buf_size)
+
+ header = struct.unpack(QcowHeader.fmt, buf)
+ self.__dict__ = dict((field[2], header[i])
+ for i, field in enumerate(QcowHeader.fields))
+
+ self.cluster_size = 1 << self.cluster_bits
+
+ fd.seek(self.get_header_length())
+ self.load_extensions(fd)
+
+ if self.backing_file_offset:
+ fd.seek(self.backing_file_offset)
+ self.backing_file = fd.read(self.backing_file_size)
+ else:
+ self.backing_file = None
+
+ def get_header_length(self):
+ if self.version == 2:
+ return 72
+ else:
+ raise Exception("version != 2 not supported")
+
+ def load_extensions(self, fd):
+ self.extensions = []
+
+ if self.backing_file_offset != 0:
+ end = min(self.cluster_size, self.backing_file_offset)
+ else:
+ end = self.cluster_size
+
+ while fd.tell() < end:
+ (magic, length) = struct.unpack('>II', fd.read(8))
+ if magic == 0:
+ break
+ else:
+ padded = (length + 7) & ~7
+ data = fd.read(padded)
+ self.extensions.append(QcowHeaderExtension(magic, length, data))
+
+ def update_extensions(self, fd):
+
+ fd.seek(self.get_header_length())
+ extensions = self.extensions
+ extensions.append(QcowHeaderExtension(0, 0, ""))
+ for ex in extensions:
+ buf = struct.pack('>II', ex.magic, ex.length)
+ fd.write(buf)
+ fd.write(ex.data)
+
+ if self.backing_file != None:
+ self.backing_file_offset = fd.tell()
+ fd.write(self.backing_file)
+
+ if fd.tell() > self.cluster_size:
+ raise Exception("I think I just broke the image...")
+
+
+ def update(self, fd):
+ header_bytes = self.get_header_length()
+
+ self.update_extensions(fd)
+
+ fd.seek(0)
+ header = tuple(self.__dict__[f] for t, p, f in QcowHeader.fields)
+ buf = struct.pack(QcowHeader.fmt, *header)
+ buf = buf[0:header_bytes-1]
+ fd.write(buf)
+
+ def dump(self):
+ for f in QcowHeader.fields:
+ print "%-25s" % f[2], f[1] % self.__dict__[f[2]]
+ print ""
+
+ def dump_extensions(self):
+ for ex in self.extensions:
+
+ data = ex.data[:ex.length]
+ if all(c in string.printable for c in data):
+ data = "'%s'" % data
+ else:
+ data = "<binary>"
+
+ print "Header extension:"
+ print "%-25s %#x" % ("magic", ex.magic)
+ print "%-25s %d" % ("length", ex.length)
+ print "%-25s %s" % ("data", data)
+ print ""
+
+
+def cmd_dump_header(fd):
+ h = QcowHeader(fd)
+ h.dump()
+ h.dump_extensions()
+
+def cmd_add_header_ext(fd, magic, data):
+ try:
+ magic = int(magic, 0)
+ except:
+ print "'%s' is not a valid magic number" % magic
+ sys.exit(1)
+
+ h = QcowHeader(fd)
+ h.extensions.append(QcowHeaderExtension.create(magic, data))
+ h.update(fd)
+
+def cmd_del_header_ext(fd, magic):
+ try:
+ magic = int(magic, 0)
+ except:
+ print "'%s' is not a valid magic number" % magic
+ sys.exit(1)
+
+ h = QcowHeader(fd)
+ found = False
+
+ for ex in h.extensions:
+ if ex.magic == magic:
+ found = True
+ h.extensions.remove(ex)
+
+ if not found:
+ print "No such header extension"
+ return
+
+ h.update(fd)
+
+cmds = [
+ [ 'dump-header', cmd_dump_header, 0, 'Dump image header and header extensions' ],
+ [ 'add-header-ext', cmd_add_header_ext, 2, 'Add a header extension' ],
+ [ 'del-header-ext', cmd_del_header_ext, 1, 'Delete a header extension' ],
+]
+
+def main(filename, cmd, args):
+ fd = open(filename, "r+b")
+ try:
+ for name, handler, num_args, desc in cmds:
+ if name != cmd:
+ continue
+ elif len(args) != num_args:
+ usage()
+ return
+ else:
+ handler(fd, *args)
+ return
+ print "Unknown command '%s'" % cmd
+ finally:
+ fd.close()
+
+def usage():
+ print "Usage: %s <file> <cmd> [<arg>, ...]" % sys.argv[0]
+ print ""
+ print "Supported commands:"
+ for name, handler, num_args, desc in cmds:
+ print " %-20s - %s" % (name, desc)
+
+if len(sys.argv) < 3:
+ usage()
+ sys.exit(1)
+
+main(sys.argv[1], sys.argv[2], sys.argv[3:])
diff --git a/trace-events b/trace-events
index db2cd39950..a5f276d020 100644
--- a/trace-events
+++ b/trace-events
@@ -562,7 +562,7 @@ qemu_coroutine_terminate(void *co) "self %p"
# qemu-coroutine-lock.c
qemu_co_queue_next_bh(void) ""
-qemu_co_queue_next(void *next) "next %p"
+qemu_co_queue_next(void *nxt) "next %p"
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"