| author | Anthony Liguori <aliguori@us.ibm.com> | 2012-04-10 08:16:12 -0500 |
|---|---|---|
| committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-04-10 08:16:12 -0500 |
| commit | bb5d8dd757eaa8f9a048c5205c69bed20ea373d1 (patch) | |
| tree | f4c7f7218e62f529c6406b0ad1635bef32867854 | |
| parent | 72fe3aaed94936739abfa158fa28f147b75ae9ff (diff) | |
| parent | 50d30c267563bf492fd403dd23abc7888f3e220c (diff) | |
Merge remote-tracking branch 'kwolf/for-anthony' into staging
* kwolf/for-anthony: (46 commits)
qed: remove incoming live migration blocker
qed: honor BDRV_O_INCOMING for incoming live migration
migration: clear BDRV_O_INCOMING flags on end of incoming live migration
qed: add bdrv_invalidate_cache to be called after incoming live migration
blockdev: open images with BDRV_O_INCOMING on incoming live migration
block: add a function to clear incoming live migration flags
block: Add new BDRV_O_INCOMING flag to notice incoming live migration
block stream: close unused files and update ->backing_hd
qemu-iotests: Fix call syntax for qemu-io
qemu-iotests: Fix call syntax for qemu-img
qemu-iotests: Test unknown qcow2 header extensions
qemu-iotests: qcow2.py
sheepdog: fix send req helpers
sheepdog: implement SD_OP_FLUSH_VDI operation
block: bdrv_append() fixes
qed: track dirty flag status
qemu-img: add dirty flag status
qed: image fragmentation statistics
qemu-img: add image fragmentation statistics
block: document job API
...
44 files changed, 1081 insertions, 494 deletions
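The block-job commits in this pull give jobs a `busy` flag and let bdrv_close() cancel a running job synchronously before the device goes away. The helper is small enough to quote in full; the sketch below is taken from the block.c hunk further down and assumes the job cooperates by clearing `busy` around its sleeps, as the image-streaming job now does:

```c
void block_job_cancel_sync(BlockJob *job)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    block_job_cancel(job);
    /* Spin the AIO event loop until the job notices the cancellation,
     * leaves its busy section and detaches itself from bs. */
    while (bs->job != NULL && bs->job->busy) {
        qemu_aio_wait();
    }
}
```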
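Most of the per-format flush callbacks removed in this diff (cow, qcow, qcow2, raw, vdi, vpc, blkdebug, qed) only forwarded the flush to bs->file; the generic layer now does that itself. A condensed sketch of the new bdrv_co_flush() control flow, with the flush-to-OS and AIO-emulation branches elided for brevity:

```c
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else {
        /* no flush callback: trust that the format needs none */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Flush the underlying protocol as well.  With cache=unsafe it
     * carries BDRV_O_NO_FLUSH, so this adds no useless flushes. */
    return bdrv_co_flush(bs->file);
}
```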
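The incoming-migration commits add a consistency hint rather than a hard blocker: drive_init() sets BDRV_O_INCOMING while the VM is in RUN_STATE_INMIGRATE, qed skips its dirty-image repair and autoclear-header rewrite while the flag is set, and process_incoming_migration() drops the flag again right before bdrv_invalidate_cache_all(). The two new pieces, as added below:

```c
void bdrv_clear_incoming_migration_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bs->open_flags &= ~BDRV_O_INCOMING;
    }
}

/* qed re-reads its metadata from scratch once migration has finished */
static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;

    bdrv_qed_close(bs);
    memset(s, 0, sizeof(BDRVQEDState));
    bdrv_qed_open(bs, bs->open_flags);
}
```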
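The qemu-img fragmentation statistics are gathered during the image check: while walking an L2 table, qed counts every allocated cluster and treats a cluster whose file offset does not immediately follow its predecessor's as fragmented. A sketch of the accounting inside qed_check_l2_table(), assuming the existing unallocated/zero-cluster filtering:

```c
uint64_t last_offset = 0;

for (i = 0; i < s->table_nelems; i++) {
    uint64_t offset = table->offsets[i];

    /* existing filtering: skip holes and explicit zero clusters */
    if (qed_offset_is_unalloc_cluster(offset) ||
        qed_offset_is_zero_cluster(offset)) {
        continue;
    }

    check->result->bfi.allocated_clusters++;
    if (last_offset && last_offset + s->header.cluster_size != offset) {
        check->result->bfi.fragmented_clusters++;
    }
    last_offset = offset;
    /* ... existing cluster-offset validation continues here ... */
}
```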
@@ -813,6 +813,9 @@ unlink_and_fail: void bdrv_close(BlockDriverState *bs) { if (bs->drv) { + if (bs->job) { + block_job_cancel_sync(bs->job); + } if (bs == bs_snapshots) { bs_snapshots = NULL; } @@ -889,14 +892,16 @@ void bdrv_make_anon(BlockDriverState *bs) * This will modify the BlockDriverState fields, and swap contents * between bs_new and bs_top. Both bs_new and bs_top are modified. * + * bs_new is required to be anonymous. + * * This function does not create any image files. */ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) { BlockDriverState tmp; - /* the new bs must not be in bdrv_states */ - bdrv_make_anon(bs_new); + /* bs_new must be anonymous */ + assert(bs_new->device_name[0] == '\0'); tmp = *bs_new; @@ -941,11 +946,18 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) * swapping bs_new and bs_top contents. */ tmp.backing_hd = bs_new; pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename); + bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format)); /* swap contents of the fixed new bs and the current top */ *bs_new = *bs_top; *bs_top = tmp; + /* device_name[] was carried over from the old bs_top. bs_new + * shouldn't be in bdrv_states, so we need to make device_name[] + * reflect the anonymity of bs_new + */ + bs_new->device_name[0] = '\0'; + /* clear the copied fields in the new backing file */ bdrv_detach_dev(bs_new, bs_new->dev); @@ -966,6 +978,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); + assert(!bs->job); + assert(!bs->in_use); /* remove from list, if necessary */ bdrv_make_anon(bs); @@ -1463,6 +1477,17 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, qemu_iovec_init_external(&qiov, &iov, 1); + /** + * In sync call context, when the vcpu is blocked, this throttling timer + * will not fire; so the I/O throttling function has to be disabled here + * if it has been enabled. + */ + if (bs->io_limits_enabled) { + fprintf(stderr, "Disabling I/O throttling on '%s' due " + "to synchronous I/O.\n", bdrv_get_device_name(bs)); + bdrv_io_limits_disable(bs); + } + if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ bdrv_rw_co_entry(&rwco); @@ -1969,10 +1994,19 @@ static int guess_disk_lchs(BlockDriverState *bs, struct partition *p; uint32_t nr_sects; uint64_t nb_sectors; + bool enabled; bdrv_get_geometry(bs, &nb_sectors); + /** + * The function will be invoked during startup not only in sync I/O mode, + * but also in async I/O mode. So the I/O throttling function has to + * be disabled temporarily here, not permanently. 
+ */ + enabled = bs->io_limits_enabled; + bs->io_limits_enabled = false; ret = bdrv_read(bs, 0, buf, 1); + bs->io_limits_enabled = enabled; if (ret < 0) return -1; /* test msdos magic */ @@ -2331,9 +2365,7 @@ void bdrv_flush_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { - if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) { - bdrv_flush(bs); - } + bdrv_flush(bs); } } @@ -3520,7 +3552,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; - if (!bs->drv) { + if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { return 0; } @@ -3538,7 +3570,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) } if (bs->drv->bdrv_co_flush_to_disk) { - return bs->drv->bdrv_co_flush_to_disk(bs); + ret = bs->drv->bdrv_co_flush_to_disk(bs); } else if (bs->drv->bdrv_aio_flush) { BlockDriverAIOCB *acb; CoroutineIOCompletion co = { @@ -3547,10 +3579,10 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); if (acb == NULL) { - return -EIO; + ret = -EIO; } else { qemu_coroutine_yield(); - return co.ret; + ret = co.ret; } } else { /* @@ -3564,8 +3596,16 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) * * Let's hope the user knows what he's doing. */ - return 0; + ret = 0; + } + if (ret < 0) { + return ret; } + + /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH + * in the case of cache=unsafe, so there are no useless flushes. + */ + return bdrv_co_flush(bs->file); } void bdrv_invalidate_cache(BlockDriverState *bs) @@ -3584,6 +3624,15 @@ void bdrv_invalidate_cache_all(void) } } +void bdrv_clear_incoming_migration_all(void) +{ + BlockDriverState *bs; + + QTAILQ_FOREACH(bs, &bdrv_states, list) { + bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING); + } +} + int bdrv_flush(BlockDriverState *bs) { Coroutine *co; @@ -4054,10 +4103,16 @@ void block_job_complete(BlockJob *job, int ret) int block_job_set_speed(BlockJob *job, int64_t value) { + int rc; + if (!job->job_type->set_speed) { return -ENOTSUP; } - return job->job_type->set_speed(job, value); + rc = job->job_type->set_speed(job, value); + if (rc == 0) { + job->speed = value; + } + return rc; } void block_job_cancel(BlockJob *job) @@ -4069,3 +4124,14 @@ bool block_job_is_cancelled(BlockJob *job) { return job->cancelled; } + +void block_job_cancel_sync(BlockJob *job) +{ + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + block_job_cancel(job); + while (bs->job != NULL && bs->job->busy) { + qemu_aio_wait(); + } +} @@ -15,8 +15,15 @@ typedef struct BlockDriverInfo { int cluster_size; /* offset at which the VM state can be saved (0 if not possible) */ int64_t vm_state_offset; + bool is_dirty; } BlockDriverInfo; +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; +} BlockFragInfo; + typedef struct QEMUSnapshotInfo { char id_str[128]; /* unique snapshot id */ /* the following fields are informative. 
They are not needed for @@ -71,6 +78,7 @@ typedef struct BlockDevOps { #define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ #define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ #define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) @@ -175,13 +183,12 @@ typedef struct BdrvCheckResult { int corruptions; int leaks; int check_errors; + BlockFragInfo bfi; } BdrvCheckResult; int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res); /* async block I/O */ -typedef struct BlockDriverAIOCB BlockDriverAIOCB; -typedef void BlockDriverCompletionFunc(void *opaque, int ret); typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, int sector_num); BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, @@ -222,6 +229,8 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, void bdrv_invalidate_cache(BlockDriverState *bs); void bdrv_invalidate_cache_all(void); +void bdrv_clear_incoming_migration_all(void); + /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); @@ -437,10 +446,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) #define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ DEFINE_PROP_DRIVE("drive", _state, _conf.bs), \ - DEFINE_PROP_UINT16("logical_block_size", _state, \ - _conf.logical_block_size, 512), \ - DEFINE_PROP_UINT16("physical_block_size", _state, \ - _conf.physical_block_size, 512), \ + DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ + _conf.logical_block_size, 512), \ + DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ + _conf.physical_block_size, 512), \ DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), \ DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_INT32("bootindex", _state, _conf.bootindex, -1), \ diff --git a/block/blkdebug.c b/block/blkdebug.c index a251802ad4..e56e37da51 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -397,12 +397,6 @@ static void blkdebug_close(BlockDriverState *bs) } } -static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) -{ - return bdrv_aio_flush(bs->file, cb, opaque); -} - static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, BlkdebugVars *old_vars) { @@ -452,7 +446,6 @@ static BlockDriver bdrv_blkdebug = { .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev = blkdebug_aio_writev, - .bdrv_aio_flush = blkdebug_aio_flush, .bdrv_debug_event = blkdebug_debug_event, }; diff --git a/block/cow.c b/block/cow.c index bb5927c6aa..8d3c9f873c 100644 --- a/block/cow.c +++ b/block/cow.c @@ -318,11 +318,6 @@ exit: return ret; } -static coroutine_fn int cow_co_flush(BlockDriverState *bs) -{ - return bdrv_co_flush(bs->file); -} - static QEMUOptionParameter cow_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -348,7 +343,6 @@ static BlockDriver bdrv_cow = { .bdrv_read = cow_co_read, .bdrv_write = cow_co_write, - .bdrv_co_flush_to_disk = cow_co_flush, .bdrv_co_is_allocated = cow_co_is_allocated, .create_options = cow_create_options, diff --git a/block/qcow.c b/block/qcow.c index b1cfe1f696..35dff497ae 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -835,11 +835,6 @@ fail: return ret; } -static coroutine_fn int qcow_co_flush(BlockDriverState *bs) -{ - return bdrv_co_flush(bs->file); -} 
- static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { BDRVQcowState *s = bs->opaque; @@ -877,7 +872,6 @@ static BlockDriver bdrv_qcow = { .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, - .bdrv_co_flush_to_disk = qcow_co_flush, .bdrv_co_is_allocated = qcow_co_is_allocated, .bdrv_set_key = qcow_set_key, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e0fb90792f..cbd224dc46 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -466,7 +466,6 @@ out: */ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, uint64_t **new_l2_table, - uint64_t *new_l2_offset, int *new_l2_index) { BDRVQcowState *s = bs->opaque; @@ -514,7 +513,6 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); *new_l2_table = l2_table; - *new_l2_offset = l2_offset; *new_l2_index = l2_index; return 0; @@ -539,11 +537,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; int l2_index, ret; - uint64_t l2_offset, *l2_table; + uint64_t *l2_table; int64_t cluster_offset; int nb_csectors; - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(bs, offset, &l2_table, &l2_index); if (ret < 0) { return 0; } @@ -588,7 +586,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcowState *s = bs->opaque; int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, l2_offset, *l2_table; + uint64_t *old_cluster, start_sect, *l2_table; uint64_t cluster_offset = m->alloc_offset; bool cow = false; @@ -633,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) } qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); - ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); if (ret < 0) { goto err; } @@ -817,7 +815,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, { BDRVQcowState *s = bs->opaque; int l2_index, ret, sectors; - uint64_t l2_offset, *l2_table; + uint64_t *l2_table; unsigned int nb_clusters, keep_clusters; uint64_t cluster_offset; @@ -825,7 +823,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, n_start, n_end); /* Find L2 entry for the first involved cluster */ - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(bs, offset, &l2_table, &l2_index); if (ret < 0) { return ret; } @@ -1000,12 +998,12 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, unsigned int nb_clusters) { BDRVQcowState *s = bs->opaque; - uint64_t l2_offset, *l2_table; + uint64_t *l2_table; int l2_index; int ret; int i; - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(bs, offset, &l2_table, &l2_index); if (ret < 0) { return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index 7aece65406..70d3141dd1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1253,11 +1253,6 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) return 0; } -static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs) -{ - return bdrv_co_flush(bs->file); -} - static int64_t qcow2_vm_state_offset(BDRVQcowState *s) { return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); @@ -1377,7 +1372,6 @@ static BlockDriver bdrv_qcow2 = { .bdrv_co_readv = qcow2_co_readv, .bdrv_co_writev = 
qcow2_co_writev, .bdrv_co_flush_to_os = qcow2_co_flush_to_os, - .bdrv_co_flush_to_disk = qcow2_co_flush_to_disk, .bdrv_co_discard = qcow2_co_discard, .bdrv_truncate = qcow2_truncate, diff --git a/block/qed-check.c b/block/qed-check.c index e4a49ce72c..94327ff5b3 100644 --- a/block/qed-check.c +++ b/block/qed-check.c @@ -68,6 +68,7 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table) { BDRVQEDState *s = check->s; unsigned int i, num_invalid = 0; + uint64_t last_offset = 0; for (i = 0; i < s->table_nelems; i++) { uint64_t offset = table->offsets[i]; @@ -76,6 +77,11 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table) qed_offset_is_zero_cluster(offset)) { continue; } + check->result->bfi.allocated_clusters++; + if (last_offset && (last_offset + s->header.cluster_size != offset)) { + check->result->bfi.fragmented_clusters++; + } + last_offset = offset; /* Detect invalid cluster offset */ if (!qed_check_cluster_offset(s, offset)) { @@ -200,6 +206,9 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix) check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) * sizeof(check.used_clusters[0])); + check.result->bfi.total_clusters = + (s->header.image_size + s->header.cluster_size - 1) / + s->header.cluster_size; ret = qed_check_l1_table(&check, s->l1_table); if (ret == 0) { /* Only check for leaks if entire image was scanned successfully */ diff --git a/block/qed.c b/block/qed.c index a041d31e66..366cde7ad8 100644 --- a/block/qed.c +++ b/block/qed.c @@ -450,7 +450,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) * feature is no longer valid. */ if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && - !bdrv_is_read_only(bs->file)) { + !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) { s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; ret = qed_write_header_sync(s); @@ -477,7 +477,8 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) * potentially inconsistent images to be opened read-only. This can * aid data recovery from an otherwise inconsistent image. 
*/ - if (!bdrv_is_read_only(bs->file)) { + if (!bdrv_is_read_only(bs->file) && + !(flags & BDRV_O_INCOMING)) { BdrvCheckResult result = {0}; ret = qed_check(s, &result, true); @@ -497,12 +498,6 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) s->need_check_timer = qemu_new_timer_ns(vm_clock, qed_need_check_timer_cb, s); - error_set(&s->migration_blocker, - QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, - "qed", bs->device_name, "live migration"); - migrate_add_blocker(s->migration_blocker); - - out: if (ret) { qed_free_l2_cache(&s->l2_cache); @@ -515,9 +510,6 @@ static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); - qed_cancel_need_check_timer(s); qemu_free_timer(s->need_check_timer); @@ -1350,13 +1342,6 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, opaque, QED_AIOCB_WRITE); } -static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, - void *opaque) -{ - return bdrv_aio_flush(bs->file, cb, opaque); -} - typedef struct { Coroutine *co; int ret; @@ -1441,6 +1426,7 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) memset(bdi, 0, sizeof(*bdi)); bdi->cluster_size = s->header.cluster_size; + bdi->is_dirty = s->header.features & QED_F_NEED_CHECK; return 0; } @@ -1516,6 +1502,15 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs, return ret; } +static void bdrv_qed_invalidate_cache(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + bdrv_qed_close(bs); + memset(s, 0, sizeof(BDRVQEDState)); + bdrv_qed_open(bs, bs->open_flags); +} + static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result) { BDRVQEDState *s = bs->opaque; @@ -1562,12 +1557,12 @@ static BlockDriver bdrv_qed = { .bdrv_make_empty = bdrv_qed_make_empty, .bdrv_aio_readv = bdrv_qed_aio_readv, .bdrv_aio_writev = bdrv_qed_aio_writev, - .bdrv_aio_flush = bdrv_qed_aio_flush, .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes, .bdrv_truncate = bdrv_qed_truncate, .bdrv_getlength = bdrv_qed_getlength, .bdrv_get_info = bdrv_qed_get_info, .bdrv_change_backing_file = bdrv_qed_change_backing_file, + .bdrv_invalidate_cache = bdrv_qed_invalidate_cache, .bdrv_check = bdrv_qed_check, }; diff --git a/block/qed.h b/block/qed.h index 62624a1f34..c716772ad7 100644 --- a/block/qed.h +++ b/block/qed.h @@ -169,8 +169,6 @@ typedef struct { /* Periodic flush and clear need check flag */ QEMUTimer *need_check_timer; - - Error *migration_blocker; } BDRVQEDState; enum { diff --git a/block/raw.c b/block/raw.c index 1cdac0ccdc..7086e314a6 100644 --- a/block/raw.c +++ b/block/raw.c @@ -25,11 +25,6 @@ static void raw_close(BlockDriverState *bs) { } -static int coroutine_fn raw_co_flush(BlockDriverState *bs) -{ - return bdrv_co_flush(bs->file); -} - static int64_t raw_getlength(BlockDriverState *bs) { return bdrv_getlength(bs->file); @@ -113,7 +108,6 @@ static BlockDriver bdrv_raw = { .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, - .bdrv_co_flush_to_disk = raw_co_flush, .bdrv_co_discard = raw_co_discard, .bdrv_probe = raw_probe, diff --git a/block/sheepdog.c b/block/sheepdog.c index 00276f6f46..3eaf625e98 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -32,9 +32,11 @@ #define SD_OP_RELEASE_VDI 0x13 #define SD_OP_GET_VDI_INFO 0x14 #define SD_OP_READ_VDIS 0x15 +#define SD_OP_FLUSH_VDI 0x16 #define SD_FLAG_CMD_WRITE 0x01 #define SD_FLAG_CMD_COW 0x02 +#define SD_FLAG_CMD_CACHE 0x04 #define SD_RES_SUCCESS 0x00 /* 
Success */ #define SD_RES_UNKNOWN 0x01 /* Unknown error */ @@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState { char name[SD_MAX_VDI_LEN]; int is_snapshot; + uint8_t cache_enabled; char *addr; char *port; int fd; + int flush_fd; CoMutex lock; Coroutine *co_send; @@ -506,6 +510,7 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data, ret = qemu_send_full(sockfd, hdr, sizeof(*hdr), 0); if (ret < sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); + return ret; } ret = qemu_send_full(sockfd, data, *wlen, 0); @@ -516,6 +521,24 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data, return ret; } +static int send_co_req(int sockfd, SheepdogReq *hdr, void *data, + unsigned int *wlen) +{ + int ret; + + ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); + if (ret < sizeof(*hdr)) { + error_report("failed to send a req, %s", strerror(errno)); + return ret; + } + + ret = qemu_co_send(sockfd, data, *wlen); + if (ret < *wlen) { + error_report("failed to send a req, %s", strerror(errno)); + } + + return ret; +} static int do_req(int sockfd, SheepdogReq *hdr, void *data, unsigned int *wlen, unsigned int *rlen) { @@ -550,6 +573,40 @@ out: return ret; } +static int do_co_req(int sockfd, SheepdogReq *hdr, void *data, + unsigned int *wlen, unsigned int *rlen) +{ + int ret; + + socket_set_block(sockfd); + ret = send_co_req(sockfd, hdr, data, wlen); + if (ret < 0) { + goto out; + } + + ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); + if (ret < sizeof(*hdr)) { + error_report("failed to get a rsp, %s", strerror(errno)); + goto out; + } + + if (*rlen > hdr->data_length) { + *rlen = hdr->data_length; + } + + if (*rlen) { + ret = qemu_co_recv(sockfd, data, *rlen); + if (ret < *rlen) { + error_report("failed to get the data, %s", strerror(errno)); + goto out; + } + } + ret = 0; +out: + socket_set_nonblock(sockfd); + return ret; +} + static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, int create, enum AIOCBState aiocb_type); @@ -900,6 +957,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, hdr.flags = SD_FLAG_CMD_WRITE | flags; } + if (s->cache_enabled) { + hdr.flags |= SD_FLAG_CMD_CACHE; + } + hdr.oid = oid; hdr.cow_oid = old_oid; hdr.copies = s->inode.nr_copies; @@ -942,7 +1003,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, static int read_write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, - int write, int create) + int write, int create, uint8_t cache) { SheepdogObjReq hdr; SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; @@ -965,6 +1026,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, rlen = datalen; hdr.opcode = SD_OP_READ_OBJ; } + + if (cache) { + hdr.flags |= SD_FLAG_CMD_CACHE; + } + hdr.oid = oid; hdr.data_length = datalen; hdr.offset = offset; @@ -986,15 +1052,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, } static int read_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset) + unsigned int datalen, uint64_t offset, uint8_t cache) { - return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0); + return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0, + cache); } static int write_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset, int create) + unsigned int datalen, uint64_t offset, int create, + uint8_t cache) { - return 
read_write_object(fd, buf, oid, copies, datalen, offset, 1, create); + return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create, + cache); } static int sd_open(BlockDriverState *bs, const char *filename, int flags) @@ -1026,6 +1095,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } + if (flags & BDRV_O_CACHE_WB) { + s->cache_enabled = 1; + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + goto out; + } + } + if (snapid) { dprintf("%" PRIx32 " snapshot inode was open.\n", vid); s->is_snapshot = 1; @@ -1038,7 +1116,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) } buf = g_malloc(SD_INODE_SIZE); - ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0); + ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, + s->cache_enabled); closesocket(fd); @@ -1272,6 +1351,9 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL); closesocket(s->fd); + if (s->cache_enabled) { + closesocket(s->flush_fd); + } g_free(s->addr); } @@ -1305,7 +1387,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, 0); + s->inode.nr_copies, datalen, 0, 0, s->cache_enabled); close(fd); if (ret < 0) { @@ -1387,7 +1469,7 @@ static int sd_create_branch(BDRVSheepdogState *s) } ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0); + SD_INODE_SIZE, 0, s->cache_enabled); closesocket(fd); @@ -1575,6 +1657,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, return acb->ret; } +static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) +{ + BDRVSheepdogState *s = bs->opaque; + SheepdogObjReq hdr = { 0 }; + SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; + SheepdogInode *inode = &s->inode; + int ret; + unsigned int wlen = 0, rlen = 0; + + if (!s->cache_enabled) { + return 0; + } + + hdr.opcode = SD_OP_FLUSH_VDI; + hdr.oid = vid_to_vdi_oid(inode->vdi_id); + + ret = do_co_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen); + if (ret) { + error_report("failed to send a request to the sheep"); + return ret; + } + + if (rsp->result != SD_RES_SUCCESS) { + error_report("%s", sd_strerror(rsp->result)); + return -EIO; + } + + return 0; +} + static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) { BDRVSheepdogState *s = bs->opaque; @@ -1610,7 +1722,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, 0); + s->inode.nr_copies, datalen, 0, 0, s->cache_enabled); if (ret < 0) { error_report("failed to write snapshot's inode."); ret = -EIO; @@ -1629,7 +1741,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) inode = (SheepdogInode *)g_malloc(datalen); ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), - s->inode.nr_copies, datalen, 0); + s->inode.nr_copies, datalen, 0, s->cache_enabled); if (ret < 0) { error_report("failed to read new inode info. 
%s", strerror(errno)); @@ -1684,7 +1796,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0); + SD_INODE_SIZE, 0, s->cache_enabled); closesocket(fd); @@ -1779,7 +1891,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) /* we don't need to read entire object */ ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), - 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0); + 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, + s->cache_enabled); if (ret) { continue; @@ -1835,10 +1948,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, create = (offset == 0); if (load) { ret = read_object(fd, (char *)data, vmstate_oid, - s->inode.nr_copies, data_len, offset); + s->inode.nr_copies, data_len, offset, + s->cache_enabled); } else { ret = write_object(fd, (char *)data, vmstate_oid, - s->inode.nr_copies, data_len, offset, create); + s->inode.nr_copies, data_len, offset, create, + s->cache_enabled); } if (ret < 0) { @@ -1904,6 +2019,7 @@ BlockDriver bdrv_sheepdog = { .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, diff --git a/block/stream.c b/block/stream.c index d1b3986a8a..0efe1adfd5 100644 --- a/block/stream.c +++ b/block/stream.c @@ -76,6 +76,39 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); } +static void close_unused_images(BlockDriverState *top, BlockDriverState *base, + const char *base_id) +{ + BlockDriverState *intermediate; + intermediate = top->backing_hd; + + while (intermediate) { + BlockDriverState *unused; + + /* reached base */ + if (intermediate == base) { + break; + } + + unused = intermediate; + intermediate = intermediate->backing_hd; + unused->backing_hd = NULL; + bdrv_delete(unused); + } + top->backing_hd = base; + + pstrcpy(top->backing_file, sizeof(top->backing_file), ""); + pstrcpy(top->backing_format, sizeof(top->backing_format), ""); + if (base_id) { + pstrcpy(top->backing_file, sizeof(top->backing_file), base_id); + if (base->drv) { + pstrcpy(top->backing_format, sizeof(top->backing_format), + base->drv->format_name); + } + } + +} + /* * Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP] * @@ -175,7 +208,7 @@ retry: break; } - + s->common.busy = true; if (base) { ret = is_allocated_base(bs, base, sector_num, STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); @@ -189,6 +222,7 @@ retry: if (s->common.speed) { uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n); if (delay_ns > 0) { + s->common.busy = false; co_sleep_ns(rt_clock, delay_ns); /* Recheck cancellation and that sectors are unallocated */ @@ -208,6 +242,7 @@ retry: /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that qemu_aio_flush() returns. 
*/ + s->common.busy = false; co_sleep_ns(rt_clock, 0); } @@ -215,12 +250,13 @@ retry: bdrv_disable_copy_on_read(bs); } - if (sector_num == end && ret == 0) { + if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL; if (base) { base_id = s->backing_file_id; } ret = bdrv_change_backing_file(bs, base_id, NULL); + close_unused_images(bs, base, base_id); } qemu_vfree(buf); @@ -234,7 +270,6 @@ static int stream_set_speed(BlockJob *job, int64_t value) if (value < 0) { return -EINVAL; } - job->speed = value; ratelimit_set_speed(&s->limit, value / BDRV_SECTOR_SIZE); return 0; } diff --git a/block/vdi.c b/block/vdi.c index 6a0011fbcc..119d3c74da 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -144,29 +144,6 @@ void uuid_unparse(const uuid_t uu, char *out) #endif typedef struct { - BlockDriverAIOCB common; - int64_t sector_num; - QEMUIOVector *qiov; - uint8_t *buf; - /* Total number of sectors. */ - int nb_sectors; - /* Number of sectors for current AIO. */ - int n_sectors; - /* New allocated block map entry. */ - uint32_t bmap_first; - uint32_t bmap_last; - /* Buffer for new allocated block. */ - void *block_buffer; - void *orig_buf; - bool is_write; - int header_modified; - BlockDriverAIOCB *hd_aiocb; - struct iovec hd_iov; - QEMUIOVector hd_qiov; - QEMUBH *bh; -} VdiAIOCB; - -typedef struct { char text[0x40]; uint32_t signature; uint32_t version; @@ -489,332 +466,150 @@ static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, return VDI_IS_ALLOCATED(bmap_entry); } -static void vdi_aio_cancel(BlockDriverAIOCB *blockacb) -{ - /* TODO: This code is untested. How can I get it executed? */ - VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common); - logout("\n"); - if (acb->hd_aiocb) { - bdrv_aio_cancel(acb->hd_aiocb); - } - qemu_aio_release(acb); -} - -static AIOPool vdi_aio_pool = { - .aiocb_size = sizeof(VdiAIOCB), - .cancel = vdi_aio_cancel, -}; - -static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int is_write) -{ - VdiAIOCB *acb; - - logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n", - bs, sector_num, qiov, nb_sectors, cb, opaque, is_write); - - acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque); - acb->hd_aiocb = NULL; - acb->sector_num = sector_num; - acb->qiov = qiov; - acb->is_write = is_write; - - if (qiov->niov > 1) { - acb->buf = qemu_blockalign(bs, qiov->size); - acb->orig_buf = acb->buf; - if (is_write) { - qemu_iovec_to_buffer(qiov, acb->buf); - } - } else { - acb->buf = (uint8_t *)qiov->iov->iov_base; - } - acb->nb_sectors = nb_sectors; - acb->n_sectors = 0; - acb->bmap_first = VDI_UNALLOCATED; - acb->bmap_last = VDI_UNALLOCATED; - acb->block_buffer = NULL; - acb->header_modified = 0; - return acb; -} - -static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb) -{ - logout("\n"); - - if (acb->bh) { - return -EIO; - } - - acb->bh = qemu_bh_new(cb, acb); - if (!acb->bh) { - return -EIO; - } - - qemu_bh_schedule(acb->bh); - - return 0; -} - -static void vdi_aio_read_cb(void *opaque, int ret); -static void vdi_aio_write_cb(void *opaque, int ret); - -static void vdi_aio_rw_bh(void *opaque) +static int vdi_co_read(BlockDriverState *bs, + int64_t sector_num, uint8_t *buf, int nb_sectors) { - VdiAIOCB *acb = opaque; - logout("\n"); - qemu_bh_delete(acb->bh); - acb->bh = NULL; - - if (acb->is_write) { - vdi_aio_write_cb(opaque, 0); - } else { - vdi_aio_read_cb(opaque, 0); - } -} - -static void vdi_aio_read_cb(void *opaque, int ret) -{ - 
VdiAIOCB *acb = opaque; - BlockDriverState *bs = acb->common.bs; BDRVVdiState *s = bs->opaque; uint32_t bmap_entry; uint32_t block_index; uint32_t sector_in_block; uint32_t n_sectors; + int ret = 0; - logout("%u sectors read\n", acb->n_sectors); - - acb->hd_aiocb = NULL; - - if (ret < 0) { - goto done; - } - - acb->nb_sectors -= acb->n_sectors; - - if (acb->nb_sectors == 0) { - /* request completed */ - ret = 0; - goto done; - } - - acb->sector_num += acb->n_sectors; - acb->buf += acb->n_sectors * SECTOR_SIZE; - - block_index = acb->sector_num / s->block_sectors; - sector_in_block = acb->sector_num % s->block_sectors; - n_sectors = s->block_sectors - sector_in_block; - if (n_sectors > acb->nb_sectors) { - n_sectors = acb->nb_sectors; - } - - logout("will read %u sectors starting at sector %" PRIu64 "\n", - n_sectors, acb->sector_num); + logout("\n"); - /* prepare next AIO request */ - acb->n_sectors = n_sectors; - bmap_entry = le32_to_cpu(s->bmap[block_index]); - if (!VDI_IS_ALLOCATED(bmap_entry)) { - /* Block not allocated, return zeros, no need to wait. */ - memset(acb->buf, 0, n_sectors * SECTOR_SIZE); - ret = vdi_schedule_bh(vdi_aio_rw_bh, acb); - if (ret < 0) { - goto done; + while (ret >= 0 && nb_sectors > 0) { + block_index = sector_num / s->block_sectors; + sector_in_block = sector_num % s->block_sectors; + n_sectors = s->block_sectors - sector_in_block; + if (n_sectors > nb_sectors) { + n_sectors = nb_sectors; } - } else { - uint64_t offset = s->header.offset_data / SECTOR_SIZE + - (uint64_t)bmap_entry * s->block_sectors + - sector_in_block; - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov, - n_sectors, vdi_aio_read_cb, acb); - } - return; -done: - if (acb->qiov->niov > 1) { - qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); - qemu_vfree(acb->orig_buf); - } - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); -} -static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - VdiAIOCB *acb; - int ret; + logout("will read %u sectors starting at sector %" PRIu64 "\n", + n_sectors, sector_num); - logout("\n"); - acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); - ret = vdi_schedule_bh(vdi_aio_rw_bh, acb); - if (ret < 0) { - if (acb->qiov->niov > 1) { - qemu_vfree(acb->orig_buf); + /* prepare next AIO request */ + bmap_entry = le32_to_cpu(s->bmap[block_index]); + if (!VDI_IS_ALLOCATED(bmap_entry)) { + /* Block not allocated, return zeros, no need to wait. 
*/ + memset(buf, 0, n_sectors * SECTOR_SIZE); + ret = 0; + } else { + uint64_t offset = s->header.offset_data / SECTOR_SIZE + + (uint64_t)bmap_entry * s->block_sectors + + sector_in_block; + ret = bdrv_read(bs->file, offset, buf, n_sectors); } - qemu_aio_release(acb); - return NULL; + logout("%u sectors read\n", n_sectors); + + nb_sectors -= n_sectors; + sector_num += n_sectors; + buf += n_sectors * SECTOR_SIZE; } - return &acb->common; + return ret; } -static void vdi_aio_write_cb(void *opaque, int ret) +static int vdi_co_write(BlockDriverState *bs, + int64_t sector_num, const uint8_t *buf, int nb_sectors) { - VdiAIOCB *acb = opaque; - BlockDriverState *bs = acb->common.bs; BDRVVdiState *s = bs->opaque; uint32_t bmap_entry; uint32_t block_index; uint32_t sector_in_block; uint32_t n_sectors; + uint32_t bmap_first = VDI_UNALLOCATED; + uint32_t bmap_last = VDI_UNALLOCATED; + uint8_t *block = NULL; + int ret = 0; - acb->hd_aiocb = NULL; + logout("\n"); - if (ret < 0) { - goto done; - } + while (ret >= 0 && nb_sectors > 0) { + block_index = sector_num / s->block_sectors; + sector_in_block = sector_num % s->block_sectors; + n_sectors = s->block_sectors - sector_in_block; + if (n_sectors > nb_sectors) { + n_sectors = nb_sectors; + } + + logout("will write %u sectors starting at sector %" PRIu64 "\n", + n_sectors, sector_num); - acb->nb_sectors -= acb->n_sectors; - acb->sector_num += acb->n_sectors; - acb->buf += acb->n_sectors * SECTOR_SIZE; - - if (acb->nb_sectors == 0) { - logout("finished data write\n"); - acb->n_sectors = 0; - if (acb->header_modified) { - VdiHeader *header = acb->block_buffer; - logout("now writing modified header\n"); - assert(VDI_IS_ALLOCATED(acb->bmap_first)); - *header = s->header; - vdi_header_to_le(header); - acb->header_modified = 0; - acb->hd_iov.iov_base = acb->block_buffer; - acb->hd_iov.iov_len = SECTOR_SIZE; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1, - vdi_aio_write_cb, acb); - return; - } else if (VDI_IS_ALLOCATED(acb->bmap_first)) { - /* One or more new blocks were allocated. */ + /* prepare next AIO request */ + bmap_entry = le32_to_cpu(s->bmap[block_index]); + if (!VDI_IS_ALLOCATED(bmap_entry)) { + /* Allocate new block and write to it. */ uint64_t offset; - uint32_t bmap_first; - uint32_t bmap_last; - g_free(acb->block_buffer); - acb->block_buffer = NULL; - bmap_first = acb->bmap_first; - bmap_last = acb->bmap_last; - logout("now writing modified block map entry %u...%u\n", - bmap_first, bmap_last); - /* Write modified sectors from block map. 
*/ - bmap_first /= (SECTOR_SIZE / sizeof(uint32_t)); - bmap_last /= (SECTOR_SIZE / sizeof(uint32_t)); - n_sectors = bmap_last - bmap_first + 1; - offset = s->bmap_sector + bmap_first; - acb->bmap_first = VDI_UNALLOCATED; - acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] + - bmap_first * SECTOR_SIZE); - acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - logout("will write %u block map sectors starting from entry %u\n", - n_sectors, bmap_first); - acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov, - n_sectors, vdi_aio_write_cb, acb); - return; + bmap_entry = s->header.blocks_allocated; + s->bmap[block_index] = cpu_to_le32(bmap_entry); + s->header.blocks_allocated++; + offset = s->header.offset_data / SECTOR_SIZE + + (uint64_t)bmap_entry * s->block_sectors; + if (block == NULL) { + block = g_malloc(s->block_size); + bmap_first = block_index; + } + bmap_last = block_index; + /* Copy data to be written to new block and zero unused parts. */ + memset(block, 0, sector_in_block * SECTOR_SIZE); + memcpy(block + sector_in_block * SECTOR_SIZE, + buf, n_sectors * SECTOR_SIZE); + memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0, + (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE); + ret = bdrv_write(bs->file, offset, block, s->block_sectors); + } else { + uint64_t offset = s->header.offset_data / SECTOR_SIZE + + (uint64_t)bmap_entry * s->block_sectors + + sector_in_block; + ret = bdrv_write(bs->file, offset, buf, n_sectors); } - ret = 0; - goto done; - } - logout("%u sectors written\n", acb->n_sectors); + nb_sectors -= n_sectors; + sector_num += n_sectors; + buf += n_sectors * SECTOR_SIZE; - block_index = acb->sector_num / s->block_sectors; - sector_in_block = acb->sector_num % s->block_sectors; - n_sectors = s->block_sectors - sector_in_block; - if (n_sectors > acb->nb_sectors) { - n_sectors = acb->nb_sectors; + logout("%u sectors written\n", n_sectors); } - logout("will write %u sectors starting at sector %" PRIu64 "\n", - n_sectors, acb->sector_num); - - /* prepare next AIO request */ - acb->n_sectors = n_sectors; - bmap_entry = le32_to_cpu(s->bmap[block_index]); - if (!VDI_IS_ALLOCATED(bmap_entry)) { - /* Allocate new block and write to it. */ - uint64_t offset; - uint8_t *block; - bmap_entry = s->header.blocks_allocated; - s->bmap[block_index] = cpu_to_le32(bmap_entry); - s->header.blocks_allocated++; - offset = s->header.offset_data / SECTOR_SIZE + - (uint64_t)bmap_entry * s->block_sectors; - block = acb->block_buffer; - if (block == NULL) { - block = g_malloc(s->block_size); - acb->block_buffer = block; - acb->bmap_first = block_index; - assert(!acb->header_modified); - acb->header_modified = 1; - } - acb->bmap_last = block_index; - /* Copy data to be written to new block and zero unused parts. 
*/ - memset(block, 0, sector_in_block * SECTOR_SIZE); - memcpy(block + sector_in_block * SECTOR_SIZE, - acb->buf, n_sectors * SECTOR_SIZE); - memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0, - (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE); - acb->hd_iov.iov_base = (void *)block; - acb->hd_iov.iov_len = s->block_size; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, - &acb->hd_qiov, s->block_sectors, - vdi_aio_write_cb, acb); - } else { - uint64_t offset = s->header.offset_data / SECTOR_SIZE + - (uint64_t)bmap_entry * s->block_sectors + - sector_in_block; - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov, - n_sectors, vdi_aio_write_cb, acb); + logout("finished data write\n"); + if (ret < 0) { + return ret; } - return; - -done: - if (acb->qiov->niov > 1) { - qemu_vfree(acb->orig_buf); - } - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); -} + if (block) { + /* One or more new blocks were allocated. */ + VdiHeader *header = (VdiHeader *) block; + uint8_t *base; + uint64_t offset; -static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - VdiAIOCB *acb; - int ret; + logout("now writing modified header\n"); + assert(VDI_IS_ALLOCATED(bmap_first)); + *header = s->header; + vdi_header_to_le(header); + ret = bdrv_write(bs->file, 0, block, 1); + g_free(block); + block = NULL; - logout("\n"); - acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); - ret = vdi_schedule_bh(vdi_aio_rw_bh, acb); - if (ret < 0) { - if (acb->qiov->niov > 1) { - qemu_vfree(acb->orig_buf); + if (ret < 0) { + return ret; } - qemu_aio_release(acb); - return NULL; + + logout("now writing modified block map entry %u...%u\n", + bmap_first, bmap_last); + /* Write modified sectors from block map. 
*/ + bmap_first /= (SECTOR_SIZE / sizeof(uint32_t)); + bmap_last /= (SECTOR_SIZE / sizeof(uint32_t)); + n_sectors = bmap_last - bmap_first + 1; + offset = s->bmap_sector + bmap_first; + base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; + logout("will write %u block map sectors starting from entry %u\n", + n_sectors, bmap_first); + ret = bdrv_write(bs->file, offset, base, n_sectors); } - return &acb->common; + return ret; } static int vdi_create(const char *filename, QEMUOptionParameter *options) @@ -930,13 +725,6 @@ static void vdi_close(BlockDriverState *bs) error_free(s->migration_blocker); } -static coroutine_fn int vdi_co_flush(BlockDriverState *bs) -{ - logout("\n"); - return bdrv_co_flush(bs->file); -} - - static QEMUOptionParameter vdi_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -969,13 +757,12 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_create = vdi_create, - .bdrv_co_flush_to_disk = vdi_co_flush, .bdrv_co_is_allocated = vdi_co_is_allocated, .bdrv_make_empty = vdi_make_empty, - .bdrv_aio_readv = vdi_aio_readv, + .bdrv_read = vdi_co_read, #if defined(CONFIG_VDI_WRITE) - .bdrv_aio_writev = vdi_aio_writev, + .bdrv_write = vdi_co_write, #endif .bdrv_get_info = vdi_get_info, diff --git a/block/vmdk.c b/block/vmdk.c index 45c003a0f1..18e9b4caf6 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1525,10 +1525,10 @@ static void vmdk_close(BlockDriverState *bs) static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) { - int i, ret, err; BDRVVmdkState *s = bs->opaque; + int i, err; + int ret = 0; - ret = bdrv_co_flush(bs->file); for (i = 0; i < s->num_extents; i++) { err = bdrv_co_flush(s->extents[i].file); if (err < 0) { diff --git a/block/vpc.c b/block/vpc.c index 6b4816f563..5cd13d17a1 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -189,6 +189,9 @@ static int vpc_open(BlockDriverState *bs, int flags) fprintf(stderr, "block-vpc: The header checksum of '%s' is " "incorrect.\n", bs->filename); + /* Write 'checksum' back to footer, or else will leave it with zero. */ + footer->checksum = be32_to_cpu(checksum); + // The visible size of a image in Virtual PC depends on the geometry // rather than on the size stored in the footer (the size in the footer // is too large usually) @@ -507,11 +510,6 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, return ret; } -static coroutine_fn int vpc_co_flush(BlockDriverState *bs) -{ - return bdrv_co_flush(bs->file); -} - /* * Calculates the number of cylinders, heads and sectors per cylinder * based on a given number of sectors. This is the algorithm described @@ -789,7 +787,6 @@ static BlockDriver bdrv_vpc = { .bdrv_read = vpc_co_read, .bdrv_write = vpc_co_write, - .bdrv_co_flush_to_disk = vpc_co_flush, .create_options = vpc_create_options, }; diff --git a/block_int.h b/block_int.h index b460c369ca..0e5a032e77 100644 --- a/block_int.h +++ b/block_int.h @@ -53,12 +53,6 @@ typedef struct BdrvTrackedRequest BdrvTrackedRequest; -typedef struct AIOPool { - void (*cancel)(BlockDriverAIOCB *acb); - int aiocb_size; - BlockDriverAIOCB *free_aiocb; -} AIOPool; - typedef struct BlockIOLimit { int64_t bps[3]; int64_t iops[3]; @@ -69,8 +63,13 @@ typedef struct BlockIOBaseValue { uint64_t ios[2]; } BlockIOBaseValue; -typedef void BlockJobCancelFunc(void *opaque); typedef struct BlockJob BlockJob; + +/** + * BlockJobType: + * + * A class type for block job objects. 
+ */ typedef struct BlockJobType { /** Derived BlockJob struct size */ size_t instance_size; @@ -83,19 +82,48 @@ typedef struct BlockJobType { } BlockJobType; /** - * Long-running operation on a BlockDriverState + * BlockJob: + * + * Long-running operation on a BlockDriverState. */ struct BlockJob { + /** The job type, including the job vtable. */ const BlockJobType *job_type; + + /** The block device on which the job is operating. */ BlockDriverState *bs; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has detected that the cancelled flag is + * true, it should not anymore issue any I/O operation to the + * block device. + */ bool cancelled; - /* These fields are published by the query-block-jobs QMP API */ + /** + * Set to false by the job while it is in a quiescent state, where + * no I/O is pending and cancellation can be processed without + * issuing new I/O. The busy flag must be set to false when the + * job goes to sleep on any condition that is not detected by + * #qemu_aio_wait, such as a timer. + */ + bool busy; + + /** Offset that is published by the query-block-jobs QMP API */ int64_t offset; + + /** Length that is published by the query-block-jobs QMP API */ int64_t len; + + /** Speed that was set with @block_job_set_speed. */ int64_t speed; + /** The completion function that will be called when the job completes. */ BlockDriverCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ void *opaque; }; @@ -302,20 +330,8 @@ struct BlockDriverState { BlockJob *job; }; -struct BlockDriverAIOCB { - AIOPool *pool; - BlockDriverState *bs; - BlockDriverCompletionFunc *cb; - void *opaque; - BlockDriverAIOCB *next; -}; - void get_tmp_filename(char *filename, int size); -void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque); -void qemu_aio_release(void *p); - void bdrv_set_io_limits(BlockDriverState *bs, BlockIOLimit *io_limits); @@ -323,13 +339,90 @@ void bdrv_set_io_limits(BlockDriverState *bs, int is_windows_drive(const char *filename); #endif +/** + * block_job_create: + * @job_type: The class object for the newly-created job. + * @bs: The block + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); + +/** + * block_job_complete: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ void block_job_complete(BlockJob *job, int ret); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ int block_job_set_speed(BlockJob *job, int64_t value); + +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the specified job. 
+ */ void block_job_cancel(BlockJob *job); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ bool block_job_is_cancelled(BlockJob *job); +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the job and wait for it to reach a quiescent + * state. Note that the completion callback will still be called + * asynchronously, hence it is *not* valid to call #bdrv_delete + * immediately after #block_job_cancel_sync. Users of block jobs + * will usually protect the BlockDriverState objects with a reference + * count, should this be a concern. + */ +void block_job_cancel_sync(BlockJob *job); + +/** + * stream_start: + * @bs: Block device to operate on. + * @base: Block device that will become the new base, or %NULL to + * flatten the whole backing file chain onto @bs. + * @base_id: The file name that will be written to @bs as the new + * backing file if the job completes. Ignored if @base is %NULL. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * + * Start a streaming operation on @bs. Clusters that are unallocated + * in @bs, but allocated in any image between @base and @bs (both + * exclusive) will be written to @bs. At the end of a successful + * streaming job, the backing file of @bs will be changed to + * @base_id in the written image and to @base in the live BlockDriverState. + */ int stream_start(BlockDriverState *bs, BlockDriverState *base, const char *base_id, BlockDriverCompletionFunc *cb, void *opaque); diff --git a/blockdev.c b/blockdev.c index 1a500b830d..0c2440e249 100644 --- a/blockdev.c +++ b/blockdev.c @@ -64,6 +64,9 @@ void blockdev_mark_auto_del(BlockDriverState *bs) { DriveInfo *dinfo = drive_get_by_blockdev(bs); + if (bs->job) { + block_job_cancel(bs->job); + } if (dinfo) { dinfo->auto_del = 1; } @@ -532,8 +535,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) dinfo->unit = unit_id; dinfo->opts = opts; dinfo->refcount = 1; - if (serial) - strncpy(dinfo->serial, serial, sizeof(dinfo->serial) - 1); + if (serial) { + pstrcpy(dinfo->serial, sizeof(dinfo->serial), serial); + } QTAILQ_INSERT_TAIL(&drives, dinfo, next); bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error); @@ -591,6 +595,10 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) bdrv_flags |= BDRV_O_COPY_ON_READ; } + if (runstate_check(RUN_STATE_INMIGRATE)) { + bdrv_flags |= BDRV_O_INCOMING; + } + if (media == MEDIA_CDROM) { /* CDROM is fine for any interface, don't check. 
*/ ro = 1; diff --git a/dma-helpers.c b/dma-helpers.c index c29ea6d3ab..7971a89c14 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -8,7 +8,6 @@ */ #include "dma.h" -#include "block_int.h" #include "trace.h" void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint) @@ -42,7 +41,7 @@ typedef struct { BlockDriverAIOCB *acb; QEMUSGList *sg; uint64_t sector_num; - bool to_dev; + DMADirection dir; bool in_cancel; int sg_cur_index; dma_addr_t sg_cur_byte; @@ -76,7 +75,8 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs) for (i = 0; i < dbs->iov.niov; ++i) { cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base, - dbs->iov.iov[i].iov_len, !dbs->to_dev, + dbs->iov.iov[i].iov_len, + dbs->dir != DMA_DIRECTION_TO_DEVICE, dbs->iov.iov[i].iov_len); } qemu_iovec_reset(&dbs->iov); @@ -123,7 +123,8 @@ static void dma_bdrv_cb(void *opaque, int ret) while (dbs->sg_cur_index < dbs->sg->nsg) { cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; - mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->to_dev); + mem = cpu_physical_memory_map(cur_addr, &cur_len, + dbs->dir != DMA_DIRECTION_TO_DEVICE); if (!mem) break; qemu_iovec_add(&dbs->iov, mem, cur_len); @@ -170,11 +171,11 @@ static AIOPool dma_aio_pool = { BlockDriverAIOCB *dma_bdrv_io( BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num, DMAIOFunc *io_func, BlockDriverCompletionFunc *cb, - void *opaque, bool to_dev) + void *opaque, DMADirection dir) { DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque); - trace_dma_bdrv_io(dbs, bs, sector_num, to_dev); + trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE)); dbs->acb = NULL; dbs->bs = bs; @@ -182,7 +183,7 @@ BlockDriverAIOCB *dma_bdrv_io( dbs->sector_num = sector_num; dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; - dbs->to_dev = to_dev; + dbs->dir = dir; dbs->io_func = io_func; dbs->bh = NULL; qemu_iovec_init(&dbs->iov, sg->nsg); @@ -195,14 +196,16 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, false); + return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, + DMA_DIRECTION_FROM_DEVICE); } BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, true); + return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, + DMA_DIRECTION_TO_DEVICE); } @@ -16,6 +16,11 @@ typedef struct ScatterGatherEntry ScatterGatherEntry; +typedef enum { + DMA_DIRECTION_TO_DEVICE = 0, + DMA_DIRECTION_FROM_DEVICE = 1, +} DMADirection; + struct QEMUSGList { ScatterGatherEntry *sg; int nsg; @@ -28,11 +33,6 @@ typedef target_phys_addr_t dma_addr_t; #define DMA_ADDR_FMT TARGET_FMT_plx -typedef enum { - DMA_DIRECTION_TO_DEVICE = 0, - DMA_DIRECTION_FROM_DEVICE = 1, -} DMADirection; - struct ScatterGatherEntry { dma_addr_t base; dma_addr_t len; @@ -50,7 +50,7 @@ typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num, BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num, DMAIOFunc *io_func, BlockDriverCompletionFunc *cb, - void *opaque, bool to_dev); + void *opaque, DMADirection dir); BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, BlockDriverCompletionFunc *cb, void *opaque); diff --git a/hw/ide/core.c b/hw/ide/core.c index 
4d568acc9c..35723fd800 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -31,7 +31,6 @@ #include "sysemu.h" #include "dma.h" #include "blockdev.h" -#include "block_int.h" #include <hw/ide/internal.h> @@ -101,7 +100,7 @@ static void ide_identify(IDEState *s) put_le16(p + 21, 512); /* cache size in sectors */ put_le16(p + 22, 4); /* ecc bytes */ padstr((char *)(p + 23), s->version, 8); /* firmware version */ - padstr((char *)(p + 27), "QEMU HARDDISK", 40); /* model */ + padstr((char *)(p + 27), s->drive_model_str, 40); /* model */ #if MAX_MULT_SECTORS > 1 put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS); #endif @@ -143,17 +142,25 @@ static void ide_identify(IDEState *s) put_le16(p + 82, (1 << 14) | (1 << 5) | 1); /* 13=flush_cache_ext,12=flush_cache,10=lba48 */ put_le16(p + 83, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10)); - /* 14=set to 1, 1=SMART self test, 0=SMART error logging */ - put_le16(p + 84, (1 << 14) | 0); + /* 14=set to 1, 8=has WWN, 1=SMART self test, 0=SMART error logging */ + if (s->wwn) { + put_le16(p + 84, (1 << 14) | (1 << 8) | 0); + } else { + put_le16(p + 84, (1 << 14) | 0); + } /* 14 = NOP supported, 5=WCACHE enabled, 0=SMART feature set enabled */ if (bdrv_enable_write_cache(s->bs)) put_le16(p + 85, (1 << 14) | (1 << 5) | 1); else put_le16(p + 85, (1 << 14) | 1); /* 13=flush_cache_ext,12=flush_cache,10=lba48 */ - put_le16(p + 86, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10)); - /* 14=set to 1, 1=smart self test, 0=smart error logging */ - put_le16(p + 87, (1 << 14) | 0); + put_le16(p + 86, (1 << 13) | (1 <<12) | (1 << 10)); + /* 14=set to 1, 8=has WWN, 1=SMART self test, 0=SMART error logging */ + if (s->wwn) { + put_le16(p + 87, (1 << 14) | (1 << 8) | 0); + } else { + put_le16(p + 87, (1 << 14) | 0); + } put_le16(p + 88, 0x3f | (1 << 13)); /* udma5 set and supported */ put_le16(p + 93, 1 | (1 << 14) | 0x2000); put_le16(p + 100, s->nb_sectors); @@ -163,6 +170,13 @@ static void ide_identify(IDEState *s) if (dev && dev->conf.physical_block_size) put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf)); + if (s->wwn) { + /* LE 16-bit words 111-108 contain 64-bit World Wide Name */ + put_le16(p + 108, s->wwn >> 48); + put_le16(p + 109, s->wwn >> 32); + put_le16(p + 110, s->wwn >> 16); + put_le16(p + 111, s->wwn); + } if (dev && dev->conf.discard_granularity) { put_le16(p + 169, 1); /* TRIM support */ } @@ -189,7 +203,7 @@ static void ide_atapi_identify(IDEState *s) put_le16(p + 21, 512); /* cache size in sectors */ put_le16(p + 22, 4); /* ecc bytes */ padstr((char *)(p + 23), s->version, 8); /* firmware version */ - padstr((char *)(p + 27), "QEMU DVD-ROM", 40); /* model */ + padstr((char *)(p + 27), s->drive_model_str, 40); /* model */ put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */ #ifdef USE_DMA_CDROM put_le16(p + 49, 1 << 9 | 1 << 8); /* DMA and LBA supported */ @@ -246,7 +260,7 @@ static void ide_cfata_identify(IDEState *s) padstr((char *)(p + 10), s->drive_serial_str, 20); /* serial number */ put_le16(p + 22, 0x0004); /* ECC bytes */ padstr((char *) (p + 23), s->version, 8); /* Firmware Revision */ - padstr((char *) (p + 27), "QEMU MICRODRIVE", 40);/* Model number */ + padstr((char *) (p + 27), s->drive_model_str, 40);/* Model number */ #if MAX_MULT_SECTORS > 1 put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS); #else @@ -604,7 +618,8 @@ void ide_dma_cb(void *opaque, int ret) break; case IDE_DMA_TRIM: s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, - ide_issue_trim, ide_dma_cb, s, true); + ide_issue_trim, ide_dma_cb, s, + 
DMA_DIRECTION_TO_DEVICE); break; } return; @@ -1834,7 +1849,8 @@ static const BlockDevOps ide_cd_block_ops = { }; int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind, - const char *version, const char *serial) + const char *version, const char *serial, const char *model, + uint64_t wwn) { int cylinders, heads, secs; uint64_t nb_sectors; @@ -1860,6 +1876,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind, s->heads = heads; s->sectors = secs; s->nb_sectors = nb_sectors; + s->wwn = wwn; /* The SMART values should be preserved across power cycles but they aren't. */ s->smart_enabled = 1; @@ -1880,11 +1897,27 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind, } } if (serial) { - strncpy(s->drive_serial_str, serial, sizeof(s->drive_serial_str)); + pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), serial); } else { snprintf(s->drive_serial_str, sizeof(s->drive_serial_str), "QM%05d", s->drive_serial); } + if (model) { + pstrcpy(s->drive_model_str, sizeof(s->drive_model_str), model); + } else { + switch (kind) { + case IDE_CD: + strcpy(s->drive_model_str, "QEMU DVD-ROM"); + break; + case IDE_CFATA: + strcpy(s->drive_model_str, "QEMU MICRODRIVE"); + break; + default: + strcpy(s->drive_model_str, "QEMU HARDDISK"); + break; + } + } + if (version) { pstrcpy(s->version, sizeof(s->version), version); } else { @@ -1977,7 +2010,8 @@ void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0, if (dinfo) { if (ide_init_drive(&bus->ifs[i], dinfo->bdrv, dinfo->media_cd ? IDE_CD : IDE_HD, NULL, - *dinfo->serial ? dinfo->serial : NULL) < 0) { + *dinfo->serial ? dinfo->serial : NULL, + NULL, 0) < 0) { error_report("Can't set up IDE drive %s", dinfo->id); exit(1); } diff --git a/hw/ide/internal.h b/hw/ide/internal.h index c808a0ddf8..100efd3076 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -348,6 +348,8 @@ struct IDEState { uint8_t identify_data[512]; int drive_serial; char drive_serial_str[21]; + char drive_model_str[41]; + uint64_t wwn; /* ide regs */ uint8_t feature; uint8_t error; @@ -468,6 +470,8 @@ struct IDEDevice { BlockConf conf; char *version; char *serial; + char *model; + uint64_t wwn; }; #define BM_STATUS_DMAING 0x01 @@ -534,7 +538,8 @@ void ide_data_writel(void *opaque, uint32_t addr, uint32_t val); uint32_t ide_data_readl(void *opaque, uint32_t addr); int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind, - const char *version, const char *serial); + const char *version, const char *serial, const char *model, + uint64_t wwn); void ide_init2(IDEBus *bus, qemu_irq irq); void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0, DriveInfo *hd1, qemu_irq irq); diff --git a/hw/ide/macio.c b/hw/ide/macio.c index a4df24406a..7b38d9e683 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -149,7 +149,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) break; case IDE_DMA_TRIM: m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, - ide_issue_trim, pmac_ide_transfer_cb, s, true); + ide_issue_trim, pmac_ide_transfer_cb, s, + DMA_DIRECTION_TO_DEVICE); break; } return; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index f6a48961c5..a46578d685 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -136,7 +136,8 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) } } - if (ide_init_drive(s, dev->conf.bs, kind, dev->version, serial) < 0) { + if (ide_init_drive(s, dev->conf.bs, kind, + dev->version, serial, dev->model, dev->wwn) < 0) { return -1; } @@ -173,7 +174,9 @@ static int 
ide_drive_initfn(IDEDevice *dev) #define DEFINE_IDE_DEV_PROPERTIES() \ DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ DEFINE_PROP_STRING("ver", IDEDrive, dev.version), \ - DEFINE_PROP_STRING("serial", IDEDrive, dev.serial) + DEFINE_PROP_HEX64("wwn", IDEDrive, dev.wwn, 0), \ + DEFINE_PROP_STRING("serial", IDEDrive, dev.serial),\ + DEFINE_PROP_STRING("model", IDEDrive, dev.model) static Property ide_hd_properties[] = { DEFINE_IDE_DEV_PROPERTIES(), diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c index edc09b7307..f022a02447 100644 --- a/hw/lsi53c895a.c +++ b/hw/lsi53c895a.c @@ -15,7 +15,6 @@ #include "hw.h" #include "pci.h" #include "scsi.h" -#include "block_int.h" #include "dma.h" //#define DEBUG_LSI diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c index bff9152df5..98dd06aeba 100644 --- a/hw/qdev-properties.c +++ b/hw/qdev-properties.c @@ -877,6 +877,52 @@ PropertyInfo qdev_prop_pci_devfn = { .max = 0xFFFFFFFFULL, }; +/* --- blocksize --- */ + +static void set_blocksize(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + int16_t *ptr = qdev_get_prop_ptr(dev, prop); + Error *local_err = NULL; + int64_t value; + + if (dev->state != DEV_STATE_CREATED) { + error_set(errp, QERR_PERMISSION_DENIED); + return; + } + + visit_type_int(v, &value, name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + if (value < prop->info->min || value > prop->info->max) { + error_set(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, + dev->id?:"", name, value, prop->info->min, + prop->info->max); + return; + } + + /* We rely on power-of-2 blocksizes for bitmasks */ + if ((value & (value - 1)) != 0) { + error_set(errp, QERR_PROPERTY_VALUE_NOT_POWER_OF_2, + dev->id?:"", name, value); + return; + } + + *ptr = value; +} + +PropertyInfo qdev_prop_blocksize = { + .name = "blocksize", + .get = get_int16, + .set = set_blocksize, + .min = 512, + .max = 65024, +}; + /* --- public helpers --- */ static Property *qdev_prop_walk(Property *props, const char *name) @@ -223,6 +223,7 @@ extern PropertyInfo qdev_prop_drive; extern PropertyInfo qdev_prop_netdev; extern PropertyInfo qdev_prop_vlan; extern PropertyInfo qdev_prop_pci_devfn; +extern PropertyInfo qdev_prop_blocksize; #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \ .name = (_name), \ @@ -284,6 +285,8 @@ extern PropertyInfo qdev_prop_pci_devfn; #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ LostTickPolicy) +#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f, _d) \ + DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_blocksize, uint16_t) #define DEFINE_PROP_END_OF_LIST() \ {} diff --git a/linux-aio.c b/linux-aio.c index d2fc2e7d02..15261ece3d 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -9,7 +9,6 @@ */ #include "qemu-common.h" #include "qemu-aio.h" -#include "block_int.h" #include "block/raw-posix-aio.h" #include <sys/eventfd.h> diff --git a/migration.c b/migration.c index 8c119ba8ff..94f7839e8b 100644 --- a/migration.c +++ b/migration.c @@ -91,6 +91,7 @@ void process_incoming_migration(QEMUFile *f) qemu_announce_self(); DPRINTF("successfully loaded vm state\n"); + bdrv_clear_incoming_migration_all(); /* Make sure all file formats flush their mutable metadata */ bdrv_invalidate_cache_all(); diff --git a/qemu-aio.h b/qemu-aio.h index 3bdd749f80..230c2f79a0 100644 --- a/qemu-aio.h +++ b/qemu-aio.h @@ -17,6 +17,27 @@ #include "qemu-common.h" #include "qemu-char.h" +typedef struct 
BlockDriverAIOCB BlockDriverAIOCB; +typedef void BlockDriverCompletionFunc(void *opaque, int ret); + +typedef struct AIOPool { + void (*cancel)(BlockDriverAIOCB *acb); + int aiocb_size; + BlockDriverAIOCB *free_aiocb; +} AIOPool; + +struct BlockDriverAIOCB { + AIOPool *pool; + BlockDriverState *bs; + BlockDriverCompletionFunc *cb; + void *opaque; + BlockDriverAIOCB *next; +}; + +void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); +void qemu_aio_release(void *p); + /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ typedef int (AioFlushHandler)(void *opaque); diff --git a/qemu-img.c b/qemu-img.c index 0e48b35296..6a61ca8d06 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -428,6 +428,13 @@ static int img_check(int argc, char **argv) } } + if (result.bfi.total_clusters != 0 && result.bfi.allocated_clusters != 0) { + printf("%" PRId64 "/%" PRId64 "= %0.2f%% allocated, %0.2f%% fragmented\n", + result.bfi.allocated_clusters, result.bfi.total_clusters, + result.bfi.allocated_clusters * 100.0 / result.bfi.total_clusters, + result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); + } + bdrv_delete(bs); if (ret < 0 || result.check_errors) { @@ -716,7 +723,7 @@ static int img_convert(int argc, char **argv) ret = -1; goto out; } - + qemu_progress_init(progress, 2.0); qemu_progress_print(0, 100); @@ -1125,6 +1132,9 @@ static int img_info(int argc, char **argv) if (bdi.cluster_size != 0) { printf("cluster_size: %d\n", bdi.cluster_size); } + if (bdi.is_dirty) { + printf("cleanly shut down: no\n"); + } } bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename)); if (backing_filename[0] != '\0') { @@ -17,6 +17,7 @@ #include "qemu-common.h" #include "block_int.h" #include "cmd.h" +#include "trace/control.h" #define VERSION "0.0.1" @@ -1783,6 +1784,7 @@ static void usage(const char *name) " -g, --growable allow file to grow (only applies to protocols)\n" " -m, --misalign misalign allocations for O_DIRECT\n" " -k, --native-aio use kernel AIO implementation (on Linux only)\n" +" -T, --trace FILE enable trace events listed in the given file\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" "\n", @@ -1794,7 +1796,7 @@ int main(int argc, char **argv) { int readonly = 0; int growable = 0; - const char *sopt = "hVc:rsnmgk"; + const char *sopt = "hVc:rsnmgkT:"; const struct option lopt[] = { { "help", 0, NULL, 'h' }, { "version", 0, NULL, 'V' }, @@ -1806,6 +1808,7 @@ int main(int argc, char **argv) { "misalign", 0, NULL, 'm' }, { "growable", 0, NULL, 'g' }, { "native-aio", 0, NULL, 'k' }, + { "trace", 1, NULL, 'T' }, { NULL, 0, NULL, 0 } }; int c; @@ -1837,6 +1840,11 @@ int main(int argc, char **argv) case 'k': flags |= BDRV_O_NATIVE_AIO; break; + case 'T': + if (!trace_backend_init(optarg, NULL)) { + exit(1); /* error message will have been printed */ + } + break; case 'V': printf("%s version %s\n", progname, VERSION); exit(0); @@ -241,9 +241,14 @@ static const QErrorStringTable qerror_table[] = { .desc = "Property '%(device).%(property)' can't find value '%(value)'", }, { + .error_fmt = QERR_PROPERTY_VALUE_NOT_POWER_OF_2, + .desc = "Property '%(device).%(property)' doesn't take " + "value '%(value)', it's not a power of 2", + }, + { .error_fmt = QERR_PROPERTY_VALUE_OUT_OF_RANGE, .desc = "Property '%(device).%(property)' doesn't take " - "value %(value) (minimum: %(min), maximum: %(max)'", + "value %(value) (minimum: %(min), maximum: %(max))", }, { .error_fmt = 
QERR_QGA_COMMAND_FAILED, @@ -202,6 +202,10 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_PROPERTY_VALUE_NOT_FOUND \ "{ 'class': 'PropertyValueNotFound', 'data': { 'device': %s, 'property': %s, 'value': %s } }" +#define QERR_PROPERTY_VALUE_NOT_POWER_OF_2 \ + "{ 'class': 'PropertyValueNotPowerOf2', 'data': { " \ + "'device': %s, 'property': %s, 'value': %"PRId64" } }" + #define QERR_PROPERTY_VALUE_OUT_OF_RANGE \ "{ 'class': 'PropertyValueOutOfRange', 'data': { 'device': %s, 'property': %s, 'value': %"PRId64", 'min': %"PRId64", 'max': %"PRId64" } }" diff --git a/scripts/tracetool b/scripts/tracetool index 47389b62ea..7b1c142b67 100755 --- a/scripts/tracetool +++ b/scripts/tracetool @@ -81,6 +81,10 @@ get_args() args=${1#*\(} args=${args%%\)*} echo "$args" + + if (echo "$args" | grep "[ *]next\($\|[, ]\)" > /dev/null 2>&1); then + echo -e "\n#error 'next' is a bad argument name (clash with systemtap keyword)\n " + fi } # Get the argument name list of a trace event diff --git a/tests/qemu-iotests/009 b/tests/qemu-iotests/009 index f7262b57bf..25368c819b 100755 --- a/tests/qemu-iotests/009 +++ b/tests/qemu-iotests/009 @@ -53,10 +53,10 @@ _make_test_img $size echo echo "creating pattern" $QEMU_IO \ - -c "write 2048k 4k -P 65" \ + -c "write -P 65 2048k 4k" \ -c "write 4k 4k" \ -c "write 9M 4k" \ - -c "read 2044k 8k -P 65 -s 4k -l 4k" \ + -c "read -P 65 -s 4k -l 4k 2044k 8k" \ $TEST_IMG | _filter_qemu_io echo diff --git a/tests/qemu-iotests/010 b/tests/qemu-iotests/010 index e3205aa206..7b5792934a 100755 --- a/tests/qemu-iotests/010 +++ b/tests/qemu-iotests/010 @@ -53,11 +53,11 @@ _make_test_img $size echo echo "creating pattern" $QEMU_IO \ - -c "write 2048k 4k -P 165" \ + -c "write -P 165 2048k 4k" \ -c "write 64k 4k" \ -c "write 9M 4k" \ - -c "write 2044k 4k -P 165" \ - -c "write 8M 4k -P 99" \ + -c "write -P 165 2044k 4k" \ + -c "write -P 99 8M 4k" \ -c "read -P 165 2044k 8k" \ $TEST_IMG | _filter_qemu_io diff --git a/tests/qemu-iotests/011 b/tests/qemu-iotests/011 index 59df1ae48e..b03df6887d 100755 --- a/tests/qemu-iotests/011 +++ b/tests/qemu-iotests/011 @@ -60,7 +60,7 @@ for i in `seq 1 10`; do # Note that we filter away the actual offset. That's because qemu # may re-order the two aio requests. We only want to make sure the # filesystem isn't corrupted afterwards anyway. - $QEMU_IO $TEST_IMG -c "aio_write $off1 1M" -c "aio_write $off2 1M" | \ + $QEMU_IO -c "aio_write $off1 1M" -c "aio_write $off2 1M" $TEST_IMG | \ _filter_qemu_io | \ sed -e 's/bytes at offset [0-9]*/bytes at offset XXX/g' done diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031 new file mode 100755 index 0000000000..6365f287e0 --- /dev/null +++ b/tests/qemu-iotests/031 @@ -0,0 +1,72 @@ +#!/bin/bash +# +# Test that all qcow2 header extensions survive a header rewrite +# +# Copyright (C) 2011 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# This tests qcow2-specific low-level functionality +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +CLUSTER_SIZE=65536 +echo +echo === Create image with unknown header extension === +echo +_make_test_img 64M +./qcow2.py $TEST_IMG add-header-ext 0x12345678 "This is a test header extension" +./qcow2.py $TEST_IMG dump-header +_check_test_img + +echo +echo === Rewrite header with no backing file === +echo +$QEMU_IMG rebase -u -b "" $TEST_IMG +./qcow2.py $TEST_IMG dump-header +_check_test_img + +echo +echo === Add a backing file and format === +echo +$QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device $TEST_IMG +./qcow2.py $TEST_IMG dump-header + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out new file mode 100644 index 0000000000..0f1bf685f3 --- /dev/null +++ b/tests/qemu-iotests/031.out @@ -0,0 +1,76 @@ +QA output created by 031 + +=== Create image with unknown header extension === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 cluster_size=65536 +magic 0x514649fb +version 2 +backing_file_offset 0x0 +backing_file_size 0x0 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 + +Header extension: +magic 0x12345678 +length 31 +data 'This is a test header extension' + +No errors were found on the image. + +=== Rewrite header with no backing file === + +magic 0x514649fb +version 2 +backing_file_offset 0x0 +backing_file_size 0x0 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 + +Header extension: +magic 0x12345678 +length 31 +data 'This is a test header extension' + +No errors were found on the image. + +=== Add a backing file and format === + +magic 0x514649fb +version 2 +backing_file_offset 0x90 +backing_file_size 0x17 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 + +Header extension: +magic 0xe2792aca +length 11 +data 'host_device' + +Header extension: +magic 0x12345678 +length 31 +data 'This is a test header extension' + +*** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 26811ca660..4cb8dae8c6 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -57,16 +57,21 @@ _make_test_img() { # extra qemu-img options can be added by tests # at least one argument (the image size) needs to be added - local extra_img_options=$* + local extra_img_options="" local cluster_size_filter="s# cluster_size=[0-9]\\+##g" + local image_size=$* + if [ "$1" = "-b" ]; then + extra_img_options="$1 $2" + image_size=$3 + fi if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then extra_img_options="-o cluster_size=$CLUSTER_SIZE $extra_img_options" cluster_size_filter="" fi # XXX(hch): have global image options? 
- $QEMU_IMG create -f $IMGFMT $TEST_IMG $extra_img_options | \ + $QEMU_IMG create -f $IMGFMT $extra_img_options $TEST_IMG $image_size | \ sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" | \ sed -e "s#$TEST_DIR#TEST_DIR#g" | \ sed -e "s#$IMGFMT#IMGFMT#g" | \ diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index b549f10f17..1742ede180 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -37,3 +37,4 @@ 028 rw backing auto 029 rw auto quick 030 rw auto +031 rw auto quick diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py new file mode 100755 index 0000000000..bfb47e88fc --- /dev/null +++ b/tests/qemu-iotests/qcow2.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python + +import sys +import struct +import string + +class QcowHeaderExtension: + + def __init__(self, magic, length, data): + self.magic = magic + self.length = length + self.data = data + + @classmethod + def create(cls, magic, data): + return QcowHeaderExtension(magic, len(data), data) + +class QcowHeader: + + uint32_t = 'I' + uint64_t = 'Q' + + fields = [ + # Version 2 header fields + [ uint32_t, '%#x', 'magic' ], + [ uint32_t, '%d', 'version' ], + [ uint64_t, '%#x', 'backing_file_offset' ], + [ uint32_t, '%#x', 'backing_file_size' ], + [ uint32_t, '%d', 'cluster_bits' ], + [ uint64_t, '%d', 'size' ], + [ uint32_t, '%d', 'crypt_method' ], + [ uint32_t, '%d', 'l1_size' ], + [ uint64_t, '%#x', 'l1_table_offset' ], + [ uint64_t, '%#x', 'refcount_table_offset' ], + [ uint32_t, '%d', 'refcount_table_clusters' ], + [ uint32_t, '%d', 'nb_snapshots' ], + [ uint64_t, '%#x', 'snapshot_offset' ], + ]; + + fmt = '>' + ''.join(field[0] for field in fields) + + def __init__(self, fd): + + buf_size = struct.calcsize(QcowHeader.fmt) + + fd.seek(0) + buf = fd.read(buf_size) + + header = struct.unpack(QcowHeader.fmt, buf) + self.__dict__ = dict((field[2], header[i]) + for i, field in enumerate(QcowHeader.fields)) + + self.cluster_size = 1 << self.cluster_bits + + fd.seek(self.get_header_length()) + self.load_extensions(fd) + + if self.backing_file_offset: + fd.seek(self.backing_file_offset) + self.backing_file = fd.read(self.backing_file_size) + else: + self.backing_file = None + + def get_header_length(self): + if self.version == 2: + return 72 + else: + raise Exception("version != 2 not supported") + + def load_extensions(self, fd): + self.extensions = [] + + if self.backing_file_offset != 0: + end = min(self.cluster_size, self.backing_file_offset) + else: + end = self.cluster_size + + while fd.tell() < end: + (magic, length) = struct.unpack('>II', fd.read(8)) + if magic == 0: + break + else: + padded = (length + 7) & ~7 + data = fd.read(padded) + self.extensions.append(QcowHeaderExtension(magic, length, data)) + + def update_extensions(self, fd): + + fd.seek(self.get_header_length()) + extensions = self.extensions + extensions.append(QcowHeaderExtension(0, 0, "")) + for ex in extensions: + buf = struct.pack('>II', ex.magic, ex.length) + fd.write(buf) + fd.write(ex.data) + + if self.backing_file != None: + self.backing_file_offset = fd.tell() + fd.write(self.backing_file) + + if fd.tell() > self.cluster_size: + raise Exception("I think I just broke the image...") + + + def update(self, fd): + header_bytes = self.get_header_length() + + self.update_extensions(fd) + + fd.seek(0) + header = tuple(self.__dict__[f] for t, p, f in QcowHeader.fields) + buf = struct.pack(QcowHeader.fmt, *header) + buf = buf[0:header_bytes-1] + fd.write(buf) + + def dump(self): + for f in QcowHeader.fields: + print "%-25s" % f[2], 
f[1] % self.__dict__[f[2]] + print "" + + def dump_extensions(self): + for ex in self.extensions: + + data = ex.data[:ex.length] + if all(c in string.printable for c in data): + data = "'%s'" % data + else: + data = "<binary>" + + print "Header extension:" + print "%-25s %#x" % ("magic", ex.magic) + print "%-25s %d" % ("length", ex.length) + print "%-25s %s" % ("data", data) + print "" + + +def cmd_dump_header(fd): + h = QcowHeader(fd) + h.dump() + h.dump_extensions() + +def cmd_add_header_ext(fd, magic, data): + try: + magic = int(magic, 0) + except: + print "'%s' is not a valid magic number" % magic + sys.exit(1) + + h = QcowHeader(fd) + h.extensions.append(QcowHeaderExtension.create(magic, data)) + h.update(fd) + +def cmd_del_header_ext(fd, magic): + try: + magic = int(magic, 0) + except: + print "'%s' is not a valid magic number" % magic + sys.exit(1) + + h = QcowHeader(fd) + found = False + + for ex in h.extensions: + if ex.magic == magic: + found = True + h.extensions.remove(ex) + + if not found: + print "No such header extension" + return + + h.update(fd) + +cmds = [ + [ 'dump-header', cmd_dump_header, 0, 'Dump image header and header extensions' ], + [ 'add-header-ext', cmd_add_header_ext, 2, 'Add a header extension' ], + [ 'del-header-ext', cmd_del_header_ext, 1, 'Delete a header extension' ], +] + +def main(filename, cmd, args): + fd = open(filename, "r+b") + try: + for name, handler, num_args, desc in cmds: + if name != cmd: + continue + elif len(args) != num_args: + usage() + return + else: + handler(fd, *args) + return + print "Unknown command '%s'" % cmd + finally: + fd.close() + +def usage(): + print "Usage: %s <file> <cmd> [<arg>, ...]" % sys.argv[0] + print "" + print "Supported commands:" + for name, handler, num_args, desc in cmds: + print " %-20s - %s" % (name, desc) + +if len(sys.argv) < 3: + usage() + sys.exit(1) + +main(sys.argv[1], sys.argv[2], sys.argv[3:]) diff --git a/trace-events b/trace-events index db2cd39950..a5f276d020 100644 --- a/trace-events +++ b/trace-events @@ -562,7 +562,7 @@ qemu_coroutine_terminate(void *co) "self %p" # qemu-coroutine-lock.c qemu_co_queue_next_bh(void) "" -qemu_co_queue_next(void *next) "next %p" +qemu_co_queue_next(void *nxt) "next %p" qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p" qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p" qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p" |
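
For callers of dma_bdrv_io(), the switch from the to_dev boolean to DMADirection is mechanical. Below is a minimal sketch of a hypothetical call site, assuming only the dma_bdrv_io() signature and the enum values shown in the dma-helpers.c / dma.h hunks above; the device state struct, callback and helper names are made up:

    #include "dma.h"      /* dma_bdrv_io(), DMADirection, QEMUSGList */
    #include "block.h"    /* bdrv_aio_readv(), bdrv_aio_writev() */

    /* Illustrative only: MyState, its fields and my_dma_cb() are not part
     * of the patch; the dma_bdrv_io() signature and enum values are. */
    typedef struct MyState {
        BlockDriverState *bs;
        BlockDriverAIOCB *aiocb;
    } MyState;

    static void my_dma_cb(void *opaque, int ret);

    static void my_dma_start(MyState *s, QEMUSGList *sg,
                             uint64_t sector_num, bool is_write)
    {
        /* Previously the last argument was a bare bool (true == write). */
        s->aiocb = dma_bdrv_io(s->bs, sg, sector_num,
                               is_write ? bdrv_aio_writev : bdrv_aio_readv,
                               my_dma_cb, s,
                               is_write ? DMA_DIRECTION_TO_DEVICE
                                        : DMA_DIRECTION_FROM_DEVICE);
    }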
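
The hw/ide changes thread a per-drive model string and a 64-bit World Wide Name from the new qdev properties ("model", "serial", "wwn") down into ide_init_drive() and the ATA IDENTIFY data. A sketch of the extended call, assuming only the signature and constants shown in the hunks above; the wrapper function and the drive values are illustrative:

    #include <hw/ide/internal.h>   /* ide_init_drive(), IDE_HD */

    /* s and bs come from the caller; all constants below are made up. */
    static int attach_sample_disk(IDEState *s, BlockDriverState *bs)
    {
        const uint64_t wwn = 0x5000c50015ea71adULL;

        /* A NULL version falls back to the QEMU version string; a NULL
         * model would fall back to "QEMU HARDDISK" for IDE_HD, and a
         * zero wwn leaves the WWN words out of the IDENTIFY data. */
        if (ide_init_drive(s, bs, IDE_HD, NULL, "QM00042",
                           "SAMPLE HARDDISK", wwn) < 0) {
            return -1;
        }
        /* The WWN ends up in LE IDENTIFY words 108..111, high word
         * first: 0x5000, 0xc500, 0x15ea, 0x71ad for the value above. */
        return 0;
    }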
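
The new "blocksize" property type can be reused by any block device that exposes a logical or physical block size. A sketch of a hypothetical device using DEFINE_PROP_BLOCKSIZE, assuming only the macro, the 512..65024 range and the power-of-two check added above; the include path, device struct and property name are illustrative:

    #include "qdev.h"   /* Property, DEFINE_PROP_* (include path is a guess) */

    typedef struct MyDiskState {
        DeviceState qdev;
        uint16_t logical_block_size;
    } MyDiskState;

    static Property my_disk_properties[] = {
        /* Values outside 512..65024 fail with PropertyValueOutOfRange;
         * values that are not a power of two (e.g. 4000) fail with the
         * new PropertyValueNotPowerOf2 error, while 4096 is accepted. */
        DEFINE_PROP_BLOCKSIZE("logical_block_size", MyDiskState,
                              logical_block_size, 512),
        DEFINE_PROP_END_OF_LIST(),
    };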
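
The allocation summary that qemu-img check now prints uses two different denominators: allocated clusters are reported relative to the total cluster count, fragmented clusters relative to the allocated ones. For a hypothetical image with 16384 clusters, 8192 of them allocated and 1024 of those fragmented, the summary works out to 8192/16384 = 50.00% allocated and 1024/8192 = 12.50% fragmented.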
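
The extension handling in qcow2.py also accounts for the backing_file_offset of 0x90 in the 031 output: each extension is stored as an 8-byte magic/length pair followed by its data padded to a multiple of 8 ((length + 7) & ~7), and the list is closed by an 8-byte zero entry. A 72-byte version-2 header, 8 + 16 bytes for the 11-byte "host_device" format extension, 8 + 32 bytes for the 31-byte test extension and the 8-byte end marker add up to 144 bytes, i.e. 0x90, which is where the backing file string is then written.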