aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2013-03-15 10:47:21 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2013-03-15 10:47:21 -0500
commitdc0b0616f726956001be09e9a65a6e0b0bd939db (patch)
tree9b5e0b67bafa20803a8a70b1e04dbbf1cf762a84
parentd4d7682484f339d70355b165a15f8f5e83638e40 (diff)
parent3618a094022e984d4e045c6db21aed961b7c6fc9 (diff)
Merge remote-tracking branch 'stefanha/block' into staging
# By Stefan Hajnoczi (14) and others # Via Stefan Hajnoczi * stefanha/block: (28 commits) blockdev: Fix up copyright and permission notice qemu-iotests: use -nographic in test case 007 qemu-iotests: add tests for rebasing zero clusters dataplane: fix hang introduced by AioContext transition coroutine: use AioContext for CoQueue BH threadpool: drop global thread pool block: add bdrv_get_aio_context() aio: add a ThreadPool instance to AioContext threadpool: add thread_pool_new() and thread_pool_free() threadpool: move globals into struct ThreadPool main-loop: add qemu_get_aio_context() sheepdog: set io_flush handler in do_co_req sheepdog: use non-blocking fd in coroutine context qcow2: make is_allocated return true for zero clusters qcow2: drop unnecessary flush in qcow2_update_snapshot_refcount() qcow2: drop flush in update_cluster_refcount() qcow2: flush in qcow2_update_snapshot_refcount() qcow2: set L2 cache dependency in qcow2_alloc_bytes() qcow2: flush refcount cache correctly in qcow2_write_snapshots() qcow2: flush refcount cache correctly in alloc_refcount_block() ...
-rw-r--r--async.c11
-rw-r--r--block.c87
-rw-r--r--block/blkverify.c2
-rw-r--r--block/bochs.c2
-rw-r--r--block/cloop.c2
-rw-r--r--block/cow.c2
-rw-r--r--block/dmg.c2
-rw-r--r--block/parallels.c2
-rw-r--r--block/qcow.c2
-rw-r--r--block/qcow2-cluster.c5
-rw-r--r--block/qcow2-refcount.c24
-rw-r--r--block/qcow2-snapshot.c10
-rw-r--r--block/qcow2.c49
-rw-r--r--block/qcow2.h1
-rw-r--r--block/qed.c4
-rw-r--r--block/raw-posix.c8
-rw-r--r--block/raw-win32.c4
-rw-r--r--block/raw.c2
-rw-r--r--block/sheepdog.c19
-rw-r--r--block/vdi.c2
-rw-r--r--block/vmdk.c4
-rw-r--r--block/vpc.c2
-rw-r--r--block/vvfat.c2
-rw-r--r--blockdev.c105
-rw-r--r--hw/dataplane/virtio-blk.c17
-rw-r--r--hw/xen_disk.c2
-rw-r--r--include/block/aio.h6
-rw-r--r--include/block/block.h4
-rw-r--r--include/block/block_int.h10
-rw-r--r--include/block/coroutine.h1
-rw-r--r--include/block/thread-pool.h15
-rw-r--r--include/qapi/qmp/qdict.h2
-rw-r--r--include/qemu/main-loop.h5
-rw-r--r--include/qemu/option.h1
-rw-r--r--include/sysemu/blockdev.h2
-rw-r--r--main-loop.c5
-rw-r--r--qemu-coroutine-lock.c55
-rw-r--r--qemu-img.c6
-rw-r--r--qemu-io.c2
-rw-r--r--qemu-nbd.c2
-rw-r--r--qobject/qdict.c22
-rwxr-xr-xtests/qemu-iotests/0077
-rwxr-xr-xtests/qemu-iotests/05075
-rw-r--r--tests/qemu-iotests/050.out17
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/test-thread-pool.c44
-rw-r--r--thread-pool.c243
-rw-r--r--trace-events4
-rw-r--r--util/qemu-option.c34
49 files changed, 702 insertions, 233 deletions
diff --git a/async.c b/async.c
index f2d47ba96d..90fe906539 100644
--- a/async.c
+++ b/async.c
@@ -24,6 +24,7 @@
#include "qemu-common.h"
#include "block/aio.h"
+#include "block/thread-pool.h"
#include "qemu/main-loop.h"
/***********************************************************/
@@ -172,6 +173,7 @@ aio_ctx_finalize(GSource *source)
{
AioContext *ctx = (AioContext *) source;
+ thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
g_array_free(ctx->pollfds, TRUE);
@@ -190,6 +192,14 @@ GSource *aio_get_g_source(AioContext *ctx)
return &ctx->source;
}
+ThreadPool *aio_get_thread_pool(AioContext *ctx)
+{
+ if (!ctx->thread_pool) {
+ ctx->thread_pool = thread_pool_new(ctx);
+ }
+ return ctx->thread_pool;
+}
+
void aio_notify(AioContext *ctx)
{
event_notifier_set(&ctx->notifier);
@@ -200,6 +210,7 @@ AioContext *aio_context_new(void)
AioContext *ctx;
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
+ ctx->thread_pool = NULL;
event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
diff --git a/block.c b/block.c
index 124a9ebf65..037e15e065 100644
--- a/block.c
+++ b/block.c
@@ -665,15 +665,18 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
/*
* Common part for opening disk images and files
+ *
+ * Removes all processed options from *options.
*/
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
- const char *filename,
+ const char *filename, QDict *options,
int flags, BlockDriver *drv)
{
int ret, open_flags;
assert(drv != NULL);
assert(bs->file == NULL);
+ assert(options == NULL || bs->options != options);
trace_bdrv_open_common(bs, filename, flags, drv->format_name);
@@ -710,7 +713,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
} else {
assert(file != NULL);
bs->file = file;
- ret = drv->bdrv_open(bs, open_flags);
+ ret = drv->bdrv_open(bs, options, open_flags);
}
if (ret < 0) {
@@ -752,7 +755,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
}
bs = bdrv_new("");
- ret = bdrv_open_common(bs, NULL, filename, flags, drv);
+ ret = bdrv_open_common(bs, NULL, filename, NULL, flags, drv);
if (ret < 0) {
bdrv_delete(bs);
return ret;
@@ -788,7 +791,8 @@ int bdrv_open_backing_file(BlockDriverState *bs)
/* backing files always opened read-only */
back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
- ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
+ ret = bdrv_open(bs->backing_hd, backing_filename, NULL,
+ back_flags, back_drv);
if (ret < 0) {
bdrv_delete(bs->backing_hd);
bs->backing_hd = NULL;
@@ -800,15 +804,29 @@ int bdrv_open_backing_file(BlockDriverState *bs)
/*
* Opens a disk image (raw, qcow2, vmdk, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_open.
*/
-int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
- BlockDriver *drv)
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv)
{
int ret;
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char tmp_filename[PATH_MAX + 1];
BlockDriverState *file = NULL;
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* For snapshot=on, create a temporary qcow2 overlay */
if (flags & BDRV_O_SNAPSHOT) {
BlockDriverState *bs1;
int64_t total_size;
@@ -822,10 +840,10 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
/* if there is a backing file, use it */
bs1 = bdrv_new("");
- ret = bdrv_open(bs1, filename, 0, drv);
+ ret = bdrv_open(bs1, filename, NULL, 0, drv);
if (ret < 0) {
bdrv_delete(bs1);
- return ret;
+ goto fail;
}
total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
@@ -836,15 +854,17 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
if (ret < 0) {
- return ret;
+ goto fail;
}
/* Real path is meaningless for protocols */
- if (is_protocol)
+ if (is_protocol) {
snprintf(backing_filename, sizeof(backing_filename),
"%s", filename);
- else if (!realpath(filename, backing_filename))
- return -errno;
+ } else if (!realpath(filename, backing_filename)) {
+ ret = -errno;
+ goto fail;
+ }
bdrv_qcow2 = bdrv_find_format("qcow2");
options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
@@ -859,7 +879,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
free_option_parameters(options);
if (ret < 0) {
- return ret;
+ goto fail;
}
filename = tmp_filename;
@@ -874,7 +894,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags));
if (ret < 0) {
- return ret;
+ goto fail;
}
/* Find the right image format driver */
@@ -887,7 +907,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
}
/* Open the image */
- ret = bdrv_open_common(bs, file, filename, flags, drv);
+ ret = bdrv_open_common(bs, file, filename, options, flags, drv);
if (ret < 0) {
goto unlink_and_fail;
}
@@ -901,11 +921,22 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
if ((flags & BDRV_O_NO_BACKING) == 0) {
ret = bdrv_open_backing_file(bs);
if (ret < 0) {
- bdrv_close(bs);
- return ret;
+ goto close_and_fail;
}
}
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
+ "device '%s' doesn't support the option '%s'",
+ drv->format_name, bs->device_name, entry->key);
+
+ ret = -EINVAL;
+ goto close_and_fail;
+ }
+ QDECREF(options);
+
if (!bdrv_key_required(bs)) {
bdrv_dev_change_media_cb(bs, true);
}
@@ -924,6 +955,15 @@ unlink_and_fail:
if (bs->is_temporary) {
unlink(filename);
}
+fail:
+ QDECREF(bs->options);
+ QDECREF(options);
+ bs->options = NULL;
+ return ret;
+
+close_and_fail:
+ bdrv_close(bs);
+ QDECREF(options);
return ret;
}
@@ -1193,6 +1233,8 @@ void bdrv_close(BlockDriverState *bs)
bs->valid_key = 0;
bs->sg = 0;
bs->growable = 0;
+ QDECREF(bs->options);
+ bs->options = NULL;
if (bs->file != NULL) {
bdrv_delete(bs->file);
@@ -3190,7 +3232,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
if (bs->file) {
drv->bdrv_close(bs);
ret = bdrv_snapshot_goto(bs->file, snapshot_id);
- open_ret = drv->bdrv_open(bs, bs->open_flags);
+ open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
if (open_ret < 0) {
bdrv_delete(bs->file);
bs->drv = NULL;
@@ -4594,7 +4636,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
bs = bdrv_new("");
- ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
+ ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
+ backing_drv);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not open '%s'",
backing_file->value.s);
@@ -4638,3 +4681,9 @@ out:
bdrv_delete(bs);
}
}
+
+AioContext *bdrv_get_aio_context(BlockDriverState *bs)
+{
+ /* Currently BlockDriverState always uses the main loop AioContext */
+ return qemu_get_aio_context();
+}
diff --git a/block/blkverify.c b/block/blkverify.c
index a7dd45909b..2086d97234 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -98,7 +98,7 @@ static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
/* Open the test file */
s->test_file = bdrv_new("");
- ret = bdrv_open(s->test_file, filename, flags, NULL);
+ ret = bdrv_open(s->test_file, filename, NULL, flags, NULL);
if (ret < 0) {
bdrv_delete(s->test_file);
s->test_file = NULL;
diff --git a/block/bochs.c b/block/bochs.c
index a6eb33da42..d7078c0775 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -108,7 +108,7 @@ static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int bochs_open(BlockDriverState *bs, int flags)
+static int bochs_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVBochsState *s = bs->opaque;
int i;
diff --git a/block/cloop.c b/block/cloop.c
index 8fe13e92a1..6ea7cf4046 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -53,7 +53,7 @@ static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int cloop_open(BlockDriverState *bs, int flags)
+static int cloop_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVCloopState *s = bs->opaque;
uint32_t offsets_size, max_compressed_block_size = 1, i;
diff --git a/block/cow.c b/block/cow.c
index 4baf9042fe..d73e08cf92 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -58,7 +58,7 @@ static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int cow_open(BlockDriverState *bs, int flags)
+static int cow_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVCowState *s = bs->opaque;
struct cow_header_v2 cow_header;
diff --git a/block/dmg.c b/block/dmg.c
index 6d85801a84..c1066df13a 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -85,7 +85,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
return 0;
}
-static int dmg_open(BlockDriverState *bs, int flags)
+static int dmg_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVDMGState *s = bs->opaque;
uint64_t info_begin,info_end,last_in_offset,last_out_offset;
diff --git a/block/parallels.c b/block/parallels.c
index 8688f6ca4c..18b3ac0b28 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -68,7 +68,7 @@ static int parallels_probe(const uint8_t *buf, int buf_size, const char *filenam
return 0;
}
-static int parallels_open(BlockDriverState *bs, int flags)
+static int parallels_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVParallelsState *s = bs->opaque;
int i;
diff --git a/block/qcow.c b/block/qcow.c
index a7135eea47..f6750a5bd3 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -92,7 +92,7 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int qcow_open(BlockDriverState *bs, int flags)
+static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVQcowState *s = bs->opaque;
int len, i, shift, ret;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 56fccf9487..d72d063e6d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -454,6 +454,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
*cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
break;
case QCOW2_CLUSTER_ZERO:
+ if (s->qcow_version < 3) {
+ return -EIO;
+ }
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], 0,
QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
@@ -668,7 +671,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
}
/* Update L2 table. */
- if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) {
+ if (s->use_lazy_refcounts) {
qcow2_mark_dirty(bs);
}
if (qcow2_need_accurate_refcounts(s)) {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 55543edf77..9bfb390519 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -201,7 +201,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
*refcount_block = NULL;
/* We write to the refcount table, so we might depend on L2 tables */
- qcow2_cache_flush(bs, s->l2_table_cache);
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ return ret;
+ }
/* Allocate the refcount block itself and mark it as used */
int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
@@ -237,7 +240,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
goto fail_block;
}
- bdrv_flush(bs->file);
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
/* Initialize the new refcount block only after updating its refcount,
* update_refcount uses the refcount cache itself */
@@ -526,8 +532,6 @@ static int update_cluster_refcount(BlockDriverState *bs,
return ret;
}
- bdrv_flush(bs->file);
-
return get_refcount(bs, cluster_index);
}
@@ -663,7 +667,11 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
}
}
- bdrv_flush(bs->file);
+ /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
+ * or explicitly by update_cluster_refcount(). Refcount blocks must be
+ * flushed before the caller's L2 table updates.
+ */
+ qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
return offset;
}
@@ -782,10 +790,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
-
- /* TODO Flushing once for the whole function should
- * be enough */
- bdrv_flush(bs->file);
}
/* compressed clusters are never modified */
refcount = 2;
@@ -841,7 +845,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
}
- ret = 0;
+ ret = bdrv_flush(bs);
fail:
if (l2_table) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index eb8fcd5549..992a5c865c 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -180,11 +180,14 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
/* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
- bdrv_flush(bs->file);
offset = snapshots_offset;
if (offset < 0) {
return offset;
}
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
+ }
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
@@ -378,11 +381,6 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
}
- ret = bdrv_flush(bs);
- if (ret < 0) {
- goto fail;
- }
-
/* Append the new snapshot to the snapshot list */
new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
if (s->snapshots) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 7610e569e3..1f99866cf4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -285,11 +285,26 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
return ret;
}
-static int qcow2_open(BlockDriverState *bs, int flags)
+static QemuOptsList qcow2_runtime_opts = {
+ .name = "qcow2",
+ .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
+ .desc = {
+ {
+ .name = "lazy_refcounts",
+ .type = QEMU_OPT_BOOL,
+ .help = "Postpone refcount updates",
+ },
+ { /* end of list */ }
+ },
+};
+
+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVQcowState *s = bs->opaque;
int len, i, ret = 0;
QCowHeader header;
+ QemuOpts *opts;
+ Error *local_err = NULL;
uint64_t ext_end;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
@@ -495,6 +510,28 @@ static int qcow2_open(BlockDriverState *bs, int flags)
}
}
+ /* Enable lazy_refcounts according to image and command line options */
+ opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ s->use_lazy_refcounts = qemu_opt_get_bool(opts, "lazy_refcounts",
+ (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+
+ qemu_opts_del(opts);
+
+ if (s->use_lazy_refcounts && s->qcow_version < 3) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
+ "a qcow2 image with at least qemu 1.1 compatibility level");
+ ret = -EINVAL;
+ goto fail;
+ }
+
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -584,7 +621,7 @@ static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
*pnum = 0;
}
- return (cluster_offset != 0);
+ return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
}
/* handle reading after the end of the backing file */
@@ -665,10 +702,6 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
case QCOW2_CLUSTER_ZERO:
- if (s->qcow_version < 3) {
- ret = -EIO;
- goto fail;
- }
qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
break;
@@ -912,7 +945,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs)
qcow2_close(bs);
memset(s, 0, sizeof(BDRVQcowState));
- qcow2_open(bs, flags);
+ qcow2_open(bs, NULL, flags);
if (crypt_method) {
s->crypt_method = crypt_method;
@@ -1265,7 +1298,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
BlockDriver* drv = bdrv_find_format("qcow2");
assert(drv != NULL);
- ret = bdrv_open(bs, filename,
+ ret = bdrv_open(bs, filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
if (ret < 0) {
goto out;
diff --git a/block/qcow2.h b/block/qcow2.h
index 718b52baca..103abdb2c0 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -173,6 +173,7 @@ typedef struct BDRVQcowState {
int flags;
int qcow_version;
+ bool use_lazy_refcounts;
uint64_t incompatible_features;
uint64_t compatible_features;
diff --git a/block/qed.c b/block/qed.c
index b8515e58b3..46e12b358b 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -373,7 +373,7 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
s->bs = bs;
}
-static int bdrv_qed_open(BlockDriverState *bs, int flags)
+static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -1526,7 +1526,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
- bdrv_qed_open(bs, bs->open_flags);
+ bdrv_qed_open(bs, NULL, bs->open_flags);
}
static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 4dfdf985b0..8a3cdbc1f3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -750,6 +750,7 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
BlockDriverCompletionFunc *cb, void *opaque, int type)
{
RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ ThreadPool *pool;
acb->bs = bs;
acb->aio_type = type;
@@ -763,7 +764,8 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
- return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+ pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
@@ -1413,6 +1415,7 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData *acb;
+ ThreadPool *pool;
if (fd_open(bs) < 0)
return NULL;
@@ -1424,7 +1427,8 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
acb->aio_offset = 0;
acb->aio_ioctl_buf = buf;
acb->aio_ioctl_cmd = req;
- return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+ pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/block/raw-win32.c b/block/raw-win32.c
index b89ac19ffa..18e0068b26 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -144,6 +144,7 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
BlockDriverCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+ ThreadPool *pool;
acb->bs = bs;
acb->hfile = hfile;
@@ -157,7 +158,8 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
- return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+ pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
int qemu_ftruncate64(int fd, int64_t length)
diff --git a/block/raw.c b/block/raw.c
index 75812db3c2..ce10422006 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -3,7 +3,7 @@
#include "block/block_int.h"
#include "qemu/module.h"
-static int raw_open(BlockDriverState *bs, int flags)
+static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
bs->sg = bs->file->sg;
return 0;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index c711c28613..4245328569 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -468,6 +468,8 @@ static int connect_to_sdog(BDRVSheepdogState *s)
if (err != NULL) {
qerror_report_err(err);
error_free(err);
+ } else {
+ socket_set_nonblock(fd);
}
return fd;
@@ -499,6 +501,13 @@ static void restart_co_req(void *opaque)
qemu_coroutine_enter(co, NULL);
}
+static int have_co_req(void *opaque)
+{
+ /* this handler is set only when there is a pending request, so
+ * always returns 1. */
+ return 1;
+}
+
typedef struct SheepdogReqCo {
int sockfd;
SheepdogReq *hdr;
@@ -521,15 +530,14 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, NULL, co);
+ qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, have_co_req, co);
- socket_set_block(sockfd);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, NULL, co);
+ qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, have_co_req, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
@@ -552,8 +560,9 @@ static coroutine_fn void do_co_req(void *opaque)
}
ret = 0;
out:
+ /* there is at most one request for this sockfd, so it is safe to
+ * set each handler to NULL. */
qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL);
- socket_set_nonblock(sockfd);
srco->ret = ret;
srco->finished = true;
@@ -776,8 +785,6 @@ static int get_sheep_fd(BDRVSheepdogState *s)
return fd;
}
- socket_set_nonblock(fd);
-
qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
return fd;
}
diff --git a/block/vdi.c b/block/vdi.c
index 87c691b504..2662d89af6 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -364,7 +364,7 @@ static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
return result;
}
-static int vdi_open(BlockDriverState *bs, int flags)
+static int vdi_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVdiState *s = bs->opaque;
VdiHeader header;
diff --git a/block/vmdk.c b/block/vmdk.c
index aef1abcb4f..e92104a830 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -723,7 +723,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
return vmdk_parse_extents(buf, bs, bs->file->filename);
}
-static int vmdk_open(BlockDriverState *bs, int flags)
+static int vmdk_open(BlockDriverState *bs, QDict *options, int flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
@@ -1527,7 +1527,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
if (backing_file) {
char parent_filename[PATH_MAX];
BlockDriverState *bs = bdrv_new("");
- ret = bdrv_open(bs, backing_file, 0, NULL);
+ ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
if (ret != 0) {
bdrv_delete(bs);
return ret;
diff --git a/block/vpc.c b/block/vpc.c
index 82229ef5a0..3cad52e54c 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -155,7 +155,7 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int vpc_open(BlockDriverState *bs, int flags)
+static int vpc_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVPCState *s = bs->opaque;
int i;
diff --git a/block/vvfat.c b/block/vvfat.c
index 06e6654824..b8eb38ab36 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2830,7 +2830,7 @@ static int enable_write_target(BDRVVVFATState *s)
return -1;
}
- ret = bdrv_open(s->qcow, s->qcow_filename,
+ ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
return ret;
diff --git a/blockdev.c b/blockdev.c
index 0e67d06330..09f76b782f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -5,6 +5,29 @@
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
#include "sysemu/blockdev.h"
@@ -22,6 +45,7 @@
#include "sysemu/arch_init.h"
static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
+extern QemuOptsList qemu_common_drive_opts;
static const char *const if_name[IF_COUNT] = {
[IF_NONE] = "none",
@@ -191,6 +215,7 @@ static void drive_uninit(DriveInfo *dinfo)
bdrv_delete(dinfo->bdrv);
g_free(dinfo->id);
QTAILQ_REMOVE(&drives, dinfo, next);
+ g_free(dinfo->serial);
g_free(dinfo);
}
@@ -287,7 +312,7 @@ static bool do_check_io_limits(BlockIOLimit *io_limits, Error **errp)
return true;
}
-DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
+DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
{
const char *buf;
const char *file = NULL;
@@ -310,10 +335,36 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
bool copy_on_read;
int ret;
Error *error = NULL;
+ QemuOpts *opts;
+ QDict *bs_opts;
+ const char *id;
translation = BIOS_ATA_TRANSLATION_AUTO;
media = MEDIA_DISK;
+ /* Check common options by copying from all_opts to opts, all other options
+ * are stored in bs_opts. */
+ id = qemu_opts_id(all_opts);
+ opts = qemu_opts_create(&qemu_common_drive_opts, id, 1, &error);
+ if (error_is_set(&error)) {
+ qerror_report_err(error);
+ error_free(error);
+ return NULL;
+ }
+
+ bs_opts = qdict_new();
+ qemu_opts_to_qdict(all_opts, bs_opts);
+ qemu_opts_absorb_qdict(opts, bs_opts, &error);
+ if (error_is_set(&error)) {
+ qerror_report_err(error);
+ error_free(error);
+ return NULL;
+ }
+
+ if (id) {
+ qdict_del(bs_opts, "id");
+ }
+
/* extract parameters */
bus_id = qemu_opt_get_number(opts, "bus", 0);
unit_id = qemu_opt_get_number(opts, "unit", -1);
@@ -564,9 +615,11 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
dinfo->heads = heads;
dinfo->secs = secs;
dinfo->trans = translation;
- dinfo->opts = opts;
+ dinfo->opts = all_opts;
dinfo->refcount = 1;
- dinfo->serial = serial;
+ if (serial != NULL) {
+ dinfo->serial = g_strdup(serial);
+ }
QTAILQ_INSERT_TAIL(&drives, dinfo, next);
bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
@@ -587,17 +640,20 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
case IF_MTD:
break;
case IF_VIRTIO:
+ {
/* add virtio block device */
- opts = qemu_opts_create_nofail(qemu_find_opts("device"));
+ QemuOpts *devopts;
+ devopts = qemu_opts_create_nofail(qemu_find_opts("device"));
if (arch_type == QEMU_ARCH_S390X) {
- qemu_opt_set(opts, "driver", "virtio-blk-s390");
+ qemu_opt_set(devopts, "driver", "virtio-blk-s390");
} else {
- qemu_opt_set(opts, "driver", "virtio-blk-pci");
+ qemu_opt_set(devopts, "driver", "virtio-blk-pci");
}
- qemu_opt_set(opts, "drive", dinfo->id);
+ qemu_opt_set(devopts, "drive", dinfo->id);
if (devaddr)
- qemu_opt_set(opts, "addr", devaddr);
+ qemu_opt_set(devopts, "addr", devaddr);
break;
+ }
default:
abort();
}
@@ -635,7 +691,9 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
error_report("warning: disabling copy_on_read on readonly drive");
}
- ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
+ ret = bdrv_open(dinfo->bdrv, file, bs_opts, bdrv_flags, drv);
+ bs_opts = NULL;
+
if (ret < 0) {
if (ret == -EMEDIUMTYPE) {
error_report("could not open disk image %s: not in %s format",
@@ -649,9 +707,14 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
if (bdrv_key_required(dinfo->bdrv))
autostart = 0;
+
+ qemu_opts_del(opts);
+
return dinfo;
err:
+ qemu_opts_del(opts);
+ QDECREF(bs_opts);
bdrv_delete(dinfo->bdrv);
g_free(dinfo->id);
QTAILQ_REMOVE(&drives, dinfo, next);
@@ -820,7 +883,9 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
/* We will manually add the backing_hd field to the bs later */
states->new_bs = bdrv_new("");
- ret = bdrv_open(states->new_bs, new_image_file,
+ /* TODO Inherit bs->options or only take explicit options with an
+ * extended QMP command? */
+ ret = bdrv_open(states->new_bs, new_image_file, NULL,
flags | BDRV_O_NO_BACKING, drv);
if (ret != 0) {
error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
@@ -921,7 +986,7 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
int bdrv_flags, BlockDriver *drv,
const char *password, Error **errp)
{
- if (bdrv_open(bs, filename, bdrv_flags, drv) < 0) {
+ if (bdrv_open(bs, filename, NULL, bdrv_flags, drv) < 0) {
error_set(errp, QERR_OPEN_FILE_FAILED, filename);
return;
}
@@ -1330,7 +1395,7 @@ void qmp_drive_mirror(const char *device, const char *target,
* file.
*/
target_bs = bdrv_new("");
- ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
+ ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv);
if (ret < 0) {
bdrv_delete(target_bs);
@@ -1459,9 +1524,9 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
return dummy.next;
}
-QemuOptsList qemu_drive_opts = {
+QemuOptsList qemu_common_drive_opts = {
.name = "drive",
- .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
.desc = {
{
.name = "bus",
@@ -1580,3 +1645,15 @@ QemuOptsList qemu_drive_opts = {
{ /* end of list */ }
},
};
+
+QemuOptsList qemu_drive_opts = {
+ .name = "drive",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
+ .desc = {
+ /*
+ * no elements => accept any params
+ * validation will happen later
+ */
+ { /* end of list */ }
+ },
+};
diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
index dfe5f9b9cd..1242d61e71 100644
--- a/hw/dataplane/virtio-blk.c
+++ b/hw/dataplane/virtio-blk.c
@@ -263,6 +263,11 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
}
}
+static int flush_true(EventNotifier *e)
+{
+ return true;
+}
+
static void handle_notify(EventNotifier *e)
{
VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
@@ -342,6 +347,14 @@ static void handle_notify(EventNotifier *e)
}
}
+static int flush_io(EventNotifier *e)
+{
+ VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
+ io_notifier);
+
+ return s->num_reqs > 0;
+}
+
static void handle_io(EventNotifier *e)
{
VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
@@ -472,7 +485,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
exit(1);
}
s->host_notifier = *virtio_queue_get_host_notifier(vq);
- aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, NULL);
+ aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, flush_true);
/* Set up ioqueue */
ioq_init(&s->ioqueue, s->fd, REQ_MAX);
@@ -480,7 +493,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
}
s->io_notifier = *ioq_get_notifier(&s->ioqueue);
- aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, NULL);
+ aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, flush_io);
s->started = true;
trace_virtio_blk_data_plane_start(s);
diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index cc09a2f1fa..83329e2e69 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -763,7 +763,7 @@ static int blk_init(struct XenDevice *xendev)
xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
blkdev->bs = bdrv_new(blkdev->dev);
if (blkdev->bs) {
- if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
+ if (bdrv_open(blkdev->bs, blkdev->filename, NULL, qflags,
bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) {
bdrv_delete(blkdev->bs);
blkdev->bs = NULL;
diff --git a/include/block/aio.h b/include/block/aio.h
index 5b54d383fc..183679374f 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -66,6 +66,9 @@ typedef struct AioContext {
/* GPollFDs for aio_poll() */
GArray *pollfds;
+
+ /* Thread pool for performing work and receiving completion callbacks */
+ struct ThreadPool *thread_pool;
} AioContext;
/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
@@ -223,6 +226,9 @@ void aio_set_event_notifier(AioContext *ctx,
*/
GSource *aio_get_g_source(AioContext *ctx);
+/* Return the ThreadPool bound to this AioContext */
+struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+
/* Functions to operate on the main QEMU AioContext. */
bool qemu_aio_wait(void);
diff --git a/include/block/block.h b/include/block/block.h
index 0f750d7da3..d4f34d6462 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -137,8 +137,8 @@ int bdrv_parse_cache_flags(const char *mode, int *flags);
int bdrv_parse_discard_flags(const char *mode, int *flags);
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
int bdrv_open_backing_file(BlockDriverState *bs);
-int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
- BlockDriver *drv);
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs, int flags);
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index eaad53e426..ce0aa26b8e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -82,7 +82,7 @@ struct BlockDriver {
void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
- int (*bdrv_open)(BlockDriverState *bs, int flags);
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags);
int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
@@ -286,6 +286,7 @@ struct BlockDriverState {
/* long-running background operation */
BlockJob *job;
+ QDict *options;
};
int get_tmp_filename(char *filename, int size);
@@ -293,6 +294,13 @@ int get_tmp_filename(char *filename, int size);
void bdrv_set_io_limits(BlockDriverState *bs,
BlockIOLimit *io_limits);
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index c31fae3f3c..a978162a3f 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -104,6 +104,7 @@ bool qemu_in_coroutine(void);
*/
typedef struct CoQueue {
QTAILQ_HEAD(, Coroutine) entries;
+ AioContext *ctx;
} CoQueue;
/**
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
index 200703e35f..32afcdd1d6 100644
--- a/include/block/thread-pool.h
+++ b/include/block/thread-pool.h
@@ -26,9 +26,16 @@
typedef int ThreadPoolFunc(void *opaque);
-BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
- BlockDriverCompletionFunc *cb, void *opaque);
-int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
-void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+typedef struct ThreadPool ThreadPool;
+
+ThreadPool *thread_pool_new(struct AioContext *ctx);
+void thread_pool_free(ThreadPool *pool);
+
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPool *pool,
+ ThreadPoolFunc *func, void *arg,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
+ ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
#endif
diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index 6d9a4be3a5..685b2e3fcb 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h
@@ -64,4 +64,6 @@ int64_t qdict_get_try_int(const QDict *qdict, const char *key,
int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value);
const char *qdict_get_try_str(const QDict *qdict, const char *key);
+QDict *qdict_clone_shallow(const QDict *src);
+
#endif /* QDICT_H */
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 09952885a9..6f0200a7ac 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -82,6 +82,11 @@ int qemu_init_main_loop(void);
int main_loop_wait(int nonblocking);
/**
+ * qemu_get_aio_context: Return the main loop's AioContext
+ */
+AioContext *qemu_get_aio_context(void);
+
+/**
* qemu_notify_event: Force processing of pending events.
*
* Similar to signaling a condition variable, qemu_notify_event forces
diff --git a/include/qemu/option.h b/include/qemu/option.h
index ba197cddcf..bdb6d218b4 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -149,6 +149,7 @@ void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
Error **errp);
QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict);
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp);
typedef int (*qemu_opts_loopfunc)(QemuOpts *opts, void *opaque);
int qemu_opts_print(QemuOpts *opts, void *dummy);
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 1fe533299e..804ec8839b 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -40,7 +40,7 @@ struct DriveInfo {
int media_cd;
int cyls, heads, secs, trans;
QemuOpts *opts;
- const char *serial;
+ char *serial;
QTAILQ_ENTRY(DriveInfo) next;
int refcount;
};
diff --git a/main-loop.c b/main-loop.c
index 8c9b58c14c..eb80ff369f 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -109,6 +109,11 @@ static int qemu_signal_init(void)
static AioContext *qemu_aio_context;
+AioContext *qemu_get_aio_context(void)
+{
+ return qemu_aio_context;
+}
+
void qemu_notify_event(void)
{
if (!qemu_aio_context) {
diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
index 97ef01c796..86efe1f86a 100644
--- a/qemu-coroutine-lock.c
+++ b/qemu-coroutine-lock.c
@@ -29,28 +29,36 @@
#include "block/aio.h"
#include "trace.h"
-static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
- QTAILQ_HEAD_INITIALIZER(unlock_bh_queue);
-static QEMUBH* unlock_bh;
+/* Coroutines are awoken from a BH to allow the current coroutine to complete
+ * its flow of execution. The BH may run after the CoQueue has been destroyed,
+ * so keep BH data in a separate heap-allocated struct.
+ */
+typedef struct {
+ QEMUBH *bh;
+ QTAILQ_HEAD(, Coroutine) entries;
+} CoQueueNextData;
static void qemu_co_queue_next_bh(void *opaque)
{
+ CoQueueNextData *data = opaque;
Coroutine *next;
trace_qemu_co_queue_next_bh();
- while ((next = QTAILQ_FIRST(&unlock_bh_queue))) {
- QTAILQ_REMOVE(&unlock_bh_queue, next, co_queue_next);
+ while ((next = QTAILQ_FIRST(&data->entries))) {
+ QTAILQ_REMOVE(&data->entries, next, co_queue_next);
qemu_coroutine_enter(next, NULL);
}
+
+ qemu_bh_delete(data->bh);
+ g_slice_free(CoQueueNextData, data);
}
void qemu_co_queue_init(CoQueue *queue)
{
QTAILQ_INIT(&queue->entries);
- if (!unlock_bh) {
- unlock_bh = qemu_bh_new(qemu_co_queue_next_bh, NULL);
- }
+ /* This will be exposed to callers once there are multiple AioContexts */
+ queue->ctx = qemu_get_aio_context();
}
void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
@@ -69,26 +77,39 @@ void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue)
assert(qemu_in_coroutine());
}
-bool qemu_co_queue_next(CoQueue *queue)
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
{
Coroutine *next;
+ CoQueueNextData *data;
+
+ if (QTAILQ_EMPTY(&queue->entries)) {
+ return false;
+ }
- next = QTAILQ_FIRST(&queue->entries);
- if (next) {
+ data = g_slice_new(CoQueueNextData);
+ data->bh = aio_bh_new(queue->ctx, qemu_co_queue_next_bh, data);
+ QTAILQ_INIT(&data->entries);
+ qemu_bh_schedule(data->bh);
+
+ while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
- QTAILQ_INSERT_TAIL(&unlock_bh_queue, next, co_queue_next);
+ QTAILQ_INSERT_TAIL(&data->entries, next, co_queue_next);
trace_qemu_co_queue_next(next);
- qemu_bh_schedule(unlock_bh);
+ if (single) {
+ break;
+ }
}
+ return true;
+}
- return (next != NULL);
+bool qemu_co_queue_next(CoQueue *queue)
+{
+ return qemu_co_queue_do_restart(queue, true);
}
void qemu_co_queue_restart_all(CoQueue *queue)
{
- while (qemu_co_queue_next(queue)) {
- /* Do nothing */
- }
+ qemu_co_queue_do_restart(queue, false);
}
bool qemu_co_queue_empty(CoQueue *queue)
diff --git a/qemu-img.c b/qemu-img.c
index 471de7d646..31627b0da8 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -276,7 +276,7 @@ static BlockDriverState *bdrv_new_open(const char *filename,
drv = NULL;
}
- ret = bdrv_open(bs, filename, flags, drv);
+ ret = bdrv_open(bs, filename, NULL, flags, drv);
if (ret < 0) {
error_report("Could not open '%s': %s", filename, strerror(-ret));
goto fail;
@@ -2156,7 +2156,7 @@ static int img_rebase(int argc, char **argv)
bs_old_backing = bdrv_new("old_backing");
bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
- ret = bdrv_open(bs_old_backing, backing_name, BDRV_O_FLAGS,
+ ret = bdrv_open(bs_old_backing, backing_name, NULL, BDRV_O_FLAGS,
old_backing_drv);
if (ret) {
error_report("Could not open old backing file '%s'", backing_name);
@@ -2164,7 +2164,7 @@ static int img_rebase(int argc, char **argv)
}
if (out_baseimg[0]) {
bs_new_backing = bdrv_new("new_backing");
- ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS,
+ ret = bdrv_open(bs_new_backing, out_baseimg, NULL, BDRV_O_FLAGS,
new_backing_drv);
if (ret) {
error_report("Could not open new backing file '%s'",
diff --git a/qemu-io.c b/qemu-io.c
index 7b3de42773..79be516953 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -1773,7 +1773,7 @@ static int openfile(char *name, int flags, int growable)
} else {
bs = bdrv_new("hda");
- if (bdrv_open(bs, name, flags, NULL) < 0) {
+ if (bdrv_open(bs, name, NULL, flags, NULL) < 0) {
fprintf(stderr, "%s: can't open device %s\n", progname, name);
bdrv_delete(bs);
bs = NULL;
diff --git a/qemu-nbd.c b/qemu-nbd.c
index e7268d0a9f..ca722ed425 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -557,7 +557,7 @@ int main(int argc, char **argv)
bs = bdrv_new("hda");
srcpath = argv[optind];
- if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) {
+ if ((ret = bdrv_open(bs, srcpath, NULL, flags, NULL)) < 0) {
errno = -ret;
err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]);
}
diff --git a/qobject/qdict.c b/qobject/qdict.c
index 7543ccc10f..ed381f9a50 100644
--- a/qobject/qdict.c
+++ b/qobject/qdict.c
@@ -401,6 +401,28 @@ const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry)
}
/**
+ * qdict_clone_shallow(): Clones a given QDict. Its entries are not copied, but
+ * another reference is added.
+ */
+QDict *qdict_clone_shallow(const QDict *src)
+{
+ QDict *dest;
+ QDictEntry *entry;
+ int i;
+
+ dest = qdict_new();
+
+ for (i = 0; i < QDICT_BUCKET_MAX; i++) {
+ QLIST_FOREACH(entry, &src->table[i], next) {
+ qobject_incref(entry->value);
+ qdict_put_obj(dest, entry->key, entry->value);
+ }
+ }
+
+ return dest;
+}
+
+/**
* qentry_destroy(): Free all the memory allocated by a QDictEntry
*/
static void qentry_destroy(QDictEntry *e)
diff --git a/tests/qemu-iotests/007 b/tests/qemu-iotests/007
index 0139264c4f..c454f2c8ec 100755
--- a/tests/qemu-iotests/007
+++ b/tests/qemu-iotests/007
@@ -50,10 +50,9 @@ _make_test_img 1M
for i in `seq 1 10`; do
echo "savevm $i"
- # XXX(hch): adding -nographic would be good, but hangs the test
- $QEMU -hda $TEST_IMG -monitor stdio >/dev/null 2>&1 <<EOF
-savevm test-$i
-quit
+ $QEMU -nographic -hda $TEST_IMG -serial none -monitor stdio >/dev/null 2>&1 <<EOF
+savevm test-$i
+quit
EOF
done
diff --git a/tests/qemu-iotests/050 b/tests/qemu-iotests/050
new file mode 100755
index 0000000000..05793e2d4b
--- /dev/null
+++ b/tests/qemu-iotests/050
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Test qemu-img rebase with zero clusters
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=pbonzini@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+ rm -f $TEST_IMG.old
+ rm -f $TEST_IMG.new
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2 qed
+_supported_proto file
+_supported_os Linux
+
+if test "$IMGFMT" = qcow2 && test $IMGOPTS = ""; then
+ IMGOPTS=compat=1.1
+fi
+
+echo
+echo "== Creating images =="
+
+size=10M
+_make_test_img $size
+$QEMU_IO -c "write -P 0x40 0 1048576" $TEST_IMG | _filter_qemu_io
+mv $TEST_IMG $TEST_IMG.old
+
+_make_test_img $size
+$QEMU_IO -c "write -P 0x5a 0 1048576" $TEST_IMG | _filter_qemu_io
+mv $TEST_IMG $TEST_IMG.new
+
+_make_test_img -b $TEST_IMG.old $size
+$QEMU_IO -c "write -z 0 1048576" $TEST_IMG | _filter_qemu_io
+
+echo
+echo "== Rebasing the image =="
+
+$QEMU_IMG rebase -b $TEST_IMG.new $TEST_IMG
+$QEMU_IO -c "read -P 0x00 0 1048576" $TEST_IMG | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/050.out b/tests/qemu-iotests/050.out
new file mode 100644
index 0000000000..3f5f7e1e7b
--- /dev/null
+++ b/tests/qemu-iotests/050.out
@@ -0,0 +1,17 @@
+QA output created by 050
+
+== Creating images ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=10485760
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=10485760
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=10485760 backing_file='TEST_DIR/t.IMGFMT.old'
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Rebasing the image ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index fcf57e0510..1d7e4f31a0 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -56,3 +56,4 @@
047 rw auto
048 img auto quick
049 rw auto
+050 rw auto backing quick
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index 9998e031f2..22915aac10 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -4,6 +4,8 @@
#include "block/thread-pool.h"
#include "block/block.h"
+static AioContext *ctx;
+static ThreadPool *pool;
static int active;
typedef struct {
@@ -38,19 +40,10 @@ static void done_cb(void *opaque, int ret)
active--;
}
-/* A non-blocking poll of the main AIO context (we cannot use aio_poll
- * because we do not know the AioContext).
- */
-static void qemu_aio_wait_nonblocking(void)
-{
- qemu_notify_event();
- qemu_aio_wait();
-}
-
/* Wait until all aio and bh activity has finished */
static void qemu_aio_wait_all(void)
{
- while (qemu_aio_wait()) {
+ while (aio_poll(ctx, true)) {
/* Do nothing */
}
}
@@ -58,7 +51,7 @@ static void qemu_aio_wait_all(void)
static void test_submit(void)
{
WorkerTestData data = { .n = 0 };
- thread_pool_submit(worker_cb, &data);
+ thread_pool_submit(pool, worker_cb, &data);
qemu_aio_wait_all();
g_assert_cmpint(data.n, ==, 1);
}
@@ -66,7 +59,8 @@ static void test_submit(void)
static void test_submit_aio(void)
{
WorkerTestData data = { .n = 0, .ret = -EINPROGRESS };
- data.aiocb = thread_pool_submit_aio(worker_cb, &data, done_cb, &data);
+ data.aiocb = thread_pool_submit_aio(pool, worker_cb, &data,
+ done_cb, &data);
/* The callbacks are not called until after the first wait. */
active = 1;
@@ -84,7 +78,7 @@ static void co_test_cb(void *opaque)
active = 1;
data->n = 0;
data->ret = -EINPROGRESS;
- thread_pool_submit_co(worker_cb, data);
+ thread_pool_submit_co(pool, worker_cb, data);
/* The test continues in test_submit_co, after qemu_coroutine_enter... */
@@ -126,12 +120,12 @@ static void test_submit_many(void)
for (i = 0; i < 100; i++) {
data[i].n = 0;
data[i].ret = -EINPROGRESS;
- thread_pool_submit_aio(worker_cb, &data[i], done_cb, &data[i]);
+ thread_pool_submit_aio(pool, worker_cb, &data[i], done_cb, &data[i]);
}
active = 100;
while (active > 0) {
- qemu_aio_wait();
+ aio_poll(ctx, true);
}
for (i = 0; i < 100; i++) {
g_assert_cmpint(data[i].n, ==, 1);
@@ -154,7 +148,7 @@ static void test_cancel(void)
for (i = 0; i < 100; i++) {
data[i].n = 0;
data[i].ret = -EINPROGRESS;
- data[i].aiocb = thread_pool_submit_aio(long_cb, &data[i],
+ data[i].aiocb = thread_pool_submit_aio(pool, long_cb, &data[i],
done_cb, &data[i]);
}
@@ -162,7 +156,8 @@ static void test_cancel(void)
* run, but do not waste too much time...
*/
active = 100;
- qemu_aio_wait_nonblocking();
+ aio_notify(ctx);
+ aio_poll(ctx, false);
/* Wait some time for the threads to start, with some sanity
* testing on the behavior of the scheduler...
@@ -208,11 +203,10 @@ static void test_cancel(void)
int main(int argc, char **argv)
{
- /* These should be removed once each AioContext has its thread pool.
- * The test should create its own AioContext.
- */
- qemu_init_main_loop();
- bdrv_init();
+ int ret;
+
+ ctx = aio_context_new();
+ pool = aio_get_thread_pool(ctx);
g_test_init(&argc, &argv, NULL);
g_test_add_func("/thread-pool/submit", test_submit);
@@ -220,5 +214,9 @@ int main(int argc, char **argv)
g_test_add_func("/thread-pool/submit-co", test_submit_co);
g_test_add_func("/thread-pool/submit-many", test_submit_many);
g_test_add_func("/thread-pool/cancel", test_cancel);
- return g_test_run();
+
+ ret = g_test_run();
+
+ aio_context_unref(ctx);
+ return ret;
}
diff --git a/thread-pool.c b/thread-pool.c
index e3ca64d790..0ebd4c2964 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -24,7 +24,7 @@
#include "qemu/event_notifier.h"
#include "block/thread-pool.h"
-static void do_spawn_thread(void);
+static void do_spawn_thread(ThreadPool *pool);
typedef struct ThreadPoolElement ThreadPoolElement;
@@ -37,6 +37,7 @@ enum ThreadState {
struct ThreadPoolElement {
BlockDriverAIOCB common;
+ ThreadPool *pool;
ThreadPoolFunc *func;
void *arg;
@@ -54,49 +55,56 @@ struct ThreadPoolElement {
QLIST_ENTRY(ThreadPoolElement) all;
};
-static EventNotifier notifier;
-static QemuMutex lock;
-static QemuCond check_cancel;
-static QemuSemaphore sem;
-static int max_threads = 64;
-static QEMUBH *new_thread_bh;
-
-/* The following variables are protected by the global mutex. */
-static QLIST_HEAD(, ThreadPoolElement) head;
-
-/* The following variables are protected by lock. */
-static QTAILQ_HEAD(, ThreadPoolElement) request_list;
-static int cur_threads;
-static int idle_threads;
-static int new_threads; /* backlog of threads we need to create */
-static int pending_threads; /* threads created but not running yet */
-static int pending_cancellations; /* whether we need a cond_broadcast */
-
-static void *worker_thread(void *unused)
+struct ThreadPool {
+ EventNotifier notifier;
+ AioContext *ctx;
+ QemuMutex lock;
+ QemuCond check_cancel;
+ QemuCond worker_stopped;
+ QemuSemaphore sem;
+ int max_threads;
+ QEMUBH *new_thread_bh;
+
+ /* The following variables are only accessed from one AioContext. */
+ QLIST_HEAD(, ThreadPoolElement) head;
+
+ /* The following variables are protected by lock. */
+ QTAILQ_HEAD(, ThreadPoolElement) request_list;
+ int cur_threads;
+ int idle_threads;
+ int new_threads; /* backlog of threads we need to create */
+ int pending_threads; /* threads created but not running yet */
+ int pending_cancellations; /* whether we need a cond_broadcast */
+ bool stopping;
+};
+
+static void *worker_thread(void *opaque)
{
- qemu_mutex_lock(&lock);
- pending_threads--;
- do_spawn_thread();
+ ThreadPool *pool = opaque;
+
+ qemu_mutex_lock(&pool->lock);
+ pool->pending_threads--;
+ do_spawn_thread(pool);
- while (1) {
+ while (!pool->stopping) {
ThreadPoolElement *req;
int ret;
do {
- idle_threads++;
- qemu_mutex_unlock(&lock);
- ret = qemu_sem_timedwait(&sem, 10000);
- qemu_mutex_lock(&lock);
- idle_threads--;
- } while (ret == -1 && !QTAILQ_EMPTY(&request_list));
- if (ret == -1) {
+ pool->idle_threads++;
+ qemu_mutex_unlock(&pool->lock);
+ ret = qemu_sem_timedwait(&pool->sem, 10000);
+ qemu_mutex_lock(&pool->lock);
+ pool->idle_threads--;
+ } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list));
+ if (ret == -1 || pool->stopping) {
break;
}
- req = QTAILQ_FIRST(&request_list);
- QTAILQ_REMOVE(&request_list, req, reqs);
+ req = QTAILQ_FIRST(&pool->request_list);
+ QTAILQ_REMOVE(&pool->request_list, req, reqs);
req->state = THREAD_ACTIVE;
- qemu_mutex_unlock(&lock);
+ qemu_mutex_unlock(&pool->lock);
ret = req->func(req->arg);
@@ -105,45 +113,48 @@ static void *worker_thread(void *unused)
smp_wmb();
req->state = THREAD_DONE;
- qemu_mutex_lock(&lock);
- if (pending_cancellations) {
- qemu_cond_broadcast(&check_cancel);
+ qemu_mutex_lock(&pool->lock);
+ if (pool->pending_cancellations) {
+ qemu_cond_broadcast(&pool->check_cancel);
}
- event_notifier_set(&notifier);
+ event_notifier_set(&pool->notifier);
}
- cur_threads--;
- qemu_mutex_unlock(&lock);
+ pool->cur_threads--;
+ qemu_cond_signal(&pool->worker_stopped);
+ qemu_mutex_unlock(&pool->lock);
return NULL;
}
-static void do_spawn_thread(void)
+static void do_spawn_thread(ThreadPool *pool)
{
QemuThread t;
/* Runs with lock taken. */
- if (!new_threads) {
+ if (!pool->new_threads) {
return;
}
- new_threads--;
- pending_threads++;
+ pool->new_threads--;
+ pool->pending_threads++;
- qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED);
+ qemu_thread_create(&t, worker_thread, pool, QEMU_THREAD_DETACHED);
}
static void spawn_thread_bh_fn(void *opaque)
{
- qemu_mutex_lock(&lock);
- do_spawn_thread();
- qemu_mutex_unlock(&lock);
+ ThreadPool *pool = opaque;
+
+ qemu_mutex_lock(&pool->lock);
+ do_spawn_thread(pool);
+ qemu_mutex_unlock(&pool->lock);
}
-static void spawn_thread(void)
+static void spawn_thread(ThreadPool *pool)
{
- cur_threads++;
- new_threads++;
+ pool->cur_threads++;
+ pool->new_threads++;
/* If there are threads being created, they will spawn new workers, so
* we don't spend time creating many threads in a loop holding a mutex or
* starving the current vcpu.
@@ -151,23 +162,25 @@ static void spawn_thread(void)
* If there are no idle threads, ask the main thread to create one, so we
* inherit the correct affinity instead of the vcpu affinity.
*/
- if (!pending_threads) {
- qemu_bh_schedule(new_thread_bh);
+ if (!pool->pending_threads) {
+ qemu_bh_schedule(pool->new_thread_bh);
}
}
static void event_notifier_ready(EventNotifier *notifier)
{
+ ThreadPool *pool = container_of(notifier, ThreadPool, notifier);
ThreadPoolElement *elem, *next;
event_notifier_test_and_clear(notifier);
restart:
- QLIST_FOREACH_SAFE(elem, &head, all, next) {
+ QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
continue;
}
if (elem->state == THREAD_DONE) {
- trace_thread_pool_complete(elem, elem->common.opaque, elem->ret);
+ trace_thread_pool_complete(pool, elem, elem->common.opaque,
+ elem->ret);
}
if (elem->state == THREAD_DONE && elem->common.cb) {
QLIST_REMOVE(elem, all);
@@ -186,34 +199,36 @@ restart:
static int thread_pool_active(EventNotifier *notifier)
{
- return !QLIST_EMPTY(&head);
+ ThreadPool *pool = container_of(notifier, ThreadPool, notifier);
+ return !QLIST_EMPTY(&pool->head);
}
static void thread_pool_cancel(BlockDriverAIOCB *acb)
{
ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+ ThreadPool *pool = elem->pool;
trace_thread_pool_cancel(elem, elem->common.opaque);
- qemu_mutex_lock(&lock);
+ qemu_mutex_lock(&pool->lock);
if (elem->state == THREAD_QUEUED &&
/* No thread has yet started working on elem. we can try to "steal"
* the item from the worker if we can get a signal from the
* semaphore. Because this is non-blocking, we can do it with
* the lock taken and ensure that elem will remain THREAD_QUEUED.
*/
- qemu_sem_timedwait(&sem, 0) == 0) {
- QTAILQ_REMOVE(&request_list, elem, reqs);
+ qemu_sem_timedwait(&pool->sem, 0) == 0) {
+ QTAILQ_REMOVE(&pool->request_list, elem, reqs);
elem->state = THREAD_CANCELED;
- event_notifier_set(&notifier);
+ event_notifier_set(&pool->notifier);
} else {
- pending_cancellations++;
+ pool->pending_cancellations++;
while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
- qemu_cond_wait(&check_cancel, &lock);
+ qemu_cond_wait(&pool->check_cancel, &pool->lock);
}
- pending_cancellations--;
+ pool->pending_cancellations--;
}
- qemu_mutex_unlock(&lock);
+ qemu_mutex_unlock(&pool->lock);
}
static const AIOCBInfo thread_pool_aiocb_info = {
@@ -221,7 +236,8 @@ static const AIOCBInfo thread_pool_aiocb_info = {
.cancel = thread_pool_cancel,
};
-BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPool *pool,
+ ThreadPoolFunc *func, void *arg,
BlockDriverCompletionFunc *cb, void *opaque)
{
ThreadPoolElement *req;
@@ -230,18 +246,19 @@ BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
req->func = func;
req->arg = arg;
req->state = THREAD_QUEUED;
+ req->pool = pool;
- QLIST_INSERT_HEAD(&head, req, all);
+ QLIST_INSERT_HEAD(&pool->head, req, all);
- trace_thread_pool_submit(req, arg);
+ trace_thread_pool_submit(pool, req, arg);
- qemu_mutex_lock(&lock);
- if (idle_threads == 0 && cur_threads < max_threads) {
- spawn_thread();
+ qemu_mutex_lock(&pool->lock);
+ if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
+ spawn_thread(pool);
}
- QTAILQ_INSERT_TAIL(&request_list, req, reqs);
- qemu_mutex_unlock(&lock);
- qemu_sem_post(&sem);
+ QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
+ qemu_mutex_unlock(&pool->lock);
+ qemu_sem_post(&pool->sem);
return &req->common;
}
@@ -258,32 +275,80 @@ static void thread_pool_co_cb(void *opaque, int ret)
qemu_coroutine_enter(co->co, NULL);
}
-int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
+int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
+ void *arg)
{
ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
assert(qemu_in_coroutine());
- thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
+ thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc);
qemu_coroutine_yield();
return tpc.ret;
}
-void thread_pool_submit(ThreadPoolFunc *func, void *arg)
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
+{
+ thread_pool_submit_aio(pool, func, arg, NULL, NULL);
+}
+
+static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
{
- thread_pool_submit_aio(func, arg, NULL, NULL);
+ if (!ctx) {
+ ctx = qemu_get_aio_context();
+ }
+
+ memset(pool, 0, sizeof(*pool));
+ event_notifier_init(&pool->notifier, false);
+ pool->ctx = ctx;
+ qemu_mutex_init(&pool->lock);
+ qemu_cond_init(&pool->check_cancel);
+ qemu_cond_init(&pool->worker_stopped);
+ qemu_sem_init(&pool->sem, 0);
+ pool->max_threads = 64;
+ pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
+
+ QLIST_INIT(&pool->head);
+ QTAILQ_INIT(&pool->request_list);
+
+ aio_set_event_notifier(ctx, &pool->notifier, event_notifier_ready,
+ thread_pool_active);
}
-static void thread_pool_init(void)
+ThreadPool *thread_pool_new(AioContext *ctx)
{
- QLIST_INIT(&head);
- event_notifier_init(&notifier, false);
- qemu_mutex_init(&lock);
- qemu_cond_init(&check_cancel);
- qemu_sem_init(&sem, 0);
- qemu_aio_set_event_notifier(&notifier, event_notifier_ready,
- thread_pool_active);
-
- QTAILQ_INIT(&request_list);
- new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
+ ThreadPool *pool = g_new(ThreadPool, 1);
+ thread_pool_init_one(pool, ctx);
+ return pool;
}
-block_init(thread_pool_init)
+void thread_pool_free(ThreadPool *pool)
+{
+ if (!pool) {
+ return;
+ }
+
+ assert(QLIST_EMPTY(&pool->head));
+
+ qemu_mutex_lock(&pool->lock);
+
+ /* Stop new threads from spawning */
+ qemu_bh_delete(pool->new_thread_bh);
+ pool->cur_threads -= pool->new_threads;
+ pool->new_threads = 0;
+
+ /* Wait for worker threads to terminate */
+ pool->stopping = true;
+ while (pool->cur_threads > 0) {
+ qemu_sem_post(&pool->sem);
+ qemu_cond_wait(&pool->worker_stopped, &pool->lock);
+ }
+
+ qemu_mutex_unlock(&pool->lock);
+
+ aio_set_event_notifier(pool->ctx, &pool->notifier, NULL, NULL);
+ qemu_sem_destroy(&pool->sem);
+ qemu_cond_destroy(&pool->check_cancel);
+ qemu_cond_destroy(&pool->worker_stopped);
+ qemu_mutex_destroy(&pool->lock);
+ event_notifier_cleanup(&pool->notifier);
+ g_free(pool);
+}
diff --git a/trace-events b/trace-events
index d6a847d18a..cd73b7f3ea 100644
--- a/trace-events
+++ b/trace-events
@@ -115,8 +115,8 @@ virtio_blk_data_plane_complete_request(void *s, unsigned int head, int ret) "dat
vring_setup(uint64_t physical, void *desc, void *avail, void *used) "vring physical %#"PRIx64" desc %p avail %p used %p"
# thread-pool.c
-thread_pool_submit(void *req, void *opaque) "req %p opaque %p"
-thread_pool_complete(void *req, void *opaque, int ret) "req %p opaque %p ret %d"
+thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
+thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
# posix-aio-compat.c
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 5a1d03c331..8b74bf1f7a 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -1067,6 +1067,40 @@ QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
}
/*
+ * Adds all QDict entries to the QemuOpts that can be added and removes them
+ * from the QDict. When this function returns, the QDict contains only those
+ * entries that couldn't be added to the QemuOpts.
+ */
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
+{
+ const QDictEntry *entry, *next;
+
+ entry = qdict_first(qdict);
+
+ while (entry != NULL) {
+ Error *local_err = NULL;
+ OptsFromQDictState state = {
+ .errp = &local_err,
+ .opts = opts,
+ };
+
+ next = qdict_next(qdict, entry);
+
+ if (find_desc_by_name(opts->list->desc, entry->key)) {
+ qemu_opts_from_qdict_1(entry->key, entry->value, &state);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return;
+ } else {
+ qdict_del(qdict, entry->key);
+ }
+ }
+
+ entry = next;
+ }
+}
+
+/*
* Convert from QemuOpts to QDict.
* The QDict values are of type QString.
* TODO We'll want to use types appropriate for opt->desc->type, but