aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-04-30 13:46:42 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-04-30 13:46:42 +0100
commitf38d1ea49711232651a817ec9d04c9d9e4816c44 (patch)
treea3f1600f9a56e1e39b0dc830dd0f8fa005b46c6a
parentc3811c08ac0c80e9d823317dde07b4c12de67069 (diff)
parent68bf7336533faa6aa90fdd4558edddbf5d8ef814 (diff)
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches - Fix permission update order problems with block graph changes - qemu-img convert: Unshare write permission for source - vhost-user-blk: Fail gracefully on too large queue size # gpg: Signature made Fri 30 Apr 2021 11:27:51 BST # gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6 # gpg: issuer "kwolf@redhat.com" # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full] # Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6 * remotes/kevin/tags/for-upstream: (39 commits) vhost-user-blk: Fail gracefully on too large queue size qemu-img convert: Unshare write permission for source block: Add BDRV_O_NO_SHARE for blk_new_open() block: refactor bdrv_node_check_perm() block: rename bdrv_replace_child_safe() to bdrv_replace_child() block: refactor bdrv_child_set_perm_safe() transaction action block: inline bdrv_replace_child() block: inline bdrv_check_perm_common() block: drop unused permission update functions block: bdrv_reopen_multiple: refresh permissions on updated graph block: bdrv_reopen_multiple(): move bdrv_flush to separate pre-prepare block: add bdrv_set_backing_noperm() transaction action block: make bdrv_refresh_limits() to be a transaction action block: make bdrv_unset_inherits_from to be a transaction action block: drop ignore_children for permission update functions block/backup-top: drop .active block: introduce bdrv_drop_filter() block: add bdrv_remove_filter_or_cow transaction action block: adapt bdrv_append() for inserting filters block: split out bdrv_replace_node_noperm() ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS6
-rw-r--r--block.c1327
-rw-r--r--block/backup-top.c48
-rw-r--r--block/block-backend.c30
-rw-r--r--block/commit.c1
-rw-r--r--block/file-posix.c91
-rw-r--r--block/io.c31
-rw-r--r--block/mirror.c3
-rw-r--r--blockdev.c4
-rw-r--r--blockjob.c11
-rw-r--r--hw/block/vhost-user-blk.c5
-rw-r--r--include/block/block.h14
-rw-r--r--include/block/block_int.h8
-rw-r--r--include/qemu/transactions.h63
-rw-r--r--qemu-img.c2
-rwxr-xr-xtests/qemu-iotests/2452
-rw-r--r--tests/qemu-iotests/283.out2
-rw-r--r--tests/qemu-iotests/tests/qsd-jobs.out2
-rw-r--r--tests/unit/test-bdrv-drain.c2
-rw-r--r--tests/unit/test-bdrv-graph-mod.c209
-rw-r--r--util/meson.build1
-rw-r--r--util/transactions.c96
22 files changed, 1279 insertions, 679 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 36055f14c5..4c05ff8bba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2532,6 +2532,12 @@ M: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
S: Maintained
F: scripts/simplebench/
+Transactions helper
+M: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+S: Maintained
+F: include/qemu/transactions.h
+F: util/transactions.c
+
QAPI
M: Markus Armbruster <armbru@redhat.com>
M: Michael Roth <michael.roth@amd.com>
diff --git a/block.c b/block.c
index c5b887cec1..874c22c43e 100644
--- a/block.c
+++ b/block.c
@@ -2,6 +2,7 @@
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2020 Virtuozzo International GmbH.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -82,6 +83,25 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
BdrvChildRole child_role,
Error **errp);
+static void bdrv_replace_child_noperm(BdrvChild *child,
+ BlockDriverState *new_bs);
+static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ BdrvChild **child,
+ Transaction *tran,
+ Error **errp);
+static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
+ Transaction *tran);
+
+static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue,
+ Transaction *set_backings_tran, Error **errp);
+static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
@@ -1394,6 +1414,13 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
return 0;
}
+static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c)
+{
+ BlockDriverState *bs = c->opaque;
+
+ return bdrv_get_aio_context(bs);
+}
+
const BdrvChildClass child_of_bds = {
.parent_is_bds = true,
.get_parent_desc = bdrv_child_get_parent_desc,
@@ -1407,8 +1434,14 @@ const BdrvChildClass child_of_bds = {
.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
.set_aio_ctx = bdrv_child_cb_set_aio_ctx,
.update_filename = bdrv_child_cb_update_filename,
+ .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context,
};
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
+{
+ return c->klass->get_parent_aio_context(c);
+}
+
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
int open_flags = flags;
@@ -1547,7 +1580,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
return ret;
}
- bdrv_refresh_limits(bs, &local_err);
+ bdrv_refresh_limits(bs, NULL, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
@@ -1955,12 +1988,6 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
-static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
- uint64_t perm, uint64_t shared,
- GSList *ignore_children, Error **errp);
-static void bdrv_child_abort_perm_update(BdrvChild *c);
-static void bdrv_child_set_perm(BdrvChild *c);
-
typedef struct BlockReopenQueueEntry {
bool prepared;
bool perms_checked;
@@ -2008,6 +2035,57 @@ bool bdrv_is_writable(BlockDriverState *bs)
return bdrv_is_writable_after_reopen(bs, NULL);
}
+static char *bdrv_child_user_desc(BdrvChild *c)
+{
+ if (c->klass->get_parent_desc) {
+ return c->klass->get_parent_desc(c);
+ }
+
+ return g_strdup("another user");
+}
+
+static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
+{
+ g_autofree char *user = NULL;
+ g_autofree char *perm_names = NULL;
+
+ if ((b->perm & a->shared_perm) == b->perm) {
+ return true;
+ }
+
+ perm_names = bdrv_perm_names(b->perm & ~a->shared_perm);
+ user = bdrv_child_user_desc(a);
+ error_setg(errp, "Conflicts with use by %s as '%s', which does not "
+ "allow '%s' on %s",
+ user, a->name, perm_names, bdrv_get_node_name(b->bs));
+
+ return false;
+}
+
+static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
+{
+ BdrvChild *a, *b;
+
+ /*
+ * During the loop we'll look at each pair twice. That's correct because
+ * bdrv_a_allow_b() is asymmetric and we should check each pair in both
+ * directions.
+ */
+ QLIST_FOREACH(a, &bs->parents, next_parent) {
+ QLIST_FOREACH(b, &bs->parents, next_parent) {
+ if (a == b) {
+ continue;
+ }
+
+ if (!bdrv_a_allow_b(a, b, errp)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
BdrvChild *c, BdrvChildRole role,
BlockReopenQueue *reopen_queue,
@@ -2025,22 +2103,186 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
}
/*
- * Check whether permissions on this node can be changed in a way that
- * @cumulative_perms and @cumulative_shared_perms are the new cumulative
- * permissions of all its parents. This involves checking whether all necessary
- * permission changes to child nodes can be performed.
+ * Adds the whole subtree of @bs (including @bs itself) to @list (except for
+ * nodes that are already in @list) so that the final list is topologically
+ * sorted. Returns the result (the GSList @list object is updated, so don't
+ * use the old reference after the function call).
+ *
+ * On function start, @list must already be topologically sorted, and for any
+ * node in @list the whole subtree of that node must be in @list as well. The
+ * simplest way to satisfy this criterion: pass only the result of
+ * bdrv_topological_dfs() or NULL as the @list parameter.
+ */
+static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
+ BlockDriverState *bs)
+{
+ BdrvChild *child;
+ g_autoptr(GHashTable) local_found = NULL;
+
+ if (!found) {
+ assert(!list);
+ found = local_found = g_hash_table_new(NULL, NULL);
+ }
+
+ if (g_hash_table_contains(found, bs)) {
+ return list;
+ }
+ g_hash_table_add(found, bs);
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ list = bdrv_topological_dfs(list, found, child->bs);
+ }
+
+ return g_slist_prepend(list, bs);
+}
+
+typedef struct BdrvChildSetPermState {
+ BdrvChild *child;
+ uint64_t old_perm;
+ uint64_t old_shared_perm;
+} BdrvChildSetPermState;
+
+static void bdrv_child_set_perm_abort(void *opaque)
+{
+ BdrvChildSetPermState *s = opaque;
+
+ s->child->perm = s->old_perm;
+ s->child->shared_perm = s->old_shared_perm;
+}
+
+static TransactionActionDrv bdrv_child_set_pem_drv = {
+ .abort = bdrv_child_set_perm_abort,
+ .clean = g_free,
+};
+
+static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
+ uint64_t shared, Transaction *tran)
+{
+ BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
+
+ *s = (BdrvChildSetPermState) {
+ .child = c,
+ .old_perm = c->perm,
+ .old_shared_perm = c->shared_perm,
+ };
+
+ c->perm = perm;
+ c->shared_perm = shared;
+
+ tran_add(tran, &bdrv_child_set_pem_drv, s);
+}
+
+static void bdrv_drv_set_perm_commit(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ uint64_t cumulative_perms, cumulative_shared_perms;
+
+ if (bs->drv->bdrv_set_perm) {
+ bdrv_get_cumulative_perm(bs, &cumulative_perms,
+ &cumulative_shared_perms);
+ bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
+ }
+}
+
+static void bdrv_drv_set_perm_abort(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+
+ if (bs->drv->bdrv_abort_perm_update) {
+ bs->drv->bdrv_abort_perm_update(bs);
+ }
+}
+
+TransactionActionDrv bdrv_drv_set_perm_drv = {
+ .abort = bdrv_drv_set_perm_abort,
+ .commit = bdrv_drv_set_perm_commit,
+};
+
+static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, Transaction *tran,
+ Error **errp)
+{
+ if (!bs->drv) {
+ return 0;
+ }
+
+ if (bs->drv->bdrv_check_perm) {
+ int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (tran) {
+ tran_add(tran, &bdrv_drv_set_perm_drv, bs);
+ }
+
+ return 0;
+}
+
+typedef struct BdrvReplaceChildState {
+ BdrvChild *child;
+ BlockDriverState *old_bs;
+} BdrvReplaceChildState;
+
+static void bdrv_replace_child_commit(void *opaque)
+{
+ BdrvReplaceChildState *s = opaque;
+
+ bdrv_unref(s->old_bs);
+}
+
+static void bdrv_replace_child_abort(void *opaque)
+{
+ BdrvReplaceChildState *s = opaque;
+ BlockDriverState *new_bs = s->child->bs;
+
+ /* old_bs reference is transparently moved from @s to @s->child */
+ bdrv_replace_child_noperm(s->child, s->old_bs);
+ bdrv_unref(new_bs);
+}
+
+static TransactionActionDrv bdrv_replace_child_drv = {
+ .commit = bdrv_replace_child_commit,
+ .abort = bdrv_replace_child_abort,
+ .clean = g_free,
+};
+
+/*
+ * bdrv_replace_child
*
- * A call to this function must always be followed by a call to bdrv_set_perm()
- * or bdrv_abort_perm_update().
+ * Note: real unref of old_bs is done only on commit.
*/
-static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
- uint64_t cumulative_perms,
- uint64_t cumulative_shared_perms,
- GSList *ignore_children, Error **errp)
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs,
+ Transaction *tran)
+{
+ BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
+ *s = (BdrvReplaceChildState) {
+ .child = child,
+ .old_bs = child->bs,
+ };
+ tran_add(tran, &bdrv_replace_child_drv, s);
+
+ if (new_bs) {
+ bdrv_ref(new_bs);
+ }
+ bdrv_replace_child_noperm(child, new_bs);
+ /* old_bs reference is transparently moved from @child to @s */
+}
+
+/*
+ * Refresh permissions in @bs subtree. The function is intended to be called
+ * after some graph modification that was done without permission update.
+ */
+static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
+ Transaction *tran, Error **errp)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
int ret;
+ uint64_t cumulative_perms, cumulative_shared_perms;
+
+ bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
/* Write permissions never work with read-only images */
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
@@ -2049,15 +2291,8 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
if (!bdrv_is_writable_after_reopen(bs, NULL)) {
error_setg(errp, "Block node is read-only");
} else {
- uint64_t current_perms, current_shared;
- bdrv_get_cumulative_perm(bs, &current_perms, &current_shared);
- if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
- error_setg(errp, "Cannot make block node read-only, there is "
- "a writer on it");
- } else {
- error_setg(errp, "Cannot make block node read-only and create "
- "a writer on it");
- }
+ error_setg(errp, "Read-only block node '%s' cannot support "
+ "read-write users", bdrv_get_node_name(bs));
}
return -EPERM;
@@ -2084,12 +2319,10 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
return 0;
}
- if (drv->bdrv_check_perm) {
- ret = drv->bdrv_check_perm(bs, cumulative_perms,
- cumulative_shared_perms, errp);
- if (ret < 0) {
- return ret;
- }
+ ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
+ errp);
+ if (ret < 0) {
+ return ret;
}
/* Drivers that never have children can omit .bdrv_child_perm() */
@@ -2105,68 +2338,32 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
bdrv_child_perm(bs, c->bs, c, c->role, q,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
- ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children,
- errp);
- if (ret < 0) {
- return ret;
- }
+ bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
}
return 0;
}
-/*
- * Notifies drivers that after a previous bdrv_check_perm() call, the
- * permission update is not performed and any preparations made for it (e.g.
- * taken file locks) need to be undone.
- *
- * This function recursively notifies all child nodes.
- */
-static void bdrv_abort_perm_update(BlockDriverState *bs)
+static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
+ Transaction *tran, Error **errp)
{
- BlockDriver *drv = bs->drv;
- BdrvChild *c;
-
- if (!drv) {
- return;
- }
-
- if (drv->bdrv_abort_perm_update) {
- drv->bdrv_abort_perm_update(bs);
- }
-
- QLIST_FOREACH(c, &bs->children, next) {
- bdrv_child_abort_perm_update(c);
- }
-}
-
-static void bdrv_set_perm(BlockDriverState *bs)
-{
- uint64_t cumulative_perms, cumulative_shared_perms;
- BlockDriver *drv = bs->drv;
- BdrvChild *c;
-
- if (!drv) {
- return;
- }
+ int ret;
+ BlockDriverState *bs;
- bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
+ for ( ; list; list = list->next) {
+ bs = list->data;
- /* Update this node */
- if (drv->bdrv_set_perm) {
- drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
- }
+ if (bdrv_parent_perms_conflict(bs, errp)) {
+ return -EINVAL;
+ }
- /* Drivers that never have children can omit .bdrv_child_perm() */
- if (!drv->bdrv_child_perm) {
- assert(QLIST_EMPTY(&bs->children));
- return;
+ ret = bdrv_node_refresh_perm(bs, q, tran, errp);
+ if (ret < 0) {
+ return ret;
+ }
}
- /* Update all children */
- QLIST_FOREACH(c, &bs->children, next) {
- bdrv_child_set_perm(c);
- }
+ return 0;
}
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
@@ -2185,15 +2382,6 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
*shared_perm = cumulative_shared_perms;
}
-static char *bdrv_child_user_desc(BdrvChild *c)
-{
- if (c->klass->get_parent_desc) {
- return c->klass->get_parent_desc(c);
- }
-
- return g_strdup("another user");
-}
-
char *bdrv_perm_names(uint64_t perm)
{
struct perm_name {
@@ -2223,140 +2411,33 @@ char *bdrv_perm_names(uint64_t perm)
return g_string_free(result, FALSE);
}
-/*
- * Checks whether a new reference to @bs can be added if the new user requires
- * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
- * set, the BdrvChild objects in this list are ignored in the calculations;
- * this allows checking permission updates for an existing reference.
- *
- * Needs to be followed by a call to either bdrv_set_perm() or
- * bdrv_abort_perm_update(). */
-static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
- uint64_t new_used_perm,
- uint64_t new_shared_perm,
- GSList *ignore_children,
- Error **errp)
-{
- BdrvChild *c;
- uint64_t cumulative_perms = new_used_perm;
- uint64_t cumulative_shared_perms = new_shared_perm;
-
-
- /* There is no reason why anyone couldn't tolerate write_unchanged */
- assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
-
- QLIST_FOREACH(c, &bs->parents, next_parent) {
- if (g_slist_find(ignore_children, c)) {
- continue;
- }
-
- if ((new_used_perm & c->shared_perm) != new_used_perm) {
- char *user = bdrv_child_user_desc(c);
- char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
-
- error_setg(errp, "Conflicts with use by %s as '%s', which does not "
- "allow '%s' on %s",
- user, c->name, perm_names, bdrv_get_node_name(c->bs));
- g_free(user);
- g_free(perm_names);
- return -EPERM;
- }
-
- if ((c->perm & new_shared_perm) != c->perm) {
- char *user = bdrv_child_user_desc(c);
- char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
-
- error_setg(errp, "Conflicts with use by %s as '%s', which uses "
- "'%s' on %s",
- user, c->name, perm_names, bdrv_get_node_name(c->bs));
- g_free(user);
- g_free(perm_names);
- return -EPERM;
- }
-
- cumulative_perms |= c->perm;
- cumulative_shared_perms &= c->shared_perm;
- }
-
- return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
- ignore_children, errp);
-}
-
-/* Needs to be followed by a call to either bdrv_child_set_perm() or
- * bdrv_child_abort_perm_update(). */
-static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
- uint64_t perm, uint64_t shared,
- GSList *ignore_children, Error **errp)
-{
- int ret;
-
- ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
- ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp);
- g_slist_free(ignore_children);
-
- if (ret < 0) {
- return ret;
- }
-
- if (!c->has_backup_perm) {
- c->has_backup_perm = true;
- c->backup_perm = c->perm;
- c->backup_shared_perm = c->shared_perm;
- }
- /*
- * Note: it's OK if c->has_backup_perm was already set, as we can find the
- * same child twice during check_perm procedure
- */
-
- c->perm = perm;
- c->shared_perm = shared;
-
- return 0;
-}
-
-static void bdrv_child_set_perm(BdrvChild *c)
-{
- c->has_backup_perm = false;
-
- bdrv_set_perm(c->bs);
-}
-
-static void bdrv_child_abort_perm_update(BdrvChild *c)
-{
- if (c->has_backup_perm) {
- c->perm = c->backup_perm;
- c->shared_perm = c->backup_shared_perm;
- c->has_backup_perm = false;
- }
-
- bdrv_abort_perm_update(c->bs);
-}
static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
{
int ret;
- uint64_t perm, shared_perm;
+ Transaction *tran = tran_new();
+ g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
- bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, errp);
- if (ret < 0) {
- bdrv_abort_perm_update(bs);
- return ret;
- }
- bdrv_set_perm(bs);
+ ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
+ tran_finalize(tran, ret);
- return 0;
+ return ret;
}
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Error **errp)
{
Error *local_err = NULL;
+ Transaction *tran = tran_new();
int ret;
- ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, &local_err);
+ bdrv_child_set_perm(c, perm, shared, tran);
+
+ ret = bdrv_refresh_perms(c->bs, &local_err);
+
+ tran_finalize(tran, ret);
+
if (ret < 0) {
- bdrv_child_abort_perm_update(c);
if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
/* tighten permissions */
error_propagate(errp, local_err);
@@ -2370,12 +2451,9 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
error_free(local_err);
ret = 0;
}
- return ret;
}
- bdrv_child_set_perm(c);
-
- return 0;
+ return ret;
}
int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
@@ -2627,37 +2705,177 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
}
}
+static void bdrv_child_free(void *opaque)
+{
+ BdrvChild *c = opaque;
+
+ g_free(c->name);
+ g_free(c);
+}
+
+static void bdrv_remove_empty_child(BdrvChild *child)
+{
+ assert(!child->bs);
+ QLIST_SAFE_REMOVE(child, next);
+ bdrv_child_free(child);
+}
+
+typedef struct BdrvAttachChildCommonState {
+ BdrvChild **child;
+ AioContext *old_parent_ctx;
+ AioContext *old_child_ctx;
+} BdrvAttachChildCommonState;
+
+static void bdrv_attach_child_common_abort(void *opaque)
+{
+ BdrvAttachChildCommonState *s = opaque;
+ BdrvChild *child = *s->child;
+ BlockDriverState *bs = child->bs;
+
+ bdrv_replace_child_noperm(child, NULL);
+
+ if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
+ bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
+ }
+
+ if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
+ GSList *ignore = g_slist_prepend(NULL, child);
+
+ child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
+ &error_abort);
+ g_slist_free(ignore);
+ ignore = g_slist_prepend(NULL, child);
+ child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
+
+ g_slist_free(ignore);
+ }
+
+ bdrv_unref(bs);
+ bdrv_remove_empty_child(child);
+ *s->child = NULL;
+}
+
+static TransactionActionDrv bdrv_attach_child_common_drv = {
+ .abort = bdrv_attach_child_common_abort,
+ .clean = g_free,
+};
+
/*
- * Updates @child to change its reference to point to @new_bs, including
- * checking and applying the necessary permission updates both to the old node
- * and to @new_bs.
- *
- * NULL is passed as @new_bs for removing the reference before freeing @child.
- *
- * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
- * function uses bdrv_set_perm() to update the permissions according to the new
- * reference that @new_bs gets.
- *
- * Callers must ensure that child->frozen is false.
+ * Common part of attaching a bdrv child to a bs, a blk, or a job
*/
-static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+static int bdrv_attach_child_common(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, BdrvChild **child,
+ Transaction *tran, Error **errp)
+{
+ BdrvChild *new_child;
+ AioContext *parent_ctx;
+ AioContext *child_ctx = bdrv_get_aio_context(child_bs);
+
+ assert(child);
+ assert(*child == NULL);
+
+ new_child = g_new(BdrvChild, 1);
+ *new_child = (BdrvChild) {
+ .bs = NULL,
+ .name = g_strdup(child_name),
+ .klass = child_class,
+ .role = child_role,
+ .perm = perm,
+ .shared_perm = shared_perm,
+ .opaque = opaque,
+ };
+
+ /*
+ * If the AioContexts don't match, first try to move the subtree of
+ * child_bs into the AioContext of the new parent. If this doesn't work,
+ * try moving the parent into the AioContext of child_bs instead.
+ */
+ parent_ctx = bdrv_child_get_parent_aio_context(new_child);
+ if (child_ctx != parent_ctx) {
+ Error *local_err = NULL;
+ int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err);
+
+ if (ret < 0 && child_class->can_set_aio_ctx) {
+ GSList *ignore = g_slist_prepend(NULL, new_child);
+ if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore,
+ NULL))
+ {
+ error_free(local_err);
+ ret = 0;
+ g_slist_free(ignore);
+ ignore = g_slist_prepend(NULL, new_child);
+ child_class->set_aio_ctx(new_child, child_ctx, &ignore);
+ }
+ g_slist_free(ignore);
+ }
+
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ bdrv_remove_empty_child(new_child);
+ return ret;
+ }
+ }
+
+ bdrv_ref(child_bs);
+ bdrv_replace_child_noperm(new_child, child_bs);
+
+ *child = new_child;
+
+ BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
+ *s = (BdrvAttachChildCommonState) {
+ .child = child,
+ .old_parent_ctx = parent_ctx,
+ .old_child_ctx = child_ctx,
+ };
+ tran_add(tran, &bdrv_attach_child_common_drv, s);
+
+ return 0;
+}
+
+static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ BdrvChild **child,
+ Transaction *tran,
+ Error **errp)
{
- BlockDriverState *old_bs = child->bs;
+ int ret;
+ uint64_t perm, shared_perm;
- /* Asserts that child->frozen == false */
- bdrv_replace_child_noperm(child, new_bs);
+ assert(parent_bs->drv);
+ bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
+ bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
+ perm, shared_perm, &perm, &shared_perm);
+
+ ret = bdrv_attach_child_common(child_bs, child_name, child_class,
+ child_role, perm, shared_perm, parent_bs,
+ child, tran, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ QLIST_INSERT_HEAD(&parent_bs->children, *child, next);
/*
- * Start with the new node's permissions. If @new_bs is a (direct
- * or indirect) child of @old_bs, we must complete the permission
- * update on @new_bs before we loosen the restrictions on @old_bs.
- * Otherwise, bdrv_check_perm() on @old_bs would re-initiate
- * updating the permissions of @new_bs, and thus not purely loosen
- * restrictions.
+ * The child is removed in bdrv_attach_child_common_abort(), so there is no
+ * need to abort this change separately.
*/
- if (new_bs) {
- bdrv_set_perm(new_bs);
- }
+
+ return 0;
+}
+
+static void bdrv_detach_child(BdrvChild *child)
+{
+ BlockDriverState *old_bs = child->bs;
+
+ bdrv_replace_child_noperm(child, NULL);
+ bdrv_remove_empty_child(child);
if (old_bs) {
/*
@@ -2667,8 +2885,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
*/
bdrv_refresh_perms(old_bs, NULL);
- /* When the parent requiring a non-default AioContext is removed, the
- * node moves back to the main AioContext */
+ /*
+ * When the parent requiring a non-default AioContext is removed, the
+ * node moves back to the main AioContext
+ */
bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
}
}
@@ -2687,61 +2907,25 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
- AioContext *ctx,
uint64_t perm, uint64_t shared_perm,
void *opaque, Error **errp)
{
- BdrvChild *child;
- Error *local_err = NULL;
int ret;
+ BdrvChild *child = NULL;
+ Transaction *tran = tran_new();
- ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
+ ret = bdrv_attach_child_common(child_bs, child_name, child_class,
+ child_role, perm, shared_perm, opaque,
+ &child, tran, errp);
if (ret < 0) {
- bdrv_abort_perm_update(child_bs);
bdrv_unref(child_bs);
return NULL;
}
- child = g_new(BdrvChild, 1);
- *child = (BdrvChild) {
- .bs = NULL,
- .name = g_strdup(child_name),
- .klass = child_class,
- .role = child_role,
- .perm = perm,
- .shared_perm = shared_perm,
- .opaque = opaque,
- };
-
- /* If the AioContexts don't match, first try to move the subtree of
- * child_bs into the AioContext of the new parent. If this doesn't work,
- * try moving the parent into the AioContext of child_bs instead. */
- if (bdrv_get_aio_context(child_bs) != ctx) {
- ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
- if (ret < 0 && child_class->can_set_aio_ctx) {
- GSList *ignore = g_slist_prepend(NULL, child);
- ctx = bdrv_get_aio_context(child_bs);
- if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
- error_free(local_err);
- ret = 0;
- g_slist_free(ignore);
- ignore = g_slist_prepend(NULL, child);
- child_class->set_aio_ctx(child, ctx, &ignore);
- }
- g_slist_free(ignore);
- }
- if (ret < 0) {
- error_propagate(errp, local_err);
- g_free(child);
- bdrv_abort_perm_update(child_bs);
- bdrv_unref(child_bs);
- return NULL;
- }
- }
-
- /* This performs the matching bdrv_set_perm() for the above check. */
- bdrv_replace_child(child, child_bs);
+ ret = bdrv_refresh_perms(child_bs, errp);
+ tran_finalize(tran, ret);
+ bdrv_unref(child_bs);
return child;
}
@@ -2763,34 +2947,27 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BdrvChildRole child_role,
Error **errp)
{
- BdrvChild *child;
- uint64_t perm, shared_perm;
-
- bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
-
- assert(parent_bs->drv);
- bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
- perm, shared_perm, &perm, &shared_perm);
+ int ret;
+ BdrvChild *child = NULL;
+ Transaction *tran = tran_new();
- child = bdrv_root_attach_child(child_bs, child_name, child_class,
- child_role, bdrv_get_aio_context(parent_bs),
- perm, shared_perm, parent_bs, errp);
- if (child == NULL) {
- return NULL;
+ ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
+ child_role, &child, tran, errp);
+ if (ret < 0) {
+ goto out;
}
- QLIST_INSERT_HEAD(&parent_bs->children, child, next);
- return child;
-}
+ ret = bdrv_refresh_perms(parent_bs, errp);
+ if (ret < 0) {
+ goto out;
+ }
-static void bdrv_detach_child(BdrvChild *child)
-{
- QLIST_SAFE_REMOVE(child, next);
+out:
+ tran_finalize(tran, ret);
- bdrv_replace_child(child, NULL);
+ bdrv_unref(child_bs);
- g_free(child->name);
- g_free(child);
+ return child;
}
/* Callers must ensure that child->frozen is false. */
@@ -2803,11 +2980,49 @@ void bdrv_root_unref_child(BdrvChild *child)
bdrv_unref(child_bs);
}
+typedef struct BdrvSetInheritsFrom {
+ BlockDriverState *bs;
+ BlockDriverState *old_inherits_from;
+} BdrvSetInheritsFrom;
+
+static void bdrv_set_inherits_from_abort(void *opaque)
+{
+ BdrvSetInheritsFrom *s = opaque;
+
+ s->bs->inherits_from = s->old_inherits_from;
+}
+
+static TransactionActionDrv bdrv_set_inherits_from_drv = {
+ .abort = bdrv_set_inherits_from_abort,
+ .clean = g_free,
+};
+
+/* @tran is allowed to be NULL. In this case no rollback is possible */
+static void bdrv_set_inherits_from(BlockDriverState *bs,
+ BlockDriverState *new_inherits_from,
+ Transaction *tran)
+{
+ if (tran) {
+ BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
+
+ *s = (BdrvSetInheritsFrom) {
+ .bs = bs,
+ .old_inherits_from = bs->inherits_from,
+ };
+
+ tran_add(tran, &bdrv_set_inherits_from_drv, s);
+ }
+
+ bs->inherits_from = new_inherits_from;
+}
+
/**
* Clear all inherits_from pointers from children and grandchildren of
* @root that point to @root, where necessary.
+ * @tran is allowed to be NULL. In this case no rollback is possible
*/
-static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
+static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
+ Transaction *tran)
{
BdrvChild *c;
@@ -2822,12 +3037,12 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
}
}
if (c == NULL) {
- child->bs->inherits_from = NULL;
+ bdrv_set_inherits_from(child->bs, NULL, tran);
}
}
QLIST_FOREACH(c, &child->bs->children, next) {
- bdrv_unset_inherits_from(root, c);
+ bdrv_unset_inherits_from(root, c, tran);
}
}
@@ -2838,7 +3053,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
return;
}
- bdrv_unset_inherits_from(parent, child);
+ bdrv_unset_inherits_from(parent, child, NULL);
bdrv_root_unref_child(child);
}
@@ -2883,8 +3098,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
* Sets the bs->backing link of a BDS. A new reference is created; callers
* which don't need their own reference any more must call bdrv_unref().
*/
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp)
+static int bdrv_set_backing_noperm(BlockDriverState *bs,
+ BlockDriverState *backing_hd,
+ Transaction *tran, Error **errp)
{
int ret = 0;
bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
@@ -2894,36 +3110,53 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
return -EPERM;
}
- if (backing_hd) {
- bdrv_ref(backing_hd);
- }
-
if (bs->backing) {
/* Cannot be frozen, we checked that above */
- bdrv_unref_child(bs, bs->backing);
- bs->backing = NULL;
+ bdrv_unset_inherits_from(bs, bs->backing, tran);
+ bdrv_remove_filter_or_cow_child(bs, tran);
}
if (!backing_hd) {
goto out;
}
- bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds,
- bdrv_backing_role(bs), errp);
- if (!bs->backing) {
- ret = -EPERM;
- goto out;
+ ret = bdrv_attach_child_noperm(bs, backing_hd, "backing",
+ &child_of_bds, bdrv_backing_role(bs),
+ &bs->backing, tran, errp);
+ if (ret < 0) {
+ return ret;
}
- /* If backing_hd was already part of bs's backing chain, and
+
+ /*
+ * If backing_hd was already part of bs's backing chain, and
* inherits_from pointed recursively to bs then let's update it to
- * point directly to bs (else it will become NULL). */
+ * point directly to bs (else it will become NULL).
+ */
if (update_inherits_from) {
- backing_hd->inherits_from = bs;
+ bdrv_set_inherits_from(backing_hd, bs, tran);
+ }
+
+out:
+ bdrv_refresh_limits(bs, tran, NULL);
+
+ return 0;
+}
+
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp)
+{
+ int ret;
+ Transaction *tran = tran_new();
+
+ ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
+ if (ret < 0) {
+ goto out;
}
+ ret = bdrv_refresh_perms(bs, errp);
out:
- bdrv_refresh_limits(bs, NULL);
+ tran_finalize(tran, ret);
return ret;
}
@@ -3213,11 +3446,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
goto out;
}
- /* bdrv_append() consumes a strong reference to bs_snapshot
- * (i.e. it will call bdrv_unref() on it) even on error, so in
- * order to be able to return one, we have to increase
- * bs_snapshot's refcount here */
- bdrv_ref(bs_snapshot);
ret = bdrv_append(bs_snapshot, bs, errp);
if (ret < 0) {
bs_snapshot = NULL;
@@ -3729,10 +3957,6 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
bs_entry->state.explicit_options = explicit_options;
bs_entry->state.flags = flags;
- /* This needs to be overwritten in bdrv_reopen_prepare() */
- bs_entry->state.perm = UINT64_MAX;
- bs_entry->state.shared_perm = 0;
-
/*
* If keep_old_opts is false then it means that unspecified
* options must be reset to their original value. We don't allow
@@ -3817,38 +4041,49 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
int ret = -1;
BlockReopenQueueEntry *bs_entry, *next;
+ Transaction *tran = tran_new();
+ g_autoptr(GHashTable) found = NULL;
+ g_autoptr(GSList) refresh_list = NULL;
assert(bs_queue != NULL);
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
- assert(bs_entry->state.bs->quiesce_counter > 0);
- if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) {
+ ret = bdrv_flush(bs_entry->state.bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error flushing drive");
goto cleanup;
}
+ }
+
+ QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
+ assert(bs_entry->state.bs->quiesce_counter > 0);
+ ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
+ if (ret < 0) {
+ goto abort;
+ }
bs_entry->prepared = true;
}
+ found = g_hash_table_new(NULL, NULL);
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
BDRVReopenState *state = &bs_entry->state;
- ret = bdrv_check_perm(state->bs, bs_queue, state->perm,
- state->shared_perm, NULL, errp);
- if (ret < 0) {
- goto cleanup_perm;
- }
- /* Check if new_backing_bs would accept the new permissions */
- if (state->replace_backing_bs && state->new_backing_bs) {
- uint64_t nperm, nshared;
- bdrv_child_perm(state->bs, state->new_backing_bs,
- NULL, bdrv_backing_role(state->bs),
- bs_queue, state->perm, state->shared_perm,
- &nperm, &nshared);
- ret = bdrv_check_update_perm(state->new_backing_bs, NULL,
- nperm, nshared, NULL, errp);
- if (ret < 0) {
- goto cleanup_perm;
- }
+
+ refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
+ if (state->old_backing_bs) {
+ refresh_list = bdrv_topological_dfs(refresh_list, found,
+ state->old_backing_bs);
}
- bs_entry->perms_checked = true;
+ }
+
+ /*
+ * Note that the file-posix driver relies on the permission update done during
+ * reopen (even if no permission changed), because it wants the "new"
+ * permissions for reconfiguring the fd; that is why it does so in
+ * raw_check_perm(), not in raw_reopen_prepare(), which sees "old" permissions.
+ */
+ ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
+ if (ret < 0) {
+ goto abort;
}
/*
@@ -3864,51 +4099,31 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
bdrv_reopen_commit(&bs_entry->state);
}
- ret = 0;
-cleanup_perm:
- QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
- BDRVReopenState *state = &bs_entry->state;
-
- if (!bs_entry->perms_checked) {
- continue;
- }
-
- if (ret == 0) {
- uint64_t perm, shared;
+ tran_commit(tran);
- bdrv_get_cumulative_perm(state->bs, &perm, &shared);
- assert(perm == state->perm);
- assert(shared == state->shared_perm);
+ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
+ BlockDriverState *bs = bs_entry->state.bs;
- bdrv_set_perm(state->bs);
- } else {
- bdrv_abort_perm_update(state->bs);
- if (state->replace_backing_bs && state->new_backing_bs) {
- bdrv_abort_perm_update(state->new_backing_bs);
- }
+ if (bs->drv->bdrv_reopen_commit_post) {
+ bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
}
}
- if (ret == 0) {
- QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
- BlockDriverState *bs = bs_entry->state.bs;
+ ret = 0;
+ goto cleanup;
- if (bs->drv->bdrv_reopen_commit_post)
- bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
+abort:
+ tran_abort(tran);
+ QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ if (bs_entry->prepared) {
+ bdrv_reopen_abort(&bs_entry->state);
}
+ qobject_unref(bs_entry->state.explicit_options);
+ qobject_unref(bs_entry->state.options);
}
+
cleanup:
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
- if (ret) {
- if (bs_entry->prepared) {
- bdrv_reopen_abort(&bs_entry->state);
- }
- qobject_unref(bs_entry->state.explicit_options);
- qobject_unref(bs_entry->state.options);
- }
- if (bs_entry->state.new_backing_bs) {
- bdrv_unref(bs_entry->state.new_backing_bs);
- }
g_free(bs_entry);
}
g_free(bs_queue);
@@ -3933,53 +4148,6 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
return ret;
}
-static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
- BdrvChild *c)
-{
- BlockReopenQueueEntry *entry;
-
- QTAILQ_FOREACH(entry, q, entry) {
- BlockDriverState *bs = entry->state.bs;
- BdrvChild *child;
-
- QLIST_FOREACH(child, &bs->children, next) {
- if (child == c) {
- return entry;
- }
- }
- }
-
- return NULL;
-}
-
-static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
- uint64_t *perm, uint64_t *shared)
-{
- BdrvChild *c;
- BlockReopenQueueEntry *parent;
- uint64_t cumulative_perms = 0;
- uint64_t cumulative_shared_perms = BLK_PERM_ALL;
-
- QLIST_FOREACH(c, &bs->parents, next_parent) {
- parent = find_parent_in_reopen_queue(q, c);
- if (!parent) {
- cumulative_perms |= c->perm;
- cumulative_shared_perms &= c->shared_perm;
- } else {
- uint64_t nperm, nshared;
-
- bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
- parent->state.perm, parent->state.shared_perm,
- &nperm, &nshared);
-
- cumulative_perms |= nperm;
- cumulative_shared_perms &= nshared;
- }
- }
- *perm = cumulative_perms;
- *shared = cumulative_shared_perms;
-}
-
static bool bdrv_reopen_can_attach(BlockDriverState *parent,
BdrvChild *child,
BlockDriverState *new_child,
@@ -4021,6 +4189,7 @@ static bool bdrv_reopen_can_attach(BlockDriverState *parent,
* Return 0 on success, otherwise return < 0 and set @errp.
*/
static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
+ Transaction *set_backings_tran,
Error **errp)
{
BlockDriverState *bs = reopen_state->bs;
@@ -4097,6 +4266,8 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
/* If we want to replace the backing file we need some extra checks */
if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
+ int ret;
+
/* Check for implicit nodes between bs and its backing file */
if (bs != overlay_bs) {
error_setg(errp, "Cannot change backing link if '%s' has "
@@ -4117,9 +4288,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
return -EPERM;
}
reopen_state->replace_backing_bs = true;
- if (new_backing_bs) {
- bdrv_ref(new_backing_bs);
- reopen_state->new_backing_bs = new_backing_bs;
+ reopen_state->old_backing_bs = bs->backing ? bs->backing->bs : NULL;
+ ret = bdrv_set_backing_noperm(bs, new_backing_bs, set_backings_tran,
+ errp);
+ if (ret < 0) {
+ return ret;
}
}
@@ -4143,8 +4316,9 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
* commit() for any other BDS that have been left in a prepare() state
*
*/
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
- Error **errp)
+static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue,
+ Transaction *set_backings_tran, Error **errp)
{
int ret = -1;
int old_flags;
@@ -4211,16 +4385,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
goto error;
}
- /* Calculate required permissions after reopening */
- bdrv_reopen_perm(queue, reopen_state->bs,
- &reopen_state->perm, &reopen_state->shared_perm);
-
- ret = bdrv_flush(reopen_state->bs);
- if (ret) {
- error_setg_errno(errp, -ret, "Error flushing drive");
- goto error;
- }
-
if (drv->bdrv_reopen_prepare) {
/*
* If a driver-specific option is missing, it means that we
@@ -4274,7 +4438,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
* either a reference to an existing node (using its node name)
* or NULL to simply detach the current backing file.
*/
- ret = bdrv_reopen_parse_backing(reopen_state, errp);
+ ret = bdrv_reopen_parse_backing(reopen_state, set_backings_tran, errp);
if (ret < 0) {
goto error;
}
@@ -4359,7 +4523,7 @@ error:
* makes them final by swapping the staging BlockDriverState contents into
* the active BlockDriverState contents.
*/
-void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
BlockDriver *drv;
BlockDriverState *bs;
@@ -4396,30 +4560,14 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
qdict_del(bs->explicit_options, child->name);
qdict_del(bs->options, child->name);
}
-
- /*
- * Change the backing file if a new one was specified. We do this
- * after updating bs->options, so bdrv_refresh_filename() (called
- * from bdrv_set_backing_hd()) has the new values.
- */
- if (reopen_state->replace_backing_bs) {
- BlockDriverState *old_backing_bs = child_bs(bs->backing);
- assert(!old_backing_bs || !old_backing_bs->implicit);
- /* Abort the permission update on the backing bs we're detaching */
- if (old_backing_bs) {
- bdrv_abort_perm_update(old_backing_bs);
- }
- bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort);
- }
-
- bdrv_refresh_limits(bs, NULL);
+ bdrv_refresh_limits(bs, NULL, NULL);
}
/*
* Abort the reopen, and delete and free the staged changes in
* reopen_state
*/
-void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
BlockDriver *drv;
@@ -4585,78 +4733,176 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)
return ret;
}
+typedef struct BdrvRemoveFilterOrCowChild {
+ BdrvChild *child;
+ bool is_backing;
+} BdrvRemoveFilterOrCowChild;
+
+static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
+{
+ BdrvRemoveFilterOrCowChild *s = opaque;
+ BlockDriverState *parent_bs = s->child->opaque;
+
+ QLIST_INSERT_HEAD(&parent_bs->children, s->child, next);
+ if (s->is_backing) {
+ parent_bs->backing = s->child;
+ } else {
+ parent_bs->file = s->child;
+ }
+
+ /*
+ * We don't have to restore child->bs here to undo bdrv_replace_child()
+ * because that function is transactional and registered its own completion
+ * entries in @tran, so .abort() for bdrv_replace_child() will be called
+ * automatically.
+ */
+}
+
+static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
+{
+ BdrvRemoveFilterOrCowChild *s = opaque;
+
+ bdrv_child_free(s->child);
+}
+
+static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
+ .abort = bdrv_remove_filter_or_cow_child_abort,
+ .commit = bdrv_remove_filter_or_cow_child_commit,
+ .clean = g_free,
+};
+
/*
- * With auto_skip=true bdrv_replace_node_common skips updating from parents
- * if it creates a parent-child relation loop or if parent is block-job.
- *
- * With auto_skip=false the error is returned if from has a parent which should
- * not be updated.
+ * Remove the backing-chain child of @bs, if it exists: the cow child for
+ * format nodes (always .backing) or the filter child for filters (may be
+ * .file or .backing).
*/
-static int bdrv_replace_node_common(BlockDriverState *from,
- BlockDriverState *to,
- bool auto_skip, Error **errp)
+static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
+ Transaction *tran)
{
- BdrvChild *c, *next;
- GSList *list = NULL, *p;
- uint64_t perm = 0, shared = BLK_PERM_ALL;
- int ret;
+ BdrvRemoveFilterOrCowChild *s;
+ BdrvChild *child = bdrv_filter_or_cow_child(bs);
- /* Make sure that @from doesn't go away until we have successfully attached
- * all of its parents to @to. */
- bdrv_ref(from);
+ if (!child) {
+ return;
+ }
- assert(qemu_get_current_aio_context() == qemu_get_aio_context());
- assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
- bdrv_drained_begin(from);
+ if (child->bs) {
+ bdrv_replace_child(child, NULL, tran);
+ }
+
+ s = g_new(BdrvRemoveFilterOrCowChild, 1);
+ *s = (BdrvRemoveFilterOrCowChild) {
+ .child = child,
+ .is_backing = (child == bs->backing),
+ };
+ tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
+
+ QLIST_SAFE_REMOVE(child, next);
+ if (s->is_backing) {
+ bs->backing = NULL;
+ } else {
+ bs->file = NULL;
+ }
+}
+
+static int bdrv_replace_node_noperm(BlockDriverState *from,
+ BlockDriverState *to,
+ bool auto_skip, Transaction *tran,
+ Error **errp)
+{
+ BdrvChild *c, *next;
- /* Put all parents into @list and calculate their cumulative permissions */
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
if (!should_update_child(c, to)) {
if (auto_skip) {
continue;
}
- ret = -EINVAL;
error_setg(errp, "Should not change '%s' link to '%s'",
c->name, from->node_name);
- goto out;
+ return -EINVAL;
}
if (c->frozen) {
- ret = -EPERM;
error_setg(errp, "Cannot change '%s' link to '%s'",
c->name, from->node_name);
- goto out;
+ return -EPERM;
}
- list = g_slist_prepend(list, c);
- perm |= c->perm;
- shared &= c->shared_perm;
+ bdrv_replace_child(c, to, tran);
}
- /* Check whether the required permissions can be granted on @to, ignoring
- * all BdrvChild in @list so that they can't block themselves. */
- ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp);
+ return 0;
+}
+
+/*
+ * With auto_skip=true bdrv_replace_node_common skips updating from parents
+ * if it creates a parent-child relation loop or if parent is block-job.
+ *
+ * With auto_skip=false the error is returned if from has a parent which should
+ * not be updated.
+ *
+ * With @detach_subchain=true @to must be in a backing chain of @from. In this
+ * case backing link of the cow-parent of @to is removed.
+ */
+static int bdrv_replace_node_common(BlockDriverState *from,
+ BlockDriverState *to,
+ bool auto_skip, bool detach_subchain,
+ Error **errp)
+{
+ Transaction *tran = tran_new();
+ g_autoptr(GHashTable) found = NULL;
+ g_autoptr(GSList) refresh_list = NULL;
+ BlockDriverState *to_cow_parent;
+ int ret;
+
+ if (detach_subchain) {
+ assert(bdrv_chain_contains(from, to));
+ assert(from != to);
+ for (to_cow_parent = from;
+ bdrv_filter_or_cow_bs(to_cow_parent) != to;
+ to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
+ {
+ ;
+ }
+ }
+
+ /* Make sure that @from doesn't go away until we have successfully attached
+ * all of its parents to @to. */
+ bdrv_ref(from);
+
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
+ bdrv_drained_begin(from);
+
+ /*
+ * Do the replacement without permission update.
+ * Replacement may influence the permissions, we should calculate new
+ * permissions based on new graph. If we fail, we'll roll-back the
+ * replacement.
+ */
+ ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
if (ret < 0) {
- bdrv_abort_perm_update(to);
goto out;
}
- /* Now actually perform the change. We performed the permission check for
- * all elements of @list at once, so set the permissions all at once at the
- * very end. */
- for (p = list; p != NULL; p = p->next) {
- c = p->data;
-
- bdrv_ref(to);
- bdrv_replace_child_noperm(c, to);
- bdrv_unref(from);
+ if (detach_subchain) {
+ bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
}
- bdrv_set_perm(to);
+ found = g_hash_table_new(NULL, NULL);
+
+ refresh_list = bdrv_topological_dfs(refresh_list, found, to);
+ refresh_list = bdrv_topological_dfs(refresh_list, found, from);
+
+ ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
+ if (ret < 0) {
+ goto out;
+ }
ret = 0;
out:
- g_slist_free(list);
+ tran_finalize(tran, ret);
+
bdrv_drained_end(from);
bdrv_unref(from);
@@ -4666,7 +4912,13 @@ out:
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp)
{
- return bdrv_replace_node_common(from, to, true, errp);
+ return bdrv_replace_node_common(from, to, true, false, errp);
+}
+
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
+{
+ return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
+ errp);
}
/*
@@ -4676,37 +4928,36 @@ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
* This will modify the BlockDriverState fields, and swap contents
* between bs_new and bs_top. Both bs_new and bs_top are modified.
*
- * bs_new must not be attached to a BlockBackend.
+ * bs_new must not be attached to a BlockBackend and must not have backing
+ * child.
*
* This function does not create any image files.
- *
- * bdrv_append() takes ownership of a bs_new reference and unrefs it because
- * that's what the callers commonly need. bs_new will be referenced by the old
- * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
- * reference of its own, it must call bdrv_ref().
*/
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
{
- int ret = bdrv_set_backing_hd(bs_new, bs_top, errp);
+ int ret;
+ Transaction *tran = tran_new();
+
+ assert(!bs_new->backing);
+
+ ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
+ &child_of_bds, bdrv_backing_role(bs_new),
+ &bs_new->backing, tran, errp);
if (ret < 0) {
goto out;
}
- ret = bdrv_replace_node(bs_top, bs_new, errp);
+ ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
if (ret < 0) {
- bdrv_set_backing_hd(bs_new, NULL, &error_abort);
goto out;
}
- ret = 0;
-
+ ret = bdrv_refresh_perms(bs_new, errp);
out:
- /*
- * bs_new is now referenced by its new parents, we don't need the
- * additional reference any more.
- */
- bdrv_unref(bs_new);
+ tran_finalize(tran, ret);
+
+ bdrv_refresh_limits(bs_top, NULL, NULL);
return ret;
}
@@ -5002,7 +5253,17 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
updated_children = g_slist_prepend(updated_children, c);
}
- bdrv_replace_node_common(top, base, false, &local_err);
+ /*
+ * It seems correct to pass detach_subchain=true here, but doing so triggers
+ * another, not yet fixed bug: due to a nested aio_poll loop we switch to
+ * another drained section, which modifies the graph (for example, by removing
+ * a child that we keep in the updated_children list). So, it's a TODO.
+ *
+ * Note: the bug is triggered by passing detach_subchain=true here and running
+ * test-bdrv-drain; the test_drop_intermediate_poll() test case will crash.
+ * That's a FIXME.
+ */
+ bdrv_replace_node_common(top, base, false, false, &local_err);
if (local_err) {
error_report_err(local_err);
goto exit;
diff --git a/block/backup-top.c b/block/backup-top.c
index 589e8b651d..425e3778be 100644
--- a/block/backup-top.c
+++ b/block/backup-top.c
@@ -37,7 +37,6 @@
typedef struct BDRVBackupTopState {
BlockCopyState *bcs;
BdrvChild *target;
- bool active;
int64_t cluster_size;
} BDRVBackupTopState;
@@ -45,12 +44,6 @@ static coroutine_fn int backup_top_co_preadv(
BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
- BDRVBackupTopState *s = bs->opaque;
-
- if (!s->active) {
- return -EIO;
- }
-
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
@@ -60,10 +53,6 @@ static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
BDRVBackupTopState *s = bs->opaque;
uint64_t off, end;
- if (!s->active) {
- return -EIO;
- }
-
if (flags & BDRV_REQ_WRITE_UNCHANGED) {
return 0;
}
@@ -137,21 +126,6 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
- BDRVBackupTopState *s = bs->opaque;
-
- if (!s->active) {
- /*
- * The filter node may be in process of bdrv_append(), which firstly do
- * bdrv_set_backing_hd() and then bdrv_replace_node(). This means that
- * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So,
- * let's require nothing during bdrv_append() and refresh permissions
- * after it (see bdrv_backup_top_append()).
- */
- *nperm = 0;
- *nshared = BLK_PERM_ALL;
- return;
- }
-
if (!(role & BDRV_CHILD_FILTERED)) {
/*
* Target child
@@ -234,7 +208,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
bdrv_drained_begin(source);
- bdrv_ref(top);
ret = bdrv_append(top, source, errp);
if (ret < 0) {
error_prepend(errp, "Cannot append backup-top filter: ");
@@ -242,17 +215,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
}
appended = true;
- /*
- * bdrv_append() finished successfully, now we can require permissions
- * we want.
- */
- state->active = true;
- ret = bdrv_child_refresh_perms(top, top->backing, errp);
- if (ret < 0) {
- error_prepend(errp, "Cannot set permissions for backup-top filter: ");
- goto fail;
- }
-
state->cluster_size = cluster_size;
state->bcs = block_copy_state_new(top->backing, state->target,
cluster_size, perf->use_copy_range,
@@ -269,7 +231,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
fail:
if (appended) {
- state->active = false;
bdrv_backup_top_drop(top);
} else {
bdrv_unref(top);
@@ -284,16 +245,9 @@ void bdrv_backup_top_drop(BlockDriverState *bs)
{
BDRVBackupTopState *s = bs->opaque;
- bdrv_drained_begin(bs);
+ bdrv_drop_filter(bs, &error_abort);
block_copy_state_free(s->bcs);
- s->active = false;
- bdrv_child_refresh_perms(bs, bs->backing, &error_abort);
- bdrv_replace_node(bs, bs->backing->bs, &error_abort);
- bdrv_set_backing_hd(bs, NULL, &error_abort);
-
- bdrv_drained_end(bs);
-
bdrv_unref(bs);
}
diff --git a/block/block-backend.c b/block/block-backend.c
index 413af51f3b..6fca9853e1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -298,6 +298,13 @@ static void blk_root_detach(BdrvChild *child)
}
}
+static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
+{
+ BlockBackend *blk = c->opaque;
+
+ return blk_get_aio_context(blk);
+}
+
static const BdrvChildClass child_root = {
.inherit_options = blk_root_inherit_options,
@@ -318,6 +325,8 @@ static const BdrvChildClass child_root = {
.can_set_aio_ctx = blk_root_can_set_aio_ctx,
.set_aio_ctx = blk_root_set_aio_ctx,
+
+ .get_parent_aio_context = blk_root_get_parent_aio_context,
};
/*
@@ -398,15 +407,19 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
BlockBackend *blk;
BlockDriverState *bs;
uint64_t perm = 0;
+ uint64_t shared = BLK_PERM_ALL;
- /* blk_new_open() is mainly used in .bdrv_create implementations and the
- * tools where sharing isn't a concern because the BDS stays private, so we
- * just request permission according to the flags.
+ /*
+ * blk_new_open() is mainly used in .bdrv_create implementations and the
+ * tools where sharing isn't a major concern because the BDS stays private
+ * and the file is generally not supposed to be used by a second process,
+ * so we just request permission according to the flags.
*
* The exceptions are xen_disk and blockdev_init(); in these cases, the
* caller of blk_new_open() doesn't make use of the permissions, but they
* shouldn't hurt either. We can still share everything here because the
- * guest devices will add their own blockers if they can't share. */
+ * guest devices will add their own blockers if they can't share.
+ */
if ((flags & BDRV_O_NO_IO) == 0) {
perm |= BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
@@ -416,8 +429,11 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
if (flags & BDRV_O_RESIZE) {
perm |= BLK_PERM_RESIZE;
}
+ if (flags & BDRV_O_NO_SHARE) {
+ shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
+ }
- blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL);
+ blk = blk_new(qemu_get_aio_context(), perm, shared);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
@@ -426,7 +442,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
- blk->ctx, perm, BLK_PERM_ALL, blk, errp);
+ perm, shared, blk, errp);
if (!blk->root) {
blk_unref(blk);
return NULL;
@@ -840,7 +856,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
- blk->ctx, blk->perm, blk->shared_perm,
+ blk->perm, blk->shared_perm,
blk, errp);
if (blk->root == NULL) {
return -EPERM;
diff --git a/block/commit.c b/block/commit.c
index dd9ba87349..b89bb20b75 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -312,6 +312,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
commit_top_bs->total_sectors = top->total_sectors;
ret = bdrv_append(commit_top_bs, top, errp);
+ bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
if (ret < 0) {
commit_top_bs = NULL;
goto fail;
diff --git a/block/file-posix.c b/block/file-posix.c
index 20e14f8e96..10b71d9a13 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -175,7 +175,6 @@ typedef struct BDRVRawState {
} BDRVRawState;
typedef struct BDRVRawReopenState {
- int fd;
int open_flags;
bool drop_cache;
bool check_cache_dropped;
@@ -1075,7 +1074,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
BDRVRawReopenState *rs;
QemuOpts *opts;
int ret;
- Error *local_err = NULL;
assert(state != NULL);
assert(state->bs != NULL);
@@ -1101,32 +1099,18 @@ static int raw_reopen_prepare(BDRVReopenState *state,
* bdrv_reopen_prepare() will detect changes and complain. */
qemu_opts_to_qdict(opts, state->options);
- rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags,
- state->perm, true, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -1;
- goto out;
- }
-
- /* Fail already reopen_prepare() if we can't get a working O_DIRECT
- * alignment with the new fd. */
- if (rs->fd != -1) {
- raw_probe_alignment(state->bs, rs->fd, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto out_fd;
- }
- }
+ /*
+ * As part of reopen prepare we also want to create a new fd with
+ * raw_reconfigure_getfd(). But it wants the updated "perm", whereas in
+ * bdrv_reopen_multiple() the .bdrv_reopen_prepare() callback is called prior
+ * to the permission update. Happily, the permission update is always a part
+ * (a separate stage) of bdrv_reopen_multiple(), so we can rely on this fact
+ * and reconfigure the fd in raw_check_perm().
+ */
s->reopen_state = state;
ret = 0;
-out_fd:
- if (ret < 0) {
- qemu_close(rs->fd);
- rs->fd = -1;
- }
+
out:
qemu_opts_del(opts);
return ret;
@@ -1140,10 +1124,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
s->drop_cache = rs->drop_cache;
s->check_cache_dropped = rs->check_cache_dropped;
s->open_flags = rs->open_flags;
-
- qemu_close(s->fd);
- s->fd = rs->fd;
-
g_free(state->opaque);
state->opaque = NULL;
@@ -1162,10 +1142,6 @@ static void raw_reopen_abort(BDRVReopenState *state)
return;
}
- if (rs->fd >= 0) {
- qemu_close(rs->fd);
- rs->fd = -1;
- }
g_free(state->opaque);
state->opaque = NULL;
@@ -3073,39 +3049,30 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
Error **errp)
{
BDRVRawState *s = bs->opaque;
- BDRVRawReopenState *rs = NULL;
+ int input_flags = s->reopen_state ? s->reopen_state->flags : bs->open_flags;
int open_flags;
int ret;
- if (s->perm_change_fd) {
+ /* We may need a new fd if auto-read-only switches the mode */
+ ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm,
+ false, errp);
+ if (ret < 0) {
+ return ret;
+ } else if (ret != s->fd) {
+ Error *local_err = NULL;
+
/*
- * In the context of reopen, this function may be called several times
- * (directly and recursively while change permissions of the parent).
- * This is even true for children that don't inherit from the original
- * reopen node, so s->reopen_state is not set.
- *
- * Ignore all but the first call.
+ * Fail already in check_perm() if we can't get a working O_DIRECT
+ * alignment with the new fd.
*/
- return 0;
- }
-
- if (s->reopen_state) {
- /* We already have a new file descriptor to set permissions for */
- assert(s->reopen_state->perm == perm);
- assert(s->reopen_state->shared_perm == shared);
- rs = s->reopen_state->opaque;
- s->perm_change_fd = rs->fd;
- s->perm_change_flags = rs->open_flags;
- } else {
- /* We may need a new fd if auto-read-only switches the mode */
- ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm,
- false, errp);
- if (ret < 0) {
- return ret;
- } else if (ret != s->fd) {
- s->perm_change_fd = ret;
- s->perm_change_flags = open_flags;
+ raw_probe_alignment(bs, ret, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
}
+
+ s->perm_change_fd = ret;
+ s->perm_change_flags = open_flags;
}
/* Prepare permissions on old fd to avoid conflicts between old and new,
@@ -3127,7 +3094,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
return 0;
fail:
- if (s->perm_change_fd && !s->reopen_state) {
+ if (s->perm_change_fd) {
qemu_close(s->perm_change_fd);
}
s->perm_change_fd = 0;
@@ -3158,7 +3125,7 @@ static void raw_abort_perm_update(BlockDriverState *bs)
/* For reopen, .bdrv_reopen_abort is called afterwards and will close
* the file descriptor. */
- if (s->perm_change_fd && !s->reopen_state) {
+ if (s->perm_change_fd) {
qemu_close(s->perm_change_fd);
}
s->perm_change_fd = 0;
diff --git a/block/io.c b/block/io.c
index ca2dca3007..35b6c56efc 100644
--- a/block/io.c
+++ b/block/io.c
@@ -133,13 +133,40 @@ static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+typedef struct BdrvRefreshLimitsState { /* opaque state of the bdrv_refresh_limits() transaction action */
+ BlockDriverState *bs; /* node whose limits are being refreshed */
+ BlockLimits old_bl; /* copy of bs->bl taken before the refresh; restored on abort */
+} BdrvRefreshLimitsState;
+
+/* Transaction .abort callback: put back the limits saved before the refresh. */
+static void bdrv_refresh_limits_abort(void *opaque)
+{
+    BdrvRefreshLimitsState *state = opaque;
+    BlockDriverState *bs = state->bs;
+
+    bs->bl = state->old_bl;
+}
+
+static TransactionActionDrv bdrv_refresh_limits_drv = { /* no .commit: the refreshed limits simply stay in place */
+ .abort = bdrv_refresh_limits_abort, /* restore the saved BlockLimits */
+ .clean = g_free, /* release the BdrvRefreshLimitsState */
+};
+
+/* @tran is allowed to be NULL, in this case no rollback is possible. */
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
ERRP_GUARD();
BlockDriver *drv = bs->drv;
BdrvChild *c;
bool have_limits;
+ if (tran) {
+ BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
+ *s = (BdrvRefreshLimitsState) {
+ .bs = bs,
+ .old_bl = bs->bl,
+ };
+ tran_add(tran, &bdrv_refresh_limits_drv, s);
+ }
+
memset(&bs->bl, 0, sizeof(bs->bl));
if (!drv) {
@@ -156,7 +183,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
- bdrv_refresh_limits(c->bs, errp);
+ bdrv_refresh_limits(c->bs, tran, errp);
if (*errp) {
return;
}
diff --git a/block/mirror.c b/block/mirror.c
index 5a71bd8bbc..840b8e8c15 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1630,9 +1630,6 @@ static BlockJob *mirror_start_job(
bs_opaque->is_commit = target_is_backing;
- /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
- * it alive until block_job_create() succeeds even if bs has no parent. */
- bdrv_ref(mirror_top_bs);
bdrv_drained_begin(bs);
ret = bdrv_append(mirror_top_bs, bs, errp);
bdrv_drained_end(bs);
diff --git a/blockdev.c b/blockdev.c
index a57590aae4..834c2304a1 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1576,10 +1576,6 @@ static void external_snapshot_prepare(BlkActionState *common,
goto out;
}
- /* This removes our old bs and adds the new bs. This is an operation that
- * can fail, so we need to do it in .prepare; undoing it for abort is
- * always possible. */
- bdrv_ref(state->new_bs);
ret = bdrv_append(state->new_bs, state->old_bs, errp);
if (ret < 0) {
goto out;
diff --git a/blockjob.c b/blockjob.c
index 207e8c7fd9..2fe1d788ba 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -163,6 +163,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
job->job.aio_context = ctx;
}
+/* BdrvChildClass callback: report the AioContext of the job that owns @c. */
+static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
+{
+    BlockJob *owner = c->opaque;
+
+    return owner->job.aio_context;
+}
+
static const BdrvChildClass child_job = {
.get_parent_desc = child_job_get_parent_desc,
.drained_begin = child_job_drained_begin,
@@ -171,6 +178,7 @@ static const BdrvChildClass child_job = {
.can_set_aio_ctx = child_job_can_set_aio_ctx,
.set_aio_ctx = child_job_set_aio_ctx,
.stay_at_node = true,
+ .get_parent_aio_context = child_job_get_parent_aio_context,
};
void block_job_remove_all_bdrv(BlockJob *job)
@@ -221,8 +229,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
aio_context_release(job->job.aio_context);
}
- c = bdrv_root_attach_child(bs, name, &child_job, 0,
- job->job.aio_context, perm, shared_perm, job,
+ c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
errp);
if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
aio_context_acquire(job->job.aio_context);
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 0b5b9d44cd..f5e9682703 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -467,6 +467,11 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
error_setg(errp, "vhost-user-blk: queue size must be non-zero");
return;
}
+ if (s->queue_size > VIRTQUEUE_MAX_SIZE) {
+ error_setg(errp, "vhost-user-blk: queue size must not exceed %d",
+ VIRTQUEUE_MAX_SIZE);
+ return;
+ }
if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
return;
diff --git a/include/block/block.h b/include/block/block.h
index b3f6e509d4..82185965ff 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -9,6 +9,7 @@
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
+#include "qemu/transactions.h"
/*
* generated_co_wrapper
@@ -101,6 +102,7 @@ typedef struct HDGeometry {
uint32_t cylinders;
} HDGeometry;
+#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
#define BDRV_O_RDWR 0x0002
#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
@@ -207,8 +209,7 @@ typedef struct BDRVReopenState {
BlockdevDetectZeroesOptions detect_zeroes;
bool backing_missing;
bool replace_backing_bs; /* new_backing_bs is ignored if this is false */
- BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */
- uint64_t perm, shared_perm;
+ BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
QDict *options;
QDict *explicit_options;
void *opaque;
@@ -362,6 +363,7 @@ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
int flags, Error **errp);
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
int bdrv_parse_aio(const char *mode, int *flags);
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
@@ -387,10 +389,6 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
Error **errp);
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
-void bdrv_reopen_commit(BDRVReopenState *reopen_state);
-void bdrv_reopen_abort(BDRVReopenState *reopen_state);
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
@@ -424,7 +422,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp);
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
int bdrv_commit(BlockDriverState *bs);
int bdrv_make_empty(BdrvChild *c, Error **errp);
int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
@@ -702,6 +700,8 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp);
bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList **ignore, Error **errp);
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
+
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 88e4111939..c823f5b1b3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -789,6 +789,8 @@ struct BdrvChildClass {
bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
GSList **ignore, Error **errp);
void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
+
+ AioContext *(*get_parent_aio_context)(BdrvChild *child);
};
extern const BdrvChildClass child_of_bds;
@@ -811,11 +813,6 @@ struct BdrvChild {
*/
uint64_t shared_perm;
- /* backup of permissions during permission update procedure */
- bool has_backup_perm;
- uint64_t backup_perm;
- uint64_t backup_shared_perm;
-
/*
* This link is frozen: the child can neither be replaced nor
* detached from the parent.
@@ -1306,7 +1303,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
- AioContext *ctx,
uint64_t perm, uint64_t shared_perm,
void *opaque, Error **errp);
void bdrv_root_unref_child(BdrvChild *child);
diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h
new file mode 100644
index 0000000000..92c5965235
--- /dev/null
+++ b/include/qemu/transactions.h
@@ -0,0 +1,63 @@
+/*
+ * Simple transactions API
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Author:
+ * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * = Generic transaction API =
+ *
+ * The intended usage is the following: you create "prepare" functions, which
+ * represent the actions. They will usually have a Transaction* argument, and
+ * call tran_add() to register finalization callbacks. For finalization
+ * callbacks, prepare corresponding TransactionActionDrv structures.
+ *
+ * Then, when you need to make a transaction, create an empty Transaction with
+ * tran_new(), call your "prepare" functions on it, and finally call
+ * tran_abort() or tran_commit() to finalize the transaction by running the
+ * corresponding finalization actions in reverse order.
+ */
+
+#ifndef QEMU_TRANSACTIONS_H
+#define QEMU_TRANSACTIONS_H
+
+#include <gmodule.h>
+
+typedef struct TransactionActionDrv { /* callbacks of one transaction action; each pointer may be NULL */
+ void (*abort)(void *opaque); /* invoked by tran_abort() */
+ void (*commit)(void *opaque); /* invoked by tran_commit() */
+ void (*clean)(void *opaque); /* invoked by both tran_abort() and tran_commit() */
+} TransactionActionDrv;
+
+typedef struct Transaction Transaction;
+
+Transaction *tran_new(void); /* allocate a transaction with no actions */
+void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque); /* register an action; @opaque is passed to the @drv callbacks */
+void tran_abort(Transaction *tran); /* run .abort and .clean of all actions, then free @tran */
+void tran_commit(Transaction *tran); /* run .commit and .clean of all actions, then free @tran */
+
+/*
+ * Finalize @tran according to @ret: abort it when @ret is negative,
+ * commit it otherwise.
+ */
+static inline void tran_finalize(Transaction *tran, int ret)
+{
+    if (ret >= 0) {
+        tran_commit(tran);
+        return;
+    }
+
+    tran_abort(tran);
+}
+
+#endif /* QEMU_TRANSACTIONS_H */
diff --git a/qemu-img.c b/qemu-img.c
index babb5573ab..a5993682aa 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2146,7 +2146,7 @@ static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
static int img_convert(int argc, char **argv)
{
- int c, bs_i, flags, src_flags = 0;
+ int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
*src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
*out_filename, *out_baseimg_param, *snapshot_name = NULL;
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index 11104b9208..fc5297e268 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -905,7 +905,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
# We can't reopen hd1 to read-only, as block-stream requires it to be
# read-write
self.reopen(opts['backing'], {'read-only': True},
- "Cannot make block node read-only, there is a writer on it")
+ "Read-only block node 'hd1' cannot support read-write users")
# We can't remove hd2 while the stream job is ongoing
opts['backing']['backing'] = None
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
index 37c35058ae..97e62a4c94 100644
--- a/tests/qemu-iotests/283.out
+++ b/tests/qemu-iotests/283.out
@@ -5,7 +5,7 @@
{"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}}
{"return": {}}
{"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
-{"error": {"class": "GenericError", "desc": "Cannot set permissions for backup-top filter: Conflicts with use by other as 'image', which uses 'write' on base"}}
+{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}}
=== backup-top should be gone after job-finalize ===
diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out
index 5f41491e05..9f52255da8 100644
--- a/tests/qemu-iotests/tests/qsd-jobs.out
+++ b/tests/qemu-iotests/tests/qsd-jobs.out
@@ -16,7 +16,7 @@ QMP_VERSION
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{"error": {"class": "GenericError", "desc": "Conflicts with use by a block device as 'root', which uses 'write' on fmt_base"}}
+{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}}
*** done
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 8a29e33e00..892f7f47d8 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1478,7 +1478,6 @@ static void test_append_to_drained(void)
g_assert_cmpint(base_s->drain_count, ==, 1);
g_assert_cmpint(base->in_flight, ==, 0);
- /* Takes ownership of overlay, so we don't have to unref it later */
bdrv_append(overlay, base, &error_abort);
g_assert_cmpint(base->in_flight, ==, 0);
g_assert_cmpint(overlay->in_flight, ==, 0);
@@ -1495,6 +1494,7 @@ static void test_append_to_drained(void)
g_assert_cmpint(overlay->quiesce_counter, ==, 0);
g_assert_cmpint(overlay_s->drain_count, ==, 0);
+ bdrv_unref(overlay);
bdrv_unref(base);
blk_unref(blk);
}
diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index c4f7d16039..88f25c0cdb 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -1,7 +1,7 @@
/*
* Block node graph modifications tests
*
- * Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
+ * Copyright (c) 2019-2021 Virtuozzo International GmbH. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -44,6 +44,21 @@ static BlockDriver bdrv_no_perm = {
.bdrv_child_perm = no_perm_default_perms,
};
+/*
+ * .bdrv_child_perm callback: request WRITE on the child and share everything
+ * except WRITE, regardless of the @perm/@shared values passed in.
+ */
+static void exclusive_write_perms(BlockDriverState *bs, BdrvChild *c,
+                                  BdrvChildRole role,
+                                  BlockReopenQueue *reopen_queue,
+                                  uint64_t perm, uint64_t shared,
+                                  uint64_t *nperm, uint64_t *nshared)
+{
+    const uint64_t all_but_write = BLK_PERM_ALL & ~BLK_PERM_WRITE;
+
+    *nperm = BLK_PERM_WRITE;
+    *nshared = all_but_write;
+}
+
+static BlockDriver bdrv_exclusive_writer = { /* test driver that takes exclusive WRITE on its children */
+ .format_name = "exclusive-writer",
+ .bdrv_child_perm = exclusive_write_perms, /* WRITE requested, WRITE not shared */
+};
+
static BlockDriverState *no_perm_node(const char *name)
{
return bdrv_new_open_driver(&bdrv_no_perm, name, BDRV_O_RDWR, &error_abort);
@@ -55,6 +70,12 @@ static BlockDriverState *pass_through_node(const char *name)
BDRV_O_RDWR, &error_abort);
}
+/* Create a read-write node called @name that uses bdrv_exclusive_writer. */
+static BlockDriverState *exclusive_writer_node(const char *name)
+{
+    BlockDriverState *node;
+
+    node = bdrv_new_open_driver(&bdrv_exclusive_writer, name, BDRV_O_RDWR,
+                                &error_abort);
+    return node;
+}
+
/*
* test_update_perm_tree
*
@@ -117,6 +138,7 @@ static void test_update_perm_tree(void)
ret = bdrv_append(filter, bs, NULL);
g_assert_cmpint(ret, <, 0);
+ bdrv_unref(filter);
blk_unref(root);
}
@@ -181,10 +203,189 @@ static void test_should_update_child(void)
bdrv_append(filter, bs, &error_abort);
g_assert(target->backing->bs == bs);
+ bdrv_unref(filter);
bdrv_unref(bs);
blk_unref(root);
}
+/*
+ * test_parallel_exclusive_write
+ *
+ * Check that when we replace a node, the old permissions of the node being
+ * removed don't break the replacement.
+ */
+static void test_parallel_exclusive_write(void)
+{
+    BlockDriverState *writer = exclusive_writer_node("top");
+    BlockDriverState *shared_base = no_perm_node("base");
+    BlockDriverState *old_filter = pass_through_node("fl1");
+    BlockDriverState *new_filter = pass_through_node("fl2");
+
+    /*
+     * bdrv_attach_child() eats one reference to the child node. The base
+     * node is attached twice below (once under each filter), so take the
+     * second reference up front.
+     */
+    bdrv_ref(shared_base);
+
+    /* Build top -> fl1 -> base, plus the side branch fl2 -> base */
+    bdrv_attach_child(writer, old_filter, "backing", &child_of_bds,
+                      BDRV_CHILD_DATA, &error_abort);
+    bdrv_attach_child(old_filter, shared_base, "backing", &child_of_bds,
+                      BDRV_CHILD_FILTERED, &error_abort);
+    bdrv_attach_child(new_filter, shared_base, "backing", &child_of_bds,
+                      BDRV_CHILD_FILTERED, &error_abort);
+
+    /* Put fl2 in place of fl1; any failure aborts through error_abort */
+    bdrv_replace_node(old_filter, new_filter, &error_abort);
+
+    bdrv_unref(new_filter);
+    bdrv_unref(writer);
+}
+
+/*
+ * .bdrv_child_perm callback: only the child currently selected as bs->file
+ * gets exclusive WRITE; every other child requests nothing and shares
+ * everything.
+ */
+static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c,
+                                BdrvChildRole role,
+                                BlockReopenQueue *reopen_queue,
+                                uint64_t perm, uint64_t shared,
+                                uint64_t *nperm, uint64_t *nshared)
+{
+    bool is_active = bs->file && c == bs->file;
+
+    if (!is_active) {
+        *nperm = 0;
+        *nshared = BLK_PERM_ALL;
+        return;
+    }
+
+    *nperm = BLK_PERM_WRITE;
+    *nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE;
+}
+
+static BlockDriver bdrv_write_to_file = { /* test driver whose child permissions depend on which child is bs->file */
+ .format_name = "tricky-perm",
+ .bdrv_child_perm = write_to_file_perms, /* exclusive WRITE on bs->file only */
+};
+
+
+/*
+ * The following test shows that topological-sort order is required for
+ * permission update, simple DFS is not enough.
+ *
+ * Consider a block driver which has two filter children: one active
+ * with exclusive write access and one inactive with no specific
+ * permissions.
+ *
+ * These two children have a common base child, like this:
+ *
+ * ┌─────┐ ┌──────┐
+ * │ fl2 │ ◀── │ top │
+ * └─────┘ └──────┘
+ * │ │
+ * │ │ w
+ * │ ▼
+ * │ ┌──────┐
+ * │ │ fl1 │
+ * │ └──────┘
+ * │ │
+ * │ │ w
+ * │ ▼
+ * │ ┌──────┐
+ * └───────▶ │ base │
+ * └──────┘
+ *
+ * So, exclusive write is propagated.
+ *
+ * Assume we want to make fl2 active instead of fl1.
+ * So, we set some option for the top driver and do a permission update.
+ *
+ * With simple DFS, if the permission update goes first through the
+ * top->fl1->base branch it will succeed: it first drops the exclusive write
+ * permissions and then applies them to the other BdrvChild objects.
+ * But if the permission update goes first through the top->fl2->base branch
+ * it will fail, as when we try to update the fl2->base child, the old, not
+ * yet updated fl1->base child will be in conflict.
+ *
+ * With topological-sort order we always update parents before children, so fl1
+ * and fl2 are both updated when we update base and there is no conflict.
+ */
+static void test_parallel_perm_update(void)
+{
+    BlockDriverState *root = no_perm_node("top");
+    BlockDriverState *switcher =
+        bdrv_new_open_driver(&bdrv_write_to_file, "tricky", BDRV_O_RDWR,
+                             &error_abort);
+    BlockDriverState *common_base = no_perm_node("base");
+    BlockDriverState *filter_a = pass_through_node("fl1");
+    BlockDriverState *filter_b = pass_through_node("fl2");
+    BdrvChild *edge_a, *edge_b;
+
+    /*
+     * bdrv_attach_child() eats one reference to the child node. The base
+     * node gets attached under both filters, hence the extra reference.
+     */
+    bdrv_ref(common_base);
+
+    /* top -> tricky -> {fl1, fl2} -> base */
+    bdrv_attach_child(root, switcher, "file", &child_of_bds, BDRV_CHILD_DATA,
+                      &error_abort);
+    edge_a = bdrv_attach_child(switcher, filter_a, "first", &child_of_bds,
+                               BDRV_CHILD_FILTERED, &error_abort);
+    edge_b = bdrv_attach_child(switcher, filter_b, "second", &child_of_bds,
+                               BDRV_CHILD_FILTERED, &error_abort);
+    bdrv_attach_child(filter_a, common_base, "backing", &child_of_bds,
+                      BDRV_CHILD_FILTERED, &error_abort);
+    bdrv_attach_child(filter_b, common_base, "backing", &child_of_bds,
+                      BDRV_CHILD_FILTERED, &error_abort);
+
+    /* Round 1: fl1 is the active child and must hold WRITE */
+    switcher->file = edge_a;
+    bdrv_child_refresh_perms(root, root->children.lh_first, &error_abort);
+
+    assert(edge_a->perm & BLK_PERM_WRITE);
+    assert(!(edge_b->perm & BLK_PERM_WRITE));
+
+    /* Round 2: make fl2 active; WRITE has to move over to it */
+    switcher->file = edge_b;
+    bdrv_child_refresh_perms(root, root->children.lh_first, &error_abort);
+
+    assert(edge_b->perm & BLK_PERM_WRITE);
+    assert(!(edge_a->perm & BLK_PERM_WRITE));
+
+    /*
+     * Round 3: switch back, so that the test does not depend on the order
+     * in which the children are stored in the list.
+     */
+    switcher->file = edge_a;
+    bdrv_child_refresh_perms(root, root->children.lh_first, &error_abort);
+
+    assert(edge_a->perm & BLK_PERM_WRITE);
+    assert(!(edge_b->perm & BLK_PERM_WRITE));
+
+    bdrv_unref(root);
+}
+
+/*
+ * It's possible that the permissions required by a filter allow inserting it
+ * into a backing chain, like:
+ *
+ * 1. [top] -> [filter] -> [base]
+ *
+ * but don't allow adding it as a branch:
+ *
+ * 2. [filter] --\
+ * v
+ * [top] -> [base]
+ *
+ * So, inserting such a filter should do all graph modifications and only then
+ * update permissions. If we try to go through the intermediate state [2] and
+ * update permissions on it, we'll fail.
+ *
+ * Let's check that bdrv_append() can append such a filter.
+ */
+static void test_append_greedy_filter(void)
+{
+    BlockDriverState *writer = exclusive_writer_node("top");
+    BlockDriverState *backing = no_perm_node("base");
+    BlockDriverState *greedy = exclusive_writer_node("fl1");
+
+    /* Start from [top] -> [base] */
+    bdrv_attach_child(writer, backing, "backing", &child_of_bds,
+                      BDRV_CHILD_COW, &error_abort);
+
+    /* Append the second exclusive writer on top of base; must not fail */
+    bdrv_append(greedy, backing, &error_abort);
+
+    bdrv_unref(greedy);
+    bdrv_unref(writer);
+}
+
int main(int argc, char *argv[])
{
bdrv_init();
@@ -195,6 +396,12 @@ int main(int argc, char *argv[])
g_test_add_func("/bdrv-graph-mod/update-perm-tree", test_update_perm_tree);
g_test_add_func("/bdrv-graph-mod/should-update-child",
test_should_update_child);
+ g_test_add_func("/bdrv-graph-mod/parallel-perm-update",
+ test_parallel_perm_update);
+ g_test_add_func("/bdrv-graph-mod/parallel-exclusive-write",
+ test_parallel_exclusive_write);
+ g_test_add_func("/bdrv-graph-mod/append-greedy-filter",
+ test_append_greedy_filter);
return g_test_run();
}
diff --git a/util/meson.build b/util/meson.build
index 510765cde4..97fad44105 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -41,6 +41,7 @@ util_ss.add(files('qsp.c'))
util_ss.add(files('range.c'))
util_ss.add(files('stats64.c'))
util_ss.add(files('systemd.c'))
+util_ss.add(files('transactions.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
util_ss.add(files('guest-random.c'))
util_ss.add(files('yank.c'))
diff --git a/util/transactions.c b/util/transactions.c
new file mode 100644
index 0000000000..d0bc9a3e73
--- /dev/null
+++ b/util/transactions.c
@@ -0,0 +1,96 @@
+/*
+ * Simple transactions API
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Author:
+ * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/transactions.h"
+#include "qemu/queue.h"
+
+typedef struct TransactionAction { /* one action registered with tran_add() */
+ TransactionActionDrv *drv; /* callbacks to run when the transaction is finalized */
+ void *opaque; /* argument handed to every @drv callback */
+ QSLIST_ENTRY(TransactionAction) entry; /* link in Transaction.actions */
+} TransactionAction;
+
+struct Transaction { /* a list of registered actions */
+ QSLIST_HEAD(, TransactionAction) actions; /* tran_add() inserts at the head */
+};
+
+/*
+ * Allocate a transaction that has no actions yet. It is released by
+ * tran_abort() or tran_commit().
+ */
+Transaction *tran_new(void)
+{
+    Transaction *fresh;
+
+    fresh = g_new(Transaction, 1);
+    QSLIST_INIT(&fresh->actions);
+
+    return fresh;
+}
+
+/*
+ * Register an action with @tran. The callbacks in @drv are invoked with
+ * @opaque when the transaction is finalized. The new action is put at the
+ * head of the list.
+ */
+void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque)
+{
+    TransactionAction *item = g_new(TransactionAction, 1);
+
+    item->drv = drv;
+    item->opaque = opaque;
+
+    QSLIST_INSERT_HEAD(&tran->actions, item, entry);
+}
+
+/*
+ * Roll back @tran: call the .abort callback of every registered action
+ * (starting with the most recently added one), then the .clean callbacks,
+ * and finally free the actions and @tran itself. @tran must not be used
+ * afterwards.
+ *
+ * All .abort callbacks run before the first .clean callback, so that a
+ * .clean callback cannot release state which the .abort callback of another
+ * action still needs.
+ */
+void tran_abort(Transaction *tran)
+{
+    TransactionAction *act, *next;
+
+    QSLIST_FOREACH(act, &tran->actions, entry) {
+        if (act->drv->abort) {
+            act->drv->abort(act->opaque);
+        }
+    }
+
+    /* _SAFE variant because @act is freed while walking the list */
+    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
+        if (act->drv->clean) {
+            act->drv->clean(act->opaque);
+        }
+
+        g_free(act);
+    }
+
+    g_free(tran);
+}
+
+/*
+ * Commit @tran: call the .commit callback of every registered action
+ * (starting with the most recently added one), then the .clean callbacks,
+ * and finally free the actions and @tran itself. @tran must not be used
+ * afterwards.
+ *
+ * All .commit callbacks run before the first .clean callback, so that a
+ * .clean callback cannot release state which the .commit callback of another
+ * action still needs.
+ */
+void tran_commit(Transaction *tran)
+{
+    TransactionAction *act, *next;
+
+    QSLIST_FOREACH(act, &tran->actions, entry) {
+        if (act->drv->commit) {
+            act->drv->commit(act->opaque);
+        }
+    }
+
+    /* _SAFE variant because @act is freed while walking the list */
+    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
+        if (act->drv->clean) {
+            act->drv->clean(act->opaque);
+        }
+
+        g_free(act);
+    }
+
+    g_free(tran);
+}