Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches # gpg: Signature made Thu 19 May 2016 16:09:27 BST using RSA key ID C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" * remotes/kevin/tags/for-upstream: (31 commits) qemu-iotests: Fix regression in 136 on aio_read invalid qemu-iotests: Simplify 109 with unaligned qemu-img compare qemu-io: Fix recent UI updates block: clarify error message for qmp-eject qemu-iotests: Some more write_zeroes tests qcow2: Fix write_zeroes with partially allocated backing file cluster qcow2: fix condition in is_zero_cluster block: Propagate AioContext change to all children block: Remove BlockDriverState.blk block: Don't return throttling info in query-named-block-nodes block: Avoid bs->blk in bdrv_next() block: Add bdrv_has_blk() block: Remove bdrv_aio_multiwrite() blockjob: Don't touch BDS iostatus blockjob: Don't set iostatus of target block: User BdrvChild callback for device name block: Use BdrvChild callbacks for change_media/resize block: Don't check throttled reqs in bdrv_requests_pending() Revert "block: Forbid I/O throttling on nodes with multiple parents for 2.6" block: Remove bdrv_move_feature_fields() ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2016-05-19 16:54:12 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2016-05-19 16:54:12 +0100
commit: 6bd8ab6889f45a42d69a3a65a4d6e7fc2453c84c (patch)
tree: 8333fbb0dc3140307c828ed2ebc5b4e81facbbd7 /block
parent: 776efef32439a31cb13a6acfe8aab833687745ad (diff)
parent: 7753da2351e0b0ff6825d080aff58d73c994ff47 (diff)
13 files changed, 378 insertions, 604 deletions
diff --git a/block/backup.c b/block/backup.c
index 491fd14068..fec45e8212 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -218,15 +218,6 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }
 
-static void backup_iostatus_reset(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
-    if (s->target->blk) {
-        blk_iostatus_reset(s->target->blk);
-    }
-}
-
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 {
     BdrvDirtyBitmap *bm;
@@ -263,7 +254,6 @@ static const BlockJobDriver backup_job_driver = {
     .instance_size  = sizeof(BackupBlockJob),
     .job_type       = BLOCK_JOB_TYPE_BACKUP,
     .set_speed      = backup_set_speed,
-    .iostatus_reset = backup_iostatus_reset,
     .commit         = backup_commit,
     .abort          = backup_abort,
 };
@@ -272,11 +262,11 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                             bool read, int error)
 {
     if (read) {
-        return block_job_error_action(&job->common, job->common.bs,
-                                      job->on_source_error, true, error);
+        return block_job_error_action(&job->common, job->on_source_error,
+                                      true, error);
     } else {
-        return block_job_error_action(&job->common, job->target,
-                                      job->on_target_error, false, error);
+        return block_job_error_action(&job->common, job->on_target_error,
+                                      false, error);
     }
 }
 
@@ -388,7 +378,6 @@ static void coroutine_fn backup_run(void *opaque)
     BackupCompleteData *data;
     BlockDriverState *bs = job->common.bs;
     BlockDriverState *target = job->target;
-    BlockdevOnError on_target_error = job->on_target_error;
     NotifierWithReturn before_write = {
         .notify = backup_before_write_notify,
     };
@@ -404,11 +393,6 @@ static void coroutine_fn backup_run(void *opaque)
 
     job->done_bitmap = bitmap_new(end);
 
-    if (target->blk) {
-        blk_set_on_error(target->blk, on_target_error, on_target_error);
-        blk_iostatus_enable(target->blk);
-    }
-
     bdrv_add_before_write_notifier(bs, &before_write);
 
     if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
@@ -484,9 +468,6 @@ static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_unlock(&job->flush_rwlock);
     g_free(job->done_bitmap);
 
-    if (target->blk) {
-        blk_iostatus_disable(target->blk);
-    }
     bdrv_op_unblock_all(target, job->common.blocker);
 
     data = g_malloc(sizeof(*data));
@@ -515,13 +496,6 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
-    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
-        return;
-    }
-
     if (!bdrv_is_inserted(bs)) {
         error_setg(errp, "Device is not inserted: %s",
                    bdrv_get_device_name(bs));
diff --git a/block/blkverify.c b/block/blkverify.c
index 9414b7a84e..4045396a3d 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -293,22 +293,6 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
     return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
 }
 
-/* Propagate AioContext changes to ->test_file */
-static void blkverify_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_detach_aio_context(s->test_file->bs);
-}
-
-static void blkverify_attach_aio_context(BlockDriverState *bs,
-                                         AioContext *new_context)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_attach_aio_context(s->test_file->bs, new_context);
-}
-
 static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
 {
     BDRVBlkverifyState *s = bs->opaque;
@@ -356,9 +340,6 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_aio_writev                  = blkverify_aio_writev,
     .bdrv_aio_flush                   = blkverify_aio_flush,
 
-    .bdrv_attach_aio_context          = blkverify_attach_aio_context,
-    .bdrv_detach_aio_context          = blkverify_detach_aio_context,
-
     .is_filter                        = true,
     .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
 };
diff --git a/block/block-backend.c b/block/block-backend.c
index a1e2c7fa20..6928d61de4 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -34,6 +34,7 @@ struct BlockBackend {
     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+    BlockBackendPublic public;
 
     void *dev;                  /* attached device model, if any */
     /* TODO change to DeviceState when all users are qdevified */
@@ -74,6 +75,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
 };
 
 static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
 
 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -90,9 +92,26 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
     /* We're not supposed to call this function for root nodes */
     abort();
 }
+static void blk_root_drained_begin(BdrvChild *child);
+static void blk_root_drained_end(BdrvChild *child);
+
+static void blk_root_change_media(BdrvChild *child, bool load);
+static void blk_root_resize(BdrvChild *child);
+
+static const char *blk_root_get_name(BdrvChild *child)
+{
+    return blk_name(child->opaque);
+}
 
 static const BdrvChildRole child_root = {
-    .inherit_options = blk_root_inherit_options,
+    .inherit_options    = blk_root_inherit_options,
+
+    .change_media       = blk_root_change_media,
+    .resize             = blk_root_resize,
+    .get_name           = blk_root_get_name,
+
+    .drained_begin      = blk_root_drained_begin,
+    .drained_end        = blk_root_drained_end,
 };
 
 /*
@@ -106,8 +125,12 @@ BlockBackend *blk_new(Error **errp)
 
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
+    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+    qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
+
     QTAILQ_INSERT_TAIL(&block_backends, blk, link);
     return blk;
 }
@@ -128,7 +151,7 @@ BlockBackend *blk_new_with_bs(Error **errp)
 
     bs = bdrv_new_root();
     blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    bs->blk = blk;
+    blk->root->opaque = blk;
     return blk;
 }
 
@@ -177,10 +200,6 @@ static void blk_delete(BlockBackend *blk)
     }
     assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
     assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
-    if (blk->root_state.throttle_state) {
-        g_free(blk->root_state.throttle_group);
-        throttle_group_unref(blk->root_state.throttle_state);
-    }
     QTAILQ_REMOVE(&block_backends, blk, link);
     drive_info_del(blk->legacy_dinfo);
     block_acct_cleanup(&blk->stats);
@@ -267,28 +286,50 @@ BlockBackend *blk_next(BlockBackend *blk)
                : QTAILQ_FIRST(&monitor_block_backends);
 }
 
-/*
- * Iterates over all BlockDriverStates which are attached to a BlockBackend.
- * This function is for use by bdrv_next().
- *
- * @bs must be NULL or a BDS that is attached to a BB.
- */
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
-{
+struct BdrvNextIterator {
+    enum {
+        BDRV_NEXT_BACKEND_ROOTS,
+        BDRV_NEXT_MONITOR_OWNED,
+    } phase;
     BlockBackend *blk;
+    BlockDriverState *bs;
+};
 
-    if (bs) {
-        assert(bs->blk);
-        blk = bs->blk;
-    } else {
-        blk = NULL;
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
+{
+    if (!it) {
+        it = g_new(BdrvNextIterator, 1);
+        *it = (BdrvNextIterator) {
+            .phase = BDRV_NEXT_BACKEND_ROOTS,
+        };
+    }
+
+    /* First, return all root nodes of BlockBackends. In order to avoid
+     * returning a BDS twice when multiple BBs refer to it, we only return it
+     * if the BB is the first one in the parent list of the BDS. */
+    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+        do {
+            it->blk = blk_all_next(it->blk);
+            *bs = it->blk ? blk_bs(it->blk) : NULL;
+        } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk));
+
+        if (*bs) {
+            return it;
+        }
+        it->phase = BDRV_NEXT_MONITOR_OWNED;
     }
 
+    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+     * BDSes that are attached to a BlockBackend here; they have been handled
+     * by the above block already */
     do {
-        blk = blk_all_next(blk);
-    } while (blk && !blk->root);
+        it->bs = bdrv_next_monitor_owned(it->bs);
+        *bs = it->bs;
+    } while (*bs && bdrv_has_blk(*bs));
 
-    return blk ? blk->root->bs : NULL;
+    return *bs ? it : NULL;
 }
 
 /*
@@ -375,6 +416,26 @@ BlockDriverState *blk_bs(BlockBackend *blk)
     return blk->root ? blk->root->bs : NULL;
 }
 
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+{
+    BdrvChild *child;
+    QLIST_FOREACH(child, &bs->parents, next_parent) {
+        if (child->role == &child_root) {
+            return child->opaque;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+    return bdrv_first_blk(bs) != NULL;
+}
+
 /*
  * Return @blk's DriveInfo if any, else null.
  */
@@ -411,17 +472,33 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
 }
 
 /*
+ * Returns a pointer to the publicly accessible fields of @blk.
+ */
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
+{
+    return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+    return container_of(public, BlockBackend, public);
+}
+
+/*
  * Disassociates the currently associated BlockDriverState from @blk.
  */
 void blk_remove_bs(BlockBackend *blk)
 {
-    assert(blk->root->bs->blk == blk);
-
     notifier_list_notify(&blk->remove_bs_notifiers, blk);
+    if (blk->public.throttle_state) {
+        throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+    }
 
     blk_update_root_state(blk);
 
-    blk->root->bs->blk = NULL;
     bdrv_root_unref_child(blk->root);
     blk->root = NULL;
 }
@@ -431,12 +508,15 @@ void blk_remove_bs(BlockBackend *blk)
  */
 void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
 {
-    assert(!blk->root && !bs->blk);
     bdrv_ref(bs);
     blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    bs->blk = blk;
+    blk->root->opaque = blk;
 
     notifier_list_notify(&blk->insert_bs_notifiers, blk);
+    if (blk->public.throttle_state) {
+        throttle_timers_attach_aio_context(
+            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
+    }
 }
 
 /*
@@ -525,6 +605,11 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
     }
 }
 
+static void blk_root_change_media(BdrvChild *child, bool load)
+{
+    blk_dev_change_media_cb(child->opaque, load);
+}
+
 /*
  * Does @blk's attached device model have removable media?
  * %true if no device model is attached.
@@ -579,8 +664,10 @@ bool blk_dev_is_medium_locked(BlockBackend *blk)
 /*
  * Notify @blk's attached device model of a backend size change.
  */
-void blk_dev_resize_cb(BlockBackend *blk)
+static void blk_root_resize(BdrvChild *child)
 {
+    BlockBackend *blk = child->opaque;
+
     if (blk->dev_ops && blk->dev_ops->resize_cb) {
         blk->dev_ops->resize_cb(blk->dev_opaque);
     }
@@ -692,6 +779,11 @@ static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
         return ret;
     }
 
+    /* throttling disk I/O */
+    if (blk->public.throttle_state) {
+        throttle_group_co_io_limits_intercept(blk, bytes, false);
+    }
+
     return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
 }
 
@@ -706,6 +798,11 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
         return ret;
     }
 
+    /* throttling disk I/O */
+    if (blk->public.throttle_state) {
+        throttle_group_co_io_limits_intercept(blk, bytes, true);
+    }
+
     if (!blk->enable_write_cache) {
         flags |= BDRV_REQ_FUA;
     }
@@ -775,7 +872,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                           int count)
 {
-    BlockDriverState *bs = blk_bs(blk);
     int ret;
 
     ret = blk_check_byte_request(blk, offset, count);
@@ -783,9 +879,9 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
         return ret;
     }
 
-    bdrv_no_throttling_begin(bs);
+    blk_root_drained_begin(blk->root);
     ret = blk_pread(blk, offset, buf, count);
-    bdrv_no_throttling_end(bs);
+    blk_root_drained_end(blk->root);
     return ret;
 }
 
@@ -1008,20 +1104,6 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
     bdrv_aio_cancel_async(acb);
 }
 
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
-    int i, ret;
-
-    for (i = 0; i < num_reqs; i++) {
-        ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
-}
-
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
     if (!blk_is_available(blk)) {
@@ -1334,7 +1416,14 @@ void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
     BlockDriverState *bs = blk_bs(blk);
 
     if (bs) {
+        if (blk->public.throttle_state) {
+            throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+        }
         bdrv_set_aio_context(bs, new_context);
+        if (blk->public.throttle_state) {
+            throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+                                               new_context);
+        }
     }
 }
 
@@ -1499,19 +1588,6 @@ void blk_update_root_state(BlockBackend *blk)
     blk->root_state.open_flags    = blk->root->bs->open_flags;
     blk->root_state.read_only     = blk->root->bs->read_only;
     blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
-
-    if (blk->root_state.throttle_group) {
-        g_free(blk->root_state.throttle_group);
-        throttle_group_unref(blk->root_state.throttle_state);
-    }
-    if (blk->root->bs->throttle_state) {
-        const char *name = throttle_group_get_name(blk->root->bs);
-        blk->root_state.throttle_group = g_strdup(name);
-        blk->root_state.throttle_state = throttle_group_incref(name);
-    } else {
-        blk->root_state.throttle_group = NULL;
-        blk->root_state.throttle_state = NULL;
-    }
 }
 
 /*
@@ -1522,9 +1598,6 @@ void blk_update_root_state(BlockBackend *blk)
 void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
 {
     bs->detect_zeroes = blk->root_state.detect_zeroes;
-    if (blk->root_state.throttle_group) {
-        bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
-    }
 }
 
 /*
@@ -1587,3 +1660,59 @@ int blk_flush_all(void)
 
     return result;
 }
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+    throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+    assert(blk->public.throttle_state);
+    bdrv_drained_begin(blk_bs(blk));
+    throttle_group_unregister_blk(blk);
+    bdrv_drained_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+    assert(!blk->public.throttle_state);
+    throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+    /* this BB is not part of any group */
+    if (!blk->public.throttle_state) {
+        return;
+    }
+
+    /* this BB is a part of the same group than the one we want */
+    if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+        return;
+    }
+
+    /* need to change the group this bs belong to */
+    blk_io_limits_disable(blk);
+    blk_io_limits_enable(blk, group);
+}
+
+static void blk_root_drained_begin(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+
+    if (blk->public.io_limits_disabled++ == 0) {
+        throttle_group_restart_blk(blk);
+    }
+}
+
+static void blk_root_drained_end(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+
+    assert(blk->public.io_limits_disabled);
+    --blk->public.io_limits_disabled;
+}
diff --git a/block/commit.c b/block/commit.c
index cba0e8c1e8..f308c8c6f0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -214,13 +214,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
     BlockDriverState *overlay_bs;
     Error *local_err = NULL;
 
-    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, "Invalid parameter combination");
-        return;
-    }
-
     assert(top != bs);
     if (top == base) {
         error_setg(errp, "Invalid files for merge: top and base are the same");
diff --git a/block/io.c b/block/io.c
index cd6d71a503..60a6bd8bdb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -27,7 +27,6 @@
 #include "sysemu/block-backend.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
-#include "block/throttle-groups.h"
 #include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
@@ -46,56 +45,26 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
 
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
-                        ThrottleConfig *cfg)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
-    throttle_group_config(bs, cfg);
-}
+    BdrvChild *c;
 
-void bdrv_no_throttling_begin(BlockDriverState *bs)
-{
-    if (bs->io_limits_disabled++ == 0) {
-        throttle_group_restart_bs(bs);
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->drained_begin) {
+            c->role->drained_begin(c);
+        }
     }
 }
 
-void bdrv_no_throttling_end(BlockDriverState *bs)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
 {
-    assert(bs->io_limits_disabled);
-    --bs->io_limits_disabled;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
-    assert(bs->throttle_state);
-    bdrv_no_throttling_begin(bs);
-    throttle_group_unregister_bs(bs);
-    bdrv_no_throttling_end(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
-    assert(!bs->throttle_state);
-    throttle_group_register_bs(bs, group);
-}
-
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
-{
-    /* this bs is not part of any group */
-    if (!bs->throttle_state) {
-        return;
-    }
+    BdrvChild *c;
 
-    /* this bs is a part of the same group than the one we want */
-    if (!g_strcmp0(throttle_group_get_name(bs), group)) {
-        return;
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->drained_end) {
+            c->role->drained_end(c);
+        }
     }
-
-    /* need to change the group this bs belong to */
-    bdrv_io_limits_disable(bs);
-    bdrv_io_limits_enable(bs, group);
 }
 
 void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -182,12 +151,6 @@ bool bdrv_requests_pending(BlockDriverState *bs)
     if (!QLIST_EMPTY(&bs->tracked_requests)) {
         return true;
     }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
-        return true;
-    }
 
     QLIST_FOREACH(child, &bs->children, next) {
         if (bdrv_requests_pending(child->bs)) {
@@ -275,17 +238,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
  */
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
 {
-    bdrv_no_throttling_begin(bs);
+    bdrv_parent_drained_begin(bs);
     bdrv_io_unplugged_begin(bs);
     bdrv_drain_recurse(bs);
     bdrv_co_yield_to_drain(bs);
     bdrv_io_unplugged_end(bs);
-    bdrv_no_throttling_end(bs);
+    bdrv_parent_drained_end(bs);
 }
 
 void bdrv_drain(BlockDriverState *bs)
 {
-    bdrv_no_throttling_begin(bs);
+    bdrv_parent_drained_begin(bs);
     bdrv_io_unplugged_begin(bs);
     bdrv_drain_recurse(bs);
     if (qemu_in_coroutine()) {
@@ -294,7 +257,7 @@ void bdrv_drain(BlockDriverState *bs)
         bdrv_drain_poll(bs);
     }
     bdrv_io_unplugged_end(bs);
-    bdrv_no_throttling_end(bs);
+    bdrv_parent_drained_end(bs);
 }
 
 /*
@@ -307,17 +270,18 @@ void bdrv_drain_all(void)
 {
     /* Always run first iteration so any pending completion BHs run */
     bool busy = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
     GSList *aio_ctxs = NULL, *ctx;
 
-    while ((bs = bdrv_next(bs))) {
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
         if (bs->job) {
             block_job_pause(bs->job);
         }
-        bdrv_no_throttling_begin(bs);
+        bdrv_parent_drained_begin(bs);
         bdrv_io_unplugged_begin(bs);
         bdrv_drain_recurse(bs);
         aio_context_release(aio_context);
@@ -338,10 +302,10 @@ void bdrv_drain_all(void)
 
         for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
             AioContext *aio_context = ctx->data;
-            bs = NULL;
+            it = NULL;
 
             aio_context_acquire(aio_context);
-            while ((bs = bdrv_next(bs))) {
+            while ((it = bdrv_next(it, &bs))) {
                 if (aio_context == bdrv_get_aio_context(bs)) {
                     if (bdrv_requests_pending(bs)) {
                         busy = true;
@@ -354,13 +318,13 @@ void bdrv_drain_all(void)
         }
     }
 
-    bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    it = NULL;
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
         bdrv_io_unplugged_end(bs);
-        bdrv_no_throttling_end(bs);
+        bdrv_parent_drained_end(bs);
         if (bs->job) {
             block_job_resume(bs->job);
         }
@@ -1069,11 +1033,6 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
         flags |= BDRV_REQ_COPY_ON_READ;
     }
 
-    /* throttling disk I/O */
-    if (bs->throttle_state) {
-        throttle_group_co_io_limits_intercept(bs, bytes, false);
-    }
-
     /* Align read if necessary by padding qiov */
     if (offset & (align - 1)) {
         head_buf = qemu_blockalign(bs, align);
@@ -1430,11 +1389,6 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
         return ret;
     }
 
-    /* throttling disk I/O */
-    if (bs->throttle_state) {
-        throttle_group_co_io_limits_intercept(bs, bytes, true);
-    }
-
     /*
      * Align write if necessary by performing a read-modify-write cycle.
      * Pad qiov with the read parts and be sure to have a tracked request not
@@ -1925,200 +1879,6 @@ BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
                                  cb, opaque, true);
 }
 
-
-typedef struct MultiwriteCB {
-    int error;
-    int num_requests;
-    int num_callbacks;
-    struct {
-        BlockCompletionFunc *cb;
-        void *opaque;
-        QEMUIOVector *free_qiov;
-    } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
-    int i;
-
-    for (i = 0; i < mcb->num_callbacks; i++) {
-        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
-        if (mcb->callbacks[i].free_qiov) {
-            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
-        }
-        g_free(mcb->callbacks[i].free_qiov);
-    }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
-    MultiwriteCB *mcb = opaque;
-
-    trace_multiwrite_cb(mcb, ret);
-
-    if (ret < 0 && !mcb->error) {
-        mcb->error = ret;
-    }
-
-    mcb->num_requests--;
-    if (mcb->num_requests == 0) {
-        multiwrite_user_cb(mcb);
-        g_free(mcb);
-    }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
-    const BlockRequest *req1 = a, *req2 = b;
-
-    /*
-     * Note that we can't simply subtract req2->sector from req1->sector
-     * here as that could overflow the return value.
-     */
-    if (req1->sector > req2->sector) {
-        return 1;
-    } else if (req1->sector < req2->sector) {
-        return -1;
-    } else {
-        return 0;
-    }
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
-    int num_reqs, MultiwriteCB *mcb)
-{
-    int i, outidx;
-
-    // Sort requests by start sector
-    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
-    // Check if adjacent requests touch the same clusters. If so, combine them,
-    // filling up gaps with zero sectors.
-    outidx = 0;
-    for (i = 1; i < num_reqs; i++) {
-        int merge = 0;
-        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
-        // Handle exactly sequential writes and overlapping writes.
-        if (reqs[i].sector <= oldreq_last) {
-            merge = 1;
-        }
-
-        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
-            bs->bl.max_iov) {
-            merge = 0;
-        }
-
-        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
-            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
-            merge = 0;
-        }
-
-        if (merge) {
-            size_t size;
-            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
-            qemu_iovec_init(qiov,
-                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
-            // Add the first request to the merged one. If the requests are
-            // overlapping, drop the last sectors of the first request.
-            size = (reqs[i].sector - reqs[outidx].sector) << 9;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
-            // We should need to add any zeros between the two requests
-            assert (reqs[i].sector <= oldreq_last);
-
-            // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
-            // Add tail of first request, if necessary
-            if (qiov->size < reqs[outidx].qiov->size) {
-                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
-                                  reqs[outidx].qiov->size - qiov->size);
-            }
-
-            reqs[outidx].nb_sectors = qiov->size >> 9;
-            reqs[outidx].qiov = qiov;
-
-            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
-        } else {
-            outidx++;
-            reqs[outidx].sector     = reqs[i].sector;
-            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
-            reqs[outidx].qiov       = reqs[i].qiov;
-        }
-    }
-
-    if (bs->blk) {
-        block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
-                              num_reqs - outidx - 1);
-    }
-
-    return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
-    MultiwriteCB *mcb;
-    int i;
-
-    /* don't submit writes if we don't have a medium */
-    if (bs->drv == NULL) {
-        for (i = 0; i < num_reqs; i++) {
-            reqs[i].error = -ENOMEDIUM;
-        }
-        return -1;
-    }
-
-    if (num_reqs == 0) {
-        return 0;
-    }
-
-    // Create MultiwriteCB structure
-    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
-    mcb->num_requests = 0;
-    mcb->num_callbacks = num_reqs;
-
-    for (i = 0; i < num_reqs; i++) {
-        mcb->callbacks[i].cb = reqs[i].cb;
-        mcb->callbacks[i].opaque = reqs[i].opaque;
-    }
-
-    // Check for mergable requests
-    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
-    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
-    /* Run the aio requests. */
-    mcb->num_requests = num_reqs;
-    for (i = 0; i < num_reqs; i++) {
-        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
-                              reqs[i].nb_sectors, reqs[i].flags,
-                              multiwrite_cb, mcb,
-                              true);
-    }
-
-    return 0;
-}
-
 void bdrv_aio_cancel(BlockAIOCB *acb)
 {
     qemu_aio_ref(acb);
@@ -2789,11 +2549,14 @@ void bdrv_drained_begin(BlockDriverState *bs)
     if (!bs->quiesce_counter++) {
         aio_disable_external(bdrv_get_aio_context(bs));
     }
+    bdrv_parent_drained_begin(bs);
     bdrv_drain(bs);
 }
 
 void bdrv_drained_end(BlockDriverState *bs)
 {
+    bdrv_parent_drained_end(bs);
+
     assert(bs->quiesce_counter > 0);
     if (--bs->quiesce_counter > 0) {
         return;
diff --git a/block/mirror.c b/block/mirror.c
index 039f48125e..b9986d8218 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -80,11 +80,11 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
 {
     s->synced = false;
     if (read) {
-        return block_job_error_action(&s->common, s->common.bs,
-                                      s->on_source_error, true, error);
+        return block_job_error_action(&s->common, s->on_source_error,
+                                      true, error);
     } else {
-        return block_job_error_action(&s->common, s->target,
-                                      s->on_target_error, false, error);
+        return block_job_error_action(&s->common, s->on_target_error,
+                                      false, error);
     }
 }
 
@@ -468,7 +468,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
 
         /* This was checked in mirror_start_job(), but meanwhile one of the
          * nodes could have been newly attached to a BlockBackend. */
-        if (to_replace->blk && s->target->blk) {
+        if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) {
             error_report("block job: Can't create node with two BlockBackends");
             data->ret = -EINVAL;
             goto out;
@@ -710,9 +710,6 @@ immediate_exit:
     g_free(s->cow_bitmap);
     g_free(s->in_flight_bitmap);
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
-    if (s->target->blk) {
-        blk_iostatus_disable(s->target->blk);
-    }
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -739,15 +736,6 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }
 
-static void mirror_iostatus_reset(BlockJob *job)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
-    if (s->target->blk) {
-        blk_iostatus_reset(s->target->blk);
-    }
-}
-
 static void mirror_complete(BlockJob *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -793,7 +781,6 @@ static const BlockJobDriver mirror_job_driver = {
     .instance_size = sizeof(MirrorBlockJob),
     .job_type      = BLOCK_JOB_TYPE_MIRROR,
     .set_speed     = mirror_set_speed,
-    .iostatus_reset= mirror_iostatus_reset,
     .complete      = mirror_complete,
 };
 
@@ -801,8 +788,6 @@ static const BlockJobDriver commit_active_job_driver = {
     .instance_size = sizeof(MirrorBlockJob),
     .job_type      = BLOCK_JOB_TYPE_COMMIT,
     .set_speed     = mirror_set_speed,
-    .iostatus_reset
-                   = mirror_iostatus_reset,
     .complete      = mirror_complete,
 };
 
@@ -827,13 +812,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 
     assert ((granularity & (granularity - 1)) == 0);
 
-    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
-        return;
-    }
-
     if (buf_size < 0) {
         error_setg(errp, "Invalid parameter 'buf-size'");
         return;
@@ -853,7 +831,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     } else {
         replaced_bs = bs;
     }
-    if (replaced_bs->blk && target->blk) {
+    if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) {
         error_setg(errp, "Can't create node with two BlockBackends");
         return;
     }
@@ -882,10 +860,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 
     bdrv_op_block_all(s->target, s->common.blocker);
 
-    if (s->target->blk) {
-        blk_set_on_error(s->target->blk, on_target_error, on_target_error);
-        blk_iostatus_enable(s->target->blk);
-    }
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
diff --git a/block/qapi.c b/block/qapi.c
index c5f6ba643c..5594f74d17 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -67,10 +67,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
     info->detect_zeroes = bs->detect_zeroes;
 
-    if (bs->throttle_state) {
+    if (blk && blk_get_public(blk)->throttle_state) {
         ThrottleConfig cfg;
 
-        throttle_group_get_config(bs, &cfg);
+        throttle_group_get_config(blk, &cfg);
 
         info->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
         info->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
@@ -118,7 +118,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
         info->iops_size = cfg.op_size;
 
         info->has_group = true;
-        info->group = g_strdup(throttle_group_get_name(bs));
+        info->group = g_strdup(throttle_group_get_name(blk));
     }
 
     info->write_threshold = bdrv_write_threshold_get(bs);
diff --git a/block/qcow2.c b/block/qcow2.c
index 49d7cff17a..c9306a78c1 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2413,7 +2413,7 @@ static bool is_zero_cluster(BlockDriverState *bs, int64_t start)
     BlockDriverState *file;
     int64_t res = bdrv_get_block_status_above(bs, NULL, start,
                                               s->cluster_sectors, &nr, &file);
-    return res >= 0 && ((res & BDRV_BLOCK_ZERO) || !(res & BDRV_BLOCK_DATA));
+    return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == s->cluster_sectors;
 }
 
 static bool is_zero_cluster_top_locked(BlockDriverState *bs, int64_t start)
@@ -2424,6 +2424,7 @@ static bool is_zero_cluster_top_locked(BlockDriverState *bs, int64_t start)
     int ret;
 
     ret = qcow2_get_cluster_offset(bs, start << BDRV_SECTOR_BITS, &nr, &off);
+    assert(nr == s->cluster_sectors);
     return ret == QCOW2_CLUSTER_UNALLOCATED || ret == QCOW2_CLUSTER_ZERO;
 }
 
diff --git a/block/quorum.c b/block/quorum.c
index 1ec3511528..ec6f3b9059 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -989,27 +989,6 @@ static void quorum_close(BlockDriverState *bs)
     g_free(s->children);
 }
 
-static void quorum_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_detach_aio_context(s->children[i]->bs);
-    }
-}
-
-static void quorum_attach_aio_context(BlockDriverState *bs,
-                                      AioContext *new_context)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_attach_aio_context(s->children[i]->bs, new_context);
-    }
-}
-
 static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
                              Error **errp)
 {
@@ -1127,9 +1106,6 @@ static BlockDriver bdrv_quorum = {
     .bdrv_aio_readv                     = quorum_aio_readv,
     .bdrv_aio_writev                    = quorum_aio_writev,
 
-    .bdrv_detach_aio_context            = quorum_detach_aio_context,
-    .bdrv_attach_aio_context            = quorum_attach_aio_context,
-
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
 
diff --git a/block/snapshot.c b/block/snapshot.c
index e9d721df68..3917ec5c91 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -373,9 +373,10 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
 bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
 {
     bool ok = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (ok && (bs = bdrv_next(bs))) {
+    while (ok && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -393,10 +394,11 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
                              Error **err)
 {
     int ret = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
     QEMUSnapshotInfo sn1, *snapshot = &sn1;
 
-    while (ret == 0 && (bs = bdrv_next(bs))) {
+    while (ret == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -415,9 +417,10 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
 int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
 {
     int err = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (err == 0 && (bs = bdrv_next(bs))) {
+    while (err == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -435,9 +438,10 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
 {
     QEMUSnapshotInfo sn;
     int err = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (err == 0 && (bs = bdrv_next(bs))) {
+    while (err == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -457,9 +461,10 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
                              BlockDriverState **first_bad_bs)
 {
     int err = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (err == 0 && (bs = bdrv_next(bs))) {
+    while (err == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -480,9 +485,10 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
 BlockDriverState *bdrv_all_find_vmstate_bs(void)
 {
     bool not_found = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (not_found && (bs = bdrv_next(bs))) {
+    while (not_found && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
diff --git a/block/stream.c b/block/stream.c
index 332b9a183e..40aa32212e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -163,8 +163,7 @@ wait:
         }
         if (ret < 0) {
             BlockErrorAction action =
-                block_job_error_action(&s->common, s->common.bs, s->on_error,
-                                       true, -ret);
+                block_job_error_action(&s->common, s->on_error, true, -ret);
             if (action == BLOCK_ERROR_ACTION_STOP) {
                 n = 0;
                 continue;
@@ -224,13 +223,6 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
 {
     StreamBlockJob *s;
 
-    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
-        return;
-    }
-
     s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 9ac063a0cd..59545e287e 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -23,13 +23,14 @@
  */
 
 #include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
 #include "block/throttle-groups.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "sysemu/qtest.h"
 
 /* The ThrottleGroup structure (with its ThrottleState) is shared
- * among different BlockDriverState and it's independent from
+ * among different BlockBackends and it's independent from
  * AioContext, so in order to use it from different threads it needs
  * its own locking.
  *
@@ -39,26 +40,26 @@
  * The whole ThrottleGroup structure is private and invisible to
  * outside users, that only use it through its ThrottleState.
  *
- * In addition to the ThrottleGroup structure, BlockDriverState has
+ * In addition to the ThrottleGroup structure, BlockBackendPublic has
  * fields that need to be accessed by other members of the group and
- * therefore also need to be protected by this lock. Once a BDS is
- * registered in a group those fields can be accessed by other threads
- * any time.
+ * therefore also need to be protected by this lock. Once a
+ * BlockBackend is registered in a group those fields can be accessed
+ * by other threads any time.
  *
  * Again, all this is handled internally and is mostly transparent to
  * the outside. The 'throttle_timers' field however has an additional
  * constraint because it may be temporarily invalid (see for example
  * bdrv_set_aio_context()). Therefore in this file a thread will
- * access some other BDS's timers only after verifying that that BDS
- * has throttled requests in the queue.
+ * access some other BlockBackend's timers only after verifying that
+ * that BlockBackend has throttled requests in the queue.
  */
 typedef struct ThrottleGroup {
     char *name; /* This is constant during the lifetime of the group */
 
     QemuMutex lock; /* This lock protects the following four fields */
     ThrottleState ts;
-    QLIST_HEAD(, BlockDriverState) head;
-    BlockDriverState *tokens[2];
+    QLIST_HEAD(, BlockBackendPublic) head;
+    BlockBackend *tokens[2];
     bool any_timer_armed[2];
 
     /* These two are protected by the global throttle_groups_lock */
@@ -132,94 +133,95 @@ void throttle_group_unref(ThrottleState *ts)
     qemu_mutex_unlock(&throttle_groups_lock);
 }
 
-/* Get the name from a BlockDriverState's ThrottleGroup. The name (and
- * the pointer) is guaranteed to remain constant during the lifetime
- * of the group.
+/* Get the name from a BlockBackend's ThrottleGroup. The name (and the pointer)
+ * is guaranteed to remain constant during the lifetime of the group.
  *
- * @bs:   a BlockDriverState that is member of a throttling group
+ * @blk:  a BlockBackend that is member of a throttling group
  * @ret:  the name of the group.
  */
-const char *throttle_group_get_name(BlockDriverState *bs)
+const char *throttle_group_get_name(BlockBackend *blk)
 {
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
     return tg->name;
 }
 
-/* Return the next BlockDriverState in the round-robin sequence,
- * simulating a circular list.
+/* Return the next BlockBackend in the round-robin sequence, simulating a
+ * circular list.
  *
  * This assumes that tg->lock is held.
  *
- * @bs:  the current BlockDriverState
- * @ret: the next BlockDriverState in the sequence
+ * @blk: the current BlockBackend
+ * @ret: the next BlockBackend in the sequence
  */
-static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs)
+static BlockBackend *throttle_group_next_blk(BlockBackend *blk)
 {
-    ThrottleState *ts = bs->throttle_state;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleState *ts = blkp->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    BlockDriverState *next = QLIST_NEXT(bs, round_robin);
+    BlockBackendPublic *next = QLIST_NEXT(blkp, round_robin);
 
     if (!next) {
-        return QLIST_FIRST(&tg->head);
+        next = QLIST_FIRST(&tg->head);
     }
 
-    return next;
+    return blk_by_public(next);
 }
 
-/* Return the next BlockDriverState in the round-robin sequence with
- * pending I/O requests.
+/* Return the next BlockBackend in the round-robin sequence with pending I/O
+ * requests.
  *
  * This assumes that tg->lock is held.
  *
- * @bs:        the current BlockDriverState
+ * @blk:       the current BlockBackend
  * @is_write:  the type of operation (read/write)
- * @ret:       the next BlockDriverState with pending requests, or bs
- *             if there is none.
+ * @ret:       the next BlockBackend with pending requests, or blk if there is
+ *             none.
  */
-static BlockDriverState *next_throttle_token(BlockDriverState *bs,
-                                             bool is_write)
+static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write)
 {
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    BlockDriverState *token, *start;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+    BlockBackend *token, *start;
 
     start = token = tg->tokens[is_write];
 
     /* get next bs round in round robin style */
-    token = throttle_group_next_bs(token);
-    while (token != start && !token->pending_reqs[is_write]) {
-        token = throttle_group_next_bs(token);
+    token = throttle_group_next_blk(token);
+    while (token != start && !blkp->pending_reqs[is_write]) {
+        token = throttle_group_next_blk(token);
     }
 
     /* If no IO are queued for scheduling on the next round robin token
      * then decide the token is the current bs because chances are
      * the current bs get the current request queued.
      */
-    if (token == start && !token->pending_reqs[is_write]) {
-        token = bs;
+    if (token == start && !blkp->pending_reqs[is_write]) {
+        token = blk;
     }
 
     return token;
 }
 
-/* Check if the next I/O request for a BlockDriverState needs to be
- * throttled or not. If there's no timer set in this group, set one
- * and update the token accordingly.
+/* Check if the next I/O request for a BlockBackend needs to be throttled or
+ * not. If there's no timer set in this group, set one and update the token
+ * accordingly.
  *
  * This assumes that tg->lock is held.
  *
- * @bs:         the current BlockDriverState
+ * @blk:        the current BlockBackend
  * @is_write:   the type of operation (read/write)
  * @ret:        whether the I/O request needs to be throttled or not
  */
-static bool throttle_group_schedule_timer(BlockDriverState *bs,
-                                          bool is_write)
+static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
 {
-    ThrottleState *ts = bs->throttle_state;
-    ThrottleTimers *tt = &bs->throttle_timers;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleState *ts = blkp->throttle_state;
+    ThrottleTimers *tt = &blkp->throttle_timers;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     bool must_wait;
 
-    if (bs->io_limits_disabled) {
+    if (blkp->io_limits_disabled) {
         return false;
     }
 
@@ -230,9 +232,9 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
 
     must_wait = throttle_schedule_timer(ts, tt, is_write);
 
-    /* If a timer just got armed, set bs as the current token */
+    /* If a timer just got armed, set blk as the current token */
     if (must_wait) {
-        tg->tokens[is_write] = bs;
+        tg->tokens[is_write] = blk;
         tg->any_timer_armed[is_write] = true;
     }
 
@@ -243,18 +245,19 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
  *
  * This assumes that tg->lock is held.
  *
- * @bs:        the current BlockDriverState
+ * @blk:       the current BlockBackend
  * @is_write:  the type of operation (read/write)
  */
-static void schedule_next_request(BlockDriverState *bs, bool is_write)
+static void schedule_next_request(BlockBackend *blk, bool is_write)
 {
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
     bool must_wait;
-    BlockDriverState *token;
+    BlockBackend *token;
 
     /* Check if there's any pending request to schedule next */
-    token = next_throttle_token(bs, is_write);
-    if (!token->pending_reqs[is_write]) {
+    token = next_throttle_token(blk, is_write);
+    if (!blkp->pending_reqs[is_write]) {
         return;
     }
 
@@ -263,12 +266,12 @@ static void schedule_next_request(BlockDriverState *bs, bool is_write)
 
     /* If it doesn't have to wait, queue it for immediate execution */
     if (!must_wait) {
-        /* Give preference to requests from the current bs */
+        /* Give preference to requests from the current blk */
         if (qemu_in_coroutine() &&
-            qemu_co_queue_next(&bs->throttled_reqs[is_write])) {
-            token = bs;
+            qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+            token = blk;
         } else {
-            ThrottleTimers *tt = &token->throttle_timers;
+            ThrottleTimers *tt = &blkp->throttle_timers;
             int64_t now = qemu_clock_get_ns(tt->clock_type);
             timer_mod(tt->timers[is_write], now + 1);
             tg->any_timer_armed[is_write] = true;
@@ -281,48 +284,50 @@ static void schedule_next_request(BlockDriverState *bs, bool is_write)
  * if necessary, and schedule the next request using a round robin
  * algorithm.
  *
- * @bs:        the current BlockDriverState
+ * @blk:       the current BlockBackend
  * @bytes:     the number of bytes for this I/O
  * @is_write:  the type of operation (read/write)
  */
-void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
+void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
                                                         unsigned int bytes,
                                                         bool is_write)
 {
     bool must_wait;
-    BlockDriverState *token;
+    BlockBackend *token;
 
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
     qemu_mutex_lock(&tg->lock);
 
     /* First we check if this I/O has to be throttled. */
-    token = next_throttle_token(bs, is_write);
+    token = next_throttle_token(blk, is_write);
     must_wait = throttle_group_schedule_timer(token, is_write);
 
     /* Wait if there's a timer set or queued requests of this type */
-    if (must_wait || bs->pending_reqs[is_write]) {
-        bs->pending_reqs[is_write]++;
+    if (must_wait || blkp->pending_reqs[is_write]) {
+        blkp->pending_reqs[is_write]++;
         qemu_mutex_unlock(&tg->lock);
-        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+        qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
         qemu_mutex_lock(&tg->lock);
-        bs->pending_reqs[is_write]--;
+        blkp->pending_reqs[is_write]--;
     }
 
     /* The I/O will be executed, so do the accounting */
-    throttle_account(bs->throttle_state, is_write, bytes);
+    throttle_account(blkp->throttle_state, is_write, bytes);
 
     /* Schedule the next request */
-    schedule_next_request(bs, is_write);
+    schedule_next_request(blk, is_write);
 
     qemu_mutex_unlock(&tg->lock);
 }
 
-void throttle_group_restart_bs(BlockDriverState *bs)
+void throttle_group_restart_blk(BlockBackend *blk)
 {
+    BlockBackendPublic *blkp = blk_get_public(blk);
     int i;
 
     for (i = 0; i < 2; i++) {
-        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+        while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
             ;
         }
     }
@@ -332,13 +337,14 @@ void throttle_group_restart_bs(BlockDriverState *bs)
  * to throttle_config(), but guarantees atomicity within the
  * throttling group.
  *
- * @bs:  a BlockDriverState that is member of the group
+ * @blk: a BlockBackend that is a member of the group
  * @cfg: the configuration to set
  */
-void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
+void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
 {
-    ThrottleTimers *tt = &bs->throttle_timers;
-    ThrottleState *ts = bs->throttle_state;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleTimers *tt = &blkp->throttle_timers;
+    ThrottleState *ts = blkp->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     qemu_mutex_lock(&tg->lock);
     /* throttle_config() cancels the timers */
@@ -351,20 +357,21 @@ void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
     throttle_config(ts, tt, cfg);
     qemu_mutex_unlock(&tg->lock);
 
-    qemu_co_enter_next(&bs->throttled_reqs[0]);
-    qemu_co_enter_next(&bs->throttled_reqs[1]);
+    qemu_co_enter_next(&blkp->throttled_reqs[0]);
+    qemu_co_enter_next(&blkp->throttled_reqs[1]);
 }
 
 /* Get the throttle configuration from a particular group. Similar to
  * throttle_get_config(), but guarantees atomicity within the
  * throttling group.
  *
- * @bs:  a BlockDriverState that is member of the group
+ * @blk: a BlockBackend that is a member of the group
  * @cfg: the configuration will be written here
  */
-void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
+void throttle_group_get_config(BlockBackend *blk, ThrottleConfig *cfg)
 {
-    ThrottleState *ts = bs->throttle_state;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleState *ts = blkp->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     qemu_mutex_lock(&tg->lock);
     throttle_get_config(ts, cfg);
@@ -374,12 +381,13 @@ void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
 /* ThrottleTimers callback. This wakes up a request that was waiting
  * because it had been throttled.
  *
- * @bs:        the BlockDriverState whose request had been throttled
+ * @blk:       the BlockBackend whose request had been throttled
  * @is_write:  the type of operation (read/write)
  */
-static void timer_cb(BlockDriverState *bs, bool is_write)
+static void timer_cb(BlockBackend *blk, bool is_write)
 {
-    ThrottleState *ts = bs->throttle_state;
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleState *ts = blkp->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     bool empty_queue;
 
@@ -389,13 +397,13 @@ static void timer_cb(BlockDriverState *bs, bool is_write)
     qemu_mutex_unlock(&tg->lock);
 
     /* Run the request that was waiting for this timer */
-    empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]);
+    empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
 
     /* If the request queue was empty then we have to take care of
      * scheduling the next one */
     if (empty_queue) {
         qemu_mutex_lock(&tg->lock);
-        schedule_next_request(bs, is_write);
+        schedule_next_request(blk, is_write);
         qemu_mutex_unlock(&tg->lock);
     }
 }
@@ -410,17 +418,17 @@ static void write_timer_cb(void *opaque)
     timer_cb(opaque, true);
 }
 
-/* Register a BlockDriverState in the throttling group, also
- * initializing its timers and updating its throttle_state pointer to
- * point to it. If a throttling group with that name does not exist
- * yet, it will be created.
+/* Register a BlockBackend in the throttling group, also initializing its
+ * timers and updating its throttle_state pointer to point to it. If a
+ * throttling group with that name does not exist yet, it will be created.
  *
- * @bs:        the BlockDriverState to insert
+ * @blk:       the BlockBackend to insert
  * @groupname: the name of the group
  */
-void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
+void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
 {
     int i;
+    BlockBackendPublic *blkp = blk_get_public(blk);
     ThrottleState *ts = throttle_group_incref(groupname);
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     int clock_type = QEMU_CLOCK_REALTIME;
@@ -430,67 +438,67 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
         clock_type = QEMU_CLOCK_VIRTUAL;
     }
 
-    bs->throttle_state = ts;
+    blkp->throttle_state = ts;
 
     qemu_mutex_lock(&tg->lock);
-    /* If the ThrottleGroup is new set this BlockDriverState as the token */
+    /* If the ThrottleGroup is new set this BlockBackend as the token */
     for (i = 0; i < 2; i++) {
         if (!tg->tokens[i]) {
-            tg->tokens[i] = bs;
+            tg->tokens[i] = blk;
         }
     }
 
-    QLIST_INSERT_HEAD(&tg->head, bs, round_robin);
+    QLIST_INSERT_HEAD(&tg->head, blkp, round_robin);
 
-    throttle_timers_init(&bs->throttle_timers,
-                         bdrv_get_aio_context(bs),
+    throttle_timers_init(&blkp->throttle_timers,
+                         blk_get_aio_context(blk),
                          clock_type,
                          read_timer_cb,
                          write_timer_cb,
-                         bs);
+                         blk);
 
     qemu_mutex_unlock(&tg->lock);
 }
 
-/* Unregister a BlockDriverState from its group, removing it from the
- * list, destroying the timers and setting the throttle_state pointer
- * to NULL.
+/* Unregister a BlockBackend from its group, removing it from the list,
+ * destroying the timers and setting the throttle_state pointer to NULL.
  *
- * The BlockDriverState must not have pending throttled requests, so
- * the caller has to drain them first.
+ * The BlockBackend must not have pending throttled requests, so the caller has
+ * to drain them first.
  *
  * The group will be destroyed if it's empty after this operation.
  *
- * @bs: the BlockDriverState to remove
+ * @blk: the BlockBackend to remove
  */
-void throttle_group_unregister_bs(BlockDriverState *bs)
+void throttle_group_unregister_blk(BlockBackend *blk)
 {
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
     int i;
 
-    assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0);
-    assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
-    assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
+    assert(blkp->pending_reqs[0] == 0 && blkp->pending_reqs[1] == 0);
+    assert(qemu_co_queue_empty(&blkp->throttled_reqs[0]));
+    assert(qemu_co_queue_empty(&blkp->throttled_reqs[1]));
 
     qemu_mutex_lock(&tg->lock);
     for (i = 0; i < 2; i++) {
-        if (tg->tokens[i] == bs) {
-            BlockDriverState *token = throttle_group_next_bs(bs);
-            /* Take care of the case where this is the last bs in the group */
-            if (token == bs) {
+        if (tg->tokens[i] == blk) {
+            BlockBackend *token = throttle_group_next_blk(blk);
+            /* Take care of the case where this is the last blk in the group */
+            if (token == blk) {
                 token = NULL;
             }
             tg->tokens[i] = token;
         }
     }
 
-    /* remove the current bs from the list */
-    QLIST_REMOVE(bs, round_robin);
-    throttle_timers_destroy(&bs->throttle_timers);
+    /* remove the current blk from the list */
+    QLIST_REMOVE(blkp, round_robin);
+    throttle_timers_destroy(&blkp->throttle_timers);
     qemu_mutex_unlock(&tg->lock);
 
     throttle_group_unref(&tg->ts);
-    bs->throttle_state = NULL;
+    blkp->throttle_state = NULL;
 }
 
 static void throttle_groups_init(void)
diff --git a/block/vmdk.c b/block/vmdk.c
index 1cb4b8529c..372e5edc15 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2344,27 +2344,6 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return 0;
 }
 
-static void vmdk_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_extents; i++) {
-        bdrv_detach_aio_context(s->extents[i].file->bs);
-    }
-}
-
-static void vmdk_attach_aio_context(BlockDriverState *bs,
-                                    AioContext *new_context)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_extents; i++) {
-        bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
-    }
-}
-
 static QemuOptsList vmdk_create_opts = {
     .name = "vmdk-create-opts",
     .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
@@ -2434,8 +2413,6 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_get_specific_info       = vmdk_get_specific_info,
     .bdrv_refresh_limits          = vmdk_refresh_limits,
     .bdrv_get_info                = vmdk_get_info,
-    .bdrv_detach_aio_context      = vmdk_detach_aio_context,
-    .bdrv_attach_aio_context      = vmdk_attach_aio_context,
 
     .supports_backing             = true,
     .create_opts                  = &vmdk_create_opts,
author	Peter Maydell <peter.maydell@linaro.org>	2016-05-19 16:54:12 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2016-05-19 16:54:12 +0100
commit	6bd8ab6889f45a42d69a3a65a4d6e7fc2453c84c (patch)
tree	8333fbb0dc3140307c828ed2ebc5b4e81facbbd7 /block
parent	776efef32439a31cb13a6acfe8aab833687745ad (diff)
parent	7753da2351e0b0ff6825d080aff58d73c994ff47 (diff)