aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-06-20 16:01:15 +0100
committerPeter Maydell <peter.maydell@linaro.org>2017-06-20 16:01:15 +0100
commit65a0e3e842df7f73ff2e4a61948f992a41e570a8 (patch)
treeb1364b41d6f6b270c589433478297e2a91289af6 /block
parent7e56accdaf35234b69c33c85e4a44a5d56325e53 (diff)
parent5b50bf77ce6e773b04a303a2912876c9a1bfca43 (diff)
Merge remote-tracking branch 'remotes/famz/tags/docker-and-block-pull-request' into staging
# gpg: Signature made Fri 16 Jun 2017 01:18:46 BST # gpg: using RSA key 0xCA35624C6A9171C6 # gpg: Good signature from "Fam Zheng <famz@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 5003 7CB7 9706 0F76 F021 AD56 CA35 624C 6A91 71C6 * remotes/famz/tags/docker-and-block-pull-request: (23 commits) block: make accounting thread-safe block: split BlockAcctStats creation and setup block: introduce block_account_one_io block: protect modification of dirty bitmaps with a mutex migration/block: reset dirty bitmap before reading block: introduce dirty_bitmap_mutex block: protect tracked_requests and flush_queue with reqs_lock block: access write_gen with atomics block: use Stat64 for wr_highest_offset util: add stats64 module throttle-groups: protect throttled requests with a CoMutex throttle-groups: do not use qemu_co_enter_next throttle-groups: only start one coroutine from drained_begin block: access io_plugged with atomic ops block: access wakeup with atomic ops block: access serialising_in_flight with atomic ops block: access io_limits_disabled with atomic ops block: access quiesce_counter with atomic ops block: access copy_on_read with atomic ops docker: Add flex and bison to centos6 image ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r--block/accounting.c78
-rw-r--r--block/block-backend.c6
-rw-r--r--block/dirty-bitmap.c112
-rw-r--r--block/io.c51
-rw-r--r--block/mirror.c14
-rw-r--r--block/nfs.c4
-rw-r--r--block/qapi.c2
-rw-r--r--block/sheepdog.c3
-rw-r--r--block/throttle-groups.c91
9 files changed, 267 insertions, 94 deletions
diff --git a/block/accounting.c b/block/accounting.c
index 3f457c4e73..87ef5bbfaa 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -32,23 +32,28 @@
static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
- bool account_failed)
+void block_acct_init(BlockAcctStats *stats)
{
- stats->account_invalid = account_invalid;
- stats->account_failed = account_failed;
-
+ qemu_mutex_init(&stats->lock);
if (qtest_enabled()) {
clock_type = QEMU_CLOCK_VIRTUAL;
}
}
+void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
+ bool account_failed)
+{
+ stats->account_invalid = account_invalid;
+ stats->account_failed = account_failed;
+}
+
void block_acct_cleanup(BlockAcctStats *stats)
{
BlockAcctTimedStats *s, *next;
QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
g_free(s);
}
+ qemu_mutex_destroy(&stats->lock);
}
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
@@ -58,12 +63,15 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
s = g_new0(BlockAcctTimedStats, 1);
s->interval_length = interval_length;
+ s->stats = stats;
+ qemu_mutex_lock(&stats->lock);
QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
timed_average_init(&s->latency[i], clock_type,
(uint64_t) interval_length * NANOSECONDS_PER_SECOND);
}
+ qemu_mutex_unlock(&stats->lock);
}
BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
@@ -86,7 +94,8 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
cookie->type = type;
}
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
+ bool failed)
{
BlockAcctTimedStats *s;
int64_t time_ns = qemu_clock_get_ns(clock_type);
@@ -98,31 +107,16 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
assert(cookie->type < BLOCK_MAX_IOTYPE);
- stats->nr_bytes[cookie->type] += cookie->bytes;
- stats->nr_ops[cookie->type]++;
- stats->total_time_ns[cookie->type] += latency_ns;
- stats->last_access_time_ns = time_ns;
+ qemu_mutex_lock(&stats->lock);
- QSLIST_FOREACH(s, &stats->intervals, entries) {
- timed_average_account(&s->latency[cookie->type], latency_ns);
+ if (failed) {
+ stats->failed_ops[cookie->type]++;
+ } else {
+ stats->nr_bytes[cookie->type] += cookie->bytes;
+ stats->nr_ops[cookie->type]++;
}
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
- assert(cookie->type < BLOCK_MAX_IOTYPE);
-
- stats->failed_ops[cookie->type]++;
-
- if (stats->account_failed) {
- BlockAcctTimedStats *s;
- int64_t time_ns = qemu_clock_get_ns(clock_type);
- int64_t latency_ns = time_ns - cookie->start_time_ns;
-
- if (qtest_enabled()) {
- latency_ns = qtest_latency_ns;
- }
+ if (!failed || stats->account_failed) {
stats->total_time_ns[cookie->type] += latency_ns;
stats->last_access_time_ns = time_ns;
@@ -130,29 +124,45 @@ void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
timed_average_account(&s->latency[cookie->type], latency_ns);
}
}
+
+ qemu_mutex_unlock(&stats->lock);
+}
+
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+ block_account_one_io(stats, cookie, false);
+}
+
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+ block_account_one_io(stats, cookie, true);
}
void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
{
assert(type < BLOCK_MAX_IOTYPE);
- /* block_acct_done() and block_acct_failed() update
- * total_time_ns[], but this one does not. The reason is that
- * invalid requests are accounted during their submission,
- * therefore there's no actual I/O involved. */
-
+ /* block_account_one_io() updates total_time_ns[], but this one does
+ * not. The reason is that invalid requests are accounted during their
+ * submission, therefore there's no actual I/O involved.
+ */
+ qemu_mutex_lock(&stats->lock);
stats->invalid_ops[type]++;
if (stats->account_invalid) {
stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
}
+ qemu_mutex_unlock(&stats->lock);
}
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests)
{
assert(type < BLOCK_MAX_IOTYPE);
+
+ qemu_mutex_lock(&stats->lock);
stats->merged[type] += num_requests;
+ qemu_mutex_unlock(&stats->lock);
}
int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
@@ -167,7 +177,9 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,
assert(type < BLOCK_MAX_IOTYPE);
+ qemu_mutex_lock(&stats->stats->lock);
sum = timed_average_sum(&stats->latency[type], &elapsed);
+ qemu_mutex_unlock(&stats->stats->lock);
return (double) sum / elapsed;
}
diff --git a/block/block-backend.c b/block/block-backend.c
index 7d7f3697d1..a2bbae90b1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -216,8 +216,10 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
+ qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+ block_acct_init(&blk->stats);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
@@ -1953,7 +1955,7 @@ static void blk_root_drained_begin(BdrvChild *child)
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
- if (blk->public.io_limits_disabled++ == 0) {
+ if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
throttle_group_restart_blk(blk);
}
}
@@ -1964,7 +1966,7 @@ static void blk_root_drained_end(BdrvChild *child)
assert(blk->quiesce_counter);
assert(blk->public.io_limits_disabled);
- --blk->public.io_limits_disabled;
+ atomic_dec(&blk->public.io_limits_disabled);
if (--blk->quiesce_counter == 0) {
if (blk->dev_ops && blk->dev_ops->drained_end) {
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 519737c8d3..a04c6e4154 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -37,6 +37,7 @@
* or enabled. A frozen bitmap can only abdicate() or reclaim().
*/
struct BdrvDirtyBitmap {
+ QemuMutex *mutex;
HBitmap *bitmap; /* Dirty sector bitmap implementation */
HBitmap *meta; /* Meta dirty bitmap */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
@@ -52,6 +53,27 @@ struct BdrvDirtyBitmapIter {
BdrvDirtyBitmap *bitmap;
};
+static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
+{
+ qemu_mutex_lock(&bs->dirty_bitmap_mutex);
+}
+
+static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
+{
+ qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
+}
+
+void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
+{
+ qemu_mutex_lock(bitmap->mutex);
+}
+
+void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
+{
+ qemu_mutex_unlock(bitmap->mutex);
+}
+
+/* Called with BQL or dirty_bitmap lock taken. */
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
BdrvDirtyBitmap *bm;
@@ -65,6 +87,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
return NULL;
}
+/* Called with BQL taken. */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -72,6 +95,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
bitmap->name = NULL;
}
+/* Called with BQL taken. */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
uint32_t granularity,
const char *name,
@@ -96,11 +120,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
return NULL;
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
+ bitmap->mutex = &bs->dirty_bitmap_mutex;
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+ bdrv_dirty_bitmaps_unlock(bs);
return bitmap;
}
@@ -119,20 +146,24 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int chunk_size)
{
assert(!bitmap->meta);
+ qemu_mutex_lock(bitmap->mutex);
bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
chunk_size * BITS_PER_BYTE);
+ qemu_mutex_unlock(bitmap->mutex);
}
void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(bitmap->meta);
+ qemu_mutex_lock(bitmap->mutex);
hbitmap_free_meta(bitmap->bitmap);
bitmap->meta = NULL;
+ qemu_mutex_unlock(bitmap->mutex);
}
-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, int64_t sector,
- int nb_sectors)
+int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, int64_t sector,
+ int nb_sectors)
{
uint64_t i;
int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
@@ -147,11 +178,26 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
return false;
}
+int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, int64_t sector,
+ int nb_sectors)
+{
+ bool dirty;
+
+ qemu_mutex_lock(bitmap->mutex);
+ dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
+ qemu_mutex_unlock(bitmap->mutex);
+
+ return dirty;
+}
+
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
{
+ qemu_mutex_lock(bitmap->mutex);
hbitmap_reset(bitmap->meta, sector, nb_sectors);
+ qemu_mutex_unlock(bitmap->mutex);
}
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
@@ -164,16 +210,19 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
return bitmap->name;
}
+/* Called with BQL taken. */
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
return bitmap->successor;
}
+/* Called with BQL taken. */
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
return !(bitmap->disabled || bitmap->successor);
}
+/* Called with BQL taken. */
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
if (bdrv_dirty_bitmap_frozen(bitmap)) {
@@ -188,6 +237,7 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
/**
* Create a successor bitmap destined to replace this bitmap after an operation.
* Requires that the bitmap is not frozen and has no successor.
+ * Called with BQL taken.
*/
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, Error **errp)
@@ -220,6 +270,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
/**
* For a bitmap with a successor, yield our name to the successor,
* delete the old bitmap, and return a handle to the new bitmap.
+ * Called with BQL taken.
*/
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
@@ -247,6 +298,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
* In cases of failure where we can no longer safely delete the parent,
* we may wish to re-join the parent and child/successor.
* The merged parent will be un-frozen, but not explicitly re-enabled.
+ * Called with BQL taken.
*/
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *parent,
@@ -271,25 +323,30 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
/**
* Truncates _all_ bitmaps attached to a BDS.
+ * Called with BQL taken.
*/
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
BdrvDirtyBitmap *bitmap;
uint64_t size = bdrv_nb_sectors(bs);
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
assert(!bdrv_dirty_bitmap_frozen(bitmap));
assert(!bitmap->active_iterators);
hbitmap_truncate(bitmap->bitmap, size);
bitmap->size = size;
}
+ bdrv_dirty_bitmaps_unlock(bs);
}
+/* Called with BQL taken. */
static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
bool only_named)
{
BdrvDirtyBitmap *bm, *next;
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
assert(!bm->active_iterators);
@@ -301,15 +358,19 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
g_free(bm);
if (bitmap) {
- return;
+ goto out;
}
}
}
if (bitmap) {
abort();
}
+
+out:
+ bdrv_dirty_bitmaps_unlock(bs);
}
+/* Called with BQL taken. */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
@@ -318,18 +379,21 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
/**
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
* There must not be any frozen bitmaps attached.
+ * Called with BQL taken.
*/
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
{
bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
}
+/* Called with BQL taken. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = true;
}
+/* Called with BQL taken. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -342,6 +406,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
BlockDirtyInfoList *list = NULL;
BlockDirtyInfoList **plist = &list;
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
@@ -354,12 +419,14 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
*plist = entry;
plist = &entry->next;
}
+ bdrv_dirty_bitmaps_unlock(bs);
return list;
}
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
- int64_t sector)
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector)
{
if (bitmap) {
return hbitmap_get(bitmap->bitmap, sector);
@@ -432,23 +499,42 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
return hbitmap_iter_next(&iter->hbi);
}
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors)
{
+ bdrv_dirty_bitmap_lock(bitmap);
+ bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+ bdrv_dirty_bitmap_unlock(bitmap);
+}
+
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors)
+{
assert(bdrv_dirty_bitmap_enabled(bitmap));
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors)
{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
- hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+ bdrv_dirty_bitmap_lock(bitmap);
+ bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+ bdrv_dirty_bitmap_unlock(bitmap);
}
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
+ bdrv_dirty_bitmap_lock(bitmap);
if (!out) {
hbitmap_reset_all(bitmap->bitmap);
} else {
@@ -457,6 +543,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
hbitmap_granularity(backup));
*out = backup;
}
+ bdrv_dirty_bitmap_unlock(bitmap);
}
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
@@ -508,12 +595,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
+
+ if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
+ return;
+ }
+
+ bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
if (!bdrv_dirty_bitmap_enabled(bitmap)) {
continue;
}
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
+ bdrv_dirty_bitmaps_unlock(bs);
}
/**
diff --git a/block/io.c b/block/io.c
index ed31810c0a..91611ffb2a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -130,13 +130,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
- bs->copy_on_read++;
+ atomic_inc(&bs->copy_on_read);
}
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
- assert(bs->copy_on_read > 0);
- bs->copy_on_read--;
+ int old = atomic_fetch_dec(&bs->copy_on_read);
+ assert(old >= 1);
}
/* Check if any requests are in-flight (including throttled requests) */
@@ -241,7 +241,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
return;
}
- if (!bs->quiesce_counter++) {
+ if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
bdrv_parent_drained_begin(bs);
}
@@ -252,7 +252,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
void bdrv_drained_end(BlockDriverState *bs)
{
assert(bs->quiesce_counter > 0);
- if (--bs->quiesce_counter > 0) {
+ if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
return;
}
@@ -375,11 +375,13 @@ void bdrv_drain_all(void)
static void tracked_request_end(BdrvTrackedRequest *req)
{
if (req->serialising) {
- req->bs->serialising_in_flight--;
+ atomic_dec(&req->bs->serialising_in_flight);
}
+ qemu_co_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
+ qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
/**
@@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
qemu_co_queue_init(&req->wait_queue);
+ qemu_co_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+ qemu_co_mutex_unlock(&bs->reqs_lock);
}
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
@@ -414,7 +418,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
- overlap_offset;
if (!req->serialising) {
- req->bs->serialising_in_flight++;
+ atomic_inc(&req->bs->serialising_in_flight);
req->serialising = true;
}
@@ -501,7 +505,8 @@ static void dummy_bh_cb(void *opaque)
void bdrv_wakeup(BlockDriverState *bs)
{
- if (bs->wakeup) {
+ /* The barrier (or an atomic op) is in the caller. */
+ if (atomic_read(&bs->wakeup)) {
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
}
}
@@ -519,12 +524,13 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
bool retry;
bool waited = false;
- if (!bs->serialising_in_flight) {
+ if (!atomic_read(&bs->serialising_in_flight)) {
return false;
}
do {
retry = false;
+ qemu_co_mutex_lock(&bs->reqs_lock);
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (req == self || (!req->serialising && !self->serialising)) {
continue;
@@ -543,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
- qemu_co_queue_wait(&req->wait_queue, NULL);
+ qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -551,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
}
}
}
+ qemu_co_mutex_unlock(&bs->reqs_lock);
} while (retry);
return waited;
@@ -1144,7 +1151,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
bdrv_inc_in_flight(bs);
/* Don't do copy-on-read if we read data before write operation */
- if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
+ if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
flags |= BDRV_REQ_COPY_ON_READ;
}
@@ -1401,12 +1408,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- ++bs->write_gen;
+ atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
- if (bs->wr_highest_offset < offset + bytes) {
- bs->wr_highest_offset = offset + bytes;
- }
+ stat64_max(&bs->wr_highest_offset, offset + bytes);
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, end_sector);
@@ -2292,14 +2297,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto early_exit;
}
- current_gen = bs->write_gen;
+ qemu_co_mutex_lock(&bs->reqs_lock);
+ current_gen = atomic_read(&bs->write_gen);
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
- qemu_co_queue_wait(&bs->flush_queue, NULL);
+ qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
}
+ /* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
+ qemu_co_mutex_unlock(&bs->reqs_lock);
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
@@ -2371,9 +2379,12 @@ out:
if (ret == 0) {
bs->flushed_gen = current_gen;
}
+
+ qemu_co_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
+ qemu_co_mutex_unlock(&bs->reqs_lock);
early_exit:
bdrv_dec_in_flight(bs);
@@ -2517,7 +2528,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
}
ret = 0;
out:
- ++bs->write_gen;
+ atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
@@ -2644,7 +2655,7 @@ void bdrv_io_plug(BlockDriverState *bs)
bdrv_io_plug(child->bs);
}
- if (bs->io_plugged++ == 0) {
+ if (atomic_fetch_inc(&bs->io_plugged) == 0) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_io_plug) {
drv->bdrv_io_plug(bs);
@@ -2657,7 +2668,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
BdrvChild *child;
assert(bs->io_plugged);
- if (--bs->io_plugged == 0) {
+ if (atomic_fetch_dec(&bs->io_plugged) == 1) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_io_unplug) {
drv->bdrv_io_unplug(bs);
diff --git a/block/mirror.c b/block/mirror.c
index a2a970301c..19afcc6f1a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -342,6 +342,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
MAX_IO_SECTORS);
+ bdrv_dirty_bitmap_lock(s->dirty_bitmap);
sector_num = bdrv_dirty_iter_next(s->dbi);
if (sector_num < 0) {
bdrv_set_dirty_iter(s->dbi, 0);
@@ -349,6 +350,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
assert(sector_num >= 0);
}
+ bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
first_chunk = sector_num / sectors_per_chunk;
while (test_bit(first_chunk, s->in_flight_bitmap)) {
@@ -360,12 +362,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Find the number of consective dirty chunks following the first dirty
* one, and wait for in flight requests in them. */
+ bdrv_dirty_bitmap_lock(s->dirty_bitmap);
while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
int64_t next_dirty;
int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
int64_t next_chunk = next_sector / sectors_per_chunk;
if (next_sector >= end ||
- !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+ !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) {
break;
}
if (test_bit(next_chunk, s->in_flight_bitmap)) {
@@ -386,8 +389,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* calling bdrv_get_block_status_above could yield - if some blocks are
* marked dirty in this window, we need to know.
*/
- bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
- nb_chunks * sectors_per_chunk);
+ bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num,
+ nb_chunks * sectors_per_chunk);
+ bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
+
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int64_t ret;
@@ -506,6 +511,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
+ bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
+
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
@@ -904,7 +911,6 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_dirty_iter_free(s->dbi);
- bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
data = g_malloc(sizeof(*data));
data->ret = ret;
diff --git a/block/nfs.c b/block/nfs.c
index 848b2c0bb0..18c87d2f25 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -730,7 +730,9 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
if (task->ret < 0) {
error_report("NFS Error: %s", nfs_get_error(nfs));
}
- task->complete = 1;
+
+ /* Set task->complete before reading bs->wakeup. */
+ atomic_mb_set(&task->complete, 1);
bdrv_wakeup(task->bs);
}
diff --git a/block/qapi.c b/block/qapi.c
index a40922ea26..14b60ae66c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -441,7 +441,7 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
s->node_name = g_strdup(bdrv_get_node_name(bs));
}
- s->stats->wr_highest_offset = bs->wr_highest_offset;
+ s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
if (bs->file) {
s->has_parent = true;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index a18315a1ca..5ebf5d9fbb 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -698,7 +698,8 @@ out:
srco->co = NULL;
srco->ret = ret;
- srco->finished = true;
+ /* Set srco->finished before reading bs->wakeup. */
+ atomic_mb_set(&srco->finished, true);
if (srco->bs) {
bdrv_wakeup(srco->bs);
}
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index b73e7a800b..a181cb1dee 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -240,7 +240,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool must_wait;
- if (blkp->io_limits_disabled) {
+ if (atomic_read(&blkp->io_limits_disabled)) {
return false;
}
@@ -260,6 +260,25 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
return must_wait;
}
+/* Start the next pending I/O request for a BlockBackend. Return whether
+ * any request was actually pending.
+ *
+ * @blk: the current BlockBackend
+ * @is_write: the type of operation (read/write)
+ */
+static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk,
+ bool is_write)
+{
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ bool ret;
+
+ qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
+ ret = qemu_co_queue_next(&blkp->throttled_reqs[is_write]);
+ qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
+
+ return ret;
+}
+
/* Look for the next pending I/O request and schedule it.
*
* This assumes that tg->lock is held.
@@ -287,12 +306,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
if (!must_wait) {
/* Give preference to requests from the current blk */
if (qemu_in_coroutine() &&
- qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+ throttle_group_co_restart_queue(blk, is_write)) {
token = blk;
} else {
ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
int64_t now = qemu_clock_get_ns(tt->clock_type);
- timer_mod(tt->timers[is_write], now + 1);
+ timer_mod(tt->timers[is_write], now);
tg->any_timer_armed[is_write] = true;
}
tg->tokens[is_write] = token;
@@ -326,7 +345,10 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
- qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
+ qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
+ &blkp->throttled_reqs_lock);
+ qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -340,15 +362,50 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
qemu_mutex_unlock(&tg->lock);
}
+typedef struct {
+ BlockBackend *blk;
+ bool is_write;
+} RestartData;
+
+static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
+{
+ RestartData *data = opaque;
+ BlockBackend *blk = data->blk;
+ bool is_write = data->is_write;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+ bool empty_queue;
+
+ empty_queue = !throttle_group_co_restart_queue(blk, is_write);
+
+ /* If the request queue was empty then we have to take care of
+ * scheduling the next one */
+ if (empty_queue) {
+ qemu_mutex_lock(&tg->lock);
+ schedule_next_request(blk, is_write);
+ qemu_mutex_unlock(&tg->lock);
+ }
+}
+
+static void throttle_group_restart_queue(BlockBackend *blk, bool is_write)
+{
+ Coroutine *co;
+ RestartData rd = {
+ .blk = blk,
+ .is_write = is_write
+ };
+
+ co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd);
+ aio_co_enter(blk_get_aio_context(blk), co);
+}
+
void throttle_group_restart_blk(BlockBackend *blk)
{
BlockBackendPublic *blkp = blk_get_public(blk);
- int i;
- for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
- ;
- }
+ if (blkp->throttle_state) {
+ throttle_group_restart_queue(blk, 0);
+ throttle_group_restart_queue(blk, 1);
}
}
@@ -376,8 +433,7 @@ void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
throttle_config(ts, tt, cfg);
qemu_mutex_unlock(&tg->lock);
- qemu_co_enter_next(&blkp->throttled_reqs[0]);
- qemu_co_enter_next(&blkp->throttled_reqs[1]);
+ throttle_group_restart_blk(blk);
}
/* Get the throttle configuration from a particular group. Similar to
@@ -408,7 +464,6 @@ static void timer_cb(BlockBackend *blk, bool is_write)
BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
- bool empty_queue;
/* The timer has just been fired, so we can update the flag */
qemu_mutex_lock(&tg->lock);
@@ -416,17 +471,7 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
- aio_context_acquire(blk_get_aio_context(blk));
- empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
- aio_context_release(blk_get_aio_context(blk));
-
- /* If the request queue was empty then we have to take care of
- * scheduling the next one */
- if (empty_queue) {
- qemu_mutex_lock(&tg->lock);
- schedule_next_request(blk, is_write);
- qemu_mutex_unlock(&tg->lock);
- }
+ throttle_group_restart_queue(blk, is_write);
}
static void read_timer_cb(void *opaque)