diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-11-12 17:22:06 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-11-12 17:22:06 +0000 |
commit | b2df6a79df6343d0ed4ea05d83b3ff1d849e8d25 (patch) | |
tree | dbdbb73641e92a85f29ff12b7d010320ef6cd619 /include | |
parent | cfcc7c144879ebe61ac2472216314fc1331b4450 (diff) | |
parent | aece5edc96f211eec6febdafc9bbbb99315a2efd (diff) |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches (rebased Stefan's pull request)
# gpg: Signature made Thu 12 Nov 2015 15:34:16 GMT using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
* remotes/kevin/tags/for-upstream: (43 commits)
block: Update copyright of the accounting code
scsi-disk: Account for failed operations
macio: Account for failed operations
ide: Account for failed and invalid operations
atapi: Account for failed and invalid operations
xen_disk: Account for failed and invalid operations
virtio-blk: Account for failed and invalid operations
nvme: Account for failed and invalid operations
iotests: Add test for the block device statistics
block: Use QEMU_CLOCK_VIRTUAL for the accounting code in qtest mode
qemu-io: Account for failed, invalid and flush operations
block: New option to define the intervals for collecting I/O statistics
block: Add average I/O queue depth to BlockDeviceTimedStats
block: Compute minimum, maximum and average I/O latencies
block: Allow configuring whether to account failed and invalid ops
block: Add statistics for failed and invalid I/O operations
block: Add idle_time_ns to BlockDeviceStats
util: Infrastructure for computing recent averages
block: define 'clock_type' for the accounting code
ide: Account for write operations correctly
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/block/accounting.h | 28 | ||||
-rw-r--r-- | include/block/block.h | 18 | ||||
-rw-r--r-- | include/block/block_int.h | 23 | ||||
-rw-r--r-- | include/block/blockjob.h | 85 | ||||
-rw-r--r-- | include/qemu/timed-average.h | 64 |
5 files changed, 207 insertions, 11 deletions
diff --git a/include/block/accounting.h b/include/block/accounting.h index 66637cdfed..0f46cb4ec1 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,8 +26,12 @@ #define BLOCK_ACCOUNTING_H #include <stdint.h> +#include <stdbool.h> #include "qemu/typedefs.h" +#include "qemu/timed-average.h" + +typedef struct BlockAcctTimedStats BlockAcctTimedStats; enum BlockAcctType { BLOCK_ACCT_READ, @@ -35,11 +40,23 @@ enum BlockAcctType { BLOCK_MAX_IOTYPE, }; +struct BlockAcctTimedStats { + TimedAverage latency[BLOCK_MAX_IOTYPE]; + unsigned interval_length; /* in seconds */ + QSLIST_ENTRY(BlockAcctTimedStats) entries; +}; + typedef struct BlockAcctStats { uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; + uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; + uint64_t failed_ops[BLOCK_MAX_IOTYPE]; uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; + int64_t last_access_time_ns; + QSLIST_HEAD(, BlockAcctTimedStats) intervals; + bool account_invalid; + bool account_failed; } BlockAcctStats; typedef struct BlockAcctCookie { @@ -48,10 +65,21 @@ typedef struct BlockAcctCookie { enum BlockAcctType type; } BlockAcctCookie; +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed); +void block_acct_cleanup(BlockAcctStats *stats); +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s); void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type); void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests); +int64_t block_acct_idle_time_ns(BlockAcctStats *stats); +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type); #endif diff --git a/include/block/block.h b/include/block/block.h index 610db923d5..73edb1a79c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -14,6 +14,7 @@ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; +typedef struct BlockJobTxn BlockJobTxn; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -335,10 +336,18 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb); typedef struct BlockRequest { /* Fields to be filled by multiwrite caller */ - int64_t sector; - int nb_sectors; - int flags; - QEMUIOVector *qiov; + union { + struct { + int64_t sector; + int nb_sectors; + int flags; + QEMUIOVector *qiov; + }; + struct { + int req; + void *buf; + }; + }; BlockCompletionFunc *cb; void *opaque; @@ -493,7 +502,6 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors); -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); diff --git a/include/block/block_int.h b/include/block/block_int.h index 603145a21d..4012e36437 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -60,11 +60,19 @@ #define BLOCK_PROBE_BUF_SIZE 512 +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_FLUSH, + BDRV_TRACKED_IOCTL, + BDRV_TRACKED_DISCARD, +}; + typedef struct BdrvTrackedRequest { BlockDriverState *bs; int64_t offset; unsigned int bytes; - bool is_write; + enum BdrvTrackedRequestType type; bool serialising; int64_t overlap_offset; @@ -219,7 +227,6 @@ struct BlockDriver { void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); /* to control generic scsi devices */ - int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf); BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); @@ -288,6 +295,12 @@ struct BlockDriver { */ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + /** + * Drain and stop any internal sources of requests in the driver, and + * remain so until next I/O callback (e.g. bdrv_co_writev) is called. + */ + void (*bdrv_drain)(BlockDriverState *bs); + QLIST_ENTRY(BlockDriver) list; }; @@ -656,6 +669,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). * * Start a backup operation on @bs. Clusters in @bs are written to @target * until the job is cancelled or manually completed. @@ -666,7 +680,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp); + BlockJobTxn *txn, Error **errp); void blk_set_bs(BlockBackend *blk, BlockDriverState *bs); @@ -680,4 +694,7 @@ void blk_dev_resize_cb(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); bool bdrv_requests_pending(BlockDriverState *bs); +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); + #endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 289b13f0c0..d84ccd8d2c 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -50,6 +50,26 @@ typedef struct BlockJobDriver { * manually. */ void (*complete)(BlockJob *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(BlockJob *job); } BlockJobDriver; /** @@ -130,6 +150,21 @@ struct BlockJob { /** The opaque value that is passed to the completion function. */ void *opaque; + + /** Reference count of the block job */ + int refcnt; + + /* True if this job has reported completion by calling block_job_completed. + */ + bool completed; + + /* ret code passed to block_job_completed. + */ + int ret; + + /** Non-NULL if this job is part of a transaction */ + BlockJobTxn *txn; + QLIST_ENTRY(BlockJob) txn_list; }; /** @@ -174,12 +209,21 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_release: + * block_job_ref: * @bs: The block device. * - * Release job resources when an error occurred or job completed. + * Grab a reference to the block job. Should be paired with block_job_unref. */ -void block_job_release(BlockDriverState *bs); +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: + * @bs: The block device. + * + * Release reference to the block job and release resources if it is the last + * reference. + */ +void block_job_unref(BlockJob *job); /** * block_job_completed: @@ -364,4 +408,39 @@ void block_job_defer_to_main_loop(BlockJob *job, BlockJobDeferToMainLoopFn *fn, void *opaque); +/** + * block_job_txn_new: + * + * Allocate and return a new block job transaction. Jobs can be added to the + * transaction using block_job_txn_add_job(). + * + * The transaction is automatically freed when the last job completes or is + * cancelled. + * + * All jobs in the transaction either complete successfully or fail/cancel as a + * group. Jobs wait for each other before completing. Cancelling one job + * cancels all jobs in the transaction. + */ +BlockJobTxn *block_job_txn_new(void); + +/** + * block_job_txn_unref: + * + * Release a reference that was previously acquired with block_job_txn_add_job + * or block_job_txn_new. If it's the last reference to the object, it will be + * freed. + */ +void block_job_txn_unref(BlockJobTxn *txn); + +/** + * block_job_txn_add_job: + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either block_job_txn_unref() or block_job_completed() + * to release the reference that is automatically grabbed here. + */ +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job); + #endif diff --git a/include/qemu/timed-average.h b/include/qemu/timed-average.h new file mode 100644 index 0000000000..364bf88f70 --- /dev/null +++ b/include/qemu/timed-average.h @@ -0,0 +1,64 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * BenoƮt Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TIMED_AVERAGE_H +#define TIMED_AVERAGE_H + +#include <stdint.h> + +#include "qemu/timer.h" + +typedef struct TimedAverageWindow TimedAverageWindow; +typedef struct TimedAverage TimedAverage; + +/* All fields of both structures are private */ + +struct TimedAverageWindow { + uint64_t min; /* minimum value accounted in the window */ + uint64_t max; /* maximum value accounted in the window */ + uint64_t sum; /* sum of all values */ + uint64_t count; /* number of values */ + int64_t expiration; /* the end of the current window in ns */ +}; + +struct TimedAverage { + uint64_t period; /* period in nanoseconds */ + TimedAverageWindow windows[2]; /* two overlapping windows of with + * an offset of period / 2 between them */ + unsigned current; /* the current window index: it's also the + * oldest window index */ + QEMUClockType clock_type; /* the clock used */ +}; + +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period); + +void timed_average_account(TimedAverage *ta, uint64_t value); + +uint64_t timed_average_min(TimedAverage *ta); +uint64_t timed_average_avg(TimedAverage *ta); +uint64_t timed_average_max(TimedAverage *ta); +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed); + +#endif |