diff options
Diffstat (limited to 'include/block/block-common.h')
-rw-r--r-- | include/block/block-common.h | 418 |
1 files changed, 418 insertions, 0 deletions
diff --git a/include/block/block-common.h b/include/block/block-common.h new file mode 100644 index 0000000000..0c5dc4a86a --- /dev/null +++ b/include/block/block-common.h @@ -0,0 +1,418 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_COMMON_H +#define BLOCK_COMMON_H + +#include "block/aio.h" +#include "block/aio-wait.h" +#include "qemu/iov.h" +#include "qemu/coroutine.h" +#include "block/accounting.h" +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" +#include "qemu/hbitmap.h" +#include "qemu/transactions.h" + +/* + * generated_co_wrapper + * + * Function specifier, which does nothing but mark functions to be + * generated by scripts/block-coroutine-wrapper.py + * + * Read more in docs/devel/block-coroutine-wrapper.rst + */ +#define generated_co_wrapper + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BdrvChild BdrvChild; +typedef struct BdrvChildClass BdrvChildClass; + +typedef struct BlockDriverInfo { + /* in bytes, 0 if irrelevant */ + int cluster_size; + /* offset at which the VM state can be saved (0 if not possible) */ + int64_t vm_state_offset; + bool is_dirty; + /* + * True if this block driver only supports compressed writes + */ + bool needs_compressed_writes; +} BlockDriverInfo; + +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; + uint64_t compressed_clusters; +} BlockFragInfo; + +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, + + /* + * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate + * that the block driver should unmap (discard) blocks if it is guaranteed + * that the result will read back as zeroes. The flag is only passed to the + * driver if the block device is opened with BDRV_O_UNMAP. + */ + BDRV_REQ_MAY_UNMAP = 0x4, + + BDRV_REQ_FUA = 0x10, + BDRV_REQ_WRITE_COMPRESSED = 0x20, + + /* + * Signifies that this write request will not change the visible disk + * content. + */ + BDRV_REQ_WRITE_UNCHANGED = 0x40, + + /* + * Forces request serialisation. Use only with write requests. + */ + BDRV_REQ_SERIALISING = 0x80, + + /* + * Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. + */ + BDRV_REQ_NO_FALLBACK = 0x100, + + /* + * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read + * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR + * filter is involved), in which case it signals that the COR operation + * need not read the data into memory (qiov) but only ensure they are + * copied to the top layer (i.e., that COR operation is done). + */ + BDRV_REQ_PREFETCH = 0x200, + + /* + * If we need to wait for other requests, just fail immediately. Used + * only together with BDRV_REQ_SERIALISING. + */ + BDRV_REQ_NO_WAIT = 0x400, + + /* Mask of valid flags */ + BDRV_REQ_MASK = 0x7ff, +} BdrvRequestFlags; + +#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ +#define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ +#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save + writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the + thread pool */ +#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ +#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ +#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ +#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ +#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: + select an appropriate protocol driver, + ignoring the format layer */ +#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ +#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening + read-write fails */ +#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) + + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" +#define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" +#define BDRV_OPT_DISCARD "discard" +#define BDRV_OPT_FORCE_SHARE "force-share" + + +#define BDRV_SECTOR_BITS 9 +#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) + +#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \ + INT_MAX >> BDRV_SECTOR_BITS) +#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) + +/* + * We want allow aligning requests and disk length up to any 32bit alignment + * and don't afraid of overflow. + * To achieve it, and in the same time use some pretty number as maximum disk + * size, let's define maximum "length" (a limit for any offset/bytes request and + * for disk size) to be the greatest power of 2 less than INT64_MAX. + */ +#define BDRV_MAX_ALIGNMENT (1L << 30) +#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT)) + +/* + * Allocation status flags for bdrv_block_status() and friends. + * + * Public flags: + * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer + * BDRV_BLOCK_ZERO: offset reads as zero + * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data + * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this + * layer rather than any backing, set by block layer + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this + * layer, set by block layer + * + * Internal flags: + * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request + * that the block layer recompute the answer from the returned + * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. + * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for + * zeroes in file child of current block node inside + * returned region. Only valid together with both + * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not + * appear with BDRV_BLOCK_ZERO. + * + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the + * host offset within the returned BDS that is allocated for the + * corresponding raw guest data. However, whether that offset + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: + * + * DATA ZERO OFFSET_VALID + * t t t sectors read as zero, returned file is zero at offset + * t f t sectors read as valid from file at offset + * f t t sectors preallocated, read as zero, returned file not + * necessarily zero at offset + * f f t sectors preallocated but read from backing_hd, + * returned file contains garbage at offset + * t t f sectors preallocated, read as zero, unknown offset + * t f f sectors read from unknown file or offset + * f t f not allocated or unknown offset, read as zero + * f f f not allocated or unknown offset, read from backing_hd + */ +#define BDRV_BLOCK_DATA 0x01 +#define BDRV_BLOCK_ZERO 0x02 +#define BDRV_BLOCK_OFFSET_VALID 0x04 +#define BDRV_BLOCK_RAW 0x08 +#define BDRV_BLOCK_ALLOCATED 0x10 +#define BDRV_BLOCK_EOF 0x20 +#define BDRV_BLOCK_RECURSE 0x40 + +typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + BlockdevDetectZeroesOptions detect_zeroes; + bool backing_missing; + BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ + BlockDriverState *old_file_bs; /* keep pointer for permissions update */ + QDict *options; + QDict *explicit_options; + void *opaque; +} BDRVReopenState; + +/* + * Block operation types + */ +typedef enum BlockOpType { + BLOCK_OP_TYPE_BACKUP_SOURCE, + BLOCK_OP_TYPE_BACKUP_TARGET, + BLOCK_OP_TYPE_CHANGE, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, + BLOCK_OP_TYPE_DATAPLANE, + BLOCK_OP_TYPE_DRIVE_DEL, + BLOCK_OP_TYPE_EJECT, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, + BLOCK_OP_TYPE_MIRROR_SOURCE, + BLOCK_OP_TYPE_MIRROR_TARGET, + BLOCK_OP_TYPE_RESIZE, + BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, + BLOCK_OP_TYPE_MAX, +} BlockOpType; + +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + * + * As the BLK_PERM_WRITE permission is strictly stronger, either is + * sufficient to perform an unchanging write. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU + * 6.1 and earlier may still lock the corresponding byte in block/file-posix + * locking. So, implementing some new permission should be very careful to + * not interfere with this old unused thing. + */ + + BLK_PERM_ALL = 0x0f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, +}; + +/* + * Flags that parent nodes assign to child nodes to specify what kind of + * role(s) they take. + * + * At least one of DATA, METADATA, FILTERED, or COW must be set for + * every child. + */ +enum BdrvChildRoleBits { + /* + * This child stores data. + * Any node may have an arbitrary number of such children. + */ + BDRV_CHILD_DATA = (1 << 0), + + /* + * This child stores metadata. + * Any node may have an arbitrary number of metadata-storing + * children. + */ + BDRV_CHILD_METADATA = (1 << 1), + + /* + * A child that always presents exactly the same visible data as + * the parent, e.g. by virtue of the parent forwarding all reads + * and writes. + * This flag is mutually exclusive with DATA, METADATA, and COW. + * Any node may have at most one filtered child at a time. + */ + BDRV_CHILD_FILTERED = (1 << 2), + + /* + * Child from which to read all data that isn't allocated in the + * parent (i.e., the backing child); such data is copied to the + * parent through COW (and optionally COR). + * This field is mutually exclusive with DATA, METADATA, and + * FILTERED. + * Any node may have at most one such backing child at a time. + */ + BDRV_CHILD_COW = (1 << 3), + + /* + * The primary child. For most drivers, this is the child whose + * filename applies best to the parent node. + * Any node may have at most one primary child at a time. + */ + BDRV_CHILD_PRIMARY = (1 << 4), + + /* Useful combination of flags */ + BDRV_CHILD_IMAGE = BDRV_CHILD_DATA + | BDRV_CHILD_METADATA + | BDRV_CHILD_PRIMARY, +}; + +/* Mask of BdrvChildRoleBits values */ +typedef unsigned int BdrvChildRole; + +typedef struct BdrvCheckResult { + int corruptions; + int leaks; + int check_errors; + int corruptions_fixed; + int leaks_fixed; + int64_t image_end_offset; + BlockFragInfo bfi; +} BdrvCheckResult; + +typedef enum { + BDRV_FIX_LEAKS = 1, + BDRV_FIX_ERRORS = 2, +} BdrvCheckMode; + +typedef struct BlockSizes { + uint32_t phys; + uint32_t log; +} BlockSizes; + +typedef struct HDGeometry { + uint32_t heads; + uint32_t sectors; + uint32_t cylinders; +} HDGeometry; + +/* + * Common functions that are neither I/O nor Global State. + * + * These functions must never call any function from other categories + * (I/O, "I/O or GS", Global State) except this one, but can be invoked by + * all of them. + */ + +char *bdrv_perm_names(uint64_t perm); +uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm); + +void bdrv_init_with_whitelist(void); +bool bdrv_uses_whitelist(void); +int bdrv_is_whitelisted(BlockDriver *drv, bool read_only); + +int bdrv_parse_aio(const char *mode, int *flags); +int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); +int bdrv_parse_discard_flags(const char *mode, int *flags); + +int path_has_protocol(const char *path); +int path_is_absolute(const char *path); +char *path_combine(const char *base_path, const char *filename); + +char *bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + Error **errp); + +#endif /* BLOCK_COMMON_H */ |