diff options
Diffstat (limited to 'include/block')
-rw-r--r-- | include/block/aes.h | 26 | ||||
-rw-r--r-- | include/block/aio.h | 240 | ||||
-rw-r--r-- | include/block/block.h | 439 | ||||
-rw-r--r-- | include/block/block_int.h | 366 | ||||
-rw-r--r-- | include/block/blockjob.h | 278 | ||||
-rw-r--r-- | include/block/coroutine.h | 211 | ||||
-rw-r--r-- | include/block/coroutine_int.h | 49 | ||||
-rw-r--r-- | include/block/nbd.h | 100 | ||||
-rw-r--r-- | include/block/thread-pool.h | 34 |
9 files changed, 1743 insertions, 0 deletions
diff --git a/include/block/aes.h b/include/block/aes.h new file mode 100644 index 0000000000..a0167eb7d5 --- /dev/null +++ b/include/block/aes.h @@ -0,0 +1,26 @@ +#ifndef QEMU_AES_H +#define QEMU_AES_H + +#define AES_MAXNR 14 +#define AES_BLOCK_SIZE 16 + +struct aes_key_st { + uint32_t rd_key[4 *(AES_MAXNR + 1)]; + int rounds; +}; +typedef struct aes_key_st AES_KEY; + +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); + +void AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + const unsigned long length, const AES_KEY *key, + unsigned char *ivec, const int enc); + +#endif diff --git a/include/block/aio.h b/include/block/aio.h new file mode 100644 index 0000000000..31884a8f16 --- /dev/null +++ b/include/block/aio.h @@ -0,0 +1,240 @@ +/* + * QEMU aio implementation + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_AIO_H +#define QEMU_AIO_H + +#include "qemu-common.h" +#include "qemu-queue.h" +#include "event_notifier.h" + +typedef struct BlockDriverAIOCB BlockDriverAIOCB; +typedef void BlockDriverCompletionFunc(void *opaque, int ret); + +typedef struct AIOCBInfo { + void (*cancel)(BlockDriverAIOCB *acb); + size_t aiocb_size; +} AIOCBInfo; + +struct BlockDriverAIOCB { + const AIOCBInfo *aiocb_info; + BlockDriverState *bs; + BlockDriverCompletionFunc *cb; + void *opaque; +}; + +void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); +void qemu_aio_release(void *p); + +typedef struct AioHandler AioHandler; +typedef void QEMUBHFunc(void *opaque); +typedef void IOHandler(void *opaque); + +typedef struct AioContext { + GSource source; + + /* The list of registered AIO handlers */ + QLIST_HEAD(, AioHandler) aio_handlers; + + /* This is a simple lock used to protect the aio_handlers list. + * Specifically, it's used to ensure that no callbacks are removed while + * we're walking and dispatching callbacks. + */ + int walking_handlers; + + /* Anchor of the list of Bottom Halves belonging to the context */ + struct QEMUBH *first_bh; + + /* A simple lock used to protect the first_bh list, and ensure that + * no callbacks are removed while we're walking and dispatching callbacks. + */ + int walking_bh; + + /* Used for aio_notify. */ + EventNotifier notifier; +} AioContext; + +/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ +typedef int (AioFlushEventNotifierHandler)(EventNotifier *e); + +/** + * aio_context_new: Allocate a new AioContext. + * + * AioContext provide a mini event-loop that can be waited on synchronously. + * They also provide bottom halves, a service to execute a piece of code + * as soon as possible. + */ +AioContext *aio_context_new(void); + +/** + * aio_context_ref: + * @ctx: The AioContext to operate on. + * + * Add a reference to an AioContext. + */ +void aio_context_ref(AioContext *ctx); + +/** + * aio_context_unref: + * @ctx: The AioContext to operate on. + * + * Drop a reference to an AioContext. + */ +void aio_context_unref(AioContext *ctx); + +/** + * aio_bh_new: Allocate a new bottom half structure. + * + * Bottom halves are lightweight callbacks whose invocation is guaranteed + * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure + * is opaque and must be allocated prior to its use. + */ +QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + +/** + * aio_notify: Force processing of pending events. + * + * Similar to signaling a condition variable, aio_notify forces + * aio_wait to exit, so that the next call will re-examine pending events. + * The caller of aio_notify will usually call aio_wait again very soon, + * or go through another iteration of the GLib main loop. Hence, aio_notify + * also has the side effect of recalculating the sets of file descriptors + * that the main loop waits for. + * + * Calling aio_notify is rarely necessary, because for example scheduling + * a bottom half calls it already. + */ +void aio_notify(AioContext *ctx); + +/** + * aio_bh_poll: Poll bottom halves for an AioContext. + * + * These are internal functions used by the QEMU main loop. + */ +int aio_bh_poll(AioContext *ctx); + +/** + * qemu_bh_schedule: Schedule a bottom half. + * + * Scheduling a bottom half interrupts the main loop and causes the + * execution of the callback that was passed to qemu_bh_new. + * + * Bottom halves that are scheduled from a bottom half handler are instantly + * invoked. This can create an infinite loop if a bottom half handler + * schedules itself. + * + * @bh: The bottom half to be scheduled. + */ +void qemu_bh_schedule(QEMUBH *bh); + +/** + * qemu_bh_cancel: Cancel execution of a bottom half. + * + * Canceling execution of a bottom half undoes the effect of calls to + * qemu_bh_schedule without freeing its resources yet. While cancellation + * itself is also wait-free and thread-safe, it can of course race with the + * loop that executes bottom halves unless you are holding the iothread + * mutex. This makes it mostly useless if you are not holding the mutex. + * + * @bh: The bottom half to be canceled. + */ +void qemu_bh_cancel(QEMUBH *bh); + +/** + *qemu_bh_delete: Cancel execution of a bottom half and free its resources. + * + * Deleting a bottom half frees the memory that was allocated for it by + * qemu_bh_new. It also implies canceling the bottom half if it was + * scheduled. + * + * @bh: The bottom half to be deleted. + */ +void qemu_bh_delete(QEMUBH *bh); + +/* Return whether there are any pending callbacks from the GSource + * attached to the AioContext. + * + * This is used internally in the implementation of the GSource. + */ +bool aio_pending(AioContext *ctx); + +/* Progress in completing AIO work to occur. This can issue new pending + * aio as a result of executing I/O completion or bh callbacks. + * + * If there is no pending AIO operation or completion (bottom half), + * return false. If there are pending bottom halves, return true. + * + * If there are no pending bottom halves, but there are pending AIO + * operations, it may not be possible to make any progress without + * blocking. If @blocking is true, this function will wait until one + * or more AIO events have completed, to ensure something has moved + * before returning. + * + * If @blocking is false, this function will also return false if the + * function cannot make any progress without blocking. + */ +bool aio_poll(AioContext *ctx, bool blocking); + +#ifdef CONFIG_POSIX +/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ +typedef int (AioFlushHandler)(void *opaque); + +/* Register a file descriptor and associated callbacks. Behaves very similarly + * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will + * be invoked when using qemu_aio_wait(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of qemu_set_fd_handler[2]. + */ +void aio_set_fd_handler(AioContext *ctx, + int fd, + IOHandler *io_read, + IOHandler *io_write, + AioFlushHandler *io_flush, + void *opaque); +#endif + +/* Register an event notifier and associated callbacks. Behaves very similarly + * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks + * will be invoked when using qemu_aio_wait(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of event_notifier_set_handler. + */ +void aio_set_event_notifier(AioContext *ctx, + EventNotifier *notifier, + EventNotifierHandler *io_read, + AioFlushEventNotifierHandler *io_flush); + +/* Return a GSource that lets the main loop poll the file descriptors attached + * to this AioContext. + */ +GSource *aio_get_g_source(AioContext *ctx); + +/* Functions to operate on the main QEMU AioContext. */ + +bool qemu_aio_wait(void); +void qemu_aio_set_event_notifier(EventNotifier *notifier, + EventNotifierHandler *io_read, + AioFlushEventNotifierHandler *io_flush); + +#ifdef CONFIG_POSIX +void qemu_aio_set_fd_handler(int fd, + IOHandler *io_read, + IOHandler *io_write, + AioFlushHandler *io_flush, + void *opaque); +#endif + +#endif diff --git a/include/block/block.h b/include/block/block.h new file mode 100644 index 0000000000..d49ce4dbc5 --- /dev/null +++ b/include/block/block.h @@ -0,0 +1,439 @@ +#ifndef BLOCK_H +#define BLOCK_H + +#include "block/aio.h" +#include "qemu-common.h" +#include "qemu-option.h" +#include "block/coroutine.h" +#include "qapi/qmp/qobject.h" +#include "qapi-types.h" + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BlockJob BlockJob; + +typedef struct BlockDriverInfo { + /* in bytes, 0 if irrelevant */ + int cluster_size; + /* offset at which the VM state can be saved (0 if not possible) */ + int64_t vm_state_offset; + bool is_dirty; +} BlockDriverInfo; + +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; +} BlockFragInfo; + +typedef struct QEMUSnapshotInfo { + char id_str[128]; /* unique snapshot id */ + /* the following fields are informative. They are not needed for + the consistency of the snapshot */ + char name[256]; /* user chosen name */ + uint64_t vm_state_size; /* VM state info size */ + uint32_t date_sec; /* UTC date of the snapshot */ + uint32_t date_nsec; + uint64_t vm_clock_nsec; /* VM clock relative to boot */ +} QEMUSnapshotInfo; + +/* Callbacks for block device models */ +typedef struct BlockDevOps { + /* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ + void (*change_media_cb)(void *opaque, bool load); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); + /* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ + bool (*is_tray_open)(void *opaque); + /* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ + bool (*is_medium_locked)(void *opaque); + /* + * Runs when the size changed (e.g. monitor command block_resize) + */ + void (*resize_cb)(void *opaque); +} BlockDevOps; + +#define BDRV_O_RDWR 0x0002 +#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ +#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ +#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ +#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ +#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) + +#define BDRV_SECTOR_BITS 9 +#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) +#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) + +typedef enum { + BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP +} BlockErrorAction; + +typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + void *opaque; +} BDRVReopenState; + + +void bdrv_iostatus_enable(BlockDriverState *bs); +void bdrv_iostatus_reset(BlockDriverState *bs); +void bdrv_iostatus_disable(BlockDriverState *bs); +bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); +void bdrv_iostatus_set_err(BlockDriverState *bs, int error); +void bdrv_info_print(Monitor *mon, const QObject *data); +void bdrv_info(Monitor *mon, QObject **ret_data); +void bdrv_stats_print(Monitor *mon, const QObject *data); +void bdrv_info_stats(Monitor *mon, QObject **ret_data); + +/* disk I/O throttling */ +void bdrv_io_limits_enable(BlockDriverState *bs); +void bdrv_io_limits_disable(BlockDriverState *bs); +bool bdrv_io_limits_enabled(BlockDriverState *bs); + +void bdrv_init(void); +void bdrv_init_with_whitelist(void); +BlockDriver *bdrv_find_protocol(const char *filename); +BlockDriver *bdrv_find_format(const char *format_name); +BlockDriver *bdrv_find_whitelisted_format(const char *format_name); +int bdrv_create(BlockDriver *drv, const char* filename, + QEMUOptionParameter *options); +int bdrv_create_file(const char* filename, QEMUOptionParameter *options); +BlockDriverState *bdrv_new(const char *device_name); +void bdrv_make_anon(BlockDriverState *bs); +void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_delete(BlockDriverState *bs); +int bdrv_parse_cache_flags(const char *mode, int *flags); +int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); +int bdrv_open_backing_file(BlockDriverState *bs); +int bdrv_open(BlockDriverState *bs, const char *filename, int flags, + BlockDriver *drv); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, int flags); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); +void bdrv_reopen_commit(BDRVReopenState *reopen_state); +void bdrv_reopen_abort(BDRVReopenState *reopen_state); +void bdrv_close(BlockDriverState *bs); +void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify); +int bdrv_attach_dev(BlockDriverState *bs, void *dev); +void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev); +void bdrv_detach_dev(BlockDriverState *bs, void *dev); +void *bdrv_get_attached_dev(BlockDriverState *bs); +void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, + void *opaque); +void bdrv_dev_eject_request(BlockDriverState *bs, bool force); +bool bdrv_dev_has_removable_media(BlockDriverState *bs); +bool bdrv_dev_is_tray_open(BlockDriverState *bs); +bool bdrv_dev_is_medium_locked(BlockDriverState *bs); +int bdrv_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); +int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); +int bdrv_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); +int bdrv_pread(BlockDriverState *bs, int64_t offset, + void *buf, int count); +int bdrv_pwrite(BlockDriverState *bs, int64_t offset, + const void *buf, int count); +int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, + const void *buf, int count); +int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors); +int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum); +int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum); +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file); +int bdrv_get_backing_file_depth(BlockDriverState *bs); +int bdrv_truncate(BlockDriverState *bs, int64_t offset); +int64_t bdrv_getlength(BlockDriverState *bs); +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +int bdrv_commit(BlockDriverState *bs); +int bdrv_commit_all(void); +int bdrv_change_backing_file(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); +void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); + + +typedef struct BdrvCheckResult { + int corruptions; + int leaks; + int check_errors; + int corruptions_fixed; + int leaks_fixed; + BlockFragInfo bfi; +} BdrvCheckResult; + +typedef enum { + BDRV_FIX_LEAKS = 1, + BDRV_FIX_ERRORS = 2, +} BdrvCheckMode; + +int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); + +/* async block I/O */ +typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, + int sector_num); +BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +void bdrv_aio_cancel(BlockDriverAIOCB *acb); + +typedef struct BlockRequest { + /* Fields to be filled by multiwrite caller */ + int64_t sector; + int nb_sectors; + QEMUIOVector *qiov; + BlockDriverCompletionFunc *cb; + void *opaque; + + /* Filled by multiwrite implementation */ + int error; +} BlockRequest; + +int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, + int num_reqs); + +/* sg packet commands */ +int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf); +BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque); + +/* Invalidate any cached metadata used by image formats */ +void bdrv_invalidate_cache(BlockDriverState *bs); +void bdrv_invalidate_cache_all(void); + +void bdrv_clear_incoming_migration_all(void); + +/* Ensure contents are flushed to disk. */ +int bdrv_flush(BlockDriverState *bs); +int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +void bdrv_flush_all(void); +void bdrv_close_all(void); +void bdrv_drain_all(void); + +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); +int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); +int bdrv_has_zero_init(BlockDriverState *bs); +int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + int *pnum); + +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read); +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error); +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error); +int bdrv_is_read_only(BlockDriverState *bs); +int bdrv_is_sg(BlockDriverState *bs); +int bdrv_enable_write_cache(BlockDriverState *bs); +void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce); +int bdrv_is_inserted(BlockDriverState *bs); +int bdrv_media_changed(BlockDriverState *bs); +void bdrv_lock_medium(BlockDriverState *bs, bool locked); +void bdrv_eject(BlockDriverState *bs, bool eject_flag); +const char *bdrv_get_format_name(BlockDriverState *bs); +BlockDriverState *bdrv_find(const char *name); +BlockDriverState *bdrv_next(BlockDriverState *bs); +void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), + void *opaque); +int bdrv_is_encrypted(BlockDriverState *bs); +int bdrv_key_required(BlockDriverState *bs); +int bdrv_set_key(BlockDriverState *bs, const char *key); +int bdrv_query_missing_keys(void); +void bdrv_iterate_format(void (*it)(void *opaque, const char *name), + void *opaque); +const char *bdrv_get_device_name(BlockDriverState *bs); +int bdrv_get_flags(BlockDriverState *bs); +int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); +int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); + +const char *bdrv_get_encrypted_filename(BlockDriverState *bs); +void bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size); +void bdrv_get_full_backing_filename(BlockDriverState *bs, + char *dest, size_t sz); +BlockInfo *bdrv_query_info(BlockDriverState *s); +BlockStats *bdrv_query_stats(const BlockDriverState *bs); +int bdrv_can_snapshot(BlockDriverState *bs); +int bdrv_is_snapshot(BlockDriverState *bs); +BlockDriverState *bdrv_snapshots(void); +int bdrv_snapshot_create(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); +int bdrv_snapshot_goto(BlockDriverState *bs, + const char *snapshot_id); +int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id); +int bdrv_snapshot_list(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); +int bdrv_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_name); +char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn); + +char *get_human_readable_size(char *buf, int buf_size, int64_t size); +int path_is_absolute(const char *path); +void path_combine(char *dest, int dest_size, + const char *base_path, + const char *filename); + +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size); + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, Error **errp); + +void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); +void *qemu_blockalign(BlockDriverState *bs, size_t size); + +#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048 + +void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable); +int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector); +int64_t bdrv_get_dirty_count(BlockDriverState *bs); + +void bdrv_enable_copy_on_read(BlockDriverState *bs); +void bdrv_disable_copy_on_read(BlockDriverState *bs); + +void bdrv_set_in_use(BlockDriverState *bs, int in_use); +int bdrv_in_use(BlockDriverState *bs); + +enum BlockAcctType { + BDRV_ACCT_READ, + BDRV_ACCT_WRITE, + BDRV_ACCT_FLUSH, + BDRV_MAX_IOTYPE, +}; + +typedef struct BlockAcctCookie { + int64_t bytes; + int64_t start_time_ns; + enum BlockAcctType type; +} BlockAcctCookie; + +void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, + int64_t bytes, enum BlockAcctType type); +void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie); + +typedef enum { + BLKDBG_L1_UPDATE, + + BLKDBG_L1_GROW_ALLOC_TABLE, + BLKDBG_L1_GROW_WRITE_TABLE, + BLKDBG_L1_GROW_ACTIVATE_TABLE, + + BLKDBG_L2_LOAD, + BLKDBG_L2_UPDATE, + BLKDBG_L2_UPDATE_COMPRESSED, + BLKDBG_L2_ALLOC_COW_READ, + BLKDBG_L2_ALLOC_WRITE, + + BLKDBG_READ_AIO, + BLKDBG_READ_BACKING_AIO, + BLKDBG_READ_COMPRESSED, + + BLKDBG_WRITE_AIO, + BLKDBG_WRITE_COMPRESSED, + + BLKDBG_VMSTATE_LOAD, + BLKDBG_VMSTATE_SAVE, + + BLKDBG_COW_READ, + BLKDBG_COW_WRITE, + + BLKDBG_REFTABLE_LOAD, + BLKDBG_REFTABLE_GROW, + + BLKDBG_REFBLOCK_LOAD, + BLKDBG_REFBLOCK_UPDATE, + BLKDBG_REFBLOCK_UPDATE_PART, + BLKDBG_REFBLOCK_ALLOC, + BLKDBG_REFBLOCK_ALLOC_HOOKUP, + BLKDBG_REFBLOCK_ALLOC_WRITE, + BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS, + BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE, + BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE, + + BLKDBG_CLUSTER_ALLOC, + BLKDBG_CLUSTER_ALLOC_BYTES, + BLKDBG_CLUSTER_FREE, + + BLKDBG_EVENT_MAX, +} BlkDebugEvent; + +#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt) +void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +#endif diff --git a/include/block/block_int.h b/include/block/block_int.h new file mode 100644 index 0000000000..d06de2637b --- /dev/null +++ b/include/block/block_int.h @@ -0,0 +1,366 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_H +#define BLOCK_INT_H + +#include "block/block.h" +#include "qemu-option.h" +#include "qemu-queue.h" +#include "block/coroutine.h" +#include "qemu-timer.h" +#include "qapi-types.h" +#include "qapi/qmp/qerror.h" +#include "monitor.h" + +#define BLOCK_FLAG_ENCRYPT 1 +#define BLOCK_FLAG_COMPAT6 4 +#define BLOCK_FLAG_LAZY_REFCOUNTS 8 + +#define BLOCK_IO_LIMIT_READ 0 +#define BLOCK_IO_LIMIT_WRITE 1 +#define BLOCK_IO_LIMIT_TOTAL 2 + +#define BLOCK_IO_SLICE_TIME 100000000 +#define NANOSECONDS_PER_SECOND 1000000000.0 + +#define BLOCK_OPT_SIZE "size" +#define BLOCK_OPT_ENCRYPT "encryption" +#define BLOCK_OPT_COMPAT6 "compat6" +#define BLOCK_OPT_BACKING_FILE "backing_file" +#define BLOCK_OPT_BACKING_FMT "backing_fmt" +#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" +#define BLOCK_OPT_TABLE_SIZE "table_size" +#define BLOCK_OPT_PREALLOC "preallocation" +#define BLOCK_OPT_SUBFMT "subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" +#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" + +typedef struct BdrvTrackedRequest BdrvTrackedRequest; + +typedef struct BlockIOLimit { + int64_t bps[3]; + int64_t iops[3]; +} BlockIOLimit; + +typedef struct BlockIOBaseValue { + uint64_t bytes[2]; + uint64_t ios[2]; +} BlockIOBaseValue; + +struct BlockDriver { + const char *format_name; + int instance_size; + int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); + int (*bdrv_probe_device)(const char *filename); + + /* For handling image reopen for split or non-split files */ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + + int (*bdrv_open)(BlockDriverState *bs, int flags); + int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags); + int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); + int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); + void (*bdrv_close)(BlockDriverState *bs); + void (*bdrv_rebind)(BlockDriverState *bs); + int (*bdrv_create)(const char *filename, QEMUOptionParameter *options); + int (*bdrv_set_key)(BlockDriverState *bs, const char *key); + int (*bdrv_make_empty)(BlockDriverState *bs); + /* aio */ + BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); + BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); + BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); + BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); + + int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL and .bdrv_co_writev() will be called + * instead. + */ + int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); + int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); + int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum); + + /* + * Invalidate any cached meta-data. + */ + void (*bdrv_invalidate_cache)(BlockDriverState *bs); + + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example raw-posix calls fsync()). + */ + int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); + + /* + * Flushes all internal caches to the OS. The data may still sit in a + * writeback cache of the host OS, but it will survive a crash of the qemu + * process. + */ + int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); + + const char *protocol_name; + int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset); + int64_t (*bdrv_getlength)(BlockDriverState *bs); + int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); + int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); + + int (*bdrv_snapshot_create)(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); + int (*bdrv_snapshot_goto)(BlockDriverState *bs, + const char *snapshot_id); + int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id); + int (*bdrv_snapshot_list)(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); + int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, + const char *snapshot_name); + int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + + int (*bdrv_save_vmstate)(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size); + int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + + int (*bdrv_change_backing_file)(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); + + /* removable device specific */ + int (*bdrv_is_inserted)(BlockDriverState *bs); + int (*bdrv_media_changed)(BlockDriverState *bs); + void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); + void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); + + /* to control generic scsi devices */ + int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf); + BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque); + + /* List of options for creating images, terminated by name == NULL */ + QEMUOptionParameter *create_options; + + + /* + * Returns 0 for completed check, -errno for internal errors. + * The check results are stored in result. + */ + int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result, + BdrvCheckMode fix); + + void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); + + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + + /* + * Returns 1 if newly created images are guaranteed to contain only + * zeros, 0 otherwise. + */ + int (*bdrv_has_zero_init)(BlockDriverState *bs); + + QLIST_ENTRY(BlockDriver) list; +}; + +/* + * Note: the function bdrv_append() copies and swaps contents of + * BlockDriverStates, so if you add new fields to this struct, please + * inspect bdrv_append() to determine if the new fields need to be + * copied as well. + */ +struct BlockDriverState { + int64_t total_sectors; /* if we are reading a disk image, give its + size in sectors */ + int read_only; /* if true, the media is read only */ + int open_flags; /* flags used to open the file, re-used for re-open */ + int encrypted; /* if true, the media is encrypted */ + int valid_key; /* if true, a valid encryption key has been set */ + int sg; /* if true, the device is a /dev/sg* */ + int copy_on_read; /* if true, copy read backing sectors into image + note this is a reference count */ + + BlockDriver *drv; /* NULL means no media */ + void *opaque; + + void *dev; /* attached device model, if any */ + /* TODO change to DeviceState when all users are qdevified */ + const BlockDevOps *dev_ops; + void *dev_opaque; + + char filename[1024]; + char backing_file[1024]; /* if non zero, the image is a diff of + this file image */ + char backing_format[16]; /* if non-zero and backing_file exists */ + int is_temporary; + + BlockDriverState *backing_hd; + BlockDriverState *file; + + NotifierList close_notifiers; + + /* number of in-flight copy-on-read requests */ + unsigned int copy_on_read_in_flight; + + /* the time for latest disk I/O */ + int64_t slice_time; + int64_t slice_start; + int64_t slice_end; + BlockIOLimit io_limits; + BlockIOBaseValue io_base; + CoQueue throttled_reqs; + QEMUTimer *block_timer; + bool io_limits_enabled; + + /* I/O stats (display with "info blockstats"). */ + uint64_t nr_bytes[BDRV_MAX_IOTYPE]; + uint64_t nr_ops[BDRV_MAX_IOTYPE]; + uint64_t total_time_ns[BDRV_MAX_IOTYPE]; + uint64_t wr_highest_sector; + + /* Whether the disk can expand beyond total_sectors */ + int growable; + + /* the memory alignment required for the buffers handled by this driver */ + int buffer_alignment; + + /* do we need to tell the quest if we have a volatile write cache? */ + int enable_write_cache; + + /* NOTE: the following infos are only hints for real hardware + drivers. They are not used by the block driver */ + BlockdevOnError on_read_error, on_write_error; + bool iostatus_enabled; + BlockDeviceIoStatus iostatus; + char device_name[32]; + unsigned long *dirty_bitmap; + int64_t dirty_count; + int in_use; /* users other than guest access, eg. block migration */ + QTAILQ_ENTRY(BlockDriverState) list; + + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + + /* long-running background operation */ + BlockJob *job; + +}; + +int get_tmp_filename(char *filename, int size); + +void bdrv_set_io_limits(BlockDriverState *bs, + BlockIOLimit *io_limits); + +#ifdef _WIN32 +int is_windows_drive(const char *filename); +#endif +void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, + enum MonitorEvent ev, + BlockErrorAction action, bool is_read); + +/** + * stream_start: + * @bs: Block device to operate on. + * @base: Block device that will become the new base, or %NULL to + * flatten the whole backing file chain onto @bs. + * @base_id: The file name that will be written to @bs as the new + * backing file if the job completes. Ignored if @base is %NULL. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a streaming operation on @bs. Clusters that are unallocated + * in @bs, but allocated in any image between @base and @bs (both + * exclusive) will be written to @bs. At the end of a successful + * streaming job, the backing file of @bs will be changed to + * @base_id in the written image and to @base in the live BlockDriverState. + */ +void stream_start(BlockDriverState *bs, BlockDriverState *base, + const char *base_id, int64_t speed, BlockdevOnError on_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * commit_start: + * @bs: Top Block device + * @base: Block device that will be written into, and become the new top + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + */ +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +/* + * mirror_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @mode: Whether to collapse all images in the chain to the target. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @bs until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. + */ +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +#endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h new file mode 100644 index 0000000000..c290d07bba --- /dev/null +++ b/include/block/blockjob.h @@ -0,0 +1,278 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCKJOB_H +#define BLOCKJOB_H 1 + +#include "block/block.h" + +/** + * BlockJobType: + * + * A class type for block job objects. + */ +typedef struct BlockJobType { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + const char *job_type; + + /** Optional callback for job types that support setting a speed limit */ + void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); + + /** Optional callback for job types that need to forward I/O status reset */ + void (*iostatus_reset)(BlockJob *job); + + /** + * Optional callback for job types whose completion must be triggered + * manually. + */ + void (*complete)(BlockJob *job, Error **errp); +} BlockJobType; + +/** + * BlockJob: + * + * Long-running operation on a BlockDriverState. + */ +struct BlockJob { + /** The job type, including the job vtable. */ + const BlockJobType *job_type; + + /** The block device on which the job is operating. */ + BlockDriverState *bs; + + /** + * The coroutine that executes the job. If not NULL, it is + * reentered when busy is false and the job is cancelled. + */ + Coroutine *co; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has been cancelled, it should only yield + * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. + */ + bool cancelled; + + /** + * Set to true if the job is either paused, or will pause itself + * as soon as possible (if busy == true). + */ + bool paused; + + /** + * Set to false by the job while it is in a quiescent state, where + * no I/O is pending and the job has yielded on any condition + * that is not detected by #qemu_aio_wait, such as a timer. + */ + bool busy; + + /** Status that is published by the query-block-jobs QMP API */ + BlockDeviceIoStatus iostatus; + + /** Offset that is published by the query-block-jobs QMP API */ + int64_t offset; + + /** Length that is published by the query-block-jobs QMP API */ + int64_t len; + + /** Speed that was set with @block_job_set_speed. */ + int64_t speed; + + /** The completion function that will be called when the job completes. */ + BlockDriverCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ + void *opaque; +}; + +/** + * block_job_create: + * @job_type: The class object for the newly-created job. + * @bs: The block + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * block_job_sleep_ns: + * @job: The job that calls the function. + * @clock: The clock to sleep on. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * nanoseconds. Canceling the job will interrupt the wait immediately. + */ +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); + +/** + * block_job_completed: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ +void block_job_completed(BlockJob *job, int ret); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * @errp: Error object. + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); + +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the specified job. + */ +void block_job_cancel(BlockJob *job); + +/** + * block_job_complete: + * @job: The job to be completed. + * @errp: Error object. + * + * Asynchronously complete the specified job. + */ +void block_job_complete(BlockJob *job, Error **errp); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ +bool block_job_is_cancelled(BlockJob *job); + +/** + * block_job_query: + * @job: The job to get information about. + * + * Return information about a job. + */ +BlockJobInfo *block_job_query(BlockJob *job); + +/** + * block_job_pause: + * @job: The job to be paused. + * + * Asynchronously pause the specified job. + */ +void block_job_pause(BlockJob *job); + +/** + * block_job_resume: + * @job: The job to be resumed. + * + * Resume the specified job. + */ +void block_job_resume(BlockJob *job); + +/** + * qobject_from_block_job: + * @job: The job whose information is requested. + * + * Return a QDict corresponding to @job's query-block-jobs entry. + */ +QObject *qobject_from_block_job(BlockJob *job); + +/** + * block_job_ready: + * @job: The job which is now ready to complete. + * + * Send a BLOCK_JOB_READY event for the specified job. + */ +void block_job_ready(BlockJob *job); + +/** + * block_job_is_paused: + * @job: The job being queried. + * + * Returns whether the job is currently paused, or will pause + * as soon as it reaches a sleeping point. + */ +bool block_job_is_paused(BlockJob *job); + +/** + * block_job_cancel_sync: + * @job: The job to be canceled. + * + * Synchronously cancel the job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. + */ +int block_job_cancel_sync(BlockJob *job); + +/** + * block_job_iostatus_reset: + * @job: The job whose I/O status should be reset. + * + * Reset I/O status on @job and on BlockDriverState objects it uses, + * other than job->bs. + */ +void block_job_iostatus_reset(BlockJob *job); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @bs: The block device on which to set an I/O error. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. + */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error); +#endif diff --git a/include/block/coroutine.h b/include/block/coroutine.h new file mode 100644 index 0000000000..34c15d4116 --- /dev/null +++ b/include/block/coroutine.h @@ -0,0 +1,211 @@ +/* + * QEMU coroutine implementation + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Kevin Wolf <kwolf@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef QEMU_COROUTINE_H +#define QEMU_COROUTINE_H + +#include <stdbool.h> +#include "qemu-queue.h" +#include "qemu-timer.h" + +/** + * Coroutines are a mechanism for stack switching and can be used for + * cooperative userspace threading. These functions provide a simple but + * useful flavor of coroutines that is suitable for writing sequential code, + * rather than callbacks, for operations that need to give up control while + * waiting for events to complete. + * + * These functions are re-entrant and may be used outside the global mutex. + */ + +/** + * Mark a function that executes in coroutine context + * + * Functions that execute in coroutine context cannot be called directly from + * normal functions. In the future it would be nice to enable compiler or + * static checker support for catching such errors. This annotation might make + * it possible and in the meantime it serves as documentation. + * + * For example: + * + * static void coroutine_fn foo(void) { + * .... + * } + */ +#define coroutine_fn + +typedef struct Coroutine Coroutine; + +/** + * Coroutine entry point + * + * When the coroutine is entered for the first time, opaque is passed in as an + * argument. + * + * When this function returns, the coroutine is destroyed automatically and + * execution continues in the caller who last entered the coroutine. + */ +typedef void coroutine_fn CoroutineEntry(void *opaque); + +/** + * Create a new coroutine + * + * Use qemu_coroutine_enter() to actually transfer control to the coroutine. + */ +Coroutine *qemu_coroutine_create(CoroutineEntry *entry); + +/** + * Transfer control to a coroutine + * + * The opaque argument is passed as the argument to the entry point when + * entering the coroutine for the first time. It is subsequently ignored. + */ +void qemu_coroutine_enter(Coroutine *coroutine, void *opaque); + +/** + * Transfer control back to a coroutine's caller + * + * This function does not return until the coroutine is re-entered using + * qemu_coroutine_enter(). + */ +void coroutine_fn qemu_coroutine_yield(void); + +/** + * Get the currently executing coroutine + */ +Coroutine *coroutine_fn qemu_coroutine_self(void); + +/** + * Return whether or not currently inside a coroutine + * + * This can be used to write functions that work both when in coroutine context + * and when not in coroutine context. Note that such functions cannot use the + * coroutine_fn annotation since they work outside coroutine context. + */ +bool qemu_in_coroutine(void); + + + +/** + * CoQueues are a mechanism to queue coroutines in order to continue executing + * them later. They provide the fundamental primitives on which coroutine locks + * are built. + */ +typedef struct CoQueue { + QTAILQ_HEAD(, Coroutine) entries; +} CoQueue; + +/** + * Initialise a CoQueue. This must be called before any other operation is used + * on the CoQueue. + */ +void qemu_co_queue_init(CoQueue *queue); + +/** + * Adds the current coroutine to the CoQueue and transfers control to the + * caller of the coroutine. + */ +void coroutine_fn qemu_co_queue_wait(CoQueue *queue); + +/** + * Adds the current coroutine to the head of the CoQueue and transfers control to the + * caller of the coroutine. + */ +void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue); + +/** + * Restarts the next coroutine in the CoQueue and removes it from the queue. + * + * Returns true if a coroutine was restarted, false if the queue is empty. + */ +bool qemu_co_queue_next(CoQueue *queue); + +/** + * Restarts all coroutines in the CoQueue and leaves the queue empty. + */ +void qemu_co_queue_restart_all(CoQueue *queue); + +/** + * Checks if the CoQueue is empty. + */ +bool qemu_co_queue_empty(CoQueue *queue); + + +/** + * Provides a mutex that can be used to synchronise coroutines + */ +typedef struct CoMutex { + bool locked; + CoQueue queue; +} CoMutex; + +/** + * Initialises a CoMutex. This must be called before any other operation is used + * on the CoMutex. + */ +void qemu_co_mutex_init(CoMutex *mutex); + +/** + * Locks the mutex. If the lock cannot be taken immediately, control is + * transferred to the caller of the current coroutine. + */ +void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); + +/** + * Unlocks the mutex and schedules the next coroutine that was waiting for this + * lock to be run. + */ +void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); + +typedef struct CoRwlock { + bool writer; + int reader; + CoQueue queue; +} CoRwlock; + +/** + * Initialises a CoRwlock. This must be called before any other operation + * is used on the CoRwlock + */ +void qemu_co_rwlock_init(CoRwlock *lock); + +/** + * Read locks the CoRwlock. If the lock cannot be taken immediately because + * of a parallel writer, control is transferred to the caller of the current + * coroutine. + */ +void qemu_co_rwlock_rdlock(CoRwlock *lock); + +/** + * Write Locks the mutex. If the lock cannot be taken immediately because + * of a parallel reader, control is transferred to the caller of the current + * coroutine. + */ +void qemu_co_rwlock_wrlock(CoRwlock *lock); + +/** + * Unlocks the read/write lock and schedules the next coroutine that was + * waiting for this lock to be run. + */ +void qemu_co_rwlock_unlock(CoRwlock *lock); + +/** + * Yield the coroutine for a given duration + * + * Note this function uses timers and hence only works when a main loop is in + * use. See main-loop.h and do not use from qemu-tool programs. + */ +void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns); + +#endif /* QEMU_COROUTINE_H */ diff --git a/include/block/coroutine_int.h b/include/block/coroutine_int.h new file mode 100644 index 0000000000..282a3ceda6 --- /dev/null +++ b/include/block/coroutine_int.h @@ -0,0 +1,49 @@ +/* + * Coroutine internals + * + * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_COROUTINE_INT_H +#define QEMU_COROUTINE_INT_H + +#include "qemu-queue.h" +#include "block/coroutine.h" + +typedef enum { + COROUTINE_YIELD = 1, + COROUTINE_TERMINATE = 2, +} CoroutineAction; + +struct Coroutine { + CoroutineEntry *entry; + void *entry_arg; + Coroutine *caller; + QSLIST_ENTRY(Coroutine) pool_next; + QTAILQ_ENTRY(Coroutine) co_queue_next; +}; + +Coroutine *qemu_coroutine_new(void); +void qemu_coroutine_delete(Coroutine *co); +CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, + CoroutineAction action); + +#endif diff --git a/include/block/nbd.h b/include/block/nbd.h new file mode 100644 index 0000000000..344f05b794 --- /dev/null +++ b/include/block/nbd.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> + * + * Network Block Device + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef NBD_H +#define NBD_H + +#include <sys/types.h> + +#include "qemu-common.h" + +struct nbd_request { + uint32_t magic; + uint32_t type; + uint64_t handle; + uint64_t from; + uint32_t len; +} QEMU_PACKED; + +struct nbd_reply { + uint32_t magic; + uint32_t error; + uint64_t handle; +} QEMU_PACKED; + +#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ +#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ +#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ +#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ +#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ + +#define NBD_CMD_MASK_COMMAND 0x0000ffff +#define NBD_CMD_FLAG_FUA (1 << 16) + +enum { + NBD_CMD_READ = 0, + NBD_CMD_WRITE = 1, + NBD_CMD_DISC = 2, + NBD_CMD_FLUSH = 3, + NBD_CMD_TRIM = 4 +}; + +#define NBD_DEFAULT_PORT 10809 + +#define NBD_BUFFER_SIZE (1024*1024) + +ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); +int tcp_socket_outgoing(const char *address, uint16_t port); +int tcp_socket_incoming(const char *address, uint16_t port); +int tcp_socket_outgoing_spec(const char *address_and_port); +int tcp_socket_incoming_spec(const char *address_and_port); +int unix_socket_outgoing(const char *path); +int unix_socket_incoming(const char *path); + +int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, + off_t *size, size_t *blocksize); +int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize); +ssize_t nbd_send_request(int csock, struct nbd_request *request); +ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply); +int nbd_client(int fd); +int nbd_disconnect(int fd); + +typedef struct NBDExport NBDExport; +typedef struct NBDClient NBDClient; + +NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, + off_t size, uint32_t nbdflags, + void (*close)(NBDExport *)); +void nbd_export_close(NBDExport *exp); +void nbd_export_get(NBDExport *exp); +void nbd_export_put(NBDExport *exp); + +BlockDriverState *nbd_export_get_blockdev(NBDExport *exp); + +NBDExport *nbd_export_find(const char *name); +void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_close_all(void); + +NBDClient *nbd_client_new(NBDExport *exp, int csock, + void (*close)(NBDClient *)); +void nbd_client_close(NBDClient *client); +void nbd_client_get(NBDClient *client); +void nbd_client_put(NBDClient *client); + +#endif diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h new file mode 100644 index 0000000000..a87b287081 --- /dev/null +++ b/include/block/thread-pool.h @@ -0,0 +1,34 @@ +/* + * QEMU block layer thread pool + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef QEMU_THREAD_POOL_H +#define QEMU_THREAD_POOL_H 1 + +#include "qemu-common.h" +#include "qemu-queue.h" +#include "qemu-thread.h" +#include "block/coroutine.h" +#include "block/block_int.h" + +typedef int ThreadPoolFunc(void *opaque); + +BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, + BlockDriverCompletionFunc *cb, void *opaque); +int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); +void thread_pool_submit(ThreadPoolFunc *func, void *arg); + +#endif |