aboutsummaryrefslogtreecommitdiff
path: root/include/block
diff options
context:
space:
mode:
Diffstat (limited to 'include/block')
-rw-r--r--include/block/aes.h26
-rw-r--r--include/block/aio.h240
-rw-r--r--include/block/block.h439
-rw-r--r--include/block/block_int.h366
-rw-r--r--include/block/blockjob.h278
-rw-r--r--include/block/coroutine.h211
-rw-r--r--include/block/coroutine_int.h49
-rw-r--r--include/block/nbd.h100
-rw-r--r--include/block/thread-pool.h34
9 files changed, 1743 insertions, 0 deletions
diff --git a/include/block/aes.h b/include/block/aes.h
new file mode 100644
index 0000000000..a0167eb7d5
--- /dev/null
+++ b/include/block/aes.h
@@ -0,0 +1,26 @@
+#ifndef QEMU_AES_H
+#define QEMU_AES_H
+
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+struct aes_key_st {
+ uint32_t rd_key[4 *(AES_MAXNR + 1)];
+ int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned long length, const AES_KEY *key,
+ unsigned char *ivec, const int enc);
+
+#endif
diff --git a/include/block/aio.h b/include/block/aio.h
new file mode 100644
index 0000000000..31884a8f16
--- /dev/null
+++ b/include/block/aio.h
@@ -0,0 +1,240 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "event_notifier.h"
+
+typedef struct BlockDriverAIOCB BlockDriverAIOCB;
+typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+
+typedef struct AIOCBInfo {
+ void (*cancel)(BlockDriverAIOCB *acb);
+ size_t aiocb_size;
+} AIOCBInfo;
+
+struct BlockDriverAIOCB {
+ const AIOCBInfo *aiocb_info;
+ BlockDriverState *bs;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+};
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qemu_aio_release(void *p);
+
+typedef struct AioHandler AioHandler;
+typedef void QEMUBHFunc(void *opaque);
+typedef void IOHandler(void *opaque);
+
+typedef struct AioContext {
+ GSource source;
+
+ /* The list of registered AIO handlers */
+ QLIST_HEAD(, AioHandler) aio_handlers;
+
+ /* This is a simple lock used to protect the aio_handlers list.
+ * Specifically, it's used to ensure that no callbacks are removed while
+ * we're walking and dispatching callbacks.
+ */
+ int walking_handlers;
+
+ /* Anchor of the list of Bottom Halves belonging to the context */
+ struct QEMUBH *first_bh;
+
+ /* A simple lock used to protect the first_bh list, and ensure that
+ * no callbacks are removed while we're walking and dispatching callbacks.
+ */
+ int walking_bh;
+
+ /* Used for aio_notify. */
+ EventNotifier notifier;
+} AioContext;
+
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
+
+/**
+ * aio_context_new: Allocate a new AioContext.
+ *
+ * AioContext provide a mini event-loop that can be waited on synchronously.
+ * They also provide bottom halves, a service to execute a piece of code
+ * as soon as possible.
+ */
+AioContext *aio_context_new(void);
+
+/**
+ * aio_context_ref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Add a reference to an AioContext.
+ */
+void aio_context_ref(AioContext *ctx);
+
+/**
+ * aio_context_unref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Drop a reference to an AioContext.
+ */
+void aio_context_unref(AioContext *ctx);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * Bottom halves are lightweight callbacks whose invocation is guaranteed
+ * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
+ * is opaque and must be allocated prior to its use.
+ */
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
+/**
+ * aio_notify: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, aio_notify forces
+ * aio_wait to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_wait again very soon,
+ * or go through another iteration of the GLib main loop. Hence, aio_notify
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling aio_notify is rarely necessary, because for example scheduling
+ * a bottom half calls it already.
+ */
+void aio_notify(AioContext *ctx);
+
+/**
+ * aio_bh_poll: Poll bottom halves for an AioContext.
+ *
+ * These are internal functions used by the QEMU main loop.
+ */
+int aio_bh_poll(AioContext *ctx);
+
+/**
+ * qemu_bh_schedule: Schedule a bottom half.
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked. This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ */
+void qemu_bh_schedule(QEMUBH *bh);
+
+/**
+ * qemu_bh_cancel: Cancel execution of a bottom half.
+ *
+ * Canceling execution of a bottom half undoes the effect of calls to
+ * qemu_bh_schedule without freeing its resources yet. While cancellation
+ * itself is also wait-free and thread-safe, it can of course race with the
+ * loop that executes bottom halves unless you are holding the iothread
+ * mutex. This makes it mostly useless if you are not holding the mutex.
+ *
+ * @bh: The bottom half to be canceled.
+ */
+void qemu_bh_cancel(QEMUBH *bh);
+
+/**
+ *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
+ *
+ * Deleting a bottom half frees the memory that was allocated for it by
+ * qemu_bh_new. It also implies canceling the bottom half if it was
+ * scheduled.
+ *
+ * @bh: The bottom half to be deleted.
+ */
+void qemu_bh_delete(QEMUBH *bh);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_pending(AioContext *ctx);
+
+/* Progress in completing AIO work to occur. This can issue new pending
+ * aio as a result of executing I/O completion or bh callbacks.
+ *
+ * If there is no pending AIO operation or completion (bottom half),
+ * return false. If there are pending bottom halves, return true.
+ *
+ * If there are no pending bottom halves, but there are pending AIO
+ * operations, it may not be possible to make any progress without
+ * blocking. If @blocking is true, this function will wait until one
+ * or more AIO events have completed, to ensure something has moved
+ * before returning.
+ *
+ * If @blocking is false, this function will also return false if the
+ * function cannot make any progress without blocking.
+ */
+bool aio_poll(AioContext *ctx, bool blocking);
+
+#ifdef CONFIG_POSIX
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
+ * be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+void aio_set_fd_handler(AioContext *ctx,
+ int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+/* Register an event notifier and associated callbacks. Behaves very similarly
+ * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
+ * will be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of event_notifier_set_handler.
+ */
+void aio_set_event_notifier(AioContext *ctx,
+ EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+/* Return a GSource that lets the main loop poll the file descriptors attached
+ * to this AioContext.
+ */
+GSource *aio_get_g_source(AioContext *ctx);
+
+/* Functions to operate on the main QEMU AioContext. */
+
+bool qemu_aio_wait(void);
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+#ifdef CONFIG_POSIX
+void qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+#endif
diff --git a/include/block/block.h b/include/block/block.h
new file mode 100644
index 0000000000..d49ce4dbc5
--- /dev/null
+++ b/include/block/block.h
@@ -0,0 +1,439 @@
+#ifndef BLOCK_H
+#define BLOCK_H
+
+#include "block/aio.h"
+#include "qemu-common.h"
+#include "qemu-option.h"
+#include "block/coroutine.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi-types.h"
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+} BlockFragInfo;
+
+typedef struct QEMUSnapshotInfo {
+ char id_str[128]; /* unique snapshot id */
+ /* the following fields are informative. They are not needed for
+ the consistency of the snapshot */
+ char name[256]; /* user chosen name */
+ uint64_t vm_state_size; /* VM state info size */
+ uint32_t date_sec; /* UTC date of the snapshot */
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec; /* VM clock relative to boot */
+} QEMUSnapshotInfo;
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+} BlockDevOps;
+
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+
+typedef enum {
+ BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
+} BlockErrorAction;
+
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ void *opaque;
+} BDRVReopenState;
+
+
+void bdrv_iostatus_enable(BlockDriverState *bs);
+void bdrv_iostatus_reset(BlockDriverState *bs);
+void bdrv_iostatus_disable(BlockDriverState *bs);
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
+void bdrv_info_print(Monitor *mon, const QObject *data);
+void bdrv_info(Monitor *mon, QObject **ret_data);
+void bdrv_stats_print(Monitor *mon, const QObject *data);
+void bdrv_info_stats(Monitor *mon, QObject **ret_data);
+
+/* disk I/O throttling */
+void bdrv_io_limits_enable(BlockDriverState *bs);
+void bdrv_io_limits_disable(BlockDriverState *bs);
+bool bdrv_io_limits_enabled(BlockDriverState *bs);
+
+void bdrv_init(void);
+void bdrv_init_with_whitelist(void);
+BlockDriver *bdrv_find_protocol(const char *filename);
+BlockDriver *bdrv_find_format(const char *format_name);
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options);
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
+BlockDriverState *bdrv_new(const char *device_name);
+void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_delete(BlockDriverState *bs);
+int bdrv_parse_cache_flags(const char *mode, int *flags);
+int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
+int bdrv_open_backing_file(BlockDriverState *bs);
+int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
+ BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
+int bdrv_attach_dev(BlockDriverState *bs, void *dev);
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
+void bdrv_detach_dev(BlockDriverState *bs, void *dev);
+void *bdrv_get_attached_dev(BlockDriverState *bs);
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+ void *opaque);
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force);
+bool bdrv_dev_has_removable_media(BlockDriverState *bs);
+bool bdrv_dev_is_tray_open(BlockDriverState *bs);
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+int bdrv_get_backing_file_depth(BlockDriverState *bs);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_commit_all(void);
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+/* async block I/O */
+typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
+ int sector_num);
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+
+typedef struct BlockRequest {
+ /* Fields to be filled by multiwrite caller */
+ int64_t sector;
+ int nb_sectors;
+ QEMUIOVector *qiov;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+
+ /* Filled by multiwrite implementation */
+ int error;
+} BlockRequest;
+
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs);
+
+/* sg packet commands */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+/* Invalidate any cached metadata used by image formats */
+void bdrv_invalidate_cache(BlockDriverState *bs);
+void bdrv_invalidate_cache_all(void);
+
+void bdrv_clear_incoming_migration_all(void);
+
+/* Ensure contents are flushed to disk. */
+int bdrv_flush(BlockDriverState *bs);
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+void bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all(void);
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_has_zero_init(BlockDriverState *bs);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum);
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_sg(BlockDriverState *bs);
+int bdrv_enable_write_cache(BlockDriverState *bs);
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_media_changed(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+BlockDriverState *bdrv_find(const char *name);
+BlockDriverState *bdrv_next(BlockDriverState *bs);
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
+ void *opaque);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_key_required(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+int bdrv_query_missing_keys(void);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque);
+const char *bdrv_get_device_name(BlockDriverState *bs);
+int bdrv_get_flags(BlockDriverState *bs);
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+void bdrv_get_full_backing_filename(BlockDriverState *bs,
+ char *dest, size_t sz);
+BlockInfo *bdrv_query_info(BlockDriverState *s);
+BlockStats *bdrv_query_stats(const BlockDriverState *bs);
+int bdrv_can_snapshot(BlockDriverState *bs);
+int bdrv_is_snapshot(BlockDriverState *bs);
+BlockDriverState *bdrv_snapshots(void);
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id);
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name);
+char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn);
+
+char *get_human_readable_size(char *buf, int buf_size, int64_t size);
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename);
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags, Error **errp);
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+
+#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048
+
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector);
+int64_t bdrv_get_dirty_count(BlockDriverState *bs);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use);
+int bdrv_in_use(BlockDriverState *bs);
+
+enum BlockAcctType {
+ BDRV_ACCT_READ,
+ BDRV_ACCT_WRITE,
+ BDRV_ACCT_FLUSH,
+ BDRV_MAX_IOTYPE,
+};
+
+typedef struct BlockAcctCookie {
+ int64_t bytes;
+ int64_t start_time_ns;
+ enum BlockAcctType type;
+} BlockAcctCookie;
+
+void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
+ int64_t bytes, enum BlockAcctType type);
+void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
+
+typedef enum {
+ BLKDBG_L1_UPDATE,
+
+ BLKDBG_L1_GROW_ALLOC_TABLE,
+ BLKDBG_L1_GROW_WRITE_TABLE,
+ BLKDBG_L1_GROW_ACTIVATE_TABLE,
+
+ BLKDBG_L2_LOAD,
+ BLKDBG_L2_UPDATE,
+ BLKDBG_L2_UPDATE_COMPRESSED,
+ BLKDBG_L2_ALLOC_COW_READ,
+ BLKDBG_L2_ALLOC_WRITE,
+
+ BLKDBG_READ_AIO,
+ BLKDBG_READ_BACKING_AIO,
+ BLKDBG_READ_COMPRESSED,
+
+ BLKDBG_WRITE_AIO,
+ BLKDBG_WRITE_COMPRESSED,
+
+ BLKDBG_VMSTATE_LOAD,
+ BLKDBG_VMSTATE_SAVE,
+
+ BLKDBG_COW_READ,
+ BLKDBG_COW_WRITE,
+
+ BLKDBG_REFTABLE_LOAD,
+ BLKDBG_REFTABLE_GROW,
+
+ BLKDBG_REFBLOCK_LOAD,
+ BLKDBG_REFBLOCK_UPDATE,
+ BLKDBG_REFBLOCK_UPDATE_PART,
+ BLKDBG_REFBLOCK_ALLOC,
+ BLKDBG_REFBLOCK_ALLOC_HOOKUP,
+ BLKDBG_REFBLOCK_ALLOC_WRITE,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
+ BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
+
+ BLKDBG_CLUSTER_ALLOC,
+ BLKDBG_CLUSTER_ALLOC_BYTES,
+ BLKDBG_CLUSTER_FREE,
+
+ BLKDBG_EVENT_MAX,
+} BlkDebugEvent;
+
+#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+#endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
new file mode 100644
index 0000000000..d06de2637b
--- /dev/null
+++ b/include/block/block_int.h
@@ -0,0 +1,366 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_H
+#define BLOCK_INT_H
+
+#include "block/block.h"
+#include "qemu-option.h"
+#include "qemu-queue.h"
+#include "block/coroutine.h"
+#include "qemu-timer.h"
+#include "qapi-types.h"
+#include "qapi/qmp/qerror.h"
+#include "monitor.h"
+
+#define BLOCK_FLAG_ENCRYPT 1
+#define BLOCK_FLAG_COMPAT6 4
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE 1
+#define BLOCK_IO_LIMIT_TOTAL 2
+
+#define BLOCK_IO_SLICE_TIME 100000000
+#define NANOSECONDS_PER_SECOND 1000000000.0
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+
+typedef struct BdrvTrackedRequest BdrvTrackedRequest;
+
+typedef struct BlockIOLimit {
+ int64_t bps[3];
+ int64_t iops[3];
+} BlockIOLimit;
+
+typedef struct BlockIOBaseValue {
+ uint64_t bytes[2];
+ uint64_t ios[2];
+} BlockIOBaseValue;
+
+struct BlockDriver {
+ const char *format_name;
+ int instance_size;
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+ int (*bdrv_probe_device)(const char *filename);
+
+ /* For handling image reopen for split or non-split files */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
+ int (*bdrv_open)(BlockDriverState *bs, int flags);
+ int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
+ int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+ int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+ void (*bdrv_close)(BlockDriverState *bs);
+ void (*bdrv_rebind)(BlockDriverState *bs);
+ int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
+ int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+ /* aio */
+ BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void (*bdrv_invalidate_cache)(BlockDriverState *bs);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example raw-posix calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ const char *protocol_name;
+ int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_name);
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ int (*bdrv_save_vmstate)(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+ int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* removable device specific */
+ int (*bdrv_is_inserted)(BlockDriverState *bs);
+ int (*bdrv_media_changed)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
+ BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ /* List of options for creating images, terminated by name == NULL */
+ QEMUOptionParameter *create_options;
+
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
+
+ /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ QLIST_ENTRY(BlockDriver) list;
+};
+
+/*
+ * Note: the function bdrv_append() copies and swaps contents of
+ * BlockDriverStates, so if you add new fields to this struct, please
+ * inspect bdrv_append() to determine if the new fields need to be
+ * copied as well.
+ */
+struct BlockDriverState {
+ int64_t total_sectors; /* if we are reading a disk image, give its
+ size in sectors */
+ int read_only; /* if true, the media is read only */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ int encrypted; /* if true, the media is encrypted */
+ int valid_key; /* if true, a valid encryption key has been set */
+ int sg; /* if true, the device is a /dev/sg* */
+ int copy_on_read; /* if true, copy read backing sectors into image
+ note this is a reference count */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ void *dev; /* attached device model, if any */
+ /* TODO change to DeviceState when all users are qdevified */
+ const BlockDevOps *dev_ops;
+ void *dev_opaque;
+
+ char filename[1024];
+ char backing_file[1024]; /* if non zero, the image is a diff of
+ this file image */
+ char backing_format[16]; /* if non-zero and backing_file exists */
+ int is_temporary;
+
+ BlockDriverState *backing_hd;
+ BlockDriverState *file;
+
+ NotifierList close_notifiers;
+
+ /* number of in-flight copy-on-read requests */
+ unsigned int copy_on_read_in_flight;
+
+ /* the time for latest disk I/O */
+ int64_t slice_time;
+ int64_t slice_start;
+ int64_t slice_end;
+ BlockIOLimit io_limits;
+ BlockIOBaseValue io_base;
+ CoQueue throttled_reqs;
+ QEMUTimer *block_timer;
+ bool io_limits_enabled;
+
+ /* I/O stats (display with "info blockstats"). */
+ uint64_t nr_bytes[BDRV_MAX_IOTYPE];
+ uint64_t nr_ops[BDRV_MAX_IOTYPE];
+ uint64_t total_time_ns[BDRV_MAX_IOTYPE];
+ uint64_t wr_highest_sector;
+
+ /* Whether the disk can expand beyond total_sectors */
+ int growable;
+
+ /* the memory alignment required for the buffers handled by this driver */
+ int buffer_alignment;
+
+ /* do we need to tell the quest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* NOTE: the following infos are only hints for real hardware
+ drivers. They are not used by the block driver */
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+ char device_name[32];
+ unsigned long *dirty_bitmap;
+ int64_t dirty_count;
+ int in_use; /* users other than guest access, eg. block migration */
+ QTAILQ_ENTRY(BlockDriverState) list;
+
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+ /* long-running background operation */
+ BlockJob *job;
+
+};
+
+int get_tmp_filename(char *filename, int size);
+
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read);
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
+void stream_start(BlockDriverState *bs, BlockDriverState *base,
+ const char *base_id, int64_t speed, BlockdevOnError on_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @bs until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, MirrorSyncMode mode,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+#endif /* BLOCK_INT_H */
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
new file mode 100644
index 0000000000..c290d07bba
--- /dev/null
+++ b/include/block/blockjob.h
@@ -0,0 +1,278 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block/block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ const char *job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+ /** Optional callback for job types that need to forward I/O status reset */
+ void (*iostatus_reset)(BlockJob *job);
+
+ /**
+ * Optional callback for job types whose completion must be triggered
+ * manually.
+ */
+ void (*complete)(BlockJob *job, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
+
+ /**
+ * The coroutine that executes the job. If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+ Coroutine *co;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has been cancelled, it should only yield
+ * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+ */
+ bool cancelled;
+
+ /**
+ * Set to true if the job is either paused, or will pause itself
+ * as soon as possible (if busy == true).
+ */
+ bool paused;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and the job has yielded on any condition
+ * that is not detected by #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Status that is published by the query-block-jobs QMP API */
+ BlockDeviceIoStatus iostatus;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed at the time the @cb it is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds. Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_completed:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_completed(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * qobject_from_block_job:
+ * @job: The job whose information is requested.
+ *
+ * Return a QDict corresponding to @job's query-block-jobs entry.
+ */
+QObject *qobject_from_block_job(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_ready(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job. The completion callback is called
+ * before the function returns. The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM. Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error);
+#endif
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
new file mode 100644
index 0000000000..34c15d4116
--- /dev/null
+++ b/include/block/coroutine.h
@@ -0,0 +1,211 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu-queue.h"
+#include "qemu-timer.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading. These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions. In the future it would be nice to enable compiler or
+ * static checker support for catching such errors. This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ * static void coroutine_fn foo(void) {
+ * ....
+ * }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time. It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context. Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+ QTAILQ_HEAD(, Coroutine) entries;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the head of the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
+ */
+void qemu_co_queue_restart_all(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+ bool locked;
+ CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+typedef struct CoRwlock {
+ bool writer;
+ int reader;
+ CoQueue queue;
+} CoRwlock;
+
+/**
+ * Initialises a CoRwlock. This must be called before any other operation
+ * is used on the CoRwlock
+ */
+void qemu_co_rwlock_init(CoRwlock *lock);
+
+/**
+ * Read locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel writer, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_rdlock(CoRwlock *lock);
+
+/**
+ * Write Locks the mutex. If the lock cannot be taken immediately because
+ * of a parallel reader, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_wrlock(CoRwlock *lock);
+
+/**
+ * Unlocks the read/write lock and schedules the next coroutine that was
+ * waiting for this lock to be run.
+ */
+void qemu_co_rwlock_unlock(CoRwlock *lock);
+
+/**
+ * Yield the coroutine for a given duration
+ *
+ * Note this function uses timers and hence only works when a main loop is in
+ * use. See main-loop.h and do not use from qemu-tool programs.
+ */
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
+
+#endif /* QEMU_COROUTINE_H */
diff --git a/include/block/coroutine_int.h b/include/block/coroutine_int.h
new file mode 100644
index 0000000000..282a3ceda6
--- /dev/null
+++ b/include/block/coroutine_int.h
@@ -0,0 +1,49 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu-queue.h"
+#include "block/coroutine.h"
+
+typedef enum {
+ COROUTINE_YIELD = 1,
+ COROUTINE_TERMINATE = 2,
+} CoroutineAction;
+
+struct Coroutine {
+ CoroutineEntry *entry;
+ void *entry_arg;
+ Coroutine *caller;
+ QSLIST_ENTRY(Coroutine) pool_next;
+ QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+ CoroutineAction action);
+
+#endif
diff --git a/include/block/nbd.h b/include/block/nbd.h
new file mode 100644
index 0000000000..344f05b794
--- /dev/null
+++ b/include/block/nbd.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
+ *
+ * Network Block Device
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef NBD_H
+#define NBD_H
+
+#include <sys/types.h>
+
+#include "qemu-common.h"
+
+struct nbd_request {
+ uint32_t magic;
+ uint32_t type;
+ uint64_t handle;
+ uint64_t from;
+ uint32_t len;
+} QEMU_PACKED;
+
+struct nbd_reply {
+ uint32_t magic;
+ uint32_t error;
+ uint64_t handle;
+} QEMU_PACKED;
+
+#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
+#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */
+#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */
+#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */
+#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
+#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
+
+#define NBD_CMD_MASK_COMMAND 0x0000ffff
+#define NBD_CMD_FLAG_FUA (1 << 16)
+
+enum {
+ NBD_CMD_READ = 0,
+ NBD_CMD_WRITE = 1,
+ NBD_CMD_DISC = 2,
+ NBD_CMD_FLUSH = 3,
+ NBD_CMD_TRIM = 4
+};
+
+#define NBD_DEFAULT_PORT 10809
+
+#define NBD_BUFFER_SIZE (1024*1024)
+
+ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read);
+int tcp_socket_outgoing(const char *address, uint16_t port);
+int tcp_socket_incoming(const char *address, uint16_t port);
+int tcp_socket_outgoing_spec(const char *address_and_port);
+int tcp_socket_incoming_spec(const char *address_and_port);
+int unix_socket_outgoing(const char *path);
+int unix_socket_incoming(const char *path);
+
+int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
+ off_t *size, size_t *blocksize);
+int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize);
+ssize_t nbd_send_request(int csock, struct nbd_request *request);
+ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply);
+int nbd_client(int fd);
+int nbd_disconnect(int fd);
+
+typedef struct NBDExport NBDExport;
+typedef struct NBDClient NBDClient;
+
+NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
+ off_t size, uint32_t nbdflags,
+ void (*close)(NBDExport *));
+void nbd_export_close(NBDExport *exp);
+void nbd_export_get(NBDExport *exp);
+void nbd_export_put(NBDExport *exp);
+
+BlockDriverState *nbd_export_get_blockdev(NBDExport *exp);
+
+NBDExport *nbd_export_find(const char *name);
+void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_close_all(void);
+
+NBDClient *nbd_client_new(NBDExport *exp, int csock,
+ void (*close)(NBDClient *));
+void nbd_client_close(NBDClient *client);
+void nbd_client_get(NBDClient *client);
+void nbd_client_put(NBDClient *client);
+
+#endif
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
new file mode 100644
index 0000000000..a87b287081
--- /dev/null
+++ b/include/block/thread-pool.h
@@ -0,0 +1,34 @@
+/*
+ * QEMU block layer thread pool
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef QEMU_THREAD_POOL_H
+#define QEMU_THREAD_POOL_H 1
+
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "qemu-thread.h"
+#include "block/coroutine.h"
+#include "block/block_int.h"
+
+typedef int ThreadPoolFunc(void *opaque);
+
+BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+
+#endif