diff options
Diffstat (limited to 'block')
37 files changed, 3399 insertions, 747 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d39bf..c067f38e1d 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,10 +2,21 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-obj-y += stream.o -block-obj-$(CONFIG_WIN32) += raw-win32.o +block-obj-y += parallels.o blkdebug.o blkverify.o +block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o +block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o + +ifeq ($(CONFIG_POSIX),y) +block-obj-y += nbd.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o +endif + +common-obj-y += stream.o +common-obj-y += commit.o +common-obj-y += mirror.o + +$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS) diff --git a/block/blkdebug.c b/block/blkdebug.c index 59dcea0650..6f7463772b 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -23,13 +23,17 @@ */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "qemu/config-file.h" +#include "block/block_int.h" +#include "qemu/module.h" typedef struct BDRVBlkdebugState { int state; + int new_state; + QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; + QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; } BDRVBlkdebugState; typedef struct BlkdebugAIOCB { @@ -38,9 +42,15 @@ typedef struct BlkdebugAIOCB { int ret; } BlkdebugAIOCB; +typedef struct BlkdebugSuspendedReq { + Coroutine *co; + char *tag; + QLIST_ENTRY(BlkdebugSuspendedReq) next; +} BlkdebugSuspendedReq; + static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb); -static AIOPool blkdebug_aio_pool = { +static const AIOCBInfo blkdebug_aiocb_info = { .aiocb_size = sizeof(BlkdebugAIOCB), .cancel = blkdebug_aio_cancel, }; @@ -48,6 +58,7 @@ static AIOPool blkdebug_aio_pool = { enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, + ACTION_SUSPEND, }; typedef struct BlkdebugRule { @@ -64,6 +75,9 @@ typedef struct BlkdebugRule { struct { int new_state; } set_state; + struct { + char *tag; + } suspend; } options; QLIST_ENTRY(BlkdebugRule) next; QSIMPLEQ_ENTRY(BlkdebugRule) active_next; @@ -225,6 +239,11 @@ static int add_rule(QemuOpts *opts, void *opaque) rule->options.set_state.new_state = qemu_opt_get_number(opts, "new_state", 0); break; + + case ACTION_SUSPEND: + rule->options.suspend.tag = + g_strdup(qemu_opt_get(opts, "tag")); + break; }; /* Add the rule */ @@ -233,12 +252,32 @@ static int add_rule(QemuOpts *opts, void *opaque) return 0; } +static void remove_rule(BlkdebugRule *rule) +{ + switch (rule->action) { + case ACTION_INJECT_ERROR: + case ACTION_SET_STATE: + break; + case ACTION_SUSPEND: + g_free(rule->options.suspend.tag); + break; + } + + QLIST_REMOVE(rule, next); + g_free(rule); +} + static int read_config(BDRVBlkdebugState *s, const char *filename) { FILE *f; int ret; struct add_rule_data d; + /* Allow usage without config file */ + if (!*filename) { + return 0; + } + f = fopen(filename, "r"); if (f == NULL) { return -errno; @@ -334,7 +373,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs, return NULL; } - acb = qemu_aio_get(&blkdebug_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque); acb->ret = -error; bh = qemu_bh_new(error_callback_bh, acb); @@ -388,6 +427,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs, return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque); } + static void blkdebug_close(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; @@ -396,19 +436,39 @@ static void blkdebug_close(BlockDriverState *bs) for (i = 0; i < BLKDBG_EVENT_MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { - QLIST_REMOVE(rule, next); - g_free(rule); + remove_rule(rule); } } } +static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq r; + + r = (BlkdebugSuspendedReq) { + .co = qemu_coroutine_self(), + .tag = g_strdup(rule->options.suspend.tag), + }; + + remove_rule(rule); + QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + + printf("blkdebug: Suspended request '%s'\n", r.tag); + qemu_coroutine_yield(); + printf("blkdebug: Resuming request '%s'\n", r.tag); + + QLIST_REMOVE(&r, next); + g_free(r.tag); +} + static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - int old_state, bool injected) + bool injected) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ - if (rule->state && rule->state != old_state) { + if (rule->state && rule->state != s->state) { return injected; } @@ -423,7 +483,11 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, break; case ACTION_SET_STATE: - s->state = rule->options.set_state.new_state; + s->new_state = rule->options.set_state.new_state; + break; + + case ACTION_SUSPEND: + suspend_request(bs, rule); break; } return injected; @@ -432,16 +496,70 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; - struct BlkdebugRule *rule; - int old_state = s->state; + struct BlkdebugRule *rule, *next; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; - QLIST_FOREACH(rule, &s->rules[event], next) { - injected = process_rule(bs, rule, old_state, injected); + s->new_state = s->state; + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { + injected = process_rule(bs, rule, injected); + } + s->state = s->new_state; +} + +static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + struct BlkdebugRule *rule; + BlkDebugEvent blkdebug_event; + + if (get_event_by_name(event, &blkdebug_event) < 0) { + return -ENOENT; + } + + + rule = g_malloc(sizeof(*rule)); + *rule = (struct BlkdebugRule) { + .event = blkdebug_event, + .action = ACTION_SUSPEND, + .state = 0, + .options.suspend.tag = g_strdup(tag), + }; + + QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + + return 0; +} + +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co, NULL); + return 0; + } + } + return -ENOENT; +} + + +static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + return true; + } } + return false; } static int64_t blkdebug_getlength(BlockDriverState *bs) @@ -462,7 +580,10 @@ static BlockDriver bdrv_blkdebug = { .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev = blkdebug_aio_writev, - .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + .bdrv_debug_resume = blkdebug_debug_resume, + .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, }; static void bdrv_blkdebug_init(void) diff --git a/block/blkverify.c b/block/blkverify.c index 9d5f1ec5b9..a7dd45909b 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -8,8 +8,8 @@ */ #include <stdarg.h> -#include "qemu_socket.h" /* for EINPROGRESS on Windows */ -#include "block_int.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" typedef struct { BlockDriverState *test_file; @@ -48,7 +48,7 @@ static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb) } } -static AIOPool blkverify_aio_pool = { +static const AIOCBInfo blkverify_aiocb_info = { .aiocb_size = sizeof(BlkverifyAIOCB), .cancel = blkverify_aio_cancel, }; @@ -233,7 +233,7 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, BlockDriverCompletionFunc *cb, void *opaque) { - BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque); + BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque); acb->bh = NULL; acb->is_write = is_write; diff --git a/block/bochs.c b/block/bochs.c index ab7944dc43..1b1d9cdbe5 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -23,8 +23,8 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" /**************************************************************/ diff --git a/block/cloop.c b/block/cloop.c index 7570eb8e74..5a0d0d805f 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -22,8 +22,8 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" #include <zlib.h> typedef struct BDRVCloopState { diff --git a/block/commit.c b/block/commit.c new file mode 100644 index 0000000000..61ebdba54f --- /dev/null +++ b/block/commit.c @@ -0,0 +1,259 @@ +/* + * Live block commit + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Jeff Cody <jcody@redhat.com> + * Based on stream.c by Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct CommitBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *active; + BlockDriverState *top; + BlockDriverState *base; + BlockdevOnError on_error; + int base_flags; + int orig_overlay_flags; +} CommitBlockJob; + +static int coroutine_fn commit_populate(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, int nb_sectors, + void *buf) +{ + int ret = 0; + + ret = bdrv_read(bs, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + ret = bdrv_write(base, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + return 0; +} + +static void coroutine_fn commit_run(void *opaque) +{ + CommitBlockJob *s = opaque; + BlockDriverState *active = s->active; + BlockDriverState *top = s->top; + BlockDriverState *base = s->base; + BlockDriverState *overlay_bs = NULL; + int64_t sector_num, end; + int ret = 0; + int n = 0; + void *buf; + int bytes_written = 0; + int64_t base_len; + + ret = s->common.len = bdrv_getlength(top); + + + if (s->common.len < 0) { + goto exit_restore_reopen; + } + + ret = base_len = bdrv_getlength(base); + if (base_len < 0) { + goto exit_restore_reopen; + } + + if (base_len < s->common.len) { + ret = bdrv_truncate(base, s->common.len); + if (ret) { + goto exit_restore_reopen; + } + } + + overlay_bs = bdrv_find_overlay(active, top); + + end = s->common.len >> BDRV_SECTOR_BITS; + buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE); + + for (sector_num = 0; sector_num < end; sector_num += n) { + uint64_t delay_ns = 0; + bool copy; + +wait: + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that bdrv_drain_all() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + /* Copy if allocated above the base */ + ret = bdrv_co_is_allocated_above(top, base, sector_num, + COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); + copy = (ret == 1); + trace_commit_one_iteration(s, sector_num, n, ret); + if (copy) { + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, n); + if (delay_ns > 0) { + goto wait; + } + } + ret = commit_populate(top, base, sector_num, n, buf); + bytes_written += n * BDRV_SECTOR_SIZE; + } + if (ret < 0) { + if (s->on_error == BLOCKDEV_ON_ERROR_STOP || + s->on_error == BLOCKDEV_ON_ERROR_REPORT|| + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) { + goto exit_free_buf; + } else { + n = 0; + continue; + } + } + /* Publish progress */ + s->common.offset += n * BDRV_SECTOR_SIZE; + } + + ret = 0; + + if (!block_job_is_cancelled(&s->common) && sector_num == end) { + /* success */ + ret = bdrv_drop_intermediate(active, top, base); + } + +exit_free_buf: + qemu_vfree(buf); + +exit_restore_reopen: + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_flags != bdrv_get_flags(base)) { + bdrv_reopen(base, s->base_flags, NULL); + } + if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { + bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); + } + + block_job_completed(&s->common, ret); +} + +static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType commit_job_type = { + .instance_size = sizeof(CommitBlockJob), + .job_type = "commit", + .set_speed = commit_set_speed, +}; + +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + CommitBlockJob *s; + BlockReopenQueue *reopen_queue = NULL; + int orig_overlay_flags; + int orig_base_flags; + BlockDriverState *overlay_bs; + Error *local_err = NULL; + + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER_COMBINATION); + return; + } + + /* Once we support top == active layer, remove this check */ + if (top == bs) { + error_setg(errp, + "Top image as the active layer is currently unsupported"); + return; + } + + if (top == base) { + error_setg(errp, "Invalid files for merge: top and base are the same"); + return; + } + + overlay_bs = bdrv_find_overlay(bs, top); + + if (overlay_bs == NULL) { + error_setg(errp, "Could not find overlay image for %s:", top->filename); + return; + } + + orig_base_flags = bdrv_get_flags(base); + orig_overlay_flags = bdrv_get_flags(overlay_bs); + + /* convert base & overlay_bs to r/w, if necessary */ + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, + orig_base_flags | BDRV_O_RDWR); + } + if (!(orig_overlay_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, + orig_overlay_flags | BDRV_O_RDWR); + } + if (reopen_queue) { + bdrv_reopen_multiple(reopen_queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + } + + + s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->base = base; + s->top = top; + s->active = bs; + + s->base_flags = orig_base_flags; + s->orig_overlay_flags = orig_overlay_flags; + + s->on_error = on_error; + s->common.co = qemu_coroutine_create(commit_run); + + trace_commit_start(bs, base, top, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block/cow.c b/block/cow.c index a5a00eb9ca..a33ce950d4 100644 --- a/block/cow.c +++ b/block/cow.c @@ -22,8 +22,8 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" /**************************************************************/ /* COW block driver using file system holes */ diff --git a/block/curl.c b/block/curl.c index e7c3634d35..47df9524ea 100644 --- a/block/curl.c +++ b/block/curl.c @@ -22,7 +22,7 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" +#include "block/block_int.h" #include <curl/curl.h> // #define DEBUG @@ -438,7 +438,7 @@ static void curl_aio_cancel(BlockDriverAIOCB *blockacb) // Do we have to implement canceling? Seems to work without... } -static AIOPool curl_aio_pool = { +static const AIOCBInfo curl_aiocb_info = { .aiocb_size = sizeof(CURLAIOCB), .cancel = curl_aio_cancel, }; @@ -505,7 +505,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs, { CURLAIOCB *acb; - acb = qemu_aio_get(&curl_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque); acb->qiov = qiov; acb->sector_num = sector_num; @@ -542,8 +542,7 @@ static void curl_close(BlockDriverState *bs) } if (s->multi) curl_multi_cleanup(s->multi); - if (s->url) - free(s->url); + g_free(s->url); } static int64_t curl_getlength(BlockDriverState *bs) diff --git a/block/dmg.c b/block/dmg.c index 37902a4347..ac397dc8f7 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -22,9 +22,9 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "bswap.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/bswap.h" +#include "qemu/module.h" #include <zlib.h> typedef struct BDRVDMGState { diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 0000000000..0f2c32a3fa --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,624 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * Pipe handling mechanism in AIO implementation is derived from + * block/rbd.c. Hence, + * + * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>, + * Josh Durgin <josh.durgin@dreamhost.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include <glusterfs/api/glfs.h> +#include "block/block_int.h" +#include "qemu/sockets.h" +#include "qemu/uri.h" + +typedef struct GlusterAIOCB { + BlockDriverAIOCB common; + int64_t size; + int ret; + bool *finished; + QEMUBH *bh; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + int fds[2]; + struct glfs_fd *fd; + int qemu_aio_count; + int event_reader_pos; + GlusterAIOCB *event_acb; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +typedef struct GlusterConf { + char *server; + int port; + char *volname; + char *image; + char *transport; +} GlusterConf; + +static void qemu_gluster_gconf_free(GlusterConf *gconf) +{ + g_free(gconf->server); + g_free(gconf->volname); + g_free(gconf->image); + g_free(gconf->transport); + g_free(gconf); +} + +static int parse_volume_options(GlusterConf *gconf, char *path) +{ + char *p, *q; + + if (!path) { + return -EINVAL; + } + + /* volume */ + p = q = path + strspn(path, "/"); + p += strcspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->volname = g_strndup(q, p - q); + + /* image */ + p += strspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->image = g_strdup(p); + return 0; +} + +/* + * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] + * + * 'gluster' is the protocol. + * + * 'transport' specifies the transport type used to connect to gluster + * management daemon (glusterd). Valid transport types are + * tcp, unix and rdma. If a transport type isn't specified, then tcp + * type is assumed. + * + * 'server' specifies the server where the volume file specification for + * the given volume resides. This can be either hostname, ipv4 address + * or ipv6 address. ipv6 address needs to be within square brackets [ ]. + * If transport type is 'unix', then 'server' field should not be specifed. + * The 'socket' field needs to be populated with the path to unix domain + * socket. + * + * 'port' is the port number on which glusterd is listening. This is optional + * and if not specified, QEMU will send 0 which will make gluster to use the + * default port. If the transport type is unix, then 'port' should not be + * specified. + * + * 'volname' is the name of the gluster volume which contains the VM image. + * + * 'image' is the path to the actual VM image that resides on gluster volume. + * + * Examples: + * + * file=gluster://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img + * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + * file=gluster+rdma://1.2.3.4:24007/testvol/a.img + */ +static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +{ + URI *uri; + QueryParams *qp = NULL; + bool is_unix = false; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "gluster")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+tcp")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+unix")) { + gconf->transport = g_strdup("unix"); + is_unix = true; + } else if (!strcmp(uri->scheme, "gluster+rdma")) { + gconf->transport = g_strdup("rdma"); + } else { + ret = -EINVAL; + goto out; + } + + ret = parse_volume_options(gconf, uri->path); + if (ret < 0) { + goto out; + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + if (uri->server || uri->port) { + ret = -EINVAL; + goto out; + } + if (strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + gconf->server = g_strdup(qp->p[0].value); + } else { + gconf->server = g_strdup(uri->server); + gconf->port = uri->port; + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename) +{ + struct glfs *glfs = NULL; + int ret; + int old_errno; + + ret = qemu_gluster_parseuri(gconf, filename); + if (ret < 0) { + error_report("Usage: file=gluster[+transport]://[server[:port]]/" + "volname/image[?socket=...]"); + errno = -ret; + goto out; + } + + glfs = glfs_new(gconf->volname); + if (!glfs) { + goto out; + } + + ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, + gconf->port); + if (ret < 0) { + goto out; + } + + /* + * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when + * GlusterFS makes GF_LOG_* macros available to libgfapi users. + */ + ret = glfs_set_logging(glfs, "-", 4); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret) { + error_report("Gluster connection failed for server=%s port=%d " + "volume=%s image=%s transport=%s\n", gconf->server, gconf->port, + gconf->volname, gconf->image, gconf->transport); + goto out; + } + return glfs; + +out: + if (glfs) { + old_errno = errno; + glfs_fini(glfs); + errno = old_errno; + } + return NULL; +} + +static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +{ + int ret; + bool *finished = acb->finished; + BlockDriverCompletionFunc *cb = acb->common.cb; + void *opaque = acb->common.opaque; + + if (!acb->ret || acb->ret == acb->size) { + ret = 0; /* Success */ + } else if (acb->ret < 0) { + ret = acb->ret; /* Read/Write failed */ + } else { + ret = -EIO; /* Partial read/write - fail it */ + } + + s->qemu_aio_count--; + qemu_aio_release(acb); + cb(opaque, ret); + if (finished) { + *finished = true; + } +} + +static void qemu_gluster_aio_event_reader(void *opaque) +{ + BDRVGlusterState *s = opaque; + ssize_t ret; + + do { + char *p = (char *)&s->event_acb; + + ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, + sizeof(s->event_acb) - s->event_reader_pos); + if (ret > 0) { + s->event_reader_pos += ret; + if (s->event_reader_pos == sizeof(s->event_acb)) { + s->event_reader_pos = 0; + qemu_gluster_complete_aio(s->event_acb, s); + } + } + } while (ret < 0 && errno == EINTR); +} + +static int qemu_gluster_aio_flush_cb(void *opaque) +{ + BDRVGlusterState *s = opaque; + + return (s->qemu_aio_count > 0); +} + +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, + int bdrv_flags) +{ + BDRVGlusterState *s = bs->opaque; + int open_flags = O_BINARY; + int ret = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + s->glfs = qemu_gluster_init(gconf, filename); + if (!s->glfs) { + ret = -errno; + goto out; + } + + if (bdrv_flags & BDRV_O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + open_flags |= O_DIRECT; + } + + s->fd = glfs_open(s->glfs, gconf->image, open_flags); + if (!s->fd) { + ret = -errno; + goto out; + } + + ret = qemu_pipe(s->fds); + if (ret < 0) { + ret = -errno; + goto out; + } + fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], + qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + +out: + qemu_gluster_gconf_free(gconf); + if (!ret) { + return ret; + } + if (s->fd) { + glfs_close(s->fd); + } + if (s->glfs) { + glfs_fini(s->glfs); + } + return ret; +} + +static int qemu_gluster_create(const char *filename, + QEMUOptionParameter *options) +{ + struct glfs *glfs; + struct glfs_fd *fd; + int ret = 0; + int64_t total_size = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + glfs = qemu_gluster_init(gconf, filename); + if (!glfs) { + ret = -errno; + goto out; + } + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = glfs_creat(glfs, gconf->image, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + if (!fd) { + ret = -errno; + } else { + if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + ret = -errno; + } + if (glfs_close(fd) != 0) { + ret = -errno; + } + } +out: + qemu_gluster_gconf_free(gconf); + if (glfs) { + glfs_fini(glfs); + } + return ret; +} + +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static const AIOCBInfo gluster_aiocb_info = { + .aiocb_size = sizeof(GlusterAIOCB), + .cancel = qemu_gluster_aio_cancel, +}; + +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + BlockDriverState *bs = acb->common.bs; + BDRVGlusterState *s = bs->opaque; + int retval; + + acb->ret = ret; + retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); + if (retval != sizeof(acb)) { + /* + * Gluster AIO callback thread failed to notify the waiting + * QEMU thread about IO completion. + * + * Complete this IO request and make the disk inaccessible for + * subsequent reads and writes. + */ + error_report("Gluster failed to notify QEMU about IO completion"); + + qemu_mutex_lock_iothread(); /* We are in gluster thread context */ + acb->common.cb(acb->common.opaque, -EIO); + qemu_aio_release(acb); + s->qemu_aio_count--; + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, + NULL); + bs->drv = NULL; /* Make the disk inaccessible */ + qemu_mutex_unlock_iothread(); + } +} + +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int write) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + s->qemu_aio_count++; + + acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); + acb->size = size; + acb->ret = 0; + acb->finished = NULL; + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } + + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + + acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); + acb->size = 0; + acb->ret = 0; + acb->finished = NULL; + s->qemu_aio_count++; + + ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + int64_t ret; + + ret = glfs_lseek(s->fd, 0, SEEK_END); + if (ret < 0) { + return -errno; + } else { + return ret; + } +} + +static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_blocks * 512; + } +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL); + + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_fini(s->glfs); +} + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_tcp = { + .format_name = "gluster", + .protocol_name = "gluster+tcp", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_unix = { + .format_name = "gluster", + .protocol_name = "gluster+unix", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_rdma = { + .format_name = "gluster", + .protocol_name = "gluster+rdma", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster_rdma); + bdrv_register(&bdrv_gluster_unix); + bdrv_register(&bdrv_gluster_tcp); + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init); diff --git a/block/iscsi.c b/block/iscsi.c index bb9cf82459..041ee07de3 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -27,8 +27,9 @@ #include <poll.h> #include <arpa/inet.h> #include "qemu-common.h" -#include "qemu-error.h" -#include "block_int.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "block/block_int.h" #include "trace.h" #include "hw/scsi-defs.h" @@ -65,21 +66,44 @@ typedef struct IscsiAIOCB { #endif } IscsiAIOCB; -struct IscsiTask { - IscsiLun *iscsilun; - BlockDriverState *bs; - int status; - int complete; -}; +static void +iscsi_bh_cb(void *p) +{ + IscsiAIOCB *acb = p; + + qemu_bh_delete(acb->bh); + + if (acb->canceled == 0) { + acb->common.cb(acb->common.opaque, acb->status); + } + + if (acb->task != NULL) { + scsi_free_scsi_task(acb->task); + acb->task = NULL; + } + + qemu_aio_release(acb); +} + +static void +iscsi_schedule_bh(IscsiAIOCB *acb) +{ + if (acb->bh) { + return; + } + acb->bh = qemu_bh_new(iscsi_bh_cb, acb); + qemu_bh_schedule(acb->bh); +} + static void iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data, void *private_data) { - IscsiAIOCB *acb = (IscsiAIOCB *)private_data; + IscsiAIOCB *acb = private_data; - scsi_free_scsi_task(acb->task); - acb->task = NULL; + acb->status = -ECANCELED; + iscsi_schedule_bh(acb); } static void @@ -88,18 +112,22 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb) IscsiAIOCB *acb = (IscsiAIOCB *)blockacb; IscsiLun *iscsilun = acb->iscsilun; - acb->canceled = 1; + if (acb->status != -EINPROGRESS) { + return; + } - acb->common.cb(acb->common.opaque, -ECANCELED); + acb->canceled = 1; - /* send a task mgmt call to the target to cancel the task on the target - * this also cancels the task in libiscsi - */ + /* send a task mgmt call to the target to cancel the task on the target */ iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task, - iscsi_abort_task_cb, &acb); + iscsi_abort_task_cb, acb); + + while (acb->status == -EINPROGRESS) { + qemu_aio_wait(); + } } -static AIOPool iscsi_aio_pool = { +static const AIOCBInfo iscsi_aiocb_info = { .aiocb_size = sizeof(IscsiAIOCB), .cancel = iscsi_aio_cancel, }; @@ -133,12 +161,6 @@ iscsi_set_events(IscsiLun *iscsilun) } - /* If we just added an event, the callback might be delayed - * unless we call qemu_notify_event(). - */ - if (ev & ~iscsilun->events) { - qemu_notify_event(); - } iscsilun->events = ev; } @@ -163,41 +185,6 @@ iscsi_process_write(void *arg) } -static int -iscsi_schedule_bh(QEMUBHFunc *cb, IscsiAIOCB *acb) -{ - acb->bh = qemu_bh_new(cb, acb); - if (!acb->bh) { - error_report("oom: could not create iscsi bh"); - return -EIO; - } - - qemu_bh_schedule(acb->bh); - return 0; -} - -static void -iscsi_readv_writev_bh_cb(void *p) -{ - IscsiAIOCB *acb = p; - - qemu_bh_delete(acb->bh); - - if (!acb->canceled) { - acb->common.cb(acb->common.opaque, acb->status); - } - - qemu_aio_release(acb); - - if (acb->canceled) { - return; - } - - scsi_free_scsi_task(acb->task); - acb->task = NULL; -} - - static void iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -208,8 +195,7 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status, g_free(acb->buf); - if (acb->canceled) { - qemu_aio_release(acb); + if (acb->canceled != 0) { return; } @@ -220,7 +206,7 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status, acb->status = -EIO; } - iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb); + iscsi_schedule_bh(acb); } static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun) @@ -242,13 +228,15 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, uint64_t lba; struct iscsi_data data; - acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb); acb->iscsilun = iscsilun; acb->qiov = qiov; acb->canceled = 0; + acb->bh = NULL; + acb->status = -EINPROGRESS; /* XXX we should pass the iovec to write16 to avoid the extra copy */ /* this will allow us to get rid of 'buf' completely */ @@ -268,10 +256,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, acb->task->xfer_dir = SCSI_XFER_WRITE; acb->task->cdb_size = 16; acb->task->cdb[0] = 0x8a; - if (!(bs->open_flags & BDRV_O_CACHE_WB)) { - /* set FUA on writes when cache mode is write through */ - acb->task->cdb[1] |= 0x04; - } lba = sector_qemu2lun(sector_num, iscsilun); *(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32); *(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff); @@ -305,8 +289,7 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status, trace_iscsi_aio_read16_cb(iscsi, status, acb, acb->canceled); - if (acb->canceled) { - qemu_aio_release(acb); + if (acb->canceled != 0) { return; } @@ -317,7 +300,7 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status, acb->status = -EIO; } - iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb); + iscsi_schedule_bh(acb); } static BlockDriverAIOCB * @@ -336,13 +319,15 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors; - acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb); acb->iscsilun = iscsilun; acb->qiov = qiov; acb->canceled = 0; + acb->bh = NULL; + acb->status = -EINPROGRESS; acb->read_size = qemu_read_size; acb->buf = NULL; @@ -389,7 +374,7 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors); break; } - + if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task, iscsi_aio_read16_cb, NULL, @@ -417,8 +402,7 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi, int status, { IscsiAIOCB *acb = opaque; - if (acb->canceled) { - qemu_aio_release(acb); + if (acb->canceled != 0) { return; } @@ -429,7 +413,7 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi, int status, acb->status = -EIO; } - iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb); + iscsi_schedule_bh(acb); } static BlockDriverAIOCB * @@ -440,10 +424,12 @@ iscsi_aio_flush(BlockDriverState *bs, struct iscsi_context *iscsi = iscsilun->iscsi; IscsiAIOCB *acb; - acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); acb->iscsilun = iscsilun; acb->canceled = 0; + acb->bh = NULL; + acb->status = -EINPROGRESS; acb->task = iscsi_synchronizecache10_task(iscsi, iscsilun->lun, 0, 0, 0, 0, @@ -467,8 +453,7 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int status, { IscsiAIOCB *acb = opaque; - if (acb->canceled) { - qemu_aio_release(acb); + if (acb->canceled != 0) { return; } @@ -479,7 +464,7 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int status, acb->status = -EIO; } - iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb); + iscsi_schedule_bh(acb); } static BlockDriverAIOCB * @@ -492,10 +477,12 @@ iscsi_aio_discard(BlockDriverState *bs, IscsiAIOCB *acb; struct unmap_list list[1]; - acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); acb->iscsilun = iscsilun; acb->canceled = 0; + acb->bh = NULL; + acb->status = -EINPROGRESS; list[0].lba = sector_qemu2lun(sector_num, iscsilun); list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size; @@ -523,8 +510,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, { IscsiAIOCB *acb = opaque; - if (acb->canceled) { - qemu_aio_release(acb); + if (acb->canceled != 0) { return; } @@ -552,7 +538,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss); } - iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb); + iscsi_schedule_bh(acb); } static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, @@ -566,10 +552,12 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, assert(req == SG_IO); - acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); acb->iscsilun = iscsilun; acb->canceled = 0; + acb->bh = NULL; + acb->status = -EINPROGRESS; acb->buf = NULL; acb->ioh = buf; @@ -624,9 +612,17 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return &acb->common; } + +static void ioctl_cb(void *opaque, int status) +{ + int *p_status = opaque; + *p_status = status; +} + static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { IscsiLun *iscsilun = bs->opaque; + int status; switch (req) { case SG_GET_VERSION_NUM: @@ -635,6 +631,15 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) case SG_GET_SCSI_ID: ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; break; + case SG_IO: + status = -EINPROGRESS; + iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); + + while (status == -EINPROGRESS) { + qemu_aio_wait(); + } + + return 0; default: return -1; } @@ -654,158 +659,6 @@ iscsi_getlength(BlockDriverState *bs) return len; } -static void -iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status, - void *command_data, void *opaque) -{ - struct IscsiTask *itask = opaque; - struct scsi_readcapacity16 *rc16; - struct scsi_task *task = command_data; - - if (status != 0) { - error_report("iSCSI: Failed to read capacity of iSCSI lun. %s", - iscsi_get_error(iscsi)); - itask->status = 1; - itask->complete = 1; - scsi_free_scsi_task(task); - return; - } - - rc16 = scsi_datain_unmarshall(task); - if (rc16 == NULL) { - error_report("iSCSI: Failed to unmarshall readcapacity16 data."); - itask->status = 1; - itask->complete = 1; - scsi_free_scsi_task(task); - return; - } - - itask->iscsilun->block_size = rc16->block_length; - itask->iscsilun->num_blocks = rc16->returned_lba + 1; - itask->bs->total_sectors = itask->iscsilun->num_blocks * - itask->iscsilun->block_size / BDRV_SECTOR_SIZE ; - - itask->status = 0; - itask->complete = 1; - scsi_free_scsi_task(task); -} - -static void -iscsi_readcapacity10_cb(struct iscsi_context *iscsi, int status, - void *command_data, void *opaque) -{ - struct IscsiTask *itask = opaque; - struct scsi_readcapacity10 *rc10; - struct scsi_task *task = command_data; - - if (status != 0) { - error_report("iSCSI: Failed to read capacity of iSCSI lun. %s", - iscsi_get_error(iscsi)); - itask->status = 1; - itask->complete = 1; - scsi_free_scsi_task(task); - return; - } - - rc10 = scsi_datain_unmarshall(task); - if (rc10 == NULL) { - error_report("iSCSI: Failed to unmarshall readcapacity10 data."); - itask->status = 1; - itask->complete = 1; - scsi_free_scsi_task(task); - return; - } - - itask->iscsilun->block_size = rc10->block_size; - itask->iscsilun->num_blocks = rc10->lba + 1; - itask->bs->total_sectors = itask->iscsilun->num_blocks * - itask->iscsilun->block_size / BDRV_SECTOR_SIZE ; - - itask->status = 0; - itask->complete = 1; - scsi_free_scsi_task(task); -} - -static void -iscsi_inquiry_cb(struct iscsi_context *iscsi, int status, void *command_data, - void *opaque) -{ - struct IscsiTask *itask = opaque; - struct scsi_task *task = command_data; - struct scsi_inquiry_standard *inq; - - if (status != 0) { - itask->status = 1; - itask->complete = 1; - scsi_free_scsi_task(task); - return; - } - - inq = scsi_datain_unmarshall(task); - if (inq == NULL) { - error_report("iSCSI: Failed to unmarshall inquiry data."); - itask->status = 1; - itask->complete = 1; - scsi_free_scsi_task(task); - return; - } - - itask->iscsilun->type = inq->periperal_device_type; - - scsi_free_scsi_task(task); - - switch (itask->iscsilun->type) { - case TYPE_DISK: - task = iscsi_readcapacity16_task(iscsi, itask->iscsilun->lun, - iscsi_readcapacity16_cb, opaque); - if (task == NULL) { - error_report("iSCSI: failed to send readcapacity16 command."); - itask->status = 1; - itask->complete = 1; - return; - } - break; - case TYPE_ROM: - task = iscsi_readcapacity10_task(iscsi, itask->iscsilun->lun, - 0, 0, - iscsi_readcapacity10_cb, opaque); - if (task == NULL) { - error_report("iSCSI: failed to send readcapacity16 command."); - itask->status = 1; - itask->complete = 1; - return; - } - break; - default: - itask->status = 0; - itask->complete = 1; - } -} - -static void -iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data, - void *opaque) -{ - struct IscsiTask *itask = opaque; - struct scsi_task *task; - - if (status != 0) { - itask->status = 1; - itask->complete = 1; - return; - } - - task = iscsi_inquiry_task(iscsi, itask->iscsilun->lun, - 0, 0, 36, - iscsi_inquiry_cb, opaque); - if (task == NULL) { - error_report("iSCSI: failed to send inquiry command."); - itask->status = 1; - itask->complete = 1; - return; - } -} - static int parse_chap(struct iscsi_context *iscsi, const char *target) { QemuOptsList *list; @@ -918,7 +771,10 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) IscsiLun *iscsilun = bs->opaque; struct iscsi_context *iscsi = NULL; struct iscsi_url *iscsi_url = NULL; - struct IscsiTask task; + struct scsi_task *task = NULL; + struct scsi_inquiry_standard *inq = NULL; + struct scsi_readcapacity10 *rc10 = NULL; + struct scsi_readcapacity16 *rc16 = NULL; char *initiator_name = NULL; int ret; @@ -931,8 +787,7 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) iscsi_url = iscsi_parse_full_url(iscsi, filename); if (iscsi_url == NULL) { - error_report("Failed to parse URL : %s %s", filename, - iscsi_get_error(iscsi)); + error_report("Failed to parse URL : %s", filename); ret = -EINVAL; goto out; } @@ -982,33 +837,80 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) /* check if we got HEADER_DIGEST via the options */ parse_header_digest(iscsi, iscsi_url->target); - task.iscsilun = iscsilun; - task.status = 0; - task.complete = 0; - task.bs = bs; + if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) { + error_report("iSCSI: Failed to connect to LUN : %s", + iscsi_get_error(iscsi)); + ret = -EINVAL; + goto out; + } iscsilun->iscsi = iscsi; iscsilun->lun = iscsi_url->lun; - if (iscsi_full_connect_async(iscsi, iscsi_url->portal, iscsi_url->lun, - iscsi_connect_cb, &task) - != 0) { - error_report("iSCSI: Failed to start async connect."); + task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36); + + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + error_report("iSCSI: failed to send inquiry command."); ret = -EINVAL; goto out; } - while (!task.complete) { - iscsi_set_events(iscsilun); - qemu_aio_wait(); - } - if (task.status != 0) { - error_report("iSCSI: Failed to connect to LUN : %s", - iscsi_get_error(iscsi)); + inq = scsi_datain_unmarshall(task); + if (inq == NULL) { + error_report("iSCSI: Failed to unmarshall inquiry data."); ret = -EINVAL; goto out; } + iscsilun->type = inq->periperal_device_type; + + scsi_free_scsi_task(task); + + switch (iscsilun->type) { + case TYPE_DISK: + task = iscsi_readcapacity16_sync(iscsi, iscsilun->lun); + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + error_report("iSCSI: failed to send readcapacity16 command."); + ret = -EINVAL; + goto out; + } + rc16 = scsi_datain_unmarshall(task); + if (rc16 == NULL) { + error_report("iSCSI: Failed to unmarshall readcapacity16 data."); + ret = -EINVAL; + goto out; + } + iscsilun->block_size = rc16->block_length; + iscsilun->num_blocks = rc16->returned_lba + 1; + break; + case TYPE_ROM: + task = iscsi_readcapacity10_sync(iscsi, iscsilun->lun, 0, 0); + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + error_report("iSCSI: failed to send readcapacity10 command."); + ret = -EINVAL; + goto out; + } + rc10 = scsi_datain_unmarshall(task); + if (rc10 == NULL) { + error_report("iSCSI: Failed to unmarshall readcapacity10 data."); + ret = -EINVAL; + goto out; + } + iscsilun->block_size = rc10->block_size; + if (rc10->lba == 0) { + /* blank disk loaded */ + iscsilun->num_blocks = 0; + } else { + iscsilun->num_blocks = rc10->lba + 1; + } + break; + default: + break; + } + + bs->total_sectors = iscsilun->num_blocks * + iscsilun->block_size / BDRV_SECTOR_SIZE ; + /* Medium changer or tape. We dont have any emulation for this so this must * be sg ioctl compatible. We force it to be sg, otherwise qemu will try * to read from the device to guess the image format. @@ -1027,6 +929,9 @@ out: if (iscsi_url != NULL) { iscsi_destroy_url(iscsi_url); } + if (task != NULL) { + scsi_free_scsi_task(task); + } if (ret) { if (iscsi != NULL) { @@ -1047,6 +952,11 @@ static void iscsi_close(BlockDriverState *bs) memset(iscsilun, 0, sizeof(IscsiLun)); } +static int iscsi_has_zero_init(BlockDriverState *bs) +{ + return 0; +} + static BlockDriver bdrv_iscsi = { .format_name = "iscsi", .protocol_name = "iscsi", @@ -1062,6 +972,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_aio_flush = iscsi_aio_flush, .bdrv_aio_discard = iscsi_aio_discard, + .bdrv_has_zero_init = iscsi_has_zero_init, #ifdef __linux__ .bdrv_ioctl = iscsi_ioctl, diff --git a/block/linux-aio.c b/block/linux-aio.c new file mode 100644 index 0000000000..ee0f8d10c9 --- /dev/null +++ b/block/linux-aio.c @@ -0,0 +1,216 @@ +/* + * Linux native AIO support. + * + * Copyright (C) 2009 IBM, Corp. + * Copyright (C) 2009 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu-common.h" +#include "block/aio.h" +#include "qemu/queue.h" +#include "block/raw-aio.h" +#include "qemu/event_notifier.h" + +#include <libaio.h> + +/* + * Queue size (per-device). + * + * XXX: eventually we need to communicate this to the guest and/or make it + * tunable by the guest. If we get more outstanding requests at a time + * than this we will get EAGAIN from io_submit which is communicated to + * the guest as an I/O error. + */ +#define MAX_EVENTS 128 + +struct qemu_laiocb { + BlockDriverAIOCB common; + struct qemu_laio_state *ctx; + struct iocb iocb; + ssize_t ret; + size_t nbytes; + QEMUIOVector *qiov; + bool is_read; + QLIST_ENTRY(qemu_laiocb) node; +}; + +struct qemu_laio_state { + io_context_t ctx; + EventNotifier e; + int count; +}; + +static inline ssize_t io_event_ret(struct io_event *ev) +{ + return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res); +} + +/* + * Completes an AIO request (calls the callback and frees the ACB). + */ +static void qemu_laio_process_completion(struct qemu_laio_state *s, + struct qemu_laiocb *laiocb) +{ + int ret; + + s->count--; + + ret = laiocb->ret; + if (ret != -ECANCELED) { + if (ret == laiocb->nbytes) { + ret = 0; + } else if (ret >= 0) { + /* Short reads mean EOF, pad with zeros. */ + if (laiocb->is_read) { + qemu_iovec_memset(laiocb->qiov, ret, 0, + laiocb->qiov->size - ret); + } else { + ret = -EINVAL; + } + } + + laiocb->common.cb(laiocb->common.opaque, ret); + } + + qemu_aio_release(laiocb); +} + +static void qemu_laio_completion_cb(EventNotifier *e) +{ + struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e); + + while (event_notifier_test_and_clear(&s->e)) { + struct io_event events[MAX_EVENTS]; + struct timespec ts = { 0 }; + int nevents, i; + + do { + nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts); + } while (nevents == -EINTR); + + for (i = 0; i < nevents; i++) { + struct iocb *iocb = events[i].obj; + struct qemu_laiocb *laiocb = + container_of(iocb, struct qemu_laiocb, iocb); + + laiocb->ret = io_event_ret(&events[i]); + qemu_laio_process_completion(s, laiocb); + } + } +} + +static int qemu_laio_flush_cb(EventNotifier *e) +{ + struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e); + + return (s->count > 0) ? 1 : 0; +} + +static void laio_cancel(BlockDriverAIOCB *blockacb) +{ + struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb; + struct io_event event; + int ret; + + if (laiocb->ret != -EINPROGRESS) + return; + + /* + * Note that as of Linux 2.6.31 neither the block device code nor any + * filesystem implements cancellation of AIO request. + * Thus the polling loop below is the normal code path. + */ + ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); + if (ret == 0) { + laiocb->ret = -ECANCELED; + return; + } + + /* + * We have to wait for the iocb to finish. + * + * The only way to get the iocb status update is by polling the io context. + * We might be able to do this slightly more optimal by removing the + * O_NONBLOCK flag. + */ + while (laiocb->ret == -EINPROGRESS) { + qemu_laio_completion_cb(&laiocb->ctx->e); + } +} + +static const AIOCBInfo laio_aiocb_info = { + .aiocb_size = sizeof(struct qemu_laiocb), + .cancel = laio_cancel, +}; + +BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + struct qemu_laio_state *s = aio_ctx; + struct qemu_laiocb *laiocb; + struct iocb *iocbs; + off_t offset = sector_num * 512; + + laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); + laiocb->nbytes = nb_sectors * 512; + laiocb->ctx = s; + laiocb->ret = -EINPROGRESS; + laiocb->is_read = (type == QEMU_AIO_READ); + laiocb->qiov = qiov; + + iocbs = &laiocb->iocb; + + switch (type) { + case QEMU_AIO_WRITE: + io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset); + break; + case QEMU_AIO_READ: + io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset); + break; + /* Currently Linux kernel does not support other operations */ + default: + fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", + __func__, type); + goto out_free_aiocb; + } + io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); + s->count++; + + if (io_submit(s->ctx, 1, &iocbs) < 0) + goto out_dec_count; + return &laiocb->common; + +out_dec_count: + s->count--; +out_free_aiocb: + qemu_aio_release(laiocb); + return NULL; +} + +void *laio_init(void) +{ + struct qemu_laio_state *s; + + s = g_malloc0(sizeof(*s)); + if (event_notifier_init(&s->e, false) < 0) { + goto out_free_state; + } + + if (io_setup(MAX_EVENTS, &s->ctx) != 0) { + goto out_close_efd; + } + + qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb, + qemu_laio_flush_cb); + + return s; + +out_close_efd: + event_notifier_cleanup(&s->e); +out_free_state: + g_free(s); + return NULL; +} diff --git a/block/mirror.c b/block/mirror.c new file mode 100644 index 0000000000..8aeacbf12c --- /dev/null +++ b/block/mirror.c @@ -0,0 +1,322 @@ +/* + * Image mirroring + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block/blockjob.h" +#include "block/block_int.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct MirrorBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *target; + MirrorSyncMode mode; + BlockdevOnError on_source_error, on_target_error; + bool synced; + bool should_complete; + int64_t sector_num; + uint8_t *buf; +} MirrorBlockJob; + +static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, + int error) +{ + s->synced = false; + if (read) { + return block_job_error_action(&s->common, s->common.bs, + s->on_source_error, true, error); + } else { + return block_job_error_action(&s->common, s->target, + s->on_target_error, false, error); + } +} + +static int coroutine_fn mirror_iteration(MirrorBlockJob *s, + BlockErrorAction *p_action) +{ + BlockDriverState *source = s->common.bs; + BlockDriverState *target = s->target; + QEMUIOVector qiov; + int ret, nb_sectors; + int64_t end; + struct iovec iov; + + end = s->common.len >> BDRV_SECTOR_BITS; + s->sector_num = bdrv_get_next_dirty(source, s->sector_num); + nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); + bdrv_reset_dirty(source, s->sector_num, nb_sectors); + + /* Copy the dirty cluster. */ + iov.iov_base = s->buf; + iov.iov_len = nb_sectors * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + + trace_mirror_one_iteration(s, s->sector_num, nb_sectors); + ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + *p_action = mirror_error_action(s, true, -ret); + goto fail; + } + ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + *p_action = mirror_error_action(s, false, -ret); + s->synced = false; + goto fail; + } + return 0; + +fail: + /* Try again later. */ + bdrv_set_dirty(source, s->sector_num, nb_sectors); + return ret; +} + +static void coroutine_fn mirror_run(void *opaque) +{ + MirrorBlockJob *s = opaque; + BlockDriverState *bs = s->common.bs; + int64_t sector_num, end; + int ret = 0; + int n; + + if (block_job_is_cancelled(&s->common)) { + goto immediate_exit; + } + + s->common.len = bdrv_getlength(bs); + if (s->common.len < 0) { + block_job_completed(&s->common, s->common.len); + return; + } + + end = s->common.len >> BDRV_SECTOR_BITS; + s->buf = qemu_blockalign(bs, BLOCK_SIZE); + + if (s->mode != MIRROR_SYNC_MODE_NONE) { + /* First part, loop on the sectors and initialize the dirty bitmap. */ + BlockDriverState *base; + base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; + for (sector_num = 0; sector_num < end; ) { + int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + ret = bdrv_co_is_allocated_above(bs, base, + sector_num, next - sector_num, &n); + + if (ret < 0) { + goto immediate_exit; + } + + assert(n > 0); + if (ret == 1) { + bdrv_set_dirty(bs, sector_num, n); + sector_num = next; + } else { + sector_num += n; + } + } + } + + s->sector_num = -1; + for (;;) { + uint64_t delay_ns; + int64_t cnt; + bool should_complete; + + cnt = bdrv_get_dirty_count(bs); + if (cnt != 0) { + BlockErrorAction action = BDRV_ACTION_REPORT; + ret = mirror_iteration(s, &action); + if (ret < 0 && action == BDRV_ACTION_REPORT) { + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(bs); + } + + should_complete = false; + if (cnt == 0) { + trace_mirror_before_flush(s); + ret = bdrv_flush(s->target); + if (ret < 0) { + if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) { + goto immediate_exit; + } + } else { + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. + */ + s->common.offset = end * BDRV_SECTOR_SIZE; + if (!s->synced) { + block_job_ready(&s->common); + s->synced = true; + } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(bs); + } + } + + if (cnt == 0 && should_complete) { + /* The dirty bitmap is not updated while operations are pending. + * If we're about to exit, wait for pending operations before + * calling bdrv_get_dirty_count(bs), or we may exit while the + * source has dirty data to copy! + * + * Note that I/O can be submitted by the guest while + * mirror_populate runs. + */ + trace_mirror_before_drain(s, cnt); + bdrv_drain_all(); + cnt = bdrv_get_dirty_count(bs); + } + + ret = 0; + trace_mirror_before_sleep(s, cnt, s->synced); + if (!s->synced) { + /* Publish progress */ + s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; + + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + } else { + delay_ns = 0; + } + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that bdrv_drain_all() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + } else if (!should_complete) { + delay_ns = (cnt == 0 ? SLICE_TIME : 0); + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + } else if (cnt == 0) { + /* The two disks are in sync. Exit and report successful + * completion. + */ + assert(QLIST_EMPTY(&bs->tracked_requests)); + s->common.cancelled = false; + break; + } + } + +immediate_exit: + g_free(s->buf); + bdrv_set_dirty_tracking(bs, false); + bdrv_iostatus_disable(s->target); + if (s->should_complete && ret == 0) { + if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { + bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + } + bdrv_swap(s->target, s->common.bs); + } + bdrv_close(s->target); + bdrv_delete(s->target); + block_job_completed(&s->common, ret); +} + +static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static void mirror_iostatus_reset(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + bdrv_iostatus_reset(s->target); +} + +static void mirror_complete(BlockJob *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + int ret; + + ret = bdrv_open_backing_file(s->target); + if (ret < 0) { + char backing_filename[PATH_MAX]; + bdrv_get_full_backing_filename(s->target, backing_filename, + sizeof(backing_filename)); + error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); + return; + } + if (!s->synced) { + error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); + return; + } + + s->should_complete = true; + block_job_resume(job); +} + +static BlockJobType mirror_job_type = { + .instance_size = sizeof(MirrorBlockJob), + .job_type = "mirror", + .set_speed = mirror_set_speed, + .iostatus_reset= mirror_iostatus_reset, + .complete = mirror_complete, +}; + +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + MirrorBlockJob *s; + + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || + on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); + return; + } + + s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->on_source_error = on_source_error; + s->on_target_error = on_target_error; + s->target = target; + s->mode = mode; + bdrv_set_dirty_tracking(bs, true); + bdrv_set_enable_write_cache(s->target, true); + bdrv_set_on_error(s->target, on_target_error, on_target_error); + bdrv_iostatus_enable(s->target); + s->common.co = qemu_coroutine_create(mirror_run); + trace_mirror_start(bs, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block/nbd.c b/block/nbd.c index 2bce47bf7a..a5812948d2 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -27,10 +27,11 @@ */ #include "qemu-common.h" -#include "nbd.h" -#include "block_int.h" -#include "module.h" -#include "qemu_socket.h" +#include "block/nbd.h" +#include "qemu/uri.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu/sockets.h" #include <sys/types.h> #include <unistd.h> @@ -55,7 +56,6 @@ typedef struct BDRVNBDState { uint32_t nbdflags; off_t size; size_t blocksize; - char *export_name; /* An NBD server may export several devices */ CoMutex send_mutex; CoMutex free_sema; @@ -65,13 +65,75 @@ typedef struct BDRVNBDState { Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; struct nbd_reply reply; - /* If it begins with '/', this is a UNIX domain socket. Otherwise, - * it's a string of the form <hostname|ip4|\[ip6\]>:port - */ + int is_unix; char *host_spec; + char *export_name; /* An NBD server may export several devices */ } BDRVNBDState; -static int nbd_config(BDRVNBDState *s, const char *filename, int flags) +static int nbd_parse_uri(BDRVNBDState *s, const char *filename) +{ + URI *uri; + const char *p; + QueryParams *qp = NULL; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "nbd")) { + s->is_unix = false; + } else if (!strcmp(uri->scheme, "nbd+tcp")) { + s->is_unix = false; + } else if (!strcmp(uri->scheme, "nbd+unix")) { + s->is_unix = true; + } else { + ret = -EINVAL; + goto out; + } + + p = uri->path ? uri->path : "/"; + p += strspn(p, "/"); + if (p[0]) { + s->export_name = g_strdup(p); + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (s->is_unix) { + /* nbd+unix:///export?socket=path */ + if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + s->host_spec = g_strdup(qp->p[0].value); + } else { + /* nbd[+tcp]://host:port/export */ + if (!uri->server) { + ret = -EINVAL; + goto out; + } + if (!uri->port) { + uri->port = NBD_DEFAULT_PORT; + } + s->host_spec = g_strdup_printf("%s:%d", uri->server, uri->port); + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static int nbd_config(BDRVNBDState *s, const char *filename) { char *file; char *export_name; @@ -79,6 +141,10 @@ static int nbd_config(BDRVNBDState *s, const char *filename, int flags) const char *unixpath; int err = -EINVAL; + if (strstr(filename, "://")) { + return nbd_parse_uri(s, filename); + } + file = g_strdup(filename); export_name = strstr(file, EN_OPTSTR); @@ -98,11 +164,10 @@ static int nbd_config(BDRVNBDState *s, const char *filename, int flags) /* are we a UNIX or TCP socket? */ if (strstart(host_spec, "unix:", &unixpath)) { - if (unixpath[0] != '/') { /* We demand an absolute path*/ - goto out; - } + s->is_unix = true; s->host_spec = g_strdup(unixpath); } else { + s->is_unix = false; s->host_spec = g_strdup(host_spec); } @@ -262,7 +327,7 @@ static int nbd_establish_connection(BlockDriverState *bs) off_t size; size_t blocksize; - if (s->host_spec[0] == '/') { + if (s->is_unix) { sock = unix_socket_outgoing(s->host_spec); } else { sock = tcp_socket_outgoing_spec(s->host_spec); @@ -320,7 +385,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) qemu_co_mutex_init(&s->free_sema); /* Pop the config into our state object. Exit if invalid. */ - result = nbd_config(s, filename, flags); + result = nbd_config(s, filename); if (result != 0) { return result; } @@ -498,6 +563,33 @@ static int64_t nbd_getlength(BlockDriverState *bs) static BlockDriver bdrv_nbd = { .format_name = "nbd", + .protocol_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_file_open = nbd_open, + .bdrv_co_readv = nbd_co_readv, + .bdrv_co_writev = nbd_co_writev, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_discard = nbd_co_discard, + .bdrv_getlength = nbd_getlength, +}; + +static BlockDriver bdrv_nbd_tcp = { + .format_name = "nbd", + .protocol_name = "nbd+tcp", + .instance_size = sizeof(BDRVNBDState), + .bdrv_file_open = nbd_open, + .bdrv_co_readv = nbd_co_readv, + .bdrv_co_writev = nbd_co_writev, + .bdrv_close = nbd_close, + .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_discard = nbd_co_discard, + .bdrv_getlength = nbd_getlength, +}; + +static BlockDriver bdrv_nbd_unix = { + .format_name = "nbd", + .protocol_name = "nbd+unix", .instance_size = sizeof(BDRVNBDState), .bdrv_file_open = nbd_open, .bdrv_co_readv = nbd_co_readv, @@ -506,12 +598,13 @@ static BlockDriver bdrv_nbd = { .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, .bdrv_getlength = nbd_getlength, - .protocol_name = "nbd", }; static void bdrv_nbd_init(void) { bdrv_register(&bdrv_nbd); + bdrv_register(&bdrv_nbd_tcp); + bdrv_register(&bdrv_nbd_unix); } block_init(bdrv_nbd_init); diff --git a/block/parallels.c b/block/parallels.c index d30f0ecf77..377375046f 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -24,8 +24,8 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" /**************************************************************/ diff --git a/block/qcow.c b/block/qcow.c index 7b5ab87d2d..4276610afd 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -22,11 +22,11 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" #include <zlib.h> -#include "aes.h" -#include "migration.h" +#include "block/aes.h" +#include "migration/migration.h" /**************************************************************/ /* QEMU COW block driver with compression and encryption support */ @@ -197,6 +197,15 @@ static int qcow_open(BlockDriverState *bs, int flags) return ret; } + +/* We have nothing to do for QCOW reopen, stubs just return + * success */ +static int qcow_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int qcow_set_key(BlockDriverState *bs, const char *key) { BDRVQcowState *s = bs->opaque; @@ -868,6 +877,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_co_readv = qcow_co_readv, diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 2d4322a8dd..2f3114ecc2 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -22,7 +22,7 @@ * THE SOFTWARE. */ -#include "block_int.h" +#include "block/block_int.h" #include "qemu-common.h" #include "qcow2.h" #include "trace.h" diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e179211c57..56fccf9487 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -25,7 +25,7 @@ #include <zlib.h> #include "qemu-common.h" -#include "block_int.h" +#include "block/block_int.h" #include "block/qcow2.h" #include "trace.h" @@ -615,57 +615,67 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) { BDRVQcowState *s = bs->opaque; - int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, *l2_table; - uint64_t cluster_offset = m->alloc_offset; - bool cow = false; - - trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + int ret; - if (m->nb_clusters == 0) + if (r->nb_sectors == 0) { return 0; + } - old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + qemu_co_mutex_unlock(&s->lock); + ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, + r->offset / BDRV_SECTOR_SIZE, + r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); + qemu_co_mutex_lock(&s->lock); - /* copy content of unmodified sectors */ - start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; - if (m->n_start) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; - } - - if (m->nb_available & (s->cluster_sectors - 1)) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available, - align_offset(m->nb_available, s->cluster_sectors)); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; + if (ret < 0) { + return ret; } /* - * Update L2 table. - * * Before we update the L2 table to actually point to the new cluster, we * need to be sure that the refcounts have been increased and COW was * handled. */ - if (cow) { - qcow2_cache_depends_on_flush(s->l2_table_cache); + qcow2_cache_depends_on_flush(s->l2_table_cache); + + return 0; +} + +int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int i, j = 0, l2_index, ret; + uint64_t *old_cluster, *l2_table; + uint64_t cluster_offset = m->alloc_offset; + + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + assert(m->nb_clusters > 0); + + old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + + /* copy content of unmodified sectors */ + ret = perform_cow(bs, m, &m->cow_start); + if (ret < 0) { + goto err; + } + + ret = perform_cow(bs, m, &m->cow_end); + if (ret < 0) { + goto err; } + /* Update L2 table. */ + if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) { + qcow2_mark_dirty(bs); + } if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } + ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); if (ret < 0) { goto err; @@ -743,38 +753,16 @@ out: } /* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. - * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. - * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, unsigned int *nb_clusters) +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, + unsigned int *nb_clusters) { BDRVQcowState *s = bs->opaque; QCowL2Meta *old_alloc; - trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, - *host_offset, *nb_clusters); - - /* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { uint64_t start = guest_offset >> s->cluster_bits; @@ -807,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, abort(); } + return 0; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + int ret; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + + ret = handle_dependencies(bs, guest_offset, nb_clusters); + if (ret < 0) { + return ret; + } + /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -818,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { - int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); + ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret < 0) { return ret; } @@ -847,7 +871,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; int l2_index, ret, sectors; @@ -919,12 +943,6 @@ again: } /* If there is something left to allocate, do that now */ - *m = (QCowL2Meta) { - .cluster_offset = cluster_offset, - .nb_clusters = 0, - }; - qemu_co_queue_init(&m->dependent_requests); - if (nb_clusters > 0) { uint64_t alloc_offset; uint64_t alloc_cluster_offset; @@ -957,22 +975,40 @@ again: * * avail_sectors: Number of sectors from the start of the first * newly allocated to the end of the last newly allocated cluster. + * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the aread that the write + * request actually writes to (excluding COW at the end) */ int requested_sectors = n_end - keep_clusters * s->cluster_sectors; int avail_sectors = nb_clusters << (s->cluster_bits - BDRV_SECTOR_BITS); + int alloc_n_start = keep_clusters == 0 ? n_start : 0; + int nb_sectors = MIN(requested_sectors, avail_sectors); + + if (keep_clusters == 0) { + cluster_offset = alloc_cluster_offset; + } + + *m = g_malloc0(sizeof(**m)); - *m = (QCowL2Meta) { - .cluster_offset = keep_clusters == 0 ? - alloc_cluster_offset : cluster_offset, + **m = (QCowL2Meta) { .alloc_offset = alloc_cluster_offset, - .offset = alloc_offset, - .n_start = keep_clusters == 0 ? n_start : 0, + .offset = alloc_offset & ~(s->cluster_size - 1), .nb_clusters = nb_clusters, - .nb_available = MIN(requested_sectors, avail_sectors), + .nb_available = nb_sectors, + + .cow_start = { + .offset = 0, + .nb_sectors = alloc_n_start, + }, + .cow_end = { + .offset = nb_sectors * BDRV_SECTOR_SIZE, + .nb_sectors = avail_sectors - nb_sectors, + }, }; - qemu_co_queue_init(&m->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); } } @@ -984,12 +1020,13 @@ again: assert(sectors > n_start); *num = sectors - n_start; + *host_offset = cluster_offset; return 0; fail: - if (m->nb_clusters > 0) { - QLIST_REMOVE(m, next_in_flight); + if (*m && (*m)->nb_clusters > 0) { + QLIST_REMOVE(*m, next_in_flight); } return ret; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 5e3f9153fb..6a95aa6c92 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -23,7 +23,7 @@ */ #include "qemu-common.h" -#include "block_int.h" +#include "block/block_int.h" #include "block/qcow2.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size); @@ -301,7 +301,8 @@ static int alloc_refcount_block(BlockDriverState *bs, uint64_t last_table_size; uint64_t blocks_clusters; do { - uint64_t table_clusters = size_to_clusters(s, table_size); + uint64_t table_clusters = + size_to_clusters(s, table_size * sizeof(uint64_t)); blocks_clusters = 1 + ((table_clusters + refcount_block_clusters - 1) / refcount_block_clusters); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 4e7c93b8b3..eb8fcd5549 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -23,7 +23,7 @@ */ #include "qemu-common.h" -#include "block_int.h" +#include "block/block_int.h" #include "block/qcow2.h" typedef struct QEMU_PACKED QCowSnapshotHeader { diff --git a/block/qcow2.c b/block/qcow2.c index 8f183f1465..d603f98a9c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -22,13 +22,13 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" #include <zlib.h> -#include "aes.h" +#include "block/aes.h" #include "block/qcow2.h" -#include "qemu-error.h" -#include "qerror.h" +#include "qemu/error-report.h" +#include "qapi/qmp/qerror.h" #include "trace.h" /* @@ -52,6 +52,7 @@ typedef struct { uint32_t magic; uint32_t len; } QCowExtension; + #define QCOW2_EXT_MAGIC_END 0 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 @@ -221,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs, * updated successfully. Therefore it is not required to check the return * value of this function. */ -static int qcow2_mark_dirty(BlockDriverState *bs) +int qcow2_mark_dirty(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; uint64_t val; @@ -558,6 +559,14 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key) return 0; } +/* We have nothing to do for QCOW2 reopen, stubs just return + * success */ +static int qcow2_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -736,21 +745,6 @@ fail: return ret; } -static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) -{ - /* Take the request off the list of running requests */ - if (m->nb_clusters != 0) { - QLIST_REMOVE(m, next_in_flight); - } - - /* Restart all dependent requests */ - if (!qemu_co_queue_empty(&m->dependent_requests)) { - qemu_co_mutex_unlock(&s->lock); - qemu_co_queue_restart_all(&m->dependent_requests); - qemu_co_mutex_lock(&s->lock); - } -} - static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, @@ -765,15 +759,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; - QCowL2Meta l2meta = { - .nb_clusters = 0, - }; + QCowL2Meta *l2meta; trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, remaining_sectors); - qemu_co_queue_init(&l2meta.dependent_requests); - qemu_iovec_init(&hd_qiov, qiov->niov); s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -782,6 +772,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + l2meta = NULL; + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; @@ -791,17 +783,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, n_end, &cur_nr_sectors, &l2meta); + index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta); if (ret < 0) { goto fail; } - if (l2meta.nb_clusters > 0 && - (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) { - qcow2_mark_dirty(bs); - } - - cluster_offset = l2meta.cluster_offset; assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); @@ -826,8 +812,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, cur_nr_sectors * 512); } - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, @@ -838,12 +824,24 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - ret = qcow2_alloc_cluster_link_l2(bs, &l2meta); - if (ret < 0) { - goto fail; - } + if (l2meta != NULL) { + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); + if (ret < 0) { + goto fail; + } + + /* Take the request off the list of running requests */ + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } - run_dependent_requests(s, &l2meta); + qemu_co_mutex_unlock(&s->lock); + qemu_co_queue_restart_all(&l2meta->dependent_requests); + qemu_co_mutex_lock(&s->lock); + + g_free(l2meta); + l2meta = NULL; + } remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -853,10 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: - run_dependent_requests(s, &l2meta); - qemu_co_mutex_unlock(&s->lock); + if (l2meta != NULL) { + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + qemu_co_queue_restart_all(&l2meta->dependent_requests); + g_free(l2meta); + } + qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); @@ -1087,6 +1091,7 @@ int qcow2_update_header(BlockDriverState *bs) goto fail; } + /* Using strncpy is ok here, since buf is not NUL-terminated. */ strncpy(buf, bs->backing_file, buflen); header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); @@ -1118,31 +1123,33 @@ static int preallocate(BlockDriverState *bs) { uint64_t nb_sectors; uint64_t offset; + uint64_t host_offset = 0; int num; int ret; - QCowL2Meta meta; + QCowL2Meta *meta; nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; - qemu_co_queue_init(&meta.dependent_requests); - meta.cluster_offset = 0; while (nb_sectors) { num = MIN(nb_sectors, INT_MAX >> 9); - ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta); + ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, + &host_offset, &meta); if (ret < 0) { return ret; } - ret = qcow2_alloc_cluster_link_l2(bs, &meta); + ret = qcow2_alloc_cluster_link_l2(bs, meta); if (ret < 0) { - qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); + qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters); return ret; } /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ - run_dependent_requests(bs->opaque, &meta); + if (meta != NULL) { + QLIST_REMOVE(meta, next_in_flight); + } /* TODO Preallocate data if requested */ @@ -1155,10 +1162,10 @@ static int preallocate(BlockDriverState *bs) * all of the allocated clusters (otherwise we get failing reads after * EOF). Extend the image to the last allocated sector. */ - if (meta.cluster_offset != 0) { + if (host_offset != 0) { uint8_t buf[512]; memset(buf, 0, 512); - ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1); + ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1); if (ret < 0) { return ret; } @@ -1679,6 +1686,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_probe = qcow2_probe, .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_create = qcow2_create, .bdrv_co_is_allocated = qcow2_co_is_allocated, .bdrv_set_key = qcow2_set_key, diff --git a/block/qcow2.h b/block/qcow2.h index b4eb65470e..718b52baca 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -25,8 +25,8 @@ #ifndef BLOCK_QCOW2_H #define BLOCK_QCOW2_H -#include "aes.h" -#include "qemu-coroutine.h" +#include "block/aes.h" +#include "block/coroutine.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -196,17 +196,56 @@ typedef struct QCowCreateState { struct QCowAIOCB; -/* XXX This could be private for qcow2-cluster.c */ +typedef struct Qcow2COWRegion { + /** + * Offset of the COW region in bytes from the start of the first cluster + * touched by the request. + */ + uint64_t offset; + + /** Number of sectors to copy */ + int nb_sectors; +} Qcow2COWRegion; + +/** + * Describes an in-flight (part of a) write request that writes to clusters + * that are not referenced in their L2 table yet. + */ typedef struct QCowL2Meta { + /** Guest offset of the first newly allocated cluster */ uint64_t offset; - uint64_t cluster_offset; + + /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; - int n_start; + + /** + * Number of sectors from the start of the first allocated cluster to + * the end of the (possibly shortened) request + */ int nb_available; + + /** Number of newly allocated clusters */ int nb_clusters; + + /** + * Requests that overlap with this allocation and wait to be restarted + * when the allocating request has completed. + */ CoQueue dependent_requests; + /** + * The COW Region between the start of the first allocated cluster and the + * area the guest actually writes to. + */ + Qcow2COWRegion cow_start; + + /** + * The COW Region between the area the guest actually writes to and the + * end of the last allocated cluster. + */ + Qcow2COWRegion cow_end; + QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; @@ -264,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); + +int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ @@ -297,7 +338,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m); + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size); diff --git a/block/qed-table.c b/block/qed-table.c index ce07b05549..76d2dcccf8 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -13,7 +13,7 @@ */ #include "trace.h" -#include "qemu_socket.h" /* for EINPROGRESS on Windows */ +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ #include "qed.h" typedef struct { @@ -103,7 +103,6 @@ static void qed_write_table_cb(void *opaque, int ret) out: qemu_vfree(write_table_cb->table); gencb_complete(&write_table_cb->gencb, ret); - return; } /** diff --git a/block/qed.c b/block/qed.c index a02dbfd72d..cf85d8f2b4 100644 --- a/block/qed.c +++ b/block/qed.c @@ -12,11 +12,11 @@ * */ -#include "qemu-timer.h" +#include "qemu/timer.h" #include "trace.h" #include "qed.h" -#include "qerror.h" -#include "migration.h" +#include "qapi/qmp/qerror.h" +#include "migration/migration.h" static void qed_aio_cancel(BlockDriverAIOCB *blockacb) { @@ -30,7 +30,7 @@ static void qed_aio_cancel(BlockDriverAIOCB *blockacb) } } -static AIOPool qed_aio_pool = { +static const AIOCBInfo qed_aiocb_info = { .aiocb_size = sizeof(QEDAIOCB), .cancel = qed_aio_cancel, }; @@ -505,6 +505,14 @@ out: return ret; } +/* We have nothing to do for QED reopen, stubs just return + * success */ +static int bdrv_qed_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; @@ -1303,7 +1311,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque, int flags) { - QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque); + QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque); trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, opaque, flags); @@ -1363,10 +1371,21 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, int nb_sectors) { BlockDriverAIOCB *blockacb; + BDRVQEDState *s = bs->opaque; QEDWriteZeroesCB cb = { .done = false }; QEMUIOVector qiov; struct iovec iov; + /* Refuse if there are untouched backing file sectors */ + if (bs->backing_hd) { + if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) { + return -ENOTSUP; + } + if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) { + return -ENOTSUP; + } + } + /* Zero writes start without an I/O buffer. If a buffer becomes necessary * then it will be allocated during request processing. */ @@ -1553,6 +1572,7 @@ static BlockDriver bdrv_qed = { .bdrv_rebind = bdrv_qed_rebind, .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, + .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, .bdrv_create = bdrv_qed_create, .bdrv_co_is_allocated = bdrv_qed_co_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty, diff --git a/block/qed.h b/block/qed.h index a063bf70af..2b4ddedf31 100644 --- a/block/qed.h +++ b/block/qed.h @@ -15,7 +15,7 @@ #ifndef BLOCK_QED_H #define BLOCK_QED_H -#include "block_int.h" +#include "block/block_int.h" /* The layout of a QED file is as follows: * diff --git a/block/raw-posix-aio.h b/block/raw-aio.h index ba118f616b..e77f361148 100644 --- a/block/raw-posix-aio.h +++ b/block/raw-aio.h @@ -1,5 +1,5 @@ /* - * QEMU Posix block I/O backend AIO support + * Declarations for AIO in the raw protocol * * Copyright IBM, Corp. 2008 * @@ -12,8 +12,8 @@ * Contributions after 2012-01-13 are licensed under the terms of the * GNU GPL, version 2 or (at your option) any later version. */ -#ifndef QEMU_RAW_POSIX_AIO_H -#define QEMU_RAW_POSIX_AIO_H +#ifndef QEMU_RAW_AIO_H +#define QEMU_RAW_AIO_H /* AIO request types */ #define QEMU_AIO_READ 0x0001 @@ -27,19 +27,22 @@ #define QEMU_AIO_MISALIGNED 0x1000 -/* posix-aio-compat.c - thread pool based implementation */ -int paio_init(void); -BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int type); -BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque); - /* linux-aio.c - Linux native implementation */ +#ifdef CONFIG_LINUX_AIO void *laio_init(void); BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type); +#endif + +#ifdef _WIN32 +typedef struct QEMUWin32AIOState QEMUWin32AIOState; +QEMUWin32AIOState *win32_aio_init(void); +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile); +BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type); +#endif -#endif /* QEMU_RAW_POSIX_AIO_H */ +#endif /* QEMU_RAW_AIO_H */ diff --git a/block/raw-posix.c b/block/raw-posix.c index 6be20b1925..87d888ed01 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -22,12 +22,14 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "qemu-timer.h" -#include "qemu-char.h" -#include "qemu-log.h" -#include "block_int.h" -#include "module.h" -#include "block/raw-posix-aio.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "trace.h" +#include "block/thread-pool.h" +#include "qemu/iov.h" +#include "raw-aio.h" #if defined(__APPLE__) && (__MACH__) #include <paths.h> @@ -133,16 +135,36 @@ typedef struct BDRVRawState { int use_aio; void *aio_ctx; #endif - uint8_t *aligned_buf; - unsigned aligned_buf_size; #ifdef CONFIG_XFS bool is_xfs : 1; #endif } BDRVRawState; +typedef struct BDRVRawReopenState { + int fd; + int open_flags; +#ifdef CONFIG_LINUX_AIO + int use_aio; +#endif +} BDRVRawReopenState; + static int fd_open(BlockDriverState *bs); static int64_t raw_getlength(BlockDriverState *bs); +typedef struct RawPosixAIOData { + BlockDriverState *bs; + int aio_fildes; + union { + struct iovec *aio_iov; + void *aio_ioctl_buf; + }; + int aio_niov; + size_t aio_nbytes; +#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ + off_t aio_offset; + int aio_type; +} RawPosixAIOData; + #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) static int cdrom_reopen(BlockDriverState *bs); #endif @@ -185,6 +207,57 @@ static int raw_normalize_devicepath(const char **filename) } #endif +static void raw_parse_flags(int bdrv_flags, int *open_flags) +{ + assert(open_flags != NULL); + + *open_flags |= O_BINARY; + *open_flags &= ~O_ACCMODE; + if (bdrv_flags & BDRV_O_RDWR) { + *open_flags |= O_RDWR; + } else { + *open_flags |= O_RDONLY; + } + + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((bdrv_flags & BDRV_O_NOCACHE)) { + *open_flags |= O_DIRECT; + } +} + +#ifdef CONFIG_LINUX_AIO +static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags) +{ + int ret = -1; + assert(aio_ctx != NULL); + assert(use_aio != NULL); + /* + * Currently Linux do AIO only for files opened with O_DIRECT + * specified so check NOCACHE flag too + */ + if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == + (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { + + /* if non-NULL, laio_init() has already been run */ + if (*aio_ctx == NULL) { + *aio_ctx = laio_init(); + if (!*aio_ctx) { + goto error; + } + } + *use_aio = 1; + } else { + *use_aio = 0; + } + + ret = 0; + +error: + return ret; +} +#endif + static int raw_open_common(BlockDriverState *bs, const char *filename, int bdrv_flags, int open_flags) { @@ -196,20 +269,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return ret; } - s->open_flags = open_flags | O_BINARY; - s->open_flags &= ~O_ACCMODE; - if (bdrv_flags & BDRV_O_RDWR) { - s->open_flags |= O_RDWR; - } else { - s->open_flags |= O_RDONLY; - } - - /* Use O_DSYNC for write-through caching, no flags for write-back caching, - * and O_DIRECT for no caching. */ - if ((bdrv_flags & BDRV_O_NOCACHE)) - s->open_flags |= O_DIRECT; - if (!(bdrv_flags & BDRV_O_CACHE_WB)) - s->open_flags |= O_DSYNC; + s->open_flags = open_flags; + raw_parse_flags(bdrv_flags, &s->open_flags); s->fd = -1; fd = qemu_open(filename, s->open_flags, 0644); @@ -220,45 +281,13 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return ret; } s->fd = fd; - s->aligned_buf = NULL; - - if ((bdrv_flags & BDRV_O_NOCACHE)) { - /* - * Allocate a buffer for read/modify/write cycles. Chose the size - * pessimistically as we don't know the block size yet. - */ - s->aligned_buf_size = 32 * MAX_BLOCKSIZE; - s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size); - if (s->aligned_buf == NULL) { - goto out_close; - } - } - - /* We're falling back to POSIX AIO in some cases so init always */ - if (paio_init() < 0) { - goto out_free_buf; - } - -#ifdef CONFIG_LINUX_AIO - /* - * Currently Linux do AIO only for files opened with O_DIRECT - * specified so check NOCACHE flag too - */ - if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { - s->aio_ctx = laio_init(); - if (!s->aio_ctx) { - goto out_free_buf; - } - s->use_aio = 1; - } else -#endif - { #ifdef CONFIG_LINUX_AIO - s->use_aio = 0; -#endif + if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) { + qemu_close(fd); + return -errno; } +#endif #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -267,12 +296,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, #endif return 0; - -out_free_buf: - qemu_vfree(s->aligned_buf); -out_close: - qemu_close(fd); - return -errno; } static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -283,6 +306,113 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return raw_open_common(bs, filename, flags, 0); } +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRawState *s; + BDRVRawReopenState *raw_s; + int ret = 0; + + assert(state != NULL); + assert(state->bs != NULL); + + s = state->bs->opaque; + + state->opaque = g_malloc0(sizeof(BDRVRawReopenState)); + raw_s = state->opaque; + +#ifdef CONFIG_LINUX_AIO + raw_s->use_aio = s->use_aio; + + /* we can use s->aio_ctx instead of a copy, because the use_aio flag is + * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio() + * won't override aio_ctx if aio_ctx is non-NULL */ + if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) { + return -1; + } +#endif + + if (s->type == FTYPE_FD || s->type == FTYPE_CD) { + raw_s->open_flags |= O_NONBLOCK; + } + + raw_parse_flags(state->flags, &raw_s->open_flags); + + raw_s->fd = -1; + + int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK; +#ifdef O_NOATIME + fcntl_flags |= O_NOATIME; +#endif + + if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) { + /* dup the original fd */ + /* TODO: use qemu fcntl wrapper */ +#ifdef F_DUPFD_CLOEXEC + raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0); +#else + raw_s->fd = dup(s->fd); + if (raw_s->fd != -1) { + qemu_set_cloexec(raw_s->fd); + } +#endif + if (raw_s->fd >= 0) { + ret = fcntl_setfl(raw_s->fd, raw_s->open_flags); + if (ret) { + qemu_close(raw_s->fd); + raw_s->fd = -1; + } + } + } + + /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ + if (raw_s->fd == -1) { + assert(!(raw_s->open_flags & O_CREAT)); + raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags); + if (raw_s->fd == -1) { + ret = -1; + } + } + return ret; +} + + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawReopenState *raw_s = state->opaque; + BDRVRawState *s = state->bs->opaque; + + s->open_flags = raw_s->open_flags; + + qemu_close(s->fd); + s->fd = raw_s->fd; +#ifdef CONFIG_LINUX_AIO + s->use_aio = raw_s->use_aio; +#endif + + g_free(state->opaque); + state->opaque = NULL; +} + + +static void raw_reopen_abort(BDRVReopenState *state) +{ + BDRVRawReopenState *raw_s = state->opaque; + + /* nothing to do if NULL, we didn't get far enough */ + if (raw_s == NULL) { + return; + } + + if (raw_s->fd >= 0) { + qemu_close(raw_s->fd); + raw_s->fd = -1; + } + g_free(state->opaque); + state->opaque = NULL; +} + + /* XXX: use host sector size if necessary with: #ifdef DIOCGSECTORSIZE { @@ -316,6 +446,267 @@ static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return 1; } +static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) +{ + int ret; + + ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); + if (ret == -1) { + return -errno; + } + + /* + * This looks weird, but the aio code only considers a request + * successful if it has written the full number of bytes. + * + * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, + * so in fact we return the ioctl command here to make posix_aio_read() + * happy.. + */ + return aiocb->aio_nbytes; +} + +static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) +{ + int ret; + + ret = qemu_fdatasync(aiocb->aio_fildes); + if (ret == -1) { + return -errno; + } + return 0; +} + +#ifdef CONFIG_PREADV + +static bool preadv_present = true; + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return preadv(fd, iov, nr_iov, offset); +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return pwritev(fd, iov, nr_iov, offset); +} + +#else + +static bool preadv_present = false; + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +#endif + +static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) +{ + ssize_t len; + + do { + if (aiocb->aio_type & QEMU_AIO_WRITE) + len = qemu_pwritev(aiocb->aio_fildes, + aiocb->aio_iov, + aiocb->aio_niov, + aiocb->aio_offset); + else + len = qemu_preadv(aiocb->aio_fildes, + aiocb->aio_iov, + aiocb->aio_niov, + aiocb->aio_offset); + } while (len == -1 && errno == EINTR); + + if (len == -1) { + return -errno; + } + return len; +} + +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ +static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) +{ + ssize_t offset = 0; + ssize_t len; + + while (offset < aiocb->aio_nbytes) { + if (aiocb->aio_type & QEMU_AIO_WRITE) { + len = pwrite(aiocb->aio_fildes, + (const char *)buf + offset, + aiocb->aio_nbytes - offset, + aiocb->aio_offset + offset); + } else { + len = pread(aiocb->aio_fildes, + buf + offset, + aiocb->aio_nbytes - offset, + aiocb->aio_offset + offset); + } + if (len == -1 && errno == EINTR) { + continue; + } else if (len == -1) { + offset = -errno; + break; + } else if (len == 0) { + break; + } + offset += len; + } + + return offset; +} + +static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) +{ + ssize_t nbytes; + char *buf; + + if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) { + /* + * If there is just a single buffer, and it is properly aligned + * we can just use plain pread/pwrite without any problems. + */ + if (aiocb->aio_niov == 1) { + return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); + } + /* + * We have more than one iovec, and all are properly aligned. + * + * Try preadv/pwritev first and fall back to linearizing the + * buffer if it's not supported. + */ + if (preadv_present) { + nbytes = handle_aiocb_rw_vector(aiocb); + if (nbytes == aiocb->aio_nbytes || + (nbytes < 0 && nbytes != -ENOSYS)) { + return nbytes; + } + preadv_present = false; + } + + /* + * XXX(hch): short read/write. no easy way to handle the reminder + * using these interfaces. For now retry using plain + * pread/pwrite? + */ + } + + /* + * Ok, we have to do it the hard way, copy all segments into + * a single aligned buffer. + */ + buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes); + if (aiocb->aio_type & QEMU_AIO_WRITE) { + char *p = buf; + int i; + + for (i = 0; i < aiocb->aio_niov; ++i) { + memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); + p += aiocb->aio_iov[i].iov_len; + } + } + + nbytes = handle_aiocb_rw_linear(aiocb, buf); + if (!(aiocb->aio_type & QEMU_AIO_WRITE)) { + char *p = buf; + size_t count = aiocb->aio_nbytes, copy; + int i; + + for (i = 0; i < aiocb->aio_niov && count; ++i) { + copy = count; + if (copy > aiocb->aio_iov[i].iov_len) { + copy = aiocb->aio_iov[i].iov_len; + } + memcpy(aiocb->aio_iov[i].iov_base, p, copy); + p += copy; + count -= copy; + } + } + qemu_vfree(buf); + + return nbytes; +} + +static int aio_worker(void *arg) +{ + RawPosixAIOData *aiocb = arg; + ssize_t ret = 0; + + switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { + case QEMU_AIO_READ: + ret = handle_aiocb_rw(aiocb); + if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) { + iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, + 0, aiocb->aio_nbytes - ret); + + ret = aiocb->aio_nbytes; + } + if (ret == aiocb->aio_nbytes) { + ret = 0; + } else if (ret >= 0 && ret < aiocb->aio_nbytes) { + ret = -EINVAL; + } + break; + case QEMU_AIO_WRITE: + ret = handle_aiocb_rw(aiocb); + if (ret == aiocb->aio_nbytes) { + ret = 0; + } else if (ret >= 0 && ret < aiocb->aio_nbytes) { + ret = -EINVAL; + } + break; + case QEMU_AIO_FLUSH: + ret = handle_aiocb_flush(aiocb); + break; + case QEMU_AIO_IOCTL: + ret = handle_aiocb_ioctl(aiocb); + break; + default: + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); + ret = -EINVAL; + break; + } + + g_slice_free(RawPosixAIOData, aiocb); + return ret; +} + +static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); + + acb->bs = bs; + acb->aio_type = type; + acb->aio_fildes = fd; + + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + } + acb->aio_nbytes = nb_sectors * 512; + acb->aio_offset = sector_num * 512; + + trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); +} + static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type) @@ -330,7 +721,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * boundary. Check if this is the case or tell the low-level * driver that it needs to copy the buffer. */ - if (s->aligned_buf) { + if ((bs->open_flags & BDRV_O_NOCACHE)) { if (!qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO @@ -378,8 +769,6 @@ static void raw_close(BlockDriverState *bs) if (s->fd >= 0) { qemu_close(s->fd); s->fd = -1; - if (s->aligned_buf != NULL) - qemu_vfree(s->aligned_buf); } } @@ -735,6 +1124,9 @@ static BlockDriver bdrv_file = { .instance_size = sizeof(BDRVRawState), .bdrv_probe = NULL, /* no probe for protocols */ .bdrv_file_open = raw_open, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, .bdrv_co_discard = raw_co_discard, @@ -937,10 +1329,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; + RawPosixAIOData *acb; if (fd_open(bs) < 0) return NULL; - return paio_ioctl(bs, s->fd, req, buf, cb, opaque); + + acb = g_slice_new(RawPosixAIOData); + acb->bs = bs; + acb->aio_type = QEMU_AIO_IOCTL; + acb->aio_fildes = s->fd; + acb->aio_offset = 0; + acb->aio_ioctl_buf = buf; + acb->aio_ioctl_cmd = req; + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) @@ -1004,6 +1405,9 @@ static BlockDriver bdrv_host_device = { .bdrv_probe_device = hdev_probe_device, .bdrv_file_open = hdev_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, @@ -1125,6 +1529,9 @@ static BlockDriver bdrv_host_floppy = { .bdrv_probe_device = floppy_probe_device, .bdrv_file_open = floppy_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, @@ -1224,6 +1631,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_probe_device = cdrom_probe_device, .bdrv_file_open = cdrom_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, @@ -1343,6 +1753,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_probe_device = cdrom_probe_device, .bdrv_file_open = cdrom_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, @@ -1363,6 +1776,40 @@ static BlockDriver bdrv_host_cdrom = { }; #endif /* __FreeBSD__ */ +#ifdef CONFIG_LINUX_AIO +/** + * Return the file descriptor for Linux AIO + * + * This function is a layering violation and should be removed when it becomes + * possible to call the block layer outside the global mutex. It allows the + * caller to hijack the file descriptor so I/O can be performed outside the + * block layer. + */ +int raw_get_aio_fd(BlockDriverState *bs) +{ + BDRVRawState *s; + + if (!bs->drv) { + return -ENOMEDIUM; + } + + if (bs->drv == bdrv_find_format("raw")) { + bs = bs->file; + } + + /* raw-posix has several protocols so just check for raw_aio_readv */ + if (bs->drv->bdrv_aio_readv != raw_aio_readv) { + return -ENOTSUP; + } + + s = bs->opaque; + if (!s->use_aio) { + return -ENOTSUP; + } + return s->fd; +} +#endif /* CONFIG_LINUX_AIO */ + static void bdrv_file_init(void) { /* diff --git a/block/raw-win32.c b/block/raw-win32.c index c56bf83375..b89ac19ffa 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -22,9 +22,13 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "qemu-timer.h" -#include "block_int.h" -#include "module.h" +#include "qemu/timer.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "raw-aio.h" +#include "trace.h" +#include "block/thread-pool.h" +#include "qemu/iov.h" #include <windows.h> #include <winioctl.h> @@ -32,12 +36,130 @@ #define FTYPE_CD 1 #define FTYPE_HARDDISK 2 +static QEMUWin32AIOState *aio; + +typedef struct RawWin32AIOData { + BlockDriverState *bs; + HANDLE hfile; + struct iovec *aio_iov; + int aio_niov; + size_t aio_nbytes; + off64_t aio_offset; + int aio_type; +} RawWin32AIOData; + typedef struct BDRVRawState { HANDLE hfile; int type; char drive_path[16]; /* format: "d:\" */ + QEMUWin32AIOState *aio; } BDRVRawState; +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ +static size_t handle_aiocb_rw(RawWin32AIOData *aiocb) +{ + size_t offset = 0; + int i; + + for (i = 0; i < aiocb->aio_niov; i++) { + OVERLAPPED ov; + DWORD ret, ret_count, len; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = (aiocb->aio_offset + offset); + ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32; + len = aiocb->aio_iov[i].iov_len; + if (aiocb->aio_type & QEMU_AIO_WRITE) { + ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base, + len, &ret_count, &ov); + } else { + ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base, + len, &ret_count, &ov); + } + if (!ret) { + ret_count = 0; + } + if (ret_count != len) { + break; + } + offset += len; + } + + return offset; +} + +static int aio_worker(void *arg) +{ + RawWin32AIOData *aiocb = arg; + ssize_t ret = 0; + size_t count; + + switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { + case QEMU_AIO_READ: + count = handle_aiocb_rw(aiocb); + if (count < aiocb->aio_nbytes && aiocb->bs->growable) { + /* A short read means that we have reached EOF. Pad the buffer + * with zeros for bytes after EOF. */ + iov_memset(aiocb->aio_iov, aiocb->aio_niov, count, + 0, aiocb->aio_nbytes - count); + + count = aiocb->aio_nbytes; + } + if (count == aiocb->aio_nbytes) { + ret = 0; + } else { + ret = -EINVAL; + } + break; + case QEMU_AIO_WRITE: + count = handle_aiocb_rw(aiocb); + if (count == aiocb->aio_nbytes) { + count = 0; + } else { + count = -EINVAL; + } + break; + case QEMU_AIO_FLUSH: + if (!FlushFileBuffers(aiocb->hfile)) { + return -EIO; + } + break; + default: + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); + ret = -EINVAL; + break; + } + + g_slice_free(RawWin32AIOData, aiocb); + return ret; +} + +static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + RawWin32AIOData *acb = g_slice_new(RawWin32AIOData); + + acb->bs = bs; + acb->hfile = hfile; + acb->aio_type = type; + + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + } + acb->aio_nbytes = nb_sectors * 512; + acb->aio_offset = sector_num * 512; + + trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); +} + int qemu_ftruncate64(int fd, int64_t length) { LARGE_INTEGER li; @@ -77,6 +199,26 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) +{ + assert(access_flags != NULL); + assert(overlapped != NULL); + + if (flags & BDRV_O_RDWR) { + *access_flags = GENERIC_READ | GENERIC_WRITE; + } else { + *access_flags = GENERIC_READ; + } + + *overlapped = FILE_ATTRIBUTE_NORMAL; + if (flags & BDRV_O_NATIVE_AIO) { + *overlapped |= FILE_FLAG_OVERLAPPED; + } + if (flags & BDRV_O_NOCACHE) { + *overlapped |= FILE_FLAG_NO_BUFFERING; + } +} + static int raw_open(BlockDriverState *bs, const char *filename, int flags) { BDRVRawState *s = bs->opaque; @@ -85,17 +227,15 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) s->type = FTYPE_FILE; - if (flags & BDRV_O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; + raw_parse_flags(flags, &access_flags, &overlapped); + + if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) { + aio = win32_aio_init(); + if (aio == NULL) { + return -EINVAL; + } } - overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NOCACHE) - overlapped |= FILE_FLAG_NO_BUFFERING; - if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, overlapped, NULL); @@ -104,64 +244,53 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) if (err == ERROR_ACCESS_DENIED) return -EACCES; - return -1; + return -EINVAL; + } + + if (flags & BDRV_O_NATIVE_AIO) { + int ret = win32_aio_attach(aio, s->hfile); + if (ret < 0) { + CloseHandle(s->hfile); + return ret; + } + s->aio = aio; } return 0; } -static int raw_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) +static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - OVERLAPPED ov; - DWORD ret_count; - int ret; - int64_t offset = sector_num * 512; - int count = nb_sectors * 512; - - memset(&ov, 0, sizeof(ov)); - ov.Offset = offset; - ov.OffsetHigh = offset >> 32; - ret = ReadFile(s->hfile, buf, count, &ret_count, &ov); - if (!ret) - return ret_count; - if (ret_count == count) - ret_count = 0; - return ret_count; + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, + nb_sectors, cb, opaque, QEMU_AIO_READ); + } else { + return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_READ); + } } -static int raw_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - OVERLAPPED ov; - DWORD ret_count; - int ret; - int64_t offset = sector_num * 512; - int count = nb_sectors * 512; - - memset(&ov, 0, sizeof(ov)); - ov.Offset = offset; - ov.OffsetHigh = offset >> 32; - ret = WriteFile(s->hfile, buf, count, &ret_count, &ov); - if (!ret) - return ret_count; - if (ret_count == count) - ret_count = 0; - return ret_count; + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, + nb_sectors, cb, opaque, QEMU_AIO_WRITE); + } else { + return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_WRITE); + } } -static int raw_flush(BlockDriverState *bs) +static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - int ret; - - ret = FlushFileBuffers(s->hfile); - if (ret == 0) { - return -EIO; - } - - return 0; + return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); } static void raw_close(BlockDriverState *bs) @@ -174,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset) { BDRVRawState *s = bs->opaque; LONG low, high; + DWORD dwPtrLow; low = offset; high = offset >> 32; - if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN)) - return -EIO; - if (!SetEndOfFile(s->hfile)) + + /* + * An error has occurred if the return value is INVALID_SET_FILE_POINTER + * and GetLastError doesn't return NO_ERROR. + */ + dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN); + if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError()); + return -EIO; + } + if (SetEndOfFile(s->hfile) == 0) { + fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError()); return -EIO; + } return 0; } @@ -282,9 +422,9 @@ static BlockDriver bdrv_file = { .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_co_flush_to_disk = raw_flush, + .bdrv_aio_readv = raw_aio_readv, + .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -374,18 +514,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) } s->type = find_device_type(bs, filename); - if (flags & BDRV_O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } + raw_parse_flags(flags, &access_flags, &overlapped); + create_flags = OPEN_EXISTING; - overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NOCACHE) - overlapped |= FILE_FLAG_NO_BUFFERING; - if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); @@ -413,9 +545,9 @@ static BlockDriver bdrv_host_device = { .bdrv_close = raw_close, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_co_flush_to_disk = raw_flush, + .bdrv_aio_readv = raw_aio_readv, + .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_getlength = raw_getlength, .bdrv_get_allocated_file_size diff --git a/block/raw.c b/block/raw.c index ff34ea41e7..75812db3c2 100644 --- a/block/raw.c +++ b/block/raw.c @@ -1,7 +1,7 @@ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" +#include "block/block_int.h" +#include "qemu/module.h" static int raw_open(BlockDriverState *bs, int flags) { @@ -9,6 +9,14 @@ static int raw_open(BlockDriverState *bs, int flags) return 0; } +/* We have nothing to do for raw reopen, stubs just return + * success */ +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -115,6 +123,8 @@ static BlockDriver bdrv_raw = { .bdrv_open = raw_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, .bdrv_co_is_allocated = raw_co_is_allocated, diff --git a/block/rbd.c b/block/rbd.c index 5a0f79fc8f..8cd10a7b59 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -14,8 +14,8 @@ #include <inttypes.h> #include "qemu-common.h" -#include "qemu-error.h" -#include "block_int.h" +#include "qemu/error-report.h" +#include "block/block_int.h" #include <rbd/librbd.h> @@ -69,7 +69,7 @@ typedef enum { typedef struct RBDAIOCB { BlockDriverAIOCB common; QEMUBH *bh; - int ret; + int64_t ret; QEMUIOVector *qiov; char *bounce; RBDAIOCmd cmd; @@ -77,6 +77,7 @@ typedef struct RBDAIOCB { int error; struct BDRVRBDState *s; int cancelled; + int status; } RBDAIOCB; typedef struct RADOSCB { @@ -86,7 +87,7 @@ typedef struct RADOSCB { int done; int64_t size; char *buf; - int ret; + int64_t ret; } RADOSCB; #define RBD_FD_READ 0 @@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) RBDAIOCB *acb = rcb->acb; int64_t r; - if (acb->cancelled) { - qemu_vfree(acb->bounce); - qemu_aio_release(acb); - goto done; - } - r = rcb->ret; if (acb->cmd == RBD_AIO_WRITE || @@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) /* Note that acb->bh can be NULL in case where the aio was cancelled */ acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); qemu_bh_schedule(acb->bh); -done: g_free(rcb); } @@ -487,12 +481,6 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) rados_conf_set(s->cluster, "rbd_cache", "false"); } else { rados_conf_set(s->cluster, "rbd_cache", "true"); - if (!(flags & BDRV_O_CACHE_WB)) { - r = rados_conf_set(s->cluster, "rbd_cache_max_dirty", "0"); - if (r < 0) { - rados_conf_set(s->cluster, "rbd_cache", "false"); - } - } } if (strstr(conf, "conf=") == NULL) { @@ -574,9 +562,15 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) { RBDAIOCB *acb = (RBDAIOCB *) blockacb; acb->cancelled = 1; + + while (acb->status == -EINPROGRESS) { + qemu_aio_wait(); + } + + qemu_aio_release(acb); } -static AIOPool rbd_aio_pool = { +static const AIOCBInfo rbd_aiocb_info = { .aiocb_size = sizeof(RBDAIOCB), .cancel = qemu_rbd_aio_cancel, }; @@ -645,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque) acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); qemu_bh_delete(acb->bh); acb->bh = NULL; + acb->status = 0; - qemu_aio_release(acb); + if (!acb->cancelled) { + qemu_aio_release(acb); + } } static int rbd_aio_discard_wrapper(rbd_image_t image, @@ -678,7 +675,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, BDRVRBDState *s = bs->opaque; - acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque); acb->cmd = cmd; acb->qiov = qiov; if (cmd == RBD_AIO_DISCARD) { @@ -691,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->s = s; acb->cancelled = 0; acb->bh = NULL; + acb->status = -EINPROGRESS; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); diff --git a/block/sheepdog.c b/block/sheepdog.c index a04ad99ead..e821746116 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -13,10 +13,10 @@ */ #include "qemu-common.h" -#include "qemu-error.h" -#include "qemu_socket.h" -#include "block_int.h" -#include "bitops.h" +#include "qemu/error-report.h" +#include "qemu/sockets.h" +#include "block/block_int.h" +#include "qemu/bitops.h" #define SD_PROTO_VER 0x01 @@ -201,12 +201,12 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) return hval; } -static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx) +static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx) { return inode->vdi_id == inode->data_vdi_id[idx]; } -static inline int is_data_obj(uint64_t oid) +static inline bool is_data_obj(uint64_t oid) { return !(VDI_BIT & oid); } @@ -231,7 +231,7 @@ static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; } -static inline int is_snapshot(struct SheepdogInode *inode) +static inline bool is_snapshot(struct SheepdogInode *inode) { return !!inode->snap_ctime; } @@ -281,7 +281,7 @@ struct SheepdogAIOCB { Coroutine *coroutine; void (*aio_done_func)(SheepdogAIOCB *); - int canceled; + bool canceled; int nr_pending; }; @@ -292,8 +292,8 @@ typedef struct BDRVSheepdogState { uint32_t max_dirty_data_idx; char name[SD_MAX_VDI_LEN]; - int is_snapshot; - uint8_t cache_enabled; + bool is_snapshot; + bool cache_enabled; char *addr; char *port; @@ -417,10 +417,10 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb) */ acb->ret = -EIO; qemu_coroutine_enter(acb->coroutine, NULL); - acb->canceled = 1; + acb->canceled = true; } -static AIOPool sd_aio_pool = { +static const AIOCBInfo sd_aiocb_info = { .aiocb_size = sizeof(SheepdogAIOCB), .cancel = sd_aio_cancel, }; @@ -431,7 +431,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, { SheepdogAIOCB *acb; - acb = qemu_aio_get(&sd_aio_pool, bs, cb, opaque); + acb = qemu_aio_get(&sd_aiocb_info, bs, cb, opaque); acb->qiov = qiov; @@ -439,7 +439,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->nb_sectors = nb_sectors; acb->aio_done_func = NULL; - acb->canceled = 0; + acb->canceled = false; acb->coroutine = qemu_coroutine_self(); acb->ret = 0; acb->nr_pending = 0; @@ -485,6 +485,7 @@ static int connect_to_sdog(const char *addr, const char *port) if (errno == EINTR) { goto reconnect; } + close(fd); break; } @@ -612,7 +613,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, } static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, int create, + struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type); @@ -645,7 +646,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); ret = add_aio_request(s, aio_req, acb->qiov->iov, - acb->qiov->niov, 0, acb->aiocb_type); + acb->qiov->niov, false, acb->aiocb_type); if (ret < 0) { error_report("add_aio_request is failed"); free_aio_req(s, aio_req); @@ -713,16 +714,17 @@ static void coroutine_fn aio_read_response(void *opaque) * and max_dirty_data_idx are changed to include updated * index between them. */ - s->inode.data_vdi_id[idx] = s->inode.vdi_id; - s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); - s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); - + if (rsp.result == SD_RES_SUCCESS) { + s->inode.data_vdi_id[idx] = s->inode.vdi_id; + s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); + s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); + } /* * Some requests may be blocked because simultaneous * create requests are not allowed, so we search the * pending requests here. */ - send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx)); + send_pending_req(s, aio_req->oid); } break; case AIOCB_READ_UDATA: @@ -865,14 +867,14 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename, s->port = 0; } - strncpy(vdi, p, SD_MAX_VDI_LEN); + pstrcpy(vdi, SD_MAX_VDI_LEN, p); p = strchr(vdi, ':'); if (p) { *p++ = '\0'; *snapid = strtoul(p, NULL, 10); if (*snapid == 0) { - strncpy(tag, p, SD_MAX_VDI_TAG_LEN); + pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p); } } else { *snapid = CURRENT_VDI_ID; /* search current vdi */ @@ -899,7 +901,10 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, return fd; } - memset(buf, 0, sizeof(buf)); + /* This pair of strncpy calls ensures that the buffer is zero-filled, + * which is desirable since we'll soon be sending those bytes, and + * don't want the send_req to read uninitialized data. + */ strncpy(buf, filename, SD_MAX_VDI_LEN); strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); @@ -939,7 +944,7 @@ out: } static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, int create, + struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type) { int nr_copies = s->inode.nr_copies; @@ -1018,7 +1023,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, static int read_write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, - int write, int create, uint8_t cache) + bool write, bool create, bool cache) { SheepdogObjReq hdr; SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; @@ -1067,18 +1072,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, } static int read_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset, uint8_t cache) + unsigned int datalen, uint64_t offset, bool cache) { - return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0, - cache); + return read_write_object(fd, buf, oid, copies, datalen, offset, false, + false, cache); } static int write_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset, int create, - uint8_t cache) + unsigned int datalen, uint64_t offset, bool create, + bool cache) { - return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create, - cache); + return read_write_object(fd, buf, oid, copies, datalen, offset, true, + create, cache); } static int sd_open(BlockDriverState *bs, const char *filename, int flags) @@ -1113,19 +1118,17 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } - if (flags & BDRV_O_CACHE_WB) { - s->cache_enabled = 1; - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; - } + s->cache_enabled = true; + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + ret = s->flush_fd; + goto out; } if (snapid || tag[0] != '\0') { dprintf("%" PRIx32 " snapshot inode was open.\n", vid); - s->is_snapshot = 1; + s->is_snapshot = true; } fd = connect_to_sdog(s->addr, s->port); @@ -1150,7 +1153,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) s->max_dirty_data_idx = 0; bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE; - strncpy(s->name, vdi, sizeof(s->name)); + pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); g_free(buf); return 0; @@ -1178,8 +1181,11 @@ static int do_sd_create(char *filename, int64_t vdi_size, return fd; } + /* FIXME: would it be better to fail (e.g., return -EIO) when filename + * does not fit in buf? For now, just truncate and avoid buffer overrun. + */ memset(buf, 0, sizeof(buf)); - strncpy(buf, filename, SD_MAX_VDI_LEN); + pstrcpy(buf, sizeof(buf), filename); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; @@ -1265,7 +1271,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) BDRVSheepdogState *s; char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; - int prealloc = 0; + bool prealloc = false; const char *vdiname; s = g_malloc0(sizeof(BDRVSheepdogState)); @@ -1287,9 +1293,9 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) backing_file = options->value.s; } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { if (!options->value.s || !strcmp(options->value.s, "off")) { - prealloc = 0; + prealloc = false; } else if (!strcmp(options->value.s, "full")) { - prealloc = 1; + prealloc = true; } else { error_report("Invalid preallocation mode: '%s'", options->value.s); @@ -1417,7 +1423,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, 0, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_enabled); close(fd); if (ret < 0) { @@ -1456,7 +1462,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), data_len, offset, 0, 0, offset); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, &iov, 1, 0, AIOCB_WRITE_UDATA); + ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); if (ret) { free_aio_req(s, aio_req); acb->ret = -EIO; @@ -1510,7 +1516,7 @@ static int sd_create_branch(BDRVSheepdogState *s) memcpy(&s->inode, buf, sizeof(s->inode)); - s->is_snapshot = 0; + s->is_snapshot = false; ret = 0; dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id); @@ -1565,7 +1571,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) while (done != total) { uint8_t flags = 0; uint64_t old_oid = 0; - int create = 0; + bool create = false; oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); @@ -1580,10 +1586,10 @@ static int coroutine_fn sd_co_rw_vector(void *p) break; case AIOCB_WRITE_UDATA: if (!inode->data_vdi_id[idx]) { - create = 1; + create = true; } else if (!is_data_obj_writable(inode, idx)) { /* Copy-On-Write */ - create = 1; + create = true; old_oid = oid; flags = SD_FLAG_CMD_COW; } @@ -1717,7 +1723,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) if (rsp->result == SD_RES_INVALID_PARMS) { dprintf("disable write cache since the server doesn't support it\n"); - s->cache_enabled = 0; + s->cache_enabled = false; closesocket(s->flush_fd); return 0; } @@ -1753,6 +1759,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) s->inode.vm_state_size = sn_info->vm_state_size; s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; + /* It appears that inode.tag does not require a NUL terminator, + * which means this use of strncpy is ok. + */ strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); /* we don't need to update entire object */ datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); @@ -1765,7 +1774,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, 0, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_enabled); if (ret < 0) { error_report("failed to write snapshot's inode."); goto cleanup; @@ -1812,13 +1821,13 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) memcpy(old_s, s, sizeof(BDRVSheepdogState)); - memset(vdi, 0, sizeof(vdi)); - strncpy(vdi, s->name, sizeof(vdi)); + pstrcpy(vdi, sizeof(vdi), s->name); - memset(tag, 0, sizeof(tag)); snapid = strtoul(snapshot_id, NULL, 10); - if (!snapid) { - strncpy(tag, s->name, sizeof(tag)); + if (snapid) { + tag[0] = 0; + } else { + pstrcpy(tag, sizeof(tag), s->name); } ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1); @@ -1852,7 +1861,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) goto out; } - s->is_snapshot = 1; + s->is_snapshot = true; g_free(buf); g_free(old_s); @@ -1947,8 +1956,9 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u", inode.snap_id); - strncpy(sn_tab[found].name, inode.tag, - MIN(sizeof(sn_tab[found].name), sizeof(inode.tag))); + pstrcpy(sn_tab[found].name, + MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)), + inode.tag); found++; } } @@ -1969,8 +1979,8 @@ out: static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, int64_t pos, int size, int load) { - int fd, create; - int ret = 0, remaining = size; + bool create; + int fd, ret = 0, remaining = size; unsigned int data_len; uint64_t vmstate_oid; uint32_t vdi_index; @@ -1985,7 +1995,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, vdi_index = pos / SD_DATA_OBJ_SIZE; offset = pos % SD_DATA_OBJ_SIZE; - data_len = MIN(remaining, SD_DATA_OBJ_SIZE); + data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset); vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index); @@ -2006,6 +2016,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, } pos += data_len; + data += data_len; remaining -= data_len; } ret = size; diff --git a/block/stream.c b/block/stream.c index 37c46525d2..d6df06f35a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -12,7 +12,8 @@ */ #include "trace.h" -#include "block_int.h" +#include "block/block_int.h" +#include "block/blockjob.h" #include "qemu/ratelimit.h" enum { @@ -30,6 +31,7 @@ typedef struct StreamBlockJob { BlockJob common; RateLimit limit; BlockDriverState *base; + BlockdevOnError on_error; char backing_file_id[1024]; } StreamBlockJob; @@ -77,13 +79,14 @@ static void coroutine_fn stream_run(void *opaque) BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; int64_t sector_num, end; + int error = 0; int ret = 0; int n = 0; void *buf; s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { - block_job_complete(&s->common, s->common.len); + block_job_completed(&s->common, s->common.len); return; } @@ -105,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { @@ -122,6 +125,12 @@ wait: * known-unallocated area [sector_num, sector_num+n). */ ret = bdrv_co_is_allocated_above(bs->backing_hd, base, sector_num, n, &n); + + /* Finish early if end of backing file has been reached */ + if (ret == 0 && n == 0) { + n = end - sector_num; + } + copy = (ret == 1); } trace_stream_one_iteration(s, sector_num, n, ret); @@ -135,7 +144,19 @@ wait: ret = stream_populate(bs, sector_num, n, buf); } if (ret < 0) { - break; + BlockErrorAction action = + block_job_error_action(&s->common, s->common.bs, s->on_error, + true, -ret); + if (action == BDRV_ACTION_STOP) { + n = 0; + continue; + } + if (error == 0) { + error = ret; + } + if (action == BDRV_ACTION_REPORT) { + break; + } } ret = 0; @@ -147,6 +168,9 @@ wait: bdrv_disable_copy_on_read(bs); } + /* Do not remove the backing file if an error was there but ignored. */ + ret = error; + if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { @@ -160,7 +184,7 @@ wait: } qemu_vfree(buf); - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -182,11 +206,19 @@ static BlockJobType stream_job_type = { void stream_start(BlockDriverState *bs, BlockDriverState *base, const char *base_id, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { StreamBlockJob *s; + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-error"); + return; + } + s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp); if (!s) { return; @@ -197,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); } + s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run); trace_stream_start(bs, base, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block/vdi.c b/block/vdi.c index c4f1529db9..021abaa227 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -50,19 +50,16 @@ */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include "migration.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "migration/migration.h" #if defined(CONFIG_UUID) #include <uuid/uuid.h> #else /* TODO: move uuid emulation to some central place in QEMU. */ -#include "sysemu.h" /* UUID_FMT */ +#include "sysemu/sysemu.h" /* UUID_FMT */ typedef unsigned char uuid_t[16]; -void uuid_generate(uuid_t out); -int uuid_is_null(const uuid_t uu); -void uuid_unparse(const uuid_t uu, char *out); #endif /* Code configuration options. */ @@ -124,18 +121,18 @@ void uuid_unparse(const uuid_t uu, char *out); #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED) #if !defined(CONFIG_UUID) -void uuid_generate(uuid_t out) +static inline void uuid_generate(uuid_t out) { memset(out, 0, sizeof(uuid_t)); } -int uuid_is_null(const uuid_t uu) +static inline int uuid_is_null(const uuid_t uu) { uuid_t null_uuid = { 0 }; return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0; } -void uuid_unparse(const uuid_t uu, char *out) +static inline void uuid_unparse(const uuid_t uu, char *out) { snprintf(out, 37, UUID_FMT, uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7], @@ -454,6 +451,12 @@ static int vdi_open(BlockDriverState *bs, int flags) return -1; } +static int vdi_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -628,7 +631,6 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options) VdiHeader header; size_t i; size_t bmap_size; - uint32_t *bmap; logout("\n"); @@ -693,21 +695,21 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options) result = -errno; } - bmap = NULL; if (bmap_size > 0) { - bmap = (uint32_t *)g_malloc0(bmap_size); - } - for (i = 0; i < blocks; i++) { - if (image_type == VDI_TYPE_STATIC) { - bmap[i] = i; - } else { - bmap[i] = VDI_UNALLOCATED; + uint32_t *bmap = g_malloc0(bmap_size); + for (i = 0; i < blocks; i++) { + if (image_type == VDI_TYPE_STATIC) { + bmap[i] = i; + } else { + bmap[i] = VDI_UNALLOCATED; + } } + if (write(fd, bmap, bmap_size) < 0) { + result = -errno; + } + g_free(bmap); } - if (write(fd, bmap, bmap_size) < 0) { - result = -errno; - } - g_free(bmap); + if (image_type == VDI_TYPE_STATIC) { if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) { result = -errno; @@ -762,6 +764,7 @@ static BlockDriver bdrv_vdi = { .bdrv_probe = vdi_probe, .bdrv_open = vdi_open, .bdrv_close = vdi_close, + .bdrv_reopen_prepare = vdi_reopen_prepare, .bdrv_create = vdi_create, .bdrv_co_is_allocated = vdi_co_is_allocated, .bdrv_make_empty = vdi_make_empty, diff --git a/block/vmdk.c b/block/vmdk.c index daee4268be..19298c2a3e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -24,9 +24,9 @@ */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include "migration.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "migration/migration.h" #include <zlib.h> #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') @@ -35,6 +35,7 @@ #define VMDK4_FLAG_RGD (1 << 1) #define VMDK4_FLAG_COMPRESS (1 << 16) #define VMDK4_FLAG_MARKER (1 << 17) +#define VMDK4_GD_AT_END 0xffffffffffffffffULL typedef struct { uint32_t version; @@ -57,8 +58,8 @@ typedef struct { int64_t desc_offset; int64_t desc_size; int32_t num_gtes_per_gte; - int64_t gd_offset; int64_t rgd_offset; + int64_t gd_offset; int64_t grain_offset; char filler[1]; char check_bytes[4]; @@ -115,6 +116,13 @@ typedef struct VmdkGrainMarker { uint8_t data[0]; } VmdkGrainMarker; +enum { + MARKER_END_OF_STREAM = 0, + MARKER_GRAIN_TABLE = 1, + MARKER_GRAIN_DIRECTORY = 2, + MARKER_FOOTER = 3, +}; + static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) { uint32_t magic; @@ -292,6 +300,40 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) return 1; } +/* Queue extents, if any, for reopen() */ +static int vmdk_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVVmdkState *s; + int ret = -1; + int i; + VmdkExtent *e; + + assert(state != NULL); + assert(state->bs != NULL); + + if (queue == NULL) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "No reopen queue for VMDK extents"); + goto exit; + } + + s = state->bs->opaque; + + assert(s != NULL); + + for (i = 0; i < s->num_extents; i++) { + e = &s->extents[i]; + if (e->file != state->bs->file) { + bdrv_reopen_queue(queue, e->file, state->flags); + } + } + ret = 0; + +exit: + return ret; +} + static int vmdk_parent_open(BlockDriverState *bs) { char *p_name; @@ -451,6 +493,54 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (header.capacity == 0 && header.desc_offset) { return vmdk_open_desc_file(bs, flags, header.desc_offset << 9); } + + if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { + /* + * The footer takes precedence over the header, so read it in. The + * footer starts at offset -1024 from the end: One sector for the + * footer, and another one for the end-of-stream marker. + */ + struct { + struct { + uint64_t val; + uint32_t size; + uint32_t type; + uint8_t pad[512 - 16]; + } QEMU_PACKED footer_marker; + + uint32_t magic; + VMDK4Header header; + uint8_t pad[512 - 4 - sizeof(VMDK4Header)]; + + struct { + uint64_t val; + uint32_t size; + uint32_t type; + uint8_t pad[512 - 16]; + } QEMU_PACKED eos_marker; + } QEMU_PACKED footer; + + ret = bdrv_pread(file, + bs->file->total_sectors * 512 - 1536, + &footer, sizeof(footer)); + if (ret < 0) { + return ret; + } + + /* Some sanity checks for the footer */ + if (be32_to_cpu(footer.magic) != VMDK4_MAGIC || + le32_to_cpu(footer.footer_marker.size) != 0 || + le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER || + le64_to_cpu(footer.eos_marker.val) != 0 || + le32_to_cpu(footer.eos_marker.size) != 0 || + le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM) + { + return -EINVAL; + } + + header = footer.header; + } + l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte) * le64_to_cpu(header.granularity); if (l1_entry_sectors == 0) { @@ -1002,6 +1092,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, BDRVVmdkState *s = bs->opaque; int ret; uint64_t n, index_in_cluster; + uint64_t extent_begin_sector, extent_relative_sector_num; VmdkExtent *extent = NULL; uint64_t cluster_offset; @@ -1013,7 +1104,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, ret = get_cluster_offset( bs, extent, NULL, sector_num << 9, 0, &cluster_offset); - index_in_cluster = sector_num % extent->cluster_sectors; + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1064,6 +1157,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, VmdkExtent *extent = NULL; int n, ret; int64_t index_in_cluster; + uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1106,7 +1200,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (ret) { return -EINVAL; } - index_in_cluster = sector_num % extent->cluster_sectors; + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1318,8 +1414,7 @@ static int relative_path(char *dest, int dest_size, return -1; } if (path_is_absolute(target)) { - dest[dest_size - 1] = '\0'; - strncpy(dest, target, dest_size - 1); + pstrcpy(dest, dest_size, target); return 0; } while (base[i] == target[i]) { @@ -1590,6 +1685,7 @@ static BlockDriver bdrv_vmdk = { .instance_size = sizeof(BDRVVmdkState), .bdrv_probe = vmdk_probe, .bdrv_open = vmdk_open, + .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_read = vmdk_co_read, .bdrv_write = vmdk_co_write, .bdrv_close = vmdk_close, diff --git a/block/vpc.c b/block/vpc.c index c0b82c4f57..7948609e50 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -23,9 +23,12 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include "migration.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "migration/migration.h" +#if defined(CONFIG_UUID) +#include <uuid/uuid.h> +#endif /**************************************************************/ @@ -198,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags) bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - if (bs->total_sectors >= 65535 * 16 * 255) { + /* Allow a maximum disk size of approximately 2 TB */ + if (bs->total_sectors >= 65535LL * 255 * 255) { err = -EFBIG; goto fail; } @@ -265,6 +269,12 @@ static int vpc_open(BlockDriverState *bs, int flags) return err; } +static int vpc_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + /* * Returns the absolute byte offset of the given sector in the image file. * If the sector is not allocated, -1 is returned instead. @@ -518,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, * Note that the geometry doesn't always exactly match total_sectors but * may round it down. * - * Returns 0 on success, -EFBIG if the size is larger than 127 GB + * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override + * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) + * and instead allow up to 255 heads. */ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, uint8_t* heads, uint8_t* secs_per_cyl) { uint32_t cyls_times_heads; - if (total_sectors > 65535 * 16 * 255) + /* Allow a maximum disk size of approximately 2 TB */ + if (total_sectors > 65535LL * 255 * 255) { return -EFBIG; + } if (total_sectors > 65535 * 16 * 63) { *secs_per_cyl = 255; - *heads = 16; + if (total_sectors > 65535 * 16 * 255) { + *heads = 255; + } else { + *heads = 16; + } cyls_times_heads = total_sectors / *secs_per_cyl; } else { *secs_per_cyl = 17; @@ -733,7 +751,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) footer->type = be32_to_cpu(disk_type); - /* TODO uuid is missing */ +#if defined(CONFIG_UUID) + uuid_generate(footer->uuid); +#endif footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); @@ -783,6 +803,7 @@ static BlockDriver bdrv_vpc = { .bdrv_probe = vpc_probe, .bdrv_open = vpc_open, .bdrv_close = vpc_close, + .bdrv_reopen_prepare = vpc_reopen_prepare, .bdrv_create = vpc_create, .bdrv_read = vpc_co_read, diff --git a/block/vvfat.c b/block/vvfat.c index 59d3c5b8ac..83706ce556 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -25,9 +25,9 @@ #include <sys/stat.h> #include <dirent.h> #include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include "migration.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "migration/migration.h" #ifndef S_IWGRP #define S_IWGRP 0 diff --git a/block/win32-aio.c b/block/win32-aio.c new file mode 100644 index 0000000000..46a5db78cc --- /dev/null +++ b/block/win32-aio.c @@ -0,0 +1,226 @@ +/* + * Block driver for RAW files (win32) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "qemu/timer.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu-common.h" +#include "block/aio.h" +#include "raw-aio.h" +#include "qemu/event_notifier.h" +#include <windows.h> +#include <winioctl.h> + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_HARDDISK 2 + +struct QEMUWin32AIOState { + HANDLE hIOCP; + EventNotifier e; + int count; +}; + +typedef struct QEMUWin32AIOCB { + BlockDriverAIOCB common; + struct QEMUWin32AIOState *ctx; + int nbytes; + OVERLAPPED ov; + QEMUIOVector *qiov; + void *buf; + bool is_read; + bool is_linear; +} QEMUWin32AIOCB; + +/* + * Completes an AIO request (calls the callback and frees the ACB). + */ +static void win32_aio_process_completion(QEMUWin32AIOState *s, + QEMUWin32AIOCB *waiocb, DWORD count) +{ + int ret; + s->count--; + + if (waiocb->ov.Internal != 0) { + ret = -EIO; + } else { + ret = 0; + if (count < waiocb->nbytes) { + /* Short reads mean EOF, pad with zeros. */ + if (waiocb->is_read) { + qemu_iovec_memset(waiocb->qiov, count, 0, + waiocb->qiov->size - count); + } else { + ret = -EINVAL; + } + } + } + + if (!waiocb->is_linear) { + if (ret == 0 && waiocb->is_read) { + QEMUIOVector *qiov = waiocb->qiov; + char *p = waiocb->buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } + g_free(waiocb->buf); + } + } + + + waiocb->common.cb(waiocb->common.opaque, ret); + qemu_aio_release(waiocb); +} + +static void win32_aio_completion_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + DWORD count; + ULONG_PTR key; + OVERLAPPED *ov; + + event_notifier_test_and_clear(&s->e); + while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) { + QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov); + + win32_aio_process_completion(s, waiocb, count); + } +} + +static int win32_aio_flush_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + + return (s->count > 0) ? 1 : 0; +} + +static void win32_aio_cancel(BlockDriverAIOCB *blockacb) +{ + QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb; + + /* + * CancelIoEx is only supported in Vista and newer. For now, just + * wait for completion. + */ + while (!HasOverlappedIoCompleted(&waiocb->ov)) { + qemu_aio_wait(); + } +} + +static const AIOCBInfo win32_aiocb_info = { + .aiocb_size = sizeof(QEMUWin32AIOCB), + .cancel = win32_aio_cancel, +}; + +BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + struct QEMUWin32AIOCB *waiocb; + uint64_t offset = sector_num * 512; + DWORD rc; + + waiocb = qemu_aio_get(&win32_aiocb_info, bs, cb, opaque); + waiocb->nbytes = nb_sectors * 512; + waiocb->qiov = qiov; + waiocb->is_read = (type == QEMU_AIO_READ); + + if (qiov->niov > 1) { + waiocb->buf = qemu_blockalign(bs, qiov->size); + if (type & QEMU_AIO_WRITE) { + char *p = waiocb->buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } + } + waiocb->is_linear = false; + } else { + waiocb->buf = qiov->iov[0].iov_base; + waiocb->is_linear = true; + } + + memset(&waiocb->ov, 0, sizeof(waiocb->ov)); + waiocb->ov.Offset = (DWORD)offset; + waiocb->ov.OffsetHigh = (DWORD)(offset >> 32); + waiocb->ov.hEvent = event_notifier_get_handle(&aio->e); + + aio->count++; + + if (type & QEMU_AIO_READ) { + rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } else { + rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } + if(rc == 0 && GetLastError() != ERROR_IO_PENDING) { + goto out_dec_count; + } + return &waiocb->common; + +out_dec_count: + aio->count--; + qemu_aio_release(waiocb); + return NULL; +} + +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) +{ + if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) { + return -EINVAL; + } else { + return 0; + } +} + +QEMUWin32AIOState *win32_aio_init(void) +{ + QEMUWin32AIOState *s; + + s = g_malloc0(sizeof(*s)); + if (event_notifier_init(&s->e, false) < 0) { + goto out_free_state; + } + + s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (s->hIOCP == NULL) { + goto out_close_efd; + } + + qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb, + win32_aio_flush_cb); + + return s; + +out_close_efd: + event_notifier_cleanup(&s->e); +out_free_state: + g_free(s); + return NULL; +} |