aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c89
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/backup.c38
-rw-r--r--block/commit.c23
-rw-r--r--block/dmg-lzfse.c49
-rw-r--r--block/dmg.c65
-rw-r--r--block/dmg.h3
-rw-r--r--block/file-posix.c382
-rw-r--r--block/mirror.c38
-rw-r--r--block/qcow2-cluster.c70
-rw-r--r--block/qcow2.c170
-rw-r--r--block/qcow2.h4
-rw-r--r--block/replication.c67
-rw-r--r--block/stream.c20
-rw-r--r--blockdev.c11
-rwxr-xr-xconfigure31
-rw-r--r--include/block/block.h6
-rw-r--r--include/block/block_backup.h13
-rw-r--r--include/scsi/pr-manager.h8
-rw-r--r--qemu-io-cmds.c29
-rw-r--r--scsi/pr-manager.c21
-rw-r--r--scsi/trace-events2
-rwxr-xr-xtests/qemu-iotests/13318
-rw-r--r--tests/qemu-iotests/133.out15
-rwxr-xr-xtests/qemu-iotests/2354
25 files changed, 671 insertions, 507 deletions
diff --git a/block.c b/block.c
index 811239ca23..4f5ff2cc12 100644
--- a/block.c
+++ b/block.c
@@ -1079,11 +1079,11 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
const char *filename, Error **errp)
{
BlockDriverState *parent = c->opaque;
- int orig_flags = bdrv_get_flags(parent);
+ bool read_only = bdrv_is_read_only(parent);
int ret;
- if (!(orig_flags & BDRV_O_RDWR)) {
- ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp);
+ if (read_only) {
+ ret = bdrv_reopen_set_read_only(parent, false, errp);
if (ret < 0) {
return ret;
}
@@ -1095,8 +1095,8 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
error_setg_errno(errp, -ret, "Could not update backing file link");
}
- if (!(orig_flags & BDRV_O_RDWR)) {
- bdrv_reopen(parent, orig_flags, NULL);
+ if (read_only) {
+ bdrv_reopen_set_read_only(parent, true, NULL);
}
return ret;
@@ -1139,22 +1139,18 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
{
*flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
- assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
*flags |= BDRV_O_NO_FLUSH;
}
- assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
*flags |= BDRV_O_NOCACHE;
}
- assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY));
if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
*flags |= BDRV_O_RDWR;
}
- assert(qemu_opt_find(opts, BDRV_OPT_AUTO_READ_ONLY));
if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
*flags |= BDRV_O_AUTO_RDONLY;
}
@@ -2931,7 +2927,6 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options,
- int flags,
const BdrvChildRole *role,
QDict *parent_options,
int parent_flags)
@@ -2940,7 +2935,9 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockReopenQueueEntry *bs_entry;
BdrvChild *child;
- QDict *old_options, *explicit_options;
+ QDict *old_options, *explicit_options, *options_copy;
+ int flags;
+ QemuOpts *opts;
/* Make sure that the caller remembered to use a drained section. This is
* important to avoid graph changes between the recursive queuing here and
@@ -2966,22 +2963,11 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
/*
* Precedence of options:
* 1. Explicitly passed in options (highest)
- * 2. Set in flags (only for top level)
- * 3. Retained from explicitly set options of bs
- * 4. Inherited from parent node
- * 5. Retained from effective options of bs
+ * 2. Retained from explicitly set options of bs
+ * 3. Inherited from parent node
+ * 4. Retained from effective options of bs
*/
- if (!parent_options) {
- /*
- * Any setting represented by flags is always updated. If the
- * corresponding QDict option is set, it takes precedence. Otherwise
- * the flag is translated into a QDict option. The old setting of bs is
- * not considered.
- */
- update_options_from_flags(options, flags);
- }
-
/* Old explicitly set values (don't overwrite by inherited value) */
if (bs_entry) {
old_options = qdict_clone_shallow(bs_entry->state.explicit_options);
@@ -2995,16 +2981,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
/* Inherit from parent node */
if (parent_options) {
- QemuOpts *opts;
- QDict *options_copy;
- assert(!flags);
+ flags = 0;
role->inherit_options(&flags, options, parent_flags, parent_options);
- options_copy = qdict_clone_shallow(options);
- opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
- qemu_opts_absorb_qdict(opts, options_copy, NULL);
- update_flags_from_options(&flags, opts);
- qemu_opts_del(opts);
- qobject_unref(options_copy);
+ } else {
+ flags = bdrv_get_flags(bs);
}
/* Old values are used for options that aren't set yet */
@@ -3012,6 +2992,14 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
bdrv_join_options(bs, options, old_options);
qobject_unref(old_options);
+ /* We have the final set of options so let's update the flags */
+ options_copy = qdict_clone_shallow(options);
+ opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options_copy, NULL);
+ update_flags_from_options(&flags, opts);
+ qemu_opts_del(opts);
+ qobject_unref(options_copy);
+
/* bdrv_open_inherit() sets and clears some additional flags internally */
flags &= ~BDRV_O_PROTOCOL;
if (flags & BDRV_O_RDWR) {
@@ -3051,7 +3039,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
qdict_extract_subqdict(options, &new_child_options, child_key_dot);
g_free(child_key_dot);
- bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
+ bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
child->role, options, flags);
}
@@ -3060,10 +3048,9 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
- QDict *options, int flags)
+ QDict *options)
{
- return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
- NULL, NULL, 0);
+ return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, NULL, 0);
}
/*
@@ -3125,22 +3112,18 @@ cleanup:
return ret;
}
-
-/* Reopen a single BlockDriverState with the specified flags. */
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp)
{
- int ret = -1;
- Error *local_err = NULL;
+ int ret;
BlockReopenQueue *queue;
+ QDict *opts = qdict_new();
- bdrv_subtree_drained_begin(bs);
-
- queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
- ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
- if (local_err != NULL) {
- error_propagate(errp, local_err);
- }
+ qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
+ bdrv_subtree_drained_begin(bs);
+ queue = bdrv_reopen_queue(NULL, bs, opts);
+ ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, errp);
bdrv_subtree_drained_end(bs);
return ret;
@@ -3214,6 +3197,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
Error **errp)
{
int ret = -1;
+ int old_flags;
Error *local_err = NULL;
BlockDriver *drv;
QemuOpts *opts;
@@ -3240,7 +3224,12 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
goto error;
}
+ /* This was already called in bdrv_reopen_queue_child() so the flags
+ * are up-to-date. This time we simply want to remove the options from
+ * QemuOpts in order to indicate that they have been processed. */
+ old_flags = reopen_state->flags;
update_flags_from_options(&reopen_state->flags, opts);
+ assert(old_flags == reopen_state->flags);
discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
if (discard != NULL) {
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 46d585cfd0..7a81892a52 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -57,6 +57,8 @@ ssh.o-libs := $(LIBSSH2_LIBS)
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
dmg-bz2.o-libs := $(BZIP2_LIBS)
+block-obj-$(if $(CONFIG_LZFSE),m,n) += dmg-lzfse.o
+dmg-lzfse.o-libs := $(LZFSE_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
parallels.o-cflags := $(LIBXML2_CFLAGS)
diff --git a/block/backup.c b/block/backup.c
index 4d084f6ca6..b829b251eb 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -28,6 +28,13 @@
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
+typedef struct CowRequest {
+ int64_t start_byte;
+ int64_t end_byte;
+ QLIST_ENTRY(CowRequest) list;
+ CoQueue wait_queue; /* coroutines blocked on this request */
+} CowRequest;
+
typedef struct BackupBlockJob {
BlockJob common;
BlockBackend *target;
@@ -322,37 +329,6 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
hbitmap_set(backup_job->copy_bitmap, 0, len);
}
-void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
- uint64_t bytes)
-{
- BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
- int64_t start, end;
-
- assert(block_job_driver(job) == &backup_job_driver);
-
- start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
- end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
- wait_for_overlapping_requests(backup_job, start, end);
-}
-
-void backup_cow_request_begin(CowRequest *req, BlockJob *job,
- int64_t offset, uint64_t bytes)
-{
- BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
- int64_t start, end;
-
- assert(block_job_driver(job) == &backup_job_driver);
-
- start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
- end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
- cow_request_begin(req, backup_job, start, end);
-}
-
-void backup_cow_request_end(CowRequest *req)
-{
- cow_request_end(req);
-}
-
static void backup_drain(BlockJob *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
diff --git a/block/commit.c b/block/commit.c
index a2da5740b0..53148e610b 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -38,7 +38,7 @@ typedef struct CommitBlockJob {
BlockBackend *base;
BlockDriverState *base_bs;
BlockdevOnError on_error;
- int base_flags;
+ bool base_read_only;
char *backing_file_str;
} CommitBlockJob;
@@ -124,8 +124,8 @@ static void commit_clean(Job *job)
/* restore base open flags here if appropriate (e.g., change the base back
* to r/o). These reopens do not need to be atomic, since we won't abort
* even on failure here */
- if (s->base_flags != bdrv_get_flags(s->base_bs)) {
- bdrv_reopen(s->base_bs, s->base_flags, NULL);
+ if (s->base_read_only) {
+ bdrv_reopen_set_read_only(s->base_bs, true, NULL);
}
g_free(s->backing_file_str);
@@ -264,7 +264,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
- int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
@@ -283,11 +282,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
}
/* convert base to r/w, if necessary */
- orig_base_flags = bdrv_get_flags(base);
- if (!(orig_base_flags & BDRV_O_RDWR)) {
- bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
- if (local_err != NULL) {
- error_propagate(errp, local_err);
+ s->base_read_only = bdrv_is_read_only(base);
+ if (s->base_read_only) {
+ if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
goto fail;
}
}
@@ -363,7 +360,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
goto fail;
}
- s->base_flags = orig_base_flags;
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
@@ -395,7 +391,7 @@ int bdrv_commit(BlockDriverState *bs)
BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t offset, length, backing_length;
- int ro, open_flags;
+ int ro;
int64_t n;
int ret = 0;
uint8_t *buf = NULL;
@@ -414,10 +410,9 @@ int bdrv_commit(BlockDriverState *bs)
}
ro = bs->backing->bs->read_only;
- open_flags = bs->backing->bs->open_flags;
if (ro) {
- if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
+ if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) {
return -EACCES;
}
}
@@ -527,7 +522,7 @@ ro_cleanup:
if (ro) {
/* ignoring error return here */
- bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
+ bdrv_reopen_set_read_only(bs->backing->bs, true, NULL);
}
return ret;
diff --git a/block/dmg-lzfse.c b/block/dmg-lzfse.c
new file mode 100644
index 0000000000..19d25bc646
--- /dev/null
+++ b/block/dmg-lzfse.c
@@ -0,0 +1,49 @@
+/*
+ * DMG lzfse uncompression
+ *
+ * Copyright (c) 2018 Julio Cesar Faracco
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "dmg.h"
+#include <lzfse.h>
+
+static int dmg_uncompress_lzfse_do(char *next_in, unsigned int avail_in,
+ char *next_out, unsigned int avail_out)
+{
+ size_t out_size = lzfse_decode_buffer((uint8_t *) next_out, avail_out,
+ (uint8_t *) next_in, avail_in,
+ NULL);
+
+ /* We need to decode the single chunk only. */
+ /* So, out_size == avail_out is not an error here. */
+ if (out_size > 0) {
+ return out_size;
+ }
+ return -1;
+}
+
+__attribute__((constructor))
+static void dmg_lzfse_init(void)
+{
+ assert(!dmg_uncompress_lzfse);
+ dmg_uncompress_lzfse = dmg_uncompress_lzfse_do;
+}
diff --git a/block/dmg.c b/block/dmg.c
index 1d9283ba2f..50e91aef6d 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -33,6 +33,9 @@
int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
+int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
+ char *next_out, unsigned int avail_out);
+
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
* or truncating when converting to 32-bit types
@@ -41,6 +44,19 @@ enum {
DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
};
+enum {
+ /* DMG Block Type */
+ UDZE = 0, /* Zeroes */
+ UDRW, /* RAW type */
+ UDIG, /* Ignore */
+ UDCO = 0x80000004,
+ UDZO,
+ UDBZ,
+ ULFO,
+ UDCM = 0x7ffffffe, /* Comments */
+ UDLE /* Last Entry */
+};
+
static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
{
int len;
@@ -105,15 +121,16 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
uint32_t uncompressed_sectors = 0;
switch (s->types[chunk]) {
- case 0x80000005: /* zlib compressed */
- case 0x80000006: /* bzip2 compressed */
+ case UDZO: /* zlib compressed */
+ case UDBZ: /* bzip2 compressed */
+ case ULFO: /* lzfse compressed */
compressed_size = s->lengths[chunk];
uncompressed_sectors = s->sectorcounts[chunk];
break;
- case 1: /* copy */
+ case UDRW: /* copy */
uncompressed_sectors = DIV_ROUND_UP(s->lengths[chunk], 512);
break;
- case 2: /* zero */
+ case UDIG: /* zero */
/* as the all-zeroes block may be large, it is treated specially: the
* sector is not copied from a large buffer, a simple memset is used
* instead. Therefore uncompressed_sectors does not need to be set. */
@@ -182,12 +199,14 @@ typedef struct DmgHeaderState {
static bool dmg_is_known_block_type(uint32_t entry_type)
{
switch (entry_type) {
- case 0x00000001: /* uncompressed */
- case 0x00000002: /* zeroes */
- case 0x80000005: /* zlib */
+ case UDRW: /* uncompressed */
+ case UDIG: /* zeroes */
+ case UDZO: /* zlib */
return true;
- case 0x80000006: /* bzip2 */
+ case UDBZ: /* bzip2 */
return !!dmg_uncompress_bz2;
+ case ULFO: /* lzfse */
+ return !!dmg_uncompress_lzfse;
default:
return false;
}
@@ -425,6 +444,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
block_module_load_one("dmg-bz2");
+ block_module_load_one("dmg-lzfse");
s->n_chunks = 0;
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -579,7 +599,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
s->current_chunk = s->n_chunks;
switch (s->types[chunk]) { /* block entry type */
- case 0x80000005: { /* zlib compressed */
+ case UDZO: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
ret = bdrv_pread(bs->file, s->offsets[chunk],
@@ -602,7 +622,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
return -1;
}
break; }
- case 0x80000006: /* bzip2 compressed */
+ case UDBZ: /* bzip2 compressed */
if (!dmg_uncompress_bz2) {
break;
}
@@ -623,14 +643,35 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
return ret;
}
break;
- case 1: /* copy */
+ case ULFO:
+ if (!dmg_uncompress_lzfse) {
+ break;
+ }
+ /* we need to buffer, because only the chunk as whole can be
+ * inflated. */
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
+ s->compressed_chunk, s->lengths[chunk]);
+ if (ret != s->lengths[chunk]) {
+ return -1;
+ }
+
+ ret = dmg_uncompress_lzfse((char *)s->compressed_chunk,
+ (unsigned int) s->lengths[chunk],
+ (char *)s->uncompressed_chunk,
+ (unsigned int)
+ (512 * s->sectorcounts[chunk]));
+ if (ret < 0) {
+ return ret;
+ }
+ break;
+ case UDRW: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
}
break;
- case 2: /* zero */
+ case UDIG: /* zero */
/* see dmg_read, it is treated specially. No buffer needs to be
* pre-filled, the zeroes can be set directly. */
break;
diff --git a/block/dmg.h b/block/dmg.h
index 2ecf239ba5..f28929998f 100644
--- a/block/dmg.h
+++ b/block/dmg.h
@@ -55,4 +55,7 @@ typedef struct BDRVDMGState {
extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
+extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
+ char *next_out, unsigned int avail_out);
+
#endif
diff --git a/block/file-posix.c b/block/file-posix.c
index 07bbdab953..d8f0b93752 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -182,25 +182,29 @@ static int64_t raw_getlength(BlockDriverState *bs);
typedef struct RawPosixAIOData {
BlockDriverState *bs;
+ int aio_type;
int aio_fildes;
- union {
- struct iovec *aio_iov;
- void *aio_ioctl_buf;
- };
- int aio_niov;
- uint64_t aio_nbytes;
-#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
+
off_t aio_offset;
- int aio_type;
+ uint64_t aio_nbytes;
+
union {
struct {
+ struct iovec *iov;
+ int niov;
+ } io;
+ struct {
+ uint64_t cmd;
+ void *buf;
+ } ioctl;
+ struct {
int aio_fd2;
off_t aio_offset2;
- };
+ } copy_range;
struct {
PreallocMode prealloc;
Error **errp;
- };
+ } truncate;
};
} RawPosixAIOData;
@@ -1148,20 +1152,24 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
}
#endif
-static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
+#if defined(__linux__)
+static int handle_aiocb_ioctl(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
int ret;
- ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+ ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf);
if (ret == -1) {
return -errno;
}
return 0;
}
+#endif /* linux */
-static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
+static int handle_aiocb_flush(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
BDRVRawState *s = aiocb->bs->opaque;
int ret;
@@ -1233,13 +1241,13 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
do {
if (aiocb->aio_type & QEMU_AIO_WRITE)
len = qemu_pwritev(aiocb->aio_fildes,
- aiocb->aio_iov,
- aiocb->aio_niov,
+ aiocb->io.iov,
+ aiocb->io.niov,
aiocb->aio_offset);
else
len = qemu_preadv(aiocb->aio_fildes,
- aiocb->aio_iov,
- aiocb->aio_niov,
+ aiocb->io.iov,
+ aiocb->io.niov,
aiocb->aio_offset);
} while (len == -1 && errno == EINTR);
@@ -1295,8 +1303,9 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
return offset;
}
-static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
+static int handle_aiocb_rw(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
ssize_t nbytes;
char *buf;
@@ -1305,8 +1314,9 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
* If there is just a single buffer, and it is properly aligned
* we can just use plain pread/pwrite without any problems.
*/
- if (aiocb->aio_niov == 1) {
- return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+ if (aiocb->io.niov == 1) {
+ nbytes = handle_aiocb_rw_linear(aiocb, aiocb->io.iov->iov_base);
+ goto out;
}
/*
* We have more than one iovec, and all are properly aligned.
@@ -1318,7 +1328,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
nbytes = handle_aiocb_rw_vector(aiocb);
if (nbytes == aiocb->aio_nbytes ||
(nbytes < 0 && nbytes != -ENOSYS)) {
- return nbytes;
+ goto out;
}
preadv_present = false;
}
@@ -1336,16 +1346,17 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
*/
buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
if (buf == NULL) {
- return -ENOMEM;
+ nbytes = -ENOMEM;
+ goto out;
}
if (aiocb->aio_type & QEMU_AIO_WRITE) {
char *p = buf;
int i;
- for (i = 0; i < aiocb->aio_niov; ++i) {
- memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
- p += aiocb->aio_iov[i].iov_len;
+ for (i = 0; i < aiocb->io.niov; ++i) {
+ memcpy(p, aiocb->io.iov[i].iov_base, aiocb->io.iov[i].iov_len);
+ p += aiocb->io.iov[i].iov_len;
}
assert(p - buf == aiocb->aio_nbytes);
}
@@ -1356,12 +1367,12 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
size_t count = aiocb->aio_nbytes, copy;
int i;
- for (i = 0; i < aiocb->aio_niov && count; ++i) {
+ for (i = 0; i < aiocb->io.niov && count; ++i) {
copy = count;
- if (copy > aiocb->aio_iov[i].iov_len) {
- copy = aiocb->aio_iov[i].iov_len;
+ if (copy > aiocb->io.iov[i].iov_len) {
+ copy = aiocb->io.iov[i].iov_len;
}
- memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+ memcpy(aiocb->io.iov[i].iov_base, p, copy);
assert(count >= copy);
p += copy;
count -= copy;
@@ -1370,7 +1381,21 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
}
qemu_vfree(buf);
- return nbytes;
+out:
+ if (nbytes == aiocb->aio_nbytes) {
+ return 0;
+ } else if (nbytes >= 0 && nbytes < aiocb->aio_nbytes) {
+ if (aiocb->aio_type & QEMU_AIO_WRITE) {
+ return -EINVAL;
+ } else {
+ iov_memset(aiocb->io.iov, aiocb->io.niov, nbytes,
+ 0, aiocb->aio_nbytes - nbytes);
+ return 0;
+ }
+ } else {
+ assert(nbytes < 0);
+ return nbytes;
+ }
}
#ifdef CONFIG_XFS
@@ -1460,8 +1485,9 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
return ret;
}
-static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+static int handle_aiocb_write_zeroes(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
BDRVRawState *s = aiocb->bs->opaque;
#endif
@@ -1525,8 +1551,9 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
return -ENOTSUP;
}
-static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb)
+static int handle_aiocb_write_zeroes_unmap(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
int ret;
@@ -1568,18 +1595,20 @@ static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
}
#endif
-static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
+static int handle_aiocb_copy_range(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
uint64_t bytes = aiocb->aio_nbytes;
off_t in_off = aiocb->aio_offset;
- off_t out_off = aiocb->aio_offset2;
+ off_t out_off = aiocb->copy_range.aio_offset2;
while (bytes) {
ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off,
- aiocb->aio_fd2, &out_off,
+ aiocb->copy_range.aio_fd2, &out_off,
bytes, 0);
trace_file_copy_file_range(aiocb->bs, aiocb->aio_fildes, in_off,
- aiocb->aio_fd2, out_off, bytes, 0, ret);
+ aiocb->copy_range.aio_fd2, out_off, bytes,
+ 0, ret);
if (ret == 0) {
/* No progress (e.g. when beyond EOF), let the caller fall back to
* buffer I/O. */
@@ -1600,8 +1629,9 @@ static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
return 0;
}
-static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
+static int handle_aiocb_discard(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
@@ -1640,15 +1670,17 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
return ret;
}
-static int handle_aiocb_truncate(RawPosixAIOData *aiocb)
+static int handle_aiocb_truncate(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
int result = 0;
int64_t current_length = 0;
char *buf = NULL;
struct stat st;
int fd = aiocb->aio_fildes;
int64_t offset = aiocb->aio_offset;
- Error **errp = aiocb->errp;
+ PreallocMode prealloc = aiocb->truncate.prealloc;
+ Error **errp = aiocb->truncate.errp;
if (fstat(fd, &st) < 0) {
result = -errno;
@@ -1657,12 +1689,12 @@ static int handle_aiocb_truncate(RawPosixAIOData *aiocb)
}
current_length = st.st_size;
- if (current_length > offset && aiocb->prealloc != PREALLOC_MODE_OFF) {
+ if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Cannot use preallocation for shrinking files");
return -ENOTSUP;
}
- switch (aiocb->prealloc) {
+ switch (prealloc) {
#ifdef CONFIG_POSIX_FALLOCATE
case PREALLOC_MODE_FALLOC:
/*
@@ -1743,7 +1775,7 @@ static int handle_aiocb_truncate(RawPosixAIOData *aiocb)
default:
result = -ENOTSUP;
error_setg(errp, "Unsupported preallocation mode: %s",
- PreallocMode_str(aiocb->prealloc));
+ PreallocMode_str(prealloc));
return result;
}
@@ -1759,104 +1791,19 @@ out:
return result;
}
-static int aio_worker(void *arg)
+static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
+ ThreadPoolFunc func, void *arg)
{
- RawPosixAIOData *aiocb = arg;
- ssize_t ret = 0;
-
- switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
- case QEMU_AIO_READ:
- ret = handle_aiocb_rw(aiocb);
- if (ret >= 0 && ret < aiocb->aio_nbytes) {
- iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
- 0, aiocb->aio_nbytes - ret);
-
- ret = aiocb->aio_nbytes;
- }
- if (ret == aiocb->aio_nbytes) {
- ret = 0;
- } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
- ret = -EINVAL;
- }
- break;
- case QEMU_AIO_WRITE:
- ret = handle_aiocb_rw(aiocb);
- if (ret == aiocb->aio_nbytes) {
- ret = 0;
- } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
- ret = -EINVAL;
- }
- break;
- case QEMU_AIO_FLUSH:
- ret = handle_aiocb_flush(aiocb);
- break;
- case QEMU_AIO_IOCTL:
- ret = handle_aiocb_ioctl(aiocb);
- break;
- case QEMU_AIO_DISCARD:
- ret = handle_aiocb_discard(aiocb);
- break;
- case QEMU_AIO_WRITE_ZEROES:
- ret = handle_aiocb_write_zeroes(aiocb);
- break;
- case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD:
- ret = handle_aiocb_write_zeroes_unmap(aiocb);
- break;
- case QEMU_AIO_COPY_RANGE:
- ret = handle_aiocb_copy_range(aiocb);
- break;
- case QEMU_AIO_TRUNCATE:
- ret = handle_aiocb_truncate(aiocb);
- break;
- default:
- error_report("invalid aio request (0x%x)", aiocb->aio_type);
- ret = -EINVAL;
- break;
- }
-
- g_free(aiocb);
- return ret;
-}
-
-static int paio_submit_co_full(BlockDriverState *bs, int fd,
- int64_t offset, int fd2, int64_t offset2,
- QEMUIOVector *qiov,
- int bytes, int type)
-{
- RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
- ThreadPool *pool;
-
- acb->bs = bs;
- acb->aio_type = type;
- acb->aio_fildes = fd;
- acb->aio_fd2 = fd2;
- acb->aio_offset2 = offset2;
-
- acb->aio_nbytes = bytes;
- acb->aio_offset = offset;
-
- if (qiov) {
- acb->aio_iov = qiov->iov;
- acb->aio_niov = qiov->niov;
- assert(qiov->size == bytes);
- }
-
- trace_file_paio_submit_co(offset, bytes, type);
- pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- return thread_pool_submit_co(pool, aio_worker, acb);
-}
-
-static inline int paio_submit_co(BlockDriverState *bs, int fd,
- int64_t offset, QEMUIOVector *qiov,
- int bytes, int type)
-{
- return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
+ /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
+ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ return thread_pool_submit_co(pool, func, arg);
}
static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData acb;
if (fd_open(bs) < 0)
return -EIO;
@@ -1879,7 +1826,20 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
}
}
- return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = type,
+ .aio_offset = offset,
+ .aio_nbytes = bytes,
+ .io = {
+ .iov = qiov->iov,
+ .niov = qiov->niov,
+ },
+ };
+
+ assert(qiov->size == bytes);
+ return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb);
}
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
@@ -1922,6 +1882,7 @@ static void raw_aio_unplug(BlockDriverState *bs)
static int raw_co_flush_to_disk(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData acb;
int ret;
ret = fd_open(bs);
@@ -1929,7 +1890,13 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
return ret;
}
- return paio_submit_co(bs, s->fd, 0, NULL, 0, QEMU_AIO_FLUSH);
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = QEMU_AIO_FLUSH,
+ };
+
+ return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
}
static void raw_aio_attach_aio_context(BlockDriverState *bs,
@@ -1968,21 +1935,20 @@ static int coroutine_fn
raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
PreallocMode prealloc, Error **errp)
{
- RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
- ThreadPool *pool;
+ RawPosixAIOData acb;
- *acb = (RawPosixAIOData) {
+ acb = (RawPosixAIOData) {
.bs = bs,
.aio_fildes = fd,
.aio_type = QEMU_AIO_TRUNCATE,
.aio_offset = offset,
- .prealloc = prealloc,
- .errp = errp,
+ .truncate = {
+ .prealloc = prealloc,
+ .errp = errp,
+ },
};
- /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
- pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- return thread_pool_submit_co(pool, aio_worker, acb);
+ return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb);
}
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -2613,25 +2579,67 @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
}
static coroutine_fn int
-raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData acb;
+
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = QEMU_AIO_DISCARD,
+ .aio_offset = offset,
+ .aio_nbytes = bytes,
+ };
+
+ if (blkdev) {
+ acb.aio_type |= QEMU_AIO_BLKDEV;
+ }
- return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD);
+ return raw_thread_pool_submit(bs, handle_aiocb_discard, &acb);
}
-static int coroutine_fn raw_co_pwrite_zeroes(
- BlockDriverState *bs, int64_t offset,
- int bytes, BdrvRequestFlags flags)
+static coroutine_fn int
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+{
+ return raw_do_pdiscard(bs, offset, bytes, false);
+}
+
+static int coroutine_fn
+raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
+ BdrvRequestFlags flags, bool blkdev)
{
BDRVRawState *s = bs->opaque;
- int operation = QEMU_AIO_WRITE_ZEROES;
+ RawPosixAIOData acb;
+ ThreadPoolFunc *handler;
+
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = QEMU_AIO_WRITE_ZEROES,
+ .aio_offset = offset,
+ .aio_nbytes = bytes,
+ };
+
+ if (blkdev) {
+ acb.aio_type |= QEMU_AIO_BLKDEV;
+ }
if (flags & BDRV_REQ_MAY_UNMAP) {
- operation |= QEMU_AIO_DISCARD;
+ acb.aio_type |= QEMU_AIO_DISCARD;
+ handler = handle_aiocb_write_zeroes_unmap;
+ } else {
+ handler = handle_aiocb_write_zeroes;
}
- return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
+ return raw_thread_pool_submit(bs, handler, &acb);
+}
+
+static int coroutine_fn raw_co_pwrite_zeroes(
+ BlockDriverState *bs, int64_t offset,
+ int bytes, BdrvRequestFlags flags)
+{
+ return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -2702,6 +2710,7 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ RawPosixAIOData acb;
BDRVRawState *s = bs->opaque;
BDRVRawState *src_s;
@@ -2714,8 +2723,20 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
if (fd_open(src->bs) < 0 || fd_open(dst->bs) < 0) {
return -EIO;
}
- return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset,
- NULL, bytes, QEMU_AIO_COPY_RANGE);
+
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_type = QEMU_AIO_COPY_RANGE,
+ .aio_fildes = src_s->fd,
+ .aio_offset = src_offset,
+ .aio_nbytes = bytes,
+ .copy_range = {
+ .aio_fd2 = s->fd,
+ .aio_offset2 = dst_offset,
+ },
+ };
+
+ return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb);
}
BlockDriver bdrv_file = {
@@ -3063,36 +3084,39 @@ hdev_open_Mac_error:
}
#if defined(__linux__)
-
-static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
BDRVRawState *s = bs->opaque;
- RawPosixAIOData *acb;
- ThreadPool *pool;
+ RawPosixAIOData acb;
+ int ret;
- if (fd_open(bs) < 0)
- return NULL;
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
if (req == SG_IO && s->pr_mgr) {
struct sg_io_hdr *io_hdr = buf;
if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
- s->fd, io_hdr, cb, opaque);
+ s->fd, io_hdr);
}
}
- acb = g_new(RawPosixAIOData, 1);
- acb->bs = bs;
- acb->aio_type = QEMU_AIO_IOCTL;
- acb->aio_fildes = s->fd;
- acb->aio_offset = 0;
- acb->aio_ioctl_buf = buf;
- acb->aio_ioctl_cmd = req;
- pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_type = QEMU_AIO_IOCTL,
+ .aio_fildes = s->fd,
+ .aio_offset = 0,
+ .ioctl = {
+ .buf = buf,
+ .cmd = req,
+ },
+ };
+
+ return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb);
}
#endif /* linux */
@@ -3109,22 +3133,18 @@ static int fd_open(BlockDriverState *bs)
static coroutine_fn int
hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
- BDRVRawState *s = bs->opaque;
int ret;
ret = fd_open(bs);
if (ret < 0) {
return ret;
}
- return paio_submit_co(bs, s->fd, offset, NULL, bytes,
- QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV);
+ return raw_do_pdiscard(bs, offset, bytes, true);
}
static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
- BDRVRawState *s = bs->opaque;
- int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV;
int rc;
rc = fd_open(bs);
@@ -3132,11 +3152,7 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
return rc;
}
- if (flags & BDRV_REQ_MAY_UNMAP) {
- operation |= QEMU_AIO_DISCARD;
- }
-
- return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
+ return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true);
}
static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
@@ -3241,7 +3257,7 @@ static BlockDriver bdrv_host_device = {
/* generic scsi device */
#ifdef __linux__
- .bdrv_aio_ioctl = hdev_aio_ioctl,
+ .bdrv_co_ioctl = hdev_co_ioctl,
#endif
};
@@ -3363,7 +3379,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_lock_medium = cdrom_lock_medium,
/* generic scsi device */
- .bdrv_aio_ioctl = hdev_aio_ioctl,
+ .bdrv_co_ioctl = hdev_co_ioctl,
};
#endif /* __linux__ */
diff --git a/block/mirror.c b/block/mirror.c
index 8f52c6215d..ab59ad77e8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -277,7 +277,8 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
return ret;
}
-static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+static inline void coroutine_fn
+mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
{
MirrorOp *op;
@@ -295,7 +296,8 @@ static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
abort();
}
-static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+static inline void coroutine_fn
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
{
/* Only non-active operations use up in-flight slots */
mirror_wait_for_any_operation(s, false);
@@ -598,7 +600,7 @@ static void mirror_free_init(MirrorBlockJob *s)
* mirror_resume() because mirror_run() will begin iterating again
* when the job is resumed.
*/
-static void mirror_wait_for_all_io(MirrorBlockJob *s)
+static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
mirror_wait_for_free_in_flight_slot(s);
@@ -669,9 +671,10 @@ static int mirror_exit_common(Job *job)
if (s->should_complete && !abort) {
BlockDriverState *to_replace = s->to_replace ?: src;
+ bool ro = bdrv_is_read_only(to_replace);
- if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
- bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
+ if (ro != bdrv_is_read_only(target_bs)) {
+ bdrv_reopen_set_read_only(target_bs, ro, NULL);
}
/* The mirror job has no requests in flight any more, but we need to
@@ -731,7 +734,7 @@ static void mirror_abort(Job *job)
assert(ret == 0);
}
-static void mirror_throttle(MirrorBlockJob *s)
+static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -1106,7 +1109,7 @@ static void mirror_complete(Job *job, Error **errp)
job_enter(job);
}
-static void mirror_pause(Job *job)
+static void coroutine_fn mirror_pause(Job *job)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
@@ -1177,9 +1180,10 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
-static void do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
- uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov, int flags)
+static void coroutine_fn
+do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BdrvDirtyBitmapIter *iter;
QEMUIOVector target_qiov;
@@ -1689,13 +1693,15 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque,
bool auto_complete, Error **errp)
{
- int orig_base_flags;
+ bool base_read_only;
Error *local_err = NULL;
- orig_base_flags = bdrv_get_flags(base);
+ base_read_only = bdrv_is_read_only(base);
- if (bdrv_reopen(base, bs->open_flags, errp)) {
- return;
+ if (base_read_only) {
+ if (bdrv_reopen_set_read_only(base, false, errp) < 0) {
+ return;
+ }
}
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
@@ -1714,6 +1720,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
error_restore_flags:
/* ignore error and errp for bdrv_reopen, because we want to propagate
* the original error */
- bdrv_reopen(base, orig_base_flags, NULL);
+ if (base_read_only) {
+ bdrv_reopen_set_read_only(base, true, NULL);
+ }
return;
}
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d37fe08b3d..e2737429f5 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1571,76 +1571,6 @@ again:
return 0;
}
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcow2State *s = bs->opaque;
- int ret, csize, nb_csectors, sector_offset;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
- sector_offset = coffset & 511;
- csize = nb_csectors * 512 - sector_offset;
-
- /* Allocate buffers on first decompress operation, most images are
- * uncompressed and the memory overhead can be avoided. The buffers
- * are freed in .bdrv_close().
- */
- if (!s->cluster_data) {
- /* one more sector for decompressed data alignment */
- s->cluster_data = qemu_try_blockalign(bs->file->bs,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
- if (!s->cluster_data) {
- return -ENOMEM;
- }
- }
- if (!s->cluster_cache) {
- s->cluster_cache = g_malloc(s->cluster_size);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
- nb_csectors);
- if (ret < 0) {
- return ret;
- }
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data + sector_offset, csize) < 0) {
- return -EIO;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
/*
* This discards as many clusters of nb_clusters as possible at once (i.e.
* all clusters in the same L2 slice) and returns the number of discarded
diff --git a/block/qcow2.c b/block/qcow2.c
index bc8868c36a..4897abae5e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -74,6 +74,13 @@ typedef struct {
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875
+static int coroutine_fn
+qcow2_co_preadv_compressed(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov);
+
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
{
const QCowHeader *cow_header = (const void *)buf;
@@ -1414,7 +1421,6 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
goto fail;
}
- s->cluster_cache_offset = -1;
s->flags = flags;
ret = qcow2_refcount_init(bs);
@@ -1914,15 +1920,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
break;
case QCOW2_CLUSTER_COMPRESSED:
- /* add AIO support for compressed blocks ? */
- ret = qcow2_decompress_cluster(bs, cluster_offset);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = qcow2_co_preadv_compressed(bs, cluster_offset,
+ offset, cur_bytes,
+ &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
- qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + offset_in_cluster,
- cur_bytes);
break;
case QCOW2_CLUSTER_NORMAL:
@@ -2058,8 +2064,6 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
qemu_iovec_init(&hd_qiov, qiov->niov);
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
qemu_co_mutex_lock(&s->lock);
while (bytes != 0) {
@@ -2223,8 +2227,6 @@ static void qcow2_close(BlockDriverState *bs)
g_free(s->image_backing_file);
g_free(s->image_backing_format);
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs);
qcow2_free_snapshots(bs);
}
@@ -3401,7 +3403,6 @@ qcow2_co_copy_range_to(BlockDriverState *bs,
QCowL2Meta *l2meta = NULL;
assert(!bs->encrypted);
- s->cluster_cache_offset = -1; /* disable compressed cache */
qemu_co_mutex_lock(&s->lock);
@@ -3722,14 +3723,15 @@ fail:
/*
* qcow2_compress()
*
- * @dest - destination buffer, at least of @size-1 bytes
- * @src - source buffer, @size bytes
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
*
* Returns: compressed size on success
- * -1 if compression is inefficient
+ * -1 destination buffer is not enough to store compressed data
* -2 on any other error
*/
-static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
+static ssize_t qcow2_compress(void *dest, size_t dest_size,
+ const void *src, size_t src_size)
{
ssize_t ret;
z_stream strm;
@@ -3738,20 +3740,20 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
memset(&strm, 0, sizeof(strm));
ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-12, 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
+ if (ret != Z_OK) {
return -2;
}
/* strm.next_in is not const in old zlib versions, such as those used on
* OpenBSD/NetBSD, so cast the const away */
- strm.avail_in = size;
+ strm.avail_in = src_size;
strm.next_in = (void *) src;
- strm.avail_out = size - 1;
+ strm.avail_out = dest_size;
strm.next_out = dest;
ret = deflate(&strm, Z_FINISH);
if (ret == Z_STREAM_END) {
- ret = size - 1 - strm.avail_out;
+ ret = dest_size - strm.avail_out;
} else {
ret = (ret == Z_OK ? -1 : -2);
}
@@ -3761,20 +3763,68 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
return ret;
}
+/*
+ * qcow2_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes.
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ * -1 on fail
+ */
+static ssize_t qcow2_decompress(void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ int ret = 0;
+ z_stream strm;
+
+ memset(&strm, 0, sizeof(strm));
+ strm.avail_in = src_size;
+ strm.next_in = (void *) src;
+ strm.avail_out = dest_size;
+ strm.next_out = dest;
+
+ ret = inflateInit2(&strm, -12);
+ if (ret != Z_OK) {
+ return -1;
+ }
+
+ ret = inflate(&strm, Z_FINISH);
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
+ /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
+ * @src buffer may be processed partly (because in qcow2 we know size of
+ * compressed data with precision of one sector) */
+ ret = -1;
+ }
+
+ inflateEnd(&strm);
+
+ return ret;
+}
+
#define MAX_COMPRESS_THREADS 4
+typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
+ const void *src, size_t src_size);
typedef struct Qcow2CompressData {
void *dest;
+ size_t dest_size;
const void *src;
- size_t size;
+ size_t src_size;
ssize_t ret;
+
+ Qcow2CompressFunc func;
} Qcow2CompressData;
static int qcow2_compress_pool_func(void *opaque)
{
Qcow2CompressData *data = opaque;
- data->ret = qcow2_compress(data->dest, data->src, data->size);
+ data->ret = data->func(data->dest, data->dest_size,
+ data->src, data->src_size);
return 0;
}
@@ -3784,17 +3834,19 @@ static void qcow2_compress_complete(void *opaque, int ret)
qemu_coroutine_enter(opaque);
}
-/* See qcow2_compress definition for parameters description */
-static ssize_t qcow2_co_compress(BlockDriverState *bs,
- void *dest, const void *src, size_t size)
+static ssize_t coroutine_fn
+qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size, Qcow2CompressFunc func)
{
BDRVQcow2State *s = bs->opaque;
BlockAIOCB *acb;
ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
Qcow2CompressData arg = {
.dest = dest,
+ .dest_size = dest_size,
.src = src,
- .size = size,
+ .src_size = src_size,
+ .func = func,
};
while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
@@ -3817,6 +3869,22 @@ static ssize_t qcow2_co_compress(BlockDriverState *bs,
return arg.ret;
}
+static ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+ qcow2_compress);
+}
+
+static ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+ qcow2_decompress);
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
@@ -3861,7 +3929,8 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
out_buf = g_malloc(s->cluster_size);
- out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
+ out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
+ buf, s->cluster_size);
if (out_len == -2) {
ret = -EINVAL;
goto fail;
@@ -3909,6 +3978,55 @@ fail:
return ret;
}
+static int coroutine_fn
+qcow2_co_preadv_compressed(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret = 0, csize, nb_csectors;
+ uint64_t coffset;
+ uint8_t *buf, *out_buf;
+ struct iovec iov;
+ QEMUIOVector local_qiov;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+
+ coffset = file_cluster_offset & s->cluster_offset_mask;
+ nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
+ csize = nb_csectors * 512 - (coffset & 511);
+
+ buf = g_try_malloc(csize);
+ if (!buf) {
+ return -ENOMEM;
+ }
+ iov.iov_base = buf;
+ iov.iov_len = csize;
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+ out_buf = qemu_blockalign(bs, s->cluster_size);
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+ ret = bdrv_co_preadv(bs->file, coffset, csize, &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes);
+
+fail:
+ qemu_vfree(out_buf);
+ g_free(buf);
+
+ return ret;
+}
+
static int make_completely_empty(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
diff --git a/block/qcow2.h b/block/qcow2.h
index 8662b68575..a98d24500b 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -278,9 +278,6 @@ typedef struct BDRVQcow2State {
QEMUTimer *cache_clean_timer;
unsigned cache_clean_interval;
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
uint64_t *refcount_table;
@@ -616,7 +613,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *buf, int nb_sectors, bool enc, Error **errp);
diff --git a/block/replication.c b/block/replication.c
index 6349d6958e..e70dd95001 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -20,6 +20,7 @@
#include "block/block_backup.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
#include "replication.h"
typedef enum {
@@ -39,8 +40,8 @@ typedef struct BDRVReplicationState {
char *top_id;
ReplicationState *rs;
Error *blocker;
- int orig_hidden_flags;
- int orig_secondary_flags;
+ bool orig_hidden_read_only;
+ bool orig_secondary_read_only;
int error;
} BDRVReplicationState;
@@ -218,9 +219,6 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
QEMUIOVector *qiov)
{
BDRVReplicationState *s = bs->opaque;
- BdrvChild *child = s->secondary_disk;
- BlockJob *job = NULL;
- CowRequest req;
int ret;
if (s->mode == REPLICATION_MODE_PRIMARY) {
@@ -233,28 +231,9 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
return ret;
}
- if (child && child->bs) {
- job = child->bs->job;
- }
-
- if (job) {
- uint64_t remaining_bytes = remaining_sectors * BDRV_SECTOR_SIZE;
-
- backup_wait_for_overlapping_requests(child->bs->job,
- sector_num * BDRV_SECTOR_SIZE,
- remaining_bytes);
- backup_cow_request_begin(&req, child->bs->job,
- sector_num * BDRV_SECTOR_SIZE,
- remaining_bytes);
- ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
- remaining_bytes, qiov, 0);
- backup_cow_request_end(&req);
- goto out;
- }
-
ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
-out:
+
return replication_return_value(s, ret);
}
@@ -371,44 +350,38 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
}
}
+/* This function is supposed to be called twice:
+ * first with writable = true, then with writable = false.
+ * The first call puts s->hidden_disk and s->secondary_disk in
+ * r/w mode, and the second puts them back in their original state.
+ */
static void reopen_backing_file(BlockDriverState *bs, bool writable,
Error **errp)
{
BDRVReplicationState *s = bs->opaque;
BlockReopenQueue *reopen_queue = NULL;
- int orig_hidden_flags, orig_secondary_flags;
- int new_hidden_flags, new_secondary_flags;
Error *local_err = NULL;
if (writable) {
- orig_hidden_flags = s->orig_hidden_flags =
- bdrv_get_flags(s->hidden_disk->bs);
- new_hidden_flags = (orig_hidden_flags | BDRV_O_RDWR) &
- ~BDRV_O_INACTIVE;
- orig_secondary_flags = s->orig_secondary_flags =
- bdrv_get_flags(s->secondary_disk->bs);
- new_secondary_flags = (orig_secondary_flags | BDRV_O_RDWR) &
- ~BDRV_O_INACTIVE;
- } else {
- orig_hidden_flags = (s->orig_hidden_flags | BDRV_O_RDWR) &
- ~BDRV_O_INACTIVE;
- new_hidden_flags = s->orig_hidden_flags;
- orig_secondary_flags = (s->orig_secondary_flags | BDRV_O_RDWR) &
- ~BDRV_O_INACTIVE;
- new_secondary_flags = s->orig_secondary_flags;
+ s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
+ s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
}
bdrv_subtree_drained_begin(s->hidden_disk->bs);
bdrv_subtree_drained_begin(s->secondary_disk->bs);
- if (orig_hidden_flags != new_hidden_flags) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL,
- new_hidden_flags);
+ if (s->orig_hidden_read_only) {
+ QDict *opts = qdict_new();
+ qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
+ reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
+ opts);
}
- if (!(orig_secondary_flags & BDRV_O_RDWR)) {
+ if (s->orig_secondary_read_only) {
+ QDict *opts = qdict_new();
+ qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
- NULL, new_secondary_flags);
+ opts);
}
if (reopen_queue) {
diff --git a/block/stream.c b/block/stream.c
index 81a7ec8ece..7a49ac0992 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,7 @@ typedef struct StreamBlockJob {
BlockDriverState *base;
BlockdevOnError on_error;
char *backing_file_str;
- int bs_flags;
+ bool bs_read_only;
} StreamBlockJob;
static int coroutine_fn stream_populate(BlockBackend *blk,
@@ -89,10 +89,10 @@ static void stream_clean(Job *job)
BlockDriverState *bs = blk_bs(bjob->blk);
/* Reopen the image back in read-only mode if necessary */
- if (s->bs_flags != bdrv_get_flags(bs)) {
+ if (s->bs_read_only) {
/* Give up write permissions before making it read-only */
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
- bdrv_reopen(bs, s->bs_flags, NULL);
+ bdrv_reopen_set_read_only(bs, true, NULL);
}
g_free(s->backing_file_str);
@@ -226,12 +226,12 @@ void stream_start(const char *job_id, BlockDriverState *bs,
{
StreamBlockJob *s;
BlockDriverState *iter;
- int orig_bs_flags;
+ bool bs_read_only;
/* Make sure that the image is opened in read-write mode */
- orig_bs_flags = bdrv_get_flags(bs);
- if (!(orig_bs_flags & BDRV_O_RDWR)) {
- if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
+ bs_read_only = bdrv_is_read_only(bs);
+ if (bs_read_only) {
+ if (bdrv_reopen_set_read_only(bs, false, errp) != 0) {
return;
}
}
@@ -261,7 +261,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->base = base;
s->backing_file_str = g_strdup(backing_file_str);
- s->bs_flags = orig_bs_flags;
+ s->bs_read_only = bs_read_only;
s->on_error = on_error;
trace_stream_start(bs, base, s);
@@ -269,7 +269,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
return;
fail:
- if (orig_bs_flags != bdrv_get_flags(bs)) {
- bdrv_reopen(bs, orig_bs_flags, NULL);
+ if (bs_read_only) {
+ bdrv_reopen_set_read_only(bs, true, NULL);
}
}
diff --git a/blockdev.c b/blockdev.c
index 81f95d920b..e6c8349409 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1703,8 +1703,7 @@ static void external_snapshot_commit(BlkActionState *common)
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
if (!atomic_read(&state->old_bs->copy_on_read)) {
- bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
- NULL);
+ bdrv_reopen_set_read_only(state->old_bs, true, NULL);
}
aio_context_release(aio_context);
@@ -4100,7 +4099,6 @@ void qmp_change_backing_file(const char *device,
BlockDriverState *image_bs = NULL;
Error *local_err = NULL;
bool ro;
- int open_flags;
int ret;
bs = qmp_get_root_bs(device, errp);
@@ -4142,13 +4140,10 @@ void qmp_change_backing_file(const char *device,
}
/* if not r/w, reopen to make r/w */
- open_flags = image_bs->open_flags;
ro = bdrv_is_read_only(image_bs);
if (ro) {
- bdrv_reopen(image_bs, open_flags | BDRV_O_RDWR, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) {
goto out;
}
}
@@ -4164,7 +4159,7 @@ void qmp_change_backing_file(const char *device,
}
if (ro) {
- bdrv_reopen(image_bs, open_flags, &local_err);
+ bdrv_reopen_set_read_only(image_bs, true, &local_err);
error_propagate(errp, local_err);
}
diff --git a/configure b/configure
index 0886f45bf4..224d3071ac 100755
--- a/configure
+++ b/configure
@@ -434,6 +434,7 @@ capstone=""
lzo=""
snappy=""
bzip2=""
+lzfse=""
guest_agent=""
guest_agent_with_vss="no"
guest_agent_ntddscsi="no"
@@ -1310,6 +1311,10 @@ for opt do
;;
--enable-bzip2) bzip2="yes"
;;
+ --enable-lzfse) lzfse="yes"
+ ;;
+ --disable-lzfse) lzfse="no"
+ ;;
--enable-guest-agent) guest_agent="yes"
;;
--disable-guest-agent) guest_agent="no"
@@ -1745,6 +1750,8 @@ disabled with --disable-FEATURE, default is enabled if available:
snappy support of snappy compression library
bzip2 support of bzip2 compression library
(for reading bzip2-compressed dmg images)
+ lzfse support of lzfse compression library
+ (for reading lzfse-compressed dmg images)
seccomp seccomp support
coroutine-pool coroutine freelist (better performance)
glusterfs GlusterFS backend
@@ -2275,6 +2282,24 @@ EOF
fi
##########################################
+# lzfse check
+
+if test "$lzfse" != "no" ; then
+ cat > $TMPC << EOF
+#include <lzfse.h>
+int main(void) { lzfse_decode_scratch_size(); return 0; }
+EOF
+ if compile_prog "" "-llzfse" ; then
+ lzfse="yes"
+ else
+ if test "$lzfse" = "yes"; then
+ feature_not_found "lzfse" "Install lzfse devel"
+ fi
+ lzfse="no"
+ fi
+fi
+
+##########################################
# libseccomp check
libseccomp_minver="2.2.0"
@@ -6096,6 +6121,7 @@ echo "Live block migration $live_block_migration"
echo "lzo support $lzo"
echo "snappy support $snappy"
echo "bzip2 support $bzip2"
+echo "lzfse support $lzfse"
echo "NUMA host support $numa"
echo "libxml2 $libxml2"
echo "tcmalloc support $tcmalloc"
@@ -6618,6 +6644,11 @@ if test "$bzip2" = "yes" ; then
echo "BZIP2_LIBS=-lbz2" >> $config_host_mak
fi
+if test "$lzfse" = "yes" ; then
+ echo "CONFIG_LZFSE=y" >> $config_host_mak
+ echo "LZFSE_LIBS=-llzfse" >> $config_host_mak
+fi
+
if test "$libiscsi" = "yes" ; then
echo "CONFIG_LIBISCSI=m" >> $config_host_mak
echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
diff --git a/include/block/block.h b/include/block/block.h
index 7f5453b45b..f70a843b72 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -299,10 +299,10 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs,
- QDict *options, int flags);
+ BlockDriverState *bs, QDict *options);
int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp);
int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp);
void bdrv_reopen_commit(BDRVReopenState *reopen_state);
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
index 994a3bd2ec..157596c296 100644
--- a/include/block/block_backup.h
+++ b/include/block/block_backup.h
@@ -20,19 +20,6 @@
#include "block/block_int.h"
-typedef struct CowRequest {
- int64_t start_byte;
- int64_t end_byte;
- QLIST_ENTRY(CowRequest) list;
- CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
-
-void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
- uint64_t bytes);
-void backup_cow_request_begin(CowRequest *req, BlockJob *job,
- int64_t offset, uint64_t bytes);
-void backup_cow_request_end(CowRequest *req);
-
void backup_do_checkpoint(BlockJob *job, Error **errp);
#endif
diff --git a/include/scsi/pr-manager.h b/include/scsi/pr-manager.h
index 50a77b08fc..6ad5fd1ff7 100644
--- a/include/scsi/pr-manager.h
+++ b/include/scsi/pr-manager.h
@@ -5,6 +5,7 @@
#include "qapi/visitor.h"
#include "qom/object_interfaces.h"
#include "block/aio.h"
+#include "qemu/coroutine.h"
#define TYPE_PR_MANAGER "pr-manager"
@@ -37,11 +38,8 @@ typedef struct PRManagerClass {
} PRManagerClass;
bool pr_manager_is_connected(PRManager *pr_mgr);
-BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
- AioContext *ctx, int fd,
- struct sg_io_hdr *hdr,
- BlockCompletionFunc *complete,
- void *opaque);
+int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
+ struct sg_io_hdr *hdr);
PRManager *pr_manager_lookup(const char *id, Error **errp);
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 5363482213..2c39124036 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -10,6 +10,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
#include "qemu-io.h"
#include "sysemu/block-backend.h"
#include "block/block.h"
@@ -1978,6 +1979,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
int flags = bs->open_flags;
bool writethrough = !blk_enable_write_cache(blk);
bool has_rw_option = false;
+ bool has_cache_option = false;
BlockReopenQueue *brq;
Error *local_err = NULL;
@@ -1989,6 +1991,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
error_report("Invalid cache option: %s", optarg);
return -EINVAL;
}
+ has_cache_option = true;
break;
case 'o':
if (!qemu_opts_parse_noisily(&reopen_opts, optarg, 0)) {
@@ -2046,11 +2049,33 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
}
qopts = qemu_opts_find(&reopen_opts, NULL);
- opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
+ opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : qdict_new();
qemu_opts_reset(&reopen_opts);
+ if (qdict_haskey(opts, BDRV_OPT_READ_ONLY)) {
+ if (has_rw_option) {
+ error_report("Cannot set both -r/-w and '" BDRV_OPT_READ_ONLY "'");
+ qobject_unref(opts);
+ return -EINVAL;
+ }
+ } else {
+ qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
+ }
+
+ if (qdict_haskey(opts, BDRV_OPT_CACHE_DIRECT) ||
+ qdict_haskey(opts, BDRV_OPT_CACHE_NO_FLUSH)) {
+ if (has_cache_option) {
+ error_report("Cannot set both -c and the cache options");
+ qobject_unref(opts);
+ return -EINVAL;
+ }
+ } else {
+ qdict_put_bool(opts, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
+ qdict_put_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, flags & BDRV_O_NO_FLUSH);
+ }
+
bdrv_subtree_drained_begin(bs);
- brq = bdrv_reopen_queue(NULL, bs, opts, flags);
+ brq = bdrv_reopen_queue(NULL, bs, opts);
bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
bdrv_subtree_drained_end(bs);
diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c
index 2a8f300dde..d9f4e8c3ad 100644
--- a/scsi/pr-manager.c
+++ b/scsi/pr-manager.c
@@ -48,24 +48,21 @@ static int pr_manager_worker(void *opaque)
}
-BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
- AioContext *ctx, int fd,
- struct sg_io_hdr *hdr,
- BlockCompletionFunc *complete,
- void *opaque)
+int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
+ struct sg_io_hdr *hdr)
{
- PRManagerData *data = g_new(PRManagerData, 1);
ThreadPool *pool = aio_get_thread_pool(ctx);
+ PRManagerData data = {
+ .pr_mgr = pr_mgr,
+ .fd = fd,
+ .hdr = hdr,
+ };
- trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque);
- data->pr_mgr = pr_mgr;
- data->fd = fd;
- data->hdr = hdr;
+ trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1]);
/* The matching object_unref is in pr_manager_worker. */
object_ref(OBJECT(pr_mgr));
- return thread_pool_submit_aio(pool, pr_manager_worker,
- data, complete, opaque);
+ return thread_pool_submit_co(pool, pr_manager_worker, &data);
}
bool pr_manager_is_connected(PRManager *pr_mgr)
diff --git a/scsi/trace-events b/scsi/trace-events
index 45f5b6e49b..f8a68b11eb 100644
--- a/scsi/trace-events
+++ b/scsi/trace-events
@@ -1,3 +1,3 @@
# scsi/pr-manager.c
-pr_manager_execute(int fd, int cmd, int sa, void *opaque) "fd=%d cmd=0x%02x service action=0x%02x opaque=%p"
+pr_manager_execute(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
pr_manager_run(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
diff --git a/tests/qemu-iotests/133 b/tests/qemu-iotests/133
index a9a47a3c36..565e0b1b6e 100755
--- a/tests/qemu-iotests/133
+++ b/tests/qemu-iotests/133
@@ -91,6 +91,24 @@ echo
IMGOPTSSYNTAX=false $QEMU_IO -f null-co -c 'reopen' -c 'info' \
"json:{'driver': 'null-co', 'size': 65536}"
+echo
+echo "=== Check that mixing -c/-r/-w and their corresponding options is forbidden ==="
+echo
+
+$QEMU_IO -c 'reopen -r -o read-only=on' $TEST_IMG
+$QEMU_IO -c 'reopen -w -o read-only=on' $TEST_IMG
+$QEMU_IO -c 'reopen -c none -o cache.direct=on' $TEST_IMG
+$QEMU_IO -c 'reopen -c writeback -o cache.direct=on' $TEST_IMG
+$QEMU_IO -c 'reopen -c directsync -o cache.no-flush=on' $TEST_IMG
+
+echo
+echo "=== Check that invalid options are handled correctly ==="
+echo
+
+$QEMU_IO -c 'reopen -o read-only=foo' $TEST_IMG
+$QEMU_IO -c 'reopen -o cache.no-flush=bar' $TEST_IMG
+$QEMU_IO -c 'reopen -o cache.direct=baz' $TEST_IMG
+$QEMU_IO -c 'reopen -o auto-read-only=qux' $TEST_IMG
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/133.out b/tests/qemu-iotests/133.out
index f4a85aeb63..414c7fa27f 100644
--- a/tests/qemu-iotests/133.out
+++ b/tests/qemu-iotests/133.out
@@ -24,4 +24,19 @@ Cannot change the option 'driver'
format name: null-co
format name: null-co
+
+=== Check that mixing -c/-r/-w and their corresponding options is forbidden ===
+
+Cannot set both -r/-w and 'read-only'
+Cannot set both -r/-w and 'read-only'
+Cannot set both -c and the cache options
+Cannot set both -c and the cache options
+Cannot set both -c and the cache options
+
+=== Check that invalid options are handled correctly ===
+
+Parameter 'read-only' expects 'on' or 'off'
+Parameter 'cache.no-flush' expects 'on' or 'off'
+Parameter 'cache.direct' expects 'on' or 'off'
+Parameter 'auto-read-only' expects 'on' or 'off'
*** done
diff --git a/tests/qemu-iotests/235 b/tests/qemu-iotests/235
index da044ed34e..d6edd97ab4 100755
--- a/tests/qemu-iotests/235
+++ b/tests/qemu-iotests/235
@@ -49,7 +49,9 @@ qemu_img_create('-f', iotests.imgfmt, '-o', 'preallocation=metadata', disk,
str(size))
vm = QEMUMachine(iotests.qemu_prog)
-vm.add_args('-machine', 'pc,accel=kvm')
+vm.add_args('-machine', 'accel=kvm')
+if iotests.qemu_default_machine == 's390-ccw-virtio':
+ vm.add_args('-no-shutdown')
vm.add_args('-drive', 'id=src,file=' + disk)
vm.launch()