diff options
-rw-r--r-- | block.c | 89 | ||||
-rw-r--r-- | block/Makefile.objs | 2 | ||||
-rw-r--r-- | block/backup.c | 38 | ||||
-rw-r--r-- | block/commit.c | 23 | ||||
-rw-r--r-- | block/dmg-lzfse.c | 49 | ||||
-rw-r--r-- | block/dmg.c | 65 | ||||
-rw-r--r-- | block/dmg.h | 3 | ||||
-rw-r--r-- | block/file-posix.c | 382 | ||||
-rw-r--r-- | block/mirror.c | 38 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 70 | ||||
-rw-r--r-- | block/qcow2.c | 170 | ||||
-rw-r--r-- | block/qcow2.h | 4 | ||||
-rw-r--r-- | block/replication.c | 67 | ||||
-rw-r--r-- | block/stream.c | 20 | ||||
-rw-r--r-- | blockdev.c | 11 | ||||
-rwxr-xr-x | configure | 31 | ||||
-rw-r--r-- | include/block/block.h | 6 | ||||
-rw-r--r-- | include/block/block_backup.h | 13 | ||||
-rw-r--r-- | include/scsi/pr-manager.h | 8 | ||||
-rw-r--r-- | qemu-io-cmds.c | 29 | ||||
-rw-r--r-- | scsi/pr-manager.c | 21 | ||||
-rw-r--r-- | scsi/trace-events | 2 | ||||
-rwxr-xr-x | tests/qemu-iotests/133 | 18 | ||||
-rw-r--r-- | tests/qemu-iotests/133.out | 15 | ||||
-rwxr-xr-x | tests/qemu-iotests/235 | 4 |
25 files changed, 671 insertions, 507 deletions
@@ -1079,11 +1079,11 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, const char *filename, Error **errp) { BlockDriverState *parent = c->opaque; - int orig_flags = bdrv_get_flags(parent); + bool read_only = bdrv_is_read_only(parent); int ret; - if (!(orig_flags & BDRV_O_RDWR)) { - ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp); + if (read_only) { + ret = bdrv_reopen_set_read_only(parent, false, errp); if (ret < 0) { return ret; } @@ -1095,8 +1095,8 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, error_setg_errno(errp, -ret, "Could not update backing file link"); } - if (!(orig_flags & BDRV_O_RDWR)) { - bdrv_reopen(parent, orig_flags, NULL); + if (read_only) { + bdrv_reopen_set_read_only(parent, true, NULL); } return ret; @@ -1139,22 +1139,18 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) { *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); - assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH)); if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { *flags |= BDRV_O_NO_FLUSH; } - assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT)); if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { *flags |= BDRV_O_NOCACHE; } - assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY)); if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { *flags |= BDRV_O_RDWR; } - assert(qemu_opt_find(opts, BDRV_OPT_AUTO_READ_ONLY)); if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { *flags |= BDRV_O_AUTO_RDONLY; } @@ -2931,7 +2927,6 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, - int flags, const BdrvChildRole *role, QDict *parent_options, int parent_flags) @@ -2940,7 +2935,9 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockReopenQueueEntry *bs_entry; BdrvChild *child; - QDict *old_options, *explicit_options; + QDict *old_options, *explicit_options, *options_copy; + int flags; + QemuOpts *opts; /* Make sure that the caller remembered to use a drained section. This is * important to avoid graph changes between the recursive queuing here and @@ -2966,22 +2963,11 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, /* * Precedence of options: * 1. Explicitly passed in options (highest) - * 2. Set in flags (only for top level) - * 3. Retained from explicitly set options of bs - * 4. Inherited from parent node - * 5. Retained from effective options of bs + * 2. Retained from explicitly set options of bs + * 3. Inherited from parent node + * 4. Retained from effective options of bs */ - if (!parent_options) { - /* - * Any setting represented by flags is always updated. If the - * corresponding QDict option is set, it takes precedence. Otherwise - * the flag is translated into a QDict option. The old setting of bs is - * not considered. - */ - update_options_from_flags(options, flags); - } - /* Old explicitly set values (don't overwrite by inherited value) */ if (bs_entry) { old_options = qdict_clone_shallow(bs_entry->state.explicit_options); @@ -2995,16 +2981,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, /* Inherit from parent node */ if (parent_options) { - QemuOpts *opts; - QDict *options_copy; - assert(!flags); + flags = 0; role->inherit_options(&flags, options, parent_flags, parent_options); - options_copy = qdict_clone_shallow(options); - opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options_copy, NULL); - update_flags_from_options(&flags, opts); - qemu_opts_del(opts); - qobject_unref(options_copy); + } else { + flags = bdrv_get_flags(bs); } /* Old values are used for options that aren't set yet */ @@ -3012,6 +2992,14 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, bdrv_join_options(bs, options, old_options); qobject_unref(old_options); + /* We have the final set of options so let's update the flags */ + options_copy = qdict_clone_shallow(options); + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options_copy, NULL); + update_flags_from_options(&flags, opts); + qemu_opts_del(opts); + qobject_unref(options_copy); + /* bdrv_open_inherit() sets and clears some additional flags internally */ flags &= ~BDRV_O_PROTOCOL; if (flags & BDRV_O_RDWR) { @@ -3051,7 +3039,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, qdict_extract_subqdict(options, &new_child_options, child_key_dot); g_free(child_key_dot); - bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0, + bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, child->role, options, flags); } @@ -3060,10 +3048,9 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, - QDict *options, int flags) + QDict *options) { - return bdrv_reopen_queue_child(bs_queue, bs, options, flags, - NULL, NULL, 0); + return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, NULL, 0); } /* @@ -3125,22 +3112,18 @@ cleanup: return ret; } - -/* Reopen a single BlockDriverState with the specified flags. */ -int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp) { - int ret = -1; - Error *local_err = NULL; + int ret; BlockReopenQueue *queue; + QDict *opts = qdict_new(); - bdrv_subtree_drained_begin(bs); - - queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); - ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - } + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); + bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts); + ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, errp); bdrv_subtree_drained_end(bs); return ret; @@ -3214,6 +3197,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp) { int ret = -1; + int old_flags; Error *local_err = NULL; BlockDriver *drv; QemuOpts *opts; @@ -3240,7 +3224,12 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, goto error; } + /* This was already called in bdrv_reopen_queue_child() so the flags + * are up-to-date. This time we simply want to remove the options from + * QemuOpts in order to indicate that they have been processed. */ + old_flags = reopen_state->flags; update_flags_from_options(&reopen_state->flags, opts); + assert(old_flags == reopen_state->flags); discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD); if (discard != NULL) { diff --git a/block/Makefile.objs b/block/Makefile.objs index 46d585cfd0..7a81892a52 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -57,6 +57,8 @@ ssh.o-libs := $(LIBSSH2_LIBS) block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y) dmg-bz2.o-libs := $(BZIP2_LIBS) +block-obj-$(if $(CONFIG_LZFSE),m,n) += dmg-lzfse.o +dmg-lzfse.o-libs := $(LZFSE_LIBS) qcow.o-libs := -lz linux-aio.o-libs := -laio parallels.o-cflags := $(LIBXML2_CFLAGS) diff --git a/block/backup.c b/block/backup.c index 4d084f6ca6..b829b251eb 100644 --- a/block/backup.c +++ b/block/backup.c @@ -28,6 +28,13 @@ #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) +typedef struct CowRequest { + int64_t start_byte; + int64_t end_byte; + QLIST_ENTRY(CowRequest) list; + CoQueue wait_queue; /* coroutines blocked on this request */ +} CowRequest; + typedef struct BackupBlockJob { BlockJob common; BlockBackend *target; @@ -322,37 +329,6 @@ void backup_do_checkpoint(BlockJob *job, Error **errp) hbitmap_set(backup_job->copy_bitmap, 0, len); } -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, - uint64_t bytes) -{ - BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); - int64_t start, end; - - assert(block_job_driver(job) == &backup_job_driver); - - start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size); - end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size); - wait_for_overlapping_requests(backup_job, start, end); -} - -void backup_cow_request_begin(CowRequest *req, BlockJob *job, - int64_t offset, uint64_t bytes) -{ - BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); - int64_t start, end; - - assert(block_job_driver(job) == &backup_job_driver); - - start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size); - end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size); - cow_request_begin(req, backup_job, start, end); -} - -void backup_cow_request_end(CowRequest *req) -{ - cow_request_end(req); -} - static void backup_drain(BlockJob *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common); diff --git a/block/commit.c b/block/commit.c index a2da5740b0..53148e610b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -38,7 +38,7 @@ typedef struct CommitBlockJob { BlockBackend *base; BlockDriverState *base_bs; BlockdevOnError on_error; - int base_flags; + bool base_read_only; char *backing_file_str; } CommitBlockJob; @@ -124,8 +124,8 @@ static void commit_clean(Job *job) /* restore base open flags here if appropriate (e.g., change the base back * to r/o). These reopens do not need to be atomic, since we won't abort * even on failure here */ - if (s->base_flags != bdrv_get_flags(s->base_bs)) { - bdrv_reopen(s->base_bs, s->base_flags, NULL); + if (s->base_read_only) { + bdrv_reopen_set_read_only(s->base_bs, true, NULL); } g_free(s->backing_file_str); @@ -264,7 +264,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, const char *filter_node_name, Error **errp) { CommitBlockJob *s; - int orig_base_flags; BlockDriverState *iter; BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; @@ -283,11 +282,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, } /* convert base to r/w, if necessary */ - orig_base_flags = bdrv_get_flags(base); - if (!(orig_base_flags & BDRV_O_RDWR)) { - bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); + s->base_read_only = bdrv_is_read_only(base); + if (s->base_read_only) { + if (bdrv_reopen_set_read_only(base, false, errp) != 0) { goto fail; } } @@ -363,7 +360,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - s->base_flags = orig_base_flags; s->backing_file_str = g_strdup(backing_file_str); s->on_error = on_error; @@ -395,7 +391,7 @@ int bdrv_commit(BlockDriverState *bs) BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t offset, length, backing_length; - int ro, open_flags; + int ro; int64_t n; int ret = 0; uint8_t *buf = NULL; @@ -414,10 +410,9 @@ int bdrv_commit(BlockDriverState *bs) } ro = bs->backing->bs->read_only; - open_flags = bs->backing->bs->open_flags; if (ro) { - if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) { + if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) { return -EACCES; } } @@ -527,7 +522,7 @@ ro_cleanup: if (ro) { /* ignoring error return here */ - bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL); + bdrv_reopen_set_read_only(bs->backing->bs, true, NULL); } return ret; diff --git a/block/dmg-lzfse.c b/block/dmg-lzfse.c new file mode 100644 index 0000000000..19d25bc646 --- /dev/null +++ b/block/dmg-lzfse.c @@ -0,0 +1,49 @@ +/* + * DMG lzfse uncompression + * + * Copyright (c) 2018 Julio Cesar Faracco + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "dmg.h" +#include <lzfse.h> + +static int dmg_uncompress_lzfse_do(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out) +{ + size_t out_size = lzfse_decode_buffer((uint8_t *) next_out, avail_out, + (uint8_t *) next_in, avail_in, + NULL); + + /* We need to decode the single chunk only. */ + /* So, out_size == avail_out is not an error here. */ + if (out_size > 0) { + return out_size; + } + return -1; +} + +__attribute__((constructor)) +static void dmg_lzfse_init(void) +{ + assert(!dmg_uncompress_lzfse); + dmg_uncompress_lzfse = dmg_uncompress_lzfse_do; +} diff --git a/block/dmg.c b/block/dmg.c index 1d9283ba2f..50e91aef6d 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -33,6 +33,9 @@ int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in, char *next_out, unsigned int avail_out); +int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out); + enum { /* Limit chunk sizes to prevent unreasonable amounts of memory being used * or truncating when converting to 32-bit types @@ -41,6 +44,19 @@ enum { DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512, }; +enum { + /* DMG Block Type */ + UDZE = 0, /* Zeroes */ + UDRW, /* RAW type */ + UDIG, /* Ignore */ + UDCO = 0x80000004, + UDZO, + UDBZ, + ULFO, + UDCM = 0x7ffffffe, /* Comments */ + UDLE /* Last Entry */ +}; + static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename) { int len; @@ -105,15 +121,16 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk, uint32_t uncompressed_sectors = 0; switch (s->types[chunk]) { - case 0x80000005: /* zlib compressed */ - case 0x80000006: /* bzip2 compressed */ + case UDZO: /* zlib compressed */ + case UDBZ: /* bzip2 compressed */ + case ULFO: /* lzfse compressed */ compressed_size = s->lengths[chunk]; uncompressed_sectors = s->sectorcounts[chunk]; break; - case 1: /* copy */ + case UDRW: /* copy */ uncompressed_sectors = DIV_ROUND_UP(s->lengths[chunk], 512); break; - case 2: /* zero */ + case UDIG: /* zero */ /* as the all-zeroes block may be large, it is treated specially: the * sector is not copied from a large buffer, a simple memset is used * instead. Therefore uncompressed_sectors does not need to be set. */ @@ -182,12 +199,14 @@ typedef struct DmgHeaderState { static bool dmg_is_known_block_type(uint32_t entry_type) { switch (entry_type) { - case 0x00000001: /* uncompressed */ - case 0x00000002: /* zeroes */ - case 0x80000005: /* zlib */ + case UDRW: /* uncompressed */ + case UDIG: /* zeroes */ + case UDZO: /* zlib */ return true; - case 0x80000006: /* bzip2 */ + case UDBZ: /* bzip2 */ return !!dmg_uncompress_bz2; + case ULFO: /* lzfse */ + return !!dmg_uncompress_lzfse; default: return false; } @@ -425,6 +444,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, } block_module_load_one("dmg-bz2"); + block_module_load_one("dmg-lzfse"); s->n_chunks = 0; s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; @@ -579,7 +599,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) s->current_chunk = s->n_chunks; switch (s->types[chunk]) { /* block entry type */ - case 0x80000005: { /* zlib compressed */ + case UDZO: { /* zlib compressed */ /* we need to buffer, because only the chunk as whole can be * inflated. */ ret = bdrv_pread(bs->file, s->offsets[chunk], @@ -602,7 +622,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) return -1; } break; } - case 0x80000006: /* bzip2 compressed */ + case UDBZ: /* bzip2 compressed */ if (!dmg_uncompress_bz2) { break; } @@ -623,14 +643,35 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) return ret; } break; - case 1: /* copy */ + case ULFO: + if (!dmg_uncompress_lzfse) { + break; + } + /* we need to buffer, because only the chunk as whole can be + * inflated. */ + ret = bdrv_pread(bs->file, s->offsets[chunk], + s->compressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) { + return -1; + } + + ret = dmg_uncompress_lzfse((char *)s->compressed_chunk, + (unsigned int) s->lengths[chunk], + (char *)s->uncompressed_chunk, + (unsigned int) + (512 * s->sectorcounts[chunk])); + if (ret < 0) { + return ret; + } + break; + case UDRW: /* copy */ ret = bdrv_pread(bs->file, s->offsets[chunk], s->uncompressed_chunk, s->lengths[chunk]); if (ret != s->lengths[chunk]) { return -1; } break; - case 2: /* zero */ + case UDIG: /* zero */ /* see dmg_read, it is treated specially. No buffer needs to be * pre-filled, the zeroes can be set directly. */ break; diff --git a/block/dmg.h b/block/dmg.h index 2ecf239ba5..f28929998f 100644 --- a/block/dmg.h +++ b/block/dmg.h @@ -55,4 +55,7 @@ typedef struct BDRVDMGState { extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in, char *next_out, unsigned int avail_out); +extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out); + #endif diff --git a/block/file-posix.c b/block/file-posix.c index 07bbdab953..d8f0b93752 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -182,25 +182,29 @@ static int64_t raw_getlength(BlockDriverState *bs); typedef struct RawPosixAIOData { BlockDriverState *bs; + int aio_type; int aio_fildes; - union { - struct iovec *aio_iov; - void *aio_ioctl_buf; - }; - int aio_niov; - uint64_t aio_nbytes; -#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ + off_t aio_offset; - int aio_type; + uint64_t aio_nbytes; + union { struct { + struct iovec *iov; + int niov; + } io; + struct { + uint64_t cmd; + void *buf; + } ioctl; + struct { int aio_fd2; off_t aio_offset2; - }; + } copy_range; struct { PreallocMode prealloc; Error **errp; - }; + } truncate; }; } RawPosixAIOData; @@ -1148,20 +1152,24 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) } #endif -static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) +#if defined(__linux__) +static int handle_aiocb_ioctl(void *opaque) { + RawPosixAIOData *aiocb = opaque; int ret; - ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); + ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf); if (ret == -1) { return -errno; } return 0; } +#endif /* linux */ -static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) +static int handle_aiocb_flush(void *opaque) { + RawPosixAIOData *aiocb = opaque; BDRVRawState *s = aiocb->bs->opaque; int ret; @@ -1233,13 +1241,13 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) do { if (aiocb->aio_type & QEMU_AIO_WRITE) len = qemu_pwritev(aiocb->aio_fildes, - aiocb->aio_iov, - aiocb->aio_niov, + aiocb->io.iov, + aiocb->io.niov, aiocb->aio_offset); else len = qemu_preadv(aiocb->aio_fildes, - aiocb->aio_iov, - aiocb->aio_niov, + aiocb->io.iov, + aiocb->io.niov, aiocb->aio_offset); } while (len == -1 && errno == EINTR); @@ -1295,8 +1303,9 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) return offset; } -static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) +static int handle_aiocb_rw(void *opaque) { + RawPosixAIOData *aiocb = opaque; ssize_t nbytes; char *buf; @@ -1305,8 +1314,9 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) * If there is just a single buffer, and it is properly aligned * we can just use plain pread/pwrite without any problems. */ - if (aiocb->aio_niov == 1) { - return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); + if (aiocb->io.niov == 1) { + nbytes = handle_aiocb_rw_linear(aiocb, aiocb->io.iov->iov_base); + goto out; } /* * We have more than one iovec, and all are properly aligned. @@ -1318,7 +1328,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) nbytes = handle_aiocb_rw_vector(aiocb); if (nbytes == aiocb->aio_nbytes || (nbytes < 0 && nbytes != -ENOSYS)) { - return nbytes; + goto out; } preadv_present = false; } @@ -1336,16 +1346,17 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) */ buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes); if (buf == NULL) { - return -ENOMEM; + nbytes = -ENOMEM; + goto out; } if (aiocb->aio_type & QEMU_AIO_WRITE) { char *p = buf; int i; - for (i = 0; i < aiocb->aio_niov; ++i) { - memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); - p += aiocb->aio_iov[i].iov_len; + for (i = 0; i < aiocb->io.niov; ++i) { + memcpy(p, aiocb->io.iov[i].iov_base, aiocb->io.iov[i].iov_len); + p += aiocb->io.iov[i].iov_len; } assert(p - buf == aiocb->aio_nbytes); } @@ -1356,12 +1367,12 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) size_t count = aiocb->aio_nbytes, copy; int i; - for (i = 0; i < aiocb->aio_niov && count; ++i) { + for (i = 0; i < aiocb->io.niov && count; ++i) { copy = count; - if (copy > aiocb->aio_iov[i].iov_len) { - copy = aiocb->aio_iov[i].iov_len; + if (copy > aiocb->io.iov[i].iov_len) { + copy = aiocb->io.iov[i].iov_len; } - memcpy(aiocb->aio_iov[i].iov_base, p, copy); + memcpy(aiocb->io.iov[i].iov_base, p, copy); assert(count >= copy); p += copy; count -= copy; @@ -1370,7 +1381,21 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) } qemu_vfree(buf); - return nbytes; +out: + if (nbytes == aiocb->aio_nbytes) { + return 0; + } else if (nbytes >= 0 && nbytes < aiocb->aio_nbytes) { + if (aiocb->aio_type & QEMU_AIO_WRITE) { + return -EINVAL; + } else { + iov_memset(aiocb->io.iov, aiocb->io.niov, nbytes, + 0, aiocb->aio_nbytes - nbytes); + return 0; + } + } else { + assert(nbytes < 0); + return nbytes; + } } #ifdef CONFIG_XFS @@ -1460,8 +1485,9 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) return ret; } -static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) +static int handle_aiocb_write_zeroes(void *opaque) { + RawPosixAIOData *aiocb = opaque; #if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS) BDRVRawState *s = aiocb->bs->opaque; #endif @@ -1525,8 +1551,9 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) return -ENOTSUP; } -static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb) +static int handle_aiocb_write_zeroes_unmap(void *opaque) { + RawPosixAIOData *aiocb = opaque; BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque; int ret; @@ -1568,18 +1595,20 @@ static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, } #endif -static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) +static int handle_aiocb_copy_range(void *opaque) { + RawPosixAIOData *aiocb = opaque; uint64_t bytes = aiocb->aio_nbytes; off_t in_off = aiocb->aio_offset; - off_t out_off = aiocb->aio_offset2; + off_t out_off = aiocb->copy_range.aio_offset2; while (bytes) { ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, - aiocb->aio_fd2, &out_off, + aiocb->copy_range.aio_fd2, &out_off, bytes, 0); trace_file_copy_file_range(aiocb->bs, aiocb->aio_fildes, in_off, - aiocb->aio_fd2, out_off, bytes, 0, ret); + aiocb->copy_range.aio_fd2, out_off, bytes, + 0, ret); if (ret == 0) { /* No progress (e.g. when beyond EOF), let the caller fall back to * buffer I/O. */ @@ -1600,8 +1629,9 @@ static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) return 0; } -static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) +static int handle_aiocb_discard(void *opaque) { + RawPosixAIOData *aiocb = opaque; int ret = -EOPNOTSUPP; BDRVRawState *s = aiocb->bs->opaque; @@ -1640,15 +1670,17 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) return ret; } -static int handle_aiocb_truncate(RawPosixAIOData *aiocb) +static int handle_aiocb_truncate(void *opaque) { + RawPosixAIOData *aiocb = opaque; int result = 0; int64_t current_length = 0; char *buf = NULL; struct stat st; int fd = aiocb->aio_fildes; int64_t offset = aiocb->aio_offset; - Error **errp = aiocb->errp; + PreallocMode prealloc = aiocb->truncate.prealloc; + Error **errp = aiocb->truncate.errp; if (fstat(fd, &st) < 0) { result = -errno; @@ -1657,12 +1689,12 @@ static int handle_aiocb_truncate(RawPosixAIOData *aiocb) } current_length = st.st_size; - if (current_length > offset && aiocb->prealloc != PREALLOC_MODE_OFF) { + if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { error_setg(errp, "Cannot use preallocation for shrinking files"); return -ENOTSUP; } - switch (aiocb->prealloc) { + switch (prealloc) { #ifdef CONFIG_POSIX_FALLOCATE case PREALLOC_MODE_FALLOC: /* @@ -1743,7 +1775,7 @@ static int handle_aiocb_truncate(RawPosixAIOData *aiocb) default: result = -ENOTSUP; error_setg(errp, "Unsupported preallocation mode: %s", - PreallocMode_str(aiocb->prealloc)); + PreallocMode_str(prealloc)); return result; } @@ -1759,104 +1791,19 @@ out: return result; } -static int aio_worker(void *arg) +static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs, + ThreadPoolFunc func, void *arg) { - RawPosixAIOData *aiocb = arg; - ssize_t ret = 0; - - switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { - case QEMU_AIO_READ: - ret = handle_aiocb_rw(aiocb); - if (ret >= 0 && ret < aiocb->aio_nbytes) { - iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, - 0, aiocb->aio_nbytes - ret); - - ret = aiocb->aio_nbytes; - } - if (ret == aiocb->aio_nbytes) { - ret = 0; - } else if (ret >= 0 && ret < aiocb->aio_nbytes) { - ret = -EINVAL; - } - break; - case QEMU_AIO_WRITE: - ret = handle_aiocb_rw(aiocb); - if (ret == aiocb->aio_nbytes) { - ret = 0; - } else if (ret >= 0 && ret < aiocb->aio_nbytes) { - ret = -EINVAL; - } - break; - case QEMU_AIO_FLUSH: - ret = handle_aiocb_flush(aiocb); - break; - case QEMU_AIO_IOCTL: - ret = handle_aiocb_ioctl(aiocb); - break; - case QEMU_AIO_DISCARD: - ret = handle_aiocb_discard(aiocb); - break; - case QEMU_AIO_WRITE_ZEROES: - ret = handle_aiocb_write_zeroes(aiocb); - break; - case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD: - ret = handle_aiocb_write_zeroes_unmap(aiocb); - break; - case QEMU_AIO_COPY_RANGE: - ret = handle_aiocb_copy_range(aiocb); - break; - case QEMU_AIO_TRUNCATE: - ret = handle_aiocb_truncate(aiocb); - break; - default: - error_report("invalid aio request (0x%x)", aiocb->aio_type); - ret = -EINVAL; - break; - } - - g_free(aiocb); - return ret; -} - -static int paio_submit_co_full(BlockDriverState *bs, int fd, - int64_t offset, int fd2, int64_t offset2, - QEMUIOVector *qiov, - int bytes, int type) -{ - RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); - ThreadPool *pool; - - acb->bs = bs; - acb->aio_type = type; - acb->aio_fildes = fd; - acb->aio_fd2 = fd2; - acb->aio_offset2 = offset2; - - acb->aio_nbytes = bytes; - acb->aio_offset = offset; - - if (qiov) { - acb->aio_iov = qiov->iov; - acb->aio_niov = qiov->niov; - assert(qiov->size == bytes); - } - - trace_file_paio_submit_co(offset, bytes, type); - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); - return thread_pool_submit_co(pool, aio_worker, acb); -} - -static inline int paio_submit_co(BlockDriverState *bs, int fd, - int64_t offset, QEMUIOVector *qiov, - int bytes, int type) -{ - return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type); + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + return thread_pool_submit_co(pool, func, arg); } static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int type) { BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; if (fd_open(bs) < 0) return -EIO; @@ -1879,7 +1826,20 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, } } - return paio_submit_co(bs, s->fd, offset, qiov, bytes, type); + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = type, + .aio_offset = offset, + .aio_nbytes = bytes, + .io = { + .iov = qiov->iov, + .niov = qiov->niov, + }, + }; + + assert(qiov->size == bytes); + return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb); } static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, @@ -1922,6 +1882,7 @@ static void raw_aio_unplug(BlockDriverState *bs) static int raw_co_flush_to_disk(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; int ret; ret = fd_open(bs); @@ -1929,7 +1890,13 @@ static int raw_co_flush_to_disk(BlockDriverState *bs) return ret; } - return paio_submit_co(bs, s->fd, 0, NULL, 0, QEMU_AIO_FLUSH); + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_FLUSH, + }; + + return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); } static void raw_aio_attach_aio_context(BlockDriverState *bs, @@ -1968,21 +1935,20 @@ static int coroutine_fn raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, PreallocMode prealloc, Error **errp) { - RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); - ThreadPool *pool; + RawPosixAIOData acb; - *acb = (RawPosixAIOData) { + acb = (RawPosixAIOData) { .bs = bs, .aio_fildes = fd, .aio_type = QEMU_AIO_TRUNCATE, .aio_offset = offset, - .prealloc = prealloc, - .errp = errp, + .truncate = { + .prealloc = prealloc, + .errp = errp, + }, }; - /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); - return thread_pool_submit_co(pool, aio_worker, acb); + return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb); } static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, @@ -2613,25 +2579,67 @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, } static coroutine_fn int -raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) { BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_DISCARD, + .aio_offset = offset, + .aio_nbytes = bytes, + }; + + if (blkdev) { + acb.aio_type |= QEMU_AIO_BLKDEV; + } - return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD); + return raw_thread_pool_submit(bs, handle_aiocb_discard, &acb); } -static int coroutine_fn raw_co_pwrite_zeroes( - BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) +static coroutine_fn int +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +{ + return raw_do_pdiscard(bs, offset, bytes, false); +} + +static int coroutine_fn +raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + BdrvRequestFlags flags, bool blkdev) { BDRVRawState *s = bs->opaque; - int operation = QEMU_AIO_WRITE_ZEROES; + RawPosixAIOData acb; + ThreadPoolFunc *handler; + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_WRITE_ZEROES, + .aio_offset = offset, + .aio_nbytes = bytes, + }; + + if (blkdev) { + acb.aio_type |= QEMU_AIO_BLKDEV; + } if (flags & BDRV_REQ_MAY_UNMAP) { - operation |= QEMU_AIO_DISCARD; + acb.aio_type |= QEMU_AIO_DISCARD; + handler = handle_aiocb_write_zeroes_unmap; + } else { + handler = handle_aiocb_write_zeroes; } - return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); + return raw_thread_pool_submit(bs, handler, &acb); +} + +static int coroutine_fn raw_co_pwrite_zeroes( + BlockDriverState *bs, int64_t offset, + int bytes, BdrvRequestFlags flags) +{ + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false); } static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -2702,6 +2710,7 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + RawPosixAIOData acb; BDRVRawState *s = bs->opaque; BDRVRawState *src_s; @@ -2714,8 +2723,20 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, if (fd_open(src->bs) < 0 || fd_open(dst->bs) < 0) { return -EIO; } - return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset, - NULL, bytes, QEMU_AIO_COPY_RANGE); + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_type = QEMU_AIO_COPY_RANGE, + .aio_fildes = src_s->fd, + .aio_offset = src_offset, + .aio_nbytes = bytes, + .copy_range = { + .aio_fd2 = s->fd, + .aio_offset2 = dst_offset, + }, + }; + + return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb); } BlockDriver bdrv_file = { @@ -3063,36 +3084,39 @@ hdev_open_Mac_error: } #if defined(__linux__) - -static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque) +static int coroutine_fn +hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { BDRVRawState *s = bs->opaque; - RawPosixAIOData *acb; - ThreadPool *pool; + RawPosixAIOData acb; + int ret; - if (fd_open(bs) < 0) - return NULL; + ret = fd_open(bs); + if (ret < 0) { + return ret; + } if (req == SG_IO && s->pr_mgr) { struct sg_io_hdr *io_hdr = buf; if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT || io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) { return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs), - s->fd, io_hdr, cb, opaque); + s->fd, io_hdr); } } - acb = g_new(RawPosixAIOData, 1); - acb->bs = bs; - acb->aio_type = QEMU_AIO_IOCTL; - acb->aio_fildes = s->fd; - acb->aio_offset = 0; - acb->aio_ioctl_buf = buf; - acb->aio_ioctl_cmd = req; - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); - return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); + acb = (RawPosixAIOData) { + .bs = bs, + .aio_type = QEMU_AIO_IOCTL, + .aio_fildes = s->fd, + .aio_offset = 0, + .ioctl = { + .buf = buf, + .cmd = req, + }, + }; + + return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb); } #endif /* linux */ @@ -3109,22 +3133,18 @@ static int fd_open(BlockDriverState *bs) static coroutine_fn int hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { - BDRVRawState *s = bs->opaque; int ret; ret = fd_open(bs); if (ret < 0) { return ret; } - return paio_submit_co(bs, s->fd, offset, NULL, bytes, - QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV); + return raw_do_pdiscard(bs, offset, bytes, true); } static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { - BDRVRawState *s = bs->opaque; - int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV; int rc; rc = fd_open(bs); @@ -3132,11 +3152,7 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, return rc; } - if (flags & BDRV_REQ_MAY_UNMAP) { - operation |= QEMU_AIO_DISCARD; - } - - return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); } static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, @@ -3241,7 +3257,7 @@ static BlockDriver bdrv_host_device = { /* generic scsi device */ #ifdef __linux__ - .bdrv_aio_ioctl = hdev_aio_ioctl, + .bdrv_co_ioctl = hdev_co_ioctl, #endif }; @@ -3363,7 +3379,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_lock_medium = cdrom_lock_medium, /* generic scsi device */ - .bdrv_aio_ioctl = hdev_aio_ioctl, + .bdrv_co_ioctl = hdev_co_ioctl, }; #endif /* __linux__ */ diff --git a/block/mirror.c b/block/mirror.c index 8f52c6215d..ab59ad77e8 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -277,7 +277,8 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, return ret; } -static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) +static inline void coroutine_fn +mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) { MirrorOp *op; @@ -295,7 +296,8 @@ static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) abort(); } -static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +static inline void coroutine_fn +mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) { /* Only non-active operations use up in-flight slots */ mirror_wait_for_any_operation(s, false); @@ -598,7 +600,7 @@ static void mirror_free_init(MirrorBlockJob *s) * mirror_resume() because mirror_run() will begin iterating again * when the job is resumed. */ -static void mirror_wait_for_all_io(MirrorBlockJob *s) +static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) { while (s->in_flight > 0) { mirror_wait_for_free_in_flight_slot(s); @@ -669,9 +671,10 @@ static int mirror_exit_common(Job *job) if (s->should_complete && !abort) { BlockDriverState *to_replace = s->to_replace ?: src; + bool ro = bdrv_is_read_only(to_replace); - if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { - bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); + if (ro != bdrv_is_read_only(target_bs)) { + bdrv_reopen_set_read_only(target_bs, ro, NULL); } /* The mirror job has no requests in flight any more, but we need to @@ -731,7 +734,7 @@ static void mirror_abort(Job *job) assert(ret == 0); } -static void mirror_throttle(MirrorBlockJob *s) +static void coroutine_fn mirror_throttle(MirrorBlockJob *s) { int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -1106,7 +1109,7 @@ static void mirror_complete(Job *job, Error **errp) job_enter(job); } -static void mirror_pause(Job *job) +static void coroutine_fn mirror_pause(Job *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); @@ -1177,9 +1180,10 @@ static const BlockJobDriver commit_active_job_driver = { .drain = mirror_drain, }; -static void do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +static void coroutine_fn +do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BdrvDirtyBitmapIter *iter; QEMUIOVector target_qiov; @@ -1689,13 +1693,15 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque, bool auto_complete, Error **errp) { - int orig_base_flags; + bool base_read_only; Error *local_err = NULL; - orig_base_flags = bdrv_get_flags(base); + base_read_only = bdrv_is_read_only(base); - if (bdrv_reopen(base, bs->open_flags, errp)) { - return; + if (base_read_only) { + if (bdrv_reopen_set_read_only(base, false, errp) < 0) { + return; + } } mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, @@ -1714,6 +1720,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, error_restore_flags: /* ignore error and errp for bdrv_reopen, because we want to propagate * the original error */ - bdrv_reopen(base, orig_base_flags, NULL); + if (base_read_only) { + bdrv_reopen_set_read_only(base, true, NULL); + } return; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d37fe08b3d..e2737429f5 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1571,76 +1571,6 @@ again: return 0; } -static int decompress_buffer(uint8_t *out_buf, int out_buf_size, - const uint8_t *buf, int buf_size) -{ - z_stream strm1, *strm = &strm1; - int ret, out_len; - - memset(strm, 0, sizeof(*strm)); - - strm->next_in = (uint8_t *)buf; - strm->avail_in = buf_size; - strm->next_out = out_buf; - strm->avail_out = out_buf_size; - - ret = inflateInit2(strm, -12); - if (ret != Z_OK) - return -1; - ret = inflate(strm, Z_FINISH); - out_len = strm->next_out - out_buf; - if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || - out_len != out_buf_size) { - inflateEnd(strm); - return -1; - } - inflateEnd(strm); - return 0; -} - -int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) -{ - BDRVQcow2State *s = bs->opaque; - int ret, csize, nb_csectors, sector_offset; - uint64_t coffset; - - coffset = cluster_offset & s->cluster_offset_mask; - if (s->cluster_cache_offset != coffset) { - nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; - sector_offset = coffset & 511; - csize = nb_csectors * 512 - sector_offset; - - /* Allocate buffers on first decompress operation, most images are - * uncompressed and the memory overhead can be avoided. The buffers - * are freed in .bdrv_close(). - */ - if (!s->cluster_data) { - /* one more sector for decompressed data alignment */ - s->cluster_data = qemu_try_blockalign(bs->file->bs, - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512); - if (!s->cluster_data) { - return -ENOMEM; - } - } - if (!s->cluster_cache) { - s->cluster_cache = g_malloc(s->cluster_size); - } - - BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); - ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, - nb_csectors); - if (ret < 0) { - return ret; - } - if (decompress_buffer(s->cluster_cache, s->cluster_size, - s->cluster_data + sector_offset, csize) < 0) { - return -EIO; - } - s->cluster_cache_offset = coffset; - } - return 0; -} - /* * This discards as many clusters of nb_clusters as possible at once (i.e. * all clusters in the same L2 slice) and returns the number of discarded diff --git a/block/qcow2.c b/block/qcow2.c index bc8868c36a..4897abae5e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -74,6 +74,13 @@ typedef struct { #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 +static int coroutine_fn +qcow2_co_preadv_compressed(BlockDriverState *bs, + uint64_t file_cluster_offset, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov); + static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) { const QCowHeader *cow_header = (const void *)buf; @@ -1414,7 +1421,6 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, goto fail; } - s->cluster_cache_offset = -1; s->flags = flags; ret = qcow2_refcount_init(bs); @@ -1914,15 +1920,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, break; case QCOW2_CLUSTER_COMPRESSED: - /* add AIO support for compressed blocks ? */ - ret = qcow2_decompress_cluster(bs, cluster_offset); + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_co_preadv_compressed(bs, cluster_offset, + offset, cur_bytes, + &hd_qiov); + qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; } - qemu_iovec_from_buf(&hd_qiov, 0, - s->cluster_cache + offset_in_cluster, - cur_bytes); break; case QCOW2_CLUSTER_NORMAL: @@ -2058,8 +2064,6 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, qemu_iovec_init(&hd_qiov, qiov->niov); - s->cluster_cache_offset = -1; /* disable compressed cache */ - qemu_co_mutex_lock(&s->lock); while (bytes != 0) { @@ -2223,8 +2227,6 @@ static void qcow2_close(BlockDriverState *bs) g_free(s->image_backing_file); g_free(s->image_backing_format); - g_free(s->cluster_cache); - qemu_vfree(s->cluster_data); qcow2_refcount_close(bs); qcow2_free_snapshots(bs); } @@ -3401,7 +3403,6 @@ qcow2_co_copy_range_to(BlockDriverState *bs, QCowL2Meta *l2meta = NULL; assert(!bs->encrypted); - s->cluster_cache_offset = -1; /* disable compressed cache */ qemu_co_mutex_lock(&s->lock); @@ -3722,14 +3723,15 @@ fail: /* * qcow2_compress() * - * @dest - destination buffer, at least of @size-1 bytes - * @src - source buffer, @size bytes + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes * * Returns: compressed size on success - * -1 if compression is inefficient + * -1 destination buffer is not enough to store compressed data * -2 on any other error */ -static ssize_t qcow2_compress(void *dest, const void *src, size_t size) +static ssize_t qcow2_compress(void *dest, size_t dest_size, + const void *src, size_t src_size) { ssize_t ret; z_stream strm; @@ -3738,20 +3740,20 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size) memset(&strm, 0, sizeof(strm)); ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -12, 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { + if (ret != Z_OK) { return -2; } /* strm.next_in is not const in old zlib versions, such as those used on * OpenBSD/NetBSD, so cast the const away */ - strm.avail_in = size; + strm.avail_in = src_size; strm.next_in = (void *) src; - strm.avail_out = size - 1; + strm.avail_out = dest_size; strm.next_out = dest; ret = deflate(&strm, Z_FINISH); if (ret == Z_STREAM_END) { - ret = size - 1 - strm.avail_out; + ret = dest_size - strm.avail_out; } else { ret = (ret == Z_OK ? -1 : -2); } @@ -3761,20 +3763,68 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size) return ret; } +/* + * qcow2_decompress() + * + * Decompress some data (not more than @src_size bytes) to produce exactly + * @dest_size bytes. + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: 0 on success + * -1 on fail + */ +static ssize_t qcow2_decompress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + int ret = 0; + z_stream strm; + + memset(&strm, 0, sizeof(strm)); + strm.avail_in = src_size; + strm.next_in = (void *) src; + strm.avail_out = dest_size; + strm.next_out = dest; + + ret = inflateInit2(&strm, -12); + if (ret != Z_OK) { + return -1; + } + + ret = inflate(&strm, Z_FINISH); + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { + /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but + * @src buffer may be processed partly (because in qcow2 we know size of + * compressed data with precision of one sector) */ + ret = -1; + } + + inflateEnd(&strm); + + return ret; +} + #define MAX_COMPRESS_THREADS 4 +typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size, + const void *src, size_t src_size); typedef struct Qcow2CompressData { void *dest; + size_t dest_size; const void *src; - size_t size; + size_t src_size; ssize_t ret; + + Qcow2CompressFunc func; } Qcow2CompressData; static int qcow2_compress_pool_func(void *opaque) { Qcow2CompressData *data = opaque; - data->ret = qcow2_compress(data->dest, data->src, data->size); + data->ret = data->func(data->dest, data->dest_size, + data->src, data->src_size); return 0; } @@ -3784,17 +3834,19 @@ static void qcow2_compress_complete(void *opaque, int ret) qemu_coroutine_enter(opaque); } -/* See qcow2_compress definition for parameters description */ -static ssize_t qcow2_co_compress(BlockDriverState *bs, - void *dest, const void *src, size_t size) +static ssize_t coroutine_fn +qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size, Qcow2CompressFunc func) { BDRVQcow2State *s = bs->opaque; BlockAIOCB *acb; ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); Qcow2CompressData arg = { .dest = dest, + .dest_size = dest_size, .src = src, - .size = size, + .src_size = src_size, + .func = func, }; while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { @@ -3817,6 +3869,22 @@ static ssize_t qcow2_co_compress(BlockDriverState *bs, return arg.ret; } +static ssize_t coroutine_fn +qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, + qcow2_compress); +} + +static ssize_t coroutine_fn +qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, + qcow2_decompress); +} + /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int @@ -3861,7 +3929,8 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, out_buf = g_malloc(s->cluster_size); - out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); + out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1, + buf, s->cluster_size); if (out_len == -2) { ret = -EINVAL; goto fail; @@ -3909,6 +3978,55 @@ fail: return ret; } +static int coroutine_fn +qcow2_co_preadv_compressed(BlockDriverState *bs, + uint64_t file_cluster_offset, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov) +{ + BDRVQcow2State *s = bs->opaque; + int ret = 0, csize, nb_csectors; + uint64_t coffset; + uint8_t *buf, *out_buf; + struct iovec iov; + QEMUIOVector local_qiov; + int offset_in_cluster = offset_into_cluster(s, offset); + + coffset = file_cluster_offset & s->cluster_offset_mask; + nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1; + csize = nb_csectors * 512 - (coffset & 511); + + buf = g_try_malloc(csize); + if (!buf) { + return -ENOMEM; + } + iov.iov_base = buf; + iov.iov_len = csize; + qemu_iovec_init_external(&local_qiov, &iov, 1); + + out_buf = qemu_blockalign(bs, s->cluster_size); + + BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); + ret = bdrv_co_preadv(bs->file, coffset, csize, &local_qiov, 0); + if (ret < 0) { + goto fail; + } + + if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) { + ret = -EIO; + goto fail; + } + + qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes); + +fail: + qemu_vfree(out_buf); + g_free(buf); + + return ret; +} + static int make_completely_empty(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; diff --git a/block/qcow2.h b/block/qcow2.h index 8662b68575..a98d24500b 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -278,9 +278,6 @@ typedef struct BDRVQcow2State { QEMUTimer *cache_clean_timer; unsigned cache_clean_interval; - uint8_t *cluster_cache; - uint8_t *cluster_data; - uint64_t cluster_cache_offset; QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs; uint64_t *refcount_table; @@ -616,7 +613,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); -int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *buf, int nb_sectors, bool enc, Error **errp); diff --git a/block/replication.c b/block/replication.c index 6349d6958e..e70dd95001 100644 --- a/block/replication.c +++ b/block/replication.c @@ -20,6 +20,7 @@ #include "block/block_backup.h" #include "sysemu/block-backend.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" #include "replication.h" typedef enum { @@ -39,8 +40,8 @@ typedef struct BDRVReplicationState { char *top_id; ReplicationState *rs; Error *blocker; - int orig_hidden_flags; - int orig_secondary_flags; + bool orig_hidden_read_only; + bool orig_secondary_read_only; int error; } BDRVReplicationState; @@ -218,9 +219,6 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs, QEMUIOVector *qiov) { BDRVReplicationState *s = bs->opaque; - BdrvChild *child = s->secondary_disk; - BlockJob *job = NULL; - CowRequest req; int ret; if (s->mode == REPLICATION_MODE_PRIMARY) { @@ -233,28 +231,9 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs, return ret; } - if (child && child->bs) { - job = child->bs->job; - } - - if (job) { - uint64_t remaining_bytes = remaining_sectors * BDRV_SECTOR_SIZE; - - backup_wait_for_overlapping_requests(child->bs->job, - sector_num * BDRV_SECTOR_SIZE, - remaining_bytes); - backup_cow_request_begin(&req, child->bs->job, - sector_num * BDRV_SECTOR_SIZE, - remaining_bytes); - ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, - remaining_bytes, qiov, 0); - backup_cow_request_end(&req); - goto out; - } - ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); -out: + return replication_return_value(s, ret); } @@ -371,44 +350,38 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) } } +/* This function is supposed to be called twice: + * first with writable = true, then with writable = false. + * The first call puts s->hidden_disk and s->secondary_disk in + * r/w mode, and the second puts them back in their original state. + */ static void reopen_backing_file(BlockDriverState *bs, bool writable, Error **errp) { BDRVReplicationState *s = bs->opaque; BlockReopenQueue *reopen_queue = NULL; - int orig_hidden_flags, orig_secondary_flags; - int new_hidden_flags, new_secondary_flags; Error *local_err = NULL; if (writable) { - orig_hidden_flags = s->orig_hidden_flags = - bdrv_get_flags(s->hidden_disk->bs); - new_hidden_flags = (orig_hidden_flags | BDRV_O_RDWR) & - ~BDRV_O_INACTIVE; - orig_secondary_flags = s->orig_secondary_flags = - bdrv_get_flags(s->secondary_disk->bs); - new_secondary_flags = (orig_secondary_flags | BDRV_O_RDWR) & - ~BDRV_O_INACTIVE; - } else { - orig_hidden_flags = (s->orig_hidden_flags | BDRV_O_RDWR) & - ~BDRV_O_INACTIVE; - new_hidden_flags = s->orig_hidden_flags; - orig_secondary_flags = (s->orig_secondary_flags | BDRV_O_RDWR) & - ~BDRV_O_INACTIVE; - new_secondary_flags = s->orig_secondary_flags; + s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs); + s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs); } bdrv_subtree_drained_begin(s->hidden_disk->bs); bdrv_subtree_drained_begin(s->secondary_disk->bs); - if (orig_hidden_flags != new_hidden_flags) { - reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, - new_hidden_flags); + if (s->orig_hidden_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); + reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, + opts); } - if (!(orig_secondary_flags & BDRV_O_RDWR)) { + if (s->orig_secondary_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, - NULL, new_secondary_flags); + opts); } if (reopen_queue) { diff --git a/block/stream.c b/block/stream.c index 81a7ec8ece..7a49ac0992 100644 --- a/block/stream.c +++ b/block/stream.c @@ -34,7 +34,7 @@ typedef struct StreamBlockJob { BlockDriverState *base; BlockdevOnError on_error; char *backing_file_str; - int bs_flags; + bool bs_read_only; } StreamBlockJob; static int coroutine_fn stream_populate(BlockBackend *blk, @@ -89,10 +89,10 @@ static void stream_clean(Job *job) BlockDriverState *bs = blk_bs(bjob->blk); /* Reopen the image back in read-only mode if necessary */ - if (s->bs_flags != bdrv_get_flags(bs)) { + if (s->bs_read_only) { /* Give up write permissions before making it read-only */ blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort); - bdrv_reopen(bs, s->bs_flags, NULL); + bdrv_reopen_set_read_only(bs, true, NULL); } g_free(s->backing_file_str); @@ -226,12 +226,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, { StreamBlockJob *s; BlockDriverState *iter; - int orig_bs_flags; + bool bs_read_only; /* Make sure that the image is opened in read-write mode */ - orig_bs_flags = bdrv_get_flags(bs); - if (!(orig_bs_flags & BDRV_O_RDWR)) { - if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { + bs_read_only = bdrv_is_read_only(bs); + if (bs_read_only) { + if (bdrv_reopen_set_read_only(bs, false, errp) != 0) { return; } } @@ -261,7 +261,7 @@ void stream_start(const char *job_id, BlockDriverState *bs, s->base = base; s->backing_file_str = g_strdup(backing_file_str); - s->bs_flags = orig_bs_flags; + s->bs_read_only = bs_read_only; s->on_error = on_error; trace_stream_start(bs, base, s); @@ -269,7 +269,7 @@ void stream_start(const char *job_id, BlockDriverState *bs, return; fail: - if (orig_bs_flags != bdrv_get_flags(bs)) { - bdrv_reopen(bs, orig_bs_flags, NULL); + if (bs_read_only) { + bdrv_reopen_set_read_only(bs, true, NULL); } } diff --git a/blockdev.c b/blockdev.c index 81f95d920b..e6c8349409 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1703,8 +1703,7 @@ static void external_snapshot_commit(BlkActionState *common) * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ if (!atomic_read(&state->old_bs->copy_on_read)) { - bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR, - NULL); + bdrv_reopen_set_read_only(state->old_bs, true, NULL); } aio_context_release(aio_context); @@ -4100,7 +4099,6 @@ void qmp_change_backing_file(const char *device, BlockDriverState *image_bs = NULL; Error *local_err = NULL; bool ro; - int open_flags; int ret; bs = qmp_get_root_bs(device, errp); @@ -4142,13 +4140,10 @@ void qmp_change_backing_file(const char *device, } /* if not r/w, reopen to make r/w */ - open_flags = image_bs->open_flags; ro = bdrv_is_read_only(image_bs); if (ro) { - bdrv_reopen(image_bs, open_flags | BDRV_O_RDWR, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) { goto out; } } @@ -4164,7 +4159,7 @@ void qmp_change_backing_file(const char *device, } if (ro) { - bdrv_reopen(image_bs, open_flags, &local_err); + bdrv_reopen_set_read_only(image_bs, true, &local_err); error_propagate(errp, local_err); } @@ -434,6 +434,7 @@ capstone="" lzo="" snappy="" bzip2="" +lzfse="" guest_agent="" guest_agent_with_vss="no" guest_agent_ntddscsi="no" @@ -1310,6 +1311,10 @@ for opt do ;; --enable-bzip2) bzip2="yes" ;; + --enable-lzfse) lzfse="yes" + ;; + --disable-lzfse) lzfse="no" + ;; --enable-guest-agent) guest_agent="yes" ;; --disable-guest-agent) guest_agent="no" @@ -1745,6 +1750,8 @@ disabled with --disable-FEATURE, default is enabled if available: snappy support of snappy compression library bzip2 support of bzip2 compression library (for reading bzip2-compressed dmg images) + lzfse support of lzfse compression library + (for reading lzfse-compressed dmg images) seccomp seccomp support coroutine-pool coroutine freelist (better performance) glusterfs GlusterFS backend @@ -2275,6 +2282,24 @@ EOF fi ########################################## +# lzfse check + +if test "$lzfse" != "no" ; then + cat > $TMPC << EOF +#include <lzfse.h> +int main(void) { lzfse_decode_scratch_size(); return 0; } +EOF + if compile_prog "" "-llzfse" ; then + lzfse="yes" + else + if test "$lzfse" = "yes"; then + feature_not_found "lzfse" "Install lzfse devel" + fi + lzfse="no" + fi +fi + +########################################## # libseccomp check libseccomp_minver="2.2.0" @@ -6096,6 +6121,7 @@ echo "Live block migration $live_block_migration" echo "lzo support $lzo" echo "snappy support $snappy" echo "bzip2 support $bzip2" +echo "lzfse support $lzfse" echo "NUMA host support $numa" echo "libxml2 $libxml2" echo "tcmalloc support $tcmalloc" @@ -6618,6 +6644,11 @@ if test "$bzip2" = "yes" ; then echo "BZIP2_LIBS=-lbz2" >> $config_host_mak fi +if test "$lzfse" = "yes" ; then + echo "CONFIG_LZFSE=y" >> $config_host_mak + echo "LZFSE_LIBS=-llzfse" >> $config_host_mak +fi + if test "$libiscsi" = "yes" ; then echo "CONFIG_LIBISCSI=m" >> $config_host_mak echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak diff --git a/include/block/block.h b/include/block/block.h index 7f5453b45b..f70a843b72 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -299,10 +299,10 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, - QDict *options, int flags); + BlockDriverState *bs, QDict *options); int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp); -int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp); int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void bdrv_reopen_commit(BDRVReopenState *reopen_state); diff --git a/include/block/block_backup.h b/include/block/block_backup.h index 994a3bd2ec..157596c296 100644 --- a/include/block/block_backup.h +++ b/include/block/block_backup.h @@ -20,19 +20,6 @@ #include "block/block_int.h" -typedef struct CowRequest { - int64_t start_byte; - int64_t end_byte; - QLIST_ENTRY(CowRequest) list; - CoQueue wait_queue; /* coroutines blocked on this request */ -} CowRequest; - -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, - uint64_t bytes); -void backup_cow_request_begin(CowRequest *req, BlockJob *job, - int64_t offset, uint64_t bytes); -void backup_cow_request_end(CowRequest *req); - void backup_do_checkpoint(BlockJob *job, Error **errp); #endif diff --git a/include/scsi/pr-manager.h b/include/scsi/pr-manager.h index 50a77b08fc..6ad5fd1ff7 100644 --- a/include/scsi/pr-manager.h +++ b/include/scsi/pr-manager.h @@ -5,6 +5,7 @@ #include "qapi/visitor.h" #include "qom/object_interfaces.h" #include "block/aio.h" +#include "qemu/coroutine.h" #define TYPE_PR_MANAGER "pr-manager" @@ -37,11 +38,8 @@ typedef struct PRManagerClass { } PRManagerClass; bool pr_manager_is_connected(PRManager *pr_mgr); -BlockAIOCB *pr_manager_execute(PRManager *pr_mgr, - AioContext *ctx, int fd, - struct sg_io_hdr *hdr, - BlockCompletionFunc *complete, - void *opaque); +int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd, + struct sg_io_hdr *hdr); PRManager *pr_manager_lookup(const char *id, Error **errp); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 5363482213..2c39124036 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" #include "qemu-io.h" #include "sysemu/block-backend.h" #include "block/block.h" @@ -1978,6 +1979,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) int flags = bs->open_flags; bool writethrough = !blk_enable_write_cache(blk); bool has_rw_option = false; + bool has_cache_option = false; BlockReopenQueue *brq; Error *local_err = NULL; @@ -1989,6 +1991,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) error_report("Invalid cache option: %s", optarg); return -EINVAL; } + has_cache_option = true; break; case 'o': if (!qemu_opts_parse_noisily(&reopen_opts, optarg, 0)) { @@ -2046,11 +2049,33 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) } qopts = qemu_opts_find(&reopen_opts, NULL); - opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; + opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : qdict_new(); qemu_opts_reset(&reopen_opts); + if (qdict_haskey(opts, BDRV_OPT_READ_ONLY)) { + if (has_rw_option) { + error_report("Cannot set both -r/-w and '" BDRV_OPT_READ_ONLY "'"); + qobject_unref(opts); + return -EINVAL; + } + } else { + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR)); + } + + if (qdict_haskey(opts, BDRV_OPT_CACHE_DIRECT) || + qdict_haskey(opts, BDRV_OPT_CACHE_NO_FLUSH)) { + if (has_cache_option) { + error_report("Cannot set both -c and the cache options"); + qobject_unref(opts); + return -EINVAL; + } + } else { + qdict_put_bool(opts, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); + qdict_put_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, flags & BDRV_O_NO_FLUSH); + } + bdrv_subtree_drained_begin(bs); - brq = bdrv_reopen_queue(NULL, bs, opts, flags); + brq = bdrv_reopen_queue(NULL, bs, opts); bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); bdrv_subtree_drained_end(bs); diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c index 2a8f300dde..d9f4e8c3ad 100644 --- a/scsi/pr-manager.c +++ b/scsi/pr-manager.c @@ -48,24 +48,21 @@ static int pr_manager_worker(void *opaque) } -BlockAIOCB *pr_manager_execute(PRManager *pr_mgr, - AioContext *ctx, int fd, - struct sg_io_hdr *hdr, - BlockCompletionFunc *complete, - void *opaque) +int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd, + struct sg_io_hdr *hdr) { - PRManagerData *data = g_new(PRManagerData, 1); ThreadPool *pool = aio_get_thread_pool(ctx); + PRManagerData data = { + .pr_mgr = pr_mgr, + .fd = fd, + .hdr = hdr, + }; - trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque); - data->pr_mgr = pr_mgr; - data->fd = fd; - data->hdr = hdr; + trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1]); /* The matching object_unref is in pr_manager_worker. */ object_ref(OBJECT(pr_mgr)); - return thread_pool_submit_aio(pool, pr_manager_worker, - data, complete, opaque); + return thread_pool_submit_co(pool, pr_manager_worker, &data); } bool pr_manager_is_connected(PRManager *pr_mgr) diff --git a/scsi/trace-events b/scsi/trace-events index 45f5b6e49b..f8a68b11eb 100644 --- a/scsi/trace-events +++ b/scsi/trace-events @@ -1,3 +1,3 @@ # scsi/pr-manager.c -pr_manager_execute(int fd, int cmd, int sa, void *opaque) "fd=%d cmd=0x%02x service action=0x%02x opaque=%p" +pr_manager_execute(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x" pr_manager_run(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x" diff --git a/tests/qemu-iotests/133 b/tests/qemu-iotests/133 index a9a47a3c36..565e0b1b6e 100755 --- a/tests/qemu-iotests/133 +++ b/tests/qemu-iotests/133 @@ -91,6 +91,24 @@ echo IMGOPTSSYNTAX=false $QEMU_IO -f null-co -c 'reopen' -c 'info' \ "json:{'driver': 'null-co', 'size': 65536}" +echo +echo "=== Check that mixing -c/-r/-w and their corresponding options is forbidden ===" +echo + +$QEMU_IO -c 'reopen -r -o read-only=on' $TEST_IMG +$QEMU_IO -c 'reopen -w -o read-only=on' $TEST_IMG +$QEMU_IO -c 'reopen -c none -o cache.direct=on' $TEST_IMG +$QEMU_IO -c 'reopen -c writeback -o cache.direct=on' $TEST_IMG +$QEMU_IO -c 'reopen -c directsync -o cache.no-flush=on' $TEST_IMG + +echo +echo "=== Check that invalid options are handled correctly ===" +echo + +$QEMU_IO -c 'reopen -o read-only=foo' $TEST_IMG +$QEMU_IO -c 'reopen -o cache.no-flush=bar' $TEST_IMG +$QEMU_IO -c 'reopen -o cache.direct=baz' $TEST_IMG +$QEMU_IO -c 'reopen -o auto-read-only=qux' $TEST_IMG # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/133.out b/tests/qemu-iotests/133.out index f4a85aeb63..414c7fa27f 100644 --- a/tests/qemu-iotests/133.out +++ b/tests/qemu-iotests/133.out @@ -24,4 +24,19 @@ Cannot change the option 'driver' format name: null-co format name: null-co + +=== Check that mixing -c/-r/-w and their corresponding options is forbidden === + +Cannot set both -r/-w and 'read-only' +Cannot set both -r/-w and 'read-only' +Cannot set both -c and the cache options +Cannot set both -c and the cache options +Cannot set both -c and the cache options + +=== Check that invalid options are handled correctly === + +Parameter 'read-only' expects 'on' or 'off' +Parameter 'cache.no-flush' expects 'on' or 'off' +Parameter 'cache.direct' expects 'on' or 'off' +Parameter 'auto-read-only' expects 'on' or 'off' *** done diff --git a/tests/qemu-iotests/235 b/tests/qemu-iotests/235 index da044ed34e..d6edd97ab4 100755 --- a/tests/qemu-iotests/235 +++ b/tests/qemu-iotests/235 @@ -49,7 +49,9 @@ qemu_img_create('-f', iotests.imgfmt, '-o', 'preallocation=metadata', disk, str(size)) vm = QEMUMachine(iotests.qemu_prog) -vm.add_args('-machine', 'pc,accel=kvm') +vm.add_args('-machine', 'accel=kvm') +if iotests.qemu_default_machine == 's390-ccw-virtio': + vm.add_args('-no-shutdown') vm.add_args('-drive', 'id=src,file=' + disk) vm.launch() |