diff options
-rw-r--r-- | MAINTAINERS | 6 | ||||
-rw-r--r-- | block.c | 12 | ||||
-rw-r--r-- | block/Makefile.objs | 1 | ||||
-rw-r--r-- | block/blklogwrites.c | 547 | ||||
-rw-r--r-- | block/crypto.c | 2 | ||||
-rw-r--r-- | block/file-posix.c | 21 | ||||
-rw-r--r-- | block/qcow2.c | 138 | ||||
-rw-r--r-- | block/qcow2.h | 3 | ||||
-rw-r--r-- | include/block/block.h | 7 | ||||
-rw-r--r-- | qapi/block-core.json | 38 | ||||
-rw-r--r-- | qemu-img.c | 5 | ||||
-rwxr-xr-x | tests/qemu-iotests/051 | 15 | ||||
-rw-r--r-- | tests/qemu-iotests/051.out | 23 | ||||
-rw-r--r-- | tests/qemu-iotests/051.pc.out | 23 |
14 files changed, 791 insertions, 50 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 6630d691d1..4431a80860 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2052,6 +2052,12 @@ S: Supported F: block/quorum.c L: qemu-block@nongnu.org +blklogwrites +M: Ari Sundholm <ari@tuxera.com> +L: qemu-block@nongnu.org +S: Supported +F: block/blklogwrites.c + blkverify M: Stefan Hajnoczi <stefanha@redhat.com> L: qemu-block@nongnu.org @@ -1156,6 +1156,12 @@ static void bdrv_assign_node_name(BlockDriverState *bs, goto out; } + /* Make sure that the node name isn't truncated */ + if (strlen(node_name) >= sizeof(bs->node_name)) { + error_setg(errp, "Node name too long"); + goto out; + } + /* copy node name into the bs and insert it into the graph list */ pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); @@ -1948,12 +1954,6 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } -#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ - | BLK_PERM_WRITE \ - | BLK_PERM_WRITE_UNCHANGED \ - | BLK_PERM_RESIZE) -#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) - void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, BlockReopenQueue *reopen_queue, diff --git a/block/Makefile.objs b/block/Makefile.objs index 899bfb5e2c..c8337bf186 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -5,6 +5,7 @@ block-obj-y += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o +block-obj-y += blklogwrites.o block-obj-y += block-backend.o snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += file-posix.o diff --git a/block/blklogwrites.c b/block/blklogwrites.c new file mode 100644 index 0000000000..63bf6b34a9 --- /dev/null +++ b/block/blklogwrites.c @@ -0,0 +1,547 @@ +/* + * Write logging blk driver based on blkverify and blkdebug. + * + * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com> + * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com> + * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" +#include "qemu/option.h" + +/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */ + +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) +#define LOG_FLAG_MASK (LOG_FLUSH_FLAG \ + | LOG_FUA_FLAG \ + | LOG_DISCARD_FLAG \ + | LOG_MARK_FLAG) + +#define WRITE_LOG_VERSION 1ULL +#define WRITE_LOG_MAGIC 0x6a736677736872ULL + +/* All fields are little-endian. */ +struct log_write_super { + uint64_t magic; + uint64_t version; + uint64_t nr_entries; + uint32_t sectorsize; +} QEMU_PACKED; + +struct log_write_entry { + uint64_t sector; + uint64_t nr_sectors; + uint64_t flags; + uint64_t data_len; +} QEMU_PACKED; + +/* End of disk format structures. */ + +typedef struct { + BdrvChild *log_file; + uint32_t sectorsize; + uint32_t sectorbits; + uint64_t cur_log_sector; + uint64_t nr_entries; + uint64_t update_interval; +} BDRVBlkLogWritesState; + +static QemuOptsList runtime_opts = { + .name = "blklogwrites", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "log-append", + .type = QEMU_OPT_BOOL, + .help = "Append to an existing log", + }, + { + .name = "log-sector-size", + .type = QEMU_OPT_SIZE, + .help = "Log sector size", + }, + { + .name = "log-super-update-interval", + .type = QEMU_OPT_NUMBER, + .help = "Log superblock update interval (# of write requests)", + }, + { /* end of list */ } + }, +}; + +static inline uint32_t blk_log_writes_log2(uint32_t value) +{ + assert(value > 0); + return 31 - clz32(value); +} + +static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size) +{ + return sector_size < (1ull << 24) && is_power_of_2(sector_size); +} + +static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log, + uint32_t sector_size, + uint64_t nr_entries, + Error **errp) +{ + uint64_t cur_sector = 1; + uint64_t cur_idx = 0; + uint32_t sector_bits = blk_log_writes_log2(sector_size); + struct log_write_entry cur_entry; + + while (cur_idx < nr_entries) { + int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry, + sizeof(cur_entry)); + if (read_ret < 0) { + error_setg_errno(errp, -read_ret, + "Failed to read log entry %"PRIu64, cur_idx); + return (uint64_t)-1ull; + } + + if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) { + error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64, + le64_to_cpu(cur_entry.flags), cur_idx); + return (uint64_t)-1ull; + } + + /* Account for the sector of the entry itself */ + ++cur_sector; + + /* + * Account for the data of the write. + * For discards, this data is not present. + */ + if (!(cur_entry.flags & cpu_to_le64(LOG_DISCARD_FLAG))) { + cur_sector += le64_to_cpu(cur_entry.nr_sectors); + } + + ++cur_idx; + } + + return cur_sector; +} + +static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + int ret; + uint64_t log_sector_size; + bool log_append; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + /* Open the file */ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + /* Open the log file */ + s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + log_append = qemu_opt_get_bool(opts, "log-append", false); + + if (log_append) { + struct log_write_super log_sb = { 0, 0, 0, 0 }; + + if (qemu_opt_find(opts, "log-sector-size")) { + ret = -EINVAL; + error_setg(errp, "log-append and log-sector-size are mutually " + "exclusive"); + goto fail_log; + } + + /* Read log superblock or fake one for an empty log */ + if (!bdrv_getlength(s->log_file->bs)) { + log_sb.magic = cpu_to_le64(WRITE_LOG_MAGIC); + log_sb.version = cpu_to_le64(WRITE_LOG_VERSION); + log_sb.nr_entries = cpu_to_le64(0); + log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE); + } else { + ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read log superblock"); + goto fail_log; + } + } + + if (log_sb.magic != cpu_to_le64(WRITE_LOG_MAGIC)) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock magic"); + goto fail_log; + } + + if (log_sb.version != cpu_to_le64(WRITE_LOG_VERSION)) { + ret = -EINVAL; + error_setg(errp, "Unsupported log version %"PRIu64, + le64_to_cpu(log_sb.version)); + goto fail_log; + } + + log_sector_size = le32_to_cpu(log_sb.sectorsize); + s->cur_log_sector = 1; + s->nr_entries = 0; + + if (blk_log_writes_sector_size_valid(log_sector_size)) { + s->cur_log_sector = + blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size, + le64_to_cpu(log_sb.nr_entries), &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail_log; + } + + s->nr_entries = le64_to_cpu(log_sb.nr_entries); + } + } else { + log_sector_size = qemu_opt_get_size(opts, "log-sector-size", + BDRV_SECTOR_SIZE); + s->cur_log_sector = 1; + s->nr_entries = 0; + } + + if (!blk_log_writes_sector_size_valid(log_sector_size)) { + ret = -EINVAL; + error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); + goto fail_log; + } + + s->sectorsize = log_sector_size; + s->sectorbits = blk_log_writes_log2(log_sector_size); + s->update_interval = qemu_opt_get_number(opts, "log-super-update-interval", + 4096); + if (!s->update_interval) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock update interval %"PRIu64, + s->update_interval); + goto fail_log; + } + + ret = 0; +fail_log: + if (ret < 0) { + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; + } +fail: + if (ret < 0) { + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + } + qemu_opts_del(opts); + return ret; +} + +static void blk_log_writes_close(BlockDriverState *bs) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; +} + +static int64_t blk_log_writes_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static void blk_log_writes_refresh_filename(BlockDriverState *bs, + QDict *options) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + /* bs->file->bs has already been refreshed */ + bdrv_refresh_filename(s->log_file->bs); + + if (bs->file->bs->full_open_options + && s->log_file->bs->full_open_options) + { + QDict *opts = qdict_new(); + qdict_put_str(opts, "driver", "blklogwrites"); + + qobject_ref(bs->file->bs->full_open_options); + qdict_put_obj(opts, "file", QOBJECT(bs->file->bs->full_open_options)); + qobject_ref(s->log_file->bs->full_open_options); + qdict_put_obj(opts, "log", + QOBJECT(s->log_file->bs->full_open_options)); + qdict_put_int(opts, "log-sector-size", s->sectorsize); + + bs->full_open_options = opts; + } +} + +static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *ro_q, + uint64_t perm, uint64_t shrd, + uint64_t *nperm, uint64_t *nshrd) +{ + if (!c) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + if (!strcmp(c->name, "log")) { + bdrv_format_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); + } else { + bdrv_filter_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); + } +} + +static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + bs->bl.request_alignment = s->sectorsize; +} + +static int coroutine_fn +blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +typedef struct BlkLogWritesFileReq { + BlockDriverState *bs; + uint64_t offset; + uint64_t bytes; + int file_flags; + QEMUIOVector *qiov; + int (*func)(struct BlkLogWritesFileReq *r); + int file_ret; +} BlkLogWritesFileReq; + +typedef struct { + BlockDriverState *bs; + QEMUIOVector *qiov; + struct log_write_entry entry; + uint64_t zero_size; + int log_ret; +} BlkLogWritesLogReq; + +static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) +{ + BDRVBlkLogWritesState *s = lr->bs->opaque; + uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits; + + s->nr_entries++; + s->cur_log_sector += + ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size, + lr->qiov, 0); + + /* Logging for the "write zeroes" operation */ + if (lr->log_ret == 0 && lr->zero_size) { + cur_log_offset = s->cur_log_sector << s->sectorbits; + s->cur_log_sector += + ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset, + lr->zero_size, 0); + } + + /* Update super block on flush or every update interval */ + if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG) + || (s->nr_entries % s->update_interval == 0))) + { + struct log_write_super super = { + .magic = cpu_to_le64(WRITE_LOG_MAGIC), + .version = cpu_to_le64(WRITE_LOG_VERSION), + .nr_entries = cpu_to_le64(s->nr_entries), + .sectorsize = cpu_to_le32(s->sectorsize), + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(super)); + QEMUIOVector qiov; + + qemu_iovec_init(&qiov, 2); + qemu_iovec_add(&qiov, &super, sizeof(super)); + qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super)); + + lr->log_ret = + bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0); + if (lr->log_ret == 0) { + lr->log_ret = bdrv_co_flush(s->log_file->bs); + } + qemu_iovec_destroy(&qiov); + g_free(zeroes); + } +} + +static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr) +{ + fr->file_ret = fr->func(fr); +} + +static int coroutine_fn +blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + int (*file_func)(BlkLogWritesFileReq *r), + uint64_t entry_flags, bool is_zero_write) +{ + QEMUIOVector log_qiov; + size_t niov = qiov ? qiov->niov : 0; + BDRVBlkLogWritesState *s = bs->opaque; + BlkLogWritesFileReq fr = { + .bs = bs, + .offset = offset, + .bytes = bytes, + .file_flags = flags, + .qiov = qiov, + .func = file_func, + }; + BlkLogWritesLogReq lr = { + .bs = bs, + .qiov = &log_qiov, + .entry = { + .sector = cpu_to_le64(offset >> s->sectorbits), + .nr_sectors = cpu_to_le64(bytes >> s->sectorbits), + .flags = cpu_to_le64(entry_flags), + .data_len = 0, + }, + .zero_size = is_zero_write ? bytes : 0, + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry)); + + assert((1 << s->sectorbits) == s->sectorsize); + assert(bs->bl.request_alignment == s->sectorsize); + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + + qemu_iovec_init(&log_qiov, niov + 2); + qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry)); + qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry)); + if (qiov) { + qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size); + } + + blk_log_writes_co_do_file(&fr); + blk_log_writes_co_do_log(&lr); + + qemu_iovec_destroy(&log_qiov); + g_free(zeroes); + + if (lr.log_ret < 0) { + return lr.log_ret; + } + + return fr.file_ret; +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes, + fr->qiov, fr->file_flags); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes, + fr->file_flags); +} + +static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr) +{ + return bdrv_co_flush(fr->bs->file->bs); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes); +} + +static int coroutine_fn +blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, + blk_log_writes_co_do_file_pwritev, 0, false); +} + +static int coroutine_fn +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, + blk_log_writes_co_do_file_pwrite_zeroes, 0, + true); +} + +static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) +{ + return blk_log_writes_co_log(bs, 0, 0, NULL, 0, + blk_log_writes_co_do_file_flush, + LOG_FLUSH_FLAG, false); +} + +static int coroutine_fn +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +{ + return blk_log_writes_co_log(bs, offset, count, NULL, 0, + blk_log_writes_co_do_file_pdiscard, + LOG_DISCARD_FLAG, false); +} + +static BlockDriver bdrv_blk_log_writes = { + .format_name = "blklogwrites", + .instance_size = sizeof(BDRVBlkLogWritesState), + + .bdrv_open = blk_log_writes_open, + .bdrv_close = blk_log_writes_close, + .bdrv_getlength = blk_log_writes_getlength, + .bdrv_refresh_filename = blk_log_writes_refresh_filename, + .bdrv_child_perm = blk_log_writes_child_perm, + .bdrv_refresh_limits = blk_log_writes_refresh_limits, + + .bdrv_co_preadv = blk_log_writes_co_preadv, + .bdrv_co_pwritev = blk_log_writes_co_pwritev, + .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes, + .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk, + .bdrv_co_pdiscard = blk_log_writes_co_pdiscard, + .bdrv_co_block_status = bdrv_co_block_status_from_file, + + .is_filter = true, +}; + +static void bdrv_blk_log_writes_init(void) +{ + bdrv_register(&bdrv_blk_log_writes); +} + +block_init(bdrv_blk_log_writes_init); diff --git a/block/crypto.c b/block/crypto.c index 994172a3de..146d81c90a 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -551,7 +551,7 @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, /* Create protocol layer */ ret = bdrv_create_file(filename, opts, errp); if (ret < 0) { - return ret; + goto fail; } bs = bdrv_open(filename, NULL, NULL, diff --git a/block/file-posix.c b/block/file-posix.c index 829ee538d8..98987b80f1 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2111,8 +2111,9 @@ static int coroutine_fn raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; + Error *local_err = NULL; int fd; - int perm, shared; + uint64_t perm, shared; int result = 0; /* Validate options and set default values */ @@ -2148,7 +2149,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; /* Step one: Take locks */ - result = raw_apply_lock_bytes(fd, perm, shared, false, errp); + result = raw_apply_lock_bytes(fd, perm, ~shared, false, errp); if (result < 0) { goto out_close; } @@ -2156,13 +2157,13 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Step two: Check that nobody else has taken conflicting locks */ result = raw_check_lock_bytes(fd, perm, shared, errp); if (result < 0) { - goto out_close; + goto out_unlock; } /* Clear the file by truncating it to 0 */ result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); if (result < 0) { - goto out_close; + goto out_unlock; } if (file_opts->nocow) { @@ -2185,7 +2186,17 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) result = raw_regular_truncate(NULL, fd, file_opts->size, file_opts->preallocation, errp); if (result < 0) { - goto out_close; + goto out_unlock; + } + +out_unlock: + raw_apply_lock_bytes(fd, 0, 0, true, &local_err); + if (local_err) { + /* The above call should not fail, and if it does, that does + * not mean the whole creation operation has failed. So + * report it the user for their convenience, but do not report + * it to the caller. */ + error_report_err(local_err); } out_close: diff --git a/block/qcow2.c b/block/qcow2.c index 2f9e58e0c4..33b61b7480 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -23,11 +23,14 @@ */ #include "qemu/osdep.h" + +#define ZLIB_CONST +#include <zlib.h> + #include "block/block_int.h" #include "block/qdict.h" #include "sysemu/block-backend.h" #include "qemu/module.h" -#include <zlib.h> #include "qcow2.h" #include "qemu/error-report.h" #include "qapi/error.h" @@ -41,6 +44,7 @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "crypto.h" +#include "block/thread-pool.h" /* Differences with QCOW: @@ -1541,6 +1545,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, qcow2_check_refcounts(bs, &result, 0); } #endif + + qemu_co_queue_init(&s->compress_wait_queue); + return ret; fail: @@ -3650,6 +3657,104 @@ fail: return ret; } +/* + * qcow2_compress() + * + * @dest - destination buffer, at least of @size-1 bytes + * @src - source buffer, @size bytes + * + * Returns: compressed size on success + * -1 if compression is inefficient + * -2 on any other error + */ +static ssize_t qcow2_compress(void *dest, const void *src, size_t size) +{ + ssize_t ret; + z_stream strm; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -12, 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + return -2; + } + + /* strm.next_in is not const in old zlib versions, such as those used on + * OpenBSD/NetBSD, so cast the const away */ + strm.avail_in = size; + strm.next_in = (void *) src; + strm.avail_out = size - 1; + strm.next_out = dest; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_END) { + ret = size - 1 - strm.avail_out; + } else { + ret = (ret == Z_OK ? -1 : -2); + } + + deflateEnd(&strm); + + return ret; +} + +#define MAX_COMPRESS_THREADS 4 + +typedef struct Qcow2CompressData { + void *dest; + const void *src; + size_t size; + ssize_t ret; +} Qcow2CompressData; + +static int qcow2_compress_pool_func(void *opaque) +{ + Qcow2CompressData *data = opaque; + + data->ret = qcow2_compress(data->dest, data->src, data->size); + + return 0; +} + +static void qcow2_compress_complete(void *opaque, int ret) +{ + qemu_coroutine_enter(opaque); +} + +/* See qcow2_compress definition for parameters description */ +static ssize_t qcow2_co_compress(BlockDriverState *bs, + void *dest, const void *src, size_t size) +{ + BDRVQcow2State *s = bs->opaque; + BlockAIOCB *acb; + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + Qcow2CompressData arg = { + .dest = dest, + .src = src, + .size = size, + }; + + while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { + qemu_co_queue_wait(&s->compress_wait_queue, NULL); + } + + s->nb_compress_threads++; + acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, + qcow2_compress_complete, + qemu_coroutine_self()); + + if (!acb) { + s->nb_compress_threads--; + return -EINVAL; + } + qemu_coroutine_yield(); + s->nb_compress_threads--; + qemu_co_queue_next(&s->compress_wait_queue); + + return arg.ret; +} + /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int @@ -3659,8 +3764,8 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, BDRVQcow2State *s = bs->opaque; QEMUIOVector hd_qiov; struct iovec iov; - z_stream strm; - int ret, out_len; + int ret; + size_t out_len; uint8_t *buf, *out_buf; int64_t cluster_offset; @@ -3694,32 +3799,11 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, out_buf = g_malloc(s->cluster_size); - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, - Z_DEFLATED, -12, - 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { + out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); + if (out_len == -2) { ret = -EINVAL; goto fail; - } - - strm.avail_in = s->cluster_size; - strm.next_in = (uint8_t *)buf; - strm.avail_out = s->cluster_size; - strm.next_out = out_buf; - - ret = deflate(&strm, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK) { - deflateEnd(&strm); - ret = -EINVAL; - goto fail; - } - out_len = strm.next_out - out_buf; - - deflateEnd(&strm); - - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + } else if (out_len == -1) { /* could not compress: write normal cluster */ ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); if (ret < 0) { diff --git a/block/qcow2.h b/block/qcow2.h index 1c9c0d3631..d6aca687d6 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -326,6 +326,9 @@ typedef struct BDRVQcow2State { * override) */ char *image_backing_file; char *image_backing_format; + + CoQueue compress_wait_queue; + int nb_compress_threads; } BDRVQcow2State; typedef struct Qcow2COWRegion { diff --git a/include/block/block.h b/include/block/block.h index e5c7759a0c..bc76b1e59f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -225,6 +225,13 @@ enum { BLK_PERM_GRAPH_MOD = 0x10, BLK_PERM_ALL = 0x1f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, }; char *bdrv_perm_names(uint64_t perm); diff --git a/qapi/block-core.json b/qapi/block-core.json index 90e554ed0f..38b31250f9 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2533,16 +2533,17 @@ # @throttle: Since 2.11 # @nvme: Since 2.12 # @copy-on-read: Since 3.0 +# @blklogwrites: Since 3.0 # # Since: 2.9 ## { 'enum': 'BlockdevDriver', - 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read', - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', - 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', - 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed', - 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh', - 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } + 'data': [ 'blkdebug', 'blklogwrites', 'blkverify', 'bochs', 'cloop', + 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster', + 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', + 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', + 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', + 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } ## # @BlockdevOptionsFile: @@ -3045,6 +3046,30 @@ '*set-state': ['BlkdebugSetStateOptions'] } } ## +# @BlockdevOptionsBlklogwrites: +# +# Driver specific block device options for blklogwrites. +# +# @file: block device +# +# @log: block device used to log writes to @file +# +# @log-sector-size: sector size used in logging writes to @file, determines +# granularity of offsets and sizes of writes (default: 512) +# +# @log-super-update-interval: interval of write requests after which the log +# super block is updated to disk (default: 4096) +# +# Since: 3.0 +## +{ 'struct': 'BlockdevOptionsBlklogwrites', + 'data': { 'file': 'BlockdevRef', + 'log': 'BlockdevRef', + '*log-sector-size': 'uint32', + '*log-append': 'bool', + '*log-super-update-interval': 'uint64' } } + +## # @BlockdevOptionsBlkverify: # # Driver specific block device options for blkverify. @@ -3563,6 +3588,7 @@ 'discriminator': 'driver', 'data': { 'blkdebug': 'BlockdevOptionsBlkdebug', + 'blklogwrites':'BlockdevOptionsBlklogwrites', 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', diff --git a/qemu-img.c b/qemu-img.c index e1a506f7f6..7651d8172c 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2141,11 +2141,6 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } - if (!s.wr_in_order && s.compressed) { - error_report("Out of order write and compress are mutually exclusive"); - goto fail_getopt; - } - if (tgt_image_opts && !skip_create) { error_report("--target-image-opts requires use of -n flag"); goto fail_getopt; diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index f617e25e24..ee9c820d0f 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -100,6 +100,21 @@ run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2 run_qemu -drive file="$TEST_IMG",driver=qcow2,format=qcow2 echo +echo === Node names === +echo + +# Maximum length: 31 characters +run_qemu -drive file="$TEST_IMG",node-name=x123456789012345678901234567890 +run_qemu -drive file="$TEST_IMG",node-name=x1234567890123456789012345678901 + +# First character must be alphabetic +# Following characters alphanumeric or -._ +run_qemu -drive file="$TEST_IMG",node-name=All-Types.of_all0wed_chars +run_qemu -drive file="$TEST_IMG",node-name=123foo +run_qemu -drive file="$TEST_IMG",node-name=_foo +run_qemu -drive file="$TEST_IMG",node-name=foo#12 + +echo echo === Device without drive === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index dd9846d1ce..b7273505c7 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -47,6 +47,29 @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Node names === + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name + + === Device without drive === Testing: -device VIRTIO_SCSI -device scsi-hd diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index b01f9a90d7..e9257fe318 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -47,6 +47,29 @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Node names === + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name + + === Device without drive === Testing: -device VIRTIO_SCSI -device scsi-hd |