diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-05-15 13:54:33 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-05-15 13:54:33 -0700 |
commit | ab4c44d657aeca7e1da6d6dcb1741c8e7d357b8b (patch) | |
tree | 13b05307b2c4023bf21ef5acd38e325e7569e5ac /block | |
parent | c095228e8a8cdf5c15bb8a47c4d069582ae017d1 (diff) | |
parent | 01562fee5f3ad4506d57dbcf4b1903b565eceec7 (diff) |
Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging
Pull request
This pull request contain's Sam Li's zoned storage support in the QEMU block
layer and virtio-blk emulation.
v2:
- Sam fixed the CI failures. CI passes for me now. [Richard]
# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEhpWov9P5fNqsNXdanKSrs4Grc8gFAmRiWCgACgkQnKSrs4Gr
# c8h/7gf+MMm2cGEaf376t8HMwTc6wbXVfbmAlZrge2EXPZfFvEaxj7HClcEraOgV
# yJsGWeU6mOw4r68ICJ/4KhrY1cdv+VZym/LsMLMcFUTXFHnyX4pyU3am31FPOI4K
# +wrDYJOJhc4DkAESWGgEWiMKpuO/uUEgBmHdW+qPFCl77Yl/eP6H5uNP6nGFn55p
# QpS/l8iha7PDkc81EsrjA+e/YI0ubfNSP7+zZElhQ98354CQ0MCfmZ6h9bT+o2bu
# R7SBUj80e+2X0a1b9s/2Jz/x8l4TEsl8kr48/Q1usq3GVVkbjEgqsk6wTN13Q/4g
# CeIR7E61ZeYzmpb4tLFRIqK2Jw+NEQ==
# =Q8xW
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 15 May 2023 09:04:56 AM PDT
# gpg: using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
* tag 'block-pull-request' of https://gitlab.com/stefanha/qemu:
docs/zoned-storage:add zoned emulation use case
virtio-blk: add some trace events for zoned emulation
block: add accounting for zone append operation
virtio-blk: add zoned storage emulation for zoned devices
block: add some trace events for zone append
qemu-iotests: test zone append operation
block: introduce zone append write for zoned devices
file-posix: add tracking of the zone write pointers
docs/zoned-storage: add zoned device documentation
block: add some trace events for new block layer APIs
iotests: test new zone operations
block: add zoned BlockDriver check to block layer
block/raw-format: add zone operations to pass through requests
block/block-backend: add block layer APIs resembling Linux ZonedBlockDevice ioctls
block/file-posix: introduce helper functions for sysfs attributes
block/block-common: add zoned device structs
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/block-backend.c | 198 | ||||
-rw-r--r-- | block/file-posix.c | 680 | ||||
-rw-r--r-- | block/io.c | 68 | ||||
-rw-r--r-- | block/io_uring.c | 4 | ||||
-rw-r--r-- | block/linux-aio.c | 3 | ||||
-rw-r--r-- | block/qapi-sysemu.c | 11 | ||||
-rw-r--r-- | block/qapi.c | 18 | ||||
-rw-r--r-- | block/raw-format.c | 26 | ||||
-rw-r--r-- | block/trace-events | 4 |
9 files changed, 972 insertions, 40 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index e37d55d3e9..ca537cd0ad 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1845,6 +1845,204 @@ int coroutine_fn blk_co_flush(BlockBackend *blk) return ret; } +static void coroutine_fn blk_aio_zone_report_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset, + (unsigned int*)(uintptr_t)acb->bytes, + rwco->iobuf); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones, + BlockCompletionFunc *cb, void *opaque) +{ + BlkAioEmAIOCB *acb; + Coroutine *co; + IO_CODE(); + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .iobuf = zones, + .ret = NOT_DONE, + }; + acb->bytes = (int64_t)(uintptr_t)nr_zones, + acb->has_returned = false; + + co = qemu_coroutine_create(blk_aio_zone_report_entry, acb); + aio_co_enter(blk_get_aio_context(blk), co); + + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + blk_aio_complete_bh, acb); + } + + return &acb->common; +} + +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_zone_mgmt(rwco->blk, + (BlockZoneOp)(uintptr_t)rwco->iobuf, + rwco->offset, acb->bytes); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len, + BlockCompletionFunc *cb, void *opaque) { + BlkAioEmAIOCB *acb; + Coroutine *co; + IO_CODE(); + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .iobuf = (void *)(uintptr_t)op, + .ret = NOT_DONE, + }; + acb->bytes = len; + acb->has_returned = false; + + co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb); + aio_co_enter(blk_get_aio_context(blk), co); + + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + blk_aio_complete_bh, acb); + } + + return &acb->common; +} + +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes, + rwco->iobuf, rwco->flags); + blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) { + BlkAioEmAIOCB *acb; + Coroutine *co; + IO_CODE(); + + blk_inc_in_flight(blk); + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .ret = NOT_DONE, + .flags = flags, + .iobuf = qiov, + }; + acb->bytes = (int64_t)(uintptr_t)offset; + acb->has_returned = false; + + co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); + aio_co_enter(blk_get_aio_context(blk), co); + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + blk_aio_complete_bh, acb); + } + + return &acb->common; +} + +/* + * Send a zone_report command. + * offset is a byte offset from the start of the device. No alignment + * required for offset. + * nr_zones represents IN maximum and OUT actual. + */ +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones) +{ + int ret; + IO_CODE(); + + blk_inc_in_flight(blk); /* increase before waiting */ + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); + if (!blk_is_available(blk)) { + blk_dec_in_flight(blk); + return -ENOMEDIUM; + } + ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones); + blk_dec_in_flight(blk); + return ret; +} + +/* + * Send a zone_management command. + * op is the zone operation; + * offset is the byte offset from the start of the zoned device; + * len is the maximum number of bytes the command should operate on. It + * should be aligned with the device zone size. + */ +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op, + int64_t offset, int64_t len) +{ + int ret; + IO_CODE(); + + blk_inc_in_flight(blk); + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); + + ret = blk_check_byte_request(blk, offset, len); + if (ret < 0) { + blk_dec_in_flight(blk); + return ret; + } + + ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len); + blk_dec_in_flight(blk); + return ret; +} + +/* + * Send a zone_append command. + */ +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, + QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + int ret; + IO_CODE(); + + blk_inc_in_flight(blk); + blk_wait_while_drained(blk); + GRAPH_RDLOCK_GUARD(); + if (!blk_is_available(blk)) { + blk_dec_in_flight(blk); + return -ENOMEDIUM; + } + + ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags); + blk_dec_in_flight(blk); + return ret; +} + void blk_drain(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); diff --git a/block/file-posix.c b/block/file-posix.c index c7b723368e..0ab158efba 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -68,6 +68,9 @@ #include <sys/param.h> #include <sys/syscall.h> #include <sys/vfs.h> +#if defined(CONFIG_BLKZONED) +#include <linux/blkzoned.h> +#endif #include <linux/cdrom.h> #include <linux/fd.h> #include <linux/fs.h> @@ -157,6 +160,7 @@ typedef struct BDRVRawState { bool has_write_zeroes:1; bool use_linux_aio:1; bool use_linux_io_uring:1; + int64_t *offset; /* offset of zone append operation */ int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; @@ -216,6 +220,13 @@ typedef struct RawPosixAIOData { PreallocMode prealloc; Error **errp; } truncate; + struct { + unsigned int *nr_zones; + BlockZoneDescriptor *zones; + } zone_report; + struct { + unsigned long op; + } zone_mgmt; }; } RawPosixAIOData; @@ -766,6 +777,18 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } } +#ifdef CONFIG_BLKZONED + /* + * The kernel page cache does not reliably work for writes to SWR zones + * of zoned block device because it can not guarantee the order of writes. + */ + if ((bs->bl.zoned != BLK_Z_NONE) && + (!(s->open_flags & O_DIRECT))) { + error_setg(errp, "The driver supports zoned devices, and it requires " + "cache.direct=on, which was not specified."); + return -EINVAL; /* No host kernel page cache */ + } +#endif if (S_ISBLK(st.st_mode)) { #ifdef __linux__ @@ -1202,15 +1225,91 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st) #endif } -static int hdev_get_max_segments(int fd, struct stat *st) +/* + * Get a sysfs attribute value as character string. + */ +#ifdef CONFIG_LINUX +static int get_sysfs_str_val(struct stat *st, const char *attribute, + char **val) { + g_autofree char *sysfspath = NULL; + int ret; + size_t len; + + if (!S_ISBLK(st->st_mode)) { + return -ENOTSUP; + } + + sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s", + major(st->st_rdev), minor(st->st_rdev), + attribute); + ret = g_file_get_contents(sysfspath, val, &len, NULL); + if (ret == -1) { + return -ENOENT; + } + + /* The file is ended with '\n' */ + char *p; + p = *val; + if (*(p + len - 1) == '\n') { + *(p + len - 1) = '\0'; + } + return ret; +} +#endif + +#if defined(CONFIG_BLKZONED) +static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) { + g_autofree char *val = NULL; + int ret; + + ret = get_sysfs_str_val(st, "zoned", &val); + if (ret < 0) { + return ret; + } + + if (strcmp(val, "host-managed") == 0) { + *zoned = BLK_Z_HM; + } else if (strcmp(val, "host-aware") == 0) { + *zoned = BLK_Z_HA; + } else if (strcmp(val, "none") == 0) { + *zoned = BLK_Z_NONE; + } else { + return -ENOTSUP; + } + return 0; +} +#endif /* defined(CONFIG_BLKZONED) */ + +/* + * Get a sysfs attribute value as a long integer. + */ #ifdef CONFIG_LINUX - char buf[32]; +static long get_sysfs_long_val(struct stat *st, const char *attribute) +{ + g_autofree char *str = NULL; const char *end; - char *sysfspath = NULL; + long val; + int ret; + + ret = get_sysfs_str_val(st, attribute, &str); + if (ret < 0) { + return ret; + } + + /* The file is ended with '\n', pass 'end' to accept that. */ + ret = qemu_strtol(str, &end, 10, &val); + if (ret == 0 && end && *end == '\0') { + ret = val; + } + return ret; +} +#endif + +static int hdev_get_max_segments(int fd, struct stat *st) +{ +#ifdef CONFIG_LINUX int ret; - int sysfd = -1; - long max_segments; if (S_ISCHR(st->st_mode)) { if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { @@ -1218,43 +1317,175 @@ static int hdev_get_max_segments(int fd, struct stat *st) } return -ENOTSUP; } + return get_sysfs_long_val(st, "max_segments"); +#else + return -ENOTSUP; +#endif +} - if (!S_ISBLK(st->st_mode)) { - return -ENOTSUP; +#if defined(CONFIG_BLKZONED) +/* + * If the reset_all flag is true, then the wps of zone whose state is + * not readonly or offline should be all reset to the start sector. + * Else, take the real wp of the device. + */ +static int get_zones_wp(BlockDriverState *bs, int fd, int64_t offset, + unsigned int nrz, bool reset_all) +{ + struct blk_zone *blkz; + size_t rep_size; + uint64_t sector = offset >> BDRV_SECTOR_BITS; + BlockZoneWps *wps = bs->wps; + unsigned int j = offset / bs->bl.zone_size; + unsigned int n = 0, i = 0; + int ret; + rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone); + g_autofree struct blk_zone_report *rep = NULL; + + rep = g_malloc(rep_size); + blkz = (struct blk_zone *)(rep + 1); + while (n < nrz) { + memset(rep, 0, rep_size); + rep->sector = sector; + rep->nr_zones = nrz - n; + + do { + ret = ioctl(fd, BLKREPORTZONE, rep); + } while (ret != 0 && errno == EINTR); + if (ret != 0) { + error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed %d", + fd, offset, errno); + return -errno; + } + + if (!rep->nr_zones) { + break; + } + + for (i = 0; i < rep->nr_zones; ++i, ++n, ++j) { + /* + * The wp tracking cares only about sequential writes required and + * sequential write preferred zones so that the wp can advance to + * the right location. + * Use the most significant bit of the wp location to indicate the + * zone type: 0 for SWR/SWP zones and 1 for conventional zones. + */ + if (blkz[i].type == BLK_ZONE_TYPE_CONVENTIONAL) { + wps->wp[j] |= 1ULL << 63; + } else { + switch(blkz[i].cond) { + case BLK_ZONE_COND_FULL: + case BLK_ZONE_COND_READONLY: + /* Zone not writable */ + wps->wp[j] = (blkz[i].start + blkz[i].len) << BDRV_SECTOR_BITS; + break; + case BLK_ZONE_COND_OFFLINE: + /* Zone not writable nor readable */ + wps->wp[j] = (blkz[i].start) << BDRV_SECTOR_BITS; + break; + default: + if (reset_all) { + wps->wp[j] = blkz[i].start << BDRV_SECTOR_BITS; + } else { + wps->wp[j] = blkz[i].wp << BDRV_SECTOR_BITS; + } + break; + } + } + } + sector = blkz[i - 1].start + blkz[i - 1].len; } - sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", - major(st->st_rdev), minor(st->st_rdev)); - sysfd = open(sysfspath, O_RDONLY); - if (sysfd == -1) { - ret = -errno; - goto out; + return 0; +} + +static void update_zones_wp(BlockDriverState *bs, int fd, int64_t offset, + unsigned int nrz) +{ + if (get_zones_wp(bs, fd, offset, nrz, 0) < 0) { + error_report("update zone wp failed"); + } +} + +static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + BlockZoneModel zoned; + int ret; + + bs->bl.zoned = BLK_Z_NONE; + + ret = get_sysfs_zoned_model(st, &zoned); + if (ret < 0 || zoned == BLK_Z_NONE) { + return; + } + bs->bl.zoned = zoned; + + ret = get_sysfs_long_val(st, "max_open_zones"); + if (ret >= 0) { + bs->bl.max_open_zones = ret; + } + + ret = get_sysfs_long_val(st, "max_active_zones"); + if (ret >= 0) { + bs->bl.max_active_zones = ret; } - ret = RETRY_ON_EINTR(read(sysfd, buf, sizeof(buf) - 1)); + + /* + * The zoned device must at least have zone size and nr_zones fields. + */ + ret = get_sysfs_long_val(st, "chunk_sectors"); if (ret < 0) { - ret = -errno; - goto out; - } else if (ret == 0) { - ret = -EIO; - goto out; + error_setg_errno(errp, -ret, "Unable to read chunk_sectors " + "sysfs attribute"); + return; + } else if (!ret) { + error_setg(errp, "Read 0 from chunk_sectors sysfs attribute"); + return; } - buf[ret] = 0; - /* The file is ended with '\n', pass 'end' to accept that. */ - ret = qemu_strtol(buf, &end, 10, &max_segments); - if (ret == 0 && end && *end == '\n') { - ret = max_segments; + bs->bl.zone_size = ret << BDRV_SECTOR_BITS; + + ret = get_sysfs_long_val(st, "nr_zones"); + if (ret < 0) { + error_setg_errno(errp, -ret, "Unable to read nr_zones " + "sysfs attribute"); + return; + } else if (!ret) { + error_setg(errp, "Read 0 from nr_zones sysfs attribute"); + return; } + bs->bl.nr_zones = ret; -out: - if (sysfd != -1) { - close(sysfd); + ret = get_sysfs_long_val(st, "zone_append_max_bytes"); + if (ret > 0) { + bs->bl.max_append_sectors = ret >> BDRV_SECTOR_BITS; } - g_free(sysfspath); - return ret; -#else - return -ENOTSUP; -#endif + + ret = get_sysfs_long_val(st, "physical_block_size"); + if (ret >= 0) { + bs->bl.write_granularity = ret; + } + + /* The refresh_limits() function can be called multiple times. */ + g_free(bs->wps); + bs->wps = g_malloc(sizeof(BlockZoneWps) + + sizeof(int64_t) * bs->bl.nr_zones); + ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "report wps failed"); + bs->wps = NULL; + return; + } + qemu_co_mutex_init(&bs->wps->colock); } +#else /* !defined(CONFIG_BLKZONED) */ +static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, + Error **errp) +{ + bs->bl.zoned = BLK_Z_NONE; +} +#endif /* !defined(CONFIG_BLKZONED) */ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { @@ -1297,6 +1528,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.max_hw_iov = ret; } } + + raw_refresh_zoned_limits(bs, &st, errp); } static int check_for_dasd(int fd) @@ -1320,9 +1553,12 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) BDRVRawState *s = bs->opaque; int ret; - /* If DASD, get blocksizes */ + /* If DASD or zoned devices, get blocksizes */ if (check_for_dasd(s->fd) < 0) { - return -ENOTSUP; + /* zoned devices are not DASD */ + if (bs->bl.zoned == BLK_Z_NONE) { + return -ENOTSUP; + } } ret = probe_logical_blocksize(s->fd, &bsz->log); if (ret < 0) { @@ -1463,7 +1699,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) ssize_t len; len = RETRY_ON_EINTR( - (aiocb->aio_type & QEMU_AIO_WRITE) ? + (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ? qemu_pwritev(aiocb->aio_fildes, aiocb->io.iov, aiocb->io.niov, @@ -1492,7 +1728,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) ssize_t len; while (offset < aiocb->aio_nbytes) { - if (aiocb->aio_type & QEMU_AIO_WRITE) { + if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { len = pwrite(aiocb->aio_fildes, (const char *)buf + offset, aiocb->aio_nbytes - offset, @@ -1585,7 +1821,7 @@ static int handle_aiocb_rw(void *opaque) } nbytes = handle_aiocb_rw_linear(aiocb, buf); - if (!(aiocb->aio_type & QEMU_AIO_WRITE)) { + if (!(aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))) { char *p = buf; size_t count = aiocb->aio_nbytes, copy; int i; @@ -1790,6 +2026,147 @@ static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, } #endif +/* + * parse_zone - Fill a zone descriptor + */ +#if defined(CONFIG_BLKZONED) +static inline int parse_zone(struct BlockZoneDescriptor *zone, + const struct blk_zone *blkz) { + zone->start = blkz->start << BDRV_SECTOR_BITS; + zone->length = blkz->len << BDRV_SECTOR_BITS; + zone->wp = blkz->wp << BDRV_SECTOR_BITS; + +#ifdef HAVE_BLK_ZONE_REP_CAPACITY + zone->cap = blkz->capacity << BDRV_SECTOR_BITS; +#else + zone->cap = blkz->len << BDRV_SECTOR_BITS; +#endif + + switch (blkz->type) { + case BLK_ZONE_TYPE_SEQWRITE_REQ: + zone->type = BLK_ZT_SWR; + break; + case BLK_ZONE_TYPE_SEQWRITE_PREF: + zone->type = BLK_ZT_SWP; + break; + case BLK_ZONE_TYPE_CONVENTIONAL: + zone->type = BLK_ZT_CONV; + break; + default: + error_report("Unsupported zone type: 0x%x", blkz->type); + return -ENOTSUP; + } + + switch (blkz->cond) { + case BLK_ZONE_COND_NOT_WP: + zone->state = BLK_ZS_NOT_WP; + break; + case BLK_ZONE_COND_EMPTY: + zone->state = BLK_ZS_EMPTY; + break; + case BLK_ZONE_COND_IMP_OPEN: + zone->state = BLK_ZS_IOPEN; + break; + case BLK_ZONE_COND_EXP_OPEN: + zone->state = BLK_ZS_EOPEN; + break; + case BLK_ZONE_COND_CLOSED: + zone->state = BLK_ZS_CLOSED; + break; + case BLK_ZONE_COND_READONLY: + zone->state = BLK_ZS_RDONLY; + break; + case BLK_ZONE_COND_FULL: + zone->state = BLK_ZS_FULL; + break; + case BLK_ZONE_COND_OFFLINE: + zone->state = BLK_ZS_OFFLINE; + break; + default: + error_report("Unsupported zone state: 0x%x", blkz->cond); + return -ENOTSUP; + } + return 0; +} +#endif + +#if defined(CONFIG_BLKZONED) +static int handle_aiocb_zone_report(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + int fd = aiocb->aio_fildes; + unsigned int *nr_zones = aiocb->zone_report.nr_zones; + BlockZoneDescriptor *zones = aiocb->zone_report.zones; + /* zoned block devices use 512-byte sectors */ + uint64_t sector = aiocb->aio_offset / 512; + + struct blk_zone *blkz; + size_t rep_size; + unsigned int nrz; + int ret; + unsigned int n = 0, i = 0; + + nrz = *nr_zones; + rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone); + g_autofree struct blk_zone_report *rep = NULL; + rep = g_malloc(rep_size); + + blkz = (struct blk_zone *)(rep + 1); + while (n < nrz) { + memset(rep, 0, rep_size); + rep->sector = sector; + rep->nr_zones = nrz - n; + + do { + ret = ioctl(fd, BLKREPORTZONE, rep); + } while (ret != 0 && errno == EINTR); + if (ret != 0) { + error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed %d", + fd, sector, errno); + return -errno; + } + + if (!rep->nr_zones) { + break; + } + + for (i = 0; i < rep->nr_zones; i++, n++) { + ret = parse_zone(&zones[n], &blkz[i]); + if (ret != 0) { + return ret; + } + + /* The next report should start after the last zone reported */ + sector = blkz[i].start + blkz[i].len; + } + } + + *nr_zones = n; + return 0; +} +#endif + +#if defined(CONFIG_BLKZONED) +static int handle_aiocb_zone_mgmt(void *opaque) +{ + RawPosixAIOData *aiocb = opaque; + int fd = aiocb->aio_fildes; + uint64_t sector = aiocb->aio_offset / 512; + int64_t nr_sectors = aiocb->aio_nbytes / 512; + struct blk_zone_range range; + int ret; + + /* Execute the operation */ + range.sector = sector; + range.nr_sectors = nr_sectors; + do { + ret = ioctl(fd, aiocb->zone_mgmt.op, &range); + } while (ret != 0 && errno == EINTR); + + return ret < 0 ? -errno : ret; +} +#endif + static int handle_aiocb_copy_range(void *opaque) { RawPosixAIOData *aiocb = opaque; @@ -2072,9 +2449,19 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; + int ret; if (fd_open(bs) < 0) return -EIO; +#if defined(CONFIG_BLKZONED) + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) { + qemu_co_mutex_lock(&bs->wps->colock); + if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) { + int index = offset / bs->bl.zone_size; + offset = bs->wps->wp[index]; + } + } +#endif /* * When using O_DIRECT, the request must be aligned to be able to use @@ -2087,12 +2474,15 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, #ifdef CONFIG_LINUX_IO_URING } else if (s->use_linux_io_uring) { assert(qiov->size == bytes); - return luring_co_submit(bs, s->fd, offset, qiov, type); + ret = luring_co_submit(bs, s->fd, offset, qiov, type); + goto out; #endif #ifdef CONFIG_LINUX_AIO } else if (s->use_linux_aio) { assert(qiov->size == bytes); - return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch); + ret = laio_co_submit(s->fd, offset, qiov, type, + s->aio_max_batch); + goto out; #endif } @@ -2109,7 +2499,41 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, }; assert(qiov->size == bytes); - return raw_thread_pool_submit(handle_aiocb_rw, &acb); + ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); + goto out; /* Avoid the compiler err of unused label */ + +out: +#if defined(CONFIG_BLKZONED) +{ + BlockZoneWps *wps = bs->wps; + if (ret == 0) { + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) + && wps && bs->bl.zone_size) { + uint64_t *wp = &wps->wp[offset / bs->bl.zone_size]; + if (!BDRV_ZT_IS_CONV(*wp)) { + if (type & QEMU_AIO_ZONE_APPEND) { + *s->offset = *wp; + trace_zbd_zone_append_complete(bs, *s->offset + >> BDRV_SECTOR_BITS); + } + /* Advance the wp if needed */ + if (offset + bytes > *wp) { + *wp = offset + bytes; + } + } + } + } else { + if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { + update_zones_wp(bs, s->fd, 0, 1); + } + } + + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) { + qemu_co_mutex_unlock(&wps->colock); + } +} +#endif + return ret; } static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, @@ -2212,6 +2636,9 @@ static void raw_close(BlockDriverState *bs) BDRVRawState *s = bs->opaque; if (s->fd >= 0) { +#if defined(CONFIG_BLKZONED) + g_free(bs->wps); +#endif qemu_close(s->fd); s->fd = -1; } @@ -2969,6 +3396,171 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret) } } +/* + * zone report - Get a zone block device's information in the form + * of an array of zone descriptors. + * zones is an array of zone descriptors to hold zone information on reply; + * offset can be any byte within the entire size of the device; + * nr_zones is the maxium number of sectors the command should operate on. + */ +#if defined(CONFIG_BLKZONED) +static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones) { + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_ZONE_REPORT, + .aio_offset = offset, + .zone_report = { + .nr_zones = nr_zones, + .zones = zones, + }, + }; + + trace_zbd_zone_report(bs, *nr_zones, offset >> BDRV_SECTOR_BITS); + return raw_thread_pool_submit(handle_aiocb_zone_report, &acb); +} +#endif + +/* + * zone management operations - Execute an operation on a zone + */ +#if defined(CONFIG_BLKZONED) +static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op, + int64_t offset, int64_t len) { + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; + int64_t zone_size, zone_size_mask; + const char *op_name; + unsigned long zo; + int ret; + BlockZoneWps *wps = bs->wps; + int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS; + + zone_size = bs->bl.zone_size; + zone_size_mask = zone_size - 1; + if (offset & zone_size_mask) { + error_report("sector offset %" PRId64 " is not aligned to zone size " + "%" PRId64 "", offset / 512, zone_size / 512); + return -EINVAL; + } + + if (((offset + len) < capacity && len & zone_size_mask) || + offset + len > capacity) { + error_report("number of sectors %" PRId64 " is not aligned to zone size" + " %" PRId64 "", len / 512, zone_size / 512); + return -EINVAL; + } + + uint32_t i = offset / bs->bl.zone_size; + uint32_t nrz = len / bs->bl.zone_size; + uint64_t *wp = &wps->wp[i]; + if (BDRV_ZT_IS_CONV(*wp) && len != capacity) { + error_report("zone mgmt operations are not allowed for conventional zones"); + return -EIO; + } + + switch (op) { + case BLK_ZO_OPEN: + op_name = "BLKOPENZONE"; + zo = BLKOPENZONE; + break; + case BLK_ZO_CLOSE: + op_name = "BLKCLOSEZONE"; + zo = BLKCLOSEZONE; + break; + case BLK_ZO_FINISH: + op_name = "BLKFINISHZONE"; + zo = BLKFINISHZONE; + break; + case BLK_ZO_RESET: + op_name = "BLKRESETZONE"; + zo = BLKRESETZONE; + break; + default: + error_report("Unsupported zone op: 0x%x", op); + return -ENOTSUP; + } + + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, + .aio_type = QEMU_AIO_ZONE_MGMT, + .aio_offset = offset, + .aio_nbytes = len, + .zone_mgmt = { + .op = zo, + }, + }; + + trace_zbd_zone_mgmt(bs, op_name, offset >> BDRV_SECTOR_BITS, + len >> BDRV_SECTOR_BITS); + ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb); + if (ret != 0) { + update_zones_wp(bs, s->fd, offset, i); + error_report("ioctl %s failed %d", op_name, ret); + return ret; + } + + if (zo == BLKRESETZONE && len == capacity) { + ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 1); + if (ret < 0) { + error_report("reporting single wp failed"); + return ret; + } + } else if (zo == BLKRESETZONE) { + for (unsigned int j = 0; j < nrz; ++j) { + wp[j] = offset + j * zone_size; + } + } else if (zo == BLKFINISHZONE) { + for (unsigned int j = 0; j < nrz; ++j) { + /* The zoned device allows the last zone smaller that the + * zone size. */ + wp[j] = MIN(offset + (j + 1) * zone_size, offset + len); + } + } + + return ret; +} +#endif + +#if defined(CONFIG_BLKZONED) +static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, + int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { + assert(flags == 0); + int64_t zone_size_mask = bs->bl.zone_size - 1; + int64_t iov_len = 0; + int64_t len = 0; + BDRVRawState *s = bs->opaque; + s->offset = offset; + + if (*offset & zone_size_mask) { + error_report("sector offset %" PRId64 " is not aligned to zone size " + "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512); + return -EINVAL; + } + + int64_t wg = bs->bl.write_granularity; + int64_t wg_mask = wg - 1; + for (int i = 0; i < qiov->niov; i++) { + iov_len = qiov->iov[i].iov_len; + if (iov_len & wg_mask) { + error_report("len of IOVector[%d] %" PRId64 " is not aligned to " + "block size %" PRId64 "", i, iov_len, wg); + return -EINVAL; + } + len += iov_len; + } + + trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS); + return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND); +} +#endif + static coroutine_fn int raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes, bool blkdev) @@ -3724,6 +4316,14 @@ static BlockDriver bdrv_host_device = { #ifdef __linux__ .bdrv_co_ioctl = hdev_co_ioctl, #endif + + /* zoned device */ +#if defined(CONFIG_BLKZONED) + /* zone management operations */ + .bdrv_co_zone_report = raw_co_zone_report, + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, + .bdrv_co_zone_append = raw_co_zone_append, +#endif }; #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) diff --git a/block/io.c b/block/io.c index 58557f2f96..4d54fda593 100644 --- a/block/io.c +++ b/block/io.c @@ -3113,6 +3113,74 @@ out: return co.ret; } +int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones) +{ + BlockDriver *drv = bs->drv; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + IO_CODE(); + + bdrv_inc_in_flight(bs); + if (!drv || !drv->bdrv_co_zone_report || bs->bl.zoned == BLK_Z_NONE) { + co.ret = -ENOTSUP; + goto out; + } + co.ret = drv->bdrv_co_zone_report(bs, offset, nr_zones, zones); +out: + bdrv_dec_in_flight(bs); + return co.ret; +} + +int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op, + int64_t offset, int64_t len) +{ + BlockDriver *drv = bs->drv; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + IO_CODE(); + + bdrv_inc_in_flight(bs); + if (!drv || !drv->bdrv_co_zone_mgmt || bs->bl.zoned == BLK_Z_NONE) { + co.ret = -ENOTSUP; + goto out; + } + co.ret = drv->bdrv_co_zone_mgmt(bs, op, offset, len); +out: + bdrv_dec_in_flight(bs); + return co.ret; +} + +int coroutine_fn bdrv_co_zone_append(BlockDriverState *bs, int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + int ret; + BlockDriver *drv = bs->drv; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + IO_CODE(); + + ret = bdrv_check_qiov_request(*offset, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } + + bdrv_inc_in_flight(bs); + if (!drv || !drv->bdrv_co_zone_append || bs->bl.zoned == BLK_Z_NONE) { + co.ret = -ENOTSUP; + goto out; + } + co.ret = drv->bdrv_co_zone_append(bs, offset, qiov, flags); +out: + bdrv_dec_in_flight(bs); + return co.ret; +} + void *qemu_blockalign(BlockDriverState *bs, size_t size) { IO_CODE(); diff --git a/block/io_uring.c b/block/io_uring.c index 989f9a99ed..82cab6a5bd 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -350,6 +350,10 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, io_uring_prep_writev(sqes, fd, luringcb->qiov->iov, luringcb->qiov->niov, offset); break; + case QEMU_AIO_ZONE_APPEND: + io_uring_prep_writev(sqes, fd, luringcb->qiov->iov, + luringcb->qiov->niov, offset); + break; case QEMU_AIO_READ: io_uring_prep_readv(sqes, fd, luringcb->qiov->iov, luringcb->qiov->niov, offset); diff --git a/block/linux-aio.c b/block/linux-aio.c index fc50cdd1bf..442c86209b 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -394,6 +394,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, case QEMU_AIO_WRITE: io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset); break; + case QEMU_AIO_ZONE_APPEND: + io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset); + break; case QEMU_AIO_READ: io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset); break; diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c index 7bd7554150..cec3c1afb4 100644 --- a/block/qapi-sysemu.c +++ b/block/qapi-sysemu.c @@ -517,6 +517,7 @@ void qmp_block_latency_histogram_set( bool has_boundaries, uint64List *boundaries, bool has_boundaries_read, uint64List *boundaries_read, bool has_boundaries_write, uint64List *boundaries_write, + bool has_boundaries_append, uint64List *boundaries_append, bool has_boundaries_flush, uint64List *boundaries_flush, Error **errp) { @@ -557,6 +558,16 @@ void qmp_block_latency_histogram_set( } } + if (has_boundaries || has_boundaries_append) { + ret = block_latency_histogram_set( + stats, BLOCK_ACCT_ZONE_APPEND, + has_boundaries_append ? boundaries_append : boundaries); + if (ret) { + error_setg(errp, "Device '%s' set append write boundaries fail", id); + return; + } + } + if (has_boundaries || has_boundaries_flush) { ret = block_latency_histogram_set( stats, BLOCK_ACCT_FLUSH, diff --git a/block/qapi.c b/block/qapi.c index 71f2751257..f34f95e0ef 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -533,27 +533,36 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; + ds->zone_append_bytes = stats->nr_bytes[BLOCK_ACCT_ZONE_APPEND]; ds->unmap_bytes = stats->nr_bytes[BLOCK_ACCT_UNMAP]; ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; + ds->zone_append_operations = stats->nr_ops[BLOCK_ACCT_ZONE_APPEND]; ds->unmap_operations = stats->nr_ops[BLOCK_ACCT_UNMAP]; ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + ds->failed_zone_append_operations = + stats->failed_ops[BLOCK_ACCT_ZONE_APPEND]; ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; ds->failed_unmap_operations = stats->failed_ops[BLOCK_ACCT_UNMAP]; ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + ds->invalid_zone_append_operations = + stats->invalid_ops[BLOCK_ACCT_ZONE_APPEND]; ds->invalid_flush_operations = stats->invalid_ops[BLOCK_ACCT_FLUSH]; ds->invalid_unmap_operations = stats->invalid_ops[BLOCK_ACCT_UNMAP]; ds->rd_merged = stats->merged[BLOCK_ACCT_READ]; ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; + ds->zone_append_merged = stats->merged[BLOCK_ACCT_ZONE_APPEND]; ds->unmap_merged = stats->merged[BLOCK_ACCT_UNMAP]; ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; + ds->zone_append_total_time_ns = + stats->total_time_ns[BLOCK_ACCT_ZONE_APPEND]; ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; ds->unmap_total_time_ns = stats->total_time_ns[BLOCK_ACCT_UNMAP]; @@ -571,6 +580,7 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *zap = &ts->latency[BLOCK_ACCT_ZONE_APPEND]; TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; dev_stats->interval_length = ts->interval_length; @@ -583,6 +593,10 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) dev_stats->max_wr_latency_ns = timed_average_max(wr); dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + dev_stats->min_zone_append_latency_ns = timed_average_min(zap); + dev_stats->max_zone_append_latency_ns = timed_average_max(zap); + dev_stats->avg_zone_append_latency_ns = timed_average_avg(zap); + dev_stats->min_flush_latency_ns = timed_average_min(fl); dev_stats->max_flush_latency_ns = timed_average_max(fl); dev_stats->avg_flush_latency_ns = timed_average_avg(fl); @@ -591,6 +605,8 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) block_acct_queue_depth(ts, BLOCK_ACCT_READ); dev_stats->avg_wr_queue_depth = block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + dev_stats->avg_zone_append_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_ZONE_APPEND); QAPI_LIST_PREPEND(ds->timed_stats, dev_stats); } @@ -600,6 +616,8 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_READ]); ds->wr_latency_histogram = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_WRITE]); + ds->zone_append_latency_histogram + = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_ZONE_APPEND]); ds->flush_latency_histogram = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_FLUSH]); } diff --git a/block/raw-format.c b/block/raw-format.c index fd9e61f58e..3a3946213f 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -317,6 +317,28 @@ raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) return bdrv_co_pdiscard(bs->file, offset, bytes); } +static int coroutine_fn GRAPH_RDLOCK +raw_co_zone_report(BlockDriverState *bs, int64_t offset, + unsigned int *nr_zones, + BlockZoneDescriptor *zones) +{ + return bdrv_co_zone_report(bs->file->bs, offset, nr_zones, zones); +} + +static int coroutine_fn GRAPH_RDLOCK +raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op, + int64_t offset, int64_t len) +{ + return bdrv_co_zone_mgmt(bs->file->bs, op, offset, len); +} + +static int coroutine_fn GRAPH_RDLOCK +raw_co_zone_append(BlockDriverState *bs,int64_t *offset, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + return bdrv_co_zone_append(bs->file->bs, offset, qiov, flags); +} + static int64_t coroutine_fn GRAPH_RDLOCK raw_co_getlength(BlockDriverState *bs) { @@ -608,6 +630,7 @@ static void raw_child_perm(BlockDriverState *bs, BdrvChild *c, BlockDriver bdrv_raw = { .format_name = "raw", .instance_size = sizeof(BDRVRawState), + .supports_zoned_children = true, .bdrv_probe = &raw_probe, .bdrv_reopen_prepare = &raw_reopen_prepare, .bdrv_reopen_commit = &raw_reopen_commit, @@ -619,6 +642,9 @@ BlockDriver bdrv_raw = { .bdrv_co_pwritev = &raw_co_pwritev, .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, .bdrv_co_pdiscard = &raw_co_pdiscard, + .bdrv_co_zone_report = &raw_co_zone_report, + .bdrv_co_zone_mgmt = &raw_co_zone_mgmt, + .bdrv_co_zone_append = &raw_co_zone_append, .bdrv_co_block_status = &raw_co_block_status, .bdrv_co_copy_range_from = &raw_co_copy_range_from, .bdrv_co_copy_range_to = &raw_co_copy_range_to, diff --git a/block/trace-events b/block/trace-events index 48dbf10c66..32665158d6 100644 --- a/block/trace-events +++ b/block/trace-events @@ -209,6 +209,10 @@ file_FindEjectableOpticalMedia(const char *media) "Matching using %s" file_setup_cdrom(const char *partition) "Using %s as optical disc" file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d" file_flush_fdatasync_failed(int err) "errno %d" +zbd_zone_report(void *bs, unsigned int nr_zones, int64_t sector) "bs %p report %d zones starting at sector offset 0x%" PRIx64 "" +zbd_zone_mgmt(void *bs, const char *op_name, int64_t sector, int64_t len) "bs %p %s starts at sector offset 0x%" PRIx64 " over a range of 0x%" PRIx64 " sectors" +zbd_zone_append(void *bs, int64_t sector) "bs %p append at sector offset 0x%" PRIx64 "" +zbd_zone_append_complete(void *bs, int64_t sector) "bs %p returns append sector 0x%" PRIx64 "" # ssh.c sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)" |