diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-10-06 12:15:59 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-10-06 12:15:59 +0100 |
commit | d7c5b788295426c1ef48a9ffc3432c51220f69ba (patch) | |
tree | 9c7d200421b5fb4fa92a5a761532a21b6bfdb2f7 /block | |
parent | 36d9c2883e55c863b622b99f0ebb5143f0001401 (diff) | |
parent | 9ab5741164b1727d22f69fe7001382baf0d56977 (diff) |
Merge remote-tracking branch 'remotes/stefanha-gitlab/tags/block-pull-request' into staging
Pull request
v2:
* Removed clang-format call from scripts/block-coroutine-wrapper.py. This
avoids the issue with clang version incompatibility. It could be added back
in the future but the code is readable without reformatting and it also
makes the build less dependent on the environment.
# gpg: Signature made Mon 05 Oct 2020 16:42:28 BST
# gpg: using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8
* remotes/stefanha-gitlab/tags/block-pull-request:
util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions
util/vfio-helpers: Collect IOVA reserved regions
docs: add 'io_uring' option to 'aio' param in qemu-options.hx
include/block/block.h: drop non-ascii quotation mark
block/io: refactor save/load vmstate
block: drop bdrv_prwv
block: generate coroutine-wrapper code
scripts: add block-coroutine-wrapper.py
block: declare some coroutine functions in block/coroutines.h
block/io: refactor coroutine wrappers
block: return error-code from bdrv_invalidate_cache
block/nvme: Replace magic value by SCALE_MS definition
block/nvme: Use register definitions from 'block/nvme.h'
block/nvme: Drop NVMeRegs structure, directly use NvmeBar
block/nvme: Reduce I/O registers scope
block/nvme: Map doorbells pages write-only
util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/block-gen.h | 49 | ||||
-rw-r--r-- | block/coroutines.h | 65 | ||||
-rw-r--r-- | block/io.c | 337 | ||||
-rw-r--r-- | block/meson.build | 8 | ||||
-rw-r--r-- | block/nvme.c | 73 |
5 files changed, 213 insertions, 319 deletions
diff --git a/block/block-gen.h b/block/block-gen.h new file mode 100644 index 0000000000..f80cf4897d --- /dev/null +++ b/block/block-gen.h @@ -0,0 +1,49 @@ +/* + * Block coroutine wrapping core, used by auto-generated block/block-gen.c + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2020 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_BLOCK_GEN_H +#define BLOCK_BLOCK_GEN_H + +#include "block/block_int.h" + +/* Base structure for argument packing structures */ +typedef struct BdrvPollCo { + BlockDriverState *bs; + bool in_progress; + int ret; + Coroutine *co; /* Keep pointer here for debugging */ +} BdrvPollCo; + +static inline int bdrv_poll_co(BdrvPollCo *s) +{ + assert(!qemu_in_coroutine()); + + bdrv_coroutine_enter(s->bs, s->co); + BDRV_POLL_WHILE(s->bs, s->in_progress); + + return s->ret; +} + +#endif /* BLOCK_BLOCK_GEN_H */ diff --git a/block/coroutines.h b/block/coroutines.h new file mode 100644 index 0000000000..f69179f5ef --- /dev/null +++ b/block/coroutines.h @@ -0,0 +1,65 @@ +/* + * Block layer I/O functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_COROUTINES_INT_H +#define BLOCK_COROUTINES_INT_H + +#include "block/block_int.h" + +int coroutine_fn bdrv_co_check(BlockDriverState *bs, + BdrvCheckResult *res, BdrvCheckMode fix); +int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); + +int generated_co_wrapper +bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); +int generated_co_wrapper +bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + +int coroutine_fn +bdrv_co_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); +int generated_co_wrapper +bdrv_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); + +int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); +int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); + +#endif /* BLOCK_COROUTINES_INT_H */ diff --git a/block/io.c b/block/io.c index 11df1889f1..54f0968aee 100644 --- a/block/io.c +++ b/block/io.c @@ -29,6 +29,7 @@ #include "block/blockjob.h" #include "block/blockjob_int.h" #include "block/block_int.h" +#include "block/coroutines.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -889,89 +890,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, return 0; } -typedef int coroutine_fn BdrvRequestEntry(void *opaque); -typedef struct BdrvRunCo { - BdrvRequestEntry *entry; - void *opaque; - int ret; - bool done; - Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */ -} BdrvRunCo; - -static void coroutine_fn bdrv_run_co_entry(void *opaque) -{ - BdrvRunCo *arg = opaque; - - arg->ret = arg->entry(arg->opaque); - arg->done = true; - aio_wait_kick(); -} - -static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry, - void *opaque) -{ - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - return entry(opaque); - } else { - BdrvRunCo s = { .entry = entry, .opaque = opaque }; - - s.co = qemu_coroutine_create(bdrv_run_co_entry, &s); - bdrv_coroutine_enter(bs, s.co); - - BDRV_POLL_WHILE(bs, !s.done); - - return s.ret; - } -} - -typedef struct RwCo { - BdrvChild *child; - int64_t offset; - QEMUIOVector *qiov; - bool is_write; - BdrvRequestFlags flags; -} RwCo; - -static int coroutine_fn bdrv_rw_co_entry(void *opaque) -{ - RwCo *rwco = opaque; - - if (!rwco->is_write) { - return bdrv_co_preadv(rwco->child, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); - } else { - return bdrv_co_pwritev(rwco->child, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); - } -} - -/* - * Process a vectored synchronous request using coroutines - */ -static int bdrv_prwv_co(BdrvChild *child, int64_t offset, - QEMUIOVector *qiov, bool is_write, - BdrvRequestFlags flags) -{ - RwCo rwco = { - .child = child, - .offset = offset, - .qiov = qiov, - .is_write = is_write, - .flags = flags, - }; - - return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco); -} - int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); - - return bdrv_prwv_co(child, offset, &qiov, true, + return bdrv_pwritev(child, offset, bytes, NULL, BDRV_REQ_ZERO_WRITE | flags); } @@ -1016,41 +938,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) } } -/* return < 0 if error. See bdrv_pwrite() for the return codes */ -int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) -{ - int ret; - - ret = bdrv_prwv_co(child, offset, qiov, false, 0); - if (ret < 0) { - return ret; - } - - return qiov->size; -} - /* See bdrv_pwrite() for the return codes */ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { + int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - return bdrv_preadv(child, offset, &qiov); -} - -int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) -{ - int ret; + ret = bdrv_preadv(child, offset, bytes, &qiov, 0); - ret = bdrv_prwv_co(child, offset, qiov, true, 0); - if (ret < 0) { - return ret; - } - - return qiov->size; + return ret < 0 ? ret : bytes; } /* Return no. of bytes on success or < 0 on error. Important errors are: @@ -1061,13 +961,16 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) */ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { + int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - return bdrv_pwritev(child, offset, &qiov); + ret = bdrv_pwritev(child, offset, bytes, &qiov, 0); + + return ret < 0 ? ret : bytes; } /* @@ -2243,18 +2146,6 @@ int bdrv_flush_all(void) return result; } - -typedef struct BdrvCoBlockStatusData { - BlockDriverState *bs; - BlockDriverState *base; - bool want_zero; - int64_t offset; - int64_t bytes; - int64_t *pnum; - int64_t *map; - BlockDriverState **file; -} BdrvCoBlockStatusData; - /* * Returns the allocation status of the specified sectors. * Drivers not implementing the functionality are assumed to not support @@ -2449,14 +2340,15 @@ early_out: return ret; } -static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool want_zero, - int64_t offset, - int64_t bytes, - int64_t *pnum, - int64_t *map, - BlockDriverState **file) +int coroutine_fn +bdrv_co_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { BlockDriverState *p; int ret = 0; @@ -2489,43 +2381,6 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, return ret; } -/* Coroutine wrapper for bdrv_block_status_above() */ -static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque) -{ - BdrvCoBlockStatusData *data = opaque; - - return bdrv_co_block_status_above(data->bs, data->base, - data->want_zero, - data->offset, data->bytes, - data->pnum, data->map, data->file); -} - -/* - * Synchronous wrapper around bdrv_co_block_status_above(). - * - * See bdrv_co_block_status_above() for details. - */ -static int bdrv_common_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, - BlockDriverState **file) -{ - BdrvCoBlockStatusData data = { - .bs = bs, - .base = base, - .want_zero = want_zero, - .offset = offset, - .bytes = bytes, - .pnum = pnum, - .map = map, - .file = file, - }; - - return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data); -} - int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) @@ -2619,96 +2474,70 @@ int bdrv_is_allocated_above(BlockDriverState *top, return 0; } -typedef struct BdrvVmstateCo { - BlockDriverState *bs; - QEMUIOVector *qiov; - int64_t pos; - bool is_read; -} BdrvVmstateCo; - -static int coroutine_fn -bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, - bool is_read) +int coroutine_fn +bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret = -ENOTSUP; + if (!drv) { + return -ENOMEDIUM; + } + bdrv_inc_in_flight(bs); - if (!drv) { - ret = -ENOMEDIUM; - } else if (drv->bdrv_load_vmstate) { - if (is_read) { - ret = drv->bdrv_load_vmstate(bs, qiov, pos); - } else { - ret = drv->bdrv_save_vmstate(bs, qiov, pos); - } + if (drv->bdrv_load_vmstate) { + ret = drv->bdrv_load_vmstate(bs, qiov, pos); } else if (child_bs) { - ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read); + ret = bdrv_co_readv_vmstate(child_bs, qiov, pos); } bdrv_dec_in_flight(bs); + return ret; } -static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) +int coroutine_fn +bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BdrvVmstateCo *co = opaque; + BlockDriver *drv = bs->drv; + BlockDriverState *child_bs = bdrv_primary_bs(bs); + int ret = -ENOTSUP; - return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); -} + if (!drv) { + return -ENOMEDIUM; + } -static inline int -bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, - bool is_read) -{ - BdrvVmstateCo data = { - .bs = bs, - .qiov = qiov, - .pos = pos, - .is_read = is_read, - }; + bdrv_inc_in_flight(bs); - return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data); + if (drv->bdrv_save_vmstate) { + ret = drv->bdrv_save_vmstate(bs, qiov, pos); + } else if (child_bs) { + ret = bdrv_co_writev_vmstate(child_bs, qiov, pos); + } + + bdrv_dec_in_flight(bs); + + return ret; } int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); - int ret; + int ret = bdrv_writev_vmstate(bs, &qiov, pos); - ret = bdrv_writev_vmstate(bs, &qiov, pos); - if (ret < 0) { - return ret; - } - - return size; -} - -int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) -{ - return bdrv_rw_vmstate(bs, qiov, pos, false); + return ret < 0 ? ret : size; } int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); - int ret; + int ret = bdrv_readv_vmstate(bs, &qiov, pos); - ret = bdrv_readv_vmstate(bs, &qiov, pos); - if (ret < 0) { - return ret; - } - - return size; -} - -int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) -{ - return bdrv_rw_vmstate(bs, qiov, pos, true); + return ret < 0 ? ret : size; } /**************************************************************/ @@ -2748,11 +2577,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb) /**************************************************************/ /* Coroutine block device emulation */ -static int coroutine_fn bdrv_flush_co_entry(void *opaque) -{ - return bdrv_co_flush(opaque); -} - int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { BdrvChild *primary_child = bdrv_primary_child(bs); @@ -2876,24 +2700,6 @@ early_exit: return ret; } -int bdrv_flush(BlockDriverState *bs) -{ - return bdrv_run_co(bs, bdrv_flush_co_entry, bs); -} - -typedef struct DiscardCo { - BdrvChild *child; - int64_t offset; - int64_t bytes; -} DiscardCo; - -static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque) -{ - DiscardCo *rwco = opaque; - - return bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes); -} - int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { @@ -3008,17 +2814,6 @@ out: return ret; } -int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) -{ - DiscardCo rwco = { - .child = child, - .offset = offset, - .bytes = bytes, - }; - - return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco); -} - int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; @@ -3420,35 +3215,3 @@ out: return ret; } - -typedef struct TruncateCo { - BdrvChild *child; - int64_t offset; - bool exact; - PreallocMode prealloc; - BdrvRequestFlags flags; - Error **errp; -} TruncateCo; - -static int coroutine_fn bdrv_truncate_co_entry(void *opaque) -{ - TruncateCo *tco = opaque; - - return bdrv_co_truncate(tco->child, tco->offset, tco->exact, - tco->prealloc, tco->flags, tco->errp); -} - -int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) -{ - TruncateCo tco = { - .child = child, - .offset = offset, - .exact = exact, - .prealloc = prealloc, - .flags = flags, - .errp = errp, - }; - - return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco); -} diff --git a/block/meson.build b/block/meson.build index 0b38dc36f7..78e8b25232 100644 --- a/block/meson.build +++ b/block/meson.build @@ -107,6 +107,14 @@ module_block_h = custom_target('module_block.h', command: [module_block_py, '@OUTPUT0@', modsrc]) block_ss.add(module_block_h) +wrapper_py = find_program('../scripts/block-coroutine-wrapper.py') +block_gen_c = custom_target('block-gen.c', + output: 'block-gen.c', + input: files('../include/block/block.h', + 'coroutines.h'), + command: [wrapper_py, '@OUTPUT@', '@INPUT@']) +block_ss.add(block_gen_c) + block_ss.add(files('stream.c')) softmmu_ss.add(files('qapi-sysemu.c')) diff --git a/block/nvme.c b/block/nvme.c index f4f27b6da7..b48f6f2588 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -31,7 +31,7 @@ #define NVME_SQ_ENTRY_BYTES 64 #define NVME_CQ_ENTRY_BYTES 16 #define NVME_QUEUE_SIZE 128 -#define NVME_BAR_SIZE 8192 +#define NVME_DOORBELL_SIZE 4096 /* * We have to leave one slot empty as that is the full queue case where @@ -81,15 +81,6 @@ typedef struct { QEMUBH *completion_bh; } NVMeQueuePair; -/* Memory mapped registers */ -typedef volatile struct { - NvmeBar ctrl; - struct { - uint32_t sq_tail; - uint32_t cq_head; - } doorbells[]; -} NVMeRegs; - #define INDEX_ADMIN 0 #define INDEX_IO(n) (1 + n) @@ -102,7 +93,11 @@ enum { struct BDRVNVMeState { AioContext *aio_context; QEMUVFIOState *vfio; - NVMeRegs *regs; + /* Memory mapped registers */ + volatile struct { + uint32_t sq_tail; + uint32_t cq_head; + } *doorbells; /* The submission/completion queue pairs. * [0]: admin queue. * [1..]: io queues. @@ -247,14 +242,14 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, error_propagate(errp, local_err); goto fail; } - q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail; + q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail; nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err); if (local_err) { error_propagate(errp, local_err); goto fail; } - q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head; + q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head; return q; fail: @@ -694,6 +689,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, uint64_t timeout_ms; uint64_t deadline, now; Error *local_err = NULL; + volatile NvmeBar *regs = NULL; qemu_co_mutex_init(&s->dma_map_lock); qemu_co_queue_init(&s->dma_flush_queue); @@ -712,32 +708,32 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, goto out; } - s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp); - if (!s->regs) { + regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar), + PROT_READ | PROT_WRITE, errp); + if (!regs) { ret = -EINVAL; goto out; } - /* Perform initialize sequence as described in NVMe spec "7.6.1 * Initialization". */ - cap = le64_to_cpu(s->regs->ctrl.cap); - if (!(cap & (1ULL << 37))) { + cap = le64_to_cpu(regs->cap); + if (!NVME_CAP_CSS(cap)) { error_setg(errp, "Device doesn't support NVMe command set"); ret = -EINVAL; goto out; } - s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF))); - s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t); + s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap)); + s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); bs->bl.opt_mem_alignment = s->page_size; - timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000); + timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); /* Reset device to get a clean state. */ - s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE); + regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); /* Wait for CSTS.RDY = 0. */ deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS; - while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) { + while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to reset (%" PRId64 " ms)", @@ -747,6 +743,13 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } } + s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar), + NVME_DOORBELL_SIZE, PROT_WRITE, errp); + if (!s->doorbells) { + ret = -EINVAL; + goto out; + } + /* Set up admin queue. */ s->queues = g_new(NVMeQueuePair *, 1); s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0, @@ -758,18 +761,19 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } s->nr_queues = 1; QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); - s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE); - s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); - s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); + regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) | + (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT)); + regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); + regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); /* After setting up all control registers we can enable device now. */ - s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) | - (ctz32(NVME_SQ_ENTRY_BYTES) << 16) | - 0x1); + regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | + (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | + CC_EN_MASK); /* Wait for CSTS.RDY = 1. */ now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - deadline = now + timeout_ms * 1000000; - while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) { + deadline = now + timeout_ms * SCALE_MS; + while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to start (%" PRId64 " ms)", @@ -800,6 +804,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, ret = -EIO; } out: + if (regs) { + qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar)); + } + /* Cleaning up is done in nvme_file_open() upon error. */ return ret; } @@ -872,7 +880,8 @@ static void nvme_close(BlockDriverState *bs) &s->irq_notifier[MSIX_SHARED_IRQ_IDX], false, NULL, NULL); event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); - qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE); + qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells, + sizeof(NvmeBar), NVME_DOORBELL_SIZE); qemu_vfio_close(s->vfio); g_free(s->device); |