aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/block-gen.h49
-rw-r--r--block/coroutines.h65
-rw-r--r--block/io.c337
-rw-r--r--block/meson.build8
-rw-r--r--block/nvme.c73
5 files changed, 213 insertions, 319 deletions
diff --git a/block/block-gen.h b/block/block-gen.h
new file mode 100644
index 0000000000..f80cf4897d
--- /dev/null
+++ b/block/block-gen.h
@@ -0,0 +1,49 @@
+/*
+ * Block coroutine wrapping core, used by auto-generated block/block-gen.c
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2020 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_BLOCK_GEN_H
+#define BLOCK_BLOCK_GEN_H
+
+#include "block/block_int.h"
+
+/* Base structure for argument packing structures */
+typedef struct BdrvPollCo {
+ BlockDriverState *bs;
+ bool in_progress;
+ int ret;
+ Coroutine *co; /* Keep pointer here for debugging */
+} BdrvPollCo;
+
+static inline int bdrv_poll_co(BdrvPollCo *s)
+{
+ assert(!qemu_in_coroutine());
+
+ bdrv_coroutine_enter(s->bs, s->co);
+ BDRV_POLL_WHILE(s->bs, s->in_progress);
+
+ return s->ret;
+}
+
+#endif /* BLOCK_BLOCK_GEN_H */
diff --git a/block/coroutines.h b/block/coroutines.h
new file mode 100644
index 0000000000..f69179f5ef
--- /dev/null
+++ b/block/coroutines.h
@@ -0,0 +1,65 @@
+/*
+ * Block layer I/O functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_COROUTINES_INT_H
+#define BLOCK_COROUTINES_INT_H
+
+#include "block/block_int.h"
+
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
+ BdrvCheckResult *res, BdrvCheckMode fix);
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
+
+int generated_co_wrapper
+bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+int generated_co_wrapper
+bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+int coroutine_fn
+bdrv_co_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
+int generated_co_wrapper
+bdrv_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
+
+int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+
+#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block/io.c b/block/io.c
index 11df1889f1..54f0968aee 100644
--- a/block/io.c
+++ b/block/io.c
@@ -29,6 +29,7 @@
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
+#include "block/coroutines.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -889,89 +890,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
return 0;
}
-typedef int coroutine_fn BdrvRequestEntry(void *opaque);
-typedef struct BdrvRunCo {
- BdrvRequestEntry *entry;
- void *opaque;
- int ret;
- bool done;
- Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */
-} BdrvRunCo;
-
-static void coroutine_fn bdrv_run_co_entry(void *opaque)
-{
- BdrvRunCo *arg = opaque;
-
- arg->ret = arg->entry(arg->opaque);
- arg->done = true;
- aio_wait_kick();
-}
-
-static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry,
- void *opaque)
-{
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- return entry(opaque);
- } else {
- BdrvRunCo s = { .entry = entry, .opaque = opaque };
-
- s.co = qemu_coroutine_create(bdrv_run_co_entry, &s);
- bdrv_coroutine_enter(bs, s.co);
-
- BDRV_POLL_WHILE(bs, !s.done);
-
- return s.ret;
- }
-}
-
-typedef struct RwCo {
- BdrvChild *child;
- int64_t offset;
- QEMUIOVector *qiov;
- bool is_write;
- BdrvRequestFlags flags;
-} RwCo;
-
-static int coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- if (!rwco->is_write) {
- return bdrv_co_preadv(rwco->child, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
- } else {
- return bdrv_co_pwritev(rwco->child, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
- }
-}
-
-/*
- * Process a vectored synchronous request using coroutines
- */
-static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
-{
- RwCo rwco = {
- .child = child,
- .offset = offset,
- .qiov = qiov,
- .is_write = is_write,
- .flags = flags,
- };
-
- return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco);
-}
-
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
-
- return bdrv_prwv_co(child, offset, &qiov, true,
+ return bdrv_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
@@ -1016,41 +938,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
}
}
-/* return < 0 if error. See bdrv_pwrite() for the return codes */
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
-{
- int ret;
-
- ret = bdrv_prwv_co(child, offset, qiov, false, 0);
- if (ret < 0) {
- return ret;
- }
-
- return qiov->size;
-}
-
/* See bdrv_pwrite() for the return codes */
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
+ int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
- return bdrv_preadv(child, offset, &qiov);
-}
-
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
-{
- int ret;
+ ret = bdrv_preadv(child, offset, bytes, &qiov, 0);
- ret = bdrv_prwv_co(child, offset, qiov, true, 0);
- if (ret < 0) {
- return ret;
- }
-
- return qiov->size;
+ return ret < 0 ? ret : bytes;
}
/* Return no. of bytes on success or < 0 on error. Important errors are:
@@ -1061,13 +961,16 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
*/
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
+ int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
- return bdrv_pwritev(child, offset, &qiov);
+ ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
+
+ return ret < 0 ? ret : bytes;
}
/*
@@ -2243,18 +2146,6 @@ int bdrv_flush_all(void)
return result;
}
-
-typedef struct BdrvCoBlockStatusData {
- BlockDriverState *bs;
- BlockDriverState *base;
- bool want_zero;
- int64_t offset;
- int64_t bytes;
- int64_t *pnum;
- int64_t *map;
- BlockDriverState **file;
-} BdrvCoBlockStatusData;
-
/*
* Returns the allocation status of the specified sectors.
* Drivers not implementing the functionality are assumed to not support
@@ -2449,14 +2340,15 @@ early_out:
return ret;
}
-static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
- BlockDriverState *base,
- bool want_zero,
- int64_t offset,
- int64_t bytes,
- int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
+int coroutine_fn
+bdrv_co_common_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
BlockDriverState *p;
int ret = 0;
@@ -2489,43 +2381,6 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
return ret;
}
-/* Coroutine wrapper for bdrv_block_status_above() */
-static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
-{
- BdrvCoBlockStatusData *data = opaque;
-
- return bdrv_co_block_status_above(data->bs, data->base,
- data->want_zero,
- data->offset, data->bytes,
- data->pnum, data->map, data->file);
-}
-
-/*
- * Synchronous wrapper around bdrv_co_block_status_above().
- *
- * See bdrv_co_block_status_above() for details.
- */
-static int bdrv_common_block_status_above(BlockDriverState *bs,
- BlockDriverState *base,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
- BlockDriverState **file)
-{
- BdrvCoBlockStatusData data = {
- .bs = bs,
- .base = base,
- .want_zero = want_zero,
- .offset = offset,
- .bytes = bytes,
- .pnum = pnum,
- .map = map,
- .file = file,
- };
-
- return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data);
-}
-
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
@@ -2619,96 +2474,70 @@ int bdrv_is_allocated_above(BlockDriverState *top,
return 0;
}
-typedef struct BdrvVmstateCo {
- BlockDriverState *bs;
- QEMUIOVector *qiov;
- int64_t pos;
- bool is_read;
-} BdrvVmstateCo;
-
-static int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read)
+int coroutine_fn
+bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret = -ENOTSUP;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+
bdrv_inc_in_flight(bs);
- if (!drv) {
- ret = -ENOMEDIUM;
- } else if (drv->bdrv_load_vmstate) {
- if (is_read) {
- ret = drv->bdrv_load_vmstate(bs, qiov, pos);
- } else {
- ret = drv->bdrv_save_vmstate(bs, qiov, pos);
- }
+ if (drv->bdrv_load_vmstate) {
+ ret = drv->bdrv_load_vmstate(bs, qiov, pos);
} else if (child_bs) {
- ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
+ ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
}
bdrv_dec_in_flight(bs);
+
return ret;
}
-static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
+int coroutine_fn
+bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- BdrvVmstateCo *co = opaque;
+ BlockDriver *drv = bs->drv;
+ BlockDriverState *child_bs = bdrv_primary_bs(bs);
+ int ret = -ENOTSUP;
- return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
-}
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
-static inline int
-bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
- bool is_read)
-{
- BdrvVmstateCo data = {
- .bs = bs,
- .qiov = qiov,
- .pos = pos,
- .is_read = is_read,
- };
+ bdrv_inc_in_flight(bs);
- return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data);
+ if (drv->bdrv_save_vmstate) {
+ ret = drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (child_bs) {
+ ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
+ }
+
+ bdrv_dec_in_flight(bs);
+
+ return ret;
}
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
- int ret;
+ int ret = bdrv_writev_vmstate(bs, &qiov, pos);
- ret = bdrv_writev_vmstate(bs, &qiov, pos);
- if (ret < 0) {
- return ret;
- }
-
- return size;
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
- return bdrv_rw_vmstate(bs, qiov, pos, false);
+ return ret < 0 ? ret : size;
}
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
- int ret;
+ int ret = bdrv_readv_vmstate(bs, &qiov, pos);
- ret = bdrv_readv_vmstate(bs, &qiov, pos);
- if (ret < 0) {
- return ret;
- }
-
- return size;
-}
-
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
- return bdrv_rw_vmstate(bs, qiov, pos, true);
+ return ret < 0 ? ret : size;
}
/**************************************************************/
@@ -2748,11 +2577,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* Coroutine block device emulation */
-static int coroutine_fn bdrv_flush_co_entry(void *opaque)
-{
- return bdrv_co_flush(opaque);
-}
-
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
BdrvChild *primary_child = bdrv_primary_child(bs);
@@ -2876,24 +2700,6 @@ early_exit:
return ret;
}
-int bdrv_flush(BlockDriverState *bs)
-{
- return bdrv_run_co(bs, bdrv_flush_co_entry, bs);
-}
-
-typedef struct DiscardCo {
- BdrvChild *child;
- int64_t offset;
- int64_t bytes;
-} DiscardCo;
-
-static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
-{
- DiscardCo *rwco = opaque;
-
- return bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
-}
-
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes)
{
@@ -3008,17 +2814,6 @@ out:
return ret;
}
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
-{
- DiscardCo rwco = {
- .child = child,
- .offset = offset,
- .bytes = bytes,
- };
-
- return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco);
-}
-
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
@@ -3420,35 +3215,3 @@ out:
return ret;
}
-
-typedef struct TruncateCo {
- BdrvChild *child;
- int64_t offset;
- bool exact;
- PreallocMode prealloc;
- BdrvRequestFlags flags;
- Error **errp;
-} TruncateCo;
-
-static int coroutine_fn bdrv_truncate_co_entry(void *opaque)
-{
- TruncateCo *tco = opaque;
-
- return bdrv_co_truncate(tco->child, tco->offset, tco->exact,
- tco->prealloc, tco->flags, tco->errp);
-}
-
-int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
-{
- TruncateCo tco = {
- .child = child,
- .offset = offset,
- .exact = exact,
- .prealloc = prealloc,
- .flags = flags,
- .errp = errp,
- };
-
- return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco);
-}
diff --git a/block/meson.build b/block/meson.build
index 0b38dc36f7..78e8b25232 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -107,6 +107,14 @@ module_block_h = custom_target('module_block.h',
command: [module_block_py, '@OUTPUT0@', modsrc])
block_ss.add(module_block_h)
+wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
+block_gen_c = custom_target('block-gen.c',
+ output: 'block-gen.c',
+ input: files('../include/block/block.h',
+ 'coroutines.h'),
+ command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
+block_ss.add(block_gen_c)
+
block_ss.add(files('stream.c'))
softmmu_ss.add(files('qapi-sysemu.c'))
diff --git a/block/nvme.c b/block/nvme.c
index f4f27b6da7..b48f6f2588 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -31,7 +31,7 @@
#define NVME_SQ_ENTRY_BYTES 64
#define NVME_CQ_ENTRY_BYTES 16
#define NVME_QUEUE_SIZE 128
-#define NVME_BAR_SIZE 8192
+#define NVME_DOORBELL_SIZE 4096
/*
* We have to leave one slot empty as that is the full queue case where
@@ -81,15 +81,6 @@ typedef struct {
QEMUBH *completion_bh;
} NVMeQueuePair;
-/* Memory mapped registers */
-typedef volatile struct {
- NvmeBar ctrl;
- struct {
- uint32_t sq_tail;
- uint32_t cq_head;
- } doorbells[];
-} NVMeRegs;
-
#define INDEX_ADMIN 0
#define INDEX_IO(n) (1 + n)
@@ -102,7 +93,11 @@ enum {
struct BDRVNVMeState {
AioContext *aio_context;
QEMUVFIOState *vfio;
- NVMeRegs *regs;
+ /* Memory mapped registers */
+ volatile struct {
+ uint32_t sq_tail;
+ uint32_t cq_head;
+ } *doorbells;
/* The submission/completion queue pairs.
* [0]: admin queue.
* [1..]: io queues.
@@ -247,14 +242,14 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
error_propagate(errp, local_err);
goto fail;
}
- q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail;
+ q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail;
nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto fail;
}
- q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head;
+ q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head;
return q;
fail:
@@ -694,6 +689,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
uint64_t timeout_ms;
uint64_t deadline, now;
Error *local_err = NULL;
+ volatile NvmeBar *regs = NULL;
qemu_co_mutex_init(&s->dma_map_lock);
qemu_co_queue_init(&s->dma_flush_queue);
@@ -712,32 +708,32 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
goto out;
}
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp);
- if (!s->regs) {
+ regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
+ PROT_READ | PROT_WRITE, errp);
+ if (!regs) {
ret = -EINVAL;
goto out;
}
-
/* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */
- cap = le64_to_cpu(s->regs->ctrl.cap);
- if (!(cap & (1ULL << 37))) {
+ cap = le64_to_cpu(regs->cap);
+ if (!NVME_CAP_CSS(cap)) {
error_setg(errp, "Device doesn't support NVMe command set");
ret = -EINVAL;
goto out;
}
- s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF)));
- s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t);
+ s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
+ s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
bs->bl.opt_mem_alignment = s->page_size;
- timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
+ timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
/* Reset device to get a clean state. */
- s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
+ regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
/* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
- while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
+ while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)",
@@ -747,6 +743,13 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
}
+ s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
+ NVME_DOORBELL_SIZE, PROT_WRITE, errp);
+ if (!s->doorbells) {
+ ret = -EINVAL;
+ goto out;
+ }
+
/* Set up admin queue. */
s->queues = g_new(NVMeQueuePair *, 1);
s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
@@ -758,18 +761,19 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
s->nr_queues = 1;
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
- s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
- s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
- s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
+ regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
+ (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
+ regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+ regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
/* After setting up all control registers we can enable device now. */
- s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
- (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
- 0x1);
+ regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
+ (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
+ CC_EN_MASK);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- deadline = now + timeout_ms * 1000000;
- while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
+ deadline = now + timeout_ms * SCALE_MS;
+ while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)",
@@ -800,6 +804,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
ret = -EIO;
}
out:
+ if (regs) {
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar));
+ }
+
/* Cleaning up is done in nvme_file_open() upon error. */
return ret;
}
@@ -872,7 +880,8 @@ static void nvme_close(BlockDriverState *bs)
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL);
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
+ sizeof(NvmeBar), NVME_DOORBELL_SIZE);
qemu_vfio_close(s->vfio);
g_free(s->device);