diff options
author | Klaus Jensen <k.jensen@samsung.com> | 2021-06-17 21:06:47 +0200 |
---|---|---|
committer | Klaus Jensen <k.jensen@samsung.com> | 2021-06-29 07:16:25 +0200 |
commit | 38f4ac65ac88ebf4f1869c1361f40b1817b4a39c (patch) | |
tree | 9fdad6e6b8b8f6b3f8924eaacd23c416f7690bb0 /hw/nvme/ctrl.c | |
parent | 3276dde4f262588f3645f2adbc84d07cb6981d3e (diff) |
hw/nvme: reimplement flush to allow cancellation
Prior to this patch, a broadcast flush would result in submitting
multiple "fire and forget" aios (no reference saved to the aiocbs
returned from the blk_aio_flush calls).
Fix this by issuing the flushes one after another.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'hw/nvme/ctrl.c')
-rw-r--r-- | hw/nvme/ctrl.c | 204 |
1 files changed, 124 insertions, 80 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 762bb82e3c..26c65a12e8 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1788,22 +1788,19 @@ static inline bool nvme_is_write(NvmeRequest *req) rw->opcode == NVME_CMD_WRITE_ZEROES; } +static AioContext *nvme_get_aio_context(BlockAIOCB *acb) +{ + return qemu_get_aio_context(); +} + static void nvme_misc_cb(void *opaque, int ret) { NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk)); + trace_pci_nvme_misc_cb(nvme_cid(req)); if (ret) { - block_acct_failed(stats, acct); nvme_aio_err(req, ret); - } else { - block_acct_done(stats, acct); } nvme_enqueue_req_completion(nvme_cq(req), req); @@ -1919,41 +1916,6 @@ static void nvme_aio_format_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } -struct nvme_aio_flush_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - BlockAcctCookie acct; -}; - -static void nvme_aio_flush_cb(void *opaque, int ret) -{ - struct nvme_aio_flush_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - - BlockBackend *blk = ctx->ns->blkconf.blk; - BlockAcctCookie *acct = &ctx->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk)); - - if (!ret) { - block_acct_done(stats, acct); - } else { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } - - (*num_flushes)--; - g_free(ctx); - - if (*num_flushes) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - static void nvme_verify_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; @@ -2868,57 +2830,139 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } -static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - uint16_t status; - struct nvme_aio_flush_ctx *ctx; +typedef struct NvmeFlushAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; +} NvmeFlushAIOCB; - trace_pci_nvme_flush(nvme_cid(req), nsid); +static void nvme_flush_cancel(BlockAIOCB *acb) +{ + NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common); - if (nsid != NVME_NSID_BROADCAST) { - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } + iocb->ret = -ECANCELED; - block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req); - return NVME_NO_COMPLETE; + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); } +} - /* 1-initialize; see comment in nvme_dsm */ - *num_flushes = 1; +static const AIOCBInfo nvme_flush_aiocb_info = { + .aiocb_size = sizeof(NvmeFlushAIOCB), + .cancel_async = nvme_flush_cancel, + .get_aio_context = nvme_get_aio_context, +}; - for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } +static void nvme_flush_ns_cb(void *opaque, int ret) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeNamespace *ns = iocb->ns; - ctx = g_new(struct nvme_aio_flush_ctx, 1); - ctx->req = req; - ctx->ns = ns; + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } - (*num_flushes)++; + if (ns) { + trace_pci_nvme_flush_ns(iocb->nsid); - block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0, - BLOCK_ACCT_FLUSH); - blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx); + iocb->ns = NULL; + iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb); + return; } - /* account for the 1-initialization */ - (*num_flushes)--; +out: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} - if (*num_flushes) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; +static void nvme_flush_bh(void *opaque) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + int i; + + if (iocb->ret < 0) { + goto done; + } + + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } + } + } + + if (!iocb->ns) { + goto done; + } + + nvme_flush_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); + + return; +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFlushAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_flush_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; + } + + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + + iocb->nsid = nsid; } + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; + +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + return status; } |