Diffstat (limited to 'hw/block/nvme.c'):
 hw/block/nvme.c | 1237 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 1129 insertions(+), 108 deletions(-)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d439e44db8..6842b01ab5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -23,7 +23,8 @@
* [pmrdev=<mem_backend_file_id>,] \
* max_ioqpairs=<N[optional]>, \
* aerl=<N[optional]>,aer_max_queued=<N[optional]>, \
- * mdts=<N[optional]>,zoned.zasl=<N[optional]>, \
+ * mdts=<N[optional]>,vsl=<N[optional]>, \
+ * zoned.zasl=<N[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@@ -78,12 +79,26 @@
* as a power of two (2^n) and is in units of the minimum memory page size
* (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB).
*
+ * - `vsl`
+ * Indicates the maximum data size limit for the Verify command. Like `mdts`,
+ * this value is specified as a power of two (2^n) and is in units of the
+ * minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512
+ * KiB).
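+ * For example, with a 4 KiB CAP.MPSMIN, vsl=7 yields 2^7 * 4 KiB = 512 KiB.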
+ *
* - `zoned.zasl`
* Indicates the maximum data transfer size for the Zone Append command. Like
* `mdts`, the value is specified as a power of two (2^n) and is in units of
* the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
* defaulting to the value of `mdts`).
*
+ * - `zoned.append_size_limit`
+ * The maximum I/O size in bytes that is allowed in a Zone Append command.
+ * The default is 128 KiB. Since internally this value is maintained as
+ * ZASL = log2(<maximum append size> / <page size>), some values assigned
+ * to this property may be rounded down, resulting in a lower maximum ZA
+ * data size being in effect. By setting this property to 0, users can make
+ * ZASL equal to MDTS. This property only affects zoned namespaces.
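+ * For example, with a 4 KiB page size, the 128 KiB default yields
+ * ZASL = log2(128 KiB / 4 KiB) = 5; a value such as 192 KiB is likewise
+ * rounded down to ZASL = 5 and an effective limit of 128 KiB.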
+ *
* nvme namespace device parameters
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - `subsys`
@@ -144,6 +159,7 @@
#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"
+#include "nvme-dif.h"
#define NVME_MAX_IOQPAIRS 0xffff
#define NVME_DB_SIZE 4
@@ -197,6 +213,7 @@ static const uint32_t nvme_cse_acs[256] = {
[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
+ [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};
static const uint32_t nvme_cse_iocs_none[256];
@@ -207,6 +224,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
};
@@ -217,6 +235,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -226,15 +245,6 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
static void nvme_process_sq(void *opaque);
-static uint16_t nvme_cid(NvmeRequest *req)
-{
- if (!req) {
- return 0xffff;
- }
-
- return le16_to_cpu(req->cqe.cid);
-}
-
static uint16_t nvme_sqid(NvmeRequest *req)
{
return le16_to_cpu(req->sq->sqid);
@@ -360,6 +370,26 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
return pci_dma_read(&n->parent_obj, addr, buf, size);
}
+static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+{
+ hwaddr hi = addr + size - 1;
+ if (hi < addr) {
+ return 1;
+ }
+
+ if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
+ memcpy(nvme_addr_to_cmb(n, addr), buf, size);
+ return 0;
+ }
+
+ if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
+ memcpy(nvme_addr_to_pmr(n, addr), buf, size);
+ return 0;
+ }
+
+ return pci_dma_write(&n->parent_obj, addr, buf, size);
+}
+
static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
{
return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces);
@@ -476,6 +506,59 @@ static inline void nvme_sg_unmap(NvmeSg *sg)
memset(sg, 0x0, sizeof(*sg));
}
+/*
+ * When metadata is transferred as extended LBAs, the DPTR mapped into `sg`
+ * holds both data and metadata. This function splits the data and metadata
+ * into two separate QSG/IOVs.
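+ *
+ * For example, with 512 byte logical blocks and 8 bytes of metadata per
+ * block, the mapped buffer alternates 512/8/512/8/... and is split into
+ * a 512/512/... data SG and an 8/8/... metadata SG.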
+ */
+static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data,
+ NvmeSg *mdata)
+{
+ NvmeSg *dst = data;
+ size_t size = nvme_lsize(ns);
+ size_t msize = nvme_msize(ns);
+ uint32_t trans_len, count = size;
+ uint64_t offset = 0;
+ bool dma = sg->flags & NVME_SG_DMA;
+ size_t sge_len;
+ size_t sg_len = dma ? sg->qsg.size : sg->iov.size;
+ int sg_idx = 0;
+
+ assert(sg->flags & NVME_SG_ALLOC);
+
+ while (sg_len) {
+ sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;
+
+ trans_len = MIN(sg_len, count);
+ trans_len = MIN(trans_len, sge_len - offset);
+
+ if (dst) {
+ if (dma) {
+ qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset,
+ trans_len);
+ } else {
+ qemu_iovec_add(&dst->iov,
+ sg->iov.iov[sg_idx].iov_base + offset,
+ trans_len);
+ }
+ }
+
+ sg_len -= trans_len;
+ count -= trans_len;
+ offset += trans_len;
+
+ if (count == 0) {
+ dst = (dst == data) ? mdata : data;
+ count = (dst == data) ? size : msize;
+ }
+
+ if (sge_len == offset) {
+ offset = 0;
+ sg_idx++;
+ }
+ }
+}
+
static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
size_t len)
{
@@ -860,8 +943,8 @@ unmap:
return status;
}
-static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
- NvmeCmd *cmd)
+uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
+ NvmeCmd *cmd)
{
uint64_t prp1, prp2;
@@ -879,10 +962,158 @@ static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
}
}
-typedef enum NvmeTxDirection {
- NVME_TX_DIRECTION_TO_DEVICE = 0,
- NVME_TX_DIRECTION_FROM_DEVICE = 1,
-} NvmeTxDirection;
+static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
+ NvmeCmd *cmd)
+{
+ int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags);
+ hwaddr mptr = le64_to_cpu(cmd->mptr);
+ uint16_t status;
+
+ if (psdt == NVME_PSDT_SGL_MPTR_SGL) {
+ NvmeSglDescriptor sgl;
+
+ if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) {
+ return NVME_DATA_TRAS_ERROR;
+ }
+
+ status = nvme_map_sgl(n, sg, sgl, len, cmd);
+ if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) {
+ status = NVME_MD_SGL_LEN_INVALID | NVME_DNR;
+ }
+
+ return status;
+ }
+
+ nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr));
+ status = nvme_map_addr(n, sg, mptr, len);
+ if (status) {
+ nvme_sg_unmap(sg);
+ }
+
+ return status;
+}
+
+static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ size_t len = nvme_l2b(ns, nlb);
+ uint16_t status;
+
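+ /*
+ * If the namespace is formatted with protection information and PRACT is
+ * set with a metadata size of exactly 8 bytes (the DIF tuple), the
+ * protection information is generated or stripped by the controller and
+ * no metadata is transferred to or from the host, so only the data
+ * portion of the buffer is mapped.
+ */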
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) {
+ goto out;
+ }
+
+ if (nvme_ns_ext(ns)) {
+ NvmeSg sg;
+
+ len += nvme_m2b(ns, nlb);
+
+ status = nvme_map_dptr(n, &sg, len, &req->cmd);
+ if (status) {
+ return status;
+ }
+
+ nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
+ nvme_sg_split(&sg, ns, &req->sg, NULL);
+ nvme_sg_unmap(&sg);
+
+ return NVME_SUCCESS;
+ }
+
+out:
+ return nvme_map_dptr(n, &req->sg, len, &req->cmd);
+}
+
+static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ size_t len = nvme_m2b(ns, nlb);
+ uint16_t status;
+
+ if (nvme_ns_ext(ns)) {
+ NvmeSg sg;
+
+ len += nvme_l2b(ns, nlb);
+
+ status = nvme_map_dptr(n, &sg, len, &req->cmd);
+ if (status) {
+ return status;
+ }
+
+ nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
+ nvme_sg_split(&sg, ns, NULL, &req->sg);
+ nvme_sg_unmap(&sg);
+
+ return NVME_SUCCESS;
+ }
+
+ return nvme_map_mptr(n, &req->sg, len, &req->cmd);
+}
+
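+/*
+ * Transfer `len` bytes to or from an interleaved (extended LBA) buffer by
+ * copying chunks of `bytes` bytes at a time and skipping `skip_bytes` of
+ * the scatter/gather list between chunks, starting at `offset`.
+ */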
+static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr,
+ uint32_t len, uint32_t bytes,
+ int32_t skip_bytes, int64_t offset,
+ NvmeTxDirection dir)
+{
+ hwaddr addr;
+ uint32_t trans_len, count = bytes;
+ bool dma = sg->flags & NVME_SG_DMA;
+ int64_t sge_len;
+ int sg_idx = 0;
+ int ret;
+
+ assert(sg->flags & NVME_SG_ALLOC);
+
+ while (len) {
+ sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;
+
+ if (sge_len - offset < 0) {
+ offset -= sge_len;
+ sg_idx++;
+ continue;
+ }
+
+ if (sge_len == offset) {
+ offset = 0;
+ sg_idx++;
+ continue;
+ }
+
+ trans_len = MIN(len, count);
+ trans_len = MIN(trans_len, sge_len - offset);
+
+ if (dma) {
+ addr = sg->qsg.sg[sg_idx].base + offset;
+ } else {
+ addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset;
+ }
+
+ if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
+ ret = nvme_addr_read(n, addr, ptr, trans_len);
+ } else {
+ ret = nvme_addr_write(n, addr, ptr, trans_len);
+ }
+
+ if (ret) {
+ return NVME_DATA_TRAS_ERROR;
+ }
+
+ ptr += trans_len;
+ len -= trans_len;
+ count -= trans_len;
+ offset += trans_len;
+
+ if (count == 0) {
+ count = bytes;
+ offset += skip_bytes;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len,
NvmeTxDirection dir)
@@ -946,6 +1177,49 @@ static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
}
+uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ NvmeTxDirection dir, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint16_t ctrl = le16_to_cpu(rw->control);
+
+ if (nvme_ns_ext(ns) &&
+ !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) {
+ size_t lsize = nvme_lsize(ns);
+ size_t msize = nvme_msize(ns);
+
+ return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0,
+ dir);
+ }
+
+ return nvme_tx(n, &req->sg, ptr, len, dir);
+}
+
+uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ NvmeTxDirection dir, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ uint16_t status;
+
+ if (nvme_ns_ext(ns)) {
+ size_t lsize = nvme_lsize(ns);
+ size_t msize = nvme_msize(ns);
+
+ return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize,
+ dir);
+ }
+
+ nvme_sg_unmap(&req->sg);
+
+ status = nvme_map_mptr(n, &req->sg, len, &req->cmd);
+ if (status) {
+ return status;
+ }
+
+ return nvme_tx(n, &req->sg, ptr, len, dir);
+}
+
static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
BlockCompletionFunc *cb, NvmeRequest *req)
{
@@ -1498,7 +1772,7 @@ static inline bool nvme_is_write(NvmeRequest *req)
rw->opcode == NVME_CMD_WRITE_ZEROES;
}
-static void nvme_rw_cb(void *opaque, int ret)
+static void nvme_misc_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
@@ -1507,19 +1781,125 @@ static void nvme_rw_cb(void *opaque, int ret)
BlockAcctCookie *acct = &req->acct;
BlockAcctStats *stats = blk_get_stats(blk);
- trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
+ trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk));
+
+ if (ret) {
+ block_acct_failed(stats, acct);
+ nvme_aio_err(req, ret);
+ } else {
+ block_acct_done(stats, acct);
+ }
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+void nvme_rw_complete_cb(void *opaque, int ret)
+{
+ NvmeRequest *req = opaque;
+ NvmeNamespace *ns = req->ns;
+ BlockBackend *blk = ns->blkconf.blk;
+ BlockAcctCookie *acct = &req->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+
+ trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));
+
+ if (ret) {
+ block_acct_failed(stats, acct);
+ nvme_aio_err(req, ret);
+ } else {
+ block_acct_done(stats, acct);
+ }
if (ns->params.zoned && nvme_is_write(req)) {
nvme_finalize_zoned_write(ns, req);
}
- if (!ret) {
- block_acct_done(stats, acct);
- } else {
- block_acct_failed(stats, acct);
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_rw_cb(void *opaque, int ret)
+{
+ NvmeRequest *req = opaque;
+ NvmeNamespace *ns = req->ns;
+
+ BlockBackend *blk = ns->blkconf.blk;
+
+ trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
+
+ if (ret) {
+ goto out;
+ }
+
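+ /*
+ * When the namespace has metadata, chain a second I/O for the metadata,
+ * which is stored at ns->mdata_offset in the underlying block device,
+ * and complete the request from nvme_rw_complete_cb.
+ */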
+ if (nvme_msize(ns)) {
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+ uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+
+ if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) {
+ size_t mlen = nvme_m2b(ns, nlb);
+
+ req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen,
+ BDRV_REQ_MAY_UNMAP,
+ nvme_rw_complete_cb, req);
+ return;
+ }
+
+ if (nvme_ns_ext(ns) || req->cmd.mptr) {
+ uint16_t status;
+
+ nvme_sg_unmap(&req->sg);
+ status = nvme_map_mdata(nvme_ctrl(req), nlb, req);
+ if (status) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (req->cmd.opcode == NVME_CMD_READ) {
+ return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req);
+ }
+
+ return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req);
+ }
+ }
+
+out:
+ nvme_rw_complete_cb(req, ret);
+}
+
+struct nvme_aio_format_ctx {
+ NvmeRequest *req;
+ NvmeNamespace *ns;
+
+ /* number of outstanding write zeroes for this namespace */
+ int *count;
+};
+
+static void nvme_aio_format_cb(void *opaque, int ret)
+{
+ struct nvme_aio_format_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = ctx->ns;
+ uintptr_t *num_formats = (uintptr_t *)&req->opaque;
+ int *count = ctx->count;
+
+ g_free(ctx);
+
+ if (ret) {
nvme_aio_err(req, ret);
}
+ if (--(*count)) {
+ return;
+ }
+
+ g_free(count);
+ ns->status = 0x0;
+
+ if (--(*num_formats)) {
+ return;
+ }
+
nvme_enqueue_req_completion(nvme_cq(req), req);
}
@@ -1558,6 +1938,90 @@ static void nvme_aio_flush_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+static void nvme_verify_cb(void *opaque, int ret)
+{
+ NvmeBounceContext *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ BlockBackend *blk = ns->blkconf.blk;
+ BlockAcctCookie *acct = &req->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint16_t status;
+
+ trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
+ appmask, reftag);
+
+ if (ret) {
+ block_acct_failed(stats, acct);
+ nvme_aio_err(req, ret);
+ goto out;
+ }
+
+ block_acct_done(stats, acct);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce,
+ ctx->mdata.iov.size, slba);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
+ ctx->mdata.bounce, ctx->mdata.iov.size,
+ ctrl, slba, apptag, appmask, reftag);
+ }
+
+out:
+ qemu_iovec_destroy(&ctx->data.iov);
+ g_free(ctx->data.bounce);
+
+ qemu_iovec_destroy(&ctx->mdata.iov);
+ g_free(ctx->mdata.bounce);
+
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_verify_mdata_in_cb(void *opaque, int ret)
+{
+ NvmeBounceContext *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ size_t mlen = nvme_m2b(ns, nlb);
+ uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+ BlockBackend *blk = ns->blkconf.blk;
+
+ trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk));
+
+ if (ret) {
+ goto out;
+ }
+
+ ctx->mdata.bounce = g_malloc(mlen);
+
+ qemu_iovec_reset(&ctx->mdata.iov);
+ qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
+
+ req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
+ nvme_verify_cb, ctx);
+ return;
+
+out:
+ nvme_verify_cb(ctx, ret);
+}
+
static void nvme_aio_discard_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -1583,7 +2047,7 @@ struct nvme_zone_reset_ctx {
NvmeZone *zone;
};
-static void nvme_aio_zone_reset_cb(void *opaque, int ret)
+static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret)
{
struct nvme_zone_reset_ctx *ctx = opaque;
NvmeRequest *req = ctx->req;
@@ -1591,31 +2055,31 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret)
NvmeZone *zone = ctx->zone;
uintptr_t *resets = (uintptr_t *)&req->opaque;
- g_free(ctx);
-
- trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba);
-
- if (!ret) {
- switch (nvme_get_zone_state(zone)) {
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_dec_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_FULL:
- zone->w_ptr = zone->d.zslba;
- zone->d.wp = zone->w_ptr;
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
- /* fall through */
- default:
- break;
- }
- } else {
+ if (ret) {
nvme_aio_err(req, ret);
+ goto out;
+ }
+
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
+ case NVME_ZONE_STATE_FULL:
+ zone->w_ptr = zone->d.zslba;
+ zone->d.wp = zone->w_ptr;
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
+ /* fall through */
+ default:
+ break;
}
+out:
+ g_free(ctx);
+
(*resets)--;
if (*resets) {
@@ -1625,25 +2089,61 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+static void nvme_aio_zone_reset_cb(void *opaque, int ret)
+{
+ struct nvme_zone_reset_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeZone *zone = ctx->zone;
+
+ trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba);
+
+ if (ret) {
+ goto out;
+ }
+
+ if (nvme_msize(ns)) {
+ int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba);
+
+ blk_aio_pwrite_zeroes(ns->blkconf.blk, offset,
+ nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
+ nvme_aio_zone_reset_complete_cb, ctx);
+ return;
+ }
+
+out:
+ nvme_aio_zone_reset_complete_cb(opaque, ret);
+}
+
struct nvme_copy_ctx {
int copies;
uint8_t *bounce;
+ uint8_t *mbounce;
uint32_t nlb;
+ NvmeCopySourceRange *ranges;
};
struct nvme_copy_in_ctx {
NvmeRequest *req;
QEMUIOVector iov;
+ NvmeCopySourceRange *range;
};
-static void nvme_copy_cb(void *opaque, int ret)
+static void nvme_copy_complete_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
struct nvme_copy_ctx *ctx = req->opaque;
- trace_pci_nvme_copy_cb(nvme_cid(req));
+ if (ret) {
+ block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
+ nvme_aio_err(req, ret);
+ goto out;
+ }
+
+ block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+out:
if (ns->params.zoned) {
NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
uint64_t sdlba = le64_to_cpu(copy->sdlba);
@@ -1652,19 +2152,42 @@ static void nvme_copy_cb(void *opaque, int ret)
__nvme_advance_zone_wp(ns, zone, ctx->nlb);
}
- if (!ret) {
- block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
- } else {
- block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
- nvme_aio_err(req, ret);
- }
-
g_free(ctx->bounce);
+ g_free(ctx->mbounce);
g_free(ctx);
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+static void nvme_copy_cb(void *opaque, int ret)
+{
+ NvmeRequest *req = opaque;
+ NvmeNamespace *ns = req->ns;
+ struct nvme_copy_ctx *ctx = req->opaque;
+
+ trace_pci_nvme_copy_cb(nvme_cid(req));
+
+ if (ret) {
+ goto out;
+ }
+
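+ /*
+ * The data has been copied; if the namespace has metadata, write out
+ * the bounced metadata for the destination range before completing.
+ */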
+ if (nvme_msize(ns)) {
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
+ int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba);
+
+ qemu_iovec_reset(&req->sg.iov);
+ qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb));
+
+ req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &req->sg.iov, 0,
+ nvme_copy_complete_cb, req);
+ return;
+ }
+
+out:
+ nvme_copy_complete_cb(opaque, ret);
+}
+
static void nvme_copy_in_complete(NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
@@ -1677,6 +2200,70 @@ static void nvme_copy_in_complete(NvmeRequest *req)
block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ uint16_t prinfor = (copy->control[0] >> 4) & 0xf;
+ uint16_t prinfow = (copy->control[2] >> 2) & 0xf;
+ uint16_t nr = copy->nr + 1;
+ NvmeCopySourceRange *range;
+ uint64_t slba;
+ uint32_t nlb;
+ uint16_t apptag, appmask;
+ uint32_t reftag;
+ uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce;
+ size_t len, mlen;
+ int i;
+
+ /*
+ * The dif helpers expect prinfo to be similar to the control field of
+ * the NvmeRwCmd, so shift by 10 to fake it.
+ */
+ prinfor = prinfor << 10;
+ prinfow = prinfow << 10;
+
+ for (i = 0; i < nr; i++) {
+ range = &ctx->ranges[i];
+ slba = le64_to_cpu(range->slba);
+ nlb = le16_to_cpu(range->nlb) + 1;
+ len = nvme_l2b(ns, nlb);
+ mlen = nvme_m2b(ns, nlb);
+ apptag = le16_to_cpu(range->apptag);
+ appmask = le16_to_cpu(range->appmask);
+ reftag = le32_to_cpu(range->reftag);
+
+ status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba,
+ apptag, appmask, reftag);
+ if (status) {
+ goto invalid;
+ }
+
+ buf += len;
+ mbuf += mlen;
+ }
+
+ apptag = le16_to_cpu(copy->apptag);
+ appmask = le16_to_cpu(copy->appmask);
+ reftag = le32_to_cpu(copy->reftag);
+
+ if (prinfow & NVME_RW_PRINFO_PRACT) {
+ size_t len = nvme_l2b(ns, ctx->nlb);
+ size_t mlen = nvme_m2b(ns, ctx->nlb);
+
+ status = nvme_check_prinfo(ns, prinfow, sdlba, reftag);
+ if (status) {
+ goto invalid;
+ }
+
+ nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce,
+ mlen, apptag, reftag);
+ } else {
+ status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen,
+ prinfow, sdlba, apptag, appmask, reftag);
+ if (status) {
+ goto invalid;
+ }
+ }
+ }
+
status = nvme_check_bounds(ns, sdlba, ctx->nlb);
if (status) {
trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze);
@@ -1745,6 +2332,7 @@ static void nvme_aio_copy_in_cb(void *opaque, int ret)
block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
g_free(ctx->bounce);
+ g_free(ctx->mbounce);
g_free(ctx);
nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -1756,43 +2344,150 @@ static void nvme_aio_copy_in_cb(void *opaque, int ret)
}
struct nvme_compare_ctx {
- QEMUIOVector iov;
- uint8_t *bounce;
+ struct {
+ QEMUIOVector iov;
+ uint8_t *bounce;
+ } data;
+
+ struct {
+ QEMUIOVector iov;
+ uint8_t *bounce;
+ } mdata;
};
-static void nvme_compare_cb(void *opaque, int ret)
+static void nvme_compare_mdata_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
+ NvmeCtrl *n = nvme_ctrl(req);
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ struct nvme_compare_ctx *ctx = req->opaque;
+ g_autofree uint8_t *buf = NULL;
+ uint16_t status = NVME_SUCCESS;
+
+ trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
+
+ buf = g_malloc(ctx->mdata.iov.size);
+
+ status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size,
+ NVME_TX_DIRECTION_TO_DEVICE, req);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint8_t *bufp;
+ uint8_t *mbufp = ctx->mdata.bounce;
+ uint8_t *end = mbufp + ctx->mdata.iov.size;
+ size_t msize = nvme_msize(ns);
+ int16_t pil = 0;
+
+ status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
+ ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
+ slba, apptag, appmask, reftag);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ /*
+ * When formatted with protection information, do not compare the DIF
+ * tuple.
+ */
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+ }
+
+ for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) {
+ if (memcmp(bufp + pil, mbufp + pil, msize - pil)) {
+ req->status = NVME_CMP_FAILURE;
+ goto out;
+ }
+ }
+
+ goto out;
+ }
+
+ if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) {
+ req->status = NVME_CMP_FAILURE;
+ goto out;
+ }
+
+out:
+ qemu_iovec_destroy(&ctx->data.iov);
+ g_free(ctx->data.bounce);
+
+ qemu_iovec_destroy(&ctx->mdata.iov);
+ g_free(ctx->mdata.bounce);
+
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_compare_data_cb(void *opaque, int ret)
+{
+ NvmeRequest *req = opaque;
+ NvmeCtrl *n = nvme_ctrl(req);
+ NvmeNamespace *ns = req->ns;
+ BlockBackend *blk = ns->blkconf.blk;
+ BlockAcctCookie *acct = &req->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+
struct nvme_compare_ctx *ctx = req->opaque;
g_autofree uint8_t *buf = NULL;
uint16_t status;
- trace_pci_nvme_compare_cb(nvme_cid(req));
+ trace_pci_nvme_compare_data_cb(nvme_cid(req));
- if (!ret) {
- block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
- } else {
- block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
+ if (ret) {
+ block_acct_failed(stats, acct);
nvme_aio_err(req, ret);
goto out;
}
- buf = g_malloc(ctx->iov.size);
+ buf = g_malloc(ctx->data.iov.size);
- status = nvme_h2c(nvme_ctrl(req), buf, ctx->iov.size, req);
+ status = nvme_bounce_data(n, buf, ctx->data.iov.size,
+ NVME_TX_DIRECTION_TO_DEVICE, req);
if (status) {
req->status = status;
goto out;
}
- if (memcmp(buf, ctx->bounce, ctx->iov.size)) {
+ if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) {
req->status = NVME_CMP_FAILURE;
+ goto out;
+ }
+
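+ /*
+ * The data compared equal; if the namespace has metadata, read it back
+ * and compare it in nvme_compare_mdata_cb.
+ */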
+ if (nvme_msize(ns)) {
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ size_t mlen = nvme_m2b(ns, nlb);
+ uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+
+ ctx->mdata.bounce = g_malloc(mlen);
+
+ qemu_iovec_init(&ctx->mdata.iov, 1);
+ qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
+
+ req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
+ nvme_compare_mdata_cb, req);
+ return;
}
+ block_acct_done(stats, acct);
+
out:
- qemu_iovec_destroy(&ctx->iov);
- g_free(ctx->bounce);
+ qemu_iovec_destroy(&ctx->data.iov);
+ g_free(ctx->data.bounce);
g_free(ctx);
nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -1874,23 +2569,95 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ NvmeNamespace *ns = req->ns;
+ BlockBackend *blk = ns->blkconf.blk;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ size_t len = nvme_l2b(ns, nlb);
+ int64_t offset = nvme_l2b(ns, slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ NvmeBounceContext *ctx = NULL;
+ uint16_t status;
+
+ trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ return status;
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRACT) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+ }
+
+ if (len > n->page_size << n->params.vsl) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ status = nvme_check_bounds(ns, slba, nlb);
+ if (status) {
+ trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
+ return status;
+ }
+
+ if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+ status = nvme_check_dulbe(ns, slba, nlb);
+ if (status) {
+ return status;
+ }
+ }
+
+ ctx = g_new0(NvmeBounceContext, 1);
+ ctx->req = req;
+
+ ctx->data.bounce = g_malloc(len);
+
+ qemu_iovec_init(&ctx->data.iov, 1);
+ qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
+
+ block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
+ BLOCK_ACCT_READ);
+
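+ /*
+ * Read the data into the bounce buffer; the callback chain continues
+ * with the metadata read in nvme_verify_mdata_in_cb and, for protected
+ * namespaces, the end-to-end checks in nvme_verify_cb.
+ */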
+ req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
+ nvme_verify_mdata_in_cb, ctx);
+ return NVME_NO_COMPLETE;
+}
+
static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- g_autofree NvmeCopySourceRange *range = NULL;
uint16_t nr = copy->nr + 1;
uint8_t format = copy->control[0] & 0xf;
- uint32_t nlb = 0;
+ /*
+ * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the
+ * NVME_RW_PRINFO constants.
+ */
+ uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10;
+ uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10;
+
+ uint32_t nlb = 0;
uint8_t *bounce = NULL, *bouncep = NULL;
+ uint8_t *mbounce = NULL, *mbouncep = NULL;
struct nvme_copy_ctx *ctx;
uint16_t status;
int i;
trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
if (!(n->id_ctrl.ocfs & (1 << format))) {
trace_pci_nvme_err_copy_invalid_format(format);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -1900,39 +2667,41 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
return NVME_CMD_SIZE_LIMIT | NVME_DNR;
}
- range = g_new(NvmeCopySourceRange, nr);
+ ctx = g_new(struct nvme_copy_ctx, 1);
+ ctx->ranges = g_new(NvmeCopySourceRange, nr);
- status = nvme_h2c(n, (uint8_t *)range, nr * sizeof(NvmeCopySourceRange),
- req);
+ status = nvme_h2c(n, (uint8_t *)ctx->ranges,
+ nr * sizeof(NvmeCopySourceRange), req);
if (status) {
- return status;
+ goto out;
}
for (i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(range[i].slba);
- uint32_t _nlb = le16_to_cpu(range[i].nlb) + 1;
+ uint64_t slba = le64_to_cpu(ctx->ranges[i].slba);
+ uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1;
if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) {
- return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ goto out;
}
status = nvme_check_bounds(ns, slba, _nlb);
if (status) {
trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze);
- return status;
+ goto out;
}
if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
status = nvme_check_dulbe(ns, slba, _nlb);
if (status) {
- return status;
+ goto out;
}
}
if (ns->params.zoned) {
status = nvme_check_zone_read(ns, slba, _nlb);
if (status) {
- return status;
+ goto out;
}
}
@@ -1940,25 +2709,28 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
}
if (nlb > le32_to_cpu(ns->id_ns.mcl)) {
- return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ goto out;
}
bounce = bouncep = g_malloc(nvme_l2b(ns, nlb));
+ if (nvme_msize(ns)) {
+ mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb));
+ }
block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_READ);
- ctx = g_new(struct nvme_copy_ctx, 1);
-
ctx->bounce = bounce;
+ ctx->mbounce = mbounce;
ctx->nlb = nlb;
ctx->copies = 1;
req->opaque = ctx;
for (i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(range[i].slba);
- uint32_t nlb = le16_to_cpu(range[i].nlb) + 1;
+ uint64_t slba = le64_to_cpu(ctx->ranges[i].slba);
+ uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1;
size_t len = nvme_l2b(ns, nlb);
int64_t offset = nvme_l2b(ns, slba);
@@ -1977,6 +2749,24 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
nvme_aio_copy_in_cb, in_ctx);
bouncep += len;
+
+ if (nvme_msize(ns)) {
+ len = nvme_m2b(ns, nlb);
+ offset = ns->mdata_offset + nvme_m2b(ns, slba);
+
+ in_ctx = g_new(struct nvme_copy_in_ctx, 1);
+ in_ctx->req = req;
+
+ qemu_iovec_init(&in_ctx->iov, 1);
+ qemu_iovec_add(&in_ctx->iov, mbouncep, len);
+
+ ctx->copies++;
+
+ blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0,
+ nvme_aio_copy_in_cb, in_ctx);
+
+ mbouncep += len;
+ }
}
/* account for the 1-initialization */
@@ -1987,6 +2777,12 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
}
return NVME_NO_COMPLETE;
+
+out:
+ g_free(ctx->ranges);
+ g_free(ctx);
+
+ return status;
}
static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
@@ -1996,14 +2792,23 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
BlockBackend *blk = ns->blkconf.blk;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
- size_t len = nvme_l2b(ns, nlb);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ size_t data_len = nvme_l2b(ns, nlb);
+ size_t len = data_len;
int64_t offset = nvme_l2b(ns, slba);
- uint8_t *bounce = NULL;
struct nvme_compare_ctx *ctx = NULL;
uint16_t status;
trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ if (nvme_ns_ext(ns)) {
+ len += nvme_m2b(ns, nlb);
+ }
+
status = nvme_check_mdts(n, len);
if (status) {
return status;
@@ -2022,18 +2827,22 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
}
}
- bounce = g_malloc(len);
+ status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
+ if (status) {
+ return status;
+ }
ctx = g_new(struct nvme_compare_ctx, 1);
- ctx->bounce = bounce;
+ ctx->data.bounce = g_malloc(data_len);
req->opaque = ctx;
- qemu_iovec_init(&ctx->iov, 1);
- qemu_iovec_add(&ctx->iov, bounce, len);
+ qemu_iovec_init(&ctx->data.iov, 1);
+ qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);
- block_acct_start(blk_get_stats(blk), &req->acct, len, BLOCK_ACCT_READ);
- blk_aio_preadv(blk, offset, &ctx->iov, 0, nvme_compare_cb, req);
+ block_acct_start(blk_get_stats(blk), &req->acct, data_len,
+ BLOCK_ACCT_READ);
+ blk_aio_preadv(blk, offset, &ctx->data.iov, 0, nvme_compare_data_cb, req);
return NVME_NO_COMPLETE;
}
@@ -2056,7 +2865,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_FLUSH);
- req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req);
+ req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req);
return NVME_NO_COMPLETE;
}
@@ -2098,14 +2907,28 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
NvmeNamespace *ns = req->ns;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
uint64_t data_size = nvme_l2b(ns, nlb);
+ uint64_t mapped_size = data_size;
uint64_t data_offset;
BlockBackend *blk = ns->blkconf.blk;
uint16_t status;
- trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, data_size, slba);
+ if (nvme_ns_ext(ns)) {
+ mapped_size += nvme_m2b(ns, nlb);
- status = nvme_check_mdts(n, data_size);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ bool pract = ctrl & NVME_RW_PRINFO_PRACT;
+
+ if (pract && nvme_msize(ns) == 8) {
+ mapped_size = data_size;
+ }
+ }
+ }
+
+ trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba);
+
+ status = nvme_check_mdts(n, mapped_size);
if (status) {
goto invalid;
}
@@ -2124,11 +2947,6 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
}
}
- status = nvme_map_dptr(n, &req->sg, data_size, &req->cmd);
- if (status) {
- goto invalid;
- }
-
if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
status = nvme_check_dulbe(ns, slba, nlb);
if (status) {
@@ -2136,6 +2954,15 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
}
}
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ return nvme_dif_rw(n, req);
+ }
+
+ status = nvme_map_data(n, nlb, req);
+ if (status) {
+ goto invalid;
+ }
+
data_offset = nvme_l2b(ns, slba);
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
@@ -2155,18 +2982,32 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
NvmeNamespace *ns = req->ns;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
uint64_t data_size = nvme_l2b(ns, nlb);
+ uint64_t mapped_size = data_size;
uint64_t data_offset;
NvmeZone *zone;
NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe;
BlockBackend *blk = ns->blkconf.blk;
uint16_t status;
+ if (nvme_ns_ext(ns)) {
+ mapped_size += nvme_m2b(ns, nlb);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ bool pract = ctrl & NVME_RW_PRINFO_PRACT;
+
+ if (pract && nvme_msize(ns) == 8) {
+ mapped_size -= nvme_m2b(ns, nlb);
+ }
+ }
+ }
+
trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
- nvme_nsid(ns), nlb, data_size, slba);
+ nvme_nsid(ns), nlb, mapped_size, slba);
if (!wrz) {
- status = nvme_check_mdts(n, data_size);
+ status = nvme_check_mdts(n, mapped_size);
if (status) {
goto invalid;
}
@@ -2182,19 +3023,47 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
zone = nvme_get_zone_by_slba(ns, slba);
if (append) {
+ bool piremap = !!(ctrl & NVME_RW_PIREMAP);
+
if (unlikely(slba != zone->d.zslba)) {
trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
status = NVME_INVALID_FIELD;
goto invalid;
}
- if (n->params.zasl && data_size > n->page_size << n->params.zasl) {
+ if (n->params.zasl &&
+ data_size > (uint64_t)n->page_size << n->params.zasl) {
trace_pci_nvme_err_zasl(data_size);
return NVME_INVALID_FIELD | NVME_DNR;
}
slba = zone->w_ptr;
+ rw->slba = cpu_to_le64(slba);
res->slba = cpu_to_le64(slba);
+
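+ /*
+ * Type 1 protection requires the PIREMAP bit to be set; for Type 3 it
+ * is invalid. When set, the initial reference tag is remapped to
+ * account for the zone append write being relocated to the write
+ * pointer.
+ */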
+ switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ case NVME_ID_NS_DPS_TYPE_1:
+ if (!piremap) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ /* fallthrough */
+
+ case NVME_ID_NS_DPS_TYPE_2:
+ if (piremap) {
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba));
+ }
+
+ break;
+
+ case NVME_ID_NS_DPS_TYPE_3:
+ if (piremap) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ break;
+ }
}
status = nvme_check_zone_write(ns, zone, slba, nlb);
@@ -2212,8 +3081,12 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
data_offset = nvme_l2b(ns, slba);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ return nvme_dif_rw(n, req);
+ }
+
if (!wrz) {
- status = nvme_map_dptr(n, &req->sg, data_size, &req->cmd);
+ status = nvme_map_data(n, nlb, req);
if (status) {
goto invalid;
}
@@ -2226,6 +3099,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
req);
}
+
return NVME_NO_COMPLETE;
invalid:
@@ -2619,12 +3493,13 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
uint32_t zone_idx, zra, zrasf, partial;
uint64_t max_zones, nr_zones = 0;
uint16_t status;
- uint64_t slba, capacity = nvme_ns_nlbas(ns);
+ uint64_t slba;
NvmeZoneDescr *z;
NvmeZone *zone;
NvmeZoneReportHeader *header;
void *buf, *buf_p;
size_t zone_entry_sz;
+ int i;
req->status = NVME_SUCCESS;
@@ -2666,7 +3541,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
buf = g_malloc0(data_size);
zone = &ns->zone_array[zone_idx];
- for (; slba < capacity; slba += ns->zone_size) {
+ for (i = zone_idx; i < ns->num_zones; i++) {
if (partial && nr_zones >= max_zones) {
break;
}
@@ -2718,6 +3593,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
{
uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint16_t status;
trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
@@ -2759,6 +3635,11 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_OPCODE | NVME_DNR;
}
+ status = nvme_ns_status(req->ns);
+ if (unlikely(status)) {
+ return status;
+ }
+
switch (req->cmd.opcode) {
case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req);
@@ -2772,6 +3653,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_compare(n, req);
case NVME_CMD_DSM:
return nvme_dsm(n, req);
+ case NVME_CMD_VERIFY:
+ return nvme_verify(n, req);
case NVME_CMD_COPY:
return nvme_copy(n, req);
case NVME_CMD_ZONE_MGMT_SEND:
@@ -3288,12 +4171,14 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
+ NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id;
trace_pci_nvme_identify_ctrl_csi(c->csi);
switch (c->csi) {
case NVME_CSI_NVM:
- ((NvmeIdCtrlNvm *)&id)->dmrsl = cpu_to_le32(n->dmrsl);
+ id_nvm->vsl = n->params.vsl;
+ id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
break;
case NVME_CSI_ZONED:
@@ -4056,6 +4941,134 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
return NVME_SUCCESS;
}
+static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf,
+ uint8_t mset, uint8_t pi, uint8_t pil,
+ NvmeRequest *req)
+{
+ int64_t len, offset;
+ struct nvme_aio_format_ctx *ctx;
+ BlockBackend *blk = ns->blkconf.blk;
+ uint16_t ms;
+ uintptr_t *num_formats = (uintptr_t *)&req->opaque;
+ int *count;
+
+ if (ns->params.zoned) {
+ return NVME_INVALID_FORMAT | NVME_DNR;
+ }
+
+ trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil);
+
+ if (lbaf > ns->id_ns.nlbaf) {
+ return NVME_INVALID_FORMAT | NVME_DNR;
+ }
+
+ ms = ns->id_ns.lbaf[lbaf].ms;
+
+ if (pi && (ms < sizeof(NvmeDifTuple))) {
+ return NVME_INVALID_FORMAT | NVME_DNR;
+ }
+
+ if (pi && pi > NVME_ID_NS_DPS_TYPE_3) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ nvme_ns_drain(ns);
+ nvme_ns_shutdown(ns);
+ nvme_ns_cleanup(ns);
+
+ ns->id_ns.dps = (pil << 3) | pi;
+ ns->id_ns.flbas = lbaf | (mset << 4);
+
+ nvme_ns_init_format(ns);
+
+ ns->status = NVME_FORMAT_IN_PROGRESS;
+
+ len = ns->size;
+ offset = 0;
+
+ count = g_new(int, 1);
+ *count = 1;
+
+ (*num_formats)++;
+
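+ /* zero out the namespace in chunks of up to BDRV_REQUEST_MAX_BYTES */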
+ while (len) {
+ ctx = g_new(struct nvme_aio_format_ctx, 1);
+ ctx->req = req;
+ ctx->ns = ns;
+ ctx->count = count;
+
+ size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len);
+
+ (*count)++;
+
+ blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP,
+ nvme_aio_format_cb, ctx);
+
+ offset += bytes;
+ len -= bytes;
+
+ }
+
+ (*count)--;
+
+ return NVME_NO_COMPLETE;
+}
+
+static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns;
+ uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint8_t lbaf = dw10 & 0xf;
+ uint8_t mset = (dw10 >> 4) & 0x1;
+ uint8_t pi = (dw10 >> 5) & 0x7;
+ uint8_t pil = (dw10 >> 8) & 0x1;
+ uintptr_t *num_formats = (uintptr_t *)&req->opaque;
+ uint16_t status;
+ int i;
+
+ trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil);
+
+ /* 1-initialize; see the comment in nvme_dsm */
+ *num_formats = 1;
+
+ if (nsid != NVME_NSID_BROADCAST) {
+ if (!nvme_nsid_valid(n, nsid)) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
+ ns = nvme_ns(n, nsid);
+ if (!ns) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req);
+ if (status && status != NVME_NO_COMPLETE) {
+ req->status = status;
+ }
+ } else {
+ for (i = 1; i <= n->num_namespaces; i++) {
+ ns = nvme_ns(n, i);
+ if (!ns) {
+ continue;
+ }
+
+ status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req);
+ if (status && status != NVME_NO_COMPLETE) {
+ req->status = status;
+ break;
+ }
+ }
+ }
+
+ /* account for the 1-initialization */
+ if (--(*num_formats)) {
+ return NVME_NO_COMPLETE;
+ }
+
+ return req->status;
+}
+
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -4094,6 +5107,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_aer(n, req);
case NVME_ADM_CMD_NS_ATTACHMENT:
return nvme_ns_attachment(n, req);
+ case NVME_ADM_CMD_FORMAT_NVM:
+ return nvme_format(n, req);
default:
assert(false);
}
@@ -4836,6 +5851,11 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
"than or equal to mdts (Maximum Data Transfer Size)");
return;
}
+
+ if (!n->params.vsl) {
+ error_setg(errp, "vsl must be non-zero");
+ return;
+ }
}
static void nvme_init_state(NvmeCtrl *n)
@@ -5065,7 +6085,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->mdts = n->params.mdts;
id->ver = cpu_to_le32(NVME_SPEC_VER);
- id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT);
+ id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT);
id->cntrltype = 0x1;
/*
@@ -5236,6 +6256,7 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
+ DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7),
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),