diff options
Diffstat (limited to 'hw')
-rw-r--r-- | hw/block/meson.build | 2 | ||||
-rw-r--r-- | hw/block/nvme-dif.c | 508 | ||||
-rw-r--r-- | hw/block/nvme-dif.h | 53 | ||||
-rw-r--r-- | hw/block/nvme-ns.c | 124 | ||||
-rw-r--r-- | hw/block/nvme-ns.h | 50 | ||||
-rw-r--r-- | hw/block/nvme-subsys.c | 7 | ||||
-rw-r--r-- | hw/block/nvme-subsys.h | 2 | ||||
-rw-r--r-- | hw/block/nvme.c | 1237 | ||||
-rw-r--r-- | hw/block/nvme.h | 44 | ||||
-rw-r--r-- | hw/block/trace-events | 22 | ||||
-rw-r--r-- | hw/core/Kconfig | 5 | ||||
-rw-r--r-- | hw/core/machine-hmp-cmds.c | 8 | ||||
-rw-r--r-- | hw/core/machine-qmp-cmds.c | 120 | ||||
-rw-r--r-- | hw/core/meson.build | 3 | ||||
-rw-r--r-- | hw/i386/pc.c | 2 | ||||
-rw-r--r-- | hw/ide/qdev.c | 38 | ||||
-rw-r--r-- | hw/misc/mac_via.c | 194 | ||||
-rw-r--r-- | hw/misc/trace-events | 4 | ||||
-rw-r--r-- | hw/ppc/mac_newworld.c | 13 | ||||
-rw-r--r-- | hw/ppc/mac_oldworld.c | 13 | ||||
-rw-r--r-- | hw/scsi/scsi-disk.c | 62 | ||||
-rw-r--r-- | hw/sparc64/sun4u.c | 15 |
22 files changed, 1993 insertions, 533 deletions
diff --git a/hw/block/meson.build b/hw/block/meson.build index 5492829155..5b4a7699f9 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c')) +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c', 'nvme-dif.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) diff --git a/hw/block/nvme-dif.c b/hw/block/nvme-dif.c new file mode 100644 index 0000000000..2038d724bd --- /dev/null +++ b/hw/block/nvme-dif.c @@ -0,0 +1,508 @@ +#include "qemu/osdep.h" +#include "hw/block/block.h" +#include "sysemu/dma.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" +#include "trace.h" +#include "nvme.h" +#include "nvme-dif.h" + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, + uint32_t reftag) +{ + if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && + (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + return NVME_SUCCESS; +} + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, + size_t len) +{ + unsigned int i; + + for (i = 0; i < len; i++) { + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + } + + return crc; +} + +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t reftag) +{ + uint8_t *end = buf + len; + size_t lsize = nvme_lsize(ns); + size_t msize = nvme_msize(ns); + int16_t pil = 0; + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + } + + trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag, + reftag); + + for (; buf < end; buf += lsize, mbuf += msize) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + uint16_t crc = crc_t10dif(0x0, buf, lsize); + + if (pil) { + crc = crc_t10dif(crc, mbuf, pil); + } + + dif->guard = cpu_to_be16(crc); + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(reftag); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + reftag++; + } + } +} + +static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint16_t ctrl, uint16_t apptag, + uint16_t appmask, uint32_t reftag) +{ + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_3: + if (be32_to_cpu(dif->reftag) != 0xffffffff) { + break; + } + + /* fallthrough */ + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + if (be16_to_cpu(dif->apptag) != 0xffff) { + break; + } + + trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), + be32_to_cpu(dif->reftag)); + + return NVME_SUCCESS; + } + + if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { + uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns)); + + if (pil) { + crc = crc_t10dif(crc, mbuf, pil); + } + + trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); + + if (be16_to_cpu(dif->guard) != crc) { + return NVME_E2E_GUARD_ERROR; + } + } + + if (ctrl & NVME_RW_PRINFO_PRCHK_APP) { + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, + appmask); + + if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { + return NVME_E2E_APP_ERROR; + } + } + + if (ctrl & NVME_RW_PRINFO_PRCHK_REF) { + trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); + + if (be32_to_cpu(dif->reftag) != reftag) { + return NVME_E2E_REF_ERROR; + } + } + + return NVME_SUCCESS; +} + +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t ctrl, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t reftag) +{ + uint8_t *end = buf + len; + size_t lsize = nvme_lsize(ns); + size_t msize = nvme_msize(ns); + int16_t pil = 0; + uint16_t status; + + status = nvme_check_prinfo(ns, ctrl, slba, reftag); + if (status) { + return status; + } + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + } + + trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil); + + for (; buf < end; buf += lsize, mbuf += msize) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + + status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, + appmask, reftag); + if (status) { + return status; + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + reftag++; + } + } + + return NVME_SUCCESS; +} + +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba) +{ + BlockBackend *blk = ns->blkconf.blk; + BlockDriverState *bs = blk_bs(blk); + + size_t msize = nvme_msize(ns); + size_t lsize = nvme_lsize(ns); + int64_t moffset = 0, offset = nvme_l2b(ns, slba); + uint8_t *mbufp, *end; + bool zeroed; + int16_t pil = 0; + int64_t bytes = (mlen / msize) * lsize; + int64_t pnum = 0; + + Error *err = NULL; + + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + } + + do { + int ret; + + bytes -= pnum; + + ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); + if (ret < 0) { + error_setg_errno(&err, -ret, "unable to get block status"); + error_report_err(err); + + return NVME_INTERNAL_DEV_ERROR; + } + + zeroed = !!(ret & BDRV_BLOCK_ZERO); + + trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); + + if (zeroed) { + mbufp = mbuf + moffset; + mlen = (pnum / lsize) * msize; + end = mbufp + mlen; + + for (; mbufp < end; mbufp += msize) { + memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); + } + } + + moffset += (pnum / lsize) * msize; + offset += pnum; + } while (pnum != bytes); + + return NVME_SUCCESS; +} + +static void nvme_dif_rw_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk)); + + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_rw_complete_cb(req, ret); +} + +static void nvme_dif_rw_check_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint16_t ctrl = le16_to_cpu(rw->control); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, + appmask, reftag); + + if (ret) { + goto out; + } + + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size, + slba); + if (status) { + req->status = status; + goto out; + } + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + slba, apptag, appmask, reftag); + if (status) { + req->status = status; + goto out; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) { + goto out; + } + + status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + } + +out: + nvme_dif_rw_cb(ctx, ret); +} + +static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_dif_rw_check_cb, ctx); + return; + +out: + nvme_dif_rw_cb(ctx, ret); +} + +static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0, + nvme_dif_rw_cb, ctx); + return; + +out: + nvme_dif_rw_cb(ctx, ret); +} + +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint64_t slba = le64_to_cpu(rw->slba); + size_t len = nvme_l2b(ns, nlb); + size_t mlen = nvme_m2b(ns, nlb); + size_t mapped_len = len; + int64_t offset = nvme_l2b(ns, slba); + uint16_t ctrl = le16_to_cpu(rw->control); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT); + NvmeBounceContext *ctx; + uint16_t status; + + trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl)); + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + if (wrz) { + BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; + + if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) { + status = NVME_INVALID_PROT_INFO | NVME_DNR; + goto err; + } + + if (pract) { + uint8_t *mbuf, *end; + size_t msize = nvme_msize(ns); + int16_t pil = msize - sizeof(NvmeDifTuple); + + status = nvme_check_prinfo(ns, ctrl, slba, reftag); + if (status) { + goto err; + } + + flags = 0; + + ctx->mdata.bounce = g_malloc0(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + mbuf = ctx->mdata.bounce; + end = mbuf + mlen; + + if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) { + pil = 0; + } + + for (; mbuf < end; mbuf += msize) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(reftag); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + reftag++; + } + } + } + + req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags, + nvme_dif_rw_mdata_out_cb, ctx); + return NVME_NO_COMPLETE; + } + + if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) { + mapped_len += mlen; + } + + status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd); + if (status) { + return status; + } + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + if (req->cmd.opcode == NVME_CMD_READ) { + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_dif_rw_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + if (!(pract && nvme_msize(ns) == 8)) { + status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + } + + status = nvme_check_prinfo(ns, ctrl, slba, reftag); + if (status) { + goto err; + } + + if (pract) { + /* splice generated protection information into the buffer */ + nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + apptag, reftag); + } else { + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + slba, apptag, appmask, reftag); + if (status) { + goto err; + } + } + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_WRITE); + + req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_dif_rw_mdata_out_cb, ctx); + + return NVME_NO_COMPLETE; + +err: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + return status; +} diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h new file mode 100644 index 0000000000..5a8e37c852 --- /dev/null +++ b/hw/block/nvme-dif.h @@ -0,0 +1,53 @@ +#ifndef HW_NVME_DIF_H +#define HW_NVME_DIF_H + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, + uint32_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t ctrl, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + +#endif /* HW_NVME_DIF_H */ diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index eda6a0c003..7f8d139a86 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -32,36 +32,46 @@ #define MIN_DISCARD_GRANULARITY (4 * KiB) -static int nvme_ns_init(NvmeNamespace *ns, Error **errp) +void nvme_ns_init_format(NvmeNamespace *ns) { - BlockDriverInfo bdi; NvmeIdNs *id_ns = &ns->id_ns; - int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); - int npdg; - - ns->id_ns.dlfeat = 0x9; + BlockDriverInfo bdi; + int npdg, nlbas, ret; - id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size); + nlbas = nvme_ns_nlbas(ns); - id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns)); - - ns->csi = NVME_CSI_NVM; + id_ns->nsze = cpu_to_le64(nlbas); /* no thin provisioning */ id_ns->ncap = id_ns->nsze; id_ns->nuse = id_ns->ncap; - /* support DULBE and I/O optimization fields */ - id_ns->nsfeat |= (0x4 | 0x10); + ns->mdata_offset = nvme_l2b(ns, nlbas); - npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size; + npdg = ns->blkconf.discard_granularity / nvme_lsize(ns); - if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 && - bdi.cluster_size > ns->blkconf.discard_granularity) { - npdg = bdi.cluster_size / ns->blkconf.logical_block_size; + ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); + if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { + npdg = bdi.cluster_size / nvme_lsize(ns); } id_ns->npda = id_ns->npdg = npdg - 1; +} + +static int nvme_ns_init(NvmeNamespace *ns, Error **errp) +{ + NvmeIdNs *id_ns = &ns->id_ns; + uint8_t ds; + uint16_t ms; + int i; + + ns->csi = NVME_CSI_NVM; + ns->status = 0x0; + + ns->id_ns.dlfeat = 0x1; + + /* support DULBE and I/O optimization fields */ + id_ns->nsfeat |= (0x4 | 0x10); if (nvme_ns_shared(ns)) { id_ns->nmic |= NVME_NMIC_NS_SHARED; @@ -72,6 +82,61 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->mcl = cpu_to_le32(ns->params.mcl); id_ns->msrc = ns->params.msrc; + ds = 31 - clz32(ns->blkconf.logical_block_size); + ms = ns->params.ms; + + if (ns->params.ms) { + id_ns->mc = 0x3; + + if (ns->params.mset) { + id_ns->flbas |= 0x10; + } + + id_ns->dpc = 0x1f; + id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; + + NvmeLBAF lbaf[16] = { + [0] = { .ds = 9 }, + [1] = { .ds = 9, .ms = 8 }, + [2] = { .ds = 9, .ms = 16 }, + [3] = { .ds = 9, .ms = 64 }, + [4] = { .ds = 12 }, + [5] = { .ds = 12, .ms = 8 }, + [6] = { .ds = 12, .ms = 16 }, + [7] = { .ds = 12, .ms = 64 }, + }; + + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); + id_ns->nlbaf = 7; + } else { + NvmeLBAF lbaf[16] = { + [0] = { .ds = 9 }, + [1] = { .ds = 12 }, + }; + + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); + id_ns->nlbaf = 1; + } + + for (i = 0; i <= id_ns->nlbaf; i++) { + NvmeLBAF *lbaf = &id_ns->lbaf[i]; + if (lbaf->ds == ds) { + if (lbaf->ms == ms) { + id_ns->flbas |= i; + goto lbaf_found; + } + } + } + + /* add non-standard lba format */ + id_ns->nlbaf++; + id_ns->lbaf[id_ns->nlbaf].ds = ds; + id_ns->lbaf[id_ns->nlbaf].ms = ms; + id_ns->flbas |= id_ns->nlbaf; + +lbaf_found: + nvme_ns_init_format(ns); + return 0; } @@ -105,7 +170,7 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) { uint64_t zone_size, zone_cap; - uint32_t lbasz = ns->blkconf.logical_block_size; + uint32_t lbasz = nvme_lsize(ns); /* Make sure that the values of ZNS properties are sane */ if (ns->params.zone_size_bs) { @@ -140,7 +205,7 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) */ ns->zone_size = zone_size / lbasz; ns->zone_capacity = zone_cap / lbasz; - ns->num_zones = ns->size / lbasz / ns->zone_size; + ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size; /* Do a few more sanity checks of ZNS properties */ if (!ns->num_zones) { @@ -229,9 +294,10 @@ static void nvme_ns_zoned_init_state(NvmeNamespace *ns) } } -static void nvme_ns_init_zoned(NvmeNamespace *ns, int lba_index) +static void nvme_ns_init_zoned(NvmeNamespace *ns) { NvmeIdNsZoned *id_ns_z; + int i; nvme_ns_zoned_init_state(ns); @@ -243,9 +309,11 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns, int lba_index) id_ns_z->zoc = 0; id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; - id_ns_z->lbafe[lba_index].zsze = cpu_to_le64(ns->zone_size); - id_ns_z->lbafe[lba_index].zdes = - ns->params.zd_extension_size >> 6; /* Units of 64B */ + for (i = 0; i <= ns->id_ns.nlbaf; i++) { + id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); + id_ns_z->lbafe[i].zdes = + ns->params.zd_extension_size >> 6; /* Units of 64B */ + } ns->csi = NVME_CSI_ZONED; ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); @@ -326,6 +394,12 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) return -1; } + if (ns->params.pi && !ns->params.ms) { + error_setg(errp, "at least 8 bytes of metadata required to enable " + "protection information"); + return -1; + } + return 0; } @@ -346,7 +420,7 @@ int nvme_ns_setup(NvmeNamespace *ns, Error **errp) if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { return -1; } - nvme_ns_init_zoned(ns, 0); + nvme_ns_init_zoned(ns); } return 0; @@ -402,6 +476,10 @@ static Property nvme_ns_props[] = { DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), + DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), + DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), + DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index 318d3aebe1..9ab7894fc8 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -15,6 +15,8 @@ #ifndef NVME_NS_H #define NVME_NS_H +#include "qemu/uuid.h" + #define TYPE_NVME_NS "nvme-ns" #define NVME_NS(obj) \ OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) @@ -30,6 +32,11 @@ typedef struct NvmeNamespaceParams { uint32_t nsid; QemuUUID uuid; + uint16_t ms; + uint8_t mset; + uint8_t pi; + uint8_t pil; + uint16_t mssrl; uint32_t mcl; uint8_t msrc; @@ -48,9 +55,11 @@ typedef struct NvmeNamespace { BlockConf blkconf; int32_t bootindex; int64_t size; + int64_t mdata_offset; NvmeIdNs id_ns; const uint32_t *iocs; uint8_t csi; + uint16_t status; NvmeSubsystem *subsys; QTAILQ_ENTRY(NvmeNamespace) entry; @@ -76,6 +85,11 @@ typedef struct NvmeNamespace { } features; } NvmeNamespace; +static inline uint16_t nvme_ns_status(NvmeNamespace *ns) +{ + return ns->status; +} + static inline uint32_t nvme_nsid(NvmeNamespace *ns) { if (ns) { @@ -101,18 +115,41 @@ static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) return nvme_ns_lbaf(ns)->ds; } -/* calculate the number of LBAs that the namespace can accomodate */ -static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) -{ - return ns->size >> nvme_ns_lbads(ns); -} - /* convert an LBA to the equivalent in bytes */ static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) { return lba << nvme_ns_lbads(ns); } +static inline size_t nvme_lsize(NvmeNamespace *ns) +{ + return 1 << nvme_ns_lbads(ns); +} + +static inline uint16_t nvme_msize(NvmeNamespace *ns) +{ + return nvme_ns_lbaf(ns)->ms; +} + +static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) +{ + return nvme_msize(ns) * lba; +} + +static inline bool nvme_ns_ext(NvmeNamespace *ns) +{ + return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); +} + +/* calculate the number of LBAs that the namespace can accomodate */ +static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) +{ + if (nvme_msize(ns)) { + return ns->size / (nvme_lsize(ns) + nvme_msize(ns)); + } + return ns->size >> nvme_ns_lbads(ns); +} + typedef struct NvmeCtrl NvmeCtrl; static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) @@ -187,6 +224,7 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns) assert(ns->nr_active_zones >= 0); } +void nvme_ns_init_format(NvmeNamespace *ns); int nvme_ns_setup(NvmeNamespace *ns, Error **errp); void nvme_ns_drain(NvmeNamespace *ns); void nvme_ns_shutdown(NvmeNamespace *ns); diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c index af4804a819..9fadef8cec 100644 --- a/hw/block/nvme-subsys.c +++ b/hw/block/nvme-subsys.c @@ -47,15 +47,18 @@ int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp) { NvmeSubsystem *subsys = ns->subsys; NvmeCtrl *n; + uint32_t nsid = nvme_nsid(ns); int i; - if (subsys->namespaces[nvme_nsid(ns)]) { + assert(nsid && nsid <= NVME_SUBSYS_MAX_NAMESPACES); + + if (subsys->namespaces[nsid]) { error_setg(errp, "namespace %d already registerd to subsy %s", nvme_nsid(ns), subsys->parent_obj.id); return -1; } - subsys->namespaces[nvme_nsid(ns)] = ns; + subsys->namespaces[nsid] = ns; for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { n = subsys->ctrls[i]; diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h index fb66ae752a..aafa04b848 100644 --- a/hw/block/nvme-subsys.h +++ b/hw/block/nvme-subsys.h @@ -54,6 +54,8 @@ static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, return NULL; } + assert(nsid && nsid <= NVME_SUBSYS_MAX_NAMESPACES); + return subsys->namespaces[nsid]; } diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d439e44db8..6842b01ab5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -23,7 +23,8 @@ * [pmrdev=<mem_backend_file_id>,] \ * max_ioqpairs=<N[optional]>, \ * aerl=<N[optional]>,aer_max_queued=<N[optional]>, \ - * mdts=<N[optional]>,zoned.zasl=<N[optional]>, \ + * mdts=<N[optional]>,vsl=<N[optional]>, \ + * zoned.zasl=<N[optional]>, \ * subsys=<subsys_id> * -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\ * zoned=<true|false[optional]>, \ @@ -78,12 +79,26 @@ * as a power of two (2^n) and is in units of the minimum memory page size * (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB). * + * - `vsl` + * Indicates the maximum data size limit for the Verify command. Like `mdts`, + * this value is specified as a power of two (2^n) and is in units of the + * minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512 + * KiB). + * * - `zoned.zasl` * Indicates the maximum data transfer size for the Zone Append command. Like * `mdts`, the value is specified as a power of two (2^n) and is in units of * the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e. * defaulting to the value of `mdts`). * + * - `zoned.append_size_limit` + * The maximum I/O size in bytes that is allowed in Zone Append command. + * The default is 128KiB. Since internally this this value is maintained as + * ZASL = log2(<maximum append size> / <page size>), some values assigned + * to this property may be rounded down and result in a lower maximum ZA + * data size being in effect. By setting this property to 0, users can make + * ZASL to be equal to MDTS. This property only affects zoned namespaces. + * * nvme namespace device parameters * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - `subsys` @@ -144,6 +159,7 @@ #include "trace.h" #include "nvme.h" #include "nvme-ns.h" +#include "nvme-dif.h" #define NVME_MAX_IOQPAIRS 0xffff #define NVME_DB_SIZE 4 @@ -197,6 +213,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, + [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -207,6 +224,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, }; @@ -217,6 +235,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -226,15 +245,6 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { static void nvme_process_sq(void *opaque); -static uint16_t nvme_cid(NvmeRequest *req) -{ - if (!req) { - return 0xffff; - } - - return le16_to_cpu(req->cqe.cid); -} - static uint16_t nvme_sqid(NvmeRequest *req) { return le16_to_cpu(req->sq->sqid); @@ -360,6 +370,26 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) return pci_dma_read(&n->parent_obj, addr, buf, size); } +static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + hwaddr hi = addr + size - 1; + if (hi < addr) { + return 1; + } + + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { + memcpy(nvme_addr_to_cmb(n, addr), buf, size); + return 0; + } + + if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { + memcpy(nvme_addr_to_pmr(n, addr), buf, size); + return 0; + } + + return pci_dma_write(&n->parent_obj, addr, buf, size); +} + static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) { return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces); @@ -476,6 +506,59 @@ static inline void nvme_sg_unmap(NvmeSg *sg) memset(sg, 0x0, sizeof(*sg)); } +/* + * When metadata is transfered as extended LBAs, the DPTR mapped into `sg` + * holds both data and metadata. This function splits the data and metadata + * into two separate QSG/IOVs. + */ +static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, + NvmeSg *mdata) +{ + NvmeSg *dst = data; + size_t size = nvme_lsize(ns); + size_t msize = nvme_msize(ns); + uint32_t trans_len, count = size; + uint64_t offset = 0; + bool dma = sg->flags & NVME_SG_DMA; + size_t sge_len; + size_t sg_len = dma ? sg->qsg.size : sg->iov.size; + int sg_idx = 0; + + assert(sg->flags & NVME_SG_ALLOC); + + while (sg_len) { + sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; + + trans_len = MIN(sg_len, count); + trans_len = MIN(trans_len, sge_len - offset); + + if (dst) { + if (dma) { + qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset, + trans_len); + } else { + qemu_iovec_add(&dst->iov, + sg->iov.iov[sg_idx].iov_base + offset, + trans_len); + } + } + + sg_len -= trans_len; + count -= trans_len; + offset += trans_len; + + if (count == 0) { + dst = (dst == data) ? mdata : data; + count = (dst == data) ? size : msize; + } + + if (sge_len == offset) { + offset = 0; + sg_idx++; + } + } +} + static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, size_t len) { @@ -860,8 +943,8 @@ unmap: return status; } -static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd) +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) { uint64_t prp1, prp2; @@ -879,10 +962,158 @@ static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, } } -typedef enum NvmeTxDirection { - NVME_TX_DIRECTION_TO_DEVICE = 0, - NVME_TX_DIRECTION_FROM_DEVICE = 1, -} NvmeTxDirection; +static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) +{ + int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags); + hwaddr mptr = le64_to_cpu(cmd->mptr); + uint16_t status; + + if (psdt == NVME_PSDT_SGL_MPTR_SGL) { + NvmeSglDescriptor sgl; + + if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) { + return NVME_DATA_TRAS_ERROR; + } + + status = nvme_map_sgl(n, sg, sgl, len, cmd); + if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) { + status = NVME_MD_SGL_LEN_INVALID | NVME_DNR; + } + + return status; + } + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr)); + status = nvme_map_addr(n, sg, mptr, len); + if (status) { + nvme_sg_unmap(sg); + } + + return status; +} + +static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint16_t ctrl = le16_to_cpu(rw->control); + size_t len = nvme_l2b(ns, nlb); + uint16_t status; + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { + goto out; + } + + if (nvme_ns_ext(ns)) { + NvmeSg sg; + + len += nvme_m2b(ns, nlb); + + status = nvme_map_dptr(n, &sg, len, &req->cmd); + if (status) { + return status; + } + + nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); + nvme_sg_split(&sg, ns, &req->sg, NULL); + nvme_sg_unmap(&sg); + + return NVME_SUCCESS; + } + +out: + return nvme_map_dptr(n, &req->sg, len, &req->cmd); +} + +static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + size_t len = nvme_m2b(ns, nlb); + uint16_t status; + + if (nvme_ns_ext(ns)) { + NvmeSg sg; + + len += nvme_l2b(ns, nlb); + + status = nvme_map_dptr(n, &sg, len, &req->cmd); + if (status) { + return status; + } + + nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); + nvme_sg_split(&sg, ns, NULL, &req->sg); + nvme_sg_unmap(&sg); + + return NVME_SUCCESS; + } + + return nvme_map_mptr(n, &req->sg, len, &req->cmd); +} + +static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, + uint32_t len, uint32_t bytes, + int32_t skip_bytes, int64_t offset, + NvmeTxDirection dir) +{ + hwaddr addr; + uint32_t trans_len, count = bytes; + bool dma = sg->flags & NVME_SG_DMA; + int64_t sge_len; + int sg_idx = 0; + int ret; + + assert(sg->flags & NVME_SG_ALLOC); + + while (len) { + sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; + + if (sge_len - offset < 0) { + offset -= sge_len; + sg_idx++; + continue; + } + + if (sge_len == offset) { + offset = 0; + sg_idx++; + continue; + } + + trans_len = MIN(len, count); + trans_len = MIN(trans_len, sge_len - offset); + + if (dma) { + addr = sg->qsg.sg[sg_idx].base + offset; + } else { + addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset; + } + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + ret = nvme_addr_read(n, addr, ptr, trans_len); + } else { + ret = nvme_addr_write(n, addr, ptr, trans_len); + } + + if (ret) { + return NVME_DATA_TRAS_ERROR; + } + + ptr += trans_len; + len -= trans_len; + count -= trans_len; + offset += trans_len; + + if (count == 0) { + count = bytes; + offset += skip_bytes; + } + } + + return NVME_SUCCESS; +} static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, NvmeTxDirection dir) @@ -946,6 +1177,49 @@ static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); } +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint16_t ctrl = le16_to_cpu(rw->control); + + if (nvme_ns_ext(ns) && + !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { + size_t lsize = nvme_lsize(ns); + size_t msize = nvme_msize(ns); + + return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0, + dir); + } + + return nvme_tx(n, &req->sg, ptr, len, dir); +} + +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + uint16_t status; + + if (nvme_ns_ext(ns)) { + size_t lsize = nvme_lsize(ns); + size_t msize = nvme_msize(ns); + + return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize, + dir); + } + + nvme_sg_unmap(&req->sg); + + status = nvme_map_mptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, dir); +} + static inline void nvme_blk_read(BlockBackend *blk, int64_t offset, BlockCompletionFunc *cb, NvmeRequest *req) { @@ -1498,7 +1772,7 @@ static inline bool nvme_is_write(NvmeRequest *req) rw->opcode == NVME_CMD_WRITE_ZEROES; } -static void nvme_rw_cb(void *opaque, int ret) +static void nvme_misc_cb(void *opaque, int ret) { NvmeRequest *req = opaque; NvmeNamespace *ns = req->ns; @@ -1507,19 +1781,125 @@ static void nvme_rw_cb(void *opaque, int ret) BlockAcctCookie *acct = &req->acct; BlockAcctStats *stats = blk_get_stats(blk); - trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); + trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + } else { + block_acct_done(stats, acct); + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +void nvme_rw_complete_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + } else { + block_acct_done(stats, acct); + } if (ns->params.zoned && nvme_is_write(req)) { nvme_finalize_zoned_write(ns, req); } - if (!ret) { - block_acct_done(stats, acct); - } else { - block_acct_failed(stats, acct); + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static void nvme_rw_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + if (nvme_msize(ns)) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + + if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) { + size_t mlen = nvme_m2b(ns, nlb); + + req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen, + BDRV_REQ_MAY_UNMAP, + nvme_rw_complete_cb, req); + return; + } + + if (nvme_ns_ext(ns) || req->cmd.mptr) { + uint16_t status; + + nvme_sg_unmap(&req->sg); + status = nvme_map_mdata(nvme_ctrl(req), nlb, req); + if (status) { + ret = -EFAULT; + goto out; + } + + if (req->cmd.opcode == NVME_CMD_READ) { + return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req); + } + + return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req); + } + } + +out: + nvme_rw_complete_cb(req, ret); +} + +struct nvme_aio_format_ctx { + NvmeRequest *req; + NvmeNamespace *ns; + + /* number of outstanding write zeroes for this namespace */ + int *count; +}; + +static void nvme_aio_format_cb(void *opaque, int ret) +{ + struct nvme_aio_format_ctx *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = ctx->ns; + uintptr_t *num_formats = (uintptr_t *)&req->opaque; + int *count = ctx->count; + + g_free(ctx); + + if (ret) { nvme_aio_err(req, ret); } + if (--(*count)) { + return; + } + + g_free(count); + ns->status = 0x0; + + if (--(*num_formats)) { + return; + } + nvme_enqueue_req_completion(nvme_cq(req), req); } @@ -1558,6 +1938,90 @@ static void nvme_aio_flush_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } +static void nvme_verify_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint16_t ctrl = le16_to_cpu(rw->control); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, + appmask, reftag); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + block_acct_done(stats, acct); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, + ctx->mdata.iov.size, slba); + if (status) { + req->status = status; + goto out; + } + + req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + ctrl, slba, apptag, appmask, reftag); + } + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + + +static void nvme_verify_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_verify_cb, ctx); + return; + +out: + nvme_verify_cb(ctx, ret); +} + static void nvme_aio_discard_cb(void *opaque, int ret) { NvmeRequest *req = opaque; @@ -1583,7 +2047,7 @@ struct nvme_zone_reset_ctx { NvmeZone *zone; }; -static void nvme_aio_zone_reset_cb(void *opaque, int ret) +static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret) { struct nvme_zone_reset_ctx *ctx = opaque; NvmeRequest *req = ctx->req; @@ -1591,31 +2055,31 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret) NvmeZone *zone = ctx->zone; uintptr_t *resets = (uintptr_t *)&req->opaque; - g_free(ctx); - - trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba); - - if (!ret) { - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - nvme_aor_dec_open(ns); - /* fall through */ - case NVME_ZONE_STATE_CLOSED: - nvme_aor_dec_active(ns); - /* fall through */ - case NVME_ZONE_STATE_FULL: - zone->w_ptr = zone->d.zslba; - zone->d.wp = zone->w_ptr; - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); - /* fall through */ - default: - break; - } - } else { + if (ret) { nvme_aio_err(req, ret); + goto out; + } + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + nvme_aor_dec_open(ns); + /* fall through */ + case NVME_ZONE_STATE_CLOSED: + nvme_aor_dec_active(ns); + /* fall through */ + case NVME_ZONE_STATE_FULL: + zone->w_ptr = zone->d.zslba; + zone->d.wp = zone->w_ptr; + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); + /* fall through */ + default: + break; } +out: + g_free(ctx); + (*resets)--; if (*resets) { @@ -1625,25 +2089,61 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } +static void nvme_aio_zone_reset_cb(void *opaque, int ret) +{ + struct nvme_zone_reset_ctx *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeZone *zone = ctx->zone; + + trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba); + + if (ret) { + goto out; + } + + if (nvme_msize(ns)) { + int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba); + + blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, + nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, + nvme_aio_zone_reset_complete_cb, ctx); + return; + } + +out: + nvme_aio_zone_reset_complete_cb(opaque, ret); +} + struct nvme_copy_ctx { int copies; uint8_t *bounce; + uint8_t *mbounce; uint32_t nlb; + NvmeCopySourceRange *ranges; }; struct nvme_copy_in_ctx { NvmeRequest *req; QEMUIOVector iov; + NvmeCopySourceRange *range; }; -static void nvme_copy_cb(void *opaque, int ret) +static void nvme_copy_complete_cb(void *opaque, int ret) { NvmeRequest *req = opaque; NvmeNamespace *ns = req->ns; struct nvme_copy_ctx *ctx = req->opaque; - trace_pci_nvme_copy_cb(nvme_cid(req)); + if (ret) { + block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); + nvme_aio_err(req, ret); + goto out; + } + + block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); +out: if (ns->params.zoned) { NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; uint64_t sdlba = le64_to_cpu(copy->sdlba); @@ -1652,19 +2152,42 @@ static void nvme_copy_cb(void *opaque, int ret) __nvme_advance_zone_wp(ns, zone, ctx->nlb); } - if (!ret) { - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - } else { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - nvme_aio_err(req, ret); - } - g_free(ctx->bounce); + g_free(ctx->mbounce); g_free(ctx); nvme_enqueue_req_completion(nvme_cq(req), req); } +static void nvme_copy_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + struct nvme_copy_ctx *ctx = req->opaque; + + trace_pci_nvme_copy_cb(nvme_cid(req)); + + if (ret) { + goto out; + } + + if (nvme_msize(ns)) { + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + uint64_t sdlba = le64_to_cpu(copy->sdlba); + int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba); + + qemu_iovec_reset(&req->sg.iov); + qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb)); + + req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &req->sg.iov, 0, + nvme_copy_complete_cb, req); + return; + } + +out: + nvme_copy_complete_cb(opaque, ret); +} + static void nvme_copy_in_complete(NvmeRequest *req) { NvmeNamespace *ns = req->ns; @@ -1677,6 +2200,70 @@ static void nvme_copy_in_complete(NvmeRequest *req) block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + uint16_t prinfor = (copy->control[0] >> 4) & 0xf; + uint16_t prinfow = (copy->control[2] >> 2) & 0xf; + uint16_t nr = copy->nr + 1; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + uint16_t apptag, appmask; + uint32_t reftag; + uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce; + size_t len, mlen; + int i; + + /* + * The dif helpers expects prinfo to be similar to the control field of + * the NvmeRwCmd, so shift by 10 to fake it. + */ + prinfor = prinfor << 10; + prinfow = prinfow << 10; + + for (i = 0; i < nr; i++) { + range = &ctx->ranges[i]; + slba = le64_to_cpu(range->slba); + nlb = le16_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + mlen = nvme_m2b(ns, nlb); + apptag = le16_to_cpu(range->apptag); + appmask = le16_to_cpu(range->appmask); + reftag = le32_to_cpu(range->reftag); + + status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba, + apptag, appmask, reftag); + if (status) { + goto invalid; + } + + buf += len; + mbuf += mlen; + } + + apptag = le16_to_cpu(copy->apptag); + appmask = le16_to_cpu(copy->appmask); + reftag = le32_to_cpu(copy->reftag); + + if (prinfow & NVME_RW_PRINFO_PRACT) { + size_t len = nvme_l2b(ns, ctx->nlb); + size_t mlen = nvme_m2b(ns, ctx->nlb); + + status = nvme_check_prinfo(ns, prinfow, sdlba, reftag); + if (status) { + goto invalid; + } + + nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce, + mlen, apptag, reftag); + } else { + status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen, + prinfow, sdlba, apptag, appmask, reftag); + if (status) { + goto invalid; + } + } + } + status = nvme_check_bounds(ns, sdlba, ctx->nlb); if (status) { trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); @@ -1745,6 +2332,7 @@ static void nvme_aio_copy_in_cb(void *opaque, int ret) block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); g_free(ctx->bounce); + g_free(ctx->mbounce); g_free(ctx); nvme_enqueue_req_completion(nvme_cq(req), req); @@ -1756,43 +2344,150 @@ static void nvme_aio_copy_in_cb(void *opaque, int ret) } struct nvme_compare_ctx { - QEMUIOVector iov; - uint8_t *bounce; + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } mdata; }; -static void nvme_compare_cb(void *opaque, int ret) +static void nvme_compare_mdata_cb(void *opaque, int ret) { NvmeRequest *req = opaque; NvmeNamespace *ns = req->ns; + NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint16_t ctrl = le16_to_cpu(rw->control); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + struct nvme_compare_ctx *ctx = req->opaque; + g_autofree uint8_t *buf = NULL; + uint16_t status = NVME_SUCCESS; + + trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); + + buf = g_malloc(ctx->mdata.iov.size); + + status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t *bufp; + uint8_t *mbufp = ctx->mdata.bounce; + uint8_t *end = mbufp + ctx->mdata.iov.size; + size_t msize = nvme_msize(ns); + int16_t pil = 0; + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + slba, apptag, appmask, reftag); + if (status) { + req->status = status; + goto out; + } + + /* + * When formatted with protection information, do not compare the DIF + * tuple. + */ + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + } + + for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) { + if (memcmp(bufp + pil, mbufp + pil, msize - pil)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + } + + goto out; + } + + if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static void nvme_compare_data_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeCtrl *n = nvme_ctrl(req); + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; uint16_t status; - trace_pci_nvme_compare_cb(nvme_cid(req)); + trace_pci_nvme_compare_data_cb(nvme_cid(req)); - if (!ret) { - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - } else { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); + if (ret) { + block_acct_failed(stats, acct); nvme_aio_err(req, ret); goto out; } - buf = g_malloc(ctx->iov.size); + buf = g_malloc(ctx->data.iov.size); - status = nvme_h2c(nvme_ctrl(req), buf, ctx->iov.size, req); + status = nvme_bounce_data(n, buf, ctx->data.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { req->status = status; goto out; } - if (memcmp(buf, ctx->bounce, ctx->iov.size)) { + if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) { req->status = NVME_CMP_FAILURE; + goto out; + } + + if (nvme_msize(ns)) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_compare_mdata_cb, req); + return; } + block_acct_done(stats, acct); + out: - qemu_iovec_destroy(&ctx->iov); - g_free(ctx->bounce); + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); g_free(ctx); nvme_enqueue_req_completion(nvme_cq(req), req); @@ -1874,23 +2569,95 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) return status; } +static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t len = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + uint16_t ctrl = le16_to_cpu(rw->control); + uint32_t reftag = le32_to_cpu(rw->reftag); + NvmeBounceContext *ctx = NULL; + uint16_t status; + + trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_check_prinfo(ns, ctrl, slba, reftag); + if (status) { + return status; + } + + if (ctrl & NVME_RW_PRINFO_PRACT) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + } + + if (len > n->page_size << n->params.vsl) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + return status; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + return status; + } + } + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_verify_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; +} + static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = req->ns; NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - g_autofree NvmeCopySourceRange *range = NULL; uint16_t nr = copy->nr + 1; uint8_t format = copy->control[0] & 0xf; - uint32_t nlb = 0; + /* + * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the + * NVME_RW_PRINFO constants. + */ + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10; + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10; + + uint32_t nlb = 0; uint8_t *bounce = NULL, *bouncep = NULL; + uint8_t *mbounce = NULL, *mbouncep = NULL; struct nvme_copy_ctx *ctx; uint16_t status; int i; trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (!(n->id_ctrl.ocfs & (1 << format))) { trace_pci_nvme_err_copy_invalid_format(format); return NVME_INVALID_FIELD | NVME_DNR; @@ -1900,39 +2667,41 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) return NVME_CMD_SIZE_LIMIT | NVME_DNR; } - range = g_new(NvmeCopySourceRange, nr); + ctx = g_new(struct nvme_copy_ctx, 1); + ctx->ranges = g_new(NvmeCopySourceRange, nr); - status = nvme_h2c(n, (uint8_t *)range, nr * sizeof(NvmeCopySourceRange), - req); + status = nvme_h2c(n, (uint8_t *)ctx->ranges, + nr * sizeof(NvmeCopySourceRange), req); if (status) { - return status; + goto out; } for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t _nlb = le16_to_cpu(range[i].nlb) + 1; + uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); + uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto out; } status = nvme_check_bounds(ns, slba, _nlb); if (status) { trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); - return status; + goto out; } if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { status = nvme_check_dulbe(ns, slba, _nlb); if (status) { - return status; + goto out; } } if (ns->params.zoned) { status = nvme_check_zone_read(ns, slba, _nlb); if (status) { - return status; + goto out; } } @@ -1940,25 +2709,28 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } if (nlb > le32_to_cpu(ns->id_ns.mcl)) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto out; } bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); + if (nvme_msize(ns)) { + mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb)); + } block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, BLOCK_ACCT_READ); - ctx = g_new(struct nvme_copy_ctx, 1); - ctx->bounce = bounce; + ctx->mbounce = mbounce; ctx->nlb = nlb; ctx->copies = 1; req->opaque = ctx; for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t nlb = le16_to_cpu(range[i].nlb) + 1; + uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); + uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; size_t len = nvme_l2b(ns, nlb); int64_t offset = nvme_l2b(ns, slba); @@ -1977,6 +2749,24 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) nvme_aio_copy_in_cb, in_ctx); bouncep += len; + + if (nvme_msize(ns)) { + len = nvme_m2b(ns, nlb); + offset = ns->mdata_offset + nvme_m2b(ns, slba); + + in_ctx = g_new(struct nvme_copy_in_ctx, 1); + in_ctx->req = req; + + qemu_iovec_init(&in_ctx->iov, 1); + qemu_iovec_add(&in_ctx->iov, mbouncep, len); + + ctx->copies++; + + blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, + nvme_aio_copy_in_cb, in_ctx); + + mbouncep += len; + } } /* account for the 1-initialization */ @@ -1987,6 +2777,12 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } return NVME_NO_COMPLETE; + +out: + g_free(ctx->ranges); + g_free(ctx); + + return status; } static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) @@ -1996,14 +2792,23 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) BlockBackend *blk = ns->blkconf.blk; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t len = nvme_l2b(ns, nlb); + uint16_t ctrl = le16_to_cpu(rw->control); + size_t data_len = nvme_l2b(ns, nlb); + size_t len = data_len; int64_t offset = nvme_l2b(ns, slba); - uint8_t *bounce = NULL; struct nvme_compare_ctx *ctx = NULL; uint16_t status; trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + if (nvme_ns_ext(ns)) { + len += nvme_m2b(ns, nlb); + } + status = nvme_check_mdts(n, len); if (status) { return status; @@ -2022,18 +2827,22 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) } } - bounce = g_malloc(len); + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } ctx = g_new(struct nvme_compare_ctx, 1); - ctx->bounce = bounce; + ctx->data.bounce = g_malloc(data_len); req->opaque = ctx; - qemu_iovec_init(&ctx->iov, 1); - qemu_iovec_add(&ctx->iov, bounce, len); + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len); - block_acct_start(blk_get_stats(blk), &req->acct, len, BLOCK_ACCT_READ); - blk_aio_preadv(blk, offset, &ctx->iov, 0, nvme_compare_cb, req); + block_acct_start(blk_get_stats(blk), &req->acct, data_len, + BLOCK_ACCT_READ); + blk_aio_preadv(blk, offset, &ctx->data.iov, 0, nvme_compare_data_cb, req); return NVME_NO_COMPLETE; } @@ -2056,7 +2865,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req); + req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req); return NVME_NO_COMPLETE; } @@ -2098,14 +2907,28 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) NvmeNamespace *ns = req->ns; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); uint64_t data_size = nvme_l2b(ns, nlb); + uint64_t mapped_size = data_size; uint64_t data_offset; BlockBackend *blk = ns->blkconf.blk; uint16_t status; - trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, data_size, slba); + if (nvme_ns_ext(ns)) { + mapped_size += nvme_m2b(ns, nlb); - status = nvme_check_mdts(n, data_size); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = ctrl & NVME_RW_PRINFO_PRACT; + + if (pract && nvme_msize(ns) == 8) { + mapped_size = data_size; + } + } + } + + trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba); + + status = nvme_check_mdts(n, mapped_size); if (status) { goto invalid; } @@ -2124,11 +2947,6 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) } } - status = nvme_map_dptr(n, &req->sg, data_size, &req->cmd); - if (status) { - goto invalid; - } - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { status = nvme_check_dulbe(ns, slba, nlb); if (status) { @@ -2136,6 +2954,15 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) } } + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + + status = nvme_map_data(n, nlb, req); + if (status) { + goto invalid; + } + data_offset = nvme_l2b(ns, slba); block_acct_start(blk_get_stats(blk), &req->acct, data_size, @@ -2155,18 +2982,32 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, NvmeNamespace *ns = req->ns; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); uint64_t data_size = nvme_l2b(ns, nlb); + uint64_t mapped_size = data_size; uint64_t data_offset; NvmeZone *zone; NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe; BlockBackend *blk = ns->blkconf.blk; uint16_t status; + if (nvme_ns_ext(ns)) { + mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = ctrl & NVME_RW_PRINFO_PRACT; + + if (pract && nvme_msize(ns) == 8) { + mapped_size -= nvme_m2b(ns, nlb); + } + } + } + trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode), - nvme_nsid(ns), nlb, data_size, slba); + nvme_nsid(ns), nlb, mapped_size, slba); if (!wrz) { - status = nvme_check_mdts(n, data_size); + status = nvme_check_mdts(n, mapped_size); if (status) { goto invalid; } @@ -2182,19 +3023,47 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, zone = nvme_get_zone_by_slba(ns, slba); if (append) { + bool piremap = !!(ctrl & NVME_RW_PIREMAP); + if (unlikely(slba != zone->d.zslba)) { trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); status = NVME_INVALID_FIELD; goto invalid; } - if (n->params.zasl && data_size > n->page_size << n->params.zasl) { + if (n->params.zasl && + data_size > (uint64_t)n->page_size << n->params.zasl) { trace_pci_nvme_err_zasl(data_size); return NVME_INVALID_FIELD | NVME_DNR; } slba = zone->w_ptr; + rw->slba = cpu_to_le64(slba); res->slba = cpu_to_le64(slba); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + if (!piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + /* fallthrough */ + + case NVME_ID_NS_DPS_TYPE_2: + if (piremap) { + uint32_t reftag = le32_to_cpu(rw->reftag); + rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba)); + } + + break; + + case NVME_ID_NS_DPS_TYPE_3: + if (piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + break; + } } status = nvme_check_zone_write(ns, zone, slba, nlb); @@ -2212,8 +3081,12 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, data_offset = nvme_l2b(ns, slba); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + if (!wrz) { - status = nvme_map_dptr(n, &req->sg, data_size, &req->cmd); + status = nvme_map_data(n, nlb, req); if (status) { goto invalid; } @@ -2226,6 +3099,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req); } + return NVME_NO_COMPLETE; invalid: @@ -2619,12 +3493,13 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) uint32_t zone_idx, zra, zrasf, partial; uint64_t max_zones, nr_zones = 0; uint16_t status; - uint64_t slba, capacity = nvme_ns_nlbas(ns); + uint64_t slba; NvmeZoneDescr *z; NvmeZone *zone; NvmeZoneReportHeader *header; void *buf, *buf_p; size_t zone_entry_sz; + int i; req->status = NVME_SUCCESS; @@ -2666,7 +3541,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) buf = g_malloc0(data_size); zone = &ns->zone_array[zone_idx]; - for (; slba < capacity; slba += ns->zone_size) { + for (i = zone_idx; i < ns->num_zones; i++) { if (partial && nr_zones >= max_zones) { break; } @@ -2718,6 +3593,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); @@ -2759,6 +3635,11 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_OPCODE | NVME_DNR; } + status = nvme_ns_status(req->ns); + if (unlikely(status)) { + return status; + } + switch (req->cmd.opcode) { case NVME_CMD_WRITE_ZEROES: return nvme_write_zeroes(n, req); @@ -2772,6 +3653,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_compare(n, req); case NVME_CMD_DSM: return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); case NVME_CMD_COPY: return nvme_copy(n, req); case NVME_CMD_ZONE_MGMT_SEND: @@ -3288,12 +4171,14 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) { NvmeIdentify *c = (NvmeIdentify *)&req->cmd; uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; + NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id; trace_pci_nvme_identify_ctrl_csi(c->csi); switch (c->csi) { case NVME_CSI_NVM: - ((NvmeIdCtrlNvm *)&id)->dmrsl = cpu_to_le32(n->dmrsl); + id_nvm->vsl = n->params.vsl; + id_nvm->dmrsl = cpu_to_le32(n->dmrsl); break; case NVME_CSI_ZONED: @@ -4056,6 +4941,134 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, + uint8_t mset, uint8_t pi, uint8_t pil, + NvmeRequest *req) +{ + int64_t len, offset; + struct nvme_aio_format_ctx *ctx; + BlockBackend *blk = ns->blkconf.blk; + uint16_t ms; + uintptr_t *num_formats = (uintptr_t *)&req->opaque; + int *count; + + if (ns->params.zoned) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil); + + if (lbaf > ns->id_ns.nlbaf) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + ms = ns->id_ns.lbaf[lbaf].ms; + + if (pi && (ms < sizeof(NvmeDifTuple))) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + nvme_ns_drain(ns); + nvme_ns_shutdown(ns); + nvme_ns_cleanup(ns); + + ns->id_ns.dps = (pil << 3) | pi; + ns->id_ns.flbas = lbaf | (mset << 4); + + nvme_ns_init_format(ns); + + ns->status = NVME_FORMAT_IN_PROGRESS; + + len = ns->size; + offset = 0; + + count = g_new(int, 1); + *count = 1; + + (*num_formats)++; + + while (len) { + ctx = g_new(struct nvme_aio_format_ctx, 1); + ctx->req = req; + ctx->ns = ns; + ctx->count = count; + + size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); + + (*count)++; + + blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP, + nvme_aio_format_cb, ctx); + + offset += bytes; + len -= bytes; + + } + + (*count)--; + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint8_t lbaf = dw10 & 0xf; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t pil = (dw10 >> 8) & 0x1; + uintptr_t *num_formats = (uintptr_t *)&req->opaque; + uint16_t status; + int i; + + trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil); + + /* 1-initialize; see the comment in nvme_dsm */ + *num_formats = 1; + + if (nsid != NVME_NSID_BROADCAST) { + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); + if (status && status != NVME_NO_COMPLETE) { + req->status = status; + } + } else { + for (i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); + if (status && status != NVME_NO_COMPLETE) { + req->status = status; + break; + } + } + } + + /* account for the 1-initialization */ + if (--(*num_formats)) { + return NVME_NO_COMPLETE; + } + + return req->status; +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -4094,6 +5107,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_aer(n, req); case NVME_ADM_CMD_NS_ATTACHMENT: return nvme_ns_attachment(n, req); + case NVME_ADM_CMD_FORMAT_NVM: + return nvme_format(n, req); default: assert(false); } @@ -4836,6 +5851,11 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) "than or equal to mdts (Maximum Data Transfer Size)"); return; } + + if (!n->params.vsl) { + error_setg(errp, "vsl must be non-zero"); + return; + } } static void nvme_init_state(NvmeCtrl *n) @@ -5065,7 +6085,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT); + id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT); id->cntrltype = 0x1; /* @@ -5236,6 +6256,7 @@ static Property nvme_props[] = { DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3), DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64), DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7), + DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7), DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 4955d649c7..5b0031b11d 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -2,6 +2,7 @@ #define HW_NVME_H #include "block/nvme.h" +#include "hw/pci/pci.h" #include "nvme-subsys.h" #include "nvme-ns.h" @@ -25,6 +26,7 @@ typedef struct NvmeParams { uint8_t aerl; uint32_t aer_max_queued; uint8_t mdts; + uint8_t vsl; bool use_intel_id; uint8_t zasl; bool legacy_cmb; @@ -62,6 +64,15 @@ typedef struct NvmeRequest { QTAILQ_ENTRY(NvmeRequest)entry; } NvmeRequest; +typedef struct NvmeBounceContext { + NvmeRequest *req; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data, mdata; +} NvmeBounceContext; + static inline const char *nvme_adm_opc_str(uint8_t opc) { switch (opc) { @@ -75,6 +86,7 @@ static inline const char *nvme_adm_opc_str(uint8_t opc) case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; + case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; default: return "NVME_ADM_CMD_UNKNOWN"; } } @@ -88,6 +100,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc) case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; + case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; @@ -236,12 +249,18 @@ static inline bool nvme_ns_is_attached(NvmeCtrl *n, NvmeNamespace *ns) static inline void nvme_ns_attach(NvmeCtrl *n, NvmeNamespace *ns) { - n->namespaces[nvme_nsid(ns) - 1] = ns; + uint32_t nsid = nvme_nsid(ns); + assert(nsid && nsid <= NVME_MAX_NAMESPACES); + + n->namespaces[nsid - 1] = ns; } static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns) { - n->namespaces[nvme_nsid(ns) - 1] = NULL; + uint32_t nsid = nvme_nsid(ns); + assert(nsid && nsid <= NVME_MAX_NAMESPACES); + + n->namespaces[nsid - 1] = NULL; } static inline NvmeCQueue *nvme_cq(NvmeRequest *req) @@ -258,6 +277,27 @@ static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) return sq->ctrl; } +static inline uint16_t nvme_cid(NvmeRequest *req) +{ + if (!req) { + return 0xffff; + } + + return le16_to_cpu(req->cqe.cid); +} + +typedef enum NvmeTxDirection { + NVME_TX_DIRECTION_TO_DEVICE = 0, + NVME_TX_DIRECTION_FROM_DEVICE = 1, +} NvmeTxDirection; + int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +void nvme_rw_complete_cb(void *opaque, int ret); +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd); #endif /* HW_NVME_H */ diff --git a/hw/block/trace-events b/hw/block/trace-events index 7d98297dab..22da06986d 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -63,19 +63,39 @@ pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" +pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" +pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" +pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" diff --git a/hw/core/Kconfig b/hw/core/Kconfig index fdf03514d7..9397503656 100644 --- a/hw/core/Kconfig +++ b/hw/core/Kconfig @@ -11,6 +11,11 @@ config GENERIC_LOADER bool default y +config GUEST_LOADER + bool + default y + depends on TCG + config OR_IRQ bool diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index 6357be9c6b..58248cffa3 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -130,7 +130,7 @@ void hmp_info_numa(Monitor *mon, const QDict *qdict) { int i, nb_numa_nodes; NumaNodeMem *node_mem; - CpuInfoList *cpu_list, *cpu; + CpuInfoFastList *cpu_list, *cpu; MachineState *ms = MACHINE(qdev_get_machine()); nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; @@ -139,7 +139,7 @@ void hmp_info_numa(Monitor *mon, const QDict *qdict) return; } - cpu_list = qmp_query_cpus(&error_abort); + cpu_list = qmp_query_cpus_fast(&error_abort); node_mem = g_new0(NumaNodeMem, nb_numa_nodes); query_numa_node_mem(node_mem, ms); @@ -148,7 +148,7 @@ void hmp_info_numa(Monitor *mon, const QDict *qdict) for (cpu = cpu_list; cpu; cpu = cpu->next) { if (cpu->value->has_props && cpu->value->props->has_node_id && cpu->value->props->node_id == i) { - monitor_printf(mon, " %" PRIi64, cpu->value->CPU); + monitor_printf(mon, " %" PRIi64, cpu->value->cpu_index); } } monitor_printf(mon, "\n"); @@ -157,6 +157,6 @@ void hmp_info_numa(Monitor *mon, const QDict *qdict) monitor_printf(mon, "node %d plugged: %" PRId64 " MB\n", i, node_mem[i].node_plugged_mem >> 20); } - qapi_free_CpuInfoList(cpu_list); + qapi_free_CpuInfoFastList(cpu_list); g_free(node_mem); } diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index 44e979e503..68a942595a 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -24,125 +24,6 @@ #include "sysemu/runstate.h" #include "sysemu/sysemu.h" -CpuInfoList *qmp_query_cpus(Error **errp) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(ms); - CpuInfoList *head = NULL, **tail = &head; - CPUState *cpu; - - CPU_FOREACH(cpu) { - CpuInfo *value; -#if defined(TARGET_I386) - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; -#elif defined(TARGET_PPC) - PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu); - CPUPPCState *env = &ppc_cpu->env; -#elif defined(TARGET_SPARC) - SPARCCPU *sparc_cpu = SPARC_CPU(cpu); - CPUSPARCState *env = &sparc_cpu->env; -#elif defined(TARGET_RISCV) - RISCVCPU *riscv_cpu = RISCV_CPU(cpu); - CPURISCVState *env = &riscv_cpu->env; -#elif defined(TARGET_MIPS) - MIPSCPU *mips_cpu = MIPS_CPU(cpu); - CPUMIPSState *env = &mips_cpu->env; -#elif defined(TARGET_TRICORE) - TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu); - CPUTriCoreState *env = &tricore_cpu->env; -#elif defined(TARGET_S390X) - S390CPU *s390_cpu = S390_CPU(cpu); - CPUS390XState *env = &s390_cpu->env; -#endif - - cpu_synchronize_state(cpu); - - value = g_malloc0(sizeof(*value)); - value->CPU = cpu->cpu_index; - value->current = (cpu == first_cpu); - value->halted = cpu->halted; - value->qom_path = object_get_canonical_path(OBJECT(cpu)); - value->thread_id = cpu->thread_id; -#if defined(TARGET_I386) - value->arch = CPU_INFO_ARCH_X86; - value->u.x86.pc = env->eip + env->segs[R_CS].base; -#elif defined(TARGET_PPC) - value->arch = CPU_INFO_ARCH_PPC; - value->u.ppc.nip = env->nip; -#elif defined(TARGET_SPARC) - value->arch = CPU_INFO_ARCH_SPARC; - value->u.q_sparc.pc = env->pc; - value->u.q_sparc.npc = env->npc; -#elif defined(TARGET_MIPS) - value->arch = CPU_INFO_ARCH_MIPS; - value->u.q_mips.PC = env->active_tc.PC; -#elif defined(TARGET_TRICORE) - value->arch = CPU_INFO_ARCH_TRICORE; - value->u.tricore.PC = env->PC; -#elif defined(TARGET_S390X) - value->arch = CPU_INFO_ARCH_S390; - value->u.s390.cpu_state = env->cpu_state; -#elif defined(TARGET_RISCV) - value->arch = CPU_INFO_ARCH_RISCV; - value->u.riscv.pc = env->pc; -#else - value->arch = CPU_INFO_ARCH_OTHER; -#endif - value->has_props = !!mc->cpu_index_to_instance_props; - if (value->has_props) { - CpuInstanceProperties *props; - props = g_malloc0(sizeof(*props)); - *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index); - value->props = props; - } - - QAPI_LIST_APPEND(tail, value); - } - - return head; -} - -static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target) -{ - /* - * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the - * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script. - */ - switch (target) { - case SYS_EMU_TARGET_I386: - case SYS_EMU_TARGET_X86_64: - return CPU_INFO_ARCH_X86; - - case SYS_EMU_TARGET_PPC: - case SYS_EMU_TARGET_PPC64: - return CPU_INFO_ARCH_PPC; - - case SYS_EMU_TARGET_SPARC: - case SYS_EMU_TARGET_SPARC64: - return CPU_INFO_ARCH_SPARC; - - case SYS_EMU_TARGET_MIPS: - case SYS_EMU_TARGET_MIPSEL: - case SYS_EMU_TARGET_MIPS64: - case SYS_EMU_TARGET_MIPS64EL: - return CPU_INFO_ARCH_MIPS; - - case SYS_EMU_TARGET_TRICORE: - return CPU_INFO_ARCH_TRICORE; - - case SYS_EMU_TARGET_S390X: - return CPU_INFO_ARCH_S390; - - case SYS_EMU_TARGET_RISCV32: - case SYS_EMU_TARGET_RISCV64: - return CPU_INFO_ARCH_RISCV; - - default: - return CPU_INFO_ARCH_OTHER; - } -} - static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu) { #ifdef TARGET_S390X @@ -183,7 +64,6 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp) value->props = props; } - value->arch = sysemu_target_to_cpuinfo_arch(target); value->target = target; if (target == SYS_EMU_TARGET_S390X) { cpustate_to_cpuinfo_s390(&value->u.s390x, cpu); diff --git a/hw/core/meson.build b/hw/core/meson.build index 9cd72edf51..59f1605bb0 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -16,6 +16,7 @@ hwcore_files = files( common_ss.add(files('cpu.c')) common_ss.add(when: 'CONFIG_FITLOADER', if_true: files('loader-fit.c')) common_ss.add(when: 'CONFIG_GENERIC_LOADER', if_true: files('generic-loader.c')) +common_ss.add(when: ['CONFIG_GUEST_LOADER', fdt], if_true: files('guest-loader.c')) common_ss.add(when: 'CONFIG_OR_IRQ', if_true: files('or-irq.c')) common_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('platform-bus.c')) common_ss.add(when: 'CONFIG_PTIMER', if_true: files('ptimer.c')) @@ -37,8 +38,6 @@ softmmu_ss.add(files( 'clock-vmstate.c', )) -softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('guest-loader.c')) - specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: files( 'machine-qmp-cmds.c', 'numa.c', diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 410db9ef96..35e1770950 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -338,10 +338,8 @@ GlobalProperty pc_compat_1_4[] = { PC_CPU_MODEL_IDS("1.4.0") { "scsi-hd", "discard_granularity", "0" }, { "scsi-cd", "discard_granularity", "0" }, - { "scsi-disk", "discard_granularity", "0" }, { "ide-hd", "discard_granularity", "0" }, { "ide-cd", "discard_granularity", "0" }, - { "ide-drive", "discard_granularity", "0" }, { "virtio-blk-pci", "discard_granularity", "0" }, /* DEV_NVECTORS_UNSPECIFIED as a uint32_t string: */ { "virtio-serial-pci", "vectors", "0xFFFFFFFF" }, diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index 8cd19fa5e9..e70ebc83a0 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -283,20 +283,6 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) ide_dev_initfn(dev, IDE_CD, errp); } -static void ide_drive_realize(IDEDevice *dev, Error **errp) -{ - DriveInfo *dinfo = NULL; - - warn_report("'ide-drive' is deprecated, " - "please use 'ide-hd' or 'ide-cd' instead"); - - if (dev->conf.blk) { - dinfo = blk_legacy_dinfo(dev->conf.blk); - } - - ide_dev_initfn(dev, dinfo && dinfo->media_cd ? IDE_CD : IDE_HD, errp); -} - #define DEFINE_IDE_DEV_PROPERTIES() \ DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ DEFINE_BLOCK_ERROR_PROPERTIES(IDEDrive, dev.conf), \ @@ -355,29 +341,6 @@ static const TypeInfo ide_cd_info = { .class_init = ide_cd_class_init, }; -static Property ide_drive_properties[] = { - DEFINE_IDE_DEV_PROPERTIES(), - DEFINE_PROP_END_OF_LIST(), -}; - -static void ide_drive_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - IDEDeviceClass *k = IDE_DEVICE_CLASS(klass); - - k->realize = ide_drive_realize; - dc->fw_name = "drive"; - dc->desc = "virtual IDE disk or CD-ROM (legacy)"; - device_class_set_props(dc, ide_drive_properties); -} - -static const TypeInfo ide_drive_info = { - .name = "ide-drive", - .parent = TYPE_IDE_DEVICE, - .instance_size = sizeof(IDEDrive), - .class_init = ide_drive_class_init, -}; - static void ide_device_class_init(ObjectClass *klass, void *data) { DeviceClass *k = DEVICE_CLASS(klass); @@ -402,7 +365,6 @@ static void ide_register_types(void) type_register_static(&ide_bus_info); type_register_static(&ide_hd_info); type_register_static(&ide_cd_info); - type_register_static(&ide_drive_info); type_register_static(&ide_device_type_info); } diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c index ca2f939dd5..ff0156db76 100644 --- a/hw/misc/mac_via.c +++ b/hw/misc/mac_via.c @@ -279,6 +279,12 @@ #define VIA_TIMER_FREQ (783360) #define VIA_ADB_POLL_FREQ 50 /* XXX: not real */ +/* + * Guide to the Macintosh Family Hardware ch. 12 "Displays" p. 401 gives the + * precise 60Hz interrupt frequency as ~60.15Hz with a period of 16625.8 us + */ +#define VIA_60HZ_TIMER_PERIOD_NS 16625800 + /* VIA returns time offset from Jan 1, 1904, not 1970 */ #define RTC_OFFSET 2082844800 @@ -297,44 +303,32 @@ enum { REG_EMPTY = 0xff, }; -static void via1_VBL_update(MOS6522Q800VIA1State *v1s) +static void via1_sixty_hz_update(MOS6522Q800VIA1State *v1s) { - MOS6522State *s = MOS6522(v1s); - /* 60 Hz irq */ - v1s->next_VBL = (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 16630) / - 16630 * 16630; - - if (s->ier & VIA1_IRQ_VBLANK) { - timer_mod(v1s->VBL_timer, v1s->next_VBL); - } else { - timer_del(v1s->VBL_timer); - } + v1s->next_sixty_hz = (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + VIA_60HZ_TIMER_PERIOD_NS) / + VIA_60HZ_TIMER_PERIOD_NS * VIA_60HZ_TIMER_PERIOD_NS; + timer_mod(v1s->sixty_hz_timer, v1s->next_sixty_hz); } static void via1_one_second_update(MOS6522Q800VIA1State *v1s) { - MOS6522State *s = MOS6522(v1s); - v1s->next_second = (qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000) / 1000 * 1000; - if (s->ier & VIA1_IRQ_ONE_SECOND) { - timer_mod(v1s->one_second_timer, v1s->next_second); - } else { - timer_del(v1s->one_second_timer); - } + timer_mod(v1s->one_second_timer, v1s->next_second); } -static void via1_VBL(void *opaque) +static void via1_sixty_hz(void *opaque) { MOS6522Q800VIA1State *v1s = opaque; MOS6522State *s = MOS6522(v1s); MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(s); - s->ifr |= VIA1_IRQ_VBLANK; + s->ifr |= VIA1_IRQ_60HZ; mdc->update_irq(s); - via1_VBL_update(v1s); + via1_sixty_hz_update(v1s); } static void via1_one_second(void *opaque) @@ -609,7 +603,6 @@ static void adb_via_poll(void *opaque) uint8_t obuf[9]; uint8_t *data = &s->sr; int olen; - uint16_t pending; /* * Setting vADBInt below indicates that an autopoll reply has been @@ -618,36 +611,36 @@ static void adb_via_poll(void *opaque) */ adb_autopoll_block(adb_bus); - m->adb_data_in_index = 0; - m->adb_data_out_index = 0; - olen = adb_poll(adb_bus, obuf, adb_bus->autopoll_mask); - - if (olen > 0) { - /* Autopoll response */ - *data = obuf[0]; - olen--; - memcpy(m->adb_data_in, &obuf[1], olen); - m->adb_data_in_size = olen; + if (m->adb_data_in_size > 0 && m->adb_data_in_index == 0) { + /* + * For older Linux kernels that switch to IDLE mode after sending the + * ADB command, detect if there is an existing response and return that + * as a a "fake" autopoll reply or bus timeout accordingly + */ + *data = m->adb_data_out[0]; + olen = m->adb_data_in_size; s->b &= ~VIA1B_vADBInt; qemu_irq_raise(m->adb_data_ready); - } else if (olen < 0) { - /* Bus timeout (device does not exist) */ - *data = 0xff; - s->b |= VIA1B_vADBInt; - adb_autopoll_unblock(adb_bus); } else { - pending = adb_bus->pending & ~(1 << (m->adb_autopoll_cmd >> 4)); + /* + * Otherwise poll as normal + */ + m->adb_data_in_index = 0; + m->adb_data_out_index = 0; + olen = adb_poll(adb_bus, obuf, adb_bus->autopoll_mask); + + if (olen > 0) { + /* Autopoll response */ + *data = obuf[0]; + olen--; + memcpy(m->adb_data_in, &obuf[1], olen); + m->adb_data_in_size = olen; - if (pending) { - /* - * Bus timeout (device exists but another device has data). Block - * autopoll so the OS can read out the first EVEN and first ODD - * byte to determine bus timeout and SRQ status - */ - *data = m->adb_autopoll_cmd; s->b &= ~VIA1B_vADBInt; - + qemu_irq_raise(m->adb_data_ready); + } else { + *data = m->adb_autopoll_cmd; obuf[0] = 0xff; obuf[1] = 0xff; olen = 2; @@ -655,12 +648,8 @@ static void adb_via_poll(void *opaque) memcpy(m->adb_data_in, obuf, olen); m->adb_data_in_size = olen; + s->b &= ~VIA1B_vADBInt; qemu_irq_raise(m->adb_data_ready); - } else { - /* Bus timeout (device exists but no other device has data) */ - *data = 0; - s->b |= VIA1B_vADBInt; - adb_autopoll_unblock(adb_bus); } } @@ -783,27 +772,8 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) return; case ADB_STATE_IDLE: - /* - * Since adb_request() will have already consumed the data from the - * device, we must detect this extra state change and re-inject the - * reponse as either a "fake" autopoll reply or bus timeout - * accordingly - */ - if (s->adb_data_in_index == 0) { - if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { - *data = 0xff; - ms->b |= VIA1B_vADBInt; - qemu_irq_raise(s->adb_data_ready); - } else if (s->adb_data_in_size > 0) { - adb_bus->status = ADB_STATUS_POLLREPLY; - *data = s->adb_autopoll_cmd; - ms->b &= ~VIA1B_vADBInt; - qemu_irq_raise(s->adb_data_ready); - } - } else { - ms->b |= VIA1B_vADBInt; - adb_autopoll_unblock(adb_bus); - } + ms->b |= VIA1B_vADBInt; + adb_autopoll_unblock(adb_bus); trace_via1_adb_receive("IDLE", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", adb_bus->status, @@ -816,33 +786,37 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) switch (s->adb_data_in_index) { case 0: /* First EVEN byte: vADBInt indicates bus timeout */ - trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", - *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); - - *data = s->adb_data_in[s->adb_data_in_index++]; + *data = s->adb_data_in[s->adb_data_in_index]; if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { ms->b &= ~VIA1B_vADBInt; } else { ms->b |= VIA1B_vADBInt; } - break; - case 1: - /* First ODD byte: vADBInt indicates SRQ */ trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", adb_bus->status, s->adb_data_in_index, s->adb_data_in_size); - *data = s->adb_data_in[s->adb_data_in_index++]; + s->adb_data_in_index++; + break; + + case 1: + /* First ODD byte: vADBInt indicates SRQ */ + *data = s->adb_data_in[s->adb_data_in_index]; pending = adb_bus->pending & ~(1 << (s->adb_autopoll_cmd >> 4)); if (pending) { ms->b &= ~VIA1B_vADBInt; } else { ms->b |= VIA1B_vADBInt; } + + trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", + *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", + adb_bus->status, s->adb_data_in_index, + s->adb_data_in_size); + + s->adb_data_in_index++; break; default: @@ -852,14 +826,9 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) * end of the poll reply, so provide these extra bytes below to * keep it happy */ - trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", - *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); - if (s->adb_data_in_index < s->adb_data_in_size) { /* Next data byte */ - *data = s->adb_data_in[s->adb_data_in_index++]; + *data = s->adb_data_in[s->adb_data_in_index]; ms->b |= VIA1B_vADBInt; } else if (s->adb_data_in_index == s->adb_data_in_size) { if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { @@ -869,7 +838,6 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) /* Return 0x0 after reply */ *data = 0; } - s->adb_data_in_index++; ms->b &= ~VIA1B_vADBInt; } else { /* Bus timeout (no more data) */ @@ -878,6 +846,15 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) adb_bus->status = 0; adb_autopoll_unblock(adb_bus); } + + trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", + *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", + adb_bus->status, s->adb_data_in_index, + s->adb_data_in_size); + + if (s->adb_data_in_index <= s->adb_data_in_size) { + s->adb_data_in_index++; + } break; } @@ -910,21 +887,6 @@ static uint64_t mos6522_q800_via1_read(void *opaque, hwaddr addr, unsigned size) { MOS6522Q800VIA1State *s = MOS6522_Q800_VIA1(opaque); MOS6522State *ms = MOS6522(s); - int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - - /* - * If IRQs are disabled, timers are disabled, but we need to update - * VIA1_IRQ_VBLANK and VIA1_IRQ_ONE_SECOND bits in the IFR - */ - - if (now >= s->next_VBL) { - ms->ifr |= VIA1_IRQ_VBLANK; - via1_VBL_update(s); - } - if (now >= s->next_second) { - ms->ifr |= VIA1_IRQ_ONE_SECOND; - via1_one_second_update(s); - } addr = (addr >> 9) & 0xf; return mos6522_read(ms, addr, size); @@ -948,9 +910,6 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr addr, uint64_t val, v1s->last_b = ms->b; break; } - - via1_one_second_update(v1s); - via1_VBL_update(v1s); } static const MemoryRegionOps mos6522_q800_via1_ops = { @@ -959,7 +918,7 @@ static const MemoryRegionOps mos6522_q800_via1_ops = { .endianness = DEVICE_BIG_ENDIAN, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 4, }, }; @@ -988,23 +947,17 @@ static const MemoryRegionOps mos6522_q800_via2_ops = { .endianness = DEVICE_BIG_ENDIAN, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 4, }, }; static void mac_via_reset(DeviceState *dev) { MacVIAState *m = MAC_VIA(dev); - MOS6522Q800VIA1State *v1s = &m->mos6522_via1; ADBBusState *adb_bus = &m->adb_bus; adb_set_autopoll_enabled(adb_bus, true); - timer_del(v1s->VBL_timer); - v1s->next_VBL = 0; - timer_del(v1s->one_second_timer); - v1s->next_second = 0; - m->cmd = REG_EMPTY; m->alt = REG_EMPTY; } @@ -1043,8 +996,11 @@ static void mac_via_realize(DeviceState *dev, Error **errp) m->mos6522_via1.one_second_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, via1_one_second, &m->mos6522_via1); - m->mos6522_via1.VBL_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, via1_VBL, - &m->mos6522_via1); + via1_one_second_update(&m->mos6522_via1); + m->mos6522_via1.sixty_hz_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + via1_sixty_hz, + &m->mos6522_via1); + via1_sixty_hz_update(&m->mos6522_via1); qemu_get_timedate(&tm, 0); m->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; @@ -1133,8 +1089,8 @@ static const VMStateDescription vmstate_mac_via = { VMSTATE_BUFFER(mos6522_via1.PRAM, MacVIAState), VMSTATE_TIMER_PTR(mos6522_via1.one_second_timer, MacVIAState), VMSTATE_INT64(mos6522_via1.next_second, MacVIAState), - VMSTATE_TIMER_PTR(mos6522_via1.VBL_timer, MacVIAState), - VMSTATE_INT64(mos6522_via1.next_VBL, MacVIAState), + VMSTATE_TIMER_PTR(mos6522_via1.sixty_hz_timer, MacVIAState), + VMSTATE_INT64(mos6522_via1.next_sixty_hz, MacVIAState), VMSTATE_STRUCT(mos6522_via2.parent_obj, MacVIAState, 0, vmstate_mos6522, MOS6522State), /* RTC */ diff --git a/hw/misc/trace-events b/hw/misc/trace-events index 3d44978cca..d0a89eb059 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -233,8 +233,8 @@ via1_rtc_cmd_test_write(int value) "value=0x%02x" via1_rtc_cmd_wprotect_write(int value) "value=0x%02x" via1_rtc_cmd_pram_read(int addr, int value) "addr=%u value=0x%02x" via1_rtc_cmd_pram_write(int addr, int value) "addr=%u value=0x%02x" -via1_rtc_cmd_pram_sect_read(int sector, int offset, int addr, int value) "sector=%u offset=%u addr=%d value=0x%02x" -via1_rtc_cmd_pram_sect_write(int sector, int offset, int addr, int value) "sector=%u offset=%u addr=%d value=0x%02x" +via1_rtc_cmd_pram_sect_read(int sector, int offset, int addr, int value) "sector=%u offset=%u addr=0x%x value=0x%02x" +via1_rtc_cmd_pram_sect_write(int sector, int offset, int addr, int value) "sector=%u offset=%u addr=0x%x value=0x%02x" via1_adb_send(const char *state, uint8_t data, const char *vadbint) "state %s data=0x%02x vADBInt=%s" via1_adb_receive(const char *state, uint8_t data, const char *vadbint, int status, int index, int size) "state %s data=0x%02x vADBInt=%s status=0x%x index=%d size=%d" via1_adb_poll(uint8_t data, const char *vadbint, int status, int index, int size) "data=0x%02x vADBInt=%s status=0x%x index=%d size=%d" diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index e991db4add..2175962846 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -539,8 +539,6 @@ static char *core99_fw_dev_path(FWPathProvider *p, BusState *bus, DeviceState *dev) { PCIDevice *pci; - IDEBus *ide_bus; - IDEState *ide_s; MACIOIDEState *macio_ide; if (!strcmp(object_get_typename(OBJECT(dev)), "macio-newworld")) { @@ -553,17 +551,6 @@ static char *core99_fw_dev_path(FWPathProvider *p, BusState *bus, return g_strdup_printf("ata-3@%x", macio_ide->addr); } - if (!strcmp(object_get_typename(OBJECT(dev)), "ide-drive")) { - ide_bus = IDE_BUS(qdev_get_parent_bus(dev)); - ide_s = idebus_active_if(ide_bus); - - if (ide_s->drive_kind == IDE_CD) { - return g_strdup("cdrom"); - } - - return g_strdup("disk"); - } - if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { return g_strdup("disk"); } diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 44ee99be88..963d247f5f 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -384,8 +384,6 @@ static char *heathrow_fw_dev_path(FWPathProvider *p, BusState *bus, DeviceState *dev) { PCIDevice *pci; - IDEBus *ide_bus; - IDEState *ide_s; MACIOIDEState *macio_ide; if (!strcmp(object_get_typename(OBJECT(dev)), "macio-oldworld")) { @@ -398,17 +396,6 @@ static char *heathrow_fw_dev_path(FWPathProvider *p, BusState *bus, return g_strdup_printf("ata-3@%x", macio_ide->addr); } - if (!strcmp(object_get_typename(OBJECT(dev)), "ide-drive")) { - ide_bus = IDE_BUS(qdev_get_parent_bus(dev)); - ide_s = idebus_active_if(ide_bus); - - if (ide_s->drive_kind == IDE_CD) { - return g_strdup("cdrom"); - } - - return g_strdup("disk"); - } - if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { return g_strdup("disk"); } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 2eaea7e637..3580e7ee61 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2476,28 +2476,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) aio_context_release(ctx); } -static void scsi_disk_realize(SCSIDevice *dev, Error **errp) -{ - DriveInfo *dinfo; - Error *local_err = NULL; - - warn_report("'scsi-disk' is deprecated, " - "please use 'scsi-hd' or 'scsi-cd' instead"); - - if (!dev->conf.blk) { - scsi_realize(dev, &local_err); - assert(local_err); - error_propagate(errp, local_err); - return; - } - - dinfo = blk_legacy_dinfo(dev->conf.blk); - if (dinfo && dinfo->media_cd) { - scsi_cd_realize(dev, errp); - } else { - scsi_hd_realize(dev, errp); - } -} static const SCSIReqOps scsi_disk_emulate_reqops = { .size = sizeof(SCSIDiskReq), @@ -3161,45 +3139,6 @@ static const TypeInfo scsi_block_info = { }; #endif -static Property scsi_disk_properties[] = { - DEFINE_SCSI_DISK_PROPERTIES(), - DEFINE_PROP_BIT("removable", SCSIDiskState, features, - SCSI_DISK_F_REMOVABLE, false), - DEFINE_PROP_BIT("dpofua", SCSIDiskState, features, - SCSI_DISK_F_DPOFUA, false), - DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0), - DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0), - DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), - DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, - DEFAULT_MAX_UNMAP_SIZE), - DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, - DEFAULT_MAX_IO_SIZE), - DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version, - 5), - DEFINE_PROP_END_OF_LIST(), -}; - -static void scsi_disk_class_initfn(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass); - - sc->realize = scsi_disk_realize; - sc->alloc_req = scsi_new_request; - sc->unit_attention_reported = scsi_disk_unit_attention_reported; - dc->fw_name = "disk"; - dc->desc = "virtual SCSI disk or CD-ROM (legacy)"; - dc->reset = scsi_disk_reset; - device_class_set_props(dc, scsi_disk_properties); - dc->vmsd = &vmstate_scsi_disk_state; -} - -static const TypeInfo scsi_disk_info = { - .name = "scsi-disk", - .parent = TYPE_SCSI_DISK_BASE, - .class_init = scsi_disk_class_initfn, -}; - static void scsi_disk_register_types(void) { type_register_static(&scsi_disk_base_info); @@ -3208,7 +3147,6 @@ static void scsi_disk_register_types(void) #ifdef __linux__ type_register_static(&scsi_block_info); #endif - type_register_static(&scsi_disk_info); } type_init(scsi_disk_register_types) diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 0fa13a7330..cda7df36e3 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -749,9 +749,6 @@ static char *sun4u_fw_dev_path(FWPathProvider *p, BusState *bus, DeviceState *dev) { PCIDevice *pci; - IDEBus *ide_bus; - IDEState *ide_s; - int bus_id; if (!strcmp(object_get_typename(OBJECT(dev)), "pbm-bridge")) { pci = PCI_DEVICE(dev); @@ -764,18 +761,6 @@ static char *sun4u_fw_dev_path(FWPathProvider *p, BusState *bus, } } - if (!strcmp(object_get_typename(OBJECT(dev)), "ide-drive")) { - ide_bus = IDE_BUS(qdev_get_parent_bus(dev)); - ide_s = idebus_active_if(ide_bus); - bus_id = ide_bus->bus_id; - - if (ide_s->drive_kind == IDE_CD) { - return g_strdup_printf("ide@%x/cdrom", bus_id); - } - - return g_strdup_printf("ide@%x/disk", bus_id); - } - if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { return g_strdup("disk"); } |