diff options
author | Andrzej Jakowski <andrzej.jakowski@linux.intel.com> | 2020-03-30 09:46:56 -0700 |
---|---|---|
committer | Kevin Wolf <kwolf@redhat.com> | 2020-04-30 17:51:07 +0200 |
commit | 6cf94132293adb5c76d336ab7ff5924afe2cb147 (patch) | |
tree | 8529eb87880bd271e5335e7aa26bb73e32561c1a /hw/block | |
parent | eb8a0cf3ba26611f3981f8f45ac6a868975a68cc (diff) |
nvme: introduce PMR support from NVMe 1.4 spec
This patch introduces support for PMR that has been defined as part of NVMe 1.4
spec. User can now specify a pmrdev option that should point to HostMemoryBackend.
pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe
device. Guest OS can perform mmio read and writes to the PMR region that will stay
persistent across system reboot.
Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Diffstat (limited to 'hw/block')
-rw-r--r-- | hw/block/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/block/nvme.c | 109 | ||||
-rw-r--r-- | hw/block/nvme.h | 2 | ||||
-rw-r--r-- | hw/block/trace-events | 4 |
4 files changed, 116 insertions, 1 deletions
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index 4b4a2b338d..47960b5f0d 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -7,12 +7,12 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o common-obj-$(CONFIG_XEN) += xen-block.o common-obj-$(CONFIG_ECC) += ecc.o common-obj-$(CONFIG_ONENAND) += onenand.o -common-obj-$(CONFIG_NVME_PCI) += nvme.o common-obj-$(CONFIG_SWIM) += swim.o common-obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o +obj-$(CONFIG_NVME_PCI) += nvme.o obj-y += dataplane/ diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d28335cbf3..9b453423cf 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -19,10 +19,19 @@ * -drive file=<file>,if=none,id=<drive_id> * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ * cmb_size_mb=<cmb_size_mb[optional]>, \ + * [pmrdev=<mem_backend_file_id>,] \ * num_queues=<N[optional]> * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. + * + * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation + * in available BAR's. cmb_size_mb= will take precedence over pmrdev= when + * both provided. + * Enabling pmr emulation can be achieved by pointing to memory-backend-file. + * For example: + * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \ + * size=<size> .... -device nvme,...,pmrdev=<mem_id> */ #include "qemu/osdep.h" @@ -35,7 +44,9 @@ #include "sysemu/sysemu.h" #include "qapi/error.h" #include "qapi/visitor.h" +#include "sysemu/hostmem.h" #include "sysemu/block-backend.h" +#include "exec/ram_addr.h" #include "qemu/log.h" #include "qemu/module.h" @@ -1141,6 +1152,26 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; + case 0xE00: /* PMRCAP */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, + "invalid write to PMRCAP register, ignored"); + return; + case 0xE04: /* TODO PMRCTL */ + break; + case 0xE08: /* PMRSTS */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, + "invalid write to PMRSTS register, ignored"); + return; + case 0xE0C: /* PMREBS */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, + "invalid write to PMREBS register, ignored"); + return; + case 0xE10: /* PMRSWTP */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, + "invalid write to PMRSWTP register, ignored"); + return; + case 0xE14: /* TODO PMRMSC */ + break; default: NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, "invalid MMIO write," @@ -1169,6 +1200,16 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) } if (addr < sizeof(n->bar)) { + /* + * When PMRWBM bit 1 is set then read from + * from PMRSTS should ensure prior writes + * made it to persistent media + */ + if (addr == 0xE08 && + (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { + qemu_ram_writeback(n->pmrdev->mr.ram_block, + 0, n->pmrdev->size); + } memcpy(&val, ptr + addr, size); } else { NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, @@ -1332,6 +1373,23 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) error_setg(errp, "serial property not set"); return; } + + if (!n->cmb_size_mb && n->pmrdev) { + if (host_memory_backend_is_mapped(n->pmrdev)) { + char *path = object_get_canonical_path_component(OBJECT(n->pmrdev)); + error_setg(errp, "can't use already busy memdev: %s", path); + g_free(path); + return; + } + + if (!is_power_of_2(n->pmrdev->size)) { + error_setg(errp, "pmr backend size needs to be power of 2 in size"); + return; + } + + host_memory_backend_set_mapped(n->pmrdev, true); + } + blkconf_blocksizes(&n->conf); if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), false, errp)) { @@ -1415,6 +1473,51 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + } else if (n->pmrdev) { + /* Controller Capabilities register */ + NVME_CAP_SET_PMRS(n->bar.cap, 1); + + /* PMR Capabities register */ + n->bar.pmrcap = 0; + NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2); + NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); + NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); + + /* PMR Control register */ + n->bar.pmrctl = 0; + NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); + + /* PMR Status register */ + n->bar.pmrsts = 0; + NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); + + /* PMR Elasticity Buffer Size register */ + n->bar.pmrebs = 0; + NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); + NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); + NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); + + /* PMR Sustained Write Throughput register */ + n->bar.pmrswtp = 0; + NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); + NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); + + /* PMR Memory Space Control register */ + n->bar.pmrmsc = 0; + NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); + NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); + + pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); } for (i = 0; i < n->num_namespaces; i++) { @@ -1445,11 +1548,17 @@ static void nvme_exit(PCIDevice *pci_dev) if (n->cmb_size_mb) { g_free(n->cmbuf); } + + if (n->pmrdev) { + host_memory_backend_set_mapped(n->pmrdev, false); + } msix_uninit_exclusive_bar(pci_dev); } static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), + DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), DEFINE_PROP_STRING("serial", NvmeCtrl, serial), DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 557194ee19..6520a9f0be 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -83,6 +83,8 @@ typedef struct NvmeCtrl { uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ char *serial; + HostMemoryBackend *pmrdev; + NvmeNamespace *namespaces; NvmeSQueue **sq; NvmeCQueue **cq; diff --git a/hw/block/trace-events b/hw/block/trace-events index bf6d11b58b..aca54bda14 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -110,6 +110,10 @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" |