 hw/nvme/ctrl.c       | 141 ++++++++++++++++++++++++++++++++++++++++++++++---
 hw/nvme/nvme.h       |   4 ++
 include/block/nvme.h |   5 ++
 3 files changed, 143 insertions(+), 7 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 3315e5c3de..3728813e90 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -36,6 +36,10 @@
  *              zoned.zasl=<N[optional]>, \
  *              zoned.auto_transition=<on|off[optional]>, \
  *              sriov_max_vfs=<N[optional]> \
+ *              sriov_vq_flexible=<N[optional]> \
+ *              sriov_vi_flexible=<N[optional]> \
+ *              sriov_max_vi_per_vf=<N[optional]> \
+ *              sriov_max_vq_per_vf=<N[optional]> \
  *              subsys=<subsys_id>
  *      -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
  *              zoned=<true|false[optional]>, \
@@ -113,6 +117,29 @@
  *   enables reporting of both SR-IOV and ARI capabilities by the NVMe device.
  *   Virtual function controllers will not report SR-IOV capability.
  *
+ * NOTE: Single Root I/O Virtualization support is experimental.
+ * All the related parameters may be subject to change.
+ *
+ * - `sriov_vq_flexible`
+ *   Indicates the total number of flexible queue resources assignable to all
+ *   the secondary controllers. Implicitly sets the number of primary
+ *   controller's private resources to `(max_ioqpairs - sriov_vq_flexible)`.
+ *
+ * - `sriov_vi_flexible`
+ *   Indicates the total number of flexible interrupt resources assignable to
+ *   all the secondary controllers. Implicitly sets the number of primary
+ *   controller's private resources to `(msix_qsize - sriov_vi_flexible)`.
+ *
+ * - `sriov_max_vi_per_vf`
+ *   Indicates the maximum number of virtual interrupt resources assignable
+ *   to a secondary controller. The default 0 resolves to
+ *   `(sriov_vi_flexible / sriov_max_vfs)`.
+ *
+ * - `sriov_max_vq_per_vf`
+ *   Indicates the maximum number of virtual queue resources assignable to
+ *   a secondary controller. The default 0 resolves to
+ *   `(sriov_vq_flexible / sriov_max_vfs)`.
+ *
  * nvme namespace device parameters
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * - `shared`
@@ -185,6 +212,7 @@
 #define NVME_NUM_FW_SLOTS 1
 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
 #define NVME_MAX_VFS 127
+#define NVME_VF_RES_GRANULARITY 1
 #define NVME_VF_OFFSET 0x1
 #define NVME_VF_STRIDE 1
 
@@ -6658,6 +6686,53 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
             error_setg(errp, "PMR is not supported with SR-IOV");
             return;
         }
+
+        if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) {
+            error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible"
+                       " must be set for the use of SR-IOV");
+            return;
+        }
+
+        if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) {
+            error_setg(errp, "sriov_vq_flexible must be greater than or equal"
+                       " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2);
+            return;
+        }
+
+        if (params->max_ioqpairs < params->sriov_vq_flexible + 2) {
+            error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be"
+                       " greater than or equal to 2");
+            return;
+        }
+
+        if (params->sriov_vi_flexible < params->sriov_max_vfs) {
+            error_setg(errp, "sriov_vi_flexible must be greater than or equal"
+                       " to %d (sriov_max_vfs)", params->sriov_max_vfs);
+            return;
+        }
+
+        if (params->msix_qsize < params->sriov_vi_flexible + 1) {
+            error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be"
+                       " greater than or equal to 1");
+            return;
+        }
+
+        if (params->sriov_max_vi_per_vf &&
+            (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
+            error_setg(errp, "sriov_max_vi_per_vf must meet:"
+                       " (sriov_max_vi_per_vf - 1) %% %d == 0 and"
+                       " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY);
+            return;
+        }
+
+        if (params->sriov_max_vq_per_vf &&
+            (params->sriov_max_vq_per_vf < 2 ||
+             (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) {
+            error_setg(errp, "sriov_max_vq_per_vf must meet:"
+                       " (sriov_max_vq_per_vf - 1) %% %d == 0 and"
+                       " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY);
+            return;
+        }
     }
 }
 
@@ -6666,10 +6741,19 @@ static void nvme_init_state(NvmeCtrl *n)
     NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
     NvmeSecCtrlList *list = &n->sec_ctrl_list;
     NvmeSecCtrlEntry *sctrl;
+    uint8_t max_vfs;
     int i;
 
-    n->conf_ioqpairs = n->params.max_ioqpairs;
-    n->conf_msix_qsize = n->params.msix_qsize;
+    if (pci_is_vf(&n->parent_obj)) {
+        sctrl = nvme_sctrl(n);
+        max_vfs = 0;
+        n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
+        n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
+    } else {
+        max_vfs = n->params.sriov_max_vfs;
+        n->conf_ioqpairs = n->params.max_ioqpairs;
+        n->conf_msix_qsize = n->params.msix_qsize;
+    }
 
     n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
     n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
@@ -6678,14 +6762,41 @@ static void nvme_init_state(NvmeCtrl *n)
     n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
     n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
 
-    list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
-    for (i = 0; i < n->params.sriov_max_vfs; i++) {
+    list->numcntl = cpu_to_le16(max_vfs);
+    for (i = 0; i < max_vfs; i++) {
         sctrl = &list->sec[i];
         sctrl->pcid = cpu_to_le16(n->cntlid);
         sctrl->vfn = cpu_to_le16(i + 1);
     }
 
     cap->cntlid = cpu_to_le16(n->cntlid);
+    cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
+
+    if (pci_is_vf(&n->parent_obj)) {
+        cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
+    } else {
+        cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
+                                 n->params.sriov_vq_flexible);
+        cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
+        cap->vqrfap = cap->vqfrt;
+        cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+        cap->vqfrsm = n->params.sriov_max_vq_per_vf ?
+                      cpu_to_le16(n->params.sriov_max_vq_per_vf) :
+                      cap->vqfrt / MAX(max_vfs, 1);
+    }
+
+    if (pci_is_vf(&n->parent_obj)) {
+        cap->viprt = cpu_to_le16(n->conf_msix_qsize);
+    } else {
+        cap->viprt = cpu_to_le16(n->params.msix_qsize -
+                                 n->params.sriov_vi_flexible);
+        cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible);
+        cap->virfap = cap->vifrt;
+        cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+        cap->vifrsm = n->params.sriov_max_vi_per_vf ?
+                      cpu_to_le16(n->params.sriov_max_vi_per_vf) :
+                      cap->vifrt / MAX(max_vfs, 1);
+    }
 }
 
 static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -6758,11 +6869,14 @@ static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
     return bar_size;
 }
 
-static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
-                            uint64_t bar_size)
+static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
 {
     uint16_t vf_dev_id = n->params.use_intel_id ?
                          PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
+    NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+    uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+                                      le16_to_cpu(cap->vifrsm),
+                                      NULL, NULL);
 
     pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
                        n->params.sriov_max_vfs, n->params.sriov_max_vfs,
@@ -6860,7 +6974,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
     }
 
     if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
-        nvme_init_sriov(n, pci_dev, 0x120, bar_size);
+        nvme_init_sriov(n, pci_dev, 0x120);
     }
 
     return 0;
@@ -6884,6 +6998,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     NvmeIdCtrl *id = &n->id_ctrl;
     uint8_t *pci_conf = pci_dev->config;
     uint64_t cap = ldq_le_p(&n->bar.cap);
+    NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
 
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6976,6 +7091,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 
     stl_le_p(&n->bar.vs, NVME_SPEC_VER);
     n->bar.intmc = n->bar.intms = 0;
+
+    if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
+        stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
+    }
 }
 
 static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
@@ -7116,6 +7235,14 @@ static Property nvme_props[] = {
     DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
                      params.auto_transition_zones, true),
     DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
+    DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
+                       params.sriov_vq_flexible, 0),
+    DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
+                       params.sriov_vi_flexible, 0),
+    DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
+                      params.sriov_max_vi_per_vf, 0),
+    DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
+                      params.sriov_max_vq_per_vf, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index d9deb0b1ec..9afa5e1a93 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -412,6 +412,10 @@ typedef struct NvmeParams {
     bool auto_transition_zones;
     bool legacy_cmb;
     uint8_t sriov_max_vfs;
+    uint16_t sriov_vq_flexible;
+    uint16_t sriov_vi_flexible;
+    uint8_t sriov_max_vq_per_vf;
+    uint8_t sriov_max_vi_per_vf;
 } NvmeParams;
 
 typedef struct NvmeCtrl {
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 94efd32578..58d08d5c2a 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1576,6 +1576,11 @@ typedef struct QEMU_PACKED NvmePriCtrlCap {
     uint8_t rsvd80[4016];
 } NvmePriCtrlCap;
 
+typedef enum NvmePriCtrlCapCrt {
+    NVME_CRT_VQ = 1 << 0,
+    NVME_CRT_VI = 1 << 1,
+} NvmePriCtrlCapCrt;
+
 typedef struct QEMU_PACKED NvmeSecCtrlEntry {
     uint16_t scid;
     uint16_t pcid;
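
Not part of the patch, only an illustration: a minimal sketch of a host command line that satisfies the constraints added to nvme_check_constraints() above (sriov_vq_flexible >= sriov_max_vfs * 2, max_ioqpairs >= sriov_vq_flexible + 2, sriov_vi_flexible >= sriov_max_vfs, msix_qsize >= sriov_vi_flexible + 1). The serial, ids and numeric values are made-up placeholders, not defaults taken from the patch:

    -device nvme-subsys,id=subsys0 \
    -device nvme,serial=deadbeef,subsys=subsys0,max_ioqpairs=26,msix_qsize=10, \
            sriov_max_vfs=4,sriov_vq_flexible=8,sriov_vi_flexible=4

With these example values, nvme_init_state() above would report VQPRT = 1 + 26 - 8 = 19 and VIPRT = 10 - 4 = 6 private resources for the primary controller, VQFRT = 8 and VIFRT = 4 flexible resources, and, with sriov_max_vq_per_vf and sriov_max_vi_per_vf left at their default 0, VQFRSM = 8 / 4 = 2 and VIFRSM = 4 / 4 = 1 per secondary controller.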