Diffstat (limited to 'hw/nvme/ctrl.c')
-rw-r--r--  hw/nvme/ctrl.c | 741
1 file changed, 653 insertions(+), 88 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 1e6e0fcad9..d349b3e426 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -35,6 +35,11 @@
* mdts=<N[optional]>,vsl=<N[optional]>, \
* zoned.zasl=<N[optional]>, \
* zoned.auto_transition=<on|off[optional]>, \
+ * sriov_max_vfs=<N[optional]>, \
+ * sriov_vq_flexible=<N[optional]>, \
+ * sriov_vi_flexible=<N[optional]>, \
+ * sriov_max_vi_per_vf=<N[optional]>, \
+ * sriov_max_vq_per_vf=<N[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@@ -106,6 +111,35 @@
* transitioned to zone state closed for resource management purposes.
* Defaults to 'on'.
*
+ * - `sriov_max_vfs`
+ * Indicates the maximum number of PCIe virtual functions supported
+ * by the controller. The default value is 0. Specifying a non-zero value
+ * enables reporting of both SR-IOV and ARI capabilities by the NVMe device.
+ * Virtual function controllers will not report SR-IOV capability.
+ *
+ * NOTE: Single Root I/O Virtualization support is experimental.
+ * All the related parameters may be subject to change.
+ *
+ * - `sriov_vq_flexible`
+ * Indicates the total number of flexible queue resources assignable to all
+ * the secondary controllers. Implicitly sets the number of the primary
+ * controller's private resources to `(max_ioqpairs - sriov_vq_flexible)`.
+ *
+ * - `sriov_vi_flexible`
+ * Indicates the total number of flexible interrupt resources assignable to
+ * all the secondary controllers. Implicitly sets the number of the primary
+ * controller's private resources to `(msix_qsize - sriov_vi_flexible)`.
+ *
+ * - `sriov_max_vi_per_vf`
+ * Indicates the maximum number of virtual interrupt resources assignable
+ * to a secondary controller. The default 0 resolves to
+ * `(sriov_vi_flexible / sriov_max_vfs)`.
+ *
+ * - `sriov_max_vq_per_vf`
+ * Indicates the maximum number of virtual queue resources assignable to
+ * a secondary controller. The default 0 resolves to
+ * `(sriov_vq_flexible / sriov_max_vfs)`.
+ *
* nvme namespace device parameters
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - `shared`
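
Taken together, the SR-IOV parameters documented in the hunk above must satisfy
the constraints enforced by nvme_check_constraints() later in this patch. A
hypothetical invocation that satisfies all of them (values chosen purely for
illustration):

    -device nvme-subsys,id=subsys0
    -device nvme,serial=deadbeef,subsys=subsys0,max_ioqpairs=6,msix_qsize=5, \
        sriov_max_vfs=2,sriov_vq_flexible=4,sriov_vi_flexible=2

Here sriov_vq_flexible (4) is exactly sriov_max_vfs * 2, max_ioqpairs (6)
leaves the required two private I/O queue pairs for the PF, and msix_qsize (5)
leaves three private interrupt resources after carving out sriov_vi_flexible.
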
@@ -154,12 +188,14 @@
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/units.h"
+#include "qemu/range.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "sysemu/hostmem.h"
#include "hw/pci/msix.h"
+#include "hw/pci/pcie_sriov.h"
#include "migration/vmstate.h"
#include "nvme.h"
@@ -176,6 +212,10 @@
#define NVME_TEMPERATURE_CRITICAL 0x175
#define NVME_NUM_FW_SLOTS 1
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
+#define NVME_MAX_VFS 127
+#define NVME_VF_RES_GRANULARITY 1
+#define NVME_VF_OFFSET 0x1
+#define NVME_VF_STRIDE 1
#define NVME_GUEST_ERR(trace, fmt, ...) \
do { \
@@ -223,6 +263,7 @@ static const uint32_t nvme_cse_acs[256] = {
[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
+ [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};
@@ -254,6 +295,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
};
static void nvme_process_sq(void *opaque);
+static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
static uint16_t nvme_sqid(NvmeRequest *req)
{
@@ -437,12 +479,12 @@ static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
{
- return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1;
+ return sqid < n->conf_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1;
}
static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
{
- return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1;
+ return cqid < n->conf_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1;
}
static void nvme_inc_cq_tail(NvmeCQueue *cq)
@@ -808,10 +850,6 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
uint8_t type = NVME_SGL_TYPE(segment[i].type);
switch (type) {
- case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
- if (cmd->opcode == NVME_CMD_WRITE) {
- continue;
- }
case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
break;
case NVME_SGL_DESCR_TYPE_SEGMENT:
@@ -844,10 +882,6 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
trans_len = MIN(*len, dlen);
- if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) {
- goto next;
- }
-
addr = le64_to_cpu(segment[i].addr);
if (UINT64_MAX - addr < dlen) {
@@ -859,7 +893,6 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
return status;
}
-next:
*len -= trans_len;
}
@@ -917,8 +950,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
seg_len = le32_to_cpu(sgld->len);
/* check the length of the (Last) Segment descriptor */
- if ((!seg_len || seg_len & 0xf) &&
- (NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) {
+ if (!seg_len || seg_len & 0xf) {
return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
}
@@ -956,26 +988,20 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
last_sgld = &segment[nsgld - 1];
/*
- * If the segment ends with a Data Block or Bit Bucket Descriptor Type,
- * then we are done.
+ * If the segment ends with a Data Block, then we are done.
*/
- switch (NVME_SGL_TYPE(last_sgld->type)) {
- case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
- case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
+ if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd);
if (status) {
goto unmap;
}
goto out;
-
- default:
- break;
}
/*
- * If the last descriptor was not a Data Block or Bit Bucket, then the
- * current segment must not be a Last Segment.
+ * If the last descriptor was not a Data Block, then the current
+ * segment must not be a Last Segment.
*/
if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
@@ -4284,8 +4310,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_err_invalid_create_sq_cqid(cqid);
return NVME_INVALID_CQID | NVME_DNR;
}
- if (unlikely(!sqid || sqid > n->params.max_ioqpairs ||
- n->sq[sqid] != NULL)) {
+ if (unlikely(!sqid || sqid > n->conf_ioqpairs || n->sq[sqid] != NULL)) {
trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
return NVME_INVALID_QID | NVME_DNR;
}
@@ -4637,8 +4662,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
NVME_CQ_FLAGS_IEN(qflags) != 0);
- if (unlikely(!cqid || cqid > n->params.max_ioqpairs ||
- n->cq[cqid] != NULL)) {
+ if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
return NVME_INVALID_QID | NVME_DNR;
}
@@ -4654,7 +4678,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_err_invalid_create_cq_vector(vector);
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
}
- if (unlikely(vector >= n->params.msix_qsize)) {
+ if (unlikely(vector >= n->conf_msix_qsize)) {
trace_pci_nvme_err_invalid_create_cq_vector(vector);
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
}
@@ -4793,6 +4817,37 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
return nvme_c2h(n, (uint8_t *)list, sizeof(list), req);
}
+static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req)
+{
+ trace_pci_nvme_identify_pri_ctrl_cap(le16_to_cpu(n->pri_ctrl_cap.cntlid));
+
+ return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap,
+ sizeof(NvmePriCtrlCap), req);
+}
+
+static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+ uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid);
+ uint16_t min_id = le16_to_cpu(c->ctrlid);
+ uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl;
+ NvmeSecCtrlList list = {0};
+ uint8_t i;
+
+ for (i = 0; i < num_sec_ctrl; i++) {
+ if (n->sec_ctrl_list.sec[i].scid >= min_id) {
+ list.numcntl = num_sec_ctrl - i;
+ memcpy(&list.sec, n->sec_ctrl_list.sec + i,
+ list.numcntl * sizeof(NvmeSecCtrlEntry));
+ break;
+ }
+ }
+
+ trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl);
+
+ return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req);
+}
+
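
nvme_identify_sec_ctrl_list() implements CNS 0x17: the returned list starts at
the first entry whose SCID is greater than or equal to the CNTID given in the
command, relying on the entries being ordered by SCID. A minimal host-side
sketch of the same filter, using a hypothetical trimmed-down entry type rather
than the QEMU structs:

    #include <stdint.h>
    #include <string.h>

    /* hypothetical trimmed-down entry: only scid matters for the filter */
    struct sec_entry {
        uint16_t scid;  /* secondary controller identifier */
        uint16_t pcid;  /* primary controller identifier */
    };

    /* copy every entry with scid >= min_id into out[], return the count */
    static int filter_sec_ctrls(const struct sec_entry *sec, int num,
                                uint16_t min_id, struct sec_entry *out)
    {
        for (int i = 0; i < num; i++) {
            if (sec[i].scid >= min_id) {
                int cnt = num - i;  /* entries are ordered by scid */
                memcpy(out, &sec[i], cnt * sizeof(*out));
                return cnt;
            }
        }
        return 0;
    }
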
static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
bool active)
{
@@ -5009,6 +5064,10 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
return nvme_identify_ctrl_list(n, req, true);
case NVME_ID_CNS_CTRL_LIST:
return nvme_identify_ctrl_list(n, req, false);
+ case NVME_ID_CNS_PRIMARY_CTRL_CAP:
+ return nvme_identify_pri_ctrl_cap(n, req);
+ case NVME_ID_CNS_SECONDARY_CTRL_LIST:
+ return nvme_identify_sec_ctrl_list(n, req);
case NVME_ID_CNS_CS_NS:
return nvme_identify_ns_csi(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT:
@@ -5217,13 +5276,12 @@ defaults:
break;
case NVME_NUMBER_OF_QUEUES:
- result = (n->params.max_ioqpairs - 1) |
- ((n->params.max_ioqpairs - 1) << 16);
+ result = (n->conf_ioqpairs - 1) | ((n->conf_ioqpairs - 1) << 16);
trace_pci_nvme_getfeat_numq(result);
break;
case NVME_INTERRUPT_VECTOR_CONF:
iv = dw11 & 0xffff;
- if (iv >= n->params.max_ioqpairs + 1) {
+ if (iv >= n->conf_ioqpairs + 1) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -5379,10 +5437,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1,
((dw11 >> 16) & 0xffff) + 1,
- n->params.max_ioqpairs,
- n->params.max_ioqpairs);
- req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
- ((n->params.max_ioqpairs - 1) << 16));
+ n->conf_ioqpairs,
+ n->conf_ioqpairs);
+ req->cqe.result = cpu_to_le32((n->conf_ioqpairs - 1) |
+ ((n->conf_ioqpairs - 1) << 16));
break;
case NVME_ASYNCHRONOUS_EVENT_CONF:
n->features.async_config = dw11;
@@ -5769,6 +5827,167 @@ out:
return status;
}
+static void nvme_get_virt_res_num(NvmeCtrl *n, uint8_t rt, int *num_total,
+ int *num_prim, int *num_sec)
+{
+ *num_total = le32_to_cpu(rt ?
+ n->pri_ctrl_cap.vifrt : n->pri_ctrl_cap.vqfrt);
+ *num_prim = le16_to_cpu(rt ?
+ n->pri_ctrl_cap.virfap : n->pri_ctrl_cap.vqrfap);
+ *num_sec = le16_to_cpu(rt ? n->pri_ctrl_cap.virfa : n->pri_ctrl_cap.vqrfa);
+}
+
+static uint16_t nvme_assign_virt_res_to_prim(NvmeCtrl *n, NvmeRequest *req,
+ uint16_t cntlid, uint8_t rt,
+ int nr)
+{
+ int num_total, num_prim, num_sec;
+
+ if (cntlid != n->cntlid) {
+ return NVME_INVALID_CTRL_ID | NVME_DNR;
+ }
+
+ nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
+
+ if (nr > num_total) {
+ return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
+ }
+
+ if (nr > num_total - num_sec) {
+ return NVME_INVALID_RESOURCE_ID | NVME_DNR;
+ }
+
+ if (rt) {
+ n->next_pri_ctrl_cap.virfap = cpu_to_le16(nr);
+ } else {
+ n->next_pri_ctrl_cap.vqrfap = cpu_to_le16(nr);
+ }
+
+ req->cqe.result = cpu_to_le32(nr);
+ return req->status;
+}
+
+static void nvme_update_virt_res(NvmeCtrl *n, NvmeSecCtrlEntry *sctrl,
+ uint8_t rt, int nr)
+{
+ int prev_nr, prev_total;
+
+ if (rt) {
+ prev_nr = le16_to_cpu(sctrl->nvi);
+ prev_total = le32_to_cpu(n->pri_ctrl_cap.virfa);
+ sctrl->nvi = cpu_to_le16(nr);
+ n->pri_ctrl_cap.virfa = cpu_to_le32(prev_total + nr - prev_nr);
+ } else {
+ prev_nr = le16_to_cpu(sctrl->nvq);
+ prev_total = le32_to_cpu(n->pri_ctrl_cap.vqrfa);
+ sctrl->nvq = cpu_to_le16(nr);
+ n->pri_ctrl_cap.vqrfa = cpu_to_le32(prev_total + nr - prev_nr);
+ }
+}
+
+static uint16_t nvme_assign_virt_res_to_sec(NvmeCtrl *n, NvmeRequest *req,
+ uint16_t cntlid, uint8_t rt, int nr)
+{
+ int num_total, num_prim, num_sec, num_free, diff, limit;
+ NvmeSecCtrlEntry *sctrl;
+
+ sctrl = nvme_sctrl_for_cntlid(n, cntlid);
+ if (!sctrl) {
+ return NVME_INVALID_CTRL_ID | NVME_DNR;
+ }
+
+ if (sctrl->scs) {
+ return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
+ }
+
+ limit = le16_to_cpu(rt ? n->pri_ctrl_cap.vifrsm : n->pri_ctrl_cap.vqfrsm);
+ if (nr > limit) {
+ return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
+ }
+
+ nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
+ num_free = num_total - num_prim - num_sec;
+ diff = nr - le16_to_cpu(rt ? sctrl->nvi : sctrl->nvq);
+
+ if (diff > num_free) {
+ return NVME_INVALID_RESOURCE_ID | NVME_DNR;
+ }
+
+ nvme_update_virt_res(n, sctrl, rt, nr);
+ req->cqe.result = cpu_to_le32(nr);
+
+ return req->status;
+}
+
+static uint16_t nvme_virt_set_state(NvmeCtrl *n, uint16_t cntlid, bool online)
+{
+ NvmeCtrl *sn = NULL;
+ NvmeSecCtrlEntry *sctrl;
+ int vf_index;
+
+ sctrl = nvme_sctrl_for_cntlid(n, cntlid);
+ if (!sctrl) {
+ return NVME_INVALID_CTRL_ID | NVME_DNR;
+ }
+
+ if (!pci_is_vf(&n->parent_obj)) {
+ vf_index = le16_to_cpu(sctrl->vfn) - 1;
+ sn = NVME(pcie_sriov_get_vf_at_index(&n->parent_obj, vf_index));
+ }
+
+ if (online) {
+ if (!sctrl->nvi || (le16_to_cpu(sctrl->nvq) < 2) || !sn) {
+ return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
+ }
+
+ if (!sctrl->scs) {
+ sctrl->scs = 0x1;
+ nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
+ }
+ } else {
+ nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_INTERRUPT, 0);
+ nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_QUEUE, 0);
+
+ if (sctrl->scs) {
+ sctrl->scs = 0x0;
+ if (sn) {
+ nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
+ }
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
+static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest *req)
+{
+ uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+ uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
+ uint8_t act = dw10 & 0xf;
+ uint8_t rt = (dw10 >> 8) & 0x7;
+ uint16_t cntlid = (dw10 >> 16) & 0xffff;
+ int nr = dw11 & 0xffff;
+
+ trace_pci_nvme_virt_mngmt(nvme_cid(req), act, cntlid, rt ? "VI" : "VQ", nr);
+
+ if (rt != NVME_VIRT_RES_QUEUE && rt != NVME_VIRT_RES_INTERRUPT) {
+ return NVME_INVALID_RESOURCE_ID | NVME_DNR;
+ }
+
+ switch (act) {
+ case NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN:
+ return nvme_assign_virt_res_to_sec(n, req, cntlid, rt, nr);
+ case NVME_VIRT_MNGMT_ACTION_PRM_ALLOC:
+ return nvme_assign_virt_res_to_prim(n, req, cntlid, rt, nr);
+ case NVME_VIRT_MNGMT_ACTION_SEC_ONLINE:
+ return nvme_virt_set_state(n, cntlid, true);
+ case NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE:
+ return nvme_virt_set_state(n, cntlid, false);
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+}
+
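
For reference, nvme_virt_mngmt() above unpacks the action from CDW10 bits 3:0,
the resource type from bits 10:8, and the target controller ID from bits
31:16, with the resource count in CDW11 bits 15:0. A sketch of the matching
host-side encoding; the helper name is made up for illustration:

    #include <stdint.h>

    /* pack CDW10/CDW11 exactly as nvme_virt_mngmt() unpacks them */
    static void encode_virt_mngmt(uint8_t act, uint8_t rt, uint16_t cntlid,
                                  uint16_t nr, uint32_t *cdw10, uint32_t *cdw11)
    {
        *cdw10 = (uint32_t)(act & 0xf) |
                 ((uint32_t)(rt & 0x7) << 8) |
                 ((uint32_t)cntlid << 16);
        *cdw11 = nr;  /* number of VQ/VI resources, bits 15:0 */
    }
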
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -5811,6 +6030,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_aer(n, req);
case NVME_ADM_CMD_NS_ATTACHMENT:
return nvme_ns_attachment(n, req);
+ case NVME_ADM_CMD_VIRT_MNGMT:
+ return nvme_virt_mngmt(n, req);
case NVME_ADM_CMD_FORMAT_NVM:
return nvme_format(n, req);
default:
@@ -5857,8 +6078,48 @@ static void nvme_process_sq(void *opaque)
}
}
-static void nvme_ctrl_reset(NvmeCtrl *n)
+static void nvme_update_msixcap_ts(PCIDevice *pci_dev, uint32_t table_size)
{
+ uint8_t *config;
+
+ if (!msix_present(pci_dev)) {
+ return;
+ }
+
+ assert(table_size > 0 && table_size <= pci_dev->msix_entries_nr);
+
+ config = pci_dev->config + pci_dev->msix_cap;
+ pci_set_word_by_mask(config + PCI_MSIX_FLAGS, PCI_MSIX_FLAGS_QSIZE,
+ table_size - 1);
+}
+
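
nvme_update_msixcap_ts() rewrites the Table Size field of the MSI-X Message
Control word, which the PCI spec encodes as N - 1 in bits 10:0
(PCI_MSIX_FLAGS_QSIZE is 0x07ff). A standalone sketch of the same masked
update:

    #include <stdint.h>

    #define MSIX_FLAGS_QSIZE 0x07ff  /* Table Size, bits 10:0, holds N - 1 */

    /* return msg_ctrl with its Table Size field set to table_size - 1 */
    static uint16_t set_msix_table_size(uint16_t msg_ctrl, uint32_t table_size)
    {
        return (uint16_t)((msg_ctrl & ~MSIX_FLAGS_QSIZE) |
                          ((table_size - 1) & MSIX_FLAGS_QSIZE));
    }
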
+static void nvme_activate_virt_res(NvmeCtrl *n)
+{
+ PCIDevice *pci_dev = &n->parent_obj;
+ NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+ NvmeSecCtrlEntry *sctrl;
+
+ /* -1 to account for the admin queue */
+ if (pci_is_vf(pci_dev)) {
+ sctrl = nvme_sctrl(n);
+ cap->vqprt = sctrl->nvq;
+ cap->viprt = sctrl->nvi;
+ n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
+ n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
+ } else {
+ cap->vqrfap = n->next_pri_ctrl_cap.vqrfap;
+ cap->virfap = n->next_pri_ctrl_cap.virfap;
+ n->conf_ioqpairs = le16_to_cpu(cap->vqprt) +
+ le16_to_cpu(cap->vqrfap) - 1;
+ n->conf_msix_qsize = le16_to_cpu(cap->viprt) +
+ le16_to_cpu(cap->virfap);
+ }
+}
+
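
On the PF path, nvme_activate_virt_res() derives the usable I/O queue count as
VQPRT + VQRFAP - 1, the -1 accounting for the admin queue. A quick check with
illustrative numbers (vqprt = 3, all four flexible queue resources held by the
primary controller):

    #include <stdio.h>

    int main(void)
    {
        int vqprt = 3;   /* private queue resources, incl. the admin queue */
        int vqrfap = 4;  /* flexible resources currently assigned to the PF */

        /* same arithmetic as the PF branch of nvme_activate_virt_res() */
        printf("conf_ioqpairs = %d\n", vqprt + vqrfap - 1);  /* -> 6 */
        return 0;
    }
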
+static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
+{
+ PCIDevice *pci_dev = &n->parent_obj;
+ NvmeSecCtrlEntry *sctrl;
NvmeNamespace *ns;
int i;
@@ -5888,9 +6149,41 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
g_free(event);
}
+ if (n->params.sriov_max_vfs) {
+ if (!pci_is_vf(pci_dev)) {
+ for (i = 0; i < n->sec_ctrl_list.numcntl; i++) {
+ sctrl = &n->sec_ctrl_list.sec[i];
+ nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
+ }
+
+ if (rst != NVME_RESET_CONTROLLER) {
+ pcie_sriov_pf_disable_vfs(pci_dev);
+ }
+ }
+
+ if (rst != NVME_RESET_CONTROLLER) {
+ nvme_activate_virt_res(n);
+ }
+ }
+
n->aer_queued = 0;
+ n->aer_mask = 0;
n->outstanding_aers = 0;
n->qs_created = false;
+
+ nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
+
+ if (pci_is_vf(pci_dev)) {
+ sctrl = nvme_sctrl(n);
+
+ stl_le_p(&n->bar.csts, sctrl->scs ? 0 : NVME_CSTS_FAILED);
+ } else {
+ stl_le_p(&n->bar.csts, 0);
+ }
+
+ stl_le_p(&n->bar.intms, 0);
+ stl_le_p(&n->bar.intmc, 0);
+ stl_le_p(&n->bar.cc, 0);
}
static void nvme_ctrl_shutdown(NvmeCtrl *n)
@@ -5936,7 +6229,15 @@ static int nvme_start_ctrl(NvmeCtrl *n)
uint64_t acq = ldq_le_p(&n->bar.acq);
uint32_t page_bits = NVME_CC_MPS(cc) + 12;
uint32_t page_size = 1 << page_bits;
+ NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
+ if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
+ trace_pci_nvme_err_startfail_virt_state(le16_to_cpu(sctrl->nvi),
+ le16_to_cpu(sctrl->nvq),
+ sctrl->scs ? "ONLINE" :
+ "OFFLINE");
+ return -1;
+ }
if (unlikely(n->cq[0])) {
trace_pci_nvme_err_startfail_cq();
return -1;
@@ -6017,8 +6318,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_set_timestamp(n, 0ULL);
- QTAILQ_INIT(&n->aer_queue);
-
nvme_select_iocs(n);
return 0;
@@ -6096,20 +6395,21 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
nvme_irq_check(n);
break;
case NVME_REG_CC:
+ stl_le_p(&n->bar.cc, data);
+
trace_pci_nvme_mmio_cfg(data & 0xffffffff);
- /* Windows first sends data, then sends enable bit */
- if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) &&
- !NVME_CC_SHN(data) && !NVME_CC_SHN(cc))
- {
- cc = data;
+ if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
+ trace_pci_nvme_mmio_shutdown_set();
+ nvme_ctrl_shutdown(n);
+ csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
+ csts |= NVME_CSTS_SHST_COMPLETE;
+ } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
+ trace_pci_nvme_mmio_shutdown_cleared();
+ csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
}
if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) {
- cc = data;
-
- /* flush CC since nvme_start_ctrl() needs the value */
- stl_le_p(&n->bar.cc, cc);
if (unlikely(nvme_start_ctrl(n))) {
trace_pci_nvme_err_startfail();
csts = NVME_CSTS_FAILED;
@@ -6119,23 +6419,11 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
}
} else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) {
trace_pci_nvme_mmio_stopped();
- nvme_ctrl_reset(n);
- cc = 0;
- csts &= ~NVME_CSTS_READY;
- }
+ nvme_ctrl_reset(n, NVME_RESET_CONTROLLER);
- if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
- trace_pci_nvme_mmio_shutdown_set();
- nvme_ctrl_shutdown(n);
- cc = data;
- csts |= NVME_CSTS_SHST_COMPLETE;
- } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
- trace_pci_nvme_mmio_shutdown_cleared();
- csts &= ~NVME_CSTS_SHST_COMPLETE;
- cc = data;
+ break;
}
- stl_le_p(&n->bar.cc, cc);
stl_le_p(&n->bar.csts, csts);
break;
@@ -6319,6 +6607,12 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
return 0;
}
+ if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs &&
+ addr != NVME_REG_CSTS) {
+ trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
+ return 0;
+ }
+
/*
 * When PMRWBM bit 1 is set then read from
 * PMRSTS should ensure prior writes
@@ -6468,6 +6762,12 @@ static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
trace_pci_nvme_mmio_write(addr, data, size);
+ if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs &&
+ addr != NVME_REG_CSTS) {
+ trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
+ return;
+ }
+
if (addr < sizeof(n->bar)) {
nvme_write_bar(n, addr, data, size);
} else {
@@ -6569,19 +6869,140 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
error_setg(errp, "vsl must be non-zero");
return;
}
+
+ if (params->sriov_max_vfs) {
+ if (!n->subsys) {
+ error_setg(errp, "subsystem is required for the use of SR-IOV");
+ return;
+ }
+
+ if (params->sriov_max_vfs > NVME_MAX_VFS) {
+ error_setg(errp, "sriov_max_vfs must be between 0 and %d",
+ NVME_MAX_VFS);
+ return;
+ }
+
+ if (params->cmb_size_mb) {
+ error_setg(errp, "CMB is not supported with SR-IOV");
+ return;
+ }
+
+ if (n->pmr.dev) {
+ error_setg(errp, "PMR is not supported with SR-IOV");
+ return;
+ }
+
+ if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) {
+ error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible"
+ " must be set for the use of SR-IOV");
+ return;
+ }
+
+ if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) {
+ error_setg(errp, "sriov_vq_flexible must be greater than or equal"
+ " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2);
+ return;
+ }
+
+ if (params->max_ioqpairs < params->sriov_vq_flexible + 2) {
+ error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be"
+ " greater than or equal to 2");
+ return;
+ }
+
+ if (params->sriov_vi_flexible < params->sriov_max_vfs) {
+ error_setg(errp, "sriov_vi_flexible must be greater than or equal"
+ " to %d (sriov_max_vfs)", params->sriov_max_vfs);
+ return;
+ }
+
+ if (params->msix_qsize < params->sriov_vi_flexible + 1) {
+ error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be"
+ " greater than or equal to 1");
+ return;
+ }
+
+ if (params->sriov_max_vi_per_vf &&
+ (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
+ error_setg(errp, "sriov_max_vi_per_vf must meet:"
+ " (sriov_max_vi_per_vf - 1) %% %d == 0 and"
+ " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY);
+ return;
+ }
+
+ if (params->sriov_max_vq_per_vf &&
+ (params->sriov_max_vq_per_vf < 2 ||
+ (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) {
+ error_setg(errp, "sriov_max_vq_per_vf must meet:"
+ " (sriov_max_vq_per_vf - 1) %% %d == 0 and"
+ " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY);
+ return;
+ }
+ }
}
static void nvme_init_state(NvmeCtrl *n)
{
- /* add one to max_ioqpairs to account for the admin queue pair */
- n->reg_size = pow2ceil(sizeof(NvmeBar) +
- 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE);
+ NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ uint8_t max_vfs;
+ int i;
+
+ if (pci_is_vf(&n->parent_obj)) {
+ sctrl = nvme_sctrl(n);
+ max_vfs = 0;
+ n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
+ n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
+ } else {
+ max_vfs = n->params.sriov_max_vfs;
+ n->conf_ioqpairs = n->params.max_ioqpairs;
+ n->conf_msix_qsize = n->params.msix_qsize;
+ }
+
n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
n->temperature = NVME_TEMPERATURE;
n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
+ QTAILQ_INIT(&n->aer_queue);
+
+ list->numcntl = cpu_to_le16(max_vfs);
+ for (i = 0; i < max_vfs; i++) {
+ sctrl = &list->sec[i];
+ sctrl->pcid = cpu_to_le16(n->cntlid);
+ sctrl->vfn = cpu_to_le16(i + 1);
+ }
+
+ cap->cntlid = cpu_to_le16(n->cntlid);
+ cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
+
+ if (pci_is_vf(&n->parent_obj)) {
+ cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
+ } else {
+ cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
+ n->params.sriov_vq_flexible);
+ cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
+ cap->vqrfap = cap->vqfrt;
+ cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+ cap->vqfrsm = n->params.sriov_max_vq_per_vf ?
+ cpu_to_le16(n->params.sriov_max_vq_per_vf) :
+ cap->vqfrt / MAX(max_vfs, 1);
+ }
+
+ if (pci_is_vf(&n->parent_obj)) {
+ cap->viprt = cpu_to_le16(n->conf_msix_qsize);
+ } else {
+ cap->viprt = cpu_to_le16(n->params.msix_qsize -
+ n->params.sriov_vi_flexible);
+ cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible);
+ cap->virfap = cap->vifrt;
+ cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+ cap->vifrsm = n->params.sriov_max_vi_per_vf ?
+ cpu_to_le16(n->params.sriov_max_vi_per_vf) :
+ cap->vifrt / MAX(max_vfs, 1);
+ }
}
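
For the PF, nvme_init_state() splits max_ioqpairs and msix_qsize into private
and flexible portions and derives the default per-VF maxima when
sriov_max_vq_per_vf / sriov_max_vi_per_vf are left at 0. A worked example in
host-endian arithmetic, reusing the illustrative parameters from the example
invocation earlier:

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        /* illustrative: max_ioqpairs=6, msix_qsize=5, sriov_max_vfs=2,
         * sriov_vq_flexible=4, sriov_vi_flexible=2 */
        int max_vfs = 2;

        int vqprt = 1 + 6 - 4;                 /* private queue res. -> 3 */
        int vqfrt = 4;                         /* flexible queue resources */
        int vqfrsm = vqfrt / MAX(max_vfs, 1);  /* per-VF default max -> 2 */

        int viprt = 5 - 2;                     /* private interrupt res. -> 3 */
        int vifrt = 2;                         /* flexible interrupt res. */
        int vifrsm = vifrt / MAX(max_vfs, 1);  /* per-VF default max -> 1 */

        printf("VQ: prt=%d frt=%d frsm=%d\n", vqprt, vqfrt, vqfrsm);
        printf("VI: prt=%d frt=%d frsm=%d\n", viprt, vifrt, vifrsm);
        return 0;
    }
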
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -6626,10 +7047,77 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
memory_region_set_enabled(&n->pmr.dev->mr, false);
}
+static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
+ unsigned *msix_table_offset,
+ unsigned *msix_pba_offset)
+{
+ uint64_t bar_size, msix_table_size, msix_pba_size;
+
+ bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
+ bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
+
+ if (msix_table_offset) {
+ *msix_table_offset = bar_size;
+ }
+
+ msix_table_size = PCI_MSIX_ENTRY_SIZE * total_irqs;
+ bar_size += msix_table_size;
+ bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
+
+ if (msix_pba_offset) {
+ *msix_pba_offset = bar_size;
+ }
+
+ msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
+ bar_size += msix_pba_size;
+
+ bar_size = pow2ceil(bar_size);
+ return bar_size;
+}
+
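
nvme_bar_size() lays out the register page and doorbells, then the
4 KiB-aligned MSI-X table, then the pending bit array, and rounds the total up
to a power of two. A standalone sketch of the arithmetic, assuming
sizeof(NvmeBar) == 4096 and NVME_DB_SIZE == 4 (the values this file uses):

    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    /* round up to the next power of two (sketch of QEMU's pow2ceil()) */
    static uint64_t pow2ceil_sketch(uint64_t v)
    {
        uint64_t r = 1;
        while (r < v) {
            r <<= 1;
        }
        return r;
    }

    int main(void)
    {
        unsigned queues = 7, irqs = 5;  /* admin + 6 I/O queues, 5 vectors */
        uint64_t bar = 4096 + 2 * queues * 4;  /* registers + doorbells */

        bar = ALIGN_UP(bar, 4096);             /* MSI-X table offset */
        bar += 16 * irqs;                      /* PCI_MSIX_ENTRY_SIZE * irqs */
        bar = ALIGN_UP(bar, 4096);             /* MSI-X PBA offset */
        bar += ALIGN_UP(irqs, 64) / 8;         /* pending bit array */

        printf("bar_size = %llu\n",
               (unsigned long long)pow2ceil_sketch(bar));  /* -> 16384 */
        return 0;
    }
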
+static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
+{
+ uint16_t vf_dev_id = n->params.use_intel_id ?
+ PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
+ NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+ uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+ le16_to_cpu(cap->vifrsm),
+ NULL, NULL);
+
+ pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
+ n->params.sriov_max_vfs, n->params.sriov_max_vfs,
+ NVME_VF_OFFSET, NVME_VF_STRIDE);
+
+ pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
+}
+
+static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
+{
+ Error *err = NULL;
+ int ret;
+
+ ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
+ PCI_PM_SIZEOF, &err);
+ if (err) {
+ error_report_err(err);
+ return ret;
+ }
+
+ pci_set_word(pci_dev->config + offset + PCI_PM_PMC,
+ PCI_PM_CAP_VER_1_2);
+ pci_set_word(pci_dev->config + offset + PCI_PM_CTRL,
+ PCI_PM_CTRL_NO_SOFT_RESET);
+ pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+
+ return 0;
+}
+
static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
{
uint8_t *pci_conf = pci_dev->config;
- uint64_t bar_size, msix_table_size, msix_pba_size;
+ uint64_t bar_size;
unsigned msix_table_offset, msix_pba_offset;
int ret;
@@ -6640,34 +7128,35 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
if (n->params.use_intel_id) {
pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
- pci_config_set_device_id(pci_conf, 0x5845);
+ pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME);
} else {
pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
}
pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+ nvme_add_pm_capability(pci_dev, 0x60);
pcie_endpoint_cap_init(pci_dev, 0x80);
+ pcie_cap_flr_init(pci_dev);
+ if (n->params.sriov_max_vfs) {
+ pcie_ari_init(pci_dev, 0x100, 1);
+ }
- bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB);
- msix_table_offset = bar_size;
- msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize;
-
- bar_size += msix_table_size;
- bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
- msix_pba_offset = bar_size;
- msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8;
-
- bar_size += msix_pba_size;
- bar_size = pow2ceil(bar_size);
+ /* add one to max_ioqpairs to account for the admin queue pair */
+ bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
+ &msix_table_offset, &msix_pba_offset);
memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
- n->reg_size);
+ msix_table_offset);
memory_region_add_subregion(&n->bar0, 0, &n->iomem);
- pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
+ if (pci_is_vf(pci_dev)) {
+ pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
+ } else {
+ pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
+ }
ret = msix_init(pci_dev, n->params.msix_qsize,
&n->bar0, 0, msix_table_offset,
&n->bar0, 0, msix_pba_offset, 0, &err);
@@ -6680,6 +7169,8 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
}
}
+ nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
+
if (n->params.cmb_size_mb) {
nvme_init_cmb(n, pci_dev);
}
@@ -6688,6 +7179,10 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
nvme_init_pmr(n, pci_dev);
}
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
+ nvme_init_sriov(n, pci_dev, 0x120);
+ }
+
return 0;
}
@@ -6709,6 +7204,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NvmeIdCtrl *id = &n->id_ctrl;
uint8_t *pci_conf = pci_dev->config;
uint64_t cap = ldq_le_p(&n->bar.cap);
+ NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6775,8 +7271,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1);
- id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
- NVME_CTRL_SGLS_BITBUCKET);
+ id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
nvme_init_subnqn(n);
@@ -6801,6 +7296,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
stl_le_p(&n->bar.vs, NVME_SPEC_VER);
n->bar.intmc = n->bar.intms = 0;
+
+ if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
+ stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
+ }
}
static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
@@ -6838,6 +7337,16 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
NvmeCtrl *n = NVME(pci_dev);
NvmeNamespace *ns;
Error *local_err = NULL;
+ NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
+
+ if (pci_is_vf(pci_dev)) {
+ /*
+ * VFs derive settings from the parent. PF's lifespan exceeds
+ * that of VF's, so it's safe to share params.serial.
+ */
+ memcpy(&n->params, &pn->params, sizeof(NvmeParams));
+ n->subsys = pn->subsys;
+ }
nvme_check_constraints(n, &local_err);
if (local_err) {
@@ -6848,15 +7357,14 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS,
&pci_dev->qdev, n->parent_obj.qdev.id);
- nvme_init_state(n);
- if (nvme_init_pci(n, pci_dev, errp)) {
- return;
- }
-
if (nvme_init_subsys(n, errp)) {
error_propagate(errp, local_err);
return;
}
+ nvme_init_state(n);
+ if (nvme_init_pci(n, pci_dev, errp)) {
+ return;
+ }
nvme_init_ctrl(n, pci_dev);
/* setup a namespace if the controller drive property was given */
@@ -6878,7 +7386,7 @@ static void nvme_exit(PCIDevice *pci_dev)
NvmeNamespace *ns;
int i;
- nvme_ctrl_reset(n);
+ nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
if (n->subsys) {
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
@@ -6902,6 +7410,11 @@ static void nvme_exit(PCIDevice *pci_dev)
if (n->pmr.dev) {
host_memory_backend_set_mapped(n->pmr.dev, false);
}
+
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
+ pcie_sriov_pf_exit(pci_dev);
+ }
+
msix_uninit(pci_dev, &n->bar0, &n->bar0);
memory_region_del_subregion(&n->bar0, &n->iomem);
}
@@ -6926,6 +7439,15 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
+ DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
+ DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
+ params.sriov_vq_flexible, 0),
+ DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
+ params.sriov_vi_flexible, 0),
+ DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
+ params.sriov_max_vi_per_vf, 0),
+ DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
+ params.sriov_max_vq_per_vf, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -6971,6 +7493,47 @@ static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
}
}
+static void nvme_pci_reset(DeviceState *qdev)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(qdev);
+ NvmeCtrl *n = NVME(pci_dev);
+
+ trace_pci_nvme_pci_reset();
+ nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
+}
+
+static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len)
+{
+ NvmeCtrl *n = NVME(dev);
+ NvmeSecCtrlEntry *sctrl;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint32_t off = address - sriov_cap;
+ int i, num_vfs;
+
+ if (!sriov_cap) {
+ return;
+ }
+
+ if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
+ if (!(val & PCI_SRIOV_CTRL_VFE)) {
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+ for (i = 0; i < num_vfs; i++) {
+ sctrl = &n->sec_ctrl_list.sec[i];
+ nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
+ }
+ }
+ }
+}
+
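
nvme_sriov_pre_write_ctrl() only reacts when the config write actually touches
the SR-IOV Control register and clears VF Enable. range_covers_byte() comes
from qemu/range.h; a sketch of its semantics, assuming the usual definition:

    #include <stdbool.h>
    #include <stdint.h>

    /* does the write [offset, offset + len) cover the given byte? */
    static bool covers_byte(uint64_t offset, uint64_t len, uint64_t byte)
    {
        return offset <= byte && byte - offset < len;
    }
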
+static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len)
+{
+ nvme_sriov_pre_write_ctrl(dev, address, val, len);
+ pci_default_write_config(dev, address, val, len);
+ pcie_cap_flr_write_config(dev, address, val, len);
+}
+
static const VMStateDescription nvme_vmstate = {
.name = "nvme",
.unmigratable = 1,
@@ -6982,6 +7545,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
pc->realize = nvme_realize;
+ pc->config_write = nvme_pci_write_config;
pc->exit = nvme_exit;
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
pc->revision = 2;
@@ -6990,6 +7554,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
dc->desc = "Non-Volatile Memory Express";
device_class_set_props(dc, nvme_props);
dc->vmsd = &nvme_vmstate;
+ dc->reset = nvme_pci_reset;
}
static void nvme_instance_init(Object *obj)