-rw-r--r-- | hw/nvme/ctrl.c                | 377
-rw-r--r-- | hw/nvme/ns.c                  |  55
-rw-r--r-- | hw/nvme/nvme.h                |  18
-rw-r--r-- | hw/nvme/subsys.c              |   9
-rw-r--r-- | hw/nvme/trace-events          |   1
-rw-r--r-- | include/block/nvme.h          |  60
-rw-r--r-- | migration/channel.c           |  15
-rw-r--r-- | migration/migration.c         |  57
-rw-r--r-- | migration/migration.h         |  15
-rw-r--r-- | migration/multifd.c           |   8
-rw-r--r-- | migration/qemu-file-channel.c |  11
-rw-r--r-- | migration/qemu-file.c         |  17
-rw-r--r-- | migration/qemu-file.h         |   4
-rw-r--r-- | migration/ram.c               |  77
-rw-r--r-- | migration/savevm.c            |  11
-rw-r--r-- | migration/yank_functions.c    |  42
-rw-r--r-- | migration/yank_functions.h    |   3
-rw-r--r-- | tests/qtest/migration-test.c  |   4
-rw-r--r-- | tests/qtest/nvme-test.c       |  87
19 files changed, 597 insertions(+), 274 deletions(-)
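
Reviewer note: the bulk of the hw/nvme delta below converts direct n->bar.<reg> field accesses into explicit little-endian loads/stores (ldl_le_p()/stl_le_p()/ldq_le_p()/stq_le_p()) and replaces the magic MMIO offsets with the offsetof()-based enum NvmeBarRegs, so the register file stays guest-visible little-endian on any host. The following is a minimal, self-contained sketch of that accessor pattern; the ldl_le()/stl_le() stand-ins and the struct bar layout are illustrative only, not the QEMU helpers or the real NvmeBar.

/*
 * Illustrative sketch (not QEMU code): byte-order-safe access to a packed
 * register file, plus offsets derived from the struct layout.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct bar {
    uint64_t cap;
    uint32_t vs;
    uint32_t intms;
    uint32_t intmc;
    uint32_t cc;
    uint32_t rsvd;
    uint32_t csts;
} __attribute__((packed));

/* stand-ins mimicking ldl_le_p()/stl_le_p(): always little-endian in memory */
static uint32_t ldl_le(const void *p)
{
    const uint8_t *b = p;
    return (uint32_t)b[0] | (uint32_t)b[1] << 8 |
           (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
}

static void stl_le(void *p, uint32_t v)
{
    uint8_t *b = p;
    b[0] = v; b[1] = v >> 8; b[2] = v >> 16; b[3] = v >> 24;
}

/* register offsets taken from the struct layout, like enum NvmeBarRegs */
enum {
    REG_CC   = offsetof(struct bar, cc),
    REG_CSTS = offsetof(struct bar, csts),
};

int main(void)
{
    struct bar bar = {0};
    uint8_t *mmio = (uint8_t *)&bar;

    stl_le(&bar.cc, 0x00460001);          /* device-side update of CC (arbitrary value) */
    uint32_t cc = ldl_le(mmio + REG_CC);  /* guest-style MMIO read of the same bytes */
    printf("CC = 0x%08x\n", (unsigned)cc); /* identical on big- and little-endian hosts */
    return 0;
}
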
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 629b0d38c2..43dfaeac9f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq) static void nvme_irq_check(NvmeCtrl *n) { + uint32_t intms = ldl_le_p(&n->bar.intms); + if (msix_enabled(&(n->parent_obj))) { return; } - if (~n->bar.intms & n->irq_status) { + if (~intms & n->irq_status) { pci_irq_assert(&n->parent_obj); } else { pci_irq_deassert(&n->parent_obj); @@ -623,6 +625,10 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) return NVME_INVALID_USE_OF_CMB | NVME_DNR; } + if (sg->iov.niov + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + if (cmb) { return nvme_map_addr_cmb(n, &sg->iov, addr, len); } else { @@ -634,9 +640,18 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) return NVME_INVALID_USE_OF_CMB | NVME_DNR; } + if (sg->qsg.nsg + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + qemu_sglist_add(&sg->qsg, addr, len); return NVME_SUCCESS; + +max_mappings_exceeded: + NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings, + "number of mappings exceed 1024"); + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; } static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr) @@ -1276,7 +1291,7 @@ static void nvme_post_cqes(void *opaque) if (ret) { trace_pci_nvme_err_addr_write(addr); trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); break; } QTAILQ_REMOVE(&cq->req_list, req, entry); @@ -4009,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { trace_pci_nvme_err_invalid_create_sq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } @@ -4195,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return NVME_INVALID_FIELD | NVME_DNR; } - switch (NVME_CC_CSS(n->bar.cc)) { + switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { case NVME_CC_CSS_NVM: src_iocs = nvme_cse_iocs_nvm; /* fall through */ @@ -4357,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_QID | NVME_DNR; } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { trace_pci_nvme_err_invalid_create_cq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } @@ -5150,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n) static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) { + uint32_t cc = ldl_le_p(&n->bar.cc); + ns->iocs = nvme_cse_iocs_none; switch (ns->csi) { case NVME_CSI_NVM: - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { + if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { ns->iocs = nvme_cse_iocs_nvm; } break; case NVME_CSI_ZONED: - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { + if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { + } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { ns->iocs = nvme_cse_iocs_nvm; } break; @@ -5497,7 +5514,7 @@ static void nvme_process_sq(void *opaque) if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { trace_pci_nvme_err_addr_read(addr); trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; + stl_le_p(&n->bar.csts, 
NVME_CSTS_FAILED); break; } nvme_inc_sq_head(sq); @@ -5552,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n) n->aer_queued = 0; n->outstanding_aers = 0; n->qs_created = false; - - n->bar.cc = 0; } static void nvme_ctrl_shutdown(NvmeCtrl *n) @@ -5592,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n) static int nvme_start_ctrl(NvmeCtrl *n) { - uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t aqa = ldl_le_p(&n->bar.aqa); + uint64_t asq = ldq_le_p(&n->bar.asq); + uint64_t acq = ldq_le_p(&n->bar.acq); + uint32_t page_bits = NVME_CC_MPS(cc) + 12; uint32_t page_size = 1 << page_bits; if (unlikely(n->cq[0])) { @@ -5603,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n) trace_pci_nvme_err_startfail_sq(); return -1; } - if (unlikely(!n->bar.asq)) { + if (unlikely(!asq)) { trace_pci_nvme_err_startfail_nbarasq(); return -1; } - if (unlikely(!n->bar.acq)) { + if (unlikely(!acq)) { trace_pci_nvme_err_startfail_nbaracq(); return -1; } - if (unlikely(n->bar.asq & (page_size - 1))) { - trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq); + if (unlikely(asq & (page_size - 1))) { + trace_pci_nvme_err_startfail_asq_misaligned(asq); return -1; } - if (unlikely(n->bar.acq & (page_size - 1))) { - trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq); + if (unlikely(acq & (page_size - 1))) { + trace_pci_nvme_err_startfail_acq_misaligned(acq); return -1; } - if (unlikely(!(NVME_CAP_CSS(n->bar.cap) & (1 << NVME_CC_CSS(n->bar.cc))))) { - trace_pci_nvme_err_startfail_css(NVME_CC_CSS(n->bar.cc)); + if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) { + trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc)); return -1; } - if (unlikely(NVME_CC_MPS(n->bar.cc) < - NVME_CAP_MPSMIN(n->bar.cap))) { + if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) { trace_pci_nvme_err_startfail_page_too_small( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMIN(n->bar.cap)); + NVME_CC_MPS(cc), + NVME_CAP_MPSMIN(cap)); return -1; } - if (unlikely(NVME_CC_MPS(n->bar.cc) > - NVME_CAP_MPSMAX(n->bar.cap))) { + if (unlikely(NVME_CC_MPS(cc) > + NVME_CAP_MPSMAX(cap))) { trace_pci_nvme_err_startfail_page_too_large( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMAX(n->bar.cap)); + NVME_CC_MPS(cc), + NVME_CAP_MPSMAX(cap)); return -1; } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) < + if (unlikely(NVME_CC_IOCQES(cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { trace_pci_nvme_err_startfail_cqent_too_small( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MIN(n->bar.cap)); + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MIN(cap)); return -1; } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) > + if (unlikely(NVME_CC_IOCQES(cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { trace_pci_nvme_err_startfail_cqent_too_large( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MAX(n->bar.cap)); + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MAX(cap)); return -1; } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) < + if (unlikely(NVME_CC_IOSQES(cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { trace_pci_nvme_err_startfail_sqent_too_small( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MIN(n->bar.cap)); + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MIN(cap)); return -1; } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) > + if (unlikely(NVME_CC_IOSQES(cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { trace_pci_nvme_err_startfail_sqent_too_large( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MAX(n->bar.cap)); + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MAX(cap)); return -1; } - if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { + if 
(unlikely(!NVME_AQA_ASQS(aqa))) { trace_pci_nvme_err_startfail_asqent_sz_zero(); return -1; } - if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { + if (unlikely(!NVME_AQA_ACQS(aqa))) { trace_pci_nvme_err_startfail_acqent_sz_zero(); return -1; } @@ -5677,12 +5696,10 @@ static int nvme_start_ctrl(NvmeCtrl *n) n->page_bits = page_bits; n->page_size = page_size; n->max_prp_ents = n->page_size / sizeof(uint64_t); - n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc); - n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc); - nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0, - NVME_AQA_ACQS(n->bar.aqa) + 1, 1); - nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, - NVME_AQA_ASQS(n->bar.aqa) + 1); + n->cqe_size = 1 << NVME_CC_IOCQES(cc); + n->sqe_size = 1 << NVME_CC_IOSQES(cc); + nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1); + nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1); nvme_set_timestamp(n, 0ULL); @@ -5695,22 +5712,33 @@ static int nvme_start_ctrl(NvmeCtrl *n) static void nvme_cmb_enable_regs(NvmeCtrl *n) { - NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR); + uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc); + uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz); + + NVME_CMBLOC_SET_CDPCILS(cmbloc, 1); + NVME_CMBLOC_SET_CDPMLS(cmbloc, 1); + NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR); + stl_le_p(&n->bar.cmbloc, cmbloc); - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + NVME_CMBSZ_SET_SQS(cmbsz, 1); + NVME_CMBSZ_SET_CQS(cmbsz, 0); + NVME_CMBSZ_SET_LISTS(cmbsz, 1); + NVME_CMBSZ_SET_RDS(cmbsz, 1); + NVME_CMBSZ_SET_WDS(cmbsz, 1); + NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb); + stl_le_p(&n->bar.cmbsz, cmbsz); } static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, unsigned size) { + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t intms = ldl_le_p(&n->bar.intms); + uint32_t csts = ldl_le_p(&n->bar.csts); + uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts); + if (unlikely(offset & (sizeof(uint32_t) - 1))) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, "MMIO write not 32-bit aligned," @@ -5727,65 +5755,77 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } switch (offset) { - case 0xc: /* INTMS */ + case NVME_REG_INTMS: if (unlikely(msix_enabled(&(n->parent_obj)))) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask set" " when MSI-X is enabled"); /* should be ignored, fall through for now */ } - n->bar.intms |= data & 0xffffffff; + intms |= data; + stl_le_p(&n->bar.intms, intms); n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms); nvme_irq_check(n); break; - case 0x10: /* INTMC */ + case NVME_REG_INTMC: if (unlikely(msix_enabled(&(n->parent_obj)))) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask clr" " when MSI-X is enabled"); /* should be ignored, fall through for now */ } - n->bar.intms &= ~(data & 0xffffffff); + intms &= ~data; + stl_le_p(&n->bar.intms, intms); n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); + 
trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms); nvme_irq_check(n); break; - case 0x14: /* CC */ + case NVME_REG_CC: trace_pci_nvme_mmio_cfg(data & 0xffffffff); + /* Windows first sends data, then sends enable bit */ - if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && - !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) + if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) && + !NVME_CC_SHN(data) && !NVME_CC_SHN(cc)) { - n->bar.cc = data; + cc = data; } - if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { - n->bar.cc = data; + if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) { + cc = data; + + /* flush CC since nvme_start_ctrl() needs the value */ + stl_le_p(&n->bar.cc, cc); if (unlikely(nvme_start_ctrl(n))) { trace_pci_nvme_err_startfail(); - n->bar.csts = NVME_CSTS_FAILED; + csts = NVME_CSTS_FAILED; } else { trace_pci_nvme_mmio_start_success(); - n->bar.csts = NVME_CSTS_READY; + csts = NVME_CSTS_READY; } - } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { + } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) { trace_pci_nvme_mmio_stopped(); nvme_ctrl_reset(n); - n->bar.csts &= ~NVME_CSTS_READY; + cc = 0; + csts &= ~NVME_CSTS_READY; } - if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { + + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) { trace_pci_nvme_mmio_shutdown_set(); nvme_ctrl_shutdown(n); - n->bar.cc = data; - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; - } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { + cc = data; + csts |= NVME_CSTS_SHST_COMPLETE; + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) { trace_pci_nvme_mmio_shutdown_cleared(); - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; - n->bar.cc = data; + csts &= ~NVME_CSTS_SHST_COMPLETE; + cc = data; } + + stl_le_p(&n->bar.cc, cc); + stl_le_p(&n->bar.csts, csts); + break; - case 0x1c: /* CSTS */ + case NVME_REG_CSTS: if (data & (1 << 4)) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, "attempted to W1C CSTS.NSSRO" @@ -5796,7 +5836,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, " of controller status"); } break; - case 0x20: /* NSSR */ + case NVME_REG_NSSR: if (data == 0x4e564d65) { trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { @@ -5804,53 +5844,53 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; } break; - case 0x24: /* AQA */ - n->bar.aqa = data & 0xffffffff; + case NVME_REG_AQA: + stl_le_p(&n->bar.aqa, data); trace_pci_nvme_mmio_aqattr(data & 0xffffffff); break; - case 0x28: /* ASQ */ - n->bar.asq = size == 8 ? data : - (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff); + case NVME_REG_ASQ: + stn_le_p(&n->bar.asq, size, data); trace_pci_nvme_mmio_asqaddr(data); break; - case 0x2c: /* ASQ hi */ - n->bar.asq = (n->bar.asq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); + case NVME_REG_ASQ + 4: + stl_le_p((uint8_t *)&n->bar.asq + 4, data); + trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq)); break; - case 0x30: /* ACQ */ + case NVME_REG_ACQ: trace_pci_nvme_mmio_acqaddr(data); - n->bar.acq = size == 8 ? 
data : - (n->bar.acq & ~0xffffffffULL) | (data & 0xffffffff); + stn_le_p(&n->bar.acq, size, data); break; - case 0x34: /* ACQ hi */ - n->bar.acq = (n->bar.acq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); + case NVME_REG_ACQ + 4: + stl_le_p((uint8_t *)&n->bar.acq + 4, data); + trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq)); break; - case 0x38: /* CMBLOC */ + case NVME_REG_CMBLOC: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, "invalid write to reserved CMBLOC" " when CMBSZ is zero, ignored"); return; - case 0x3C: /* CMBSZ */ + case NVME_REG_CMBSZ: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; - case 0x50: /* CMBMSC */ - if (!NVME_CAP_CMBS(n->bar.cap)) { + case NVME_REG_CMBMSC: + if (!NVME_CAP_CMBS(cap)) { return; } - n->bar.cmbmsc = size == 8 ? data : - (n->bar.cmbmsc & ~0xffffffff) | (data & 0xffffffff); + stn_le_p(&n->bar.cmbmsc, size, data); n->cmb.cmse = false; if (NVME_CMBMSC_CRE(data)) { nvme_cmb_enable_regs(n); if (NVME_CMBMSC_CMSE(data)) { - hwaddr cba = NVME_CMBMSC_CBA(data) << CMBMSC_CBA_SHIFT; + uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc); + hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT; if (cba + int128_get64(n->cmb.mem.size) < cba) { - NVME_CMBSTS_SET_CBAI(n->bar.cmbsts, 1); + uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts); + NVME_CMBSTS_SET_CBAI(cmbsts, 1); + stl_le_p(&n->bar.cmbsts, cmbsts); return; } @@ -5863,53 +5903,57 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; - case 0x54: /* CMBMSC hi */ - n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); + case NVME_REG_CMBMSC + 4: + stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data); return; - case 0xe00: /* PMRCAP */ + case NVME_REG_PMRCAP: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; - case 0xe04: /* PMRCTL */ - if (!NVME_CAP_PMRS(n->bar.cap)) { + case NVME_REG_PMRCTL: + if (!NVME_CAP_PMRS(cap)) { return; } - n->bar.pmrctl = data; + stl_le_p(&n->bar.pmrctl, data); if (NVME_PMRCTL_EN(data)) { memory_region_set_enabled(&n->pmr.dev->mr, true); - n->bar.pmrsts = 0; + pmrsts = 0; } else { memory_region_set_enabled(&n->pmr.dev->mr, false); - NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 1); + NVME_PMRSTS_SET_NRDY(pmrsts, 1); n->pmr.cmse = false; } + stl_le_p(&n->bar.pmrsts, pmrsts); return; - case 0xe08: /* PMRSTS */ + case NVME_REG_PMRSTS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; - case 0xe0C: /* PMREBS */ + case NVME_REG_PMREBS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; - case 0xe10: /* PMRSWTP */ + case NVME_REG_PMRSWTP: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; - case 0xe14: /* PMRMSCL */ - if (!NVME_CAP_PMRS(n->bar.cap)) { + case NVME_REG_PMRMSCL: + if (!NVME_CAP_PMRS(cap)) { return; } - n->bar.pmrmsc = (n->bar.pmrmsc & ~0xffffffff) | (data & 0xffffffff); + stl_le_p(&n->bar.pmrmscl, data); n->pmr.cmse = false; - if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) { - hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT; + if (NVME_PMRMSCL_CMSE(data)) { + uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu); + hwaddr cba = pmrmscu << 32 | + (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT); if (cba + int128_get64(n->pmr.dev->mr.size) < cba) { - NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1); + NVME_PMRSTS_SET_CBAI(pmrsts, 1); + stl_le_p(&n->bar.pmrsts, pmrsts); 
return; } @@ -5918,12 +5962,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; - case 0xe18: /* PMRMSCU */ - if (!NVME_CAP_PMRS(n->bar.cap)) { + case NVME_REG_PMRMSCU: + if (!NVME_CAP_PMRS(cap)) { return; } - n->bar.pmrmsc = (n->bar.pmrmsc & 0xffffffff) | (data << 32); + stl_le_p(&n->bar.pmrmscu, data); return; default: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, @@ -5938,7 +5982,6 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) { NvmeCtrl *n = (NvmeCtrl *)opaque; uint8_t *ptr = (uint8_t *)&n->bar; - uint64_t val = 0; trace_pci_nvme_mmio_read(addr, size); @@ -5954,24 +5997,25 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) /* should RAZ, fall through for now */ } - if (addr < sizeof(n->bar)) { - /* - * When PMRWBM bit 1 is set then read from - * from PMRSTS should ensure prior writes - * made it to persistent media - */ - if (addr == 0xe08 && - (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { - memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); - } - memcpy(&val, ptr + addr, size); - } else { + if (addr > sizeof(n->bar) - size) { NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, "MMIO read beyond last register," " offset=0x%"PRIx64", returning 0", addr); + + return 0; + } + + /* + * When PMRWBM bit 1 is set then read from + * from PMRSTS should ensure prior writes + * made it to persistent media + */ + if (addr == NVME_REG_PMRSTS && + (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) { + memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } - return val; + return ldn_le_p(ptr + addr, size); } static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) @@ -6229,6 +6273,7 @@ static void nvme_init_state(NvmeCtrl *n) static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) { uint64_t cmb_size = n->params.cmb_size_mb * MiB; + uint64_t cap = ldq_le_p(&n->bar.cap); n->cmb.buf = g_malloc0(cmb_size); memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n, @@ -6238,7 +6283,8 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem); - NVME_CAP_SET_CMBS(n->bar.cap, 1); + NVME_CAP_SET_CMBS(cap, 1); + stq_le_p(&n->bar.cap, cap); if (n->params.legacy_cmb) { nvme_cmb_enable_regs(n); @@ -6248,14 +6294,17 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) { - NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR); + uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap); + + NVME_PMRCAP_SET_RDS(pmrcap, 1); + NVME_PMRCAP_SET_WDS(pmrcap, 1); + NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR); /* Turn on bit 1 support */ - NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); - NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 1); + NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02); + NVME_PMRCAP_SET_CMSS(pmrcap, 1); + stl_le_p(&n->bar.pmrcap, pmrcap); - pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), + pci_register_bar(pci_dev, NVME_PMR_BIR, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr); @@ -6345,6 +6394,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) { NvmeIdCtrl *id = &n->id_ctrl; uint8_t *pci_conf = pci_dev->config; + uint64_t cap = ldq_le_p(&n->bar.cap); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -6423,17 +6473,18 @@ static void 
nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cmic |= NVME_CMIC_MULTI_CTRL; } - NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); - NVME_CAP_SET_CQR(n->bar.cap, 1); - NVME_CAP_SET_TO(n->bar.cap, 0xf); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_ADMIN_ONLY); - NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - NVME_CAP_SET_CMBS(n->bar.cap, n->params.cmb_size_mb ? 1 : 0); - NVME_CAP_SET_PMRS(n->bar.cap, n->pmr.dev ? 1 : 0); + NVME_CAP_SET_MQES(cap, 0x7ff); + NVME_CAP_SET_CQR(cap, 1); + NVME_CAP_SET_TO(cap, 0xf); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_MPSMAX(cap, 4); + NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); + NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); + stq_le_p(&n->bar.cap, cap); - n->bar.vs = NVME_SPEC_VER; + stl_le_p(&n->bar.vs, NVME_SPEC_VER); n->bar.intmc = n->bar.intms = 0; } @@ -6498,7 +6549,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) ns = &n->namespace; ns->params.nsid = 1; - if (nvme_ns_setup(n, ns, errp)) { + if (nvme_ns_setup(ns, errp)) { return; } @@ -6514,13 +6565,15 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n); - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; + if (n->subsys) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; + } } - nvme_ns_cleanup(ns); + nvme_subsys_unregister_ctrl(n->subsys, n); } g_free(n->cq); @@ -6582,7 +6635,7 @@ static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; - if (NVME_CAP_PMRS(n->bar.cap)) { + if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) { cap |= NVME_SMART_PMR_UNRELIABLE; } diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 4275c3db63..b7cf1494e7 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -346,8 +346,7 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) assert(ns->nr_open_zones == 0); } -static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, - Error **errp) +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { if (!ns->blkconf.blk) { error_setg(errp, "block backend not configured"); @@ -366,20 +365,6 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return -1; } - if (!n->subsys) { - if (ns->params.detached) { - error_setg(errp, "detached requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - - if (ns->params.shared) { - error_setg(errp, "shared requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - } - if (ns->params.zoned) { if (ns->params.max_active_zones) { if (ns->params.max_open_zones > ns->params.max_active_zones) { @@ -411,9 +396,9 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return 0; } -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +int nvme_ns_setup(NvmeNamespace *ns, Error **errp) { - if (nvme_ns_check_constraints(n, ns, errp)) { + if (nvme_ns_check_constraints(ns, errp)) { return -1; } @@ -456,6 +441,15 @@ void nvme_ns_cleanup(NvmeNamespace *ns) } } +static void nvme_ns_unrealize(DeviceState *dev) +{ + NvmeNamespace *ns = NVME_NS(dev); + + nvme_ns_drain(ns); + nvme_ns_shutdown(ns); + nvme_ns_cleanup(ns); +} + static void nvme_ns_realize(DeviceState 
*dev, Error **errp) { NvmeNamespace *ns = NVME_NS(dev); @@ -465,7 +459,29 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) uint32_t nsid = ns->params.nsid; int i; - if (nvme_ns_setup(n, ns, errp)) { + if (!n->subsys) { + if (ns->params.detached) { + error_setg(errp, "detached requires that the nvme device is " + "linked to an nvme-subsys device"); + return; + } + + if (ns->params.shared) { + error_setg(errp, "shared requires that the nvme device is " + "linked to an nvme-subsys device"); + return; + } + } else { + /* + * If this namespace belongs to a subsystem (through a link on the + * controller device), reparent the device. + */ + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { + return; + } + } + + if (nvme_ns_setup(ns, errp)) { return; } @@ -553,6 +569,7 @@ static void nvme_ns_class_init(ObjectClass *oc, void *data) dc->bus_type = TYPE_NVME_BUS; dc->realize = nvme_ns_realize; + dc->unrealize = nvme_ns_unrealize; device_class_set_props(dc, nvme_ns_props); dc->desc = "Virtual NVMe namespace"; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 56f8eceed2..83ffabade4 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -33,12 +33,20 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); typedef struct NvmeCtrl NvmeCtrl; typedef struct NvmeNamespace NvmeNamespace; +#define TYPE_NVME_BUS "nvme-bus" +OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; + #define TYPE_NVME_SUBSYS "nvme-subsys" #define NVME_SUBSYS(obj) \ OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) typedef struct NvmeSubsystem { DeviceState parent_obj; + NvmeBus bus; uint8_t subnqn[256]; NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; @@ -50,6 +58,7 @@ typedef struct NvmeSubsystem { } NvmeSubsystem; int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, uint32_t cntlid) @@ -246,7 +255,7 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns) } void nvme_ns_init_format(NvmeNamespace *ns); -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); +int nvme_ns_setup(NvmeNamespace *ns, Error **errp); void nvme_ns_drain(NvmeNamespace *ns); void nvme_ns_shutdown(NvmeNamespace *ns); void nvme_ns_cleanup(NvmeNamespace *ns); @@ -364,13 +373,6 @@ typedef struct NvmeCQueue { QTAILQ_HEAD(, NvmeRequest) req_list; } NvmeCQueue; -#define TYPE_NVME_BUS "nvme-bus" -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) - -typedef struct NvmeBus { - BusState parent_bus; -} NvmeBus; - #define TYPE_NVME "nvme" #define NVME(obj) \ OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 192223d17c..93c35950d6 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -32,6 +32,11 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) return cntlid; } +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) +{ + subsys->ctrls[n->cntlid] = NULL; +} + static void nvme_subsys_setup(NvmeSubsystem *subsys) { const char *nqn = subsys->params.nqn ? 
@@ -45,6 +50,9 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp) { NvmeSubsystem *subsys = NVME_SUBSYS(dev); + qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, + dev->id); + nvme_subsys_setup(subsys); } @@ -61,6 +69,7 @@ static void nvme_subsys_class_init(ObjectClass *oc, void *data) dc->realize = nvme_subsys_realize; dc->desc = "Virtual NVMe subsystem"; + dc->hotpluggable = false; device_class_set_props(dc, nvme_subsystem_props); } diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index f9a1f14e26..430eeb395b 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -199,3 +199,4 @@ pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion qu pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" +pci_nvme_ub_too_many_mappings(void) "too many prp/sgl mappings" diff --git a/include/block/nvme.h b/include/block/nvme.h index 527105fafc..77aae01174 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -9,7 +9,7 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t cc; uint8_t rsvd24[4]; uint32_t csts; - uint32_t nssrc; + uint32_t nssr; uint32_t aqa; uint64_t asq; uint64_t acq; @@ -26,10 +26,38 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t pmrsts; uint32_t pmrebs; uint32_t pmrswtp; - uint64_t pmrmsc; + uint32_t pmrmscl; + uint32_t pmrmscu; uint8_t css[484]; } NvmeBar; +enum NvmeBarRegs { + NVME_REG_CAP = offsetof(NvmeBar, cap), + NVME_REG_VS = offsetof(NvmeBar, vs), + NVME_REG_INTMS = offsetof(NvmeBar, intms), + NVME_REG_INTMC = offsetof(NvmeBar, intmc), + NVME_REG_CC = offsetof(NvmeBar, cc), + NVME_REG_CSTS = offsetof(NvmeBar, csts), + NVME_REG_NSSR = offsetof(NvmeBar, nssr), + NVME_REG_AQA = offsetof(NvmeBar, aqa), + NVME_REG_ASQ = offsetof(NvmeBar, asq), + NVME_REG_ACQ = offsetof(NvmeBar, acq), + NVME_REG_CMBLOC = offsetof(NvmeBar, cmbloc), + NVME_REG_CMBSZ = offsetof(NvmeBar, cmbsz), + NVME_REG_BPINFO = offsetof(NvmeBar, bpinfo), + NVME_REG_BPRSEL = offsetof(NvmeBar, bprsel), + NVME_REG_BPMBL = offsetof(NvmeBar, bpmbl), + NVME_REG_CMBMSC = offsetof(NvmeBar, cmbmsc), + NVME_REG_CMBSTS = offsetof(NvmeBar, cmbsts), + NVME_REG_PMRCAP = offsetof(NvmeBar, pmrcap), + NVME_REG_PMRCTL = offsetof(NvmeBar, pmrctl), + NVME_REG_PMRSTS = offsetof(NvmeBar, pmrsts), + NVME_REG_PMREBS = offsetof(NvmeBar, pmrebs), + NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp), + NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl), + NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu), +}; + enum NvmeCapShift { CAP_MQES_SHIFT = 0, CAP_CQR_SHIFT = 16, @@ -475,25 +503,25 @@ enum NvmePmrswtpMask { #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) -enum NvmePmrmscShift { - PMRMSC_CMSE_SHIFT = 1, - PMRMSC_CBA_SHIFT = 12, +enum NvmePmrmsclShift { + PMRMSCL_CMSE_SHIFT = 1, + PMRMSCL_CBA_SHIFT = 12, }; -enum NvmePmrmscMask { - PMRMSC_CMSE_MASK = 0x1, - PMRMSC_CBA_MASK = 0xfffffffffffff, +enum NvmePmrmsclMask { + PMRMSCL_CMSE_MASK = 0x1, + PMRMSCL_CBA_MASK = 0xfffff, }; -#define NVME_PMRMSC_CMSE(pmrmsc) \ - ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) -#define NVME_PMRMSC_CBA(pmrmsc) \ - ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) +#define NVME_PMRMSCL_CMSE(pmrmscl) \ + 
((pmrmscl >> PMRMSCL_CMSE_SHIFT) & PMRMSCL_CMSE_MASK) +#define NVME_PMRMSCL_CBA(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CBA_SHIFT) & PMRMSCL_CBA_MASK) -#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) -#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) +#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT) +#define NVME_PMRMSCL_SET_CBA(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT) enum NvmeSglDescriptorType { NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0, diff --git a/migration/channel.c b/migration/channel.c index 01275a9162..c4fc000a1a 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -44,13 +44,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) TYPE_QIO_CHANNEL_TLS)) { migration_tls_channel_process_incoming(s, ioc, &local_err); } else { - if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { - yank_register_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } - + migration_ioc_register_yank(ioc); migration_ioc_process_incoming(ioc, &local_err); } @@ -94,12 +88,7 @@ void migration_channel_connect(MigrationState *s, } else { QEMUFile *f = qemu_fopen_channel_output(ioc); - if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { - yank_register_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } + migration_ioc_register_yank(ioc); qemu_mutex_lock(&s->qemu_file_lock); s->to_dst_file = f; diff --git a/migration/migration.c b/migration/migration.c index 2d306582eb..041b8451a6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -59,6 +59,7 @@ #include "multifd.h" #include "qemu/yank.h" #include "sysemu/cpus.h" +#include "yank_functions.h" #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ @@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void) } if (mis->from_src_file) { + migration_ioc_unregister_yank_from_file(mis->from_src_file); qemu_fclose(mis->from_src_file); mis->from_src_file = NULL; } @@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s) * Close the file handle without the lock to make sure the * critical section won't block for long. 
*/ + migration_ioc_unregister_yank_from_file(tmp); qemu_fclose(tmp); } @@ -1879,9 +1882,11 @@ static void migrate_fd_cancel(MigrationState *s) QEMUFile *f = migrate_get_current()->to_dst_file; trace_migrate_fd_cancel(); - if (s->rp_state.from_dst_file) { - /* shutdown the rp socket, so causing the rp thread to shutdown */ - qemu_file_shutdown(s->rp_state.from_dst_file); + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->rp_state.from_dst_file) { + /* shutdown the rp socket, so causing the rp thread to shutdown */ + qemu_file_shutdown(s->rp_state.from_dst_file); + } } do { @@ -2686,6 +2691,23 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) return 0; } +/* Release ms->rp_state.from_dst_file in a safe way */ +static void migration_release_from_dst_file(MigrationState *ms) +{ + QEMUFile *file; + + WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { + /* + * Reset the from_dst_file pointer first before releasing it, as we + * can't block within lock section + */ + file = ms->rp_state.from_dst_file; + ms->rp_state.from_dst_file = NULL; + } + + qemu_fclose(file); +} + /* * Handles messages sent on the return path towards the source VM * @@ -2827,11 +2849,13 @@ out: * Maybe there is something we can do: it looks like a * network down issue, and we pause for a recovery. */ - qemu_fclose(rp); - ms->rp_state.from_dst_file = NULL; + migration_release_from_dst_file(ms); rp = NULL; if (postcopy_pause_return_path_thread(ms)) { - /* Reload rp, reset the rest */ + /* + * Reload rp, reset the rest. Referencing it is safe since + * it's reset only by us above, or when migration completes + */ rp = ms->rp_state.from_dst_file; ms->rp_state.error = false; goto retry; @@ -2843,8 +2867,7 @@ out: } trace_source_return_path_thread_end(); - ms->rp_state.from_dst_file = NULL; - qemu_fclose(rp); + migration_release_from_dst_file(ms); rcu_unregister_thread(); return NULL; } @@ -2852,7 +2875,6 @@ out: static int open_return_path_on_source(MigrationState *ms, bool create_thread) { - ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); if (!ms->rp_state.from_dst_file) { return -1; @@ -2867,6 +2889,7 @@ static int open_return_path_on_source(MigrationState *ms, qemu_thread_create(&ms->rp_state.rp_thread, "return path", source_return_path_thread, ms, QEMU_THREAD_JOINABLE); + ms->rp_state.rp_thread_created = true; trace_open_return_path_on_source_continue(); @@ -2891,6 +2914,7 @@ static int await_return_path_close_on_source(MigrationState *ms) } trace_await_return_path_close_on_source_joining(); qemu_thread_join(&ms->rp_state.rp_thread); + ms->rp_state.rp_thread_created = false; trace_await_return_path_close_on_source_close(); return ms->rp_state.error; } @@ -3170,7 +3194,7 @@ static void migration_completion(MigrationState *s) * it will wait for the destination to send it's status in * a SHUT command). */ - if (s->rp_state.from_dst_file) { + if (s->rp_state.rp_thread_created) { int rp_error; trace_migration_return_path_end_before(); rp_error = await_return_path_close_on_source(s); @@ -3330,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; - /* Current channel is possibly broken. Release it. */ + /* + * Current channel is possibly broken. Release it. Note that this is + * guaranteed even without lock because to_dst_file should only be + * modified by the migration thread. That also guarantees that the + * unregister of yank is safe too without the lock. 
It should be safe + * even to be within the qemu_file_lock, but we didn't do that to avoid + * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make + * the qemu_file_lock critical section as small as possible. + */ assert(s->to_dst_file); + migration_ioc_unregister_yank_from_file(s->to_dst_file); qemu_mutex_lock(&s->qemu_file_lock); file = s->to_dst_file; s->to_dst_file = NULL; @@ -3744,7 +3777,7 @@ static void *migration_thread(void *opaque) * If we opened the return path, we need to make sure dst has it * opened as well. */ - if (s->rp_state.from_dst_file) { + if (s->rp_state.rp_thread_created) { /* Now tell the dest that it should open its end so it can reply */ qemu_savevm_send_open_return_path(s->to_dst_file); diff --git a/migration/migration.h b/migration/migration.h index 2ebb740dfa..7a5aa8c2fd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -154,12 +154,13 @@ struct MigrationState { QemuThread thread; QEMUBH *vm_start_bh; QEMUBH *cleanup_bh; + /* Protected by qemu_file_lock */ QEMUFile *to_dst_file; QIOChannelBuffer *bioc; /* - * Protects to_dst_file pointer. We need to make sure we won't - * yield or hang during the critical section, since this lock will - * be used in OOB command handler. + * Protects to_dst_file/from_dst_file pointers. We need to make sure we + * won't yield or hang during the critical section, since this lock will be + * used in OOB command handler. */ QemuMutex qemu_file_lock; @@ -192,9 +193,17 @@ struct MigrationState { /* State related to return path */ struct { + /* Protected by qemu_file_lock */ QEMUFile *from_dst_file; QemuThread rp_thread; bool error; + /* + * We can also check non-zero of rp_thread, but there's no "official" + * way to do this, so this bool makes it slightly more elegant. + * Checking from_dst_file for this is racy because from_dst_file will + * be cleared in the rp_thread! 
+ */ + bool rp_thread_created; QemuSemaphore rp_sem; } rp_state; diff --git a/migration/multifd.c b/migration/multifd.c index ab41590e71..377da78f5b 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -987,12 +987,8 @@ int multifd_load_cleanup(Error **errp) for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDRecvParams *p = &multifd_recv_state->params[i]; - if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(p->c)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(p->c)); + if (OBJECT(p->c)->ref == 1) { + migration_ioc_unregister_yank(p->c); } object_unref(OBJECT(p->c)); diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index fad340ea7a..bb5a5752df 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -107,13 +107,6 @@ static int channel_close(void *opaque, Error **errp) int ret; QIOChannel *ioc = QIO_CHANNEL(opaque); ret = qio_channel_close(ioc, errp); - if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(ioc)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } object_unref(OBJECT(ioc)); return ret; } @@ -191,11 +184,11 @@ static const QEMUFileOps channel_output_ops = { QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc) { object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_input_ops); + return qemu_fopen_ops(ioc, &channel_input_ops, true); } QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc) { object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_output_ops); + return qemu_fopen_ops(ioc, &channel_output_ops, true); } diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 1eacf9e831..6338d8e2ff 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -55,6 +55,8 @@ struct QEMUFile { Error *last_error_obj; /* has the file has been shutdown */ bool shutdown; + /* Whether opaque points to a QIOChannel */ + bool has_ioc; }; /* @@ -101,7 +103,7 @@ bool qemu_file_mode_is_not_valid(const char *mode) return false; } -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc) { QEMUFile *f; @@ -109,6 +111,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) f->opaque = opaque; f->ops = ops; + f->has_ioc = has_ioc; return f; } @@ -851,3 +854,15 @@ void qemu_file_set_blocking(QEMUFile *f, bool block) f->ops->set_blocking(f->opaque, block, NULL); } } + +/* + * Return the ioc object if it's a migration channel. Note: it can return NULL + * for callers passing in a non-migration qemufile. E.g. see qemu_fopen_bdrv() + * and its usage in e.g. load_snapshot(). So we need to check against NULL + * before using it. If without the check, migration_incoming_state_destroy() + * could fail for load_snapshot(). + */ +QIOChannel *qemu_file_get_ioc(QEMUFile *file) +{ + return file->has_ioc ? QIO_CHANNEL(file->opaque) : NULL; +} diff --git a/migration/qemu-file.h b/migration/qemu-file.h index a9b6d6ccb7..3f36d4dc8c 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -27,6 +27,7 @@ #include <zlib.h> #include "exec/cpu-common.h" +#include "io/channel.h" /* Read a chunk of data from a file at the given position. The pos argument * can be ignored if the file is only be used for streaming. 
The number of @@ -119,7 +120,7 @@ typedef struct QEMUFileHooks { QEMURamSaveFunc *save_page; } QEMUFileHooks; -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc); void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); @@ -179,5 +180,6 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); +QIOChannel *qemu_file_get_ioc(QEMUFile *file); #endif diff --git a/migration/ram.c b/migration/ram.c index b5fc454b2f..7a43bfd7af 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -550,7 +550,7 @@ static int compress_threads_save_setup(void) /* comp_param[i].file is just used as a dummy buffer to save data, * set its ops to empty. */ - comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); + comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false); comp_param[i].done = true; comp_param[i].quit = false; qemu_mutex_init(&comp_param[i].mutex); @@ -789,6 +789,53 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, return find_next_bit(bitmap, size, start); } +static void migration_clear_memory_region_dirty_bitmap(RAMState *rs, + RAMBlock *rb, + unsigned long page) +{ + uint8_t shift; + hwaddr size, start; + + if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) { + return; + } + + shift = rb->clear_bmap_shift; + /* + * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this + * can make things easier sometimes since then start address + * of the small chunk will always be 64 pages aligned so the + * bitmap will always be aligned to unsigned long. We should + * even be able to remove this restriction but I'm simply + * keeping it. + */ + assert(shift >= 6); + + size = 1ULL << (TARGET_PAGE_BITS + shift); + start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); + trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); + memory_region_clear_dirty_bitmap(rb->mr, start, size); +} + +static void +migration_clear_memory_region_dirty_bitmap_range(RAMState *rs, + RAMBlock *rb, + unsigned long start, + unsigned long npages) +{ + unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift; + unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages); + unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages); + + /* + * Clear pages from start to start + npages - 1, so the end boundary is + * exclusive. + */ + for (i = chunk_start; i < chunk_end; i += chunk_pages) { + migration_clear_memory_region_dirty_bitmap(rs, rb, i); + } +} + static inline bool migration_bitmap_clear_dirty(RAMState *rs, RAMBlock *rb, unsigned long page) @@ -803,26 +850,9 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, * the page in the chunk we clear the remote dirty bitmap for all. * Clearing it earlier won't be a problem, but too late will. */ - if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { - uint8_t shift = rb->clear_bmap_shift; - hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); - hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); - - /* - * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this - * can make things easier sometimes since then start address - * of the small chunk will always be 64 pages aligned so the - * bitmap will always be aligned to unsigned long. 
We should - * even be able to remove this restriction but I'm simply - * keeping it. - */ - assert(shift >= 6); - trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); - memory_region_clear_dirty_bitmap(rb->mr, start, size); - } + migration_clear_memory_region_dirty_bitmap(rs, rb, page); ret = test_and_clear_bit(page, rb->bmap); - if (ret) { rs->migration_dirty_pages--; } @@ -2741,6 +2771,14 @@ void qemu_guest_free_page_hint(void *addr, size_t len) npages = used_len >> TARGET_PAGE_BITS; qemu_mutex_lock(&ram_state->bitmap_mutex); + /* + * The skipped free pages are equavalent to be sent from clear_bmap's + * perspective, so clear the bits from the memory region bitmap which + * are initially set. Otherwise those skipped pages will be sent in + * the next round after syncing from the memory region bitmap. + */ + migration_clear_memory_region_dirty_bitmap_range(ram_state, block, + start, npages); ram_state->migration_dirty_pages -= bitmap_count_one_with_offset(block->bmap, start, npages); bitmap_clear(block->bmap, start, npages); @@ -4012,6 +4050,7 @@ static void ram_dirty_bitmap_reload_notify(MigrationState *s) int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) { int ret = -EINVAL; + /* from_dst_file is always valid because we're within rp_thread */ QEMUFile *file = s->rp_state.from_dst_file; unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; uint64_t local_size = DIV_ROUND_UP(nbits, 8); diff --git a/migration/savevm.c b/migration/savevm.c index 72848b946c..7b7b64bd13 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -65,6 +65,7 @@ #include "qemu/bitmap.h" #include "net/announce.h" #include "qemu/yank.h" +#include "yank_functions.h" const unsigned int postcopy_ram_discard_version; @@ -168,9 +169,9 @@ static const QEMUFileOps bdrv_write_ops = { static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) { if (is_writable) { - return qemu_fopen_ops(bs, &bdrv_write_ops); + return qemu_fopen_ops(bs, &bdrv_write_ops, false); } - return qemu_fopen_ops(bs, &bdrv_read_ops); + return qemu_fopen_ops(bs, &bdrv_read_ops, false); } @@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) /* Clear the triggered bit to allow one recovery */ mis->postcopy_recover_triggered = false; + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same + */ + migration_ioc_unregister_yank_from_file(mis->from_src_file); + assert(mis->from_src_file); qemu_file_shutdown(mis->from_src_file); qemu_fclose(mis->from_src_file); diff --git a/migration/yank_functions.c b/migration/yank_functions.c index 96c90e17dc..8c08aef14a 100644 --- a/migration/yank_functions.c +++ b/migration/yank_functions.c @@ -11,6 +11,10 @@ #include "qapi/error.h" #include "io/channel.h" #include "yank_functions.h" +#include "qemu/yank.h" +#include "io/channel-socket.h" +#include "io/channel-tls.h" +#include "qemu-file.h" void migration_yank_iochannel(void *opaque) { @@ -18,3 +22,41 @@ void migration_yank_iochannel(void *opaque) qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } + +/* Return whether yank is supported on this ioc */ +static bool migration_ioc_yank_supported(QIOChannel *ioc) +{ + return object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || + object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS); +} + +void migration_ioc_register_yank(QIOChannel *ioc) +{ + if (migration_ioc_yank_supported(ioc)) { + yank_register_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, + 
QIO_CHANNEL(ioc)); + } +} + +void migration_ioc_unregister_yank(QIOChannel *ioc) +{ + if (migration_ioc_yank_supported(ioc)) { + yank_unregister_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, + QIO_CHANNEL(ioc)); + } +} + +void migration_ioc_unregister_yank_from_file(QEMUFile *file) +{ + QIOChannel *ioc = qemu_file_get_ioc(file); + + if (ioc) { + /* + * For migration qemufiles, we'll always reach here. Though we'll skip + * calls from e.g. savevm/loadvm as they don't use yank. + */ + migration_ioc_unregister_yank(ioc); + } +} diff --git a/migration/yank_functions.h b/migration/yank_functions.h index 055ea22523..a7577955ed 100644 --- a/migration/yank_functions.h +++ b/migration/yank_functions.h @@ -15,3 +15,6 @@ * @opaque: QIOChannel to shutdown */ void migration_yank_iochannel(void *opaque); +void migration_ioc_register_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank_from_file(QEMUFile *file); diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 328d6dbe97..1e8b7784ef 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -787,10 +787,10 @@ static void test_baddest(void) args->hide_stderr = true; - if (test_migrate_start(&from, &to, "tcp:0:0", args)) { + if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", args)) { return; } - migrate_qmp(from, "tcp:0:0", "{}"); + migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); wait_for_migration_fail(from, false); test_migrate_end(from, to, false); } diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index d32c953a38..f8bafb5d70 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -13,6 +13,7 @@ #include "libqos/libqtest.h" #include "libqos/qgraph.h" #include "libqos/pci.h" +#include "include/block/nvme.h" typedef struct QNvme QNvme; @@ -66,12 +67,89 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_reg_read_test(void *obj, void *data, QGuestAllocator *alloc) +{ + QNvme *nvme = obj; + QPCIDevice *pdev = &nvme->dev; + QPCIBar bar; + uint32_t cap_lo, cap_hi; + uint64_t cap; + + qpci_device_enable(pdev); + bar = qpci_iomap(pdev, 0, NULL); + + cap_lo = qpci_io_readl(pdev, bar, 0x0); + g_assert_cmpint(NVME_CAP_MQES(cap_lo), ==, 0x7ff); + + cap_hi = qpci_io_readl(pdev, bar, 0x4); + g_assert_cmpint(NVME_CAP_MPSMAX((uint64_t)cap_hi << 32), ==, 0x4); + + cap = qpci_io_readq(pdev, bar, 0x0); + g_assert_cmpint(NVME_CAP_MQES(cap), ==, 0x7ff); + g_assert_cmpint(NVME_CAP_MPSMAX(cap), ==, 0x4); + + qpci_iounmap(pdev, bar); +} + +static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator *alloc) +{ + QNvme *nvme = obj; + QPCIDevice *pdev = &nvme->dev; + QPCIBar pmr_bar, nvme_bar; + uint32_t pmrcap, pmrsts; + + qpci_device_enable(pdev); + pmr_bar = qpci_iomap(pdev, 4, NULL); + + /* Without Enabling PMRCTL check bar enablemet */ + qpci_io_writel(pdev, pmr_bar, 0, 0xccbbaa99); + g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), !=, 0x99); + g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), !=, 0xaa99); + + /* Map NVMe Bar Register to Enable the Mem Region */ + nvme_bar = qpci_iomap(pdev, 0, NULL); + + pmrcap = qpci_io_readl(pdev, nvme_bar, 0xe00); + g_assert_cmpint(NVME_PMRCAP_RDS(pmrcap), ==, 0x1); + g_assert_cmpint(NVME_PMRCAP_WDS(pmrcap), ==, 0x1); + g_assert_cmpint(NVME_PMRCAP_BIR(pmrcap), ==, 0x4); + g_assert_cmpint(NVME_PMRCAP_PMRWBM(pmrcap), ==, 0x2); + 
g_assert_cmpint(NVME_PMRCAP_CMSS(pmrcap), ==, 0x1); + + /* Enable PMRCTRL */ + qpci_io_writel(pdev, nvme_bar, 0xe04, 0x1); + + qpci_io_writel(pdev, pmr_bar, 0, 0x44332211); + g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), ==, 0x11); + g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), ==, 0x2211); + g_assert_cmpint(qpci_io_readl(pdev, pmr_bar, 0), ==, 0x44332211); + + pmrsts = qpci_io_readl(pdev, nvme_bar, 0xe08); + g_assert_cmpint(NVME_PMRSTS_NRDY(pmrsts), ==, 0x0); + + /* Disable PMRCTRL */ + qpci_io_writel(pdev, nvme_bar, 0xe04, 0x0); + + qpci_io_writel(pdev, pmr_bar, 0, 0x88776655); + g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), !=, 0x55); + g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), !=, 0x6655); + g_assert_cmpint(qpci_io_readl(pdev, pmr_bar, 0), !=, 0x88776655); + + pmrsts = qpci_io_readl(pdev, nvme_bar, 0xe08); + g_assert_cmpint(NVME_PMRSTS_NRDY(pmrsts), ==, 0x1); + + qpci_iounmap(pdev, nvme_bar); + qpci_iounmap(pdev, pmr_bar); +} + static void nvme_register_nodes(void) { QOSGraphEdgeOptions opts = { .extra_device_opts = "addr=04.0,drive=drv0,serial=foo", .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", + "file.read-zeroes=on,format=raw " + "-object memory-backend-ram,id=pmr0," + "share=on,size=8", }; add_qpci_address(&opts, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) }); @@ -83,6 +161,13 @@ static void nvme_register_nodes(void) qos_add_test("oob-cmb-access", "nvme", nvmetest_oob_cmb_test, &(QOSGraphTestOptions) { .edge.extra_device_opts = "cmb_size_mb=2" }); + + qos_add_test("pmr-test-access", "nvme", nvmetest_pmr_reg_test, + &(QOSGraphTestOptions) { + .edge.extra_device_opts = "pmrdev=pmr0" + }); + + qos_add_test("reg-read", "nvme", nvmetest_reg_read_test, NULL); } libqos_init(nvme_register_nodes); |
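
Reviewer note: the ram.c hunk above factors the clear-bitmap handling into migration_clear_memory_region_dirty_bitmap() and a range variant that widens a page range to whole clear-bitmap chunks, which is what lets qemu_guest_free_page_hint() skip pages safely. Below is a minimal, self-contained sketch of that chunk-aligned iteration; the ALIGN_DOWN/ALIGN_UP macros, clear_chunk() and clear_dirty_range() names are illustrative stand-ins, not the QEMU functions.

/*
 * Illustrative sketch (not QEMU code): clear a page range by whole
 * clear-bitmap chunks of (1 << clear_bmap_shift) pages each.
 */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
#define ALIGN_UP(n, m)   ALIGN_DOWN((n) + (m) - 1, (m))

/* stands in for the per-chunk memory_region_clear_dirty_bitmap() call */
static void clear_chunk(unsigned long first_page)
{
    printf("clear chunk starting at page %lu\n", first_page);
}

static void clear_dirty_range(unsigned long start, unsigned long npages,
                              unsigned int clear_bmap_shift)
{
    unsigned long chunk_pages = 1UL << clear_bmap_shift;
    unsigned long chunk_start = ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end   = ALIGN_UP(start + npages, chunk_pages);
    unsigned long i;

    /* end boundary is exclusive, matching the helper's contract in ram.c */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        clear_chunk(i);
    }
}

int main(void)
{
    /* pages 100..899 with 256-page chunks -> chunks at 0, 256, 512, 768 */
    clear_dirty_range(100, 800, 8);
    return 0;
}
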