diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2022-02-15 13:51:35 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2022-02-15 13:51:35 +0000 |
commit | cc6721e449c4c5a9a5007ad8a810f7f54143eadc (patch) | |
tree | f45f74a05858f8523b029895243a71265928ec29 | |
parent | e56d873f0ed9f7ed35b40cc1be841bf7f22db690 (diff) | |
parent | e321b4cdc2dd0b5e806ecf759138be7f83774142 (diff) |
Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-request' into staging
hw/nvme updates
- fix CVE-2021-3929
- add zone random write area support
- misc cleanups from Philippe
# gpg: Signature made Mon 14 Feb 2022 08:01:34 GMT
# gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838
# Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9
* remotes/nvme/tags/nvme-next-pull-request:
hw/nvme: add support for zoned random write area
hw/nvme: add ozcs enum
hw/nvme: add struct for zone management send
hw/nvme/ctrl: Pass buffers as 'void *' types
hw/nvme/ctrl: Have nvme_addr_write() take const buffer
hw/nvme: fix CVE-2021-3929
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | hw/nvme/ctrl.c | 215 | ||||
-rw-r--r-- | hw/nvme/ns.c | 61 | ||||
-rw-r--r-- | hw/nvme/nvme.h | 14 | ||||
-rw-r--r-- | hw/nvme/trace-events | 1 | ||||
-rw-r--r-- | include/block/nvme.h | 40 |
5 files changed, 296 insertions, 35 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 1f62116af9..98aac98bef 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -299,26 +299,37 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone, } } -/* - * Check if we can open a zone without exceeding open/active limits. - * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5). - */ -static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) +static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act, + uint32_t opn, uint32_t zrwa) { if (ns->params.max_active_zones != 0 && ns->nr_active_zones + act > ns->params.max_active_zones) { trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones); return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR; } + if (ns->params.max_open_zones != 0 && ns->nr_open_zones + opn > ns->params.max_open_zones) { trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones); return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR; } + if (zrwa > ns->zns.numzrwa) { + return NVME_NOZRWA | NVME_DNR; + } + return NVME_SUCCESS; } +/* + * Check if we can open a zone without exceeding open/active limits. + * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5). + */ +static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) +{ + return nvme_zns_check_resources(ns, act, opn, 0); +} + static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) { hwaddr hi, lo; @@ -357,6 +368,24 @@ static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr) return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba); } +static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr) +{ + hwaddr hi, lo; + + /* + * The purpose of this check is to guard against invalid "local" access to + * the iomem (i.e. controller registers). Thus, we check against the range + * covered by the 'bar0' MemoryRegion since that is currently composed of + * two subregions (the NVMe "MBAR" and the MSI-X table/pba). Note, however, + * that if the device model is ever changed to allow the CMB to be located + * in BAR0 as well, then this must be changed. + */ + lo = n->bar0.addr; + hi = lo + int128_get64(n->bar0.size); + + return addr >= lo && addr < hi; +} + static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) { hwaddr hi = addr + size - 1; @@ -377,7 +406,7 @@ static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) return pci_dma_read(&n->parent_obj, addr, buf, size); } -static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) +static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size) { hwaddr hi = addr + size - 1; if (hi < addr) { @@ -614,6 +643,10 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) trace_pci_nvme_map_addr(addr, len); + if (nvme_addr_is_iomem(n, addr)) { + return NVME_DATA_TRAS_ERROR; + } + if (nvme_addr_is_cmb(n, addr)) { cmb = true; } else if (nvme_addr_is_pmr(n, addr)) { @@ -1140,7 +1173,7 @@ static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, return NVME_SUCCESS; } -static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, +static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, void *ptr, uint32_t len, NvmeTxDirection dir) { assert(sg->flags & NVME_SG_ALLOC); @@ -1177,7 +1210,7 @@ static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, return NVME_SUCCESS; } -static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +static inline uint16_t nvme_c2h(NvmeCtrl *n, void *ptr, uint32_t len, NvmeRequest *req) { uint16_t status; @@ -1190,7 +1223,7 @@ static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE); } -static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +static inline uint16_t nvme_h2c(NvmeCtrl *n, void *ptr, uint32_t len, NvmeRequest *req) { uint16_t status; @@ -1203,7 +1236,7 @@ static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); } -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len, NvmeTxDirection dir, NvmeRequest *req) { NvmeNamespace *ns = req->ns; @@ -1219,7 +1252,7 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return nvme_tx(n, &req->sg, ptr, len, dir); } -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len, NvmeTxDirection dir, NvmeRequest *req) { NvmeNamespace *ns = req->ns; @@ -1606,9 +1639,19 @@ static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone, return status; } - if (unlikely(slba != zone->w_ptr)) { - trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr); - return NVME_ZONE_INVALID_WRITE; + if (zone->d.za & NVME_ZA_ZRWA_VALID) { + uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas; + + if (slba < zone->w_ptr || slba + nlb > ezrwa) { + trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr); + return NVME_ZONE_INVALID_WRITE; + } + } else { + if (unlikely(slba != zone->w_ptr)) { + trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, + zone->w_ptr); + return NVME_ZONE_INVALID_WRITE; + } } if (unlikely((slba + nlb) > zcap)) { @@ -1688,6 +1731,14 @@ static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone) /* fallthrough */ case NVME_ZONE_STATE_CLOSED: nvme_aor_dec_active(ns); + + if (zone->d.za & NVME_ZA_ZRWA_VALID) { + zone->d.za &= ~NVME_ZA_ZRWA_VALID; + if (ns->params.numzrwa) { + ns->zns.numzrwa++; + } + } + /* fallthrough */ case NVME_ZONE_STATE_EMPTY: nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL); @@ -1723,6 +1774,13 @@ static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone) /* fallthrough */ case NVME_ZONE_STATE_CLOSED: nvme_aor_dec_active(ns); + + if (zone->d.za & NVME_ZA_ZRWA_VALID) { + if (ns->params.numzrwa) { + ns->zns.numzrwa++; + } + } + /* fallthrough */ case NVME_ZONE_STATE_FULL: zone->w_ptr = zone->d.zslba; @@ -1756,6 +1814,7 @@ static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) enum { NVME_ZRM_AUTO = 1 << 0, + NVME_ZRM_ZRWA = 1 << 1, }; static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns, @@ -1774,7 +1833,8 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns, if (n->params.auto_transition_zones) { nvme_zrm_auto_transition_zone(ns); } - status = nvme_aor_check(ns, act, 1); + status = nvme_zns_check_resources(ns, act, 1, + (flags & NVME_ZRM_ZRWA) ? 1 : 0); if (status) { return status; } @@ -1802,6 +1862,12 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns, /* fallthrough */ case NVME_ZONE_STATE_EXPLICITLY_OPEN: + if (flags & NVME_ZRM_ZRWA) { + ns->zns.numzrwa--; + + zone->d.za |= NVME_ZA_ZRWA_VALID; + } + return NVME_SUCCESS; default: @@ -1815,12 +1881,6 @@ static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns, return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO); } -static inline uint16_t nvme_zrm_open(NvmeCtrl *n, NvmeNamespace *ns, - NvmeZone *zone) -{ - return nvme_zrm_open_flags(n, ns, zone, 0); -} - static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, uint32_t nlb) { @@ -1831,6 +1891,20 @@ static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, } } +static void nvme_zoned_zrwa_implicit_flush(NvmeNamespace *ns, NvmeZone *zone, + uint32_t nlbc) +{ + uint16_t nzrwafgs = DIV_ROUND_UP(nlbc, ns->zns.zrwafg); + + nlbc = nzrwafgs * ns->zns.zrwafg; + + trace_pci_nvme_zoned_zrwa_implicit_flush(zone->d.zslba, nlbc); + + zone->w_ptr += nlbc; + + nvme_advance_zone_wp(ns, zone, nlbc); +} + static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; @@ -1843,6 +1917,17 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) zone = nvme_get_zone_by_slba(ns, slba); assert(zone); + if (zone->d.za & NVME_ZA_ZRWA_VALID) { + uint64_t ezrwa = zone->w_ptr + ns->zns.zrwas - 1; + uint64_t elba = slba + nlb - 1; + + if (elba > ezrwa) { + nvme_zoned_zrwa_implicit_flush(ns, zone, elba - ezrwa); + } + + return; + } + nvme_advance_zone_wp(ns, zone, nlb); } @@ -2643,7 +2728,9 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) goto invalid; } - iocb->zone->w_ptr += nlb; + if (!(iocb->zone->d.za & NVME_ZA_ZRWA_VALID)) { + iocb->zone->w_ptr += nlb; + } } qemu_iovec_reset(&iocb->iov); @@ -3182,6 +3269,10 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (append) { bool piremap = !!(ctrl & NVME_RW_PIREMAP); + if (unlikely(zone->d.za & NVME_ZA_ZRWA_VALID)) { + return NVME_INVALID_ZONE_OP | NVME_DNR; + } + if (unlikely(slba != zone->d.zslba)) { trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); status = NVME_INVALID_FIELD; @@ -3233,7 +3324,9 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, goto invalid; } - zone->w_ptr += nlb; + if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) { + zone->w_ptr += nlb; + } } data_offset = nvme_l2b(ns, slba); @@ -3317,7 +3410,24 @@ enum NvmeZoneProcessingMask { static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone, NvmeZoneState state, NvmeRequest *req) { - return nvme_zrm_open(nvme_ctrl(req), ns, zone); + NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd; + int flags = 0; + + if (cmd->zsflags & NVME_ZSFLAG_ZRWA_ALLOC) { + uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs); + + if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) { + return NVME_INVALID_ZONE_OP | NVME_DNR; + } + + if (zone->w_ptr % ns->zns.zrwafg) { + return NVME_NOZRWA | NVME_DNR; + } + + flags = NVME_ZRM_ZRWA; + } + + return nvme_zrm_open_flags(nvme_ctrl(req), ns, zone, flags); } static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone, @@ -3592,35 +3702,71 @@ done: } } +static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl *n, NvmeZone *zone, + uint64_t elba, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs); + uint64_t wp = zone->d.wp; + uint32_t nlb = elba - wp + 1; + uint16_t status; + + + if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) { + return NVME_INVALID_ZONE_OP | NVME_DNR; + } + + if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (elba < wp || elba > wp + ns->zns.zrwas) { + return NVME_ZONE_BOUNDARY_ERROR | NVME_DNR; + } + + if (nlb % ns->zns.zrwafg) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_zrm_auto(n, ns, zone); + if (status) { + return status; + } + + zone->w_ptr += nlb; + + nvme_advance_zone_wp(ns, zone, nlb); + + return NVME_SUCCESS; +} + static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) { - NvmeCmd *cmd = (NvmeCmd *)&req->cmd; + NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd; NvmeNamespace *ns = req->ns; NvmeZone *zone; NvmeZoneResetAIOCB *iocb; uint8_t *zd_ext; - uint32_t dw13 = le32_to_cpu(cmd->cdw13); uint64_t slba = 0; uint32_t zone_idx = 0; uint16_t status; - uint8_t action; + uint8_t action = cmd->zsa; bool all; enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE; - action = dw13 & 0xff; - all = !!(dw13 & 0x100); + all = cmd->zsflags & NVME_ZSFLAG_SELECT_ALL; req->status = NVME_SUCCESS; if (!all) { - status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); + status = nvme_get_mgmt_zone_slba_idx(ns, &req->cmd, &slba, &zone_idx); if (status) { return status; } } zone = &ns->zone_array[zone_idx]; - if (slba != zone->d.zslba) { + if (slba != zone->d.zslba && action != NVME_ZONE_ACTION_ZRWA_FLUSH) { trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba); return NVME_INVALID_FIELD | NVME_DNR; } @@ -3696,6 +3842,13 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) } break; + case NVME_ZONE_ACTION_ZRWA_FLUSH: + if (all) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return nvme_zone_mgmt_send_zrwa_flush(n, zone, slba, req); + default: trace_pci_nvme_err_invalid_mgmt_action(action); status = NVME_INVALID_FIELD; diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 8b5f98c761..ee673f1a5b 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -266,7 +266,8 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); id_ns_z->zoc = 0; - id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; + id_ns_z->ozcs = ns->params.cross_zone_read ? + NVME_ID_NS_ZONED_OZCS_RAZB : 0x00; for (i = 0; i <= ns->id_ns.nlbaf; i++) { id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); @@ -274,6 +275,23 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) ns->params.zd_extension_size >> 6; /* Units of 64B */ } + if (ns->params.zrwas) { + ns->zns.numzrwa = ns->params.numzrwa ? + ns->params.numzrwa : ns->num_zones; + + ns->zns.zrwas = ns->params.zrwas >> ns->lbaf.ds; + ns->zns.zrwafg = ns->params.zrwafg >> ns->lbaf.ds; + + id_ns_z->ozcs |= NVME_ID_NS_ZONED_OZCS_ZRWASUP; + id_ns_z->zrwacap = NVME_ID_NS_ZONED_ZRWACAP_EXPFLUSHSUP; + + id_ns_z->numzrwa = cpu_to_le32(ns->params.numzrwa); + id_ns_z->zrwas = cpu_to_le16(ns->zns.zrwas); + id_ns_z->zrwafg = cpu_to_le16(ns->zns.zrwafg); + } + + id_ns_z->ozcs = cpu_to_le16(id_ns_z->ozcs); + ns->csi = NVME_CSI_ZONED; ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); ns->id_ns.ncap = ns->id_ns.nsze; @@ -314,6 +332,10 @@ static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); } else { trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); + if (zone->d.za & NVME_ZA_ZRWA_VALID) { + zone->d.za &= ~NVME_ZA_ZRWA_VALID; + ns->zns.numzrwa++; + } nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); } } @@ -391,6 +413,40 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) return -1; } } + + if (ns->params.zrwas) { + if (ns->params.zrwas % ns->blkconf.logical_block_size) { + error_setg(errp, "zone random write area size (zoned.zrwas " + "%"PRIu64") must be a multiple of the logical " + "block size (logical_block_size %"PRIu32")", + ns->params.zrwas, ns->blkconf.logical_block_size); + return -1; + } + + if (ns->params.zrwafg == -1) { + ns->params.zrwafg = ns->blkconf.logical_block_size; + } + + if (ns->params.zrwas % ns->params.zrwafg) { + error_setg(errp, "zone random write area size (zoned.zrwas " + "%"PRIu64") must be a multiple of the zone random " + "write area flush granularity (zoned.zrwafg, " + "%"PRIu64")", ns->params.zrwas, ns->params.zrwafg); + return -1; + } + + if (ns->params.max_active_zones) { + if (ns->params.numzrwa > ns->params.max_active_zones) { + error_setg(errp, "number of zone random write area " + "resources (zoned.numzrwa, %d) must be less " + "than or equal to maximum active resources " + "(zoned.max_active_zones, %d)", + ns->params.numzrwa, + ns->params.max_active_zones); + return -1; + } + } + } } return 0; @@ -550,6 +606,9 @@ static Property nvme_ns_props[] = { params.max_open_zones, 0), DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, params.zd_extension_size, 0), + DEFINE_PROP_UINT32("zoned.numzrwa", NvmeNamespace, params.numzrwa, 0), + DEFINE_PROP_SIZE("zoned.zrwas", NvmeNamespace, params.zrwas, 0), + DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1), DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, true), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 83ffabade4..90c0bb7ce2 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -114,6 +114,10 @@ typedef struct NvmeNamespaceParams { uint32_t max_active_zones; uint32_t max_open_zones; uint32_t zd_extension_size; + + uint32_t numzrwa; + uint64_t zrwas; + uint64_t zrwafg; } NvmeNamespaceParams; typedef struct NvmeNamespace { @@ -130,6 +134,12 @@ typedef struct NvmeNamespace { uint16_t status; int attached; + struct { + uint16_t zrwas; + uint16_t zrwafg; + uint32_t numzrwa; + } zns; + QTAILQ_ENTRY(NvmeNamespace) entry; NvmeIdNsZoned *id_ns_zoned; @@ -495,9 +505,9 @@ static inline uint16_t nvme_cid(NvmeRequest *req) } void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len, NvmeTxDirection dir, NvmeRequest *req); -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len, NvmeTxDirection dir, NvmeRequest *req); void nvme_rw_complete_cb(void *opaque, int ret); uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ff6cafd520..90730d802f 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -103,6 +103,7 @@ pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone de pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" +pci_nvme_zoned_zrwa_implicit_flush(uint64_t zslba, uint32_t nlb) "zslba 0x%"PRIx64" nlb %"PRIu32"" # error conditions pci_nvme_err_mdts(size_t len) "len %zu" diff --git a/include/block/nvme.h b/include/block/nvme.h index e3bd47bf76..cd068ac891 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -890,6 +890,8 @@ enum NvmeStatusCodes { NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, NVME_CMD_SIZE_LIMIT = 0x0183, + NVME_INVALID_ZONE_OP = 0x01b6, + NVME_NOZRWA = 0x01b7, NVME_ZONE_BOUNDARY_ERROR = 0x01b8, NVME_ZONE_FULL = 0x01b9, NVME_ZONE_READ_ONLY = 0x01ba, @@ -1345,12 +1347,26 @@ typedef struct QEMU_PACKED NvmeIdNsZoned { uint32_t mor; uint32_t rrl; uint32_t frl; - uint8_t rsvd20[2796]; + uint8_t rsvd12[24]; + uint32_t numzrwa; + uint16_t zrwafg; + uint16_t zrwas; + uint8_t zrwacap; + uint8_t rsvd53[2763]; NvmeLBAFE lbafe[16]; uint8_t rsvd3072[768]; uint8_t vs[256]; } NvmeIdNsZoned; +enum NvmeIdNsZonedOzcs { + NVME_ID_NS_ZONED_OZCS_RAZB = 1 << 0, + NVME_ID_NS_ZONED_OZCS_ZRWASUP = 1 << 1, +}; + +enum NvmeIdNsZonedZrwacap { + NVME_ID_NS_ZONED_ZRWACAP_EXPFLUSHSUP = 1 << 0, +}; + /*Deallocate Logical Block Features*/ #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10) #define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat) ((dlfeat) & 0x08) @@ -1404,6 +1420,7 @@ enum NvmeZoneAttr { NVME_ZA_FINISHED_BY_CTLR = 1 << 0, NVME_ZA_FINISH_RECOMMENDED = 1 << 1, NVME_ZA_RESET_RECOMMENDED = 1 << 2, + NVME_ZA_ZRWA_VALID = 1 << 3, NVME_ZA_ZD_EXT_VALID = 1 << 7, }; @@ -1433,6 +1450,21 @@ enum NvmeZoneType { NVME_ZONE_TYPE_SEQ_WRITE = 0x02, }; +typedef struct QEMU_PACKED NvmeZoneSendCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd8[4]; + NvmeCmdDptr dptr; + uint64_t slba; + uint32_t rsvd48; + uint8_t zsa; + uint8_t zsflags; + uint8_t rsvd54[2]; + uint32_t rsvd56[2]; +} NvmeZoneSendCmd; + enum NvmeZoneSendAction { NVME_ZONE_ACTION_RSD = 0x00, NVME_ZONE_ACTION_CLOSE = 0x01, @@ -1441,6 +1473,12 @@ enum NvmeZoneSendAction { NVME_ZONE_ACTION_RESET = 0x04, NVME_ZONE_ACTION_OFFLINE = 0x05, NVME_ZONE_ACTION_SET_ZD_EXT = 0x10, + NVME_ZONE_ACTION_ZRWA_FLUSH = 0x11, +}; + +enum { + NVME_ZSFLAG_SELECT_ALL = 1 << 0, + NVME_ZSFLAG_ZRWA_ALLOC = 1 << 1, }; typedef struct QEMU_PACKED NvmeZoneDescr { |