diff options
Diffstat (limited to 'include')
39 files changed, 960 insertions, 87 deletions
diff --git a/include/block/block.h b/include/block/block.h index 0a9f2c187c..2f2698074e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -849,4 +849,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, BdrvChild *dst, int64_t dst_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); + +void bdrv_cancel_in_flight(BlockDriverState *bs); + #endif diff --git a/include/block/block_int.h b/include/block/block_int.h index 22a2789d35..88e4111939 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -353,6 +353,15 @@ struct BlockDriver { int64_t *map, BlockDriverState **file); /* + * This informs the driver that we are no longer interested in the result + * of in-flight requests, so don't waste the time if possible. + * + * One example usage is to avoid waiting for an nbd target node reconnect + * timeout during job-cancel. + */ + void (*bdrv_cancel_in_flight)(BlockDriverState *bs); + + /* * Invalidate any cached meta-data. */ void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, diff --git a/include/block/nvme.h b/include/block/nvme.h index 3e02d9ca98..07cfc92936 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -15,14 +15,19 @@ typedef struct QEMU_PACKED NvmeBar { uint64_t acq; uint32_t cmbloc; uint32_t cmbsz; - uint8_t padding[3520]; /* not used by QEMU */ + uint32_t bpinfo; + uint32_t bprsel; + uint64_t bpmbl; + uint64_t cmbmsc; + uint32_t cmbsts; + uint8_t rsvd92[3492]; uint32_t pmrcap; uint32_t pmrctl; uint32_t pmrsts; uint32_t pmrebs; uint32_t pmrswtp; uint64_t pmrmsc; - uint8_t reserved[484]; + uint8_t css[484]; } NvmeBar; enum NvmeCapShift { @@ -35,7 +40,8 @@ enum NvmeCapShift { CAP_CSS_SHIFT = 37, CAP_MPSMIN_SHIFT = 48, CAP_MPSMAX_SHIFT = 52, - CAP_PMR_SHIFT = 56, + CAP_PMRS_SHIFT = 56, + CAP_CMBS_SHIFT = 57, }; enum NvmeCapMask { @@ -48,7 +54,8 @@ enum NvmeCapMask { CAP_CSS_MASK = 0xff, CAP_MPSMIN_MASK = 0xf, CAP_MPSMAX_MASK = 0xf, - CAP_PMR_MASK = 0x1, + CAP_PMRS_MASK = 0x1, + CAP_CMBS_MASK = 0x1, }; #define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK) @@ -60,6 +67,8 @@ enum NvmeCapMask { #define NVME_CAP_CSS(cap) (((cap) >> CAP_CSS_SHIFT) & CAP_CSS_MASK) #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK) #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK) +#define NVME_CAP_PMRS(cap) (((cap) >> CAP_PMRS_SHIFT) & CAP_PMRS_MASK) +#define NVME_CAP_CMBS(cap) (((cap) >> CAP_CMBS_SHIFT) & CAP_CMBS_MASK) #define NVME_CAP_SET_MQES(cap, val) (cap |= (uint64_t)(val & CAP_MQES_MASK) \ << CAP_MQES_SHIFT) @@ -78,12 +87,15 @@ enum NvmeCapMask { #define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\ << CAP_MPSMIN_SHIFT) #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ - << CAP_MPSMAX_SHIFT) -#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\ - << CAP_PMR_SHIFT) + << CAP_MPSMAX_SHIFT) +#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMRS_MASK) \ + << CAP_PMRS_SHIFT) +#define NVME_CAP_SET_CMBS(cap, val) (cap |= (uint64_t)(val & CAP_CMBS_MASK) \ + << CAP_CMBS_SHIFT) enum NvmeCapCss { NVME_CAP_CSS_NVM = 1 << 0, + NVME_CAP_CSS_CSI_SUPP = 1 << 6, NVME_CAP_CSS_ADMIN_ONLY = 1 << 7, }; @@ -117,9 +129,25 @@ enum NvmeCcMask { enum NvmeCcCss { NVME_CC_CSS_NVM = 0x0, + NVME_CC_CSS_CSI = 0x6, NVME_CC_CSS_ADMIN_ONLY = 0x7, }; +#define NVME_SET_CC_EN(cc, val) \ + (cc |= (uint32_t)((val) & CC_EN_MASK) << CC_EN_SHIFT) +#define NVME_SET_CC_CSS(cc, val) \ + (cc |= (uint32_t)((val) & CC_CSS_MASK) << CC_CSS_SHIFT) +#define NVME_SET_CC_MPS(cc, val) \ + (cc |= (uint32_t)((val) & CC_MPS_MASK) << CC_MPS_SHIFT) +#define NVME_SET_CC_AMS(cc, val) \ + (cc |= (uint32_t)((val) & CC_AMS_MASK) << CC_AMS_SHIFT) +#define NVME_SET_CC_SHN(cc, val) \ + (cc |= (uint32_t)((val) & CC_SHN_MASK) << CC_SHN_SHIFT) +#define NVME_SET_CC_IOSQES(cc, val) \ + (cc |= (uint32_t)((val) & CC_IOSQES_MASK) << CC_IOSQES_SHIFT) +#define NVME_SET_CC_IOCQES(cc, val) \ + (cc |= (uint32_t)((val) & CC_IOCQES_MASK) << CC_IOCQES_SHIFT) + enum NvmeCstsShift { CSTS_RDY_SHIFT = 0, CSTS_CFS_SHIFT = 1, @@ -162,25 +190,64 @@ enum NvmeAqaMask { #define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK) enum NvmeCmblocShift { - CMBLOC_BIR_SHIFT = 0, - CMBLOC_OFST_SHIFT = 12, + CMBLOC_BIR_SHIFT = 0, + CMBLOC_CQMMS_SHIFT = 3, + CMBLOC_CQPDS_SHIFT = 4, + CMBLOC_CDPMLS_SHIFT = 5, + CMBLOC_CDPCILS_SHIFT = 6, + CMBLOC_CDMMMS_SHIFT = 7, + CMBLOC_CQDA_SHIFT = 8, + CMBLOC_OFST_SHIFT = 12, }; enum NvmeCmblocMask { - CMBLOC_BIR_MASK = 0x7, - CMBLOC_OFST_MASK = 0xfffff, + CMBLOC_BIR_MASK = 0x7, + CMBLOC_CQMMS_MASK = 0x1, + CMBLOC_CQPDS_MASK = 0x1, + CMBLOC_CDPMLS_MASK = 0x1, + CMBLOC_CDPCILS_MASK = 0x1, + CMBLOC_CDMMMS_MASK = 0x1, + CMBLOC_CQDA_MASK = 0x1, + CMBLOC_OFST_MASK = 0xfffff, }; -#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT) & \ - CMBLOC_BIR_MASK) -#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \ - CMBLOC_OFST_MASK) - -#define NVME_CMBLOC_SET_BIR(cmbloc, val) \ +#define NVME_CMBLOC_BIR(cmbloc) \ + ((cmbloc >> CMBLOC_BIR_SHIFT) & CMBLOC_BIR_MASK) +#define NVME_CMBLOC_CQMMS(cmbloc) \ + ((cmbloc >> CMBLOC_CQMMS_SHIFT) & CMBLOC_CQMMS_MASK) +#define NVME_CMBLOC_CQPDS(cmbloc) \ + ((cmbloc >> CMBLOC_CQPDS_SHIFT) & CMBLOC_CQPDS_MASK) +#define NVME_CMBLOC_CDPMLS(cmbloc) \ + ((cmbloc >> CMBLOC_CDPMLS_SHIFT) & CMBLOC_CDPMLS_MASK) +#define NVME_CMBLOC_CDPCILS(cmbloc) \ + ((cmbloc >> CMBLOC_CDPCILS_SHIFT) & CMBLOC_CDPCILS_MASK) +#define NVME_CMBLOC_CDMMMS(cmbloc) \ + ((cmbloc >> CMBLOC_CDMMMS_SHIFT) & CMBLOC_CDMMMS_MASK) +#define NVME_CMBLOC_CQDA(cmbloc) \ + ((cmbloc >> CMBLOC_CQDA_SHIFT) & CMBLOC_CQDA_MASK) +#define NVME_CMBLOC_OFST(cmbloc) \ + ((cmbloc >> CMBLOC_OFST_SHIFT) & CMBLOC_OFST_MASK) + +#define NVME_CMBLOC_SET_BIR(cmbloc, val) \ (cmbloc |= (uint64_t)(val & CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT) +#define NVME_CMBLOC_SET_CQMMS(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_CQMMS_MASK) << CMBLOC_CQMMS_SHIFT) +#define NVME_CMBLOC_SET_CQPDS(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_CQPDS_MASK) << CMBLOC_CQPDS_SHIFT) +#define NVME_CMBLOC_SET_CDPMLS(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_CDPMLS_MASK) << CMBLOC_CDPMLS_SHIFT) +#define NVME_CMBLOC_SET_CDPCILS(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_CDPCILS_MASK) << CMBLOC_CDPCILS_SHIFT) +#define NVME_CMBLOC_SET_CDMMMS(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_CDMMMS_MASK) << CMBLOC_CDMMMS_SHIFT) +#define NVME_CMBLOC_SET_CQDA(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_CQDA_MASK) << CMBLOC_CQDA_SHIFT) #define NVME_CMBLOC_SET_OFST(cmbloc, val) \ (cmbloc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBLOC_OFST_SHIFT) +#define NVME_CMBMSMC_SET_CRE (cmbmsc, val) \ + (cmbmsc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBMSC_CRE_SHIFT) + enum NvmeCmbszShift { CMBSZ_SQS_SHIFT = 0, CMBSZ_CQS_SHIFT = 1, @@ -227,6 +294,46 @@ enum NvmeCmbszMask { #define NVME_CMBSZ_GETSIZE(cmbsz) \ (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) +enum NvmeCmbmscShift { + CMBMSC_CRE_SHIFT = 0, + CMBMSC_CMSE_SHIFT = 1, + CMBMSC_CBA_SHIFT = 12, +}; + +enum NvmeCmbmscMask { + CMBMSC_CRE_MASK = 0x1, + CMBMSC_CMSE_MASK = 0x1, + CMBMSC_CBA_MASK = ((1ULL << 52) - 1), +}; + +#define NVME_CMBMSC_CRE(cmbmsc) \ + ((cmbmsc >> CMBMSC_CRE_SHIFT) & CMBMSC_CRE_MASK) +#define NVME_CMBMSC_CMSE(cmbmsc) \ + ((cmbmsc >> CMBMSC_CMSE_SHIFT) & CMBMSC_CMSE_MASK) +#define NVME_CMBMSC_CBA(cmbmsc) \ + ((cmbmsc >> CMBMSC_CBA_SHIFT) & CMBMSC_CBA_MASK) + + +#define NVME_CMBMSC_SET_CRE(cmbmsc, val) \ + (cmbmsc |= (uint64_t)(val & CMBMSC_CRE_MASK) << CMBMSC_CRE_SHIFT) +#define NVME_CMBMSC_SET_CMSE(cmbmsc, val) \ + (cmbmsc |= (uint64_t)(val & CMBMSC_CMSE_MASK) << CMBMSC_CMSE_SHIFT) +#define NVME_CMBMSC_SET_CBA(cmbmsc, val) \ + (cmbmsc |= (uint64_t)(val & CMBMSC_CBA_MASK) << CMBMSC_CBA_SHIFT) + +enum NvmeCmbstsShift { + CMBSTS_CBAI_SHIFT = 0, +}; +enum NvmeCmbstsMask { + CMBSTS_CBAI_MASK = 0x1, +}; + +#define NVME_CMBSTS_CBAI(cmbsts) \ + ((cmbsts >> CMBSTS_CBAI_SHIFT) & CMBSTS_CBAI_MASK) + +#define NVME_CMBSTS_SET_CBAI(cmbsts, val) \ + (cmbsts |= (uint64_t)(val & CMBSTS_CBAI_MASK) << CMBSTS_CBAI_SHIFT) + enum NvmePmrcapShift { PMRCAP_RDS_SHIFT = 3, PMRCAP_WDS_SHIFT = 4, @@ -472,6 +579,9 @@ enum NvmeIoCommands { NVME_CMD_COMPARE = 0x05, NVME_CMD_WRITE_ZEROES = 0x08, NVME_CMD_DSM = 0x09, + NVME_CMD_ZONE_MGMT_SEND = 0x79, + NVME_CMD_ZONE_MGMT_RECV = 0x7a, + NVME_CMD_ZONE_APPEND = 0x7d, }; typedef struct QEMU_PACKED NvmeDeleteQ { @@ -540,8 +650,13 @@ typedef struct QEMU_PACKED NvmeIdentify { uint64_t rsvd2[2]; uint64_t prp1; uint64_t prp2; - uint32_t cns; - uint32_t rsvd11[5]; + uint8_t cns; + uint8_t rsvd10; + uint16_t ctrlid; + uint16_t nvmsetid; + uint8_t rsvd11; + uint8_t csi; + uint32_t rsvd12[4]; } NvmeIdentify; typedef struct QEMU_PACKED NvmeRwCmd { @@ -632,9 +747,13 @@ typedef struct QEMU_PACKED NvmeAerResult { uint8_t resv; } NvmeAerResult; +typedef struct QEMU_PACKED NvmeZonedResult { + uint64_t slba; +} NvmeZonedResult; + typedef struct QEMU_PACKED NvmeCqe { uint32_t result; - uint32_t rsvd; + uint32_t dw1; uint16_t sq_head; uint16_t sq_id; uint16_t cid; @@ -662,6 +781,8 @@ enum NvmeStatusCodes { NVME_SGL_DESCR_TYPE_INVALID = 0x0011, NVME_INVALID_USE_OF_CMB = 0x0012, NVME_INVALID_PRP_OFFSET = 0x0013, + NVME_CMD_SET_CMB_REJECTED = 0x002b, + NVME_INVALID_CMD_SET = 0x002c, NVME_LBA_RANGE = 0x0080, NVME_CAP_EXCEEDED = 0x0081, NVME_NS_NOT_READY = 0x0082, @@ -686,6 +807,14 @@ enum NvmeStatusCodes { NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, + NVME_ZONE_BOUNDARY_ERROR = 0x01b8, + NVME_ZONE_FULL = 0x01b9, + NVME_ZONE_READ_ONLY = 0x01ba, + NVME_ZONE_OFFLINE = 0x01bb, + NVME_ZONE_INVALID_WRITE = 0x01bc, + NVME_ZONE_TOO_MANY_ACTIVE = 0x01bd, + NVME_ZONE_TOO_MANY_OPEN = 0x01be, + NVME_ZONE_INVAL_TRANSITION = 0x01bf, NVME_WRITE_FAULT = 0x0280, NVME_UNRECOVERED_READ = 0x0281, NVME_E2E_GUARD_ERROR = 0x0282, @@ -693,6 +822,7 @@ enum NvmeStatusCodes { NVME_E2E_REF_ERROR = 0x0284, NVME_CMP_FAILURE = 0x0285, NVME_ACCESS_DENIED = 0x0286, + NVME_DULB = 0x0287, NVME_MORE = 0x2000, NVME_DNR = 0x4000, NVME_NO_COMPLETE = 0xffff, @@ -743,18 +873,37 @@ typedef struct QEMU_PACKED NvmeSmartLog { uint8_t reserved2[320]; } NvmeSmartLog; +#define NVME_SMART_WARN_MAX 6 enum NvmeSmartWarn { NVME_SMART_SPARE = 1 << 0, NVME_SMART_TEMPERATURE = 1 << 1, NVME_SMART_RELIABILITY = 1 << 2, NVME_SMART_MEDIA_READ_ONLY = 1 << 3, NVME_SMART_FAILED_VOLATILE_MEDIA = 1 << 4, + NVME_SMART_PMR_UNRELIABLE = 1 << 5, +}; + +typedef struct NvmeEffectsLog { + uint32_t acs[256]; + uint32_t iocs[256]; + uint8_t resv[2048]; +} NvmeEffectsLog; + +enum { + NVME_CMD_EFF_CSUPP = 1 << 0, + NVME_CMD_EFF_LBCC = 1 << 1, + NVME_CMD_EFF_NCC = 1 << 2, + NVME_CMD_EFF_NIC = 1 << 3, + NVME_CMD_EFF_CCC = 1 << 4, + NVME_CMD_EFF_CSE_MASK = 3 << 16, + NVME_CMD_EFF_UUID_SEL = 1 << 19, }; enum NvmeLogIdentifier { NVME_LOG_ERROR_INFO = 0x01, NVME_LOG_SMART_INFO = 0x02, NVME_LOG_FW_SLOT_INFO = 0x03, + NVME_LOG_CMD_EFFECTS = 0x05, }; typedef struct QEMU_PACKED NvmePSD { @@ -771,11 +920,19 @@ typedef struct QEMU_PACKED NvmePSD { #define NVME_IDENTIFY_DATA_SIZE 4096 -enum { - NVME_ID_CNS_NS = 0x0, - NVME_ID_CNS_CTRL = 0x1, - NVME_ID_CNS_NS_ACTIVE_LIST = 0x2, - NVME_ID_CNS_NS_DESCR_LIST = 0x3, +enum NvmeIdCns { + NVME_ID_CNS_NS = 0x00, + NVME_ID_CNS_CTRL = 0x01, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_DESCR_LIST = 0x03, + NVME_ID_CNS_CS_NS = 0x05, + NVME_ID_CNS_CS_CTRL = 0x06, + NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, + NVME_ID_CNS_NS_PRESENT = 0x11, + NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, + NVME_ID_CNS_CS_NS_PRESENT = 0x1b, + NVME_ID_CNS_IO_COMMAND_SET = 0x1c, }; typedef struct QEMU_PACKED NvmeIdCtrl { @@ -794,7 +951,8 @@ typedef struct QEMU_PACKED NvmeIdCtrl { uint32_t rtd3e; uint32_t oaes; uint32_t ctratt; - uint8_t rsvd100[12]; + uint8_t rsvd100[11]; + uint8_t cntrltype; uint8_t fguid[16]; uint8_t rsvd128[128]; uint16_t oacs; @@ -845,6 +1003,11 @@ typedef struct QEMU_PACKED NvmeIdCtrl { uint8_t vs[1024]; } NvmeIdCtrl; +typedef struct NvmeIdCtrlZoned { + uint8_t zasl; + uint8_t rsvd1[4095]; +} NvmeIdCtrlZoned; + enum NvmeIdCtrlOacs { NVME_OACS_SECURITY = 1 << 0, NVME_OACS_FORMAT = 1 << 1, @@ -867,6 +1030,7 @@ enum NvmeIdCtrlFrmw { enum NvmeIdCtrlLpa { NVME_LPA_NS_SMART = 1 << 0, + NVME_LPA_CSE = 1 << 1, NVME_LPA_EXTENDED = 1 << 2, }; @@ -909,6 +1073,9 @@ enum NvmeIdCtrlLpa { #define NVME_AEC_NS_ATTR(aec) ((aec >> 8) & 0x1) #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1) +#define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff) +#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000) + enum NvmeFeatureIds { NVME_ARBITRATION = 0x1, NVME_POWER_MANAGEMENT = 0x2, @@ -922,6 +1089,7 @@ enum NvmeFeatureIds { NVME_WRITE_ATOMICITY = 0xa, NVME_ASYNCHRONOUS_EVENT_CONF = 0xb, NVME_TIMESTAMP = 0xe, + NVME_COMMAND_SET_PROFILE = 0x19, NVME_SOFTWARE_PROGRESS_MARKER = 0x80, NVME_FID_MAX = 0x100, }; @@ -968,6 +1136,12 @@ typedef struct QEMU_PACKED NvmeLBAF { uint8_t rp; } NvmeLBAF; +typedef struct QEMU_PACKED NvmeLBAFE { + uint64_t zsze; + uint8_t zdes; + uint8_t rsvd9[7]; +} NvmeLBAFE; + #define NVME_NSID_BROADCAST 0xffffffff typedef struct QEMU_PACKED NvmeIdNs { @@ -992,7 +1166,12 @@ typedef struct QEMU_PACKED NvmeIdNs { uint16_t nabspf; uint16_t noiob; uint8_t nvmcap[16]; - uint8_t rsvd64[40]; + uint16_t npwg; + uint16_t npwa; + uint16_t npdg; + uint16_t npda; + uint16_t nows; + uint8_t rsvd74[30]; uint8_t nguid[16]; uint64_t eui64; NvmeLBAF lbaf[16]; @@ -1006,18 +1185,40 @@ typedef struct QEMU_PACKED NvmeIdNsDescr { uint8_t rsvd2[2]; } NvmeIdNsDescr; -enum { - NVME_NIDT_EUI64_LEN = 8, - NVME_NIDT_NGUID_LEN = 16, - NVME_NIDT_UUID_LEN = 16, +enum NvmeNsIdentifierLength { + NVME_NIDL_EUI64 = 8, + NVME_NIDL_NGUID = 16, + NVME_NIDL_UUID = 16, + NVME_NIDL_CSI = 1, }; enum NvmeNsIdentifierType { - NVME_NIDT_EUI64 = 0x1, - NVME_NIDT_NGUID = 0x2, - NVME_NIDT_UUID = 0x3, + NVME_NIDT_EUI64 = 0x01, + NVME_NIDT_NGUID = 0x02, + NVME_NIDT_UUID = 0x03, + NVME_NIDT_CSI = 0x04, }; +enum NvmeCsi { + NVME_CSI_NVM = 0x00, + NVME_CSI_ZONED = 0x02, +}; + +#define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi))) + +typedef struct QEMU_PACKED NvmeIdNsZoned { + uint16_t zoc; + uint16_t ozcs; + uint32_t mar; + uint32_t mor; + uint32_t rrl; + uint32_t frl; + uint8_t rsvd20[2796]; + NvmeLBAFE lbafe[16]; + uint8_t rsvd3072[768]; + uint8_t vs[256]; +} NvmeIdNsZoned; + /*Deallocate Logical Block Features*/ #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10) #define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat) ((dlfeat) & 0x08) @@ -1029,6 +1230,7 @@ enum NvmeNsIdentifierType { #define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1)) +#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1) #define NVME_ID_NS_FLBAS_EXTENDED(flbas) ((flbas >> 4) & 0x1) #define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf)) #define NVME_ID_NS_MC_SEPARATE(mc) ((mc >> 1) & 0x1) @@ -1049,10 +1251,76 @@ enum NvmeIdNsDps { DPS_FIRST_EIGHT = 8, }; +enum NvmeZoneAttr { + NVME_ZA_FINISHED_BY_CTLR = 1 << 0, + NVME_ZA_FINISH_RECOMMENDED = 1 << 1, + NVME_ZA_RESET_RECOMMENDED = 1 << 2, + NVME_ZA_ZD_EXT_VALID = 1 << 7, +}; + +typedef struct QEMU_PACKED NvmeZoneReportHeader { + uint64_t nr_zones; + uint8_t rsvd[56]; +} NvmeZoneReportHeader; + +enum NvmeZoneReceiveAction { + NVME_ZONE_REPORT = 0, + NVME_ZONE_REPORT_EXTENDED = 1, +}; + +enum NvmeZoneReportType { + NVME_ZONE_REPORT_ALL = 0, + NVME_ZONE_REPORT_EMPTY = 1, + NVME_ZONE_REPORT_IMPLICITLY_OPEN = 2, + NVME_ZONE_REPORT_EXPLICITLY_OPEN = 3, + NVME_ZONE_REPORT_CLOSED = 4, + NVME_ZONE_REPORT_FULL = 5, + NVME_ZONE_REPORT_READ_ONLY = 6, + NVME_ZONE_REPORT_OFFLINE = 7, +}; + +enum NvmeZoneType { + NVME_ZONE_TYPE_RESERVED = 0x00, + NVME_ZONE_TYPE_SEQ_WRITE = 0x02, +}; + +enum NvmeZoneSendAction { + NVME_ZONE_ACTION_RSD = 0x00, + NVME_ZONE_ACTION_CLOSE = 0x01, + NVME_ZONE_ACTION_FINISH = 0x02, + NVME_ZONE_ACTION_OPEN = 0x03, + NVME_ZONE_ACTION_RESET = 0x04, + NVME_ZONE_ACTION_OFFLINE = 0x05, + NVME_ZONE_ACTION_SET_ZD_EXT = 0x10, +}; + +typedef struct QEMU_PACKED NvmeZoneDescr { + uint8_t zt; + uint8_t zs; + uint8_t za; + uint8_t rsvd3[5]; + uint64_t zcap; + uint64_t zslba; + uint64_t wp; + uint8_t rsvd32[32]; +} NvmeZoneDescr; + +typedef enum NvmeZoneState { + NVME_ZONE_STATE_RESERVED = 0x00, + NVME_ZONE_STATE_EMPTY = 0x01, + NVME_ZONE_STATE_IMPLICITLY_OPEN = 0x02, + NVME_ZONE_STATE_EXPLICITLY_OPEN = 0x03, + NVME_ZONE_STATE_CLOSED = 0x04, + NVME_ZONE_STATE_READ_ONLY = 0x0D, + NVME_ZONE_STATE_FULL = 0x0E, + NVME_ZONE_STATE_OFFLINE = 0x0F, +} NvmeZoneState; + static inline void _nvme_check_size(void) { QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4); + QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8); QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64); @@ -1066,9 +1334,15 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4); + QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4); + QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64); } #endif diff --git a/include/block/snapshot.h b/include/block/snapshot.h index b0fe42993d..940345692f 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -25,7 +25,7 @@ #ifndef SNAPSHOT_H #define SNAPSHOT_H - +#include "qapi/qapi-builtin-types.h" #define SNAPSHOT_OPT_BASE "snapshot." #define SNAPSHOT_OPT_ID "snapshot.id" @@ -77,17 +77,26 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, * These functions will properly handle dataplane (take aio_context_acquire * when appropriate for appropriate block drivers */ -bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); -int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, +bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + Error **errp); +int bdrv_all_delete_snapshot(const char *name, + bool has_devices, strList *devices, Error **errp); -int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, +int bdrv_all_goto_snapshot(const char *name, + bool has_devices, strList *devices, Error **errp); -int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_has_snapshot(const char *name, + bool has_devices, strList *devices, + Error **errp); int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, BlockDriverState *vm_state_bs, uint64_t vm_state_size, - BlockDriverState **first_bad_bs); + bool has_devices, + strList *devices, + Error **errp); -BlockDriverState *bdrv_all_find_vmstate_bs(void); +BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, + bool has_devices, strList *devices, + Error **errp); #endif diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h new file mode 100644 index 0000000000..ba2dd4b5df --- /dev/null +++ b/include/exec/confidential-guest-support.h @@ -0,0 +1,62 @@ +/* + * QEMU Confidential Guest support + * This interface describes the common pieces between various + * schemes for protecting guest memory or other state against a + * compromised hypervisor. This includes memory encryption (AMD's + * SEV and Intel's MKTME) or special protection modes (PEF on POWER, + * or PV on s390x). + * + * Copyright Red Hat. + * + * Authors: + * David Gibson <david@gibson.dropbear.id.au> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ +#ifndef QEMU_CONFIDENTIAL_GUEST_SUPPORT_H +#define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H + +#ifndef CONFIG_USER_ONLY + +#include "qom/object.h" + +#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" +OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) + +struct ConfidentialGuestSupport { + Object parent; + + /* + * ready: flag set by CGS initialization code once it's ready to + * start executing instructions in a potentially-secure + * guest + * + * The definition here is a bit fuzzy, because this is essentially + * part of a self-sanity-check, rather than a strict mechanism. + * + * It's not feasible to have a single point in the common machine + * init path to configure confidential guest support, because + * different mechanisms have different interdependencies requiring + * initialization in different places, often in arch or machine + * type specific code. It's also usually not possible to check + * for invalid configurations until that initialization code. + * That means it would be very easy to have a bug allowing CGS + * init to be bypassed entirely in certain configurations. + * + * Silently ignoring a requested security feature would be bad, so + * to avoid that we check late in init that this 'ready' flag is + * set if CGS was requested. If the CGS init hasn't happened, and + * so 'ready' is not set, we'll abort. + */ + bool ready; +}; + +typedef struct ConfidentialGuestSupportClass { + ObjectClass parent; +} ConfidentialGuestSupportClass; + +#endif /* !CONFIG_USER_ONLY */ + +#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ diff --git a/include/exec/memory.h b/include/exec/memory.h index c6ce74fb79..c6fb714e49 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -45,13 +45,11 @@ DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass, #ifdef CONFIG_FUZZ void fuzz_dma_read_cb(size_t addr, size_t len, - MemoryRegion *mr, - bool is_write); + MemoryRegion *mr); #else static inline void fuzz_dma_read_cb(size_t addr, size_t len, - MemoryRegion *mr, - bool is_write) + MemoryRegion *mr) { /* Do Nothing */ } @@ -149,6 +147,14 @@ typedef struct IOMMUTLBEvent { /* RAM is a persistent kind memory */ #define RAM_PMEM (1 << 5) + +/* + * UFFDIO_WRITEPROTECT is used on this RAMBlock to + * support 'write-tracking' migration type. + * Implies ram_state->ram_wt_enabled. + */ +#define RAM_UF_WRITEPROTECT (1 << 6) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -992,6 +998,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, * @size: size of the region. * @share: %true if memory must be mmaped with the MAP_SHARED flag * @fd: the fd to mmap. + * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the @@ -1003,6 +1010,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, uint64_t size, bool share, int fd, + ram_addr_t offset, Error **errp); #endif @@ -2506,7 +2514,7 @@ address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, void *buf, hwaddr len) { assert(addr < cache->len && len <= cache->len - addr); - fuzz_dma_read_cb(cache->xlat + addr, len, cache->mrs.mr, false); + fuzz_dma_read_cb(cache->xlat + addr, len, cache->mrs.mr); if (likely(cache->ptr)) { memcpy(buf, cache->ptr + addr, len); return MEMTX_OK; diff --git a/include/exec/memory_ldst_cached.h.inc b/include/exec/memory_ldst_cached.h.inc index 01efad62de..7bc8790d34 100644 --- a/include/exec/memory_ldst_cached.h.inc +++ b/include/exec/memory_ldst_cached.h.inc @@ -28,7 +28,7 @@ static inline uint32_t ADDRESS_SPACE_LD_CACHED(l)(MemoryRegionCache *cache, hwaddr addr, MemTxAttrs attrs, MemTxResult *result) { assert(addr < cache->len && 4 <= cache->len - addr); - fuzz_dma_read_cb(cache->xlat + addr, 4, cache->mrs.mr, false); + fuzz_dma_read_cb(cache->xlat + addr, 4, cache->mrs.mr); if (likely(cache->ptr)) { return LD_P(l)(cache->ptr + addr); } else { @@ -40,7 +40,7 @@ static inline uint64_t ADDRESS_SPACE_LD_CACHED(q)(MemoryRegionCache *cache, hwaddr addr, MemTxAttrs attrs, MemTxResult *result) { assert(addr < cache->len && 8 <= cache->len - addr); - fuzz_dma_read_cb(cache->xlat + addr, 8, cache->mrs.mr, false); + fuzz_dma_read_cb(cache->xlat + addr, 8, cache->mrs.mr); if (likely(cache->ptr)) { return LD_P(q)(cache->ptr + addr); } else { @@ -52,7 +52,7 @@ static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, hwaddr addr, MemTxAttrs attrs, MemTxResult *result) { assert(addr < cache->len && 2 <= cache->len - addr); - fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr, false); + fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr); if (likely(cache->ptr)) { return LD_P(uw)(cache->ptr + addr); } else { diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 40b16609ab..3cb9791df3 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -121,8 +121,8 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, const char *mem_path, bool readonly, Error **errp); RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - uint32_t ram_flags, int fd, bool readonly, - Error **errp); + uint32_t ram_flags, int fd, off_t offset, + bool readonly, Error **errp); RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); diff --git a/include/hw/boards.h b/include/hw/boards.h index 85af4faf76..a46dfe5d1a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -270,7 +270,7 @@ struct MachineState { bool iommu; bool suppress_vmdesc; bool enable_graphics; - char *memory_encryption; + ConfidentialGuestSupport *cgs; char *ram_memdev_id; /* * convenience alias to ram_memdev_id backend memory region diff --git a/include/hw/dma/pl080.h b/include/hw/dma/pl080.h index 1883f04270..3c9659e438 100644 --- a/include/hw/dma/pl080.h +++ b/include/hw/dma/pl080.h @@ -10,11 +10,12 @@ * (at your option) any later version. */ -/* This is a model of the Arm PrimeCell PL080/PL081 DMA controller: +/* + * This is a model of the Arm PrimeCell PL080/PL081 DMA controller: * The PL080 TRM is: - * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0196g/DDI0196.pdf + * https://developer.arm.com/documentation/ddi0196/latest * and the PL081 TRM is: - * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0218e/DDI0218.pdf + * https://developer.arm.com/documentation/ddi0218/latest * * QEMU interface: * + sysbus IRQ 0: DMACINTR combined interrupt line diff --git a/include/hw/misc/arm_integrator_debug.h b/include/hw/misc/arm_integrator_debug.h index 0077dacb44..798b082164 100644 --- a/include/hw/misc/arm_integrator_debug.h +++ b/include/hw/misc/arm_integrator_debug.h @@ -3,7 +3,7 @@ * * Browse the data sheet: * - * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0159b/Babbfijf.html + * https://developer.arm.com/documentation/dui0159/b/peripherals-and-interfaces/debug-leds-and-dip-switch-interface * * Copyright (c) 2013 Alex Bennée <alex@bennee.com> * diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h new file mode 100644 index 0000000000..3dcf6aa51d --- /dev/null +++ b/include/hw/pci-host/remote.h @@ -0,0 +1,30 @@ +/* + * PCI Host for remote device + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef REMOTE_PCIHOST_H +#define REMOTE_PCIHOST_H + +#include "exec/memory.h" +#include "hw/pci/pcie_host.h" + +#define TYPE_REMOTE_PCIHOST "remote-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(RemotePCIHost, REMOTE_PCIHOST) + +struct RemotePCIHost { + /*< private >*/ + PCIExpressHost parent_obj; + /*< public >*/ + + MemoryRegion *mr_pci_mem; + MemoryRegion *mr_sys_io; + MemoryRegion *mr_sys_mem; +}; + +#endif diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index bd014823a9..5b03a7b0eb 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -210,4 +210,6 @@ static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb) return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; } +char *spapr_pci_fw_dev_name(PCIDevice *dev); + #endif /* PCI_HOST_SPAPR_H */ diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 11f8ab7149..bd0c17dc78 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -192,6 +192,9 @@ #define PCI_DEVICE_ID_SUN_SIMBA 0x5000 #define PCI_DEVICE_ID_SUN_SABRE 0xa000 +#define PCI_VENDOR_ID_ORACLE 0x108e +#define PCI_DEVICE_ID_REMOTE_IOHUB 0xb000 + #define PCI_VENDOR_ID_CMD 0x1095 #define PCI_DEVICE_ID_CMD_646 0x0646 diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h new file mode 100644 index 0000000000..707dbe524c --- /dev/null +++ b/include/hw/ppc/pef.h @@ -0,0 +1,17 @@ +/* + * PEF (Protected Execution Facility) for POWER support + * + * Copyright Red Hat. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_PPC_PEF_H +#define HW_PPC_PEF_H + +int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); + +#endif /* HW_PPC_PEF_H */ diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index ee7eda3e01..d69cee17b2 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -58,6 +58,7 @@ struct PnvChip { MemoryRegion xscom; AddressSpace xscom_as; + MemoryRegion *fw_mr; gchar *dt_isa_nodename; }; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index c27c7ce515..ccbeeca1de 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -851,7 +851,6 @@ int spapr_max_server_number(SpaprMachineState *spapr); void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte0, uint64_t pte1); void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); -bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr); /* DRC callbacks. */ void spapr_core_release(DeviceState *dev); diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h index b3fd950634..6f9f02d3de 100644 --- a/include/hw/ppc/spapr_numa.h +++ b/include/hw/ppc/spapr_numa.h @@ -31,5 +31,6 @@ int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, int offset, PowerPCCPU *cpu); int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, int offset); +unsigned int spapr_numa_initial_nvgpu_numa_id(MachineState *machine); #endif /* HW_SPAPR_NUMA_H */ diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h index 7879692825..b7fde2354e 100644 --- a/include/hw/ppc/xive_regs.h +++ b/include/hw/ppc/xive_regs.h @@ -236,6 +236,8 @@ typedef struct XiveEND { (be32_to_cpu((end)->w0) & END_W0_UNCOND_ESCALATE) #define xive_end_is_silent_escalation(end) \ (be32_to_cpu((end)->w0) & END_W0_SILENT_ESCALATE) +#define xive_end_is_firmware(end) \ + (be32_to_cpu((end)->w0) & END_W0_FIRMWARE) static inline uint64_t xive_end_qaddr(XiveEND *end) { diff --git a/include/hw/remote/iohub.h b/include/hw/remote/iohub.h new file mode 100644 index 0000000000..0bf98e0d78 --- /dev/null +++ b/include/hw/remote/iohub.h @@ -0,0 +1,42 @@ +/* + * IO Hub for remote device + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef REMOTE_IOHUB_H +#define REMOTE_IOHUB_H + +#include "hw/pci/pci.h" +#include "qemu/event_notifier.h" +#include "qemu/thread-posix.h" +#include "hw/remote/mpqemu-link.h" + +#define REMOTE_IOHUB_NB_PIRQS PCI_DEVFN_MAX + +typedef struct ResampleToken { + void *iohub; + int pirq; +} ResampleToken; + +typedef struct RemoteIOHubState { + PCIDevice d; + EventNotifier irqfds[REMOTE_IOHUB_NB_PIRQS]; + EventNotifier resamplefds[REMOTE_IOHUB_NB_PIRQS]; + unsigned int irq_level[REMOTE_IOHUB_NB_PIRQS]; + ResampleToken token[REMOTE_IOHUB_NB_PIRQS]; + QemuMutex irq_level_lock[REMOTE_IOHUB_NB_PIRQS]; +} RemoteIOHubState; + +int remote_iohub_map_irq(PCIDevice *pci_dev, int intx); +void remote_iohub_set_irq(void *opaque, int pirq, int level); +void process_set_irqfd_msg(PCIDevice *pci_dev, MPQemuMsg *msg); + +void remote_iohub_init(RemoteIOHubState *iohub); +void remote_iohub_finalize(RemoteIOHubState *iohub); + +#endif diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h new file mode 100644 index 0000000000..2a2a33c4b2 --- /dev/null +++ b/include/hw/remote/machine.h @@ -0,0 +1,38 @@ +/* + * Remote machine configuration + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef REMOTE_MACHINE_H +#define REMOTE_MACHINE_H + +#include "qom/object.h" +#include "hw/boards.h" +#include "hw/pci-host/remote.h" +#include "io/channel.h" +#include "hw/remote/iohub.h" + +struct RemoteMachineState { + MachineState parent_obj; + + RemotePCIHost *host; + RemoteIOHubState iohub; +}; + +/* Used to pass to co-routine device and ioc. */ +typedef struct RemoteCommDev { + PCIDevice *dev; + QIOChannel *ioc; +} RemoteCommDev; + +#define TYPE_REMOTE_MACHINE "x-remote-machine" +OBJECT_DECLARE_SIMPLE_TYPE(RemoteMachineState, REMOTE_MACHINE) + +void coroutine_fn mpqemu_remote_msg_loop_co(void *data); + +#endif diff --git a/include/hw/remote/memory.h b/include/hw/remote/memory.h new file mode 100644 index 0000000000..bc2e30945f --- /dev/null +++ b/include/hw/remote/memory.h @@ -0,0 +1,19 @@ +/* + * Memory manager for remote device + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef REMOTE_MEMORY_H +#define REMOTE_MEMORY_H + +#include "exec/hwaddr.h" +#include "hw/remote/mpqemu-link.h" + +void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp); + +#endif diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h new file mode 100644 index 0000000000..4ec0915885 --- /dev/null +++ b/include/hw/remote/mpqemu-link.h @@ -0,0 +1,99 @@ +/* + * Communication channel between QEMU and remote device process + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MPQEMU_LINK_H +#define MPQEMU_LINK_H + +#include "qom/object.h" +#include "qemu/thread.h" +#include "io/channel.h" +#include "exec/hwaddr.h" +#include "io/channel-socket.h" +#include "hw/remote/proxy.h" + +#define REMOTE_MAX_FDS 8 + +#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data.u64) + +/** + * MPQemuCmd: + * + * MPQemuCmd enum type to specify the command to be executed on the remote + * device. + * + * This uses a private protocol between QEMU and the remote process. vfio-user + * protocol would supersede this in the future. + * + */ +typedef enum { + MPQEMU_CMD_SYNC_SYSMEM, + MPQEMU_CMD_RET, + MPQEMU_CMD_PCI_CFGWRITE, + MPQEMU_CMD_PCI_CFGREAD, + MPQEMU_CMD_BAR_WRITE, + MPQEMU_CMD_BAR_READ, + MPQEMU_CMD_SET_IRQFD, + MPQEMU_CMD_DEVICE_RESET, + MPQEMU_CMD_MAX, +} MPQemuCmd; + +typedef struct { + hwaddr gpas[REMOTE_MAX_FDS]; + uint64_t sizes[REMOTE_MAX_FDS]; + off_t offsets[REMOTE_MAX_FDS]; +} SyncSysmemMsg; + +typedef struct { + uint32_t addr; + uint32_t val; + int len; +} PciConfDataMsg; + +typedef struct { + hwaddr addr; + uint64_t val; + unsigned size; + bool memory; +} BarAccessMsg; + +/** + * MPQemuMsg: + * @cmd: The remote command + * @size: Size of the data to be shared + * @data: Structured data + * @fds: File descriptors to be shared with remote device + * + * MPQemuMsg Format of the message sent to the remote device from QEMU. + * + */ + +typedef struct { + int cmd; + size_t size; + + union { + uint64_t u64; + PciConfDataMsg pci_conf_data; + SyncSysmemMsg sync_sysmem; + BarAccessMsg bar_access; + } data; + + int fds[REMOTE_MAX_FDS]; + int num_fds; +} MPQemuMsg; + +bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp); +bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp); + +uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev, + Error **errp); +bool mpqemu_msg_valid(MPQemuMsg *msg); + +#endif diff --git a/include/hw/remote/proxy-memory-listener.h b/include/hw/remote/proxy-memory-listener.h new file mode 100644 index 0000000000..c4f3efb928 --- /dev/null +++ b/include/hw/remote/proxy-memory-listener.h @@ -0,0 +1,28 @@ +/* + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef PROXY_MEMORY_LISTENER_H +#define PROXY_MEMORY_LISTENER_H + +#include "exec/memory.h" +#include "io/channel.h" + +typedef struct ProxyMemoryListener { + MemoryListener listener; + + int n_mr_sections; + MemoryRegionSection *mr_sections; + + QIOChannel *ioc; +} ProxyMemoryListener; + +void proxy_memory_listener_configure(ProxyMemoryListener *proxy_listener, + QIOChannel *ioc); +void proxy_memory_listener_deconfigure(ProxyMemoryListener *proxy_listener); + +#endif diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h new file mode 100644 index 0000000000..741def71f1 --- /dev/null +++ b/include/hw/remote/proxy.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef PROXY_H +#define PROXY_H + +#include "hw/pci/pci.h" +#include "io/channel.h" +#include "hw/remote/proxy-memory-listener.h" +#include "qemu/event_notifier.h" + +#define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev" +OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV) + +typedef struct ProxyMemoryRegion { + PCIProxyDev *dev; + MemoryRegion mr; + bool memory; + bool present; + uint8_t type; +} ProxyMemoryRegion; + +struct PCIProxyDev { + PCIDevice parent_dev; + char *fd; + + /* + * Mutex used to protect the QIOChannel fd from + * the concurrent access by the VCPUs since proxy + * blocks while awaiting for the replies from the + * process remote. + */ + QemuMutex io_mutex; + QIOChannel *ioc; + Error *migration_blocker; + ProxyMemoryListener proxy_listener; + int virq; + EventNotifier intr; + EventNotifier resample; + ProxyMemoryRegion region[PCI_NUM_REGIONS]; +}; + +#endif /* PROXY_H */ diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h index aee758bc2d..1f1f545bfc 100644 --- a/include/hw/s390x/pv.h +++ b/include/hw/s390x/pv.h @@ -12,6 +12,9 @@ #ifndef HW_S390_PV_H #define HW_S390_PV_H +#include "qapi/error.h" +#include "sysemu/kvm.h" + #ifdef CONFIG_KVM #include "cpu.h" #include "hw/s390x/s390-virtio-ccw.h" @@ -55,4 +58,18 @@ static inline void s390_pv_unshare(void) {} static inline void s390_pv_inject_reset_error(CPUState *cs) {}; #endif /* CONFIG_KVM */ +int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + if (!cgs) { + return 0; + } + if (kvm_enabled()) { + return s390_pv_kvm_init(cgs, errp); + } + + error_setg(errp, "Protected Virtualization requires KVM"); + return -1; +} + #endif /* HW_S390_PV_H */ diff --git a/include/hw/ssi/pl022.h b/include/hw/ssi/pl022.h index 545b52689c..25d58db5f3 100644 --- a/include/hw/ssi/pl022.h +++ b/include/hw/ssi/pl022.h @@ -9,9 +9,10 @@ * (at your option) any later version. */ -/* This is a model of the Arm PrimeCell PL022 synchronous serial port. +/* + * This is a model of the Arm PrimeCell PL022 synchronous serial port. * The PL022 TRM is: - * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0194h/DDI0194H_ssp_pl022_trm.pdf + * https://developer.arm.com/documentation/ddi0194/latest * * QEMU interface: * + sysbus IRQ: SSPINTR combined interrupt line diff --git a/include/io/channel.h b/include/io/channel.h index ab9ea77959..88988979f8 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -777,4 +777,82 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc, IOHandler *io_write, void *opaque); +/** + * qio_channel_readv_full_all_eof: + * @ioc: the channel object + * @iov: the array of memory regions to read data to + * @niov: the length of the @iov array + * @fds: an array of file handles to read + * @nfds: number of file handles in @fds + * @errp: pointer to a NULL-initialized error object + * + * + * Performs same function as qio_channel_readv_all_eof. + * Additionally, attempts to read file descriptors shared + * over the channel. The function will wait for all + * requested data to be read, yielding from the current + * coroutine if required. data refers to both file + * descriptors and the iovs. + * + * Returns: 1 if all bytes were read, 0 if end-of-file + * occurs without data, or -1 on error + */ + +int qio_channel_readv_full_all_eof(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, size_t *nfds, + Error **errp); + +/** + * qio_channel_readv_full_all: + * @ioc: the channel object + * @iov: the array of memory regions to read data to + * @niov: the length of the @iov array + * @fds: an array of file handles to read + * @nfds: number of file handles in @fds + * @errp: pointer to a NULL-initialized error object + * + * + * Performs same function as qio_channel_readv_all_eof. + * Additionally, attempts to read file descriptors shared + * over the channel. The function will wait for all + * requested data to be read, yielding from the current + * coroutine if required. data refers to both file + * descriptors and the iovs. + * + * Returns: 0 if all bytes were read, or -1 on error + */ + +int qio_channel_readv_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, size_t *nfds, + Error **errp); + +/** + * qio_channel_writev_full_all: + * @ioc: the channel object + * @iov: the array of memory regions to write data from + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds + * @errp: pointer to a NULL-initialized error object + * + * + * Behaves like qio_channel_writev_full but will attempt + * to send all data passed (file handles and memory regions). + * The function will wait for all requested data + * to be written, yielding from the current coroutine + * if required. + * + * Returns: 0 if all bytes were written, or -1 on error + */ + +int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, + Error **errp); + #endif /* QIO_CHANNEL_H */ diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h index c85b6ec75b..e72083b117 100644 --- a/include/migration/snapshot.h +++ b/include/migration/snapshot.h @@ -15,7 +15,50 @@ #ifndef QEMU_MIGRATION_SNAPSHOT_H #define QEMU_MIGRATION_SNAPSHOT_H -int save_snapshot(const char *name, Error **errp); -int load_snapshot(const char *name, Error **errp); +#include "qapi/qapi-builtin-types.h" + +/** + * save_snapshot: Save an internal snapshot. + * @name: name of internal snapshot + * @overwrite: replace existing snapshot with @name + * @vmstate: blockdev node name to store VM state in + * @has_devices: whether to use explicit device list + * @devices: explicit device list to snapshot + * @errp: pointer to error object + * On success, return %true. + * On failure, store an error through @errp and return %false. + */ +bool save_snapshot(const char *name, bool overwrite, + const char *vmstate, + bool has_devices, strList *devices, + Error **errp); + +/** + * load_snapshot: Load an internal snapshot. + * @name: name of internal snapshot + * @vmstate: blockdev node name to load VM state from + * @has_devices: whether to use explicit device list + * @devices: explicit device list to snapshot + * @errp: pointer to error object + * On success, return %true. + * On failure, store an error through @errp and return %false. + */ +bool load_snapshot(const char *name, + const char *vmstate, + bool has_devices, strList *devices, + Error **errp); + +/** + * delete_snapshot: Delete a snapshot. + * @name: path to snapshot + * @has_devices: whether to use explicit device list + * @devices: explicit device list to snapshot + * @errp: pointer to error object + * On success, return %true. + * On failure, store an error through @errp and return %false. + */ +bool delete_snapshot(const char *name, + bool has_devices, strList *devices, + Error **errp); #endif diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h index 3380b662f3..b79add035d 100644 --- a/include/qemu/event_notifier.h +++ b/include/qemu/event_notifier.h @@ -24,6 +24,7 @@ struct EventNotifier { #else int rfd; int wfd; + bool initialized; #endif }; diff --git a/include/qemu/fifo8.h b/include/qemu/fifo8.h index 489c354291..28bf2cee57 100644 --- a/include/qemu/fifo8.h +++ b/include/qemu/fifo8.h @@ -148,12 +148,16 @@ uint32_t fifo8_num_used(Fifo8 *fifo); extern const VMStateDescription vmstate_fifo8; -#define VMSTATE_FIFO8(_field, _state) { \ - .name = (stringify(_field)), \ - .size = sizeof(Fifo8), \ - .vmsd = &vmstate_fifo8, \ - .flags = VMS_STRUCT, \ - .offset = vmstate_offset_value(_state, _field, Fifo8), \ +#define VMSTATE_FIFO8_TEST(_field, _state, _test) { \ + .name = (stringify(_field)), \ + .field_exists = (_test), \ + .size = sizeof(Fifo8), \ + .vmsd = &vmstate_fifo8, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, Fifo8), \ } +#define VMSTATE_FIFO8(_field, _state) \ + VMSTATE_FIFO8_TEST(_field, _state, NULL) + #endif /* QEMU_FIFO8_H */ diff --git a/include/qemu/job.h b/include/qemu/job.h index 32aabb1c60..efc6fa7544 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -251,6 +251,11 @@ struct JobDriver { */ void (*clean)(Job *job); + /** + * If the callback is not NULL, it will be invoked in job_cancel_async + */ + void (*cancel)(Job *job); + /** Called when the job is freed */ void (*free)(Job *job); diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index 8b7a5c70f3..456ff87df1 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -17,6 +17,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path); * @readonly: true for a read-only mapping, false for read/write. * @shared: map has RAM_SHARED flag. * @is_pmem: map has RAM_PMEM flag. + * @map_offset: map starts at offset of map_offset from the start of fd * * Return: * On success, return a pointer to the mapped area. @@ -27,7 +28,8 @@ void *qemu_ram_mmap(int fd, size_t align, bool readonly, bool shared, - bool is_pmem); + bool is_pmem, + off_t map_offset); void qemu_ram_munmap(int fd, void *ptr, size_t size); diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 68deb74ef6..dc39b05c30 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -37,6 +37,7 @@ typedef struct Chardev Chardev; typedef struct Clock Clock; typedef struct CompatProperty CompatProperty; typedef struct CoMutex CoMutex; +typedef struct ConfidentialGuestSupport ConfidentialGuestSupport; typedef struct CPUAddressSpace CPUAddressSpace; typedef struct CPUState CPUState; typedef struct DeviceListener DeviceListener; diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h new file mode 100644 index 0000000000..6b74f92792 --- /dev/null +++ b/include/qemu/userfaultfd.h @@ -0,0 +1,35 @@ +/* + * Linux UFFD-WP support + * + * Copyright Virtuozzo GmbH, 2020 + * + * Authors: + * Andrey Gruzdev <andrey.gruzdev@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef USERFAULTFD_H +#define USERFAULTFD_H + +#include "qemu/osdep.h" +#include "exec/hwaddr.h" +#include <linux/userfaultfd.h> + +int uffd_query_features(uint64_t *features); +int uffd_create_fd(uint64_t features, bool non_blocking); +void uffd_close_fd(int uffd_fd); +int uffd_register_memory(int uffd_fd, void *addr, uint64_t length, + uint64_t mode, uint64_t *ioctls); +int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length); +int uffd_change_protection(int uffd_fd, void *addr, uint64_t length, + bool wp, bool dont_wake); +int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, + uint64_t length, bool dont_wake); +int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake); +int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); +int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); +bool uffd_poll_events(int uffd_fd, int tmo); + +#endif /* USERFAULTFD_H */ diff --git a/include/qom/object.h b/include/qom/object.h index d378f13a11..6721cd312e 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -638,7 +638,8 @@ bool object_apply_global_props(Object *obj, const GPtrArray *props, Error **errp); void object_set_machine_compat_props(GPtrArray *compat_props); void object_set_accelerator_compat_props(GPtrArray *compat_props); -void object_register_sugar_prop(const char *driver, const char *prop, const char *value); +void object_register_sugar_prop(const char *driver, const char *prop, + const char *value, bool optional); void object_apply_compat_props(Object *obj); /** diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h index 0c5284dbbc..f177142f16 100644 --- a/include/sysemu/iothread.h +++ b/include/sysemu/iothread.h @@ -57,4 +57,10 @@ IOThread *iothread_create(const char *id, Error **errp); void iothread_stop(IOThread *iothread); void iothread_destroy(IOThread *iothread); +/* + * Returns true if executing withing IOThread context, + * false otherwise. + */ +bool qemu_in_iothread(void); + #endif /* IOTHREAD_H */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 739682f3c3..c5546bdecc 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -233,22 +233,6 @@ int kvm_has_intx_set_mask(void); */ bool kvm_arm_supports_user_irq(void); -/** - * kvm_memcrypt_enabled - return boolean indicating whether memory encryption - * is enabled - * Returns: 1 memory encryption is enabled - * 0 memory encryption is disabled - */ -bool kvm_memcrypt_enabled(void); - -/** - * kvm_memcrypt_encrypt_data: encrypt the memory range - * - * Return: 1 failed to encrypt the range - * 0 succesfully encrypted memory region - */ -int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len); - #ifdef NEED_CPU_H #include "cpu.h" diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h index 7ab6e3e31d..5c5a13c6ca 100644 --- a/include/sysemu/sev.h +++ b/include/sysemu/sev.h @@ -16,8 +16,8 @@ #include "sysemu/kvm.h" -void *sev_guest_init(const char *id); -int sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len); +int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); int sev_inject_launch_secret(const char *hdr, const char *secret, uint64_t gpa, Error **errp); #endif |