diff options
-rw-r--r-- | hw/rdma/rdma_backend.c | 125 | ||||
-rw-r--r-- | hw/rdma/rdma_backend.h | 18 | ||||
-rw-r--r-- | hw/rdma/rdma_backend_defs.h | 5 | ||||
-rw-r--r-- | hw/rdma/rdma_rm.c | 117 | ||||
-rw-r--r-- | hw/rdma/rdma_rm.h | 13 | ||||
-rw-r--r-- | hw/rdma/rdma_rm_defs.h | 10 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_cmd.c | 206 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_main.c | 16 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_qp_ops.c | 46 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_qp_ops.h | 1 | ||||
-rwxr-xr-x | scripts/decodetree.py | 233 | ||||
m--------- | slirp | 0 | ||||
-rw-r--r-- | target/arm/translate-sve.c | 24 | ||||
-rw-r--r-- | target/hppa/translate.c | 16 | ||||
-rw-r--r-- | target/riscv/insn_trans/trans_rvc.inc.c | 10 | ||||
-rw-r--r-- | target/riscv/translate.c | 4 |
16 files changed, 762 insertions, 82 deletions
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index d1660b6474..cf34874e9d 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -40,6 +40,7 @@ typedef struct BackendCtx { void *up_ctx; struct ibv_sge sge; /* Used to save MAD recv buffer */ RdmaBackendQP *backend_qp; /* To maintain recv buffers */ + RdmaBackendSRQ *backend_srq; } BackendCtx; struct backend_umad { @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) int i, ne, total_ne = 0; BackendCtx *bctx; struct ibv_wc wc[2]; + RdmaProtectedGSList *cqe_ctx_list; qemu_mutex_lock(&rdma_dev_res->lock); do { @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) comp_handler(bctx->up_ctx, &wc[i]); - rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, - wc[i].wr_id); + if (bctx->backend_qp) { + cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list; + } else { + cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list; + } + + rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id); rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); g_free(bctx); } @@ -662,6 +669,60 @@ err_free_bctx: g_free(bctx); } +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, + RdmaBackendSRQ *srq, struct ibv_sge *sge, + uint32_t num_sge, void *ctx) +{ + BackendCtx *bctx; + struct ibv_sge new_sge[MAX_SGE]; + uint32_t bctx_id; + int rc; + struct ibv_recv_wr wr = {}, *bad_wr; + + bctx = g_malloc0(sizeof(*bctx)); + bctx->up_ctx = ctx; + bctx->backend_srq = srq; + + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); + if (unlikely(rc)) { + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); + goto err_free_bctx; + } + + rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id); + + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge, + &backend_dev->rdma_dev_res->stats.rx_bufs_len); + if (rc) { + complete_work(IBV_WC_GENERAL_ERR, rc, ctx); + goto err_dealloc_cqe_ctx; + } + + wr.num_sge = num_sge; + wr.sg_list = new_sge; + wr.wr_id = bctx_id; + rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr); + if (rc) { + rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d", + srq->ibsrq->handle, rc, errno); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); + goto err_dealloc_cqe_ctx; + } + + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); + backend_dev->rdma_dev_res->stats.rx_bufs++; + backend_dev->rdma_dev_res->stats.rx_srq++; + + return; + +err_dealloc_cqe_ctx: + backend_dev->rdma_dev_res->stats.rx_bufs_err++; + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); + +err_free_bctx: + g_free(bctx); +} + int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) { pd->ibpd = ibv_alloc_pd(backend_dev->context); @@ -733,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq) int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, RdmaBackendPD *pd, RdmaBackendCQ *scq, - RdmaBackendCQ *rcq, uint32_t max_send_wr, - uint32_t max_recv_wr, uint32_t max_send_sge, - uint32_t max_recv_sge) + RdmaBackendCQ *rcq, RdmaBackendSRQ *srq, + uint32_t max_send_wr, uint32_t max_recv_wr, + uint32_t max_send_sge, uint32_t max_recv_sge) { struct ibv_qp_init_attr attr = {}; @@ -763,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, attr.cap.max_recv_wr = max_recv_wr; attr.cap.max_send_sge = max_send_sge; attr.cap.max_recv_sge = max_recv_sge; + if (srq) { + attr.srq = srq->ibsrq; + } qp->ibqp = ibv_create_qp(pd->ibpd, &attr); if (!qp->ibqp) { @@ -938,6 +1002,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res) rdma_protected_gslist_destroy(&qp->cqe_ctx_list); } +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, + uint32_t max_wr, uint32_t max_sge, + uint32_t srq_limit) +{ + struct ibv_srq_init_attr srq_init_attr = {}; + + srq_init_attr.attr.max_wr = max_wr; + srq_init_attr.attr.max_sge = max_sge; + srq_init_attr.attr.srq_limit = srq_limit; + + srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr); + if (!srq->ibsrq) { + rdma_error_report("ibv_create_srq failed, errno=%d", errno); + return -EIO; + } + + rdma_protected_gslist_init(&srq->cqe_ctx_list); + + return 0; +} + +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr) +{ + if (!srq->ibsrq) { + return -EINVAL; + } + + return ibv_query_srq(srq->ibsrq, srq_attr); +} + +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr, + int srq_attr_mask) +{ + if (!srq->ibsrq) { + return -EINVAL; + } + + return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask); +} + +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res) +{ + if (srq->ibsrq) { + ibv_destroy_srq(srq->ibsrq); + } + g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res); + rdma_protected_gslist_destroy(&srq->cqe_ctx_list); +} + #define CHK_ATTR(req, dev, member, fmt) ({ \ trace_rdma_check_dev_attr(#member, dev.member, req->member); \ if (req->member > dev.member) { \ @@ -960,6 +1073,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, } dev_attr->max_sge = MAX_SGE; + dev_attr->max_srq_sge = MAX_SGE; CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64); CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d"); @@ -970,6 +1084,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d"); return 0; } diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 38056d97c7..7c1a19a2b5 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq); int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, RdmaBackendPD *pd, RdmaBackendCQ *scq, - RdmaBackendCQ *rcq, uint32_t max_send_wr, - uint32_t max_recv_wr, uint32_t max_send_sge, - uint32_t max_recv_sge); + RdmaBackendCQ *rcq, RdmaBackendSRQ *srq, + uint32_t max_send_wr, uint32_t max_recv_wr, + uint32_t max_send_sge, uint32_t max_recv_sge); int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, uint32_t qkey); int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, void *ctx); +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, + uint32_t max_wr, uint32_t max_sge, + uint32_t srq_limit); +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr); +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr, + int srq_attr_mask); +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, + RdmaDeviceResources *dev_res); +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, + RdmaBackendSRQ *srq, struct ibv_sge *sge, + uint32_t num_sge, void *ctx); + #endif diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index 817153dc8c..0b55be3503 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP { RdmaProtectedGSList cqe_ctx_list; } RdmaBackendQP; +typedef struct RdmaBackendSRQ { + struct ibv_srq *ibsrq; + RdmaProtectedGSList cqe_ctx_list; +} RdmaBackendSRQ; + #endif diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index bac3b2f4a6..1927f85472 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res) dev_res->stats.tx_err); monitor_printf(mon, "\trx_bufs : %" PRId64 "\n", dev_res->stats.rx_bufs); + monitor_printf(mon, "\trx_srq : %" PRId64 "\n", + dev_res->stats.rx_srq); monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n", dev_res->stats.rx_bufs_len); monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n", @@ -384,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, uint8_t qp_type, uint32_t max_send_wr, uint32_t max_send_sge, uint32_t send_cq_handle, uint32_t max_recv_wr, uint32_t max_recv_sge, - uint32_t recv_cq_handle, void *opaque, uint32_t *qpn) + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn, + uint8_t is_srq, uint32_t srq_handle) { int rc; RdmaRmQP *qp; RdmaRmCQ *scq, *rcq; RdmaRmPD *pd; + RdmaRmSRQ *srq = NULL; uint32_t rm_qpn; pd = rdma_rm_get_pd(dev_res, pd_handle); @@ -406,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, return -EINVAL; } + if (is_srq) { + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + rdma_error_report("Invalid srqn %d", srq_handle); + return -EINVAL; + } + + srq->recv_cq_handle = recv_cq_handle; + } + if (qp_type == IBV_QPT_GSI) { scq->notify = CNT_SET; rcq->notify = CNT_SET; @@ -422,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, qp->send_cq_handle = send_cq_handle; qp->recv_cq_handle = recv_cq_handle; qp->opaque = opaque; + qp->is_srq = is_srq; rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd, - &scq->backend_cq, &rcq->backend_cq, max_send_wr, - max_recv_wr, max_send_sge, max_recv_sge); + &scq->backend_cq, &rcq->backend_cq, + is_srq ? &srq->backend_srq : NULL, + max_send_wr, max_recv_wr, max_send_sge, + max_recv_sge); + if (rc) { rc = -EIO; goto out_dealloc_qp; @@ -542,6 +560,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle) rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); } +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle) +{ + return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle); +} + +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle, + uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit, + uint32_t *srq_handle, void *opaque) +{ + RdmaRmSRQ *srq; + RdmaRmPD *pd; + int rc; + + pd = rdma_rm_get_pd(dev_res, pd_handle); + if (!pd) { + return -EINVAL; + } + + srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle); + if (!srq) { + return -ENOMEM; + } + + rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd, + max_wr, max_sge, srq_limit); + if (rc) { + rc = -EIO; + goto out_dealloc_srq; + } + + srq->opaque = opaque; + + return 0; + +out_dealloc_srq: + rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle); + + return rc; +} + +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr) +{ + RdmaRmSRQ *srq; + + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + return -EINVAL; + } + + return rdma_backend_query_srq(&srq->backend_srq, srq_attr); +} + +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr, int srq_attr_mask) +{ + RdmaRmSRQ *srq; + + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + return -EINVAL; + } + + if ((srq_attr_mask & IBV_SRQ_LIMIT) && + (srq_attr->srq_limit == 0)) { + return -EINVAL; + } + + if ((srq_attr_mask & IBV_SRQ_MAX_WR) && + (srq_attr->max_wr == 0)) { + return -EINVAL; + } + + return rdma_backend_modify_srq(&srq->backend_srq, srq_attr, + srq_attr_mask); +} + +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle) +{ + RdmaRmSRQ *srq; + + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + return; + } + + rdma_backend_destroy_srq(&srq->backend_srq, dev_res); + rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle); +} + void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) { void **cqe_ctx; @@ -671,6 +779,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr) res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp * dev_attr->max_qp_wr, sizeof(void *)); res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC)); + res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq, + sizeof(RdmaRmSRQ)); init_ports(dev_res); @@ -689,6 +799,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, fini_ports(dev_res, backend_dev, ifname); + res_tbl_free(&dev_res->srq_tbl); res_tbl_free(&dev_res->uc_tbl); res_tbl_free(&dev_res->cqe_ctx_tbl); res_tbl_free(&dev_res->qp_tbl); diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index 4f03f9b8c5..e8639909cd 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, uint8_t qp_type, uint32_t max_send_wr, uint32_t max_send_sge, uint32_t send_cq_handle, uint32_t max_recv_wr, uint32_t max_recv_sge, - uint32_t recv_cq_handle, void *opaque, uint32_t *qpn); + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn, + uint8_t is_srq, uint32_t srq_handle); RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn); int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx, @@ -65,6 +66,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int attr_mask, struct ibv_qp_init_attr *init_attr); void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle); +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle); +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle, + uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit, + uint32_t *srq_handle, void *opaque); +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr); +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr, int srq_attr_mask); +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle); + int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, void *ctx); void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id); diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index c200d311de..534f2f74d3 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -33,6 +33,7 @@ #define MAX_QP_RD_ATOM 16 #define MAX_QP_INIT_RD_ATOM 16 #define MAX_AH 64 +#define MAX_SRQ 512 #define MAX_RM_TBL_NAME 16 #define MAX_CONSEQ_EMPTY_POLL_CQ 4096 /* considered as error above this */ @@ -87,8 +88,15 @@ typedef struct RdmaRmQP { uint32_t send_cq_handle; uint32_t recv_cq_handle; enum ibv_qp_state qp_state; + uint8_t is_srq; } RdmaRmQP; +typedef struct RdmaRmSRQ { + RdmaBackendSRQ backend_srq; + uint32_t recv_cq_handle; + void *opaque; +} RdmaRmSRQ; + typedef struct RdmaRmGid { union ibv_gid gid; int backend_gid_index; @@ -106,6 +114,7 @@ typedef struct RdmaRmStats { uint64_t rx_bufs; uint64_t rx_bufs_len; uint64_t rx_bufs_err; + uint64_t rx_srq; uint64_t completions; uint64_t mad_tx; uint64_t mad_tx_err; @@ -128,6 +137,7 @@ struct RdmaDeviceResources { RdmaRmResTbl qp_tbl; RdmaRmResTbl cq_tbl; RdmaRmResTbl cqe_ctx_tbl; + RdmaRmResTbl srq_tbl; GHashTable *qp_hash; /* Keeps mapping between real and emulated */ QemuMutex lock; RdmaRmStats stats; diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 4afcd2037d..8d70c0d23d 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge, uint32_t spages, uint32_t rcqe, uint32_t rmax_sge, - uint32_t rpages) + uint32_t rpages, uint8_t is_srq) { uint64_t *dir = NULL, *tbl = NULL; PvrdmaRing *sr, *rr; @@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, char ring_name[MAX_RING_NAME_SZ]; uint32_t wqe_sz; - if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES - || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) { - rdma_error_report("Got invalid page count for QP ring: %d, %d", spages, + if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) { + rdma_error_report("Got invalid send page count for QP ring: %d", + spages); + return rc; + } + + if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) { + rdma_error_report("Got invalid recv page count for QP ring: %d", rpages); return rc; } @@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, goto out; } - sr = g_malloc(2 * sizeof(*rr)); - rr = &sr[1]; + if (!is_srq) { + sr = g_malloc(2 * sizeof(*rr)); + rr = &sr[1]; + } else { + sr = g_malloc(sizeof(*sr)); + } *rings = sr; @@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, goto out_unmap_ring_state; } - /* Create recv ring */ - rr->ring_state = &sr->ring_state[1]; - wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + - sizeof(struct pvrdma_sge) * rmax_sge - 1); - sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma); - rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state, - rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages); - if (rc) { - goto out_free_sr; + if (!is_srq) { + /* Create recv ring */ + rr->ring_state = &sr->ring_state[1]; + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * rmax_sge - 1); + sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma); + rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state, + rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], + rpages); + if (rc) { + goto out_free_sr; + } } goto out; @@ -436,10 +448,12 @@ out: return rc; } -static void destroy_qp_rings(PvrdmaRing *ring) +static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq) { pvrdma_ring_free(&ring[0]); - pvrdma_ring_free(&ring[1]); + if (!is_srq) { + pvrdma_ring_free(&ring[1]); + } rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); g_free(ring); @@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings, cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks, cmd->max_recv_wr, cmd->max_recv_sge, - cmd->total_chunks - cmd->send_chunks - 1); + cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq); if (rc) { return rc; } @@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, cmd->max_send_wr, cmd->max_send_sge, cmd->send_cq_handle, cmd->max_recv_wr, cmd->max_recv_sge, cmd->recv_cq_handle, rings, - &resp->qpn); + &resp->qpn, cmd->is_srq, cmd->srq_handle); if (rc) { - destroy_qp_rings(rings); + destroy_qp_rings(rings, cmd->is_srq); return rc; } @@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, return -EINVAL; } - rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle); - ring = (PvrdmaRing *)qp->opaque; - destroy_qp_rings(ring); + destroy_qp_rings(ring, qp->is_srq); + rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle); return 0; } @@ -596,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, return 0; } +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring, + uint64_t pdir_dma, uint32_t max_wr, + uint32_t max_sge, uint32_t nchunks) +{ + uint64_t *dir = NULL, *tbl = NULL; + PvrdmaRing *r; + int rc = -EINVAL; + char ring_name[MAX_RING_NAME_SZ]; + uint32_t wqe_sz; + + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { + rdma_error_report("Got invalid page count for SRQ ring: %d", + nchunks); + return rc; + } + + dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); + if (!dir) { + rdma_error_report("Failed to map to SRQ page directory"); + goto out; + } + + tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); + if (!tbl) { + rdma_error_report("Failed to map to SRQ page table"); + goto out; + } + + r = g_malloc(sizeof(*r)); + *ring = r; + + r->ring_state = (struct pvrdma_ring *) + rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); + if (!r->ring_state) { + rdma_error_report("Failed to map tp SRQ ring state"); + goto out_free_ring_mem; + } + + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * max_sge - 1); + sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma); + rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr, + wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1); + if (rc) { + goto out_unmap_ring_state; + } + + goto out; + +out_unmap_ring_state: + rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE); + +out_free_ring_mem: + g_free(r); + +out: + rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); + rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); + + return rc; +} + +static void destroy_srq_ring(PvrdmaRing *ring) +{ + pvrdma_ring_free(ring); + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); + g_free(ring); +} + +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_create_srq *cmd = &req->create_srq; + struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp; + PvrdmaRing *ring = NULL; + int rc; + + memset(resp, 0, sizeof(*resp)); + + rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, + cmd->attrs.max_wr, cmd->attrs.max_sge, + cmd->nchunks); + if (rc) { + return rc; + } + + rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle, + cmd->attrs.max_wr, cmd->attrs.max_sge, + cmd->attrs.srq_limit, &resp->srqn, ring); + if (rc) { + destroy_srq_ring(ring); + return rc; + } + + return 0; +} + +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_query_srq *cmd = &req->query_srq; + struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp; + + memset(resp, 0, sizeof(*resp)); + + return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle, + (struct ibv_srq_attr *)&resp->attrs); +} + +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq; + + /* Only support SRQ limit */ + if (!(cmd->attr_mask & IBV_SRQ_LIMIT) || + (cmd->attr_mask & IBV_SRQ_MAX_WR)) + return -EINVAL; + + return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle, + (struct ibv_srq_attr *)&cmd->attrs, + cmd->attr_mask); +} + +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq; + RdmaRmSRQ *srq; + PvrdmaRing *ring; + + srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle); + if (!srq) { + return -EINVAL; + } + + ring = (PvrdmaRing *)srq->opaque; + destroy_srq_ring(ring); + rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle); + + return 0; +} + struct cmd_handler { uint32_t cmd; uint32_t ack; @@ -621,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = { {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc}, {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind}, {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, + {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq}, + {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq}, + {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq}, + {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq}, }; int pvrdma_exec_cmd(PVRDMADev *dev) diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 0b46561bad..769f7990f8 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), + DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), DEFINE_PROP_END_OF_LIST(), }; @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev) dsr->caps.max_mr = dev->dev_attr.max_mr; dsr->caps.max_pd = dev->dev_attr.max_pd; dsr->caps.max_ah = dev->dev_attr.max_ah; + dsr->caps.max_srq = dev->dev_attr.max_srq; + dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; + dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; dsr->caps.gid_tbl_len = MAX_GIDS; dsr->caps.sys_image_guid = 0; dsr->caps.node_guid = dev->node_guid; @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); } break; + case PVRDMA_UAR_SRQ_OFFSET: + if (val & PVRDMA_UAR_SRQ_RECV) { + trace_pvrdma_uar_write(addr, val, "QP", "SRQ", + val & PVRDMA_UAR_HANDLE_MASK, 0); + pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); + } + break; default: rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, addr, val); @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev) dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - TARGET_PAGE_SIZE; /* First page is ring state */ + + dev->dev_attr.max_srq_wr = pg_tbl_bytes / + ((sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge)) * + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; } static int pvrdma_check_ram_shared(Object *obj, void *opaque) diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 5b9786efbe..bd6db858de 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, memset(cqe1, 0, sizeof(*cqe1)); cqe1->wr_id = cqe->wr_id; - cqe1->qp = cqe->qp; + cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num; cqe1->opcode = cqe->opcode; cqe1->status = wc->status; cqe1->byte_len = wc->byte_len; @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) } } +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle) +{ + RdmaRmSRQ *srq; + PvrdmaRqWqe *wqe; + PvrdmaRing *ring; + + srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle); + if (unlikely(!srq)) { + return; + } + + ring = (PvrdmaRing *)srq->opaque; + + wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); + while (wqe) { + CompHandlerCtx *comp_ctx; + + /* Prepare CQE */ + comp_ctx = g_malloc(sizeof(CompHandlerCtx)); + comp_ctx->dev = dev; + comp_ctx->cq_handle = srq->recv_cq_handle; + comp_ctx->cqe.wr_id = wqe->hdr.wr_id; + comp_ctx->cqe.qp = 0; + comp_ctx->cqe.opcode = IBV_WC_RECV; + + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, + dev->dev_attr.max_sge); + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); + continue; + } + + rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq, + (struct ibv_sge *)&wqe->sge[0], + wqe->hdr.num_sge, + comp_ctx); + + pvrdma_ring_read_inc(ring); + + wqe = pvrdma_ring_next_elem_read(ring); + } + +} + void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) { RdmaRmCQ *cq; diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h index 31cb48ba29..82e720a76f 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.h +++ b/hw/rdma/vmw/pvrdma_qp_ops.h @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void); void pvrdma_qp_ops_fini(void); void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle); void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle); #endif diff --git a/scripts/decodetree.py b/scripts/decodetree.py index aa790b596a..81874e22cc 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -27,6 +27,7 @@ import getopt insnwidth = 32 insnmask = 0xffffffff +variablewidth = False fields = {} arguments = {} formats = {} @@ -255,7 +256,7 @@ class FunctionField: return self.func + '(' + str(self.base) + ')' def str_extract(self): - return self.func + '(' + self.base.str_extract() + ')' + return self.func + '(ctx, ' + self.base.str_extract() + ')' def __eq__(self, other): return self.func == other.func and self.base == other.base @@ -289,7 +290,7 @@ class Arguments: class General: """Common code between instruction formats and instruction patterns""" - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds): + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): self.name = name self.file = input_file self.lineno = lineno @@ -299,6 +300,7 @@ class General: self.undefmask = udfm self.fieldmask = fldm self.fields = flds + self.width = w def __str__(self): return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) @@ -316,7 +318,7 @@ class Format(General): return decode_function + '_extract_' + self.name def output_extract(self): - output('static void ', self.extract_name(), '(', + output('static void ', self.extract_name(), '(DisasContext *ctx, ', self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') for n, f in self.fields.items(): output(' a->', n, ' = ', f.str_extract(), ';\n') @@ -341,7 +343,8 @@ class Pattern(General): arg = self.base.base.name output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') if not extracted: - output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n') + output(ind, self.base.extract_name(), + '(ctx, &u.f_', arg, ', insn);\n') for n, f in self.fields.items(): output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') output(ind, 'if (', translate_prefix, '_', self.name, @@ -352,7 +355,7 @@ class Pattern(General): class MultiPattern(General): """Class representing an overlapping set of instruction patterns""" - def __init__(self, lineno, pats, fixb, fixm, udfm): + def __init__(self, lineno, pats, fixb, fixm, udfm, w): self.file = input_file self.lineno = lineno self.pats = pats @@ -360,6 +363,7 @@ class MultiPattern(General): self.fixedbits = fixb self.fixedmask = fixm self.undefmask = udfm + self.width = w def __str__(self): r = "{" @@ -502,7 +506,7 @@ def infer_argument_set(flds): return arg -def infer_format(arg, fieldmask, flds): +def infer_format(arg, fieldmask, flds, width): global arguments global formats global decode_function @@ -521,6 +525,8 @@ def infer_format(arg, fieldmask, flds): continue if fieldmask != fmt.fieldmask: continue + if width != fmt.width: + continue if not eq_fields_for_fmts(flds, fmt.fields): continue return (fmt, const_flds) @@ -529,7 +535,7 @@ def infer_format(arg, fieldmask, flds): if not arg: arg = infer_argument_set(flds) - fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds) + fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) formats[name] = fmt return (fmt, const_flds) @@ -546,6 +552,7 @@ def parse_generic(lineno, is_format, name, toks): global re_ident global insnwidth global insnmask + global variablewidth fixedmask = 0 fixedbits = 0 @@ -633,8 +640,15 @@ def parse_generic(lineno, is_format, name, toks): error(lineno, 'invalid token "{0}"'.format(t)) width += shift + if variablewidth and width < insnwidth and width % 8 == 0: + shift = insnwidth - width + fixedbits <<= shift + fixedmask <<= shift + undefmask <<= shift + undefmask |= (1 << shift) - 1 + # We should have filled in all of the bits of the instruction. - if not (is_format and width == 0) and width != insnwidth: + elif not (is_format and width == 0) and width != insnwidth: error(lineno, 'definition has {0} bits'.format(width)) # Do not check for fields overlaping fields; one valid usage @@ -660,7 +674,7 @@ def parse_generic(lineno, is_format, name, toks): if name in formats: error(lineno, 'duplicate format name', name) fmt = Format(name, lineno, arg, fixedbits, fixedmask, - undefmask, fieldmask, flds) + undefmask, fieldmask, flds, width) formats[name] = fmt else: # Patterns can reference a format ... @@ -670,12 +684,14 @@ def parse_generic(lineno, is_format, name, toks): error(lineno, 'pattern specifies both format and argument set') if fixedmask & fmt.fixedmask: error(lineno, 'pattern fixed bits overlap format fixed bits') + if width != fmt.width: + error(lineno, 'pattern uses format of different width') fieldmask |= fmt.fieldmask fixedbits |= fmt.fixedbits fixedmask |= fmt.fixedmask undefmask |= fmt.undefmask else: - (fmt, flds) = infer_format(arg, fieldmask, flds) + (fmt, flds) = infer_format(arg, fieldmask, flds, width) arg = fmt.base for f in flds.keys(): if f not in arg.fields: @@ -687,7 +703,7 @@ def parse_generic(lineno, is_format, name, toks): if f not in flds.keys() and f not in fmt.fields.keys(): error(lineno, 'field {0} not initialized'.format(f)) pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, - undefmask, fieldmask, flds) + undefmask, fieldmask, flds, width) patterns.append(pat) allpatterns.append(pat) @@ -727,6 +743,13 @@ def build_multi_pattern(lineno, pats): if p.lineno < lineno: lineno = p.lineno + width = None + for p in pats: + if width is None: + width = p.width + elif width != p.width: + error(lineno, 'width mismatch in patterns within braces') + repeat = True while repeat: if fixedmask == 0: @@ -742,7 +765,7 @@ def build_multi_pattern(lineno, pats): else: repeat = False - mp = MultiPattern(lineno, pats, fixedbits, fixedmask, undefmask) + mp = MultiPattern(lineno, pats, fixedbits, fixedmask, undefmask, width) patterns.append(mp) # end build_multi_pattern @@ -872,7 +895,7 @@ class Tree: # extract the fields now. if not extracted and self.base: output(ind, self.base.extract_name(), - '(&u.f_', self.base.base.name, ', insn);\n') + '(ctx, &u.f_', self.base.base.name, ', insn);\n') extracted = True # Attempt to aid the compiler in producing compact switch statements. @@ -943,6 +966,147 @@ def build_tree(pats, outerbits, outermask): # end build_tree +class SizeTree: + """Class representing a node in a size decode tree""" + + def __init__(self, m, w): + self.mask = m + self.subs = [] + self.base = None + self.width = w + + def str1(self, i): + ind = str_indent(i) + r = '{0}{1:08x}'.format(ind, self.mask) + r += ' [\n' + for (b, s) in self.subs: + r += '{0} {1:08x}:\n'.format(ind, b) + r += s.str1(i + 4) + '\n' + r += ind + ']' + return r + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + ind = str_indent(i) + + # If we need to load more bytes to test, do so now. + if extracted < self.width: + output(ind, 'insn = ', decode_function, + '_load_bytes(ctx, insn, {0}, {1});\n' + .format(extracted / 8, self.width / 8)); + extracted = self.width + + # Attempt to aid the compiler in producing compact switch statements. + # If the bits in the mask are contiguous, extract them. + sh = is_contiguous(self.mask) + if sh > 0: + # Propagate SH down into the local functions. + def str_switch(b, sh=sh): + return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) + + def str_case(b, sh=sh): + return '0x{0:x}'.format(b >> sh) + else: + def str_switch(b): + return 'insn & 0x{0:08x}'.format(b) + + def str_case(b): + return '0x{0:08x}'.format(b) + + output(ind, 'switch (', str_switch(self.mask), ') {\n') + for b, s in sorted(self.subs): + innermask = outermask | self.mask + innerbits = outerbits | b + output(ind, 'case ', str_case(b), ':\n') + output(ind, ' /* ', + str_match_bits(innerbits, innermask), ' */\n') + s.output_code(i + 4, extracted, innerbits, innermask) + output(ind, '}\n') + output(ind, 'return insn;\n') +# end SizeTree + +class SizeLeaf: + """Class representing a leaf node in a size decode tree""" + + def __init__(self, m, w): + self.mask = m + self.width = w + + def str1(self, i): + ind = str_indent(i) + return '{0}{1:08x}'.format(ind, self.mask) + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + global decode_function + ind = str_indent(i) + + # If we need to load more bytes, do so now. + if extracted < self.width: + output(ind, 'insn = ', decode_function, + '_load_bytes(ctx, insn, {0}, {1});\n' + .format(extracted / 8, self.width / 8)); + extracted = self.width + output(ind, 'return insn;\n') +# end SizeLeaf + + +def build_size_tree(pats, width, outerbits, outermask): + global insnwidth + + # Collect the mask of bits that are fixed in this width + innermask = 0xff << (insnwidth - width) + innermask &= ~outermask + minwidth = None + onewidth = True + for i in pats: + innermask &= i.fixedmask + if minwidth is None: + minwidth = i.width + elif minwidth != i.width: + onewidth = False; + if minwidth < i.width: + minwidth = i.width + + if onewidth: + return SizeLeaf(innermask, minwidth) + + if innermask == 0: + if width < minwidth: + return build_size_tree(pats, width + 8, outerbits, outermask) + + pnames = [] + for p in pats: + pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) + error_with_file(pats[0].file, pats[0].lineno, + 'overlapping patterns size {0}:'.format(width), pnames) + + bins = {} + for i in pats: + fb = i.fixedbits & innermask + if fb in bins: + bins[fb].append(i) + else: + bins[fb] = [i] + + fullmask = outermask | innermask + lens = sorted(bins.keys()) + if len(lens) == 1: + b = lens[0] + return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) + + r = SizeTree(innermask, width) + for b, l in bins.items(): + s = build_size_tree(l, width, b | outerbits, fullmask) + r.subs.append((b, s)) + return r +# end build_size_tree + + def prop_format(tree): """Propagate Format objects into the decode tree""" @@ -965,6 +1129,23 @@ def prop_format(tree): # end prop_format +def prop_size(tree): + """Propagate minimum widths up the decode size tree""" + + if isinstance(tree, SizeTree): + min = None + for (b, s) in tree.subs: + width = prop_size(s) + if min is None or min > width: + min = width + assert min >= tree.width + tree.width = min + else: + min = tree.width + return min +# end prop_size + + def main(): global arguments global formats @@ -979,13 +1160,14 @@ def main(): global insntype global insnmask global decode_function + global variablewidth decode_scope = 'static ' long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', - 'static-decode='] + 'static-decode=', 'varinsnwidth='] try: - (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts) + (opts, args) = getopt.getopt(sys.argv[1:], 'o:vw:', long_opts) except getopt.GetoptError as err: error(0, err) for o, a in opts: @@ -999,7 +1181,9 @@ def main(): elif o == '--translate': translate_prefix = a translate_scope = '' - elif o in ('-w', '--insnwidth'): + elif o in ('-w', '--insnwidth', '--varinsnwidth'): + if o == '--varinsnwidth': + variablewidth = True insnwidth = int(a) if insnwidth == 16: insntype = 'uint16_t' @@ -1017,8 +1201,12 @@ def main(): parse_file(f) f.close() - t = build_tree(patterns, 0, 0) - prop_format(t) + if variablewidth: + stree = build_size_tree(patterns, 8, 0, 0) + prop_size(stree) + + dtree = build_tree(patterns, 0, 0) + prop_format(dtree) if output_file: output_fd = open(output_file, 'w') @@ -1059,11 +1247,18 @@ def main(): f = arguments[n] output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') output(i4, '} u;\n\n') - t.output_code(4, False, 0, 0) + dtree.output_code(4, False, 0, 0) output(i4, 'return false;\n') output('}\n') + if variablewidth: + output('\n', decode_scope, insntype, ' ', decode_function, + '_load(DisasContext *ctx)\n{\n', + ' ', insntype, ' insn = 0;\n\n') + stree.output_code(4, 0, 0, 0) + output('}\n') + if output_file: output_fd.close() # end main diff --git a/slirp b/slirp -Subproject 59a1b1f165458c2acb7ff0525b543945f741622 +Subproject 0e79ba48567ccfb3cc2cf2e98cce8811eee7e45 diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 245cd82621..80645db508 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -54,35 +54,35 @@ typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, /* See e.g. ASR (immediate, predicated). * Returns -1 for unallocated encoding; diagnose later. */ -static int tszimm_esz(int x) +static int tszimm_esz(DisasContext *s, int x) { x >>= 3; /* discard imm3 */ return 31 - clz32(x); } -static int tszimm_shr(int x) +static int tszimm_shr(DisasContext *s, int x) { - return (16 << tszimm_esz(x)) - x; + return (16 << tszimm_esz(s, x)) - x; } /* See e.g. LSL (immediate, predicated). */ -static int tszimm_shl(int x) +static int tszimm_shl(DisasContext *s, int x) { - return x - (8 << tszimm_esz(x)); + return x - (8 << tszimm_esz(s, x)); } -static inline int plus1(int x) +static inline int plus1(DisasContext *s, int x) { return x + 1; } /* The SH bit is in bit 8. Extract the low 8 and shift. */ -static inline int expand_imm_sh8s(int x) +static inline int expand_imm_sh8s(DisasContext *s, int x) { return (int8_t)x << (x & 0x100 ? 8 : 0); } -static inline int expand_imm_sh8u(int x) +static inline int expand_imm_sh8u(DisasContext *s, int x) { return (uint8_t)x << (x & 0x100 ? 8 : 0); } @@ -90,7 +90,7 @@ static inline int expand_imm_sh8u(int x) /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype) * with unsigned data. C.f. SVE Memory Contiguous Load Group. */ -static inline int msz_dtype(int msz) +static inline int msz_dtype(DisasContext *s, int msz) { static const uint8_t dtype[4] = { 0, 5, 10, 15 }; return dtype[msz]; @@ -4834,7 +4834,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) int desc, poff; /* Load the first quadword using the normal predicated load helpers. */ - desc = sve_memopidx(s, msz_dtype(msz)); + desc = sve_memopidx(s, msz_dtype(s, msz)); desc |= zt << MEMOPIDX_SHIFT; desc = simd_desc(16, 16, desc); t_desc = tcg_const_i32(desc); @@ -5016,7 +5016,7 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, fn = fn_multiple[be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) @@ -5065,7 +5065,7 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_i32 t_desc; int desc; - desc = sve_memopidx(s, msz_dtype(msz)); + desc = sve_memopidx(s, msz_dtype(s, msz)); desc |= scale << MEMOPIDX_SHIFT; desc = simd_desc(vsz, vsz, desc); t_desc = tcg_const_i32(desc); diff --git a/target/hppa/translate.c b/target/hppa/translate.c index e1febdfea1..188fe688cb 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -279,7 +279,7 @@ typedef struct DisasContext { } DisasContext; /* Note that ssm/rsm instructions number PSW_W and PSW_E differently. */ -static int expand_sm_imm(int val) +static int expand_sm_imm(DisasContext *ctx, int val) { if (val & PSW_SM_E) { val = (val & ~PSW_SM_E) | PSW_E; @@ -291,43 +291,43 @@ static int expand_sm_imm(int val) } /* Inverted space register indicates 0 means sr0 not inferred from base. */ -static int expand_sr3x(int val) +static int expand_sr3x(DisasContext *ctx, int val) { return ~val; } /* Convert the M:A bits within a memory insn to the tri-state value we use for the final M. */ -static int ma_to_m(int val) +static int ma_to_m(DisasContext *ctx, int val) { return val & 2 ? (val & 1 ? -1 : 1) : 0; } /* Convert the sign of the displacement to a pre or post-modify. */ -static int pos_to_m(int val) +static int pos_to_m(DisasContext *ctx, int val) { return val ? 1 : -1; } -static int neg_to_m(int val) +static int neg_to_m(DisasContext *ctx, int val) { return val ? -1 : 1; } /* Used for branch targets and fp memory ops. */ -static int expand_shl2(int val) +static int expand_shl2(DisasContext *ctx, int val) { return val << 2; } /* Used for fp memory ops. */ -static int expand_shl3(int val) +static int expand_shl3(DisasContext *ctx, int val) { return val << 3; } /* Used for assemble_21. */ -static int expand_shl11(int val) +static int expand_shl11(DisasContext *ctx, int val) { return val << 11; } diff --git a/target/riscv/insn_trans/trans_rvc.inc.c b/target/riscv/insn_trans/trans_rvc.inc.c index ebcd977b2f..3e5d6fd5ea 100644 --- a/target/riscv/insn_trans/trans_rvc.inc.c +++ b/target/riscv/insn_trans/trans_rvc.inc.c @@ -48,13 +48,13 @@ static bool trans_c_flw_ld(DisasContext *ctx, arg_c_flw_ld *a) REQUIRE_EXT(ctx, RVF); arg_c_lw tmp; - decode_insn16_extract_cl_w(&tmp, ctx->opcode); + decode_insn16_extract_cl_w(ctx, &tmp, ctx->opcode); arg_flw arg = { .rd = tmp.rd, .rs1 = tmp.rs1, .imm = tmp.uimm }; return trans_flw(ctx, &arg); #else /* C.LD ( RV64C/RV128C-only ) */ arg_c_fld tmp; - decode_insn16_extract_cl_d(&tmp, ctx->opcode); + decode_insn16_extract_cl_d(ctx, &tmp, ctx->opcode); arg_ld arg = { .rd = tmp.rd, .rs1 = tmp.rs1, .imm = tmp.uimm }; return trans_ld(ctx, &arg); #endif @@ -80,13 +80,13 @@ static bool trans_c_fsw_sd(DisasContext *ctx, arg_c_fsw_sd *a) REQUIRE_EXT(ctx, RVF); arg_c_sw tmp; - decode_insn16_extract_cs_w(&tmp, ctx->opcode); + decode_insn16_extract_cs_w(ctx, &tmp, ctx->opcode); arg_fsw arg = { .rs1 = tmp.rs1, .rs2 = tmp.rs2, .imm = tmp.uimm }; return trans_fsw(ctx, &arg); #else /* C.SD ( RV64C/RV128C-only ) */ arg_c_fsd tmp; - decode_insn16_extract_cs_d(&tmp, ctx->opcode); + decode_insn16_extract_cs_d(ctx, &tmp, ctx->opcode); arg_sd arg = { .rs1 = tmp.rs1, .rs2 = tmp.rs2, .imm = tmp.uimm }; return trans_sd(ctx, &arg); #endif @@ -107,7 +107,7 @@ static bool trans_c_jal_addiw(DisasContext *ctx, arg_c_jal_addiw *a) #ifdef TARGET_RISCV32 /* C.JAL */ arg_c_j tmp; - decode_insn16_extract_cj(&tmp, ctx->opcode); + decode_insn16_extract_cj(ctx, &tmp, ctx->opcode); arg_jal arg = { .rd = 1, .imm = tmp.imm }; return trans_jal(ctx, &arg); #else diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 967eac7bc3..2ff6b49487 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -517,7 +517,7 @@ static void decode_RV32_64C(DisasContext *ctx) } #define EX_SH(amount) \ - static int ex_shift_##amount(int imm) \ + static int ex_shift_##amount(DisasContext *ctx, int imm) \ { \ return imm << amount; \ } @@ -533,7 +533,7 @@ EX_SH(12) } \ } while (0) -static int ex_rvc_register(int reg) +static int ex_rvc_register(DisasContext *ctx, int reg) { return 8 + reg; } |