diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | contrib/rdmacm-mux/Makefile.objs | 1 | ||||
-rw-r--r-- | contrib/rdmacm-mux/main.c | 12 | ||||
-rw-r--r-- | docs/pvrdma.txt | 4 | ||||
-rw-r--r-- | hw/rdma/rdma_backend.c | 63 | ||||
-rw-r--r-- | hw/rdma/rdma_backend.h | 12 | ||||
-rw-r--r-- | hw/rdma/rdma_backend_defs.h | 1 | ||||
-rw-r--r-- | hw/rdma/rdma_rm.c | 9 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_main.c | 10 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_qp_ops.c | 44 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_qp_ops.h | 4 |
11 files changed, 96 insertions, 66 deletions
@@ -581,6 +581,8 @@ vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a $(call LINK, $^) vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a $(call LINK, $^) + +rdmacm-mux$(EXESUF): LIBS += "-libumad" rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) $(call LINK, $^) diff --git a/contrib/rdmacm-mux/Makefile.objs b/contrib/rdmacm-mux/Makefile.objs index be3eacb6f7..3df744af89 100644 --- a/contrib/rdmacm-mux/Makefile.objs +++ b/contrib/rdmacm-mux/Makefile.objs @@ -1,4 +1,3 @@ ifdef CONFIG_PVRDMA -CFLAGS += -libumad -Wno-format-truncation rdmacm-mux-obj-y = main.o endif diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c index 835a7f9214..ae88c77a1e 100644 --- a/contrib/rdmacm-mux/main.c +++ b/contrib/rdmacm-mux/main.c @@ -42,6 +42,8 @@ /* The below can be override by command line parameter */ #define UNIX_SOCKET_PATH "/var/run/rdmacm-mux" +/* Has format %s-%s-%d" <path>-<rdma-dev--name>-<port> */ +#define SOCKET_PATH_MAX (PATH_MAX - NAME_MAX - sizeof(int) - 2) #define RDMA_PORT_NUM 1 typedef struct RdmaCmServerArgs { @@ -95,7 +97,7 @@ static void help(const char *progname) static void parse_args(int argc, char *argv[]) { int c; - char unix_socket_path[PATH_MAX]; + char unix_socket_path[SOCKET_PATH_MAX]; strcpy(server.args.rdma_dev_name, ""); strcpy(unix_socket_path, UNIX_SOCKET_PATH); @@ -113,7 +115,7 @@ static void parse_args(int argc, char *argv[]) case 's': /* This is temporary, final name will build below */ - strncpy(unix_socket_path, optarg, PATH_MAX); + strncpy(unix_socket_path, optarg, SOCKET_PATH_MAX); break; case 'p': @@ -348,7 +350,7 @@ static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) static void *umad_recv_thread_func(void *args) { int rc; - RdmaCmMuxMsg msg = {0}; + RdmaCmMuxMsg msg = {}; int fd = -2; msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ; @@ -385,7 +387,7 @@ static void *umad_recv_thread_func(void *args) static int read_and_process(int fd) { int rc; - RdmaCmMuxMsg msg = {0}; + RdmaCmMuxMsg msg = {}; struct umad_hdr *hdr; uint32_t *comm_id = 0; uint16_t attr_id; @@ -742,7 +744,7 @@ static void signal_handler(int sig, siginfo_t *siginfo, void *context) static int init(void) { int rc; - struct sigaction sig = {0}; + struct sigaction sig = {}; rc = init_listener(); if (rc) { diff --git a/docs/pvrdma.txt b/docs/pvrdma.txt index 5175251b47..0f0dd8a7e5 100644 --- a/docs/pvrdma.txt +++ b/docs/pvrdma.txt @@ -99,6 +99,9 @@ MAD layer to send and receive RDMA-CM MAD packets. To build rdmacm-mux run # make rdmacm-mux +Before running the rdmacm-mux make sure that both ib_cm and rdma_cm kernel +modules aren't loaded, otherwise the rdmacm-mux service will fail to start. + The application accepts 3 command line arguments and exposes a UNIX socket to pass control and data to it. -d rdma-device-name Name of RDMA device to register with @@ -153,7 +156,6 @@ Ethernet function can be used for other Ethernet purposes such as IP. specify the port to use. If not set 1 will be used. - dev-caps-max-mr-size: The maximum size of MR. - dev-caps-max-qp: Maximum number of QPs. -- dev-caps-max-sge: Maximum number of SGE elements in WR. - dev-caps-max-cq: Maximum number of CQs. - dev-caps-max-mr: Maximum number of MRs. - dev-caps-max-pd: Maximum number of PDs. diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index c28bfbd44d..fd571f21e5 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -32,17 +32,6 @@ #include "rdma_rm.h" #include "rdma_backend.h" -/* Vendor Errors */ -#define VENDOR_ERR_FAIL_BACKEND 0x201 -#define VENDOR_ERR_TOO_MANY_SGES 0x202 -#define VENDOR_ERR_NOMEM 0x203 -#define VENDOR_ERR_QP0 0x204 -#define VENDOR_ERR_INV_NUM_SGE 0x205 -#define VENDOR_ERR_MAD_SEND 0x206 -#define VENDOR_ERR_INVLKEY 0x207 -#define VENDOR_ERR_MR_SMALL 0x208 -#define VENDOR_ERR_INV_MAD_BUFF 0x209 - #define THR_NAME_LEN 16 #define THR_POLL_TO 5000 @@ -190,7 +179,7 @@ static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev) static int check_mux_op_status(CharBackend *mad_chr_be) { - RdmaCmMuxMsg msg = {0}; + RdmaCmMuxMsg msg = {}; int ret; pr_dbg("Reading response\n"); @@ -387,7 +376,7 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx, union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge) { - RdmaCmMuxMsg msg = {0}; + RdmaCmMuxMsg msg = {}; char *hdr, *data; int ret; @@ -475,11 +464,6 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, } pr_dbg("num_sge=%d\n", num_sge); - if (!num_sge || num_sge > MAX_SGE) { - pr_dbg("invalid num_sge=%d\n", num_sge); - complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx); - return; - } bctx = g_malloc0(sizeof(*bctx)); bctx->up_ctx = ctx; @@ -602,11 +586,6 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, } pr_dbg("num_sge=%d\n", num_sge); - if (!num_sge || num_sge > MAX_SGE) { - pr_dbg("invalid num_sge=%d\n", num_sge); - complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx); - return; - } bctx = g_malloc0(sizeof(*bctx)); bctx->up_ctx = ctx; @@ -938,21 +917,25 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp) static int init_device_caps(RdmaBackendDev *backend_dev, struct ibv_device_attr *dev_attr) { - if (ibv_query_device(backend_dev->context, &backend_dev->dev_attr)) { + struct ibv_device_attr bk_dev_attr; + + if (ibv_query_device(backend_dev->context, &bk_dev_attr)) { return -EIO; } - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_mr_size, "%" PRId64); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_sge, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp_wr, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_cq, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_cqe, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_mr, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_pd, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp_rd_atom, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp_init_rd_atom, "%d"); - CHK_ATTR(dev_attr, backend_dev->dev_attr, max_ah, "%d"); + dev_attr->max_sge = MAX_SGE; + + CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64); + CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_sge, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_qp_wr, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_cq, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_cqe, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_mr, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_pd, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d"); return 0; } @@ -1083,8 +1066,10 @@ static void mad_fini(RdmaBackendDev *backend_dev) pr_dbg("Stopping MAD\n"); disable_rdmacm_mux_async(backend_dev); qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be); - qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list)); - qemu_mutex_destroy(&backend_dev->recv_mads_list.lock); + if (backend_dev->recv_mads_list.list) { + qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list)); + qemu_mutex_destroy(&backend_dev->recv_mads_list.lock); + } } int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, @@ -1112,7 +1097,7 @@ int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, union ibv_gid *gid) { - RdmaCmMuxMsg msg = {0}; + RdmaCmMuxMsg msg = {}; int ret; pr_dbg("0x%llx, 0x%llx\n", @@ -1138,7 +1123,7 @@ int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, union ibv_gid *gid) { - RdmaCmMuxMsg msg = {0}; + RdmaCmMuxMsg msg = {}; int ret; pr_dbg("0x%llx, 0x%llx\n", diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 8cae40f827..5114c90e67 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -22,6 +22,18 @@ #include "rdma_rm_defs.h" #include "rdma_backend_defs.h" +/* Vendor Errors */ +#define VENDOR_ERR_FAIL_BACKEND 0x201 +#define VENDOR_ERR_TOO_MANY_SGES 0x202 +#define VENDOR_ERR_NOMEM 0x203 +#define VENDOR_ERR_QP0 0x204 +#define VENDOR_ERR_INV_NUM_SGE 0x205 +#define VENDOR_ERR_MAD_SEND 0x206 +#define VENDOR_ERR_INVLKEY 0x207 +#define VENDOR_ERR_MR_SMALL 0x208 +#define VENDOR_ERR_INV_MAD_BUFF 0x209 +#define VENDOR_ERR_INV_GID_IDX 0x210 + /* Add definition for QP0 and QP1 as there is no userspace enums for them */ enum ibv_special_qp_type { IBV_QPT_SMI = 0, diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index 1e5c3dd3bf..15ae8b970e 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -41,7 +41,6 @@ typedef struct RdmaCmMux { } RdmaCmMux; typedef struct RdmaBackendDev { - struct ibv_device_attr dev_attr; RdmaBackendThread comp_thread; PCIDevice *dev; RdmaDeviceResources *rdma_dev_res; diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index f5b1295890..268ff633a4 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -41,6 +41,9 @@ static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, static inline void res_tbl_free(RdmaRmResTbl *tbl) { + if (!tbl->bitmap) { + return; + } qemu_mutex_destroy(&tbl->lock); g_free(tbl->tbl); g_free(tbl->bitmap); @@ -576,7 +579,7 @@ int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int sgid_idx) { - if (unlikely(sgid_idx < 0 || sgid_idx > MAX_PORT_GIDS)) { + if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) { pr_dbg("Got invalid sgid_idx %d\n", sgid_idx); return -EINVAL; } @@ -655,5 +658,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, res_tbl_free(&dev_res->cq_tbl); res_tbl_free(&dev_res->pd_tbl); - g_hash_table_destroy(dev_res->qp_hash); + if (dev_res->qp_hash) { + g_hash_table_destroy(dev_res->qp_hash); + } } diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 838ad8a949..d2bdb5ba8c 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -43,7 +43,6 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, MAX_MR_SIZE), DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), - DEFINE_PROP_INT32("dev-caps-max-sge", PVRDMADev, dev_attr.max_sge, MAX_SGE), DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ), DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR), DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD), @@ -549,8 +548,9 @@ static void init_dev_caps(PVRDMADev *dev) sizeof(struct pvrdma_rq_wqe_hdr)); dev->dev_attr.max_qp_wr = pg_tbl_bytes / - (wr_sz + sizeof(struct pvrdma_sge) * MAX_SGE) - - TARGET_PAGE_SIZE; /* First page is ring state */ + (wr_sz + sizeof(struct pvrdma_sge) * + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; + /* First page is ring state ^^^^ */ pr_dbg("max_qp_wr=%d\n", dev->dev_attr.max_qp_wr); dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - @@ -626,8 +626,6 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) init_regs(pdev); - init_dev_caps(dev); - rc = init_msix(pdev, errp); if (rc) { goto out; @@ -640,6 +638,8 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) goto out; } + init_dev_caps(dev); + rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr, errp); if (rc) { goto out; diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 300471a4c9..ce5a60e184 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -121,6 +121,16 @@ static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc) g_free(ctx); } +static void complete_with_error(uint32_t vendor_err, void *ctx) +{ + struct ibv_wc wc = {0}; + + wc.status = IBV_WC_GENERAL_ERR; + wc.vendor_err = vendor_err; + + pvrdma_qp_ops_comp_handler(ctx, &wc); +} + void pvrdma_qp_ops_fini(void) { rdma_backend_unregister_comp_handler(); @@ -133,7 +143,7 @@ int pvrdma_qp_ops_init(void) return 0; } -int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) +void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) { RdmaRmQP *qp; PvrdmaSqWqe *wqe; @@ -145,7 +155,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); if (unlikely(!qp)) { - return -EINVAL; + pr_dbg("Invalid qpn\n"); + return; } ring = (PvrdmaRing *)qp->opaque; @@ -168,7 +179,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index); if (!sgid) { pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index); - return -EIO; + complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); + continue; } pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index, sgid->global.interface_id); @@ -179,7 +191,15 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) if (sgid_idx <= 0) { pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", wqe->hdr.wr.ud.av.gid_index); - return -EIO; + complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); + continue; + } + + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { + pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge, + dev->dev_attr.max_sge); + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); + continue; } rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type, @@ -193,11 +213,9 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) wqe = pvrdma_ring_next_elem_read(ring); } - - return 0; } -int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) +void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) { RdmaRmQP *qp; PvrdmaRqWqe *wqe; @@ -207,7 +225,8 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); if (unlikely(!qp)) { - return -EINVAL; + pr_dbg("Invalid qpn\n"); + return; } ring = &((PvrdmaRing *)qp->opaque)[1]; @@ -227,6 +246,13 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) comp_ctx->cqe.qp = qp_handle; comp_ctx->cqe.opcode = IBV_WC_RECV; + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { + pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge, + dev->dev_attr.max_sge); + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); + continue; + } + rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res, &qp->backend_qp, qp->qp_type, (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, @@ -236,8 +262,6 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) wqe = pvrdma_ring_next_elem_read(ring); } - - return 0; } void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h index ac46bf7fdf..31cb48ba29 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.h +++ b/hw/rdma/vmw/pvrdma_qp_ops.h @@ -20,8 +20,8 @@ int pvrdma_qp_ops_init(void); void pvrdma_qp_ops_fini(void); -int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); -int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); +void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); +void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle); #endif |