diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-08-20 15:44:40 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-08-20 15:44:41 +0100 |
commit | c8090972fa85b197411b530ddf81a2867379406c (patch) | |
tree | e69e51b0b544fe9486ec6811f024944a9300f327 | |
parent | 62c34848efb41f0e81af0c6b4f1d5d577039eec9 (diff) | |
parent | 21ab34c9543fe1b6d31b3edbd01a397e7e090d00 (diff) |
Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging
RDMA queue
# gpg: Signature made Sat 18 Aug 2018 16:01:46 BST
# gpg: using RSA key 36D4C0F0CF2FE46D
# gpg: Good signature from "Marcel Apfelbaum <marcel.apfelbaum@zoho.com>"
# gpg: aka "Marcel Apfelbaum <marcel@redhat.com>"
# gpg: aka "Marcel Apfelbaum <marcel.apfelbaum@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B1C6 3A57 F92E 08F2 640F 31F5 36D4 C0F0 CF2F E46D
* remotes/marcel/tags/rdma-pull-request:
config: split PVRDMA from RDMA
hw/pvrdma: remove not needed include
hw/rdma: Add reference to pci_dev in backend_dev
hw/rdma: Bugfix - Support non-aligned buffers
hw/rdma: Print backend QP number in hex format
hw/rdma: Cosmetic change - move to generic function
hw/pvrdma: Cosmetic change - indent right
hw/rdma: Reorder resource cleanup
hw/rdma: Do not allocate memory for non-dma MR
hw/rdma: Delete useless structure RdmaRmUserMR
hw/pvrdma: Make default pkey 0xFFFF
hw/pvrdma: Clean CQE before use
hw/rdma: Modify debug macros
hw/pvrdma: Bugfix - provide the correct attr_mask to query_qp
hw/rdma: Make distinction between device init and start modes
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-x | configure | 55 | ||||
-rw-r--r-- | hw/rdma/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/rdma/rdma_backend.c | 105 | ||||
-rw-r--r-- | hw/rdma/rdma_backend.h | 4 | ||||
-rw-r--r-- | hw/rdma/rdma_backend_defs.h | 3 | ||||
-rw-r--r-- | hw/rdma/rdma_rm.c | 69 | ||||
-rw-r--r-- | hw/rdma/rdma_rm_defs.h | 10 | ||||
-rw-r--r-- | hw/rdma/rdma_utils.c | 4 | ||||
-rw-r--r-- | hw/rdma/rdma_utils.h | 16 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma.h | 3 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_cmd.c | 9 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_main.c | 137 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_qp_ops.c | 5 |
13 files changed, 283 insertions, 139 deletions
@@ -375,6 +375,7 @@ hax="no" hvf="no" whpx="no" rdma="" +pvrdma="" gprof="no" debug_tcg="no" debug="no" @@ -1363,6 +1364,10 @@ for opt do ;; --disable-rdma) rdma="no" ;; + --enable-pvrdma) pvrdma="yes" + ;; + --disable-pvrdma) pvrdma="no" + ;; --with-gtkabi=*) gtkabi="$optarg" ;; --disable-vte) vte="no" @@ -1669,7 +1674,8 @@ disabled with --disable-FEATURE, default is enabled if available: hax HAX acceleration support hvf Hypervisor.framework acceleration support whpx Windows Hypervisor Platform acceleration support - rdma Enable RDMA-based migration and PVRDMA support + rdma Enable RDMA-based migration + pvrdma Enable PVRDMA support vde support for vde network netmap support for netmap network linux-aio Linux AIO support @@ -3064,6 +3070,48 @@ EOF fi fi +########################################## +# PVRDMA detection + +cat > $TMPC <<EOF && +#include <sys/mman.h> + +int +main(void) +{ + char buf = 0; + void *addr = &buf; + addr = mremap(addr, 0, 1, MREMAP_MAYMOVE | MREMAP_FIXED); + + return 0; +} +EOF + +if test "$rdma" = "yes" ; then + case "$pvrdma" in + "") + if compile_prog "" ""; then + pvrdma="yes" + else + pvrdma="no" + fi + ;; + "yes") + if ! compile_prog "" ""; then + error_exit "PVRDMA is not supported since mremap is not implemented" + fi + pvrdma="yes" + ;; + "no") + pvrdma="no" + ;; + esac +else + if test "$pvrdma" = "yes" ; then + error_exit "PVRDMA requires rdma suppport" + fi + pvrdma="no" +fi ########################################## # VNC SASL detection @@ -5952,6 +6000,7 @@ if test "$tcg" = "yes" ; then fi echo "malloc trim support $malloc_trim" echo "RDMA support $rdma" +echo "PVRDMA support $pvrdma" echo "fdt support $fdt" echo "membarrier $membarrier" echo "preadv support $preadv" @@ -6708,6 +6757,10 @@ if test "$rdma" = "yes" ; then echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak fi +if test "$pvrdma" = "yes" ; then + echo "CONFIG_PVRDMA=y" >> $config_host_mak +fi + if test "$have_rtnetlink" = "yes" ; then echo "CONFIG_RTNETLINK=y" >> $config_host_mak fi diff --git a/hw/rdma/Makefile.objs b/hw/rdma/Makefile.objs index 3504c39d21..bd36cbf51c 100644 --- a/hw/rdma/Makefile.objs +++ b/hw/rdma/Makefile.objs @@ -1,4 +1,4 @@ -ifeq ($(CONFIG_RDMA),y) +ifeq ($(CONFIG_PVRDMA),y) obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o vmw/pvrdma_cmd.o \ vmw/pvrdma_qp_ops.o vmw/pvrdma_main.o diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index e9ced6f9ef..d7a4bbd91f 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -35,6 +35,7 @@ #define VENDOR_ERR_MR_SMALL 0x208 #define THR_NAME_LEN 16 +#define THR_POLL_TO 5000 typedef struct BackendCtx { uint64_t req_id; @@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg) int rc; struct ibv_cq *ev_cq; void *ev_ctx; + int flags; + GPollFD pfds[1]; + + /* Change to non-blocking mode */ + flags = fcntl(backend_dev->channel->fd, F_GETFL); + rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK); + if (rc < 0) { + pr_dbg("Fail to change to non-blocking mode\n"); + return NULL; + } pr_dbg("Starting\n"); + pfds[0].fd = backend_dev->channel->fd; + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; + + backend_dev->comp_thread.is_running = true; + while (backend_dev->comp_thread.run) { - pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); - rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); - pr_dbg("ibv_get_cq_event=%d\n", rc); - if (unlikely(rc)) { - pr_dbg("---> ibv_get_cq_event (%d)\n", rc); - continue; - } + do { + rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS); + } while (!rc && backend_dev->comp_thread.run); + + if (backend_dev->comp_thread.run) { + pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); + rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); + pr_dbg("ibv_get_cq_event=%d\n", rc); + if (unlikely(rc)) { + pr_dbg("---> ibv_get_cq_event (%d)\n", rc); + continue; + } - rc = ibv_req_notify_cq(ev_cq, 0); - if (unlikely(rc)) { - pr_dbg("Error %d from ibv_req_notify_cq\n", rc); - } + rc = ibv_req_notify_cq(ev_cq, 0); + if (unlikely(rc)) { + pr_dbg("Error %d from ibv_req_notify_cq\n", rc); + } - poll_cq(backend_dev->rdma_dev_res, ev_cq); + poll_cq(backend_dev->rdma_dev_res, ev_cq); - ibv_ack_cq_events(ev_cq, 1); + ibv_ack_cq_events(ev_cq, 1); + } } pr_dbg("Going down\n"); /* TODO: Post cqe for all remaining buffs that were posted */ + backend_dev->comp_thread.is_running = false; + + qemu_thread_exit(0); + return NULL; } +static void stop_backend_thread(RdmaBackendThread *thread) +{ + thread->run = false; + while (thread->is_running) { + pr_dbg("Waiting for thread to complete\n"); + sleep(THR_POLL_TO / SCALE_US / 2); + } +} + +static void start_comp_thread(RdmaBackendDev *backend_dev) +{ + char thread_name[THR_NAME_LEN] = {0}; + + stop_backend_thread(&backend_dev->comp_thread); + + snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", + ibv_get_device_name(backend_dev->ib_dev)); + backend_dev->comp_thread.run = true; + qemu_thread_create(&backend_dev->comp_thread.thread, thread_name, + comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); +} + void rdma_backend_register_comp_handler(void (*handler)(int status, unsigned int vendor_err, void *ctx)) { @@ -223,8 +271,7 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey; } - dsge->addr = (uintptr_t)mr->user_mr.host_virt + ssge[ssge_idx].addr - - mr->user_mr.guest_start; + dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start; dsge->length = ssge[ssge_idx].length; dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr); @@ -697,7 +744,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, return 0; } -int rdma_backend_init(RdmaBackendDev *backend_dev, +int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, @@ -706,10 +753,13 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, int i; int ret = 0; int num_ibv_devices; - char thread_name[THR_NAME_LEN] = {0}; struct ibv_device **dev_list; struct ibv_port_attr port_attr; + memset(backend_dev, 0, sizeof(*backend_dev)); + + backend_dev->dev = pdev; + backend_dev->backend_gid_idx = backend_gid_idx; backend_dev->port_num = port_num; backend_dev->rdma_dev_res = rdma_dev_res; @@ -800,11 +850,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, pr_dbg("interface_id=0x%" PRIx64 "\n", be64_to_cpu(backend_dev->gid.global.interface_id)); - snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", - ibv_get_device_name(backend_dev->ib_dev)); - backend_dev->comp_thread.run = true; - qemu_thread_create(&backend_dev->comp_thread.thread, thread_name, - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); + backend_dev->comp_thread.run = false; + backend_dev->comp_thread.is_running = false; ah_cache_init(); @@ -823,8 +870,22 @@ out: return ret; } + +void rdma_backend_start(RdmaBackendDev *backend_dev) +{ + pr_dbg("Starting rdma_backend\n"); + start_comp_thread(backend_dev); +} + +void rdma_backend_stop(RdmaBackendDev *backend_dev) +{ + pr_dbg("Stopping rdma_backend\n"); + stop_backend_thread(&backend_dev->comp_thread); +} + void rdma_backend_fini(RdmaBackendDev *backend_dev) { + rdma_backend_stop(backend_dev); g_hash_table_destroy(ah_hash); ibv_destroy_comp_channel(backend_dev->channel); ibv_close_device(backend_dev->context); diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 3cd636dd88..86e8fe8ab6 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -46,12 +46,14 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr) return mr->ibmr ? mr->ibmr->rkey : 0; } -int rdma_backend_init(RdmaBackendDev *backend_dev, +int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, Error **errp); void rdma_backend_fini(RdmaBackendDev *backend_dev); +void rdma_backend_start(RdmaBackendDev *backend_dev); +void rdma_backend_stop(RdmaBackendDev *backend_dev); void rdma_backend_register_comp_handler(void (*handler)(int status, unsigned int vendor_err, void *ctx)); void rdma_backend_unregister_comp_handler(void); diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index ff5cfc26eb..7404f64002 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources; typedef struct RdmaBackendThread { QemuThread thread; QemuMutex mutex; - bool run; + bool run; /* Set by thread manager to let thread know it should exit */ + bool is_running; /* Set by the thread to report its status */ } RdmaBackendThread; typedef struct RdmaBackendDev { diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 415da15efe..8d59a42cd1 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -144,8 +144,6 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, RdmaRmMR *mr; int ret = 0; RdmaRmPD *pd; - void *addr; - size_t length; pd = rdma_rm_get_pd(dev_res, pd_handle); if (!pd) { @@ -158,40 +156,30 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, pr_dbg("Failed to allocate obj in table\n"); return -ENOMEM; } - - if (!host_virt) { - /* TODO: This is my guess but not so sure that this needs to be - * done */ - length = TARGET_PAGE_SIZE; - addr = g_malloc(length); - } else { - mr->user_mr.host_virt = host_virt; - pr_dbg("host_virt=0x%p\n", mr->user_mr.host_virt); - mr->user_mr.length = guest_length; - pr_dbg("length=%zu\n", guest_length); - mr->user_mr.guest_start = guest_start; - pr_dbg("guest_start=0x%" PRIx64 "\n", mr->user_mr.guest_start); - - length = mr->user_mr.length; - addr = mr->user_mr.host_virt; + pr_dbg("mr_handle=%d\n", *mr_handle); + + pr_dbg("host_virt=0x%p\n", host_virt); + pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start); + pr_dbg("length=%zu\n", guest_length); + + if (host_virt) { + mr->virt = host_virt; + mr->start = guest_start; + mr->length = guest_length; + mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1)); + + ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt, + mr->length, access_flags); + if (ret) { + pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret); + ret = -EIO; + goto out_dealloc_mr; + } } - ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length, - access_flags); - if (ret) { - pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret); - ret = -EIO; - goto out_dealloc_mr; - } - - if (!host_virt) { - *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr); - *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr); - } else { - /* We keep mr_handle in lkey so send and recv get get mr ptr */ - *lkey = *mr_handle; - *rkey = -1; - } + /* We keep mr_handle in lkey so send and recv get get mr ptr */ + *lkey = *mr_handle; + *rkey = -1; mr->pd_handle = pd_handle; @@ -214,7 +202,11 @@ void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) if (mr) { rdma_backend_destroy_mr(&mr->backend_mr); - munmap(mr->user_mr.host_virt, mr->user_mr.length); + pr_dbg("start=0x%" PRIx64 "\n", mr->start); + if (mr->start) { + mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1)); + munmap(mr->virt, mr->length); + } res_tbl_dealloc(&dev_res->mr_tbl, mr_handle); } } @@ -399,7 +391,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, RdmaRmQP *qp; int ret; - pr_dbg("qpn=%d\n", qp_handle); + pr_dbg("qpn=0x%x\n", qp_handle); qp = rdma_rm_get_qp(dev_res, qp_handle); if (!qp) { @@ -457,7 +449,7 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, { RdmaRmQP *qp; - pr_dbg("qpn=%d\n", qp_handle); + pr_dbg("qpn=0x%x\n", qp_handle); qp = rdma_rm_get_qp(dev_res, qp_handle); if (!qp) { @@ -553,8 +545,9 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res) res_tbl_free(&dev_res->uc_tbl); res_tbl_free(&dev_res->cqe_ctx_tbl); res_tbl_free(&dev_res->qp_tbl); - res_tbl_free(&dev_res->cq_tbl); res_tbl_free(&dev_res->mr_tbl); + res_tbl_free(&dev_res->cq_tbl); res_tbl_free(&dev_res->pd_tbl); + g_hash_table_destroy(dev_res->qp_hash); } diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 226011176d..7228151239 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -55,16 +55,12 @@ typedef struct RdmaRmCQ { bool notify; } RdmaRmCQ; -typedef struct RdmaRmUserMR { - void *host_virt; - uint64_t guest_start; - size_t length; -} RdmaRmUserMR; - /* MR (DMA region) */ typedef struct RdmaRmMR { RdmaBackendMR backend_mr; - RdmaRmUserMR user_mr; + void *virt; + uint64_t start; + size_t length; uint32_t pd_handle; uint32_t lkey; uint32_t rkey; diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c index d713f635f1..dc23f158f3 100644 --- a/hw/rdma/rdma_utils.c +++ b/hw/rdma/rdma_utils.c @@ -15,6 +15,10 @@ #include "rdma_utils.h" +#ifdef PVRDMA_DEBUG +unsigned long pr_dbg_cnt; +#endif + void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen) { void *p; diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index 3dc07891bc..04c7c2ef5b 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -22,18 +22,26 @@ #include "sysemu/dma.h" #define pr_info(fmt, ...) \ - fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\ + fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\ ## __VA_ARGS__) #define pr_err(fmt, ...) \ - fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "pvrdma", __func__, \ + fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "rdma", __func__, \ __LINE__, ## __VA_ARGS__) #ifdef PVRDMA_DEBUG +extern unsigned long pr_dbg_cnt; + +#define init_pr_dbg(void) \ +{ \ + pr_dbg_cnt = 0; \ +} + #define pr_dbg(fmt, ...) \ - fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\ - ## __VA_ARGS__) + fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \ + __func__, __LINE__, ## __VA_ARGS__) #else +#define init_pr_dbg(void) #define pr_dbg(fmt, ...) #endif diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h index 81e0e0e99c..e2d9f93cdf 100644 --- a/hw/rdma/vmw/pvrdma.h +++ b/hw/rdma/vmw/pvrdma.h @@ -50,6 +50,9 @@ #define PVRDMA_HW_VERSION 17 #define PVRDMA_FW_VERSION 14 +/* Some defaults */ +#define PVRDMA_PKEY 0x7FFF + typedef struct DSRInfo { dma_addr_t dma; struct pvrdma_device_shared_region *dsr; diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 14255d609f..4faeb21631 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -16,7 +16,6 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "cpu.h" -#include <linux/types.h> #include "hw/hw.h" #include "hw/pci/pci.h" #include "hw/pci/pci_ids.h" @@ -59,6 +58,7 @@ static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, } host_virt = mremap(curr_page, 0, length, MREMAP_MAYMOVE); + pr_dbg("mremap %p -> %p\n", curr_page, host_virt); if (host_virt == MAP_FAILED) { host_virt = NULL; error_report("PVRDMA: Failed to remap memory for host_virt"); @@ -166,7 +166,7 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req, resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP; resp->hdr.err = 0; - resp->pkey = 0x7FFF; + resp->pkey = PVRDMA_PKEY; pr_dbg("pkey=0x%x\n", resp->pkey); return 0; @@ -524,6 +524,7 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, struct ibv_qp_init_attr init_attr; pr_dbg("qp_handle=%d\n", cmd->qp_handle); + pr_dbg("attr_mask=0x%x\n", cmd->attr_mask); memset(rsp, 0, sizeof(*rsp)); rsp->hdr.response = cmd->hdr.response; @@ -531,8 +532,8 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, rsp->hdr.err = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle, - (struct ibv_qp_attr *)&resp->attrs, -1, - &init_attr); + (struct ibv_qp_attr *)&resp->attrs, + cmd->attr_mask, &init_attr); pr_dbg("ret=%d\n", rsp->hdr.err); return rsp->hdr.err; diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 3ed7409763..ca5fa8d981 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp) } } +static void uninit_msix(PCIDevice *pdev, int used_vectors) +{ + PVRDMADev *dev = PVRDMA_DEV(pdev); + int i; + + for (i = 0; i < used_vectors; i++) { + msix_vector_unuse(pdev, i); + } + + msix_uninit(pdev, &dev->msix, &dev->msix); +} + +static int init_msix(PCIDevice *pdev, Error **errp) +{ + PVRDMADev *dev = PVRDMA_DEV(pdev); + int i; + int rc; + + rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, + RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, + RDMA_MSIX_PBA, 0, NULL); + + if (rc < 0) { + error_setg(errp, "Failed to initialize MSI-X"); + return rc; + } + + for (i = 0; i < RDMA_MAX_INTRS; i++) { + rc = msix_vector_use(PCI_DEVICE(dev), i); + if (rc < 0) { + error_setg(errp, "Fail mark MSI-X vector %d", i); + uninit_msix(pdev, i); + return rc; + } + } + + return 0; +} + +static void pvrdma_fini(PCIDevice *pdev) +{ + PVRDMADev *dev = PVRDMA_DEV(pdev); + + pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + + pvrdma_qp_ops_fini(); + + rdma_rm_fini(&dev->rdma_dev_res); + + rdma_backend_fini(&dev->backend_dev); + + free_dsr(dev); + + if (msix_enabled(pdev)) { + uninit_msix(pdev, RDMA_MAX_INTRS); + } +} + +static void pvrdma_stop(PVRDMADev *dev) +{ + rdma_backend_stop(&dev->backend_dev); +} + +static void pvrdma_start(PVRDMADev *dev) +{ + rdma_backend_start(&dev->backend_dev); +} + static void activate_device(PVRDMADev *dev) { + pvrdma_start(dev); set_reg_val(dev, PVRDMA_REG_ERR, 0); pr_dbg("Device activated\n"); } @@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev) static int reset_device(PVRDMADev *dev) { + pvrdma_stop(dev); + pr_dbg("Device reset complete\n"); + return 0; } @@ -357,7 +430,7 @@ static void regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) reset_device(dev); break; } - break; + break; case PVRDMA_REG_IMR: pr_dbg("Interrupt mask=0x%" PRIx64 "\n", val); dev->interrupt_mask = val; @@ -366,7 +439,7 @@ static void regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) if (val == 0) { execute_command(dev); } - break; + break; default: break; } @@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev) set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF); } -static void uninit_msix(PCIDevice *pdev, int used_vectors) -{ - PVRDMADev *dev = PVRDMA_DEV(pdev); - int i; - - for (i = 0; i < used_vectors; i++) { - msix_vector_unuse(pdev, i); - } - - msix_uninit(pdev, &dev->msix, &dev->msix); -} - -static int init_msix(PCIDevice *pdev, Error **errp) -{ - PVRDMADev *dev = PVRDMA_DEV(pdev); - int i; - int rc; - - rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, - RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, - RDMA_MSIX_PBA, 0, NULL); - - if (rc < 0) { - error_setg(errp, "Failed to initialize MSI-X"); - return rc; - } - - for (i = 0; i < RDMA_MAX_INTRS; i++) { - rc = msix_vector_use(PCI_DEVICE(dev), i); - if (rc < 0) { - error_setg(errp, "Fail mark MSI-X vercor %d", i); - uninit_msix(pdev, i); - return rc; - } - } - - return 0; -} - static void init_dev_caps(PVRDMADev *dev) { size_t pg_tbl_bytes = TARGET_PAGE_SIZE * @@ -543,6 +577,8 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) Object *memdev_root; bool ram_shared = false; + init_pr_dbg(); + pr_dbg("Initializing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); @@ -575,7 +611,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) goto out; } - rc = rdma_backend_init(&dev->backend_dev, &dev->rdma_dev_res, + rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, dev->backend_device_name, dev->backend_port_num, dev->backend_gid_idx, &dev->dev_attr, errp); if (rc) { @@ -602,22 +638,7 @@ out: static void pvrdma_exit(PCIDevice *pdev) { - PVRDMADev *dev = PVRDMA_DEV(pdev); - - pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - - pvrdma_qp_ops_fini(); - - rdma_rm_fini(&dev->rdma_dev_res); - - rdma_backend_fini(&dev->backend_dev); - - free_dsr(dev); - - if (msix_enabled(pdev)) { - uninit_msix(pdev, RDMA_MAX_INTRS); - } + pvrdma_fini(pdev); } static void pvrdma_class_init(ObjectClass *klass, void *data) diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 99bb51111e..c668afd0ed 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -69,6 +69,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, return -EINVAL; } + memset(cqe1, 0, sizeof(*cqe1)); cqe1->wr_id = cqe->wr_id; cqe1->qp = cqe->qp; cqe1->opcode = cqe->opcode; @@ -129,7 +130,7 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) PvrdmaSqWqe *wqe; PvrdmaRing *ring; - pr_dbg("qp_handle=%d\n", qp_handle); + pr_dbg("qp_handle=0x%x\n", qp_handle); qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); if (unlikely(!qp)) { @@ -173,7 +174,7 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) PvrdmaRqWqe *wqe; PvrdmaRing *ring; - pr_dbg("qp_handle=%d\n", qp_handle); + pr_dbg("qp_handle=0x%x\n", qp_handle); qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); if (unlikely(!qp)) { |