aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2018-08-20 15:44:40 +0100
committer Peter Maydell <peter.maydell@linaro.org> 2018-08-20 15:44:41 +0100
commit c8090972fa85b197411b530ddf81a2867379406c (patch)
tree e69e51b0b544fe9486ec6811f024944a9300f327
parent 62c34848efb41f0e81af0c6b4f1d5d577039eec9 (diff)
parent 21ab34c9543fe1b6d31b3edbd01a397e7e090d00 (diff)
Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging
RDMA queue

# gpg: Signature made Sat 18 Aug 2018 16:01:46 BST
# gpg: using RSA key 36D4C0F0CF2FE46D
# gpg: Good signature from "Marcel Apfelbaum <marcel.apfelbaum@zoho.com>"
# gpg: aka "Marcel Apfelbaum <marcel@redhat.com>"
# gpg: aka "Marcel Apfelbaum <marcel.apfelbaum@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B1C6 3A57 F92E 08F2 640F 31F5 36D4 C0F0 CF2F E46D

* remotes/marcel/tags/rdma-pull-request:
  config: split PVRDMA from RDMA
  hw/pvrdma: remove not needed include
  hw/rdma: Add reference to pci_dev in backend_dev
  hw/rdma: Bugfix - Support non-aligned buffers
  hw/rdma: Print backend QP number in hex format
  hw/rdma: Cosmetic change - move to generic function
  hw/pvrdma: Cosmetic change - indent right
  hw/rdma: Reorder resource cleanup
  hw/rdma: Do not allocate memory for non-dma MR
  hw/rdma: Delete useless structure RdmaRmUserMR
  hw/pvrdma: Make default pkey 0xFFFF
  hw/pvrdma: Clean CQE before use
  hw/rdma: Modify debug macros
  hw/pvrdma: Bugfix - provide the correct attr_mask to query_qp
  hw/rdma: Make distinction between device init and start modes

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-x configure 55
-rw-r--r-- hw/rdma/Makefile.objs 2
-rw-r--r-- hw/rdma/rdma_backend.c 105
-rw-r--r-- hw/rdma/rdma_backend.h 4
-rw-r--r-- hw/rdma/rdma_backend_defs.h 3
-rw-r--r-- hw/rdma/rdma_rm.c 69
-rw-r--r-- hw/rdma/rdma_rm_defs.h 10
-rw-r--r-- hw/rdma/rdma_utils.c 4
-rw-r--r-- hw/rdma/rdma_utils.h 16
-rw-r--r-- hw/rdma/vmw/pvrdma.h 3
-rw-r--r-- hw/rdma/vmw/pvrdma_cmd.c 9
-rw-r--r-- hw/rdma/vmw/pvrdma_main.c 137
-rw-r--r-- hw/rdma/vmw/pvrdma_qp_ops.c 5
13 files changed, 283 insertions, 139 deletions
diff --git a/configure b/configure
index db97930314..7d9a63636c 100755
--- a/configure
+++ b/configure
@@ -375,6 +375,7 @@ hax="no"
hvf="no"
whpx="no"
rdma=""
+pvrdma=""
gprof="no"
debug_tcg="no"
debug="no"
@@ -1363,6 +1364,10 @@ for opt do
;;
--disable-rdma) rdma="no"
;;
+ --enable-pvrdma) pvrdma="yes"
+ ;;
+ --disable-pvrdma) pvrdma="no"
+ ;;
--with-gtkabi=*) gtkabi="$optarg"
;;
--disable-vte) vte="no"
@@ -1669,7 +1674,8 @@ disabled with --disable-FEATURE, default is enabled if available:
hax HAX acceleration support
hvf Hypervisor.framework acceleration support
whpx Windows Hypervisor Platform acceleration support
- rdma Enable RDMA-based migration and PVRDMA support
+ rdma Enable RDMA-based migration
+ pvrdma Enable PVRDMA support
vde support for vde network
netmap support for netmap network
linux-aio Linux AIO support
@@ -3064,6 +3070,48 @@ EOF
fi
fi
+##########################################
+# PVRDMA detection
+
+cat > $TMPC <<EOF &&
+#include <sys/mman.h>
+
+int
+main(void)
+{
+ char buf = 0;
+ void *addr = &buf;
+ addr = mremap(addr, 0, 1, MREMAP_MAYMOVE | MREMAP_FIXED);
+
+ return 0;
+}
+EOF
+
+if test "$rdma" = "yes" ; then
+ case "$pvrdma" in
+ "")
+ if compile_prog "" ""; then
+ pvrdma="yes"
+ else
+ pvrdma="no"
+ fi
+ ;;
+ "yes")
+ if ! compile_prog "" ""; then
+ error_exit "PVRDMA is not supported since mremap is not implemented"
+ fi
+ pvrdma="yes"
+ ;;
+ "no")
+ pvrdma="no"
+ ;;
+ esac
+else
+ if test "$pvrdma" = "yes" ; then # --enable-pvrdma was requested but $rdma is not "yes"
+ error_exit "PVRDMA requires rdma support"
+ fi
+ pvrdma="no"
+fi
##########################################
# VNC SASL detection
@@ -5952,6 +6000,7 @@ if test "$tcg" = "yes" ; then
fi
echo "malloc trim support $malloc_trim"
echo "RDMA support $rdma"
+echo "PVRDMA support $pvrdma"
echo "fdt support $fdt"
echo "membarrier $membarrier"
echo "preadv support $preadv"
@@ -6708,6 +6757,10 @@ if test "$rdma" = "yes" ; then
echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak
fi
+if test "$pvrdma" = "yes" ; then
+ echo "CONFIG_PVRDMA=y" >> $config_host_mak
+fi
+
if test "$have_rtnetlink" = "yes" ; then
echo "CONFIG_RTNETLINK=y" >> $config_host_mak
fi
diff --git a/hw/rdma/Makefile.objs b/hw/rdma/Makefile.objs
index 3504c39d21..bd36cbf51c 100644
--- a/hw/rdma/Makefile.objs
+++ b/hw/rdma/Makefile.objs
@@ -1,4 +1,4 @@
-ifeq ($(CONFIG_RDMA),y)
+ifeq ($(CONFIG_PVRDMA),y)
obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o
obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o vmw/pvrdma_cmd.o \
vmw/pvrdma_qp_ops.o vmw/pvrdma_main.o
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index e9ced6f9ef..d7a4bbd91f 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -35,6 +35,7 @@
#define VENDOR_ERR_MR_SMALL 0x208
#define THR_NAME_LEN 16
+#define THR_POLL_TO 5000 /* completion-thread poll timeout; presumably milliseconds, as it is scaled by SCALE_MS for qemu_poll_ns() */
typedef struct BackendCtx {
uint64_t req_id;
@@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg)
int rc;
struct ibv_cq *ev_cq;
void *ev_ctx;
+ int flags;
+ GPollFD pfds[1];
+
+ /* Change to non-blocking mode */
+ flags = fcntl(backend_dev->channel->fd, F_GETFL);
+ rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK);
+ if (rc < 0) {
+ pr_dbg("Fail to change to non-blocking mode\n");
+ return NULL;
+ }
pr_dbg("Starting\n");
+ pfds[0].fd = backend_dev->channel->fd;
+ pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+
+ backend_dev->comp_thread.is_running = true;
+
while (backend_dev->comp_thread.run) {
- pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
- rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
- pr_dbg("ibv_get_cq_event=%d\n", rc);
- if (unlikely(rc)) {
- pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
- continue;
- }
+ do {
+ rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS);
+ } while (!rc && backend_dev->comp_thread.run);
+
+ if (backend_dev->comp_thread.run) {
+ pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
+ rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
+ pr_dbg("ibv_get_cq_event=%d\n", rc);
+ if (unlikely(rc)) {
+ pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
+ continue;
+ }
- rc = ibv_req_notify_cq(ev_cq, 0);
- if (unlikely(rc)) {
- pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
- }
+ rc = ibv_req_notify_cq(ev_cq, 0);
+ if (unlikely(rc)) {
+ pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
+ }
- poll_cq(backend_dev->rdma_dev_res, ev_cq);
+ poll_cq(backend_dev->rdma_dev_res, ev_cq);
- ibv_ack_cq_events(ev_cq, 1);
+ ibv_ack_cq_events(ev_cq, 1);
+ }
}
pr_dbg("Going down\n");
/* TODO: Post cqe for all remaining buffs that were posted */
+ backend_dev->comp_thread.is_running = false;
+
+ qemu_thread_exit(0);
+
return NULL;
}
+static void stop_backend_thread(RdmaBackendThread *thread)
+{ /* Ask the thread to exit, then block until it reports that it has stopped. */
+ thread->run = false; /* the thread's main loop is conditioned on this flag */
+ while (thread->is_running) { /* cleared by the thread itself just before it exits */
+ pr_dbg("Waiting for thread to complete\n");
+ sleep(THR_POLL_TO / SCALE_US / 2); /* re-check periodically; the thread is detached, so there is no join */
+ }
+}
+
+static void start_comp_thread(RdmaBackendDev *backend_dev)
+{ /* (Re)start the completion-handler thread of this backend device. */
+ char thread_name[THR_NAME_LEN] = {0};
+
+ stop_backend_thread(&backend_dev->comp_thread); /* wait out any instance that is still running */
+
+ snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
+ ibv_get_device_name(backend_dev->ib_dev)); /* name is bounded by THR_NAME_LEN */
+ backend_dev->comp_thread.run = true; /* set before creation: the new thread loops while this is true */
+ qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
+ comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
+}
+
void rdma_backend_register_comp_handler(void (*handler)(int status,
unsigned int vendor_err, void *ctx))
{
@@ -223,8 +271,7 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
}
- dsge->addr = (uintptr_t)mr->user_mr.host_virt + ssge[ssge_idx].addr -
- mr->user_mr.guest_start;
+ dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start;
dsge->length = ssge[ssge_idx].length;
dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
@@ -697,7 +744,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
return 0;
}
-int rdma_backend_init(RdmaBackendDev *backend_dev,
+int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
RdmaDeviceResources *rdma_dev_res,
const char *backend_device_name, uint8_t port_num,
uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
@@ -706,10 +753,13 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
int i;
int ret = 0;
int num_ibv_devices;
- char thread_name[THR_NAME_LEN] = {0};
struct ibv_device **dev_list;
struct ibv_port_attr port_attr;
+ memset(backend_dev, 0, sizeof(*backend_dev));
+
+ backend_dev->dev = pdev;
+
backend_dev->backend_gid_idx = backend_gid_idx;
backend_dev->port_num = port_num;
backend_dev->rdma_dev_res = rdma_dev_res;
@@ -800,11 +850,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
pr_dbg("interface_id=0x%" PRIx64 "\n",
be64_to_cpu(backend_dev->gid.global.interface_id));
- snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
- ibv_get_device_name(backend_dev->ib_dev));
- backend_dev->comp_thread.run = true;
- qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
- comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
+ backend_dev->comp_thread.run = false;
+ backend_dev->comp_thread.is_running = false;
ah_cache_init();
@@ -823,8 +870,22 @@ out:
return ret;
}
+
+void rdma_backend_start(RdmaBackendDev *backend_dev)
+{ /* Start the backend by launching its completion-handler thread. */
+ pr_dbg("Starting rdma_backend\n");
+ start_comp_thread(backend_dev);
+}
+
+void rdma_backend_stop(RdmaBackendDev *backend_dev)
+{ /* Stop the backend: signal the completion-handler thread and wait for it to exit. */
+ pr_dbg("Stopping rdma_backend\n");
+ stop_backend_thread(&backend_dev->comp_thread);
+}
+
void rdma_backend_fini(RdmaBackendDev *backend_dev)
{
+ rdma_backend_stop(backend_dev);
g_hash_table_destroy(ah_hash);
ibv_destroy_comp_channel(backend_dev->channel);
ibv_close_device(backend_dev->context);
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 3cd636dd88..86e8fe8ab6 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -46,12 +46,14 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr)
return mr->ibmr ? mr->ibmr->rkey : 0;
}
-int rdma_backend_init(RdmaBackendDev *backend_dev,
+int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
RdmaDeviceResources *rdma_dev_res,
const char *backend_device_name, uint8_t port_num,
uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
Error **errp);
void rdma_backend_fini(RdmaBackendDev *backend_dev);
+void rdma_backend_start(RdmaBackendDev *backend_dev);
+void rdma_backend_stop(RdmaBackendDev *backend_dev);
void rdma_backend_register_comp_handler(void (*handler)(int status,
unsigned int vendor_err, void *ctx));
void rdma_backend_unregister_comp_handler(void);
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
index ff5cfc26eb..7404f64002 100644
--- a/hw/rdma/rdma_backend_defs.h
+++ b/hw/rdma/rdma_backend_defs.h
@@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources;
typedef struct RdmaBackendThread {
QemuThread thread;
QemuMutex mutex;
- bool run;
+ bool run; /* Set by thread manager to let thread know it should exit */
+ bool is_running; /* Set by the thread to report its status */
} RdmaBackendThread;
typedef struct RdmaBackendDev {
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 415da15efe..8d59a42cd1 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -144,8 +144,6 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
RdmaRmMR *mr;
int ret = 0;
RdmaRmPD *pd;
- void *addr;
- size_t length;
pd = rdma_rm_get_pd(dev_res, pd_handle);
if (!pd) {
@@ -158,40 +156,30 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
pr_dbg("Failed to allocate obj in table\n");
return -ENOMEM;
}
-
- if (!host_virt) {
- /* TODO: This is my guess but not so sure that this needs to be
- * done */
- length = TARGET_PAGE_SIZE;
- addr = g_malloc(length);
- } else {
- mr->user_mr.host_virt = host_virt;
- pr_dbg("host_virt=0x%p\n", mr->user_mr.host_virt);
- mr->user_mr.length = guest_length;
- pr_dbg("length=%zu\n", guest_length);
- mr->user_mr.guest_start = guest_start;
- pr_dbg("guest_start=0x%" PRIx64 "\n", mr->user_mr.guest_start);
-
- length = mr->user_mr.length;
- addr = mr->user_mr.host_virt;
+ pr_dbg("mr_handle=%d\n", *mr_handle);
+
+ pr_dbg("host_virt=0x%p\n", host_virt);
+ pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
+ pr_dbg("length=%zu\n", guest_length);
+
+ if (host_virt) {
+ mr->virt = host_virt;
+ mr->start = guest_start;
+ mr->length = guest_length;
+ mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));
+
+ ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
+ mr->length, access_flags);
+ if (ret) {
+ pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
+ ret = -EIO;
+ goto out_dealloc_mr;
+ }
}
- ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length,
- access_flags);
- if (ret) {
- pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
- ret = -EIO;
- goto out_dealloc_mr;
- }
-
- if (!host_virt) {
- *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
- *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr);
- } else {
- /* We keep mr_handle in lkey so send and recv get get mr ptr */
- *lkey = *mr_handle;
- *rkey = -1;
- }
+ /* We keep mr_handle in lkey so send and recv get get mr ptr */
+ *lkey = *mr_handle;
+ *rkey = -1;
mr->pd_handle = pd_handle;
@@ -214,7 +202,11 @@ void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
if (mr) {
rdma_backend_destroy_mr(&mr->backend_mr);
- munmap(mr->user_mr.host_virt, mr->user_mr.length);
+ pr_dbg("start=0x%" PRIx64 "\n", mr->start);
+ if (mr->start) {
+ mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
+ munmap(mr->virt, mr->length);
+ }
res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
}
}
@@ -399,7 +391,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
RdmaRmQP *qp;
int ret;
- pr_dbg("qpn=%d\n", qp_handle);
+ pr_dbg("qpn=0x%x\n", qp_handle);
qp = rdma_rm_get_qp(dev_res, qp_handle);
if (!qp) {
@@ -457,7 +449,7 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
{
RdmaRmQP *qp;
- pr_dbg("qpn=%d\n", qp_handle);
+ pr_dbg("qpn=0x%x\n", qp_handle);
qp = rdma_rm_get_qp(dev_res, qp_handle);
if (!qp) {
@@ -553,8 +545,9 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res)
res_tbl_free(&dev_res->uc_tbl);
res_tbl_free(&dev_res->cqe_ctx_tbl);
res_tbl_free(&dev_res->qp_tbl);
- res_tbl_free(&dev_res->cq_tbl);
res_tbl_free(&dev_res->mr_tbl);
+ res_tbl_free(&dev_res->cq_tbl);
res_tbl_free(&dev_res->pd_tbl);
+
g_hash_table_destroy(dev_res->qp_hash);
}
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 226011176d..7228151239 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -55,16 +55,12 @@ typedef struct RdmaRmCQ {
bool notify;
} RdmaRmCQ;
-typedef struct RdmaRmUserMR {
- void *host_virt;
- uint64_t guest_start;
- size_t length;
-} RdmaRmUserMR;
-
/* MR (DMA region) */
typedef struct RdmaRmMR {
RdmaBackendMR backend_mr;
- RdmaRmUserMR user_mr;
+ void *virt;
+ uint64_t start;
+ size_t length;
uint32_t pd_handle;
uint32_t lkey;
uint32_t rkey;
diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c
index d713f635f1..dc23f158f3 100644
--- a/hw/rdma/rdma_utils.c
+++ b/hw/rdma/rdma_utils.c
@@ -15,6 +15,10 @@
#include "rdma_utils.h"
+#ifdef PVRDMA_DEBUG
+unsigned long pr_dbg_cnt;
+#endif
+
void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen)
{
void *p;
diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h
index 3dc07891bc..04c7c2ef5b 100644
--- a/hw/rdma/rdma_utils.h
+++ b/hw/rdma/rdma_utils.h
@@ -22,18 +22,26 @@
#include "sysemu/dma.h"
#define pr_info(fmt, ...) \
- fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\
+ fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\
## __VA_ARGS__)
#define pr_err(fmt, ...) \
- fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "pvrdma", __func__, \
+ fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "rdma", __func__, \
__LINE__, ## __VA_ARGS__)
#ifdef PVRDMA_DEBUG
+extern unsigned long pr_dbg_cnt;
+
+#define init_pr_dbg(void) \
+{ \
+ pr_dbg_cnt = 0; \
+}
+
#define pr_dbg(fmt, ...) /* debug trace: thread id, message counter, function, line */ \
- fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\
- ## __VA_ARGS__)
+ fprintf(stdout, "%lx %lu: %-20s (%3d): " fmt, /* %lu matches unsigned long pr_dbg_cnt */ \
+ (unsigned long)pthread_self(), pr_dbg_cnt++, __func__, __LINE__, ## __VA_ARGS__)
#else
+#define init_pr_dbg(void)
#define pr_dbg(fmt, ...)
#endif
diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index 81e0e0e99c..e2d9f93cdf 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -50,6 +50,9 @@
#define PVRDMA_HW_VERSION 17
#define PVRDMA_FW_VERSION 14
+/* Some defaults */
+#define PVRDMA_PKEY 0xFFFF /* default partition key reported by query_pkey */
+
typedef struct DSRInfo {
dma_addr_t dma;
struct pvrdma_device_shared_region *dsr;
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 14255d609f..4faeb21631 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -16,7 +16,6 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "cpu.h"
-#include <linux/types.h>
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_ids.h"
@@ -59,6 +58,7 @@ static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma,
}
host_virt = mremap(curr_page, 0, length, MREMAP_MAYMOVE);
+ pr_dbg("mremap %p -> %p\n", curr_page, host_virt);
if (host_virt == MAP_FAILED) {
host_virt = NULL;
error_report("PVRDMA: Failed to remap memory for host_virt");
@@ -166,7 +166,7 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req,
resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP;
resp->hdr.err = 0;
- resp->pkey = 0x7FFF;
+ resp->pkey = PVRDMA_PKEY;
pr_dbg("pkey=0x%x\n", resp->pkey);
return 0;
@@ -524,6 +524,7 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
struct ibv_qp_init_attr init_attr;
pr_dbg("qp_handle=%d\n", cmd->qp_handle);
+ pr_dbg("attr_mask=0x%x\n", cmd->attr_mask);
memset(rsp, 0, sizeof(*rsp));
rsp->hdr.response = cmd->hdr.response;
@@ -531,8 +532,8 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
rsp->hdr.err = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev,
cmd->qp_handle,
- (struct ibv_qp_attr *)&resp->attrs, -1,
- &init_attr);
+ (struct ibv_qp_attr *)&resp->attrs,
+ cmd->attr_mask, &init_attr);
pr_dbg("ret=%d\n", rsp->hdr.err);
return rsp->hdr.err;
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index 3ed7409763..ca5fa8d981 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp)
}
}
+static void uninit_msix(PCIDevice *pdev, int used_vectors)
+{ /* Release vectors [0, used_vectors) and then tear down MSI-X on pdev. */
+ PVRDMADev *dev = PVRDMA_DEV(pdev);
+ int i;
+
+ for (i = 0; i < used_vectors; i++) { /* only the vectors that were actually marked in use */
+ msix_vector_unuse(pdev, i);
+ }
+
+ msix_uninit(pdev, &dev->msix, &dev->msix); /* same region was passed for table and PBA in msix_init() */
+}
+
+static int init_msix(PCIDevice *pdev, Error **errp)
+{ /* Initialize MSI-X on pdev and mark RDMA_MAX_INTRS vectors in use; returns 0, or a negative rc with errp set. */
+ PVRDMADev *dev = PVRDMA_DEV(pdev);
+ int i;
+ int rc;
+
+ rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
+ RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
+ RDMA_MSIX_PBA, 0, NULL); /* table and PBA share dev->msix, at different offsets */
+
+ if (rc < 0) {
+ error_setg(errp, "Failed to initialize MSI-X");
+ return rc;
+ }
+
+ for (i = 0; i < RDMA_MAX_INTRS; i++) {
+ rc = msix_vector_use(PCI_DEVICE(dev), i);
+ if (rc < 0) {
+ error_setg(errp, "Fail mark MSI-X vector %d", i);
+ uninit_msix(pdev, i); /* roll back: vectors 0..i-1 were marked before the failure */
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void pvrdma_fini(PCIDevice *pdev)
+{ /* Tear down the device: QP ops, resource manager, backend, DSR, then MSI-X if enabled. */
+ PVRDMADev *dev = PVRDMA_DEV(pdev);
+
+ pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+
+ pvrdma_qp_ops_fini();
+
+ rdma_rm_fini(&dev->rdma_dev_res);
+
+ rdma_backend_fini(&dev->backend_dev); /* also stops the completion-handler thread */
+
+ free_dsr(dev);
+
+ if (msix_enabled(pdev)) { /* skip MSI-X teardown when it is not enabled */
+ uninit_msix(pdev, RDMA_MAX_INTRS);
+ }
+}
+
+static void pvrdma_stop(PVRDMADev *dev)
+{ /* Stop the device by stopping its RDMA backend. */
+ rdma_backend_stop(&dev->backend_dev);
+}
+
+static void pvrdma_start(PVRDMADev *dev)
+{ /* Start the device by starting its RDMA backend. */
+ rdma_backend_start(&dev->backend_dev);
+}
+
static void activate_device(PVRDMADev *dev)
{ /* Start the backend, then clear the error register. */
+ pvrdma_start(dev); /* backend thread is started on activation rather than at init */
set_reg_val(dev, PVRDMA_REG_ERR, 0);
pr_dbg("Device activated\n");
}
@@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev)
static int reset_device(PVRDMADev *dev)
{ /* Reset handler: stops the backend; always returns 0. */
+ pvrdma_stop(dev); /* blocks until the completion-handler thread has exited */
+
pr_dbg("Device reset complete\n");
+
return 0;
}
@@ -357,7 +430,7 @@ static void regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
reset_device(dev);
break;
}
- break;
+ break;
case PVRDMA_REG_IMR:
pr_dbg("Interrupt mask=0x%" PRIx64 "\n", val);
dev->interrupt_mask = val;
@@ -366,7 +439,7 @@ static void regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
if (val == 0) {
execute_command(dev);
}
- break;
+ break;
default:
break;
}
@@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev)
set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
}
-static void uninit_msix(PCIDevice *pdev, int used_vectors)
-{
- PVRDMADev *dev = PVRDMA_DEV(pdev);
- int i;
-
- for (i = 0; i < used_vectors; i++) {
- msix_vector_unuse(pdev, i);
- }
-
- msix_uninit(pdev, &dev->msix, &dev->msix);
-}
-
-static int init_msix(PCIDevice *pdev, Error **errp)
-{
- PVRDMADev *dev = PVRDMA_DEV(pdev);
- int i;
- int rc;
-
- rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
- RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
- RDMA_MSIX_PBA, 0, NULL);
-
- if (rc < 0) {
- error_setg(errp, "Failed to initialize MSI-X");
- return rc;
- }
-
- for (i = 0; i < RDMA_MAX_INTRS; i++) {
- rc = msix_vector_use(PCI_DEVICE(dev), i);
- if (rc < 0) {
- error_setg(errp, "Fail mark MSI-X vercor %d", i);
- uninit_msix(pdev, i);
- return rc;
- }
- }
-
- return 0;
-}
-
static void init_dev_caps(PVRDMADev *dev)
{
size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
@@ -543,6 +577,8 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
Object *memdev_root;
bool ram_shared = false;
+ init_pr_dbg();
+
pr_dbg("Initializing device %s %x.%x\n", pdev->name,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
@@ -575,7 +611,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
goto out;
}
- rc = rdma_backend_init(&dev->backend_dev, &dev->rdma_dev_res,
+ rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
dev->backend_device_name, dev->backend_port_num,
dev->backend_gid_idx, &dev->dev_attr, errp);
if (rc) {
@@ -602,22 +638,7 @@ out:
static void pvrdma_exit(PCIDevice *pdev)
{
- PVRDMADev *dev = PVRDMA_DEV(pdev);
-
- pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn));
-
- pvrdma_qp_ops_fini();
-
- rdma_rm_fini(&dev->rdma_dev_res);
-
- rdma_backend_fini(&dev->backend_dev);
-
- free_dsr(dev);
-
- if (msix_enabled(pdev)) {
- uninit_msix(pdev, RDMA_MAX_INTRS);
- }
+ pvrdma_fini(pdev);
}
static void pvrdma_class_init(ObjectClass *klass, void *data)
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index 99bb51111e..c668afd0ed 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -69,6 +69,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
return -EINVAL;
}
+ memset(cqe1, 0, sizeof(*cqe1));
cqe1->wr_id = cqe->wr_id;
cqe1->qp = cqe->qp;
cqe1->opcode = cqe->opcode;
@@ -129,7 +130,7 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
PvrdmaSqWqe *wqe;
PvrdmaRing *ring;
- pr_dbg("qp_handle=%d\n", qp_handle);
+ pr_dbg("qp_handle=0x%x\n", qp_handle);
qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
if (unlikely(!qp)) {
@@ -173,7 +174,7 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
PvrdmaRqWqe *wqe;
PvrdmaRing *ring;
- pr_dbg("qp_handle=%d\n", qp_handle);
+ pr_dbg("qp_handle=0x%x\n", qp_handle);
qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
if (unlikely(!qp)) {