aboutsummaryrefslogtreecommitdiff
path: root/migration/rdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'migration/rdma.c')
-rw-r--r--migration/rdma.c185
1 files changed, 33 insertions, 152 deletions
diff --git a/migration/rdma.c b/migration/rdma.c
index 672d1958a9..94a55dd95b 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -21,7 +21,6 @@
#include "migration.h"
#include "qemu-file.h"
#include "ram.h"
-#include "qemu-file-channel.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
@@ -1371,30 +1370,6 @@ const char *print_wrid(int wrid)
}
/*
- * RDMA requires memory registration (mlock/pinning), but this is not good for
- * overcommitment.
- *
- * In preparation for the future where LRU information or workload-specific
- * writable writable working set memory access behavior is available to QEMU
- * it would be nice to have in place the ability to UN-register/UN-pin
- * particular memory regions from the RDMA hardware when it is determine that
- * those regions of memory will likely not be accessed again in the near future.
- *
- * While we do not yet have such information right now, the following
- * compile-time option allows us to perform a non-optimized version of this
- * behavior.
- *
- * By uncommenting this option, you will cause *all* RDMA transfers to be
- * unregistered immediately after the transfer completes on both sides of the
- * connection. This has no effect in 'rdma-pin-all' mode, only regular mode.
- *
- * This will have a terrible impact on migration performance, so until future
- * workload information or LRU information is available, do not attempt to use
- * this feature except for basic testing.
- */
-/* #define RDMA_UNREGISTRATION_EXAMPLE */
-
-/*
* Perform a non-optimized memory unregistration after every transfer
* for demonstration purposes, only if pin-all is not requested.
*
@@ -1487,34 +1462,6 @@ static uint64_t qemu_rdma_make_wrid(uint64_t wr_id, uint64_t index,
}
/*
- * Set bit for unregistration in the next iteration.
- * We cannot transmit right here, but will unpin later.
- */
-static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
- uint64_t chunk, uint64_t wr_id)
-{
- if (rdma->unregistrations[rdma->unregister_next] != 0) {
- error_report("rdma migration: queue is full");
- } else {
- RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]);
-
- if (!test_and_set_bit(chunk, block->unregister_bitmap)) {
- trace_qemu_rdma_signal_unregister_append(chunk,
- rdma->unregister_next);
-
- rdma->unregistrations[rdma->unregister_next++] =
- qemu_rdma_make_wrid(wr_id, index, chunk);
-
- if (rdma->unregister_next == RDMA_SIGNALED_SEND_MAX) {
- rdma->unregister_next = 0;
- }
- } else {
- trace_qemu_rdma_signal_unregister_already(chunk);
- }
- }
-}
-
-/*
* Consult the connection manager to see a work request
* (of any kind) has completed.
* Return the work request ID that completed.
@@ -1571,18 +1518,6 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq,
if (rdma->nb_sent > 0) {
rdma->nb_sent--;
}
-
- if (!rdma->pin_all) {
- /*
- * FYI: If one wanted to signal a specific chunk to be unregistered
- * using LRU or workload-specific information, this is the function
- * you would call to do so. That chunk would then get asynchronously
- * unregistered later.
- */
-#ifdef RDMA_UNREGISTRATION_EXAMPLE
- qemu_rdma_signal_unregister(rdma, index, chunk, wc.wr_id);
-#endif
- }
} else {
trace_qemu_rdma_poll_other(print_wrid(wr_id), wr_id, rdma->nb_sent);
}
@@ -2137,11 +2072,6 @@ retry:
chunk_end = ram_chunk_end(block, chunk + chunks);
- if (!rdma->pin_all) {
-#ifdef RDMA_UNREGISTRATION_EXAMPLE
- qemu_rdma_unregister_waiting(rdma);
-#endif
- }
while (test_bit(chunk, block->transit_bitmap)) {
(void)count;
@@ -3278,33 +3208,17 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
* Offset is an offset to be added to block_offset and used
* to also lookup the corresponding RAMBlock.
*
- * @size > 0 :
- * Initiate an transfer this size.
- *
- * @size == 0 :
- * A 'hint' or 'advice' that means that we wish to speculatively
- * and asynchronously unregister this memory. In this case, there is no
- * guarantee that the unregister will actually happen, for example,
- * if the memory is being actively transmitted. Additionally, the memory
- * may be re-registered at any future time if a write within the same
- * chunk was requested again, even if you attempted to unregister it
- * here.
- *
- * @size < 0 : TODO, not yet supported
- * Unregister the memory NOW. This means that the caller does not
- * expect there to be any future RDMA transfers and we just want to clean
- * things up. This is used in case the upper layer owns the memory and
- * cannot wait for qemu_fclose() to occur.
+ * @size : Number of bytes to transfer
*
* @bytes_sent : User-specificed pointer to indicate how many bytes were
* sent. Usually, this will not be more than a few bytes of
* the protocol because most transfers are sent asynchronously.
*/
-static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
+static size_t qemu_rdma_save_page(QEMUFile *f,
ram_addr_t block_offset, ram_addr_t offset,
size_t size, uint64_t *bytes_sent)
{
- QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
RDMAContext *rdma;
int ret;
@@ -3323,61 +3237,27 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
qemu_fflush(f);
- if (size > 0) {
- /*
- * Add this page to the current 'chunk'. If the chunk
- * is full, or the page doesn't belong to the current chunk,
- * an actual RDMA write will occur and a new chunk will be formed.
- */
- ret = qemu_rdma_write(f, rdma, block_offset, offset, size);
- if (ret < 0) {
- error_report("rdma migration: write error! %d", ret);
- goto err;
- }
-
- /*
- * We always return 1 bytes because the RDMA
- * protocol is completely asynchronous. We do not yet know
- * whether an identified chunk is zero or not because we're
- * waiting for other pages to potentially be merged with
- * the current chunk. So, we have to call qemu_update_position()
- * later on when the actual write occurs.
- */
- if (bytes_sent) {
- *bytes_sent = 1;
- }
- } else {
- uint64_t index, chunk;
-
- /* TODO: Change QEMUFileOps prototype to be signed: size_t => long
- if (size < 0) {
- ret = qemu_rdma_drain_cq(f, rdma);
- if (ret < 0) {
- fprintf(stderr, "rdma: failed to synchronously drain"
- " completion queue before unregistration.\n");
- goto err;
- }
- }
- */
-
- ret = qemu_rdma_search_ram_block(rdma, block_offset,
- offset, size, &index, &chunk);
-
- if (ret) {
- error_report("ram block search failed");
- goto err;
- }
-
- qemu_rdma_signal_unregister(rdma, index, chunk, 0);
+ /*
+ * Add this page to the current 'chunk'. If the chunk
+ * is full, or the page doesn't belong to the current chunk,
+ * an actual RDMA write will occur and a new chunk will be formed.
+ */
+ ret = qemu_rdma_write(f, rdma, block_offset, offset, size);
+ if (ret < 0) {
+ error_report("rdma migration: write error! %d", ret);
+ goto err;
+ }
- /*
- * TODO: Synchronous, guaranteed unregistration (should not occur during
- * fast-path). Otherwise, unregisters will process on the next call to
- * qemu_rdma_drain_cq()
- if (size < 0) {
- qemu_rdma_unregister_waiting(rdma);
- }
- */
+ /*
+ * We always return 1 bytes because the RDMA
+ * protocol is completely asynchronous. We do not yet know
+ * whether an identified chunk is zero or not because we're
+ * waiting for other pages to potentially be merged with
+ * the current chunk. So, we have to call qemu_update_position()
+ * later on when the actual write occurs.
+ */
+ if (bytes_sent) {
+ *bytes_sent = 1;
}
/*
@@ -3950,14 +3830,15 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
return 0;
}
-static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
+static int rdma_load_hook(QEMUFile *f, uint64_t flags, void *data)
{
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
switch (flags) {
case RAM_CONTROL_BLOCK_REG:
- return rdma_block_notification_handle(opaque, data);
+ return rdma_block_notification_handle(rioc, data);
case RAM_CONTROL_HOOK:
- return qemu_rdma_registration_handle(f, opaque);
+ return qemu_rdma_registration_handle(f, rioc);
default:
/* Shouldn't be called with any other values */
@@ -3965,10 +3846,10 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
}
}
-static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
+static int qemu_rdma_registration_start(QEMUFile *f,
uint64_t flags, void *data)
{
- QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
RDMAContext *rdma;
RCU_READ_LOCK_GUARD();
@@ -3994,10 +3875,10 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
* Inform dest that dynamic registrations are done for now.
* First, flush writes, if any.
*/
-static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
+static int qemu_rdma_registration_stop(QEMUFile *f,
uint64_t flags, void *data)
{
- QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+ QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
RDMAContext *rdma;
RDMAControlHeader head = { .len = 0, .repeat = 1 };
int ret = 0;
@@ -4170,12 +4051,12 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
if (mode[0] == 'w') {
- rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
+ rioc->file = qemu_file_new_output(QIO_CHANNEL(rioc));
rioc->rdmaout = rdma;
rioc->rdmain = rdma->return_path;
qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
} else {
- rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
+ rioc->file = qemu_file_new_input(QIO_CHANNEL(rioc));
rioc->rdmain = rdma;
rioc->rdmaout = rdma->return_path;
qemu_file_set_hooks(rioc->file, &rdma_read_hooks);