aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/migration/migration.h5
-rw-r--r--include/migration/postcopy-ram.h35
-rw-r--r--include/qemu/typedefs.h1
-rw-r--r--migration/migration.c1
-rw-r--r--migration/postcopy-ram.c126
-rw-r--r--migration/ram.c181
-rw-r--r--migration/savevm.c2
-rw-r--r--trace-events6
8 files changed, 354 insertions, 3 deletions
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 219032def2..b382d7774f 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -199,6 +199,11 @@ double xbzrle_mig_cache_miss_rate(void);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+ uint64_t start, size_t length);
/**
* @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d81934ff8d..de79fa778f 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -16,4 +16,39 @@
/* Return true if the host supports everything we need to do postcopy-ram */
bool postcopy_ram_supported_by_host(void);
+/*
+ * Discard the contents of 'length' bytes from 'start'
+ * We can assume that if we've been called postcopy_ram_hosttest returned true
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ size_t length);
+
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code.
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+ unsigned long offset,
+ const char *name);
+
+/*
+ * Called by the bitmap code for each chunk to discard.
+ * May send a discard message, may just leave it queued to
+ * be sent later.
+ * @start,@length: a range of pages in the migration bitmap in the
+ * RAM block passed to postcopy_discard_send_init() (length=1 is one page)
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+ unsigned long start, unsigned long length);
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code.
+ * Sends any outstanding discard messages, frees the PDS.
+ */
+void postcopy_discard_send_finish(MigrationState *ms,
+ PostcopyDiscardState *pds);
+
#endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index f7e0ed0a3b..6b1093dcfc 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -67,6 +67,7 @@ typedef struct PCMachineState PCMachineState;
typedef struct PCMachineClass PCMachineClass;
typedef struct PCMCIACardState PCMCIACardState;
typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
typedef struct PropertyInfo PropertyInfo;
typedef struct Property Property;
typedef struct QEMUBH QEMUBH;
diff --git a/migration/migration.c b/migration/migration.c
index 2acd0e689d..afc863a84d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -24,6 +24,7 @@
#include "qemu/sockets.h"
#include "qemu/rcu.h"
#include "migration/block.h"
+#include "migration/postcopy-ram.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cdd0168dbb..261feda4c6 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -27,6 +27,24 @@
#include "qemu/error-report.h"
#include "trace.h"
+/* Arbitrary limit on size of each discard command,
+ * keeps them around ~200 bytes
+ */
+#define MAX_DISCARDS_PER_COMMAND 12
+
+struct PostcopyDiscardState {
+ const char *ramblock_name;
+ uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
+ uint16_t cur_entry;
+ /*
+ * Start and length of a discard range (bytes)
+ */
+ uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
+ uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
+ unsigned int nsentwords;
+ unsigned int nsentcmds;
+};
+
/* Postcopy needs to detect accesses to pages that haven't yet been copied
* across, and efficiently map new pages in, the techniques for doing this
* are target OS specific.
@@ -145,6 +163,27 @@ out:
return ret;
}
+/**
+ * postcopy_ram_discard_range: Discard a range of memory.
+ * We can assume that if we've been called postcopy_ram_hosttest returned true.
+ *
+ * @mis: Current incoming migration state.
+ * @start, @length: range of memory to discard.
+ *
+ * returns: 0 on success.
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ size_t length)
+{
+ trace_postcopy_ram_discard_range(start, length);
+ if (madvise(start, length, MADV_DONTNEED)) {
+ error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
#else
/* No target OS support, stubs just fail */
bool postcopy_ram_supported_by_host(void)
@@ -153,5 +192,92 @@ bool postcopy_ram_supported_by_host(void)
return false;
}
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ size_t length)
+{
+ assert(0);
+}
#endif
+/* ------------------------------------------------------------------------- */
+
+/**
+ * postcopy_discard_send_init: Called at the start of each RAMBlock before
+ * asking to discard individual ranges.
+ *
+ * @ms: The current migration state.
+ * @offset: the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * @name: RAMBlock that discards will operate on.
+ *
+ * returns: a new PDS.
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+ unsigned long offset,
+ const char *name)
+{
+ PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
+
+ if (res) {
+ res->ramblock_name = name;
+ res->offset = offset;
+ }
+
+ return res;
+}
+
+/**
+ * postcopy_discard_send_range: Called by the bitmap code for each chunk to
+ * discard. May send a discard message, may just leave it queued to
+ * be sent later.
+ *
+ * @ms: Current migration state.
+ * @pds: Structure initialised by postcopy_discard_send_init().
+ * @start,@length: a range of pages in the migration bitmap in the
+ * RAM block passed to postcopy_discard_send_init() (length=1 is one page)
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+ unsigned long start, unsigned long length)
+{
+ size_t tp_bits = qemu_target_page_bits();
+ /* Convert to byte offsets within the RAM block */
+ pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
+ pds->length_list[pds->cur_entry] = length << tp_bits;
+ trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
+ pds->cur_entry++;
+ pds->nsentwords++;
+
+ if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+ /* Full set, ship it! */
+ qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
+ pds->cur_entry,
+ pds->start_list,
+ pds->length_list);
+ pds->nsentcmds++;
+ pds->cur_entry = 0;
+ }
+}
+
+/**
+ * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
+ * bitmap code. Sends any outstanding discard messages, frees the PDS
+ *
+ * @ms: Current migration state.
+ * @pds: Structure initialised by postcopy_discard_send_init().
+ */
+void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
+{
+ /* Anything unsent? */
+ if (pds->cur_entry) {
+ qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
+ pds->cur_entry,
+ pds->start_list,
+ pds->length_list);
+ pds->nsentcmds++;
+ }
+
+ trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
+ pds->nsentcmds);
+
+ g_free(pds);
+}
diff --git a/migration/ram.c b/migration/ram.c
index c703176c74..b481d555e9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -32,6 +32,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
@@ -967,6 +968,8 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
* @f: Current migration stream.
* @pss: Data about the state of the current dirty page scan.
* @*again: Set to false if the search has scanned the whole of RAM
+ * *ram_addr_abs: Pointer into which to store the address of the dirty page
+ * within the global ram_addr space
*/
static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
bool *again, ram_addr_t *ram_addr_abs)
@@ -1064,7 +1067,6 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
if (unsentmap) {
clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
}
-
}
}
} while (!pages && again);
@@ -1233,6 +1235,183 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
}
}
+/* **** functions for postcopy ***** */
+
+/*
+ * Callback from postcopy_each_ram_send_discard for each RAMBlock
+ * Note: At this point the 'unsentmap' is the processed bitmap combined
+ * with the dirtymap; so a '1' means it's either dirty or unsent.
+ * start,length: Indexes into the bitmap for the first bit
+ * representing the named block and length in target-pages
+ */
+static int postcopy_send_discard_bm_ram(MigrationState *ms,
+ PostcopyDiscardState *pds,
+ unsigned long start,
+ unsigned long length)
+{
+ unsigned long end = start + length; /* one after the end */
+ unsigned long current;
+ unsigned long *unsentmap;
+
+ unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+ for (current = start; current < end; ) {
+ unsigned long one = find_next_bit(unsentmap, end, current);
+
+ if (one <= end) {
+ unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
+ unsigned long discard_length;
+
+ if (zero >= end) {
+ discard_length = end - one;
+ } else {
+ discard_length = zero - one;
+ }
+ postcopy_discard_send_range(ms, pds, one, discard_length);
+ current = one + discard_length;
+ } else {
+ current = one;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Utility for the outgoing postcopy code.
+ * Calls postcopy_send_discard_bm_ram for each RAMBlock
+ * passing it bitmap indexes and name.
+ * Returns: 0 on success
+ * (qemu_ram_foreach_block ends up passing unscaled lengths
+ * which would mean postcopy code would have to deal with target page)
+ */
+static int postcopy_each_ram_send_discard(MigrationState *ms)
+{
+ struct RAMBlock *block;
+ int ret;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ unsigned long first = block->offset >> TARGET_PAGE_BITS;
+ PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
+ first,
+ block->idstr);
+
+ /*
+ * Postcopy sends chunks of bitmap over the wire, but it
+ * just needs indexes at this point, avoids it having
+ * target page specific code.
+ */
+ ret = postcopy_send_discard_bm_ram(ms, pds, first,
+ block->used_length >> TARGET_PAGE_BITS);
+ postcopy_discard_send_finish(ms, pds);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Transmit the set of pages to be discarded after precopy to the target
+ * these are pages that:
+ * a) Have been previously transmitted but are now dirty again
+ * b) Pages that have never been transmitted, this ensures that
+ * any pages on the destination that have been mapped by background
+ * tasks get discarded (transparent huge pages is the specific concern)
+ * Hopefully this is pretty sparse
+ */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms)
+{
+ int ret;
+ unsigned long *bitmap, *unsentmap;
+
+ rcu_read_lock();
+
+ /* This should be our last sync, the src is now paused */
+ migration_bitmap_sync();
+
+ unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+ if (!unsentmap) {
+ /* We don't have a safe way to resize the sentmap, so
+ * if the bitmap was resized it will be NULL at this
+ * point.
+ */
+ error_report("migration ram resized during precopy phase");
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ /*
+ * Update the unsentmap to be unsentmap = unsentmap | dirty
+ */
+ bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+ bitmap_or(unsentmap, unsentmap, bitmap,
+ last_ram_offset() >> TARGET_PAGE_BITS);
+
+
+ trace_ram_postcopy_send_discard_bitmap();
+#ifdef DEBUG_POSTCOPY
+ ram_debug_dump_bitmap(unsentmap, true);
+#endif
+
+ ret = postcopy_each_ram_send_discard(ms);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/*
+ * At the start of the postcopy phase of migration, any now-dirty
+ * precopied pages are discarded.
+ *
+ * start, length describe a byte address range within the RAMBlock
+ *
+ * Returns 0 on success.
+ */
+int ram_discard_range(MigrationIncomingState *mis,
+ const char *block_name,
+ uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ rcu_read_lock();
+ RAMBlock *rb = qemu_ram_block_by_name(block_name);
+
+ if (!rb) {
+ error_report("ram_discard_range: Failed to find block '%s'",
+ block_name);
+ goto err;
+ }
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
+ error_report("ram_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->used_length) {
+ uint8_t *host_endaddr = host_startaddr + length;
+ if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
+ error_report("ram_discard_range: Unaligned end address: %p",
+ host_endaddr);
+ goto err;
+ }
+ ret = postcopy_ram_discard_range(mis, host_startaddr, length);
+ } else {
+ error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zu/%zu)",
+ block_name, start, length, rb->used_length);
+ }
+
+err:
+ rcu_read_unlock();
+
+ return ret;
+}
+
+
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
* start to become numerous it will be necessary to reduce the
diff --git a/migration/savevm.c b/migration/savevm.c
index 8e11877198..8f07abd1c6 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1349,7 +1349,6 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
}
trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
while (len) {
- /* TODO - ram_discard_range gets added in a later patch
uint64_t start_addr, block_length;
start_addr = qemu_get_be64(mis->from_src_file);
block_length = qemu_get_be64(mis->from_src_file);
@@ -1360,7 +1359,6 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
if (ret) {
return ret;
}
- */
}
trace_loadvm_postcopy_ram_handle_discard_end();
diff --git a/trace-events b/trace-events
index a51ce2254d..e74675e5bf 100644
--- a/trace-events
+++ b/trace-events
@@ -1255,6 +1255,7 @@ qemu_file_fclose(void) ""
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
migration_throttle(void) ""
+ram_postcopy_send_discard_bitmap(void) ""
# hw/display/qxl.c
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
@@ -1532,6 +1533,11 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
rdma_start_outgoing_migration_after_rdma_connect(void) ""
rdma_start_outgoing_migration_after_rdma_source_init(void) ""
+# migration/postcopy-ram.c
+postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
+postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
+postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
+
# kvm-all.c
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"