diff options
Diffstat (limited to 'migration/ram.c')
-rw-r--r-- | migration/ram.c | 997 |
1 files changed, 920 insertions, 77 deletions
diff --git a/migration/ram.c b/migration/ram.c index df3df9e3bf..62cf42bfdb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -32,6 +32,7 @@ #include "qemu/timer.h" #include "qemu/main-loop.h" #include "migration/migration.h" +#include "migration/postcopy-ram.h" #include "exec/address-spaces.h" #include "migration/page_cache.h" #include "qemu/error-report.h" @@ -237,7 +238,14 @@ typedef struct PageSearchStatus PageSearchStatus; static struct BitmapRcu { struct rcu_head rcu; + /* Main migration bitmap */ unsigned long *bmap; + /* bitmap of pages that haven't been sent even once + * only maintained and used in postcopy at the moment + * where it's used to send the dirtymap at the start + * of the postcopy phase + */ + unsigned long *unsentmap; } *migration_bitmap_rcu; struct CompressParam { @@ -531,10 +539,18 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, return 1; } -/* Called with rcu_read_lock() to protect migration_bitmap */ +/* Called with rcu_read_lock() to protect migration_bitmap + * rb: The RAMBlock to search for dirty pages in + * start: Start address (typically so we can continue from previous page) + * ram_addr_abs: Pointer into which to store the address of the dirty page + * within the global ram_addr space + * + * Returns: byte offset within memory region of the start of a dirty page + */ static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, - ram_addr_t start) +ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, + ram_addr_t start, + ram_addr_t *ram_addr_abs) { unsigned long base = rb->offset >> TARGET_PAGE_BITS; unsigned long nr = base + (start >> TARGET_PAGE_BITS); @@ -551,14 +567,24 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, next = find_next_bit(bitmap, size, nr); } - if (next < size) { - clear_bit(next, bitmap); + *ram_addr_abs = next << TARGET_PAGE_BITS; + return (next - base) << TARGET_PAGE_BITS; +} + +static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) +{ + bool ret; + int nr = addr >> TARGET_PAGE_BITS; + unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + + ret = test_and_clear_bit(nr, bitmap); + + if (ret) { migration_dirty_pages--; } - return (next - base) << TARGET_PAGE_BITS; + return ret; } -/* Called with rcu_read_lock() to protect migration_bitmap */ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) { unsigned long *bitmap; @@ -951,12 +977,14 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, * @f: Current migration stream. * @pss: Data about the state of the current dirty page scan. * @*again: Set to false if the search has scanned the whole of RAM + * *ram_addr_abs: Pointer into which to store the address of the dirty page + * within the global ram_addr space */ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, - bool *again) + bool *again, ram_addr_t *ram_addr_abs) { - pss->offset = migration_bitmap_find_and_reset_dirty(pss->block, - pss->offset); + pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, + ram_addr_abs); if (pss->complete_round && pss->block == last_seen_block && pss->offset >= last_offset) { /* @@ -995,6 +1023,276 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, } } +/* + * Helper for 'get_queued_page' - gets a page off the queue + * ms: MigrationState in + * *offset: Used to return the offset within the RAMBlock + * ram_addr_abs: global offset in the dirty/sent bitmaps + * + * Returns: block (or NULL if none available) + */ +static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset, + ram_addr_t *ram_addr_abs) +{ + RAMBlock *block = NULL; + + qemu_mutex_lock(&ms->src_page_req_mutex); + if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { + struct MigrationSrcPageRequest *entry = + QSIMPLEQ_FIRST(&ms->src_page_requests); + block = entry->rb; + *offset = entry->offset; + *ram_addr_abs = (entry->offset + entry->rb->offset) & + TARGET_PAGE_MASK; + + if (entry->len > TARGET_PAGE_SIZE) { + entry->len -= TARGET_PAGE_SIZE; + entry->offset += TARGET_PAGE_SIZE; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + g_free(entry); + } + } + qemu_mutex_unlock(&ms->src_page_req_mutex); + + return block; +} + +/* + * Unqueue a page from the queue fed by postcopy page requests; skips pages + * that are already sent (!dirty) + * + * ms: MigrationState in + * pss: PageSearchStatus structure updated with found block/offset + * ram_addr_abs: global offset in the dirty/sent bitmaps + * + * Returns: true if a queued page is found + */ +static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, + ram_addr_t *ram_addr_abs) +{ + RAMBlock *block; + ram_addr_t offset; + bool dirty; + + do { + block = unqueue_page(ms, &offset, ram_addr_abs); + /* + * We're sending this page, and since it's postcopy nothing else + * will dirty it, and we must make sure it doesn't get sent again + * even if this queue request was received after the background + * search already sent it. + */ + if (block) { + unsigned long *bitmap; + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap); + if (!dirty) { + trace_get_queued_page_not_dirty( + block->idstr, (uint64_t)offset, + (uint64_t)*ram_addr_abs, + test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, + atomic_rcu_read(&migration_bitmap_rcu)->unsentmap)); + } else { + trace_get_queued_page(block->idstr, + (uint64_t)offset, + (uint64_t)*ram_addr_abs); + } + } + + } while (block && !dirty); + + if (block) { + /* + * As soon as we start servicing pages out of order, then we have + * to kill the bulk stage, since the bulk stage assumes + * in (migration_bitmap_find_and_reset_dirty) that every page is + * dirty, that's no longer true. + */ + ram_bulk_stage = false; + + /* + * We want the background search to continue from the queued page + * since the guest is likely to want other pages near to the page + * it just requested. + */ + pss->block = block; + pss->offset = offset; + } + + return !!block; +} + +/** + * flush_page_queue: Flush any remaining pages in the ram request queue + * it should be empty at the end anyway, but in error cases there may be + * some left. + * + * ms: MigrationState + */ +void flush_page_queue(MigrationState *ms) +{ + struct MigrationSrcPageRequest *mspr, *next_mspr; + /* This queue generally should be empty - but in the case of a failed + * migration might have some droppings in. + */ + rcu_read_lock(); + QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) { + memory_region_unref(mspr->rb->mr); + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + g_free(mspr); + } + rcu_read_unlock(); +} + +/** + * Queue the pages for transmission, e.g. a request from postcopy destination + * ms: MigrationStatus in which the queue is held + * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last) + * start: Offset from the start of the RAMBlock + * len: Length (in bytes) to send + * Return: 0 on success + */ +int ram_save_queue_pages(MigrationState *ms, const char *rbname, + ram_addr_t start, ram_addr_t len) +{ + RAMBlock *ramblock; + + rcu_read_lock(); + if (!rbname) { + /* Reuse last RAMBlock */ + ramblock = ms->last_req_rb; + + if (!ramblock) { + /* + * Shouldn't happen, we can't reuse the last RAMBlock if + * it's the 1st request. + */ + error_report("ram_save_queue_pages no previous block"); + goto err; + } + } else { + ramblock = qemu_ram_block_by_name(rbname); + + if (!ramblock) { + /* We shouldn't be asked for a non-existent RAMBlock */ + error_report("ram_save_queue_pages no block '%s'", rbname); + goto err; + } + ms->last_req_rb = ramblock; + } + trace_ram_save_queue_pages(ramblock->idstr, start, len); + if (start+len > ramblock->used_length) { + error_report("%s request overrun start=%zx len=%zx blocklen=%zx", + __func__, start, len, ramblock->used_length); + goto err; + } + + struct MigrationSrcPageRequest *new_entry = + g_malloc0(sizeof(struct MigrationSrcPageRequest)); + new_entry->rb = ramblock; + new_entry->offset = start; + new_entry->len = len; + + memory_region_ref(ramblock->mr); + qemu_mutex_lock(&ms->src_page_req_mutex); + QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req); + qemu_mutex_unlock(&ms->src_page_req_mutex); + rcu_read_unlock(); + + return 0; + +err: + rcu_read_unlock(); + return -1; +} + +/** + * ram_save_target_page: Save one target page + * + * + * @f: QEMUFile where to send the data + * @block: pointer to block that contains the page we want to send + * @offset: offset inside the block for the page; + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space + * + * Returns: Number of pages written. + */ +static int ram_save_target_page(MigrationState *ms, QEMUFile *f, + RAMBlock *block, ram_addr_t offset, + bool last_stage, + uint64_t *bytes_transferred, + ram_addr_t dirty_ram_abs) +{ + int res = 0; + + /* Check the pages is dirty and if it is send it */ + if (migration_bitmap_clear_dirty(dirty_ram_abs)) { + unsigned long *unsentmap; + if (compression_switch && migrate_use_compression()) { + res = ram_save_compressed_page(f, block, offset, + last_stage, + bytes_transferred); + } else { + res = ram_save_page(f, block, offset, last_stage, + bytes_transferred); + } + + if (res < 0) { + return res; + } + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + if (unsentmap) { + clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap); + } + } + + return res; +} + +/** + * ram_save_host_page: Starting at *offset send pages upto the end + * of the current host page. It's valid for the initial + * offset to point into the middle of a host page + * in which case the remainder of the hostpage is sent. + * Only dirty target pages are sent. + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: pointer to block that contains the page we want to send + * @offset: offset inside the block for the page; updated to last target page + * sent + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space + */ +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock *block, + ram_addr_t *offset, bool last_stage, + uint64_t *bytes_transferred, + ram_addr_t dirty_ram_abs) +{ + int tmppages, pages = 0; + do { + tmppages = ram_save_target_page(ms, f, block, *offset, last_stage, + bytes_transferred, dirty_ram_abs); + if (tmppages < 0) { + return tmppages; + } + + pages += tmppages; + *offset += TARGET_PAGE_SIZE; + dirty_ram_abs += TARGET_PAGE_SIZE; + } while (*offset & (qemu_host_page_size - 1)); + + /* The offset we leave with is the last one we looked at */ + *offset -= TARGET_PAGE_SIZE; + return pages; +} + /** * ram_find_and_save_block: Finds a dirty page and sends it to f * @@ -1006,14 +1304,20 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, * @f: QEMUFile where to send the data * @last_stage: if we are at the completion stage * @bytes_transferred: increase it with the number of transferred bytes + * + * On systems where host-page-size > target-page-size it will send all the + * pages in a host page that are dirty. */ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, uint64_t *bytes_transferred) { PageSearchStatus pss; + MigrationState *ms = migrate_get_current(); int pages = 0; bool again, found; + ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in + ram_addr_t space */ pss.block = last_seen_block; pss.offset = last_offset; @@ -1024,22 +1328,18 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, } do { - found = find_dirty_block(f, &pss, &again); + again = true; + found = get_queued_page(ms, &pss, &dirty_ram_abs); - if (found) { - if (compression_switch && migrate_use_compression()) { - pages = ram_save_compressed_page(f, pss.block, pss.offset, - last_stage, - bytes_transferred); - } else { - pages = ram_save_page(f, pss.block, pss.offset, last_stage, - bytes_transferred); - } + if (!found) { + /* priority queue empty, so just search for something dirty */ + found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); + } - /* if page is unmodified, continue to the next */ - if (pages > 0) { - last_sent_block = pss.block; - } + if (found) { + pages = ram_save_host_page(ms, f, pss.block, &pss.offset, + last_stage, bytes_transferred, + dirty_ram_abs); } } while (!pages && again); @@ -1097,6 +1397,7 @@ void free_xbzrle_decoded_buf(void) static void migration_bitmap_free(struct BitmapRcu *bmap) { g_free(bmap->bmap); + g_free(bmap->unsentmap); g_free(bmap); } @@ -1153,6 +1454,13 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) qemu_mutex_lock(&migration_bitmap_mutex); bitmap_copy(bitmap->bmap, old_bitmap->bmap, old); bitmap_set(bitmap->bmap, old, new - old); + + /* We don't have a way to safely extend the sentmap + * with RCU; so mark it as missing, entry to postcopy + * will fail. + */ + bitmap->unsentmap = NULL; + atomic_rcu_set(&migration_bitmap_rcu, bitmap); qemu_mutex_unlock(&migration_bitmap_mutex); migration_dirty_pages += new - old; @@ -1160,6 +1468,394 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) } } +/* + * 'expected' is the value you expect the bitmap mostly to be full + * of; it won't bother printing lines that are all this value. + * If 'todump' is null the migration bitmap is dumped. + */ +void ram_debug_dump_bitmap(unsigned long *todump, bool expected) +{ + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + int64_t cur; + int64_t linelen = 128; + char linebuf[129]; + + if (!todump) { + todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + } + + for (cur = 0; cur < ram_pages; cur += linelen) { + int64_t curb; + bool found = false; + /* + * Last line; catch the case where the line length + * is longer than remaining ram + */ + if (cur + linelen > ram_pages) { + linelen = ram_pages - cur; + } + for (curb = 0; curb < linelen; curb++) { + bool thisbit = test_bit(cur + curb, todump); + linebuf[curb] = thisbit ? '1' : '.'; + found = found || (thisbit != expected); + } + if (found) { + linebuf[curb] = '\0'; + fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); + } + } +} + +/* **** functions for postcopy ***** */ + +/* + * Callback from postcopy_each_ram_send_discard for each RAMBlock + * Note: At this point the 'unsentmap' is the processed bitmap combined + * with the dirtymap; so a '1' means it's either dirty or unsent. + * start,length: Indexes into the bitmap for the first bit + * representing the named block and length in target-pages + */ +static int postcopy_send_discard_bm_ram(MigrationState *ms, + PostcopyDiscardState *pds, + unsigned long start, + unsigned long length) +{ + unsigned long end = start + length; /* one after the end */ + unsigned long current; + unsigned long *unsentmap; + + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + for (current = start; current < end; ) { + unsigned long one = find_next_bit(unsentmap, end, current); + + if (one <= end) { + unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); + unsigned long discard_length; + + if (zero >= end) { + discard_length = end - one; + } else { + discard_length = zero - one; + } + postcopy_discard_send_range(ms, pds, one, discard_length); + current = one + discard_length; + } else { + current = one; + } + } + + return 0; +} + +/* + * Utility for the outgoing postcopy code. + * Calls postcopy_send_discard_bm_ram for each RAMBlock + * passing it bitmap indexes and name. + * Returns: 0 on success + * (qemu_ram_foreach_block ends up passing unscaled lengths + * which would mean postcopy code would have to deal with target page) + */ +static int postcopy_each_ram_send_discard(MigrationState *ms) +{ + struct RAMBlock *block; + int ret; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + unsigned long first = block->offset >> TARGET_PAGE_BITS; + PostcopyDiscardState *pds = postcopy_discard_send_init(ms, + first, + block->idstr); + + /* + * Postcopy sends chunks of bitmap over the wire, but it + * just needs indexes at this point, avoids it having + * target page specific code. + */ + ret = postcopy_send_discard_bm_ram(ms, pds, first, + block->used_length >> TARGET_PAGE_BITS); + postcopy_discard_send_finish(ms, pds); + if (ret) { + return ret; + } + } + + return 0; +} + +/* + * Helper for postcopy_chunk_hostpages; it's called twice to cleanup + * the two bitmaps, that are similar, but one is inverted. + * + * We search for runs of target-pages that don't start or end on a + * host page boundary; + * unsent_pass=true: Cleans up partially unsent host pages by searching + * the unsentmap + * unsent_pass=false: Cleans up partially dirty host pages by searching + * the main migration bitmap + * + */ +static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, + RAMBlock *block, + PostcopyDiscardState *pds) +{ + unsigned long *bitmap; + unsigned long *unsentmap; + unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE; + unsigned long first = block->offset >> TARGET_PAGE_BITS; + unsigned long len = block->used_length >> TARGET_PAGE_BITS; + unsigned long last = first + (len - 1); + unsigned long run_start; + + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + + if (unsent_pass) { + /* Find a sent page */ + run_start = find_next_zero_bit(unsentmap, last + 1, first); + } else { + /* Find a dirty page */ + run_start = find_next_bit(bitmap, last + 1, first); + } + + while (run_start <= last) { + bool do_fixup = false; + unsigned long fixup_start_addr; + unsigned long host_offset; + + /* + * If the start of this run of pages is in the middle of a host + * page, then we need to fixup this host page. + */ + host_offset = run_start % host_ratio; + if (host_offset) { + do_fixup = true; + run_start -= host_offset; + fixup_start_addr = run_start; + /* For the next pass */ + run_start = run_start + host_ratio; + } else { + /* Find the end of this run */ + unsigned long run_end; + if (unsent_pass) { + run_end = find_next_bit(unsentmap, last + 1, run_start + 1); + } else { + run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1); + } + /* + * If the end isn't at the start of a host page, then the + * run doesn't finish at the end of a host page + * and we need to discard. + */ + host_offset = run_end % host_ratio; + if (host_offset) { + do_fixup = true; + fixup_start_addr = run_end - host_offset; + /* + * This host page has gone, the next loop iteration starts + * from after the fixup + */ + run_start = fixup_start_addr + host_ratio; + } else { + /* + * No discards on this iteration, next loop starts from + * next sent/dirty page + */ + run_start = run_end + 1; + } + } + + if (do_fixup) { + unsigned long page; + + /* Tell the destination to discard this page */ + if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { + /* For the unsent_pass we: + * discard partially sent pages + * For the !unsent_pass (dirty) we: + * discard partially dirty pages that were sent + * (any partially sent pages were already discarded + * by the previous unsent_pass) + */ + postcopy_discard_send_range(ms, pds, fixup_start_addr, + host_ratio); + } + + /* Clean up the bitmap */ + for (page = fixup_start_addr; + page < fixup_start_addr + host_ratio; page++) { + /* All pages in this host page are now not sent */ + set_bit(page, unsentmap); + + /* + * Remark them as dirty, updating the count for any pages + * that weren't previously dirty. + */ + migration_dirty_pages += !test_and_set_bit(page, bitmap); + } + } + + if (unsent_pass) { + /* Find the next sent page for the next iteration */ + run_start = find_next_zero_bit(unsentmap, last + 1, + run_start); + } else { + /* Find the next dirty page for the next iteration */ + run_start = find_next_bit(bitmap, last + 1, run_start); + } + } +} + +/* + * Utility for the outgoing postcopy code. + * + * Discard any partially sent host-page size chunks, mark any partially + * dirty host-page size chunks as all dirty. + * + * Returns: 0 on success + */ +static int postcopy_chunk_hostpages(MigrationState *ms) +{ + struct RAMBlock *block; + + if (qemu_host_page_size == TARGET_PAGE_SIZE) { + /* Easy case - TPS==HPS - nothing to be done */ + return 0; + } + + /* Easiest way to make sure we don't resume in the middle of a host-page */ + last_seen_block = NULL; + last_sent_block = NULL; + last_offset = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + unsigned long first = block->offset >> TARGET_PAGE_BITS; + + PostcopyDiscardState *pds = + postcopy_discard_send_init(ms, first, block->idstr); + + /* First pass: Discard all partially sent host pages */ + postcopy_chunk_hostpages_pass(ms, true, block, pds); + /* + * Second pass: Ensure that all partially dirty host pages are made + * fully dirty. + */ + postcopy_chunk_hostpages_pass(ms, false, block, pds); + + postcopy_discard_send_finish(ms, pds); + } /* ram_list loop */ + + return 0; +} + +/* + * Transmit the set of pages to be discarded after precopy to the target + * these are pages that: + * a) Have been previously transmitted but are now dirty again + * b) Pages that have never been transmitted, this ensures that + * any pages on the destination that have been mapped by background + * tasks get discarded (transparent huge pages is the specific concern) + * Hopefully this is pretty sparse + */ +int ram_postcopy_send_discard_bitmap(MigrationState *ms) +{ + int ret; + unsigned long *bitmap, *unsentmap; + + rcu_read_lock(); + + /* This should be our last sync, the src is now paused */ + migration_bitmap_sync(); + + unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + if (!unsentmap) { + /* We don't have a safe way to resize the sentmap, so + * if the bitmap was resized it will be NULL at this + * point. + */ + error_report("migration ram resized during precopy phase"); + rcu_read_unlock(); + return -EINVAL; + } + + /* Deal with TPS != HPS */ + ret = postcopy_chunk_hostpages(ms); + if (ret) { + rcu_read_unlock(); + return ret; + } + + /* + * Update the unsentmap to be unsentmap = unsentmap | dirty + */ + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + bitmap_or(unsentmap, unsentmap, bitmap, + last_ram_offset() >> TARGET_PAGE_BITS); + + + trace_ram_postcopy_send_discard_bitmap(); +#ifdef DEBUG_POSTCOPY + ram_debug_dump_bitmap(unsentmap, true); +#endif + + ret = postcopy_each_ram_send_discard(ms); + rcu_read_unlock(); + + return ret; +} + +/* + * At the start of the postcopy phase of migration, any now-dirty + * precopied pages are discarded. + * + * start, length describe a byte address range within the RAMBlock + * + * Returns 0 on success. + */ +int ram_discard_range(MigrationIncomingState *mis, + const char *block_name, + uint64_t start, size_t length) +{ + int ret = -1; + + rcu_read_lock(); + RAMBlock *rb = qemu_ram_block_by_name(block_name); + + if (!rb) { + error_report("ram_discard_range: Failed to find block '%s'", + block_name); + goto err; + } + + uint8_t *host_startaddr = rb->host + start; + + if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) { + error_report("ram_discard_range: Unaligned start address: %p", + host_startaddr); + goto err; + } + + if ((start + length) <= rb->used_length) { + uint8_t *host_endaddr = host_startaddr + length; + if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) { + error_report("ram_discard_range: Unaligned end address: %p", + host_endaddr); + goto err; + } + ret = postcopy_ram_discard_range(mis, host_startaddr, length); + } else { + error_report("ram_discard_range: Overrun block '%s' (%" PRIu64 + "/%zu/%zu)", + block_name, start, length, rb->used_length); + } + +err: + rcu_read_unlock(); + + return ret; +} + + /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code * start to become numerous it will be necessary to reduce the @@ -1214,10 +1910,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque) reset_ram_globals(); ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; - migration_bitmap_rcu = g_new(struct BitmapRcu, 1); + migration_bitmap_rcu = g_new0(struct BitmapRcu, 1); migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages); bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages); + if (migrate_postcopy_ram()) { + migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages); + bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages); + } + /* * Count the total number of pages used by ram blocks not including any * gaps due to alignment or unplugs. @@ -1317,7 +2018,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) { rcu_read_lock(); - migration_bitmap_sync(); + if (!migration_in_postcopy(migrate_get_current())) { + migration_bitmap_sync(); + } ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -1344,13 +2047,16 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return 0; } -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, + uint64_t *non_postcopiable_pending, + uint64_t *postcopiable_pending) { uint64_t remaining_size; remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - if (remaining_size < max_size) { + if (!migration_in_postcopy(migrate_get_current()) && + remaining_size < max_size) { qemu_mutex_lock_iothread(); rcu_read_lock(); migration_bitmap_sync(); @@ -1358,7 +2064,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } - return remaining_size; + + /* We can do postcopy, and all the data is postcopiable */ + *postcopiable_pending += remaining_size; } static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) @@ -1399,6 +2107,14 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) /* Must be called from within a rcu critical section. * Returns a pointer from within the RCU-protected ram_list. */ +/* + * Read a RAMBlock ID from the stream f, find the host address of the + * start of that block and add on 'offset' + * + * f: Stream to read from + * offset: Offset within the block + * flags: Page flags (mostly to see if it's a continuation of previous block) + */ static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -1420,14 +2136,12 @@ static inline void *host_from_stream_offset(QEMUFile *f, qemu_get_buffer(f, (uint8_t *)id, len); id[len] = 0; - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id)) && - block->max_length > offset) { - return block->host + offset; - } + block = qemu_ram_block_by_name(id); + if (block && block->max_length > offset) { + return block->host + offset; } - error_report("Can't find block %s!", id); + error_report("Can't find block %s", id); return NULL; } @@ -1535,11 +2249,148 @@ static void decompress_data_with_multi_threads(uint8_t *compbuf, } } +/* + * Allocate data structures etc needed by incoming migration with postcopy-ram + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work + */ +int ram_postcopy_incoming_init(MigrationIncomingState *mis) +{ + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + return postcopy_ram_incoming_init(mis, ram_pages); +} + +/* + * Called in postcopy mode by ram_load(). + * rcu_read_lock is taken prior to this being called. + */ +static int ram_load_postcopy(QEMUFile *f) +{ + int flags = 0, ret = 0; + bool place_needed = false; + bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE; + MigrationIncomingState *mis = migration_incoming_get_current(); + /* Temporary page that is later 'placed' */ + void *postcopy_host_page = postcopy_get_tmp_page(mis); + void *last_host = NULL; + + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + ram_addr_t addr; + void *host = NULL; + void *page_buffer = NULL; + void *place_source = NULL; + uint8_t ch; + bool all_zero = false; + + addr = qemu_get_be64(f); + flags = addr & ~TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; + + trace_ram_load_postcopy_loop((uint64_t)addr, flags); + place_needed = false; + if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) { + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + page_buffer = host; + /* + * Postcopy requires that we place whole host pages atomically. + * To make it atomic, the data is read into a temporary page + * that's moved into place later. + * The migration protocol uses, possibly smaller, target-pages + * however the source ensures it always sends all the components + * of a host page in order. + */ + page_buffer = postcopy_host_page + + ((uintptr_t)host & ~qemu_host_page_mask); + /* If all TP are zero then we can optimise the place */ + if (!((uintptr_t)host & ~qemu_host_page_mask)) { + all_zero = true; + } else { + /* not the 1st TP within the HP */ + if (host != (last_host + TARGET_PAGE_SIZE)) { + error_report("Non-sequential target page %p/%p\n", + host, last_host); + ret = -EINVAL; + break; + } + } + + + /* + * If it's the last part of a host page then we place the host + * page + */ + place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & + ~qemu_host_page_mask) == 0; + place_source = postcopy_host_page; + } + last_host = host; + + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { + case RAM_SAVE_FLAG_COMPRESS: + ch = qemu_get_byte(f); + memset(page_buffer, ch, TARGET_PAGE_SIZE); + if (ch) { + all_zero = false; + } + break; + + case RAM_SAVE_FLAG_PAGE: + all_zero = false; + if (!place_needed || !matching_page_sizes) { + qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); + } else { + /* Avoids the qemu_file copy during postcopy, which is + * going to do a copy later; can only do it when we + * do this read in one go (matching page sizes) + */ + qemu_get_buffer_in_place(f, (uint8_t **)&place_source, + TARGET_PAGE_SIZE); + } + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + break; + default: + error_report("Unknown combination of migration flags: %#x" + " (postcopy mode)", flags); + ret = -EINVAL; + } + + if (place_needed) { + /* This gets called at the last target page in the host page */ + if (all_zero) { + ret = postcopy_place_page_zero(mis, + host + TARGET_PAGE_SIZE - + qemu_host_page_size); + } else { + ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE - + qemu_host_page_size, + place_source); + } + } + if (!ret) { + ret = qemu_file_get_error(f); + } + } + + return ret; +} + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int flags = 0, ret = 0; static uint64_t seq_iter; int len = 0; + /* + * If system is running in postcopy mode, page inserts to host memory must + * be atomic + */ + bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; seq_iter++; @@ -1553,15 +2404,30 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * critical section. */ rcu_read_lock(); - while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + + if (postcopy_running) { + ret = ram_load_postcopy(f); + } + + while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr, total_ram_bytes; - void *host; + void *host = NULL; uint8_t ch; addr = qemu_get_be64(f); flags = addr & ~TARGET_PAGE_MASK; addr &= TARGET_PAGE_MASK; + if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE | + RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + } + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_MEM_SIZE: /* Synchronize RAM block list */ @@ -1576,23 +2442,20 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) id[len] = 0; length = qemu_get_be64(f); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) { - if (length != block->used_length) { - Error *local_err = NULL; + block = qemu_ram_block_by_name(id); + if (block) { + if (length != block->used_length) { + Error *local_err = NULL; - ret = qemu_ram_resize(block->offset, length, &local_err); - if (local_err) { - error_report_err(local_err); - } + ret = qemu_ram_resize(block->offset, length, + &local_err); + if (local_err) { + error_report_err(local_err); } - ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, - block->idstr); - break; } - } - - if (!block) { + ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, + block->idstr); + } else { error_report("Unknown ramblock \"%s\", cannot " "accept migration", id); ret = -EINVAL; @@ -1601,33 +2464,17 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) total_ram_bytes -= length; } break; + case RAM_SAVE_FLAG_COMPRESS: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } ch = qemu_get_byte(f); ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); break; + case RAM_SAVE_FLAG_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } qemu_get_buffer(f, host, TARGET_PAGE_SIZE); break; - case RAM_SAVE_FLAG_COMPRESS_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Invalid RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } + case RAM_SAVE_FLAG_COMPRESS_PAGE: len = qemu_get_be32(f); if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { error_report("Invalid compressed data length: %d", len); @@ -1637,13 +2484,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) qemu_get_buffer(f, compressed_data_buf, len); decompress_data_with_multi_threads(compressed_data_buf, host, len); break; + case RAM_SAVE_FLAG_XBZRLE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } if (load_xbzrle(f, addr, host) < 0) { error_report("Failed to decompress XBZRLE page at " RAM_ADDR_FMT, addr); @@ -1677,7 +2519,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) static SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, - .save_live_complete = ram_save_complete, + .save_live_complete_postcopy = ram_save_complete, + .save_live_complete_precopy = ram_save_complete, .save_live_pending = ram_save_pending, .load_state = ram_load, .cleanup = ram_migration_cleanup, |