diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-03-02 17:39:12 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-03-02 17:39:12 +0000 |
commit | 251501a3714096f807778f6d3f03711dcdb9ce29 (patch) | |
tree | fd80eb80a4ea2eb24f07411aa8eb8b294aec4213 /migration | |
parent | c9fc677a35e6e00cdf00c7d085cbd74e0b90b2e6 (diff) | |
parent | 665414ad06aa1bc92e615db9641e58fb13d07de1 (diff) |
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging
Migration pull
Note: The 'postcopy: Update userfaultfd.h header' is part of
Paolo's header update and will disappear if applied after it.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
# gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT
# gpg: using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert/tags/pull-migration-20170228a: (27 commits)
postcopy: Add extra check for COPY function
postcopy: Add doc about hugepages and postcopy
postcopy: Check for userfault+hugepage feature
postcopy: Update userfaultfd.h header
postcopy: Allow hugepages
postcopy: Send whole huge pages
postcopy: Mask fault addresses to huge page boundary
postcopy: Load huge pages in one go
postcopy: Use temporary for placing zero huge pages
postcopy: Plumb pagesize down into place helpers
postcopy: Record largest page size
postcopy: enhance ram_block_discard_range for hugepages
exec: ram_block_discard_range
postcopy: Chunk discards for hugepages
postcopy: Transmit and compare individual page sizes
postcopy: Transmit ram size summary word
migration: fix use-after-free of to_dst_file
migration: Update docs to discourage version bumps
migration: fix id leak regression
migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'migration')
-rw-r--r-- | migration/migration.c | 36 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 142 | ||||
-rw-r--r-- | migration/ram.c | 109 | ||||
-rw-r--r-- | migration/savevm.c | 38 | ||||
-rw-r--r-- | migration/trace-events | 2 | ||||
-rw-r--r-- | migration/vmstate.c | 97 |
6 files changed, 257 insertions, 167 deletions
diff --git a/migration/migration.c b/migration/migration.c index c6ae69d371..3dab6845b1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -49,6 +49,10 @@ * for sending the last part */ #define DEFAULT_MIGRATE_SET_DOWNTIME 300 +/* Maximum migrate downtime set to 2000 seconds */ +#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + /* Default compression thread count */ #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 /* Default decompression thread count, usually decompression is at @@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque) int ret; mis->from_src_file = f; + mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_ACTIVE); @@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) return; } if (params->has_downtime_limit && - (params->downtime_limit < 0 || params->downtime_limit > 2000000)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "downtime_limit", - "an integer in the range of 0 to 2000000 milliseconds"); + (params->downtime_limit < 0 || + params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d milliseconds", + MAX_MIGRATE_DOWNTIME); return; } if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) { @@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason) migration_blockers = g_slist_remove(migration_blockers, reason); } +int check_migratable(Object *obj, Error **err) +{ + DeviceClass *dc = DEVICE_GET_CLASS(obj); + if (only_migratable && dc->vmsd) { + if (dc->vmsd->unmigratable) { + error_setg(err, "Device %s is not migratable, but " + "--only-migratable was specified", + object_get_typename(obj)); + return -1; + } + } + + return 0; +} + void qmp_migrate_incoming(const char *uri, Error **errp) { Error *local_err = NULL; @@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) void qmp_migrate_set_downtime(double value, Error **errp) { + if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d seconds", + MAX_MIGRATE_DOWNTIME_SECONDS); + return; + } + value *= 1000; /* Convert to milliseconds */ value = MAX(0, MIN(INT64_MAX, value)); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a40dddbaf6..effbeb64fb 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd) return false; } - return true; -} - -/* - * Check for things that postcopy won't support; returns 0 if the block - * is fine. - */ -static int check_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) -{ - RAMBlock *rb = qemu_ram_block_by_name(block_name); - - if (qemu_ram_pagesize(rb) > getpagesize()) { - error_report("Postcopy doesn't support large page sizes yet (%s)", - block_name); - return -E2BIG; + if (getpagesize() != ram_pagesize_summary()) { + bool have_hp = false; + /* We've got a huge page */ +#ifdef UFFD_FEATURE_MISSING_HUGETLBFS + have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; +#endif + if (!have_hp) { + error_report("Userfault on this host does not support huge pages"); + return false; + } } - - return 0; + return true; } /* @@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void) goto out; } - /* Check for anything about the RAMBlocks we don't support */ - if (qemu_ram_foreach_block(check_range, NULL)) { - /* check_range will have printed its own error */ - goto out; - } - ufd = syscall(__NR_userfaultfd, O_CLOEXEC); if (ufd == -1) { error_report("%s: userfaultfd not available: %s", __func__, @@ -200,27 +187,6 @@ out: return ret; } -/** - * postcopy_ram_discard_range: Discard a range of memory. - * We can assume that if we've been called postcopy_ram_hosttest returned true. - * - * @mis: Current incoming migration state. - * @start, @length: range of memory to discard. - * - * returns: 0 on success. - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - trace_postcopy_ram_discard_range(start, length); - if (madvise(start, length, MADV_DONTNEED)) { - error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno)); - return -1; - } - - return 0; -} - /* * Setup an area of RAM so that it *can* be used for postcopy later; this * must be done right at the start prior to pre-copy. @@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr, * - we're going to get the copy from the source anyway. * (Precopy will just overwrite this data, so doesn't need the discard) */ - if (postcopy_ram_discard_range(mis, host_addr, length)) { + if (ram_discard_range(mis, block_name, 0, length)) { return -1; } @@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); if (mis->postcopy_tmp_page) { - munmap(mis->postcopy_tmp_page, getpagesize()); + munmap(mis->postcopy_tmp_page, mis->largest_page_size); mis->postcopy_tmp_page = NULL; } + if (mis->postcopy_tmp_zero_page) { + munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); + mis->postcopy_tmp_zero_page = NULL; + } trace_postcopy_ram_incoming_cleanup_exit(); return 0; } @@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, error_report("%s userfault register: %s", __func__, strerror(errno)); return -1; } + if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + error_report("%s userfault: Region doesn't support COPY", __func__); + return -1; + } return 0; } @@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque) MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; - size_t hostpagesize = getpagesize(); RAMBlock *rb = NULL; RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ @@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque) break; } - rb_offset &= ~(hostpagesize - 1); + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset); @@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque) if (rb != last_rb) { last_rb = rb; migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } else { /* Save some space */ migrate_send_rp_req_pages(mis, NULL, - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } } trace_postcopy_ram_fault_thread_exit(); @@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Place a host page (from) at (host) atomically * returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; - copy_struct.len = getpagesize(); + copy_struct.len = pagesize; copy_struct.mode = 0; /* copy also acks to the kernel waking the stalled thread up @@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) */ if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { int e = errno; - error_report("%s: %s copy host: %p from: %p", - __func__, strerror(e), host, from); + error_report("%s: %s copy host: %p from: %p (size: %zd)", + __func__, strerror(e), host, from, pagesize); return -e; } @@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { - struct uffdio_zeropage zero_struct; + trace_postcopy_place_page_zero(host); - zero_struct.range.start = (uint64_t)(uintptr_t)host; - zero_struct.range.len = getpagesize(); - zero_struct.mode = 0; + if (pagesize == getpagesize()) { + struct uffdio_zeropage zero_struct; + zero_struct.range.start = (uint64_t)(uintptr_t)host; + zero_struct.range.len = getpagesize(); + zero_struct.mode = 0; - if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { - int e = errno; - error_report("%s: %s zero host: %p", - __func__, strerror(e), host); + if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + int e = errno; + error_report("%s: %s zero host: %p", + __func__, strerror(e), host); - return -e; + return -e; + } + } else { + /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ + if (!mis->postcopy_tmp_zero_page) { + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + int e = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: %s mapping large zero page", + __func__, strerror(e)); + return -e; + } + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + } + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, + pagesize); } - trace_postcopy_place_page_zero(host); return 0; } @@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) void *postcopy_get_tmp_page(MigrationIncomingState *mis) { if (!mis->postcopy_tmp_page) { - mis->postcopy_tmp_page = mmap(NULL, getpagesize(), + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mis->postcopy_tmp_page == MAP_FAILED) { @@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) return -1; } -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - assert(0); - return -1; -} - int postcopy_ram_prepare_discard(MigrationIncomingState *mis) { assert(0); @@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return -1; } -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { assert(0); return -1; } -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { assert(0); return -1; diff --git a/migration/ram.c b/migration/ram.c index f289fcddd5..719425b9b8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void) iterations_prev = 0; } +/* Returns a summary bitmap of the page sizes of all RAMBlocks; + * for VMs with just normal pages this is equivalent to the + * host page size. If it's got some huge pages then it's the OR + * of all the different page sizes. + */ +uint64_t ram_pagesize_summary(void) +{ + RAMBlock *block; + uint64_t summary = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + summary |= block->page_size; + } + + return summary; +} + static void migration_bitmap_sync(void) { RAMBlock *block; @@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, * offset to point into the middle of a host page * in which case the remainder of the hostpage is sent. * Only dirty target pages are sent. + * Note that the host page size may be a huge page for this + * block. * * Returns: Number of pages written. * @@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, ram_addr_t dirty_ram_abs) { int tmppages, pages = 0; + size_t pagesize = qemu_ram_pagesize(pss->block); + do { tmppages = ram_save_target_page(ms, f, pss, last_stage, bytes_transferred, dirty_ram_abs); @@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, pages += tmppages; pss->offset += TARGET_PAGE_SIZE; dirty_ram_abs += TARGET_PAGE_SIZE; - } while (pss->offset & (qemu_host_page_size - 1)); + } while (pss->offset & (pagesize - 1)); /* The offset we leave with is the last one we looked at */ pss->offset -= TARGET_PAGE_SIZE; @@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, { unsigned long *bitmap; unsigned long *unsentmap; - unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE; + unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; unsigned long first = block->offset >> TARGET_PAGE_BITS; unsigned long len = block->used_length >> TARGET_PAGE_BITS; unsigned long last = first + (len - 1); unsigned long run_start; + if (block->page_size == TARGET_PAGE_SIZE) { + /* Easy case - TPS==HPS for a non-huge page RAMBlock */ + return; + } + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; @@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * Utility for the outgoing postcopy code. * * Discard any partially sent host-page size chunks, mark any partially - * dirty host-page size chunks as all dirty. + * dirty host-page size chunks as all dirty. In this case the host-page + * is the host-page for the particular RAMBlock, i.e. it might be a huge page * * Returns: 0 on success */ @@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms) { struct RAMBlock *block; - if (qemu_host_page_size == TARGET_PAGE_SIZE) { - /* Easy case - TPS==HPS - nothing to be done */ - return 0; - } - /* Easiest way to make sure we don't resume in the middle of a host-page */ last_seen_block = NULL; last_sent_block = NULL; @@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return -EINVAL; } - /* Deal with TPS != HPS */ + /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms); if (ret) { rcu_read_unlock(); @@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis, { int ret = -1; + trace_ram_discard_range(block_name, start, length); + rcu_read_lock(); RAMBlock *rb = qemu_ram_block_by_name(block_name); @@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis, goto err; } - uint8_t *host_startaddr = rb->host + start; - - if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned start address: %p", - host_startaddr); - goto err; - } - - if ((start + length) <= rb->used_length) { - uint8_t *host_endaddr = host_startaddr + length; - if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned end address: %p", - host_endaddr); - goto err; - } - ret = postcopy_ram_discard_range(mis, host_startaddr, length); - } else { - error_report("ram_discard_range: Overrun block '%s' (%" PRIu64 - "/%zx/" RAM_ADDR_FMT")", - block_name, start, length, rb->used_length); - } + ret = ram_block_discard_range(rb, start, length); err: rcu_read_unlock(); @@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); + if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { + qemu_put_be64(f, block->page_size); + } } rcu_read_unlock(); @@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f) { int flags = 0, ret = 0; bool place_needed = false; - bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE; + bool matching_page_sizes = false; MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = postcopy_get_tmp_page(mis); @@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f) void *host = NULL; void *page_buffer = NULL; void *place_source = NULL; + RAMBlock *block = NULL; uint8_t ch; addr = qemu_get_be64(f); @@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f) trace_ram_load_postcopy_loop((uint64_t)addr, flags); place_needed = false; if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) { - RAMBlock *block = ram_block_from_stream(f, flags); + block = ram_block_from_stream(f, flags); host = host_from_ram_block_offset(block, addr); if (!host) { @@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } + matching_page_sizes = block->page_size == TARGET_PAGE_SIZE; /* - * Postcopy requires that we place whole host pages atomically. + * Postcopy requires that we place whole host pages atomically; + * these may be huge pages for RAMBlocks that are backed by + * hugetlbfs. * To make it atomic, the data is read into a temporary page * that's moved into place later. * The migration protocol uses, possibly smaller, target-pages @@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f) * of a host page in order. */ page_buffer = postcopy_host_page + - ((uintptr_t)host & ~qemu_host_page_mask); + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ - if (!((uintptr_t)host & ~qemu_host_page_mask)) { + if (!((uintptr_t)host & (block->page_size - 1))) { all_zero = true; } else { /* not the 1st TP within the HP */ @@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f) * page */ place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & - ~qemu_host_page_mask) == 0; + (block->page_size - 1)) == 0; place_source = postcopy_host_page; } last_host = host; @@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f) if (place_needed) { /* This gets called at the last target page in the host page */ + void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; + if (all_zero) { - ret = postcopy_place_page_zero(mis, - host + TARGET_PAGE_SIZE - - qemu_host_page_size); + ret = postcopy_place_page_zero(mis, place_dest, + block->page_size); } else { - ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE - - qemu_host_page_size, - place_source); + ret = postcopy_place_page(mis, place_dest, + place_source, block->page_size); } } if (!ret) { @@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * be atomic */ bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE; seq_iter++; @@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); } } + /* For postcopy we need to check hugepage sizes match */ + if (postcopy_advised && + block->page_size != qemu_host_page_size) { + uint64_t remote_page_size = qemu_get_be64(f); + if (remote_page_size != block->page_size) { + error_report("Mismatched RAM page size %s " + "(local) %zd != %" PRId64, + id, block->page_size, + remote_page_size); + ret = -EINVAL; + } + } ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, block->idstr); } else { diff --git a/migration/savevm.c b/migration/savevm.c index 5ecd264134..3b19a4a274 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, return -1; } + g_free(id); se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); @@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) void qemu_savevm_send_postcopy_advise(QEMUFile *f) { uint64_t tmp[2]; - tmp[0] = cpu_to_be64(getpagesize()); + tmp[0] = cpu_to_be64(ram_pagesize_summary()); tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); trace_qemu_savevm_send_postcopy_advise(); @@ -1276,6 +1277,11 @@ done: status = MIGRATION_STATUS_COMPLETED; } migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status); + + /* f is outer parameter, it should not stay in global migration state after + * this function finished */ + ms->to_dst_file = NULL; + return ret; } @@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); - uint64_t remote_hps, remote_tps; + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; trace_loadvm_postcopy_handle_advise(); if (ps != POSTCOPY_INCOMING_NONE) { @@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } - remote_hps = qemu_get_be64(mis->from_src_file); - if (remote_hps != getpagesize()) { + remote_pagesize_summary = qemu_get_be64(mis->from_src_file); + local_pagesize_summary = ram_pagesize_summary(); + + if (remote_pagesize_summary != local_pagesize_summary) { /* - * Some combinations of mismatch are probably possible but it gets - * a bit more complicated. In particular we need to place whole - * host pages on the dest at once, and we need to ensure that we - * handle dirtying to make sure we never end up sending part of - * a hostpage on it's own. + * This detects two potential causes of mismatch: + * a) A mismatch in host page sizes + * Some combinations of mismatch are probably possible but it gets + * a bit more complicated. In particular we need to place whole + * host pages on the dest at once, and we need to ensure that we + * handle dirtying to make sure we never end up sending part of + * a hostpage on it's own. + * b) The use of different huge page sizes on source/destination + * a more fine grain test is performed during RAM block migration + * but this test here causes a nice early clear failure, and + * also fails when passed to an older qemu that doesn't + * do huge pages. */ - error_report("Postcopy needs matching host page sizes (s=%d d=%d)", - (int)remote_hps, getpagesize()); + error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64 + " d=%" PRIx64 ")", + remote_pagesize_summary, local_pagesize_summary); return -1; } diff --git a/migration/trace-events b/migration/trace-events index fa660e35b1..7372ce2a51 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" +ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" ram_postcopy_send_discard_bitmap(void) "" ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx" @@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" # migration/postcopy-ram.c postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx" -postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx" postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" diff --git a/migration/vmstate.c b/migration/vmstate.c index b4d8ae982a..78b3cd48e7 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field) return size; } -static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc) +static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) { - void *base_addr = opaque + field->offset; - - if (field->flags & VMS_POINTER) { - if (alloc && (field->flags & VMS_ALLOC)) { - gsize size = 0; - if (field->flags & VMS_VBUFFER) { - size = vmstate_size(opaque, field); - } else { - int n_elems = vmstate_n_elems(opaque, field); - if (n_elems) { - size = n_elems * field->size; - } - } - if (size) { - *(void **)base_addr = g_malloc(size); - } + if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { + gsize size = vmstate_size(opaque, field); + size *= vmstate_n_elems(opaque, field); + if (size) { + *(void **)ptr = g_malloc(size); } - base_addr = *(void **)base_addr; } - - return base_addr; } int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, @@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, true); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); + vmstate_handle_alloc(first_elem, field, opaque); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - ret = vmstate_load_state(f, field->vmsd, addr, + if (!curr_elem) { + /* if null pointer check placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL); + } else if (field->flags & VMS_STRUCT) { + ret = vmstate_load_state(f, field->vmsd, curr_elem, field->vmsd->version_id); } else { - ret = field->info->get(f, addr, size, field); + ret = field->info->get(f, curr_elem, size, field); } if (ret >= 0) { ret = qemu_file_get_error(f); @@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, false); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); int64_t old_offset, written_bytes; QJSON *vmdesc_loop = vmdesc; trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems); old_offset = qemu_ftell_fast(f); - if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + assert(curr_elem); + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - vmstate_save_state(f, field->vmsd, addr, vmdesc_loop); + if (!curr_elem) { + /* if null pointer write placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL); + } else if (field->flags & VMS_STRUCT) { + vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop); } else { - field->info->put(f, addr, size, field, vmdesc_loop); + field->info->put(f, curr_elem, size, field, vmdesc_loop); } written_bytes = qemu_ftell_fast(f) - old_offset; @@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field) + +{ + if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) { + return 0; + } + error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); + return -EINVAL; +} + +static int put_nullptr(QEMUFile *f, void *pv, size_t size, + VMStateField *field, QJSON *vmdesc) + +{ + if (pv == NULL) { + qemu_put_byte(f, VMS_NULLPTR_MARKER); + return 0; + } + error_report("vmstate: put_nullptr must be called with pv == NULL"); + return -EINVAL; +} + +const VMStateInfo vmstate_info_nullptr = { + .name = "uint64", + .get = get_nullptr, + .put = put_nullptr, +}; + /* 64 bit unsigned int. See that the received value is the same than the one in the field */ |