author    Peter Maydell <peter.maydell@linaro.org>  2018-03-20 15:48:34 +0000
committer Peter Maydell <peter.maydell@linaro.org>  2018-03-20 15:48:34 +0000
commit    ed627b2ad37469eeba9e9ed5fecfe315df9ecc60 (patch)
tree      aca1c6bddbaa61ffe2d029b123539fe20e6ecddc /migration
parent    4aafb1b192e5d3685e94cefdce63343a86d64647 (diff)
parent    1dc61e7b37d339c42ec9bd7a7eec1ef2c22f351c (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio,vhost,pci,pc: features, cleanups

  SRAT tables for DIMM devices
  new virtio net flags for speed/duplex
  post-copy migration support in vhost
  cleanups in pci

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 20 Mar 2018 14:40:43 GMT
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (51 commits)
  postcopy shared docs
  libvhost-user: Claim support for postcopy
  postcopy: Allow shared memory
  vhost: Huge page align and merge
  vhost+postcopy: Wire up POSTCOPY_END notify
  vhost-user: Add VHOST_USER_POSTCOPY_END message
  libvhost-user: mprotect & madvises for postcopy
  vhost+postcopy: Call wakeups
  vhost+postcopy: Add vhost waker
  postcopy: postcopy_notify_shared_wake
  postcopy: helper for waking shared
  vhost+postcopy: Resolve client address
  postcopy-ram: add a stub for postcopy_request_shared_page
  vhost+postcopy: Helper to send requests to source for shared pages
  vhost+postcopy: Stash RAMBlock and offset
  vhost+postcopy: Send address back to qemu
  libvhost-user+postcopy: Register new regions with the ufd
  migration/ram: ramblock_recv_bitmap_test_byte_offset
  postcopy+vhost-user: Split set_mem_table for postcopy
  vhost+postcopy: Transmit 'listen' to slave
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
#   scripts/update-linux-headers.sh
Diffstat (limited to 'migration')
-rw-r--r--  migration/migration.c    |   6
-rw-r--r--  migration/migration.h    |   4
-rw-r--r--  migration/postcopy-ram.c | 360
-rw-r--r--  migration/postcopy-ram.h |  73
-rw-r--r--  migration/ram.c          |   5
-rw-r--r--  migration/ram.h          |   1
-rw-r--r--  migration/savevm.c       |  13
-rw-r--r--  migration/trace-events   |   6
8 files changed, 402 insertions, 66 deletions
diff --git a/migration/migration.c b/migration/migration.c
index 623f373326..fc629e5965 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void)
if (!once) {
mis_current.state = MIGRATION_STATUS_NONE;
memset(&mis_current, 0, sizeof(MigrationIncomingState));
+ mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE,
+ sizeof(struct PostCopyFD));
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
@@ -180,6 +182,10 @@ void migration_incoming_state_destroy(void)
qemu_fclose(mis->from_src_file);
mis->from_src_file = NULL;
}
+ if (mis->postcopy_remote_fds) {
+ g_array_free(mis->postcopy_remote_fds, TRUE);
+ mis->postcopy_remote_fds = NULL;
+ }
qemu_event_reset(&mis->main_thread_load_event);
}
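
For reference, the GArray calls used above (and by the unregister path later in
this series) behave as follows; a minimal standalone sketch, with PostCopyFD
reduced to a single fd field for illustration:

#include <glib.h>

struct PostCopyFD { int fd; };  /* reduced to one field for illustration */

int main(void)
{
    /* FALSE: no zero terminator; TRUE: new elements are zero-initialised */
    GArray *fds = g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    struct PostCopyFD pcfd = { .fd = 42 };

    fds = g_array_append_val(fds, pcfd);   /* appends a copy of pcfd */
    struct PostCopyFD *cur = &g_array_index(fds, struct PostCopyFD, 0);
    g_assert(cur->fd == 42);

    g_array_remove_index(fds, 0);          /* removal by index, as in unregister */
    g_array_free(fds, TRUE);               /* TRUE also frees the element data */
    return 0;
}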
diff --git a/migration/migration.h b/migration/migration.h
index a79540b99c..8d2f320c48 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -49,8 +49,12 @@ struct MigrationIncomingState {
int userfault_event_fd;
QEMUFile *to_src_file;
QemuMutex rp_mutex; /* We send replies from multiple threads */
+ /* RAMBlock of last request sent to source */
+ RAMBlock *last_rb;
void *postcopy_tmp_page;
void *postcopy_tmp_zero_page;
+ /* PostCopyFD's for external userfaultfds & handlers of shared memory */
+ GArray *postcopy_remote_fds;
QEMUBH *bh;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 032abfbf1a..efd77939af 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -23,6 +23,8 @@
#include "savevm.h"
#include "postcopy-ram.h"
#include "ram.h"
+#include "qapi/error.h"
+#include "qemu/notify.h"
#include "sysemu/sysemu.h"
#include "sysemu/balloon.h"
#include "qemu/error-report.h"
@@ -45,6 +47,33 @@ struct PostcopyDiscardState {
unsigned int nsentcmds;
};
+static NotifierWithReturnList postcopy_notifier_list;
+
+void postcopy_infrastructure_init(void)
+{
+ notifier_with_return_list_init(&postcopy_notifier_list);
+}
+
+void postcopy_add_notifier(NotifierWithReturn *nn)
+{
+ notifier_with_return_list_add(&postcopy_notifier_list, nn);
+}
+
+void postcopy_remove_notifier(NotifierWithReturn *n)
+{
+ notifier_with_return_remove(n);
+}
+
+int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp)
+{
+ struct PostcopyNotifyData pnd;
+ pnd.reason = reason;
+ pnd.errp = errp;
+
+ return notifier_with_return_list_notify(&postcopy_notifier_list,
+ &pnd);
+}
+
/* Postcopy needs to detect accesses to pages that haven't yet been copied
* across, and efficiently map new pages in, the techniques for doing this
* are target OS specific.
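
These hooks let a device veto or prepare for postcopy. A hedged sketch of a
client, assuming the NotifierWithReturn callback signature from qemu/notify.h
at this point in the tree (int (*)(NotifierWithReturn *, void *));
my_postcopy_notify, my_notifier and my_device_init are illustrative names:

static int my_postcopy_notify(NotifierWithReturn *notifier, void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        /* A non-zero return vetoes postcopy; an error must be set */
        error_setg(pnd->errp, "my-device: postcopy not supported");
        return -1;
    default:
        return 0;
    }
}

static NotifierWithReturn my_notifier = {
    .notify = my_postcopy_notify,
};

static void my_device_init(void)
{
    postcopy_add_notifier(&my_notifier);
}

The same callback fires again with POSTCOPY_NOTIFY_INBOUND_ADVISE and
POSTCOPY_NOTIFY_INBOUND_LISTEN from savevm.c, and with
POSTCOPY_NOTIFY_INBOUND_END from the cleanup path below.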
@@ -186,12 +215,6 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
RAMBlock *rb = qemu_ram_block_by_name(block_name);
size_t pagesize = qemu_ram_pagesize(rb);
- if (qemu_ram_is_shared(rb)) {
- error_report("Postcopy on shared RAM (%s) is not yet supported",
- block_name);
- return 1;
- }
-
if (length % pagesize) {
error_report("Postcopy requires RAM blocks to be a page size multiple,"
" block %s is 0x" RAM_ADDR_FMT " bytes with a "
@@ -215,6 +238,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
struct uffdio_register reg_struct;
struct uffdio_range range_struct;
uint64_t feature_mask;
+ Error *local_err = NULL;
if (qemu_target_page_size() > pagesize) {
error_report("Target page size bigger than host page size");
@@ -228,6 +252,12 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
goto out;
}
+ /* Give devices a chance to object */
+ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) {
+ error_report_err(local_err);
+ goto out;
+ }
+
/* Version and features check */
if (!ufd_check_and_apply(ufd, mis)) {
goto out;
@@ -377,6 +407,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
trace_postcopy_ram_incoming_cleanup_entry();
if (mis->have_fault_thread) {
+ Error *local_err = NULL;
+
+ if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+
if (qemu_ram_foreach_block(cleanup_range, mis)) {
return -1;
}
@@ -481,10 +518,63 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
error_report("%s userfault: Region doesn't support COPY", __func__);
return -1;
}
+ if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
+ RAMBlock *rb = qemu_ram_block_by_name(block_name);
+ qemu_ram_set_uf_zeroable(rb);
+ }
return 0;
}
+int postcopy_wake_shared(struct PostCopyFD *pcfd,
+ uint64_t client_addr,
+ RAMBlock *rb)
+{
+ size_t pagesize = qemu_ram_pagesize(rb);
+ struct uffdio_range range;
+ int ret;
+ trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb));
+ range.start = client_addr & ~(pagesize - 1);
+ range.len = pagesize;
+ ret = ioctl(pcfd->fd, UFFDIO_WAKE, &range);
+ if (ret) {
+ error_report("%s: Failed to wake: %zx in %s (%s)",
+ __func__, (size_t)client_addr, qemu_ram_get_idstr(rb),
+ strerror(errno));
+ }
+ return ret;
+}
+
+/*
+ * Callback from shared fault handlers to ask for a page,
+ * the page must be specified by a RAMBlock and an offset in that rb
+ * Note: Only for use by shared fault handlers (in fault thread)
+ */
+int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
+ uint64_t client_addr, uint64_t rb_offset)
+{
+ size_t pagesize = qemu_ram_pagesize(rb);
+ uint64_t aligned_rbo = rb_offset & ~(pagesize - 1);
+ MigrationIncomingState *mis = migration_incoming_get_current();
+
+ trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb),
+ rb_offset);
+ if (ramblock_recv_bitmap_test_byte_offset(rb, aligned_rbo)) {
+ trace_postcopy_request_shared_page_present(pcfd->idstr,
+ qemu_ram_get_idstr(rb), rb_offset);
+ return postcopy_wake_shared(pcfd, client_addr, rb);
+ }
+ if (rb != mis->last_rb) {
+ mis->last_rb = rb;
+ migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+ aligned_rbo, pagesize);
+ } else {
+ /* Save some space */
+ migrate_send_rp_req_pages(mis, NULL, aligned_rbo, pagesize);
+ }
+ return 0;
+}
+
/*
* Handle faults detected by the USERFAULT markings
*/
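
For context, the shared fault handler that calls this helper lives in device
code and receives raw uffd_msg events from the fault thread (see the
pcfdhandler typedef added to postcopy-ram.h below). A rough sketch, where
my_translate() is a hypothetical lookup from the client's address to a
(RAMBlock, offset) pair that real device code derives from its region table:

/* Sketch of a pcfdhandler; my_translate() is hypothetical */
static int my_fault_handler(struct PostCopyFD *pcfd, void *ufd)
{
    struct uffd_msg *msg = ufd;
    uint64_t client_addr = msg->arg.pagefault.address;
    RAMBlock *rb;
    uint64_t rb_offset;

    if (my_translate(pcfd->data, client_addr, &rb, &rb_offset)) {
        return -1; /* fault outside any region shared with this client */
    }
    return postcopy_request_shared_page(pcfd, rb, client_addr, rb_offset);
}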
@@ -493,29 +583,44 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
+ size_t index;
RAMBlock *rb = NULL;
- RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
trace_postcopy_ram_fault_thread_entry();
+ mis->last_rb = NULL; /* last RAMBlock we sent part of */
qemu_sem_post(&mis->fault_thread_sem);
+ struct pollfd *pfd;
+ size_t pfd_len = 2 + mis->postcopy_remote_fds->len;
+
+ pfd = g_new0(struct pollfd, pfd_len);
+
+ pfd[0].fd = mis->userfault_fd;
+ pfd[0].events = POLLIN;
+ pfd[1].fd = mis->userfault_event_fd;
+ pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+ trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
+ for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
+ struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
+ struct PostCopyFD, index);
+ pfd[2 + index].fd = pcfd->fd;
+ pfd[2 + index].events = POLLIN;
+ trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
+ pcfd->fd);
+ }
+
while (true) {
ram_addr_t rb_offset;
- struct pollfd pfd[2];
+ int poll_result;
/*
* We're mainly waiting for the kernel to give us a faulting HVA,
* however we can be told to quit via userfault_quit_fd which is
* an eventfd
*/
- pfd[0].fd = mis->userfault_fd;
- pfd[0].events = POLLIN;
- pfd[0].revents = 0;
- pfd[1].fd = mis->userfault_event_fd;
- pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
- pfd[1].revents = 0;
-
- if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+
+ poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
+ if (poll_result == -1) {
error_report("%s: userfault poll: %s", __func__, strerror(errno));
break;
}
@@ -535,57 +640,117 @@ static void *postcopy_ram_fault_thread(void *opaque)
}
}
- ret = read(mis->userfault_fd, &msg, sizeof(msg));
- if (ret != sizeof(msg)) {
- if (errno == EAGAIN) {
- /*
- * if a wake up happens on the other thread just after
- * the poll, there is nothing to read.
- */
- continue;
+ if (pfd[0].revents) {
+ poll_result--;
+ ret = read(mis->userfault_fd, &msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ if (errno == EAGAIN) {
+ /*
+ * if a wake up happens on the other thread just after
+ * the poll, there is nothing to read.
+ */
+ continue;
+ }
+ if (ret < 0) {
+ error_report("%s: Failed to read full userfault "
+ "message: %s",
+ __func__, strerror(errno));
+ break;
+ } else {
+ error_report("%s: Read %d bytes from userfaultfd "
+ "expected %zd",
+ __func__, ret, sizeof(msg));
+ break; /* Lost alignment, don't know what we'd read next */
+ }
}
- if (ret < 0) {
- error_report("%s: Failed to read full userfault message: %s",
- __func__, strerror(errno));
- break;
- } else {
- error_report("%s: Read %d bytes from userfaultfd expected %zd",
- __func__, ret, sizeof(msg));
- break; /* Lost alignment, don't know what we'd read next */
+ if (msg.event != UFFD_EVENT_PAGEFAULT) {
+ error_report("%s: Read unexpected event %ud from userfaultfd",
+ __func__, msg.event);
+ continue; /* It's not a page fault, shouldn't happen */
}
- }
- if (msg.event != UFFD_EVENT_PAGEFAULT) {
- error_report("%s: Read unexpected event %ud from userfaultfd",
- __func__, msg.event);
- continue; /* It's not a page fault, shouldn't happen */
- }
- rb = qemu_ram_block_from_host(
- (void *)(uintptr_t)msg.arg.pagefault.address,
- true, &rb_offset);
- if (!rb) {
- error_report("postcopy_ram_fault_thread: Fault outside guest: %"
- PRIx64, (uint64_t)msg.arg.pagefault.address);
- break;
- }
+ rb = qemu_ram_block_from_host(
+ (void *)(uintptr_t)msg.arg.pagefault.address,
+ true, &rb_offset);
+ if (!rb) {
+ error_report("postcopy_ram_fault_thread: Fault outside guest: %"
+ PRIx64, (uint64_t)msg.arg.pagefault.address);
+ break;
+ }
- rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
- trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
+ rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
+ trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
+ /*
+ * Send the request to the source - we want to request one
+ * of our host page sizes (which is >= TPS)
+ */
+ if (rb != mis->last_rb) {
+ mis->last_rb = rb;
+ migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+ rb_offset, qemu_ram_pagesize(rb));
+ } else {
+ /* Save some space */
+ migrate_send_rp_req_pages(mis, NULL,
+ rb_offset, qemu_ram_pagesize(rb));
+ }
+ }
- /*
- * Send the request to the source - we want to request one
- * of our host page sizes (which is >= TPS)
- */
- if (rb != last_rb) {
- last_rb = rb;
- migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, qemu_ram_pagesize(rb));
- } else {
- /* Save some space */
- migrate_send_rp_req_pages(mis, NULL,
- rb_offset, qemu_ram_pagesize(rb));
+ /* Now handle any requests from external processes on shared memory */
+ /* TODO: May need to handle devices deregistering during postcopy */
+ for (index = 2; index < pfd_len && poll_result; index++) {
+ if (pfd[index].revents) {
+ struct PostCopyFD *pcfd =
+ &g_array_index(mis->postcopy_remote_fds,
+ struct PostCopyFD, index - 2);
+
+ poll_result--;
+ if (pfd[index].revents & POLLERR) {
+ error_report("%s: POLLERR on poll %zd fd=%d",
+ __func__, index, pcfd->fd);
+ pfd[index].events = 0;
+ continue;
+ }
+
+ ret = read(pcfd->fd, &msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ if (errno == EAGAIN) {
+ /*
+ * if a wake up happens on the other thread just after
+ * the poll, there is nothing to read.
+ */
+ continue;
+ }
+ if (ret < 0) {
+ error_report("%s: Failed to read full userfault "
+ "message: %s (shared) revents=%d",
+ __func__, strerror(errno),
+ pfd[index].revents);
+ /*TODO: Could just disable this sharer */
+ break;
+ } else {
+ error_report("%s: Read %d bytes from userfaultfd "
+ "expected %zd (shared)",
+ __func__, ret, sizeof(msg));
+ /*TODO: Could just disable this sharer */
+ break; /*Lost alignment,don't know what we'd read next*/
+ }
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT) {
+ error_report("%s: Read unexpected event %ud "
+ "from userfaultfd (shared)",
+ __func__, msg.event);
+ continue; /* It's not a page fault, shouldn't happen */
+ }
+ /* Call the device handler registered with us */
+ ret = pcfd->handler(pcfd, &msg);
+ if (ret) {
+ error_report("%s: Failed to resolve shared fault on %zd/%s",
+ __func__, index, pcfd->idstr);
+ /* TODO: Fail? Disable this sharer? */
+ }
+ }
}
}
trace_postcopy_ram_fault_thread_exit();
@@ -667,6 +832,22 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
return ret;
}
+int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset)
+{
+ int i;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ GArray *pcrfds = mis->postcopy_remote_fds;
+
+ for (i = 0; i < pcrfds->len; i++) {
+ struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
+ int ret = cur->waker(cur, rb, offset);
+ if (ret) {
+ return ret;
+ }
+ }
+ return 0;
+}
+
/*
* Place a host page (from) at (host) atomically
* returns 0 on success
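
The waker side is the mirror image: once a page has been placed, each
registered waker checks whether its client maps the affected RAMBlock and, if
so, calls postcopy_wake_shared() with the client-side address. A sketch, with
my_region_base() as a hypothetical helper returning the client's base address
for a block it maps:

/* Sketch of a pcfdwake callback; my_region_base() is hypothetical */
static int my_waker(struct PostCopyFD *pcfd, RAMBlock *rb, uint64_t offset)
{
    uint64_t client_base;

    if (my_region_base(pcfd->data, rb, &client_base)) {
        return 0; /* not a block this client maps: nothing to wake */
    }
    return postcopy_wake_shared(pcfd, client_base + offset, rb);
}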
@@ -690,7 +871,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
}
trace_postcopy_place_page(host);
- return 0;
+ return postcopy_notify_shared_wake(rb,
+ qemu_ram_block_host_offset(rb, host));
}
/*
@@ -700,17 +882,23 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
RAMBlock *rb)
{
+ size_t pagesize = qemu_ram_pagesize(rb);
trace_postcopy_place_page_zero(host);
- if (qemu_ram_pagesize(rb) == getpagesize()) {
- if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
- rb)) {
+ /* Normal RAMBlocks can zero a page using UFFDIO_ZEROPAGE
+ * but it's not available for everything (e.g. hugetlbpages)
+ */
+ if (qemu_ram_is_uf_zeroable(rb)) {
+ if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) {
int e = errno;
error_report("%s: %s zero host: %p",
__func__, strerror(e), host);
return -e;
}
+ return postcopy_notify_shared_wake(rb,
+ qemu_ram_block_host_offset(rb,
+ host));
} else {
/* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
if (!mis->postcopy_tmp_zero_page) {
@@ -730,8 +918,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
rb);
}
-
- return 0;
}
/*
@@ -784,6 +970,13 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
return -1;
}
+int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
+ uint64_t client_addr, uint64_t rb_offset)
+{
+ assert(0);
+ return -1;
+}
+
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
assert(0);
@@ -810,6 +1003,13 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
return NULL;
}
+int postcopy_wake_shared(struct PostCopyFD *pcfd,
+ uint64_t client_addr,
+ RAMBlock *rb)
+{
+ assert(0);
+ return -1;
+}
#endif
/* ------------------------------------------------------------------------- */
@@ -927,3 +1127,31 @@ PostcopyState postcopy_state_set(PostcopyState new_state)
{
return atomic_xchg(&incoming_postcopy_state, new_state);
}
+
+/* Register a handler for external shared memory postcopy
+ * called on the destination.
+ */
+void postcopy_register_shared_ufd(struct PostCopyFD *pcfd)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+
+ mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds,
+ *pcfd);
+}
+
+/* Unregister a handler for external shared memory postcopy
+ */
+void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
+{
+ guint i;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ GArray *pcrfds = mis->postcopy_remote_fds;
+
+ for (i = 0; i < pcrfds->len; i++) {
+ struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
+ if (cur->fd == pcfd->fd) {
+ mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i);
+ return;
+ }
+ }
+}
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 14f6cadcbd..d900d9c34f 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -116,4 +116,77 @@ PostcopyState postcopy_state_set(PostcopyState new_state);
void postcopy_fault_thread_notify(MigrationIncomingState *mis);
+/*
+ * To be called once at the start before any device initialisation
+ */
+void postcopy_infrastructure_init(void);
+
+/* Add a notifier to a list to be called when checking whether the devices
+ * can support postcopy.
+ * Its data is a *PostcopyNotifyData
+ * It should return 0 if OK, or a negative value on failure.
+ * On failure it must set the data->errp to an error.
+ *
+ */
+enum PostcopyNotifyReason {
+ POSTCOPY_NOTIFY_PROBE = 0,
+ POSTCOPY_NOTIFY_INBOUND_ADVISE,
+ POSTCOPY_NOTIFY_INBOUND_LISTEN,
+ POSTCOPY_NOTIFY_INBOUND_END,
+};
+
+struct PostcopyNotifyData {
+ enum PostcopyNotifyReason reason;
+ Error **errp;
+};
+
+void postcopy_add_notifier(NotifierWithReturn *nn);
+void postcopy_remove_notifier(NotifierWithReturn *n);
+/* Call the notifier list set by postcopy_add_notifier */
+int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp);
+
+struct PostCopyFD;
+
+/* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */
+typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd);
+/* Notification to wake, either on place or on reception of
+ * a fault on something that's already arrived (race)
+ */
+typedef int (*pcfdwake)(struct PostCopyFD *pcfd, RAMBlock *rb, uint64_t offset);
+
+struct PostCopyFD {
+ int fd;
+ /* Data to pass to handler */
+ void *data;
+ /* Handler to be called whenever we get a poll event */
+ pcfdhandler handler;
+ /* Notification to wake shared client */
+ pcfdwake waker;
+ /* A string to use in error messages */
+ const char *idstr;
+};
+
+/* Register a userfaultfd owned by an external process for
+ * shared memory.
+ */
+void postcopy_register_shared_ufd(struct PostCopyFD *pcfd);
+void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd);
+/* Call each of the registered shared 'waker' callbacks, telling them of
+ * the availability of a block.
+ */
+int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset);
+/* postcopy_wake_shared: Notify a client ufd that a page is available
+ *
+ * Returns 0 on success
+ *
+ * @pcfd: Structure with fd, handler and name as above
+ * @client_addr: Address in the client program, not QEMU
+ * @rb: The RAMBlock the page is in
+ */
+int postcopy_wake_shared(struct PostCopyFD *pcfd, uint64_t client_addr,
+ RAMBlock *rb);
+/* Callback from shared fault handlers to ask for a page */
+int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
+ uint64_t client_addr, uint64_t offset);
+
#endif
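
Putting the header's pieces together: a device sharing guest memory with an
external process fills in a PostCopyFD once it holds the external userfaultfd
and registers it. A hedged sketch in which uffd, dev, my_fault_handler and
my_waker are placeholders:

struct PostCopyFD pcfd = {
    .fd      = uffd,             /* userfaultfd received from the client */
    .data    = dev,              /* device state handed back to callbacks */
    .handler = my_fault_handler, /* run by the fault thread on POLLIN */
    .waker   = my_waker,         /* run whenever a page is placed */
    .idstr   = "my-device",      /* only used in errors and trace events */
};

postcopy_register_shared_ufd(&pcfd);
/* ... postcopy runs ... */
postcopy_unregister_shared_ufd(&pcfd); /* matched on .fd */

Entries are stored by value in the GArray, so the registered struct is copied
and unregistration matches on the fd field rather than the pointer.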
diff --git a/migration/ram.c b/migration/ram.c
index 590fceb7e9..0e90efa092 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -169,6 +169,11 @@ int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
rb->receivedmap);
}
+bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
+{
+ return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
+}
+
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
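
The new byte-offset variant converts a RAMBlock-relative offset to a
target-page index before testing the received bitmap; a small worked example,
assuming TARGET_PAGE_BITS == 12 (4 KiB target pages):

/* Assumption: TARGET_PAGE_BITS == 12. Byte offset 0x5000 into the
 * block is target page 5, so the call reduces to
 * test_bit(5, rb->receivedmap). */
uint64_t byte_offset = 0x5000;
unsigned long page_index = byte_offset >> 12; /* == 5 */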
diff --git a/migration/ram.h b/migration/ram.h
index 53f0021c51..5030be110a 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -60,6 +60,7 @@ int ram_postcopy_incoming_init(MigrationIncomingState *mis);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr);
+bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset);
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr);
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr);
diff --git a/migration/savevm.c b/migration/savevm.c
index f417cef7d5..e2be02afe4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1395,6 +1395,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
+ Error *local_err = NULL;
trace_loadvm_postcopy_handle_advise();
if (ps != POSTCOPY_INCOMING_NONE) {
@@ -1460,6 +1461,11 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
return -1;
}
+ if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+
if (ram_postcopy_incoming_init(mis)) {
return -1;
}
@@ -1621,6 +1627,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
trace_loadvm_postcopy_handle_listen();
+ Error *local_err = NULL;
+
if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
return -1;
@@ -1646,6 +1654,11 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
}
}
+ if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+
if (mis->have_listen_thread) {
error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
return -1;
diff --git a/migration/trace-events b/migration/trace-events
index 314e1be6bc..a180d7b008 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -190,12 +190,18 @@ postcopy_place_page_zero(void *host_addr) "host=%p"
postcopy_ram_enable_notify(void) ""
postcopy_ram_fault_thread_entry(void) ""
postcopy_ram_fault_thread_exit(void) ""
+postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d"
+postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d"
postcopy_ram_fault_thread_quit(void) ""
postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx"
postcopy_ram_incoming_cleanup_closeuf(void) ""
postcopy_ram_incoming_cleanup_entry(void) ""
postcopy_ram_incoming_cleanup_exit(void) ""
postcopy_ram_incoming_cleanup_join(void) ""
+postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64
+postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64
+postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s"
+
save_xbzrle_page_skipping(void) ""
save_xbzrle_page_overflow(void) ""
ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations"