From 2b0ce0797d6bfb13ebefe010da86abced0b7a9b3 Mon Sep 17 00:00:00 2001 From: "Michael R. Hines" Date: Tue, 25 Jun 2013 21:35:28 -0400 Subject: rdma: introduce qemu_update_position() RDMA writes happen asynchronously, and thus the performance accounting also needs to be able to occur asynchronously. This allows anybody to call into savevm.c to update both f->pos as well as into arch_init.c to update the acct_info structure with up-to-date values when the RDMA transfer actually completes. Reviewed-by: Juan Quintela Tested-by: Chegu Vinod Tested-by: Michael R. Hines Signed-off-by: Michael R. Hines Signed-off-by: Juan Quintela --- include/migration/migration.h | 2 ++ include/migration/qemu-file.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/migration') diff --git a/include/migration/migration.h b/include/migration/migration.h index e2acec64c0..0be28a288a 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -92,6 +92,8 @@ uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_transferred(void); uint64_t ram_bytes_total(void); +void acct_update_position(QEMUFile *f, size_t size, bool zero); + extern SaveVMHandlers savevm_ram_handlers; uint64_t dup_mig_bytes_transferred(void); diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 7519464192..8fab0dd752 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -93,6 +93,7 @@ void qemu_put_be32(QEMUFile *f, unsigned int v); void qemu_put_be64(QEMUFile *f, uint64_t v); int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size); int qemu_get_byte(QEMUFile *f); +void qemu_update_position(QEMUFile *f, size_t size); static inline unsigned int qemu_get_ubyte(QEMUFile *f) { -- cgit v1.2.3 From 7e114f8cf24a01893226e3a4d22a288125515cfd Mon Sep 17 00:00:00 2001 From: "Michael R. Hines" Date: Tue, 25 Jun 2013 21:35:30 -0400 Subject: rdma: export throughput w/ MigrationStats QMP This exposes throughput (in megabits/sec) through QMP. 
Reviewed-by: Juan Quintela Reviewed-by: Paolo Bonzini Reviewed-by: Chegu Vinod Tested-by: Chegu Vinod Tested-by: Michael R. Hines Signed-off-by: Michael R. Hines Signed-off-by: Juan Quintela --- include/migration/migration.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/migration') diff --git a/include/migration/migration.h b/include/migration/migration.h index 0be28a288a..535e844880 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -40,6 +40,7 @@ struct MigrationState int state; MigrationParams params; + double mbps; int64_t total_time; int64_t downtime; int64_t expected_downtime; -- cgit v1.2.3 From bc1256f7f187cc7d491bfe3861249a60873adbbc Mon Sep 17 00:00:00 2001 From: "Michael R. Hines" Date: Tue, 25 Jun 2013 21:35:31 -0400 Subject: rdma: introduce qemu_file_mode_is_not_valid() QEMUFileRDMA also has read and write modes. This function is now shared to reduce code duplication. Reviewed-by: Juan Quintela Reviewed-by: Paolo Bonzini Reviewed-by: Chegu Vinod Tested-by: Chegu Vinod Tested-by: Michael R. Hines Signed-off-by: Michael R. Hines Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/migration') diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 8fab0dd752..dd3fd5155e 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -80,6 +80,7 @@ void qemu_put_byte(QEMUFile *f, int v); * The buffer should be available till it is sent asynchronously. */ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size); +bool qemu_file_mode_is_not_valid(const char *mode); static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v) { -- cgit v1.2.3 From be903b2ae7ca750bde2549432c5536087436cf49 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Hines" Date: Tue, 25 Jun 2013 21:35:32 -0400 Subject: rdma: export qemu_fflush() RDMA uses this to flush the control channel before sending its own message to handle page registrations. Reviewed-by: Juan Quintela Reviewed-by: Paolo Bonzini Reviewed-by: Chegu Vinod Tested-by: Chegu Vinod Tested-by: Michael R. Hines Signed-off-by: Michael R. Hines Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/migration') diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index dd3fd5155e..37d1604065 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -112,6 +112,7 @@ void qemu_file_reset_rate_limit(QEMUFile *f); void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); +void qemu_fflush(QEMUFile *f); static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) { -- cgit v1.2.3 From 43487c678d6e4e7182bfa70d2bc75422578782aa Mon Sep 17 00:00:00 2001 From: "Michael R. Hines" Date: Tue, 25 Jun 2013 21:35:35 -0400 Subject: rdma: new QEMUFileOps hooks These are the prototypes and implementation of new hooks that RDMA takes advantage of to perform dynamic page registration. An optional hook is also introduced for a custom function to be able to override the default save_page function. Also included are the prototypes and accessor methods used by arch_init.c which invoke functions inside savevm.c to call out to the hooks that may or may not have been overridden inside of QEMUFileOps. Reviewed-by: Juan Quintela Reviewed-by: Paolo Bonzini Reviewed-by: Chegu Vinod Tested-by: Chegu Vinod Tested-by: Michael R. Hines Signed-off-by: Michael R. 
Hines Signed-off-by: Juan Quintela --- include/migration/migration.h | 20 ++++++++++++++++++++ include/migration/qemu-file.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'include/migration') diff --git a/include/migration/migration.h b/include/migration/migration.h index 535e844880..3ddd64ffcb 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -21,6 +21,7 @@ #include "qapi/error.h" #include "migration/vmstate.h" #include "qapi-types.h" +#include "exec/cpu-common.h" struct MigrationParams { bool blk; @@ -130,4 +131,23 @@ int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); int64_t xbzrle_cache_resize(int64_t new_size); + +void ram_control_before_iterate(QEMUFile *f, uint64_t flags); +void ram_control_after_iterate(QEMUFile *f, uint64_t flags); +void ram_control_load_hook(QEMUFile *f, uint64_t flags); + +/* Whenever this is found in the data stream, the flags + * will be passed to ram_control_load_hook in the incoming-migration + * side. This lets before_ram_iterate/after_ram_iterate add + * transport-specific sections to the RAM migration data. + */ +#define RAM_SAVE_FLAG_HOOK 0x80 + +#define RAM_SAVE_CONTROL_NOT_SUPP -1000 +#define RAM_SAVE_CONTROL_DELAYED -2000 + +size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + ram_addr_t offset, size_t size, + int *bytes_sent); + #endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 37d1604065..0f757fbeb6 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -23,6 +23,7 @@ */ #ifndef QEMU_FILE_H #define QEMU_FILE_H 1 +#include "exec/cpu-common.h" /* This function writes a chunk of data to a file at the given position. 
* The pos argument can be ignored if the file is only being used for @@ -57,12 +58,40 @@ typedef int (QEMUFileGetFD)(void *opaque); typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov, int iovcnt, int64_t pos); +/* + * This function provides hooks around different + * stages of RAM migration. + */ +typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags); + +/* + * Constants used by ram_control_* hooks + */ +#define RAM_CONTROL_SETUP 0 +#define RAM_CONTROL_ROUND 1 +#define RAM_CONTROL_HOOK 2 +#define RAM_CONTROL_FINISH 3 + +/* + * This function allows override of where the RAM page + * is saved (such as RDMA, for example.) + */ +typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque, + ram_addr_t block_offset, + ram_addr_t offset, + size_t size, + int *bytes_sent); + typedef struct QEMUFileOps { QEMUFilePutBufferFunc *put_buffer; QEMUFileGetBufferFunc *get_buffer; QEMUFileCloseFunc *close; QEMUFileGetFD *get_fd; QEMUFileWritevBufferFunc *writev_buffer; + QEMURamHookFunc *before_ram_iterate; + QEMURamHookFunc *after_ram_iterate; + QEMURamHookFunc *hook_ram_load; + QEMURamSaveFunc *save_page; } QEMUFileOps; QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); -- cgit v1.2.3 From 60d9222c8f50c3e5dd3df9ee84ddd1d1c4b35389 Mon Sep 17 00:00:00 2001 From: "Michael R. Hines" Date: Tue, 25 Jun 2013 21:35:36 -0400 Subject: rdma: introduce capability x-rdma-pin-all This capability allows you to disable dynamic chunk registration for better throughput on high-performance links. For example, using an 8GB RAM virtual machine with all 8GB of memory in active use and the VM itself is completely idle using a 40 gbps infiniband link: 1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps 2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps These numbers would of course scale up to whatever size virtual machine you have to migrate using RDMA. 
Enabling this feature does *not* have any measurable effect on migration *downtime*. This is because, without this feature, all of the memory will already have been registered in advance during the bulk round and does not need to be re-registered during the successive iteration rounds. Reviewed-by: Juan Quintela Reviewed-by: Paolo Bonzini Reviewed-by: Chegu Vinod Reviewed-by: Eric Blake Tested-by: Chegu Vinod Tested-by: Michael R. Hines Signed-off-by: Michael R. Hines Signed-off-by: Juan Quintela --- include/migration/migration.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/migration') diff --git a/include/migration/migration.h b/include/migration/migration.h index 3ddd64ffcb..f0640e0eec 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -123,6 +123,8 @@ void migrate_add_blocker(Error *reason); */ void migrate_del_blocker(Error *reason); +bool migrate_rdma_pin_all(void); + int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst, int dlen); int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); -- cgit v1.2.3