diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2012-08-13 16:02:11 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-08-13 16:02:11 -0500 |
commit | ac839ccd8c30fe5706cc43f00e056049d6e55377 (patch) | |
tree | d2e37ee8bd7750ce1dd01c4dd6cec21a87551682 | |
parent | 6a1f9d0c1fbf186d3d68cb2af43d047637c93072 (diff) | |
parent | dd051c7217eae04191169ac62f6ffb7531c8da32 (diff) |
Merge remote-tracking branch 'quintela/migration-next-20120808' into staging
* quintela/migration-next-20120808:
Restart optimization on stage3 update version
Add XBZRLE statistics
Add migration accounting for normal and duplicate pages
Change total_time to total-time in MigrationStats
Add migrate_set_cache_size command
Add XBZRLE to ram_save_block and ram_save_live
Add xbzrle_encode_buffer and xbzrle_decode_buffer functions
Add uleb encoding/decoding functions
Add cache handling functions
Add XBZRLE documentation
Add migrate-set-capabilities
Add migration capabilities
-rw-r--r-- | Makefile.objs | 1 | ||||
-rw-r--r-- | arch_init.c | 246 | ||||
-rw-r--r-- | cutils.c | 42 | ||||
-rw-r--r-- | docs/xbzrle.txt | 128 | ||||
-rw-r--r-- | hmp-commands.hx | 38 | ||||
-rw-r--r-- | hmp.c | 103 | ||||
-rw-r--r-- | hmp.h | 4 | ||||
-rw-r--r-- | include/qemu/page_cache.h | 79 | ||||
-rw-r--r-- | migration.c | 112 | ||||
-rw-r--r-- | migration.h | 21 | ||||
-rw-r--r-- | monitor.c | 14 | ||||
-rw-r--r-- | page_cache.c | 218 | ||||
-rw-r--r-- | qapi-schema.json | 119 | ||||
-rw-r--r-- | qemu-common.h | 21 | ||||
-rw-r--r-- | qmp-commands.hx | 159 | ||||
-rw-r--r-- | savevm.c | 159 |
16 files changed, 1451 insertions, 13 deletions
diff --git a/Makefile.objs b/Makefile.objs index 5ebbcfa171..e0fb69b202 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -77,6 +77,7 @@ common-obj-y += qemu-char.o #aio.o common-obj-y += block-migration.o iohandler.o common-obj-y += pflib.o common-obj-y += bitmap.o bitops.o +common-obj-y += page_cache.o common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o common-obj-$(CONFIG_WIN32) += version.o diff --git a/arch_init.c b/arch_init.c index 7b65c4888b..9b46bfcaa5 100644 --- a/arch_init.c +++ b/arch_init.c @@ -43,6 +43,7 @@ #include "hw/smbios.h" #include "exec-memory.h" #include "hw/pcspk.h" +#include "qemu/page_cache.h" #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) \ @@ -106,6 +107,7 @@ const uint32_t arch_type = QEMU_ARCH; #define RAM_SAVE_FLAG_PAGE 0x08 #define RAM_SAVE_FLAG_EOS 0x10 #define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 #ifdef __ALTIVEC__ #include <altivec.h> @@ -173,6 +175,92 @@ static int is_dup_page(uint8_t *page) return 1; } +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { + /* buffer used for XBZRLE encoding */ + uint8_t *encoded_buf; + /* buffer for storing page content */ + uint8_t *current_buf; + /* buffer used for XBZRLE decoding */ + uint8_t *decoded_buf; + /* Cache for XBZRLE */ + PageCache *cache; +} XBZRLE = { + .encoded_buf = NULL, + .current_buf = NULL, + .decoded_buf = NULL, + .cache = NULL, +}; + + +int64_t xbzrle_cache_resize(int64_t new_size) +{ + if (XBZRLE.cache != NULL) { + return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * + TARGET_PAGE_SIZE; + } + return pow2floor(new_size); +} + +/* accounting for migration statistics */ +typedef struct AccountingInfo { + uint64_t dup_pages; + uint64_t norm_pages; + uint64_t iterations; + uint64_t xbzrle_bytes; + uint64_t xbzrle_pages; + uint64_t xbzrle_cache_miss; + uint64_t xbzrle_overflows; +} AccountingInfo; + +static AccountingInfo acct_info; + +static void acct_clear(void) +{ + memset(&acct_info, 0, sizeof(acct_info)); +} + +uint64_t dup_mig_bytes_transferred(void) +{ + return acct_info.dup_pages * TARGET_PAGE_SIZE; +} + +uint64_t dup_mig_pages_transferred(void) +{ + return acct_info.dup_pages; +} + +uint64_t norm_mig_bytes_transferred(void) +{ + return acct_info.norm_pages * TARGET_PAGE_SIZE; +} + +uint64_t norm_mig_pages_transferred(void) +{ + return acct_info.norm_pages; +} + +uint64_t xbzrle_mig_bytes_transferred(void) +{ + return acct_info.xbzrle_bytes; +} + +uint64_t xbzrle_mig_pages_transferred(void) +{ + return acct_info.xbzrle_pages; +} + +uint64_t xbzrle_mig_pages_cache_miss(void) +{ + return acct_info.xbzrle_cache_miss; +} + +uint64_t xbzrle_mig_pages_overflow(void) +{ + return acct_info.xbzrle_overflows; +} + static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, int cont, int flag) { @@ -185,6 +273,61 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, } +#define ENCODING_FLAG_XBZRLE 0x1 + +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, + ram_addr_t current_addr, RAMBlock *block, + ram_addr_t offset, int cont, bool last_stage) +{ + int encoded_len = 0, bytes_sent = -1; + uint8_t *prev_cached_page; + + if (!cache_is_cached(XBZRLE.cache, current_addr)) { + if (!last_stage) { + cache_insert(XBZRLE.cache, current_addr, + g_memdup(current_data, TARGET_PAGE_SIZE)); + } + acct_info.xbzrle_cache_miss++; + return -1; + } + + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + + /* save current buffer into memory */ + memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); + if (encoded_len == 0) { + DPRINTF("Skipping unmodified page\n"); + return 0; + } else if (encoded_len == -1) { + DPRINTF("Overflow\n"); + acct_info.xbzrle_overflows++; + /* update data in the cache */ + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); + return -1; + } + + /* we need to update the data in the cache, in order to get the same data */ + if (!last_stage) { + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); + } + + /* Send XBZRLE based compressed page */ + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(f, encoded_len); + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_sent = encoded_len + 1 + 2; + acct_info.xbzrle_pages++; + acct_info.xbzrle_bytes += bytes_sent; + + return bytes_sent; +} + static RAMBlock *last_block; static ram_addr_t last_offset; @@ -196,12 +339,13 @@ static ram_addr_t last_offset; * n: the amount of bytes written in other case */ -static int ram_save_block(QEMUFile *f) +static int ram_save_block(QEMUFile *f, bool last_stage) { RAMBlock *block = last_block; ram_addr_t offset = last_offset; int bytes_sent = -1; MemoryRegion *mr; + ram_addr_t current_addr; if (!block) block = QLIST_FIRST(&ram_list.blocks); @@ -219,16 +363,31 @@ static int ram_save_block(QEMUFile *f) p = memory_region_get_ram_ptr(mr) + offset; if (is_dup_page(p)) { + acct_info.dup_pages++; save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, *p); bytes_sent = 1; - } else { + } else if (migrate_use_xbzrle()) { + current_addr = block->offset + offset; + bytes_sent = save_xbzrle_page(f, p, current_addr, block, + offset, cont, last_stage); + if (!last_stage) { + p = get_cached_data(XBZRLE.cache, current_addr); + } + } + + /* either we didn't send yet (we may have had XBZRLE overflow) */ + if (bytes_sent == -1) { save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); qemu_put_buffer(f, p, TARGET_PAGE_SIZE); bytes_sent = TARGET_PAGE_SIZE; + acct_info.norm_pages++; } - break; + /* if page is unmodified, continue to the next */ + if (bytes_sent != 0) { + break; + } } offset += TARGET_PAGE_SIZE; @@ -306,6 +465,15 @@ static void sort_ram_list(void) static void migration_end(void) { memory_global_dirty_log_stop(); + + if (migrate_use_xbzrle()) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + g_free(XBZRLE.decoded_buf); + XBZRLE.cache = NULL; + } } static void ram_migration_cancel(void *opaque) @@ -325,6 +493,19 @@ static int ram_save_setup(QEMUFile *f, void *opaque) last_offset = 0; sort_ram_list(); + if (migrate_use_xbzrle()) { + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / + TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!XBZRLE.cache) { + DPRINTF("Error creating cache\n"); + return -1; + } + XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); + XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); + acct_clear(); + } + /* Make sure all dirty bits are set */ QLIST_FOREACH(block, &ram_list.blocks, next) { for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { @@ -365,12 +546,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) while ((ret = qemu_file_rate_limit(f)) == 0) { int bytes_sent; - bytes_sent = ram_save_block(f); + bytes_sent = ram_save_block(f, false); /* no more blocks to sent */ if (bytes_sent < 0) { break; } bytes_transferred += bytes_sent; + acct_info.iterations++; /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. qemu_get_clock_ns() is a bit expensive, so we only check each some @@ -426,7 +608,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) while (true) { int bytes_sent; - bytes_sent = ram_save_block(f); + bytes_sent = ram_save_block(f, true); /* no more blocks to sent */ if (bytes_sent < 0) { break; @@ -440,6 +622,47 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return 0; } +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +{ + int ret, rc = 0; + unsigned int xh_len; + int xh_flags; + + if (!XBZRLE.decoded_buf) { + XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); + } + + /* extract RLE header */ + xh_flags = qemu_get_byte(f); + xh_len = qemu_get_be16(f); + + if (xh_flags != ENCODING_FLAG_XBZRLE) { + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); + return -1; + } + + if (xh_len > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); + return -1; + } + /* load data and decode */ + qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); + + /* decode RLE */ + ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, + TARGET_PAGE_SIZE); + if (ret == -1) { + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); + rc = -1; + } else if (ret > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", + ret, TARGET_PAGE_SIZE); + abort(); + } + + return rc; +} + static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -553,6 +776,19 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { + if (!migrate_use_xbzrle()) { + return -EINVAL; + } + void *host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } + + if (load_xbzrle(f, addr, host) < 0) { + ret = -EINVAL; + goto done; + } } error = qemu_file_get_error(f); if (error) { @@ -382,3 +382,45 @@ int qemu_parse_fd(const char *param) } return fd; } + +/* round down to the nearest power of 2*/ +int64_t pow2floor(int64_t value) +{ + if (!is_power_of_2(value)) { + value = 0x8000000000000000ULL >> clz64(value); + } + return value; +} + +/* + * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) + * Input is limited to 14-bit numbers + */ +int uleb128_encode_small(uint8_t *out, uint32_t n) +{ + g_assert(n <= 0x3fff); + if (n < 0x80) { + *out++ = n; + return 1; + } else { + *out++ = (n & 0x7f) | 0x80; + *out++ = n >> 7; + return 2; + } +} + +int uleb128_decode_small(const uint8_t *in, uint32_t *n) +{ + if (!(*in & 0x80)) { + *n = *in++; + return 1; + } else { + *n = *in++ & 0x7f; + /* we exceed 14 bit number */ + if (*in & 0x80) { + return -1; + } + *n |= *in++ << 7; + return 2; + } +} diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt new file mode 100644 index 0000000000..cc3a26a91d --- /dev/null +++ b/docs/xbzrle.txt @@ -0,0 +1,128 @@ +XBZRLE (Xor Based Zero Run Length Encoding) +=========================================== + +Using XBZRLE (Xor Based Zero Run Length Encoding) allows for the reduction +of VM downtime and the total live-migration time of Virtual machines. +It is particularly useful for virtual machines running memory write intensive +workloads that are typical of large enterprise applications such as SAP ERP +Systems, and generally speaking for any application that uses a sparse memory +update pattern. + +Instead of sending the changed guest memory page this solution will send a +compressed version of the updates, thus reducing the amount of data sent during +live migration. +In order to be able to calculate the update, the previous memory pages need to +be stored on the source. Those pages are stored in a dedicated cache +(hash table) and are accessed by their address. +The larger the cache size the better the chances are that the page has already +been stored in the cache. +A small cache size will result in high cache miss rate. +Cache size can be changed before and during migration. + +Format +======= + +The compression format performs a XOR between the previous and current content +of the page, where zero represents an unchanged value. +The page data delta is represented by zero and non zero runs. +A zero run is represented by its length (in bytes). +A non zero run is represented by its length (in bytes) and the new data. +The run length is encoded using ULEB128 (http://en.wikipedia.org/wiki/LEB128) + +There can be more than one valid encoding, the sender may send a longer encoding +for the benefit of reducing computation cost. + +page = zrun nzrun + | zrun nzrun page + +zrun = length + +nzrun = length byte... + +length = uleb128 encoded integer + +On the sender side XBZRLE is used as a compact delta encoding of page updates, +retrieving the old page content from the cache (default size of 512 MB). The +receiving side uses the existing page's content and XBZRLE to decode the new +page's content. + +This work was originally based on research results published +VEE 2011: Evaluation of Delta Compression Techniques for Efficient Live +Migration of Large Virtual Machines by Benoit, Svard, Tordsson and Elmroth. +Additionally the delta encoder XBRLE was improved further using the XBZRLE +instead. + +XBZRLE has a sustained bandwidth of 2-2.5 GB/s for typical workloads making it +ideal for in-line, real-time encoding such as is needed for live-migration. + +Example +old buffer: +1001 zeros +05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 68 00 00 6b 00 6d +3074 zeros + +new buffer: +1001 zeros +01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 68 00 00 67 00 69 +3074 zeros + +encoded buffer: + +encoded length 24 +e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69 + +Usage +====================== +1. Verify the destination QEMU version is able to decode the new format. + {qemu} info migrate_capabilities + {qemu} xbzrle: off , ... + +2. Activate xbzrle on both source and destination: + {qemu} migrate_set_capability xbzrle on + +3. Set the XBZRLE cache size - the cache size is in MBytes and should be a +power of 2. The cache default value is 64MBytes. (on source only) + {qemu} migrate_set_cache_size 256m + +4. Start outgoing migration + {qemu} migrate -d tcp:destination.host:4444 + {qemu} info migrate + capabilities: xbzrle: on + Migration status: active + transferred ram: A kbytes + remaining ram: B kbytes + total ram: C kbytes + total time: D milliseconds + duplicate: E pages + normal: F pages + normal bytes: G kbytes + cache size: H bytes + xbzrle transferred: I kbytes + xbzrle pages: J pages + xbzrle cache miss: K + xbzrle overflow : L + +xbzrle cache-miss: the number of cache misses to date - high cache-miss rate +indicates that the cache size is set too low. +xbzrle overflow: the number of overflows in the decoding which where the delta +could not be compressed. This can happen if the changes in the pages are too +large or there are many short changes; for example, changing every second byte +(half a page). + +Testing: Testing indicated that live migration with XBZRLE was completed in 110 +seconds, whereas without it would not be able to complete. + +A simple synthetic memory r/w load generator: +.. include <stdlib.h> +.. include <stdio.h> +.. int main() +.. { +.. char *buf = (char *) calloc(4096, 4096); +.. while (1) { +.. int i; +.. for (i = 0; i < 4096 * 4; i++) { +.. buf[i * 4096 / 4]++; +.. } +.. printf("."); +.. } +.. } diff --git a/hmp-commands.hx b/hmp-commands.hx index 9bbc7f7555..f6104b099f 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -829,6 +829,26 @@ STEXI @item migrate_cancel @findex migrate_cancel Cancel the current VM migration. + +ETEXI + + { + .name = "migrate_set_cache_size", + .args_type = "value:o", + .params = "value", + .help = "set cache size (in bytes) for XBZRLE migrations," + "the cache size will be rounded down to the nearest " + "power of 2.\n" + "The cache size affects the number of cache misses." + "In case of a high cache miss ratio you need to increase" + " the cache size", + .mhandler.cmd = hmp_migrate_set_cache_size, + }, + +STEXI +@item migrate_set_cache_size @var{value} +@findex migrate_set_cache_size +Set cache size to @var{value} (in bytes) for xbzrle migrations. ETEXI { @@ -861,6 +881,20 @@ Set maximum tolerated downtime (in seconds) for migration. ETEXI { + .name = "migrate_set_capability", + .args_type = "capability:s,state:b", + .params = "capability state", + .help = "Enable/Disable the usage of a capability for migration", + .mhandler.cmd = hmp_migrate_set_capability, + }, + +STEXI +@item migrate_set_capability @var{capability} @var{state} +@findex migrate_set_capability +Enable/Disable the usage of a capability @var{capability} for migration. +ETEXI + + { .name = "client_migrate_info", .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", .params = "protocol hostname port tls-port cert-subject", @@ -1417,6 +1451,10 @@ show CPU statistics show user network stack connection states @item info migrate show migration status +@item info migrate_capabilities +show current migration capabilities +@item info migrate_cache_size +show current migration XBZRLE cache size @item info balloon show balloon information @item info qtree @@ -131,8 +131,21 @@ void hmp_info_mice(Monitor *mon) void hmp_info_migrate(Monitor *mon) { MigrationInfo *info; + MigrationCapabilityStatusList *caps, *cap; info = qmp_query_migrate(NULL); + caps = qmp_query_migrate_capabilities(NULL); + + /* do not display parameters during setup */ + if (info->has_status && caps) { + monitor_printf(mon, "capabilities: "); + for (cap = caps; cap; cap = cap->next) { + monitor_printf(mon, "%s: %s ", + MigrationCapability_lookup[cap->value->capability], + cap->value->state ? "on" : "off"); + } + monitor_printf(mon, "\n"); + } if (info->has_status) { monitor_printf(mon, "Migration status: %s\n", info->status); @@ -147,6 +160,12 @@ void hmp_info_migrate(Monitor *mon) info->ram->total >> 10); monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n", info->ram->total_time); + monitor_printf(mon, "duplicate: %" PRIu64 " pages\n", + info->ram->duplicate); + monitor_printf(mon, "normal: %" PRIu64 " pages\n", + info->ram->normal); + monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n", + info->ram->normal_bytes >> 10); } if (info->has_disk) { @@ -158,7 +177,46 @@ void hmp_info_migrate(Monitor *mon) info->disk->total >> 10); } + if (info->has_xbzrle_cache) { + monitor_printf(mon, "cache size: %" PRIu64 " bytes\n", + info->xbzrle_cache->cache_size); + monitor_printf(mon, "xbzrle transferred: %" PRIu64 " kbytes\n", + info->xbzrle_cache->bytes >> 10); + monitor_printf(mon, "xbzrle pages: %" PRIu64 " pages\n", + info->xbzrle_cache->pages); + monitor_printf(mon, "xbzrle cache miss: %" PRIu64 "\n", + info->xbzrle_cache->cache_miss); + monitor_printf(mon, "xbzrle overflow : %" PRIu64 "\n", + info->xbzrle_cache->overflow); + } + qapi_free_MigrationInfo(info); + qapi_free_MigrationCapabilityStatusList(caps); +} + +void hmp_info_migrate_capabilities(Monitor *mon) +{ + MigrationCapabilityStatusList *caps, *cap; + + caps = qmp_query_migrate_capabilities(NULL); + + if (caps) { + monitor_printf(mon, "capabilities: "); + for (cap = caps; cap; cap = cap->next) { + monitor_printf(mon, "%s: %s ", + MigrationCapability_lookup[cap->value->capability], + cap->value->state ? "on" : "off"); + } + monitor_printf(mon, "\n"); + } + + qapi_free_MigrationCapabilityStatusList(caps); +} + +void hmp_info_migrate_cache_size(Monitor *mon) +{ + monitor_printf(mon, "xbzrel cache size: %" PRId64 " kbytes\n", + qmp_query_migrate_cache_size(NULL) >> 10); } void hmp_info_cpus(Monitor *mon) @@ -731,12 +789,57 @@ void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict) qmp_migrate_set_downtime(value, NULL); } +void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict) +{ + int64_t value = qdict_get_int(qdict, "value"); + Error *err = NULL; + + qmp_migrate_set_cache_size(value, &err); + if (err) { + monitor_printf(mon, "%s\n", error_get_pretty(err)); + error_free(err); + return; + } +} + void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict) { int64_t value = qdict_get_int(qdict, "value"); qmp_migrate_set_speed(value, NULL); } +void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict) +{ + const char *cap = qdict_get_str(qdict, "capability"); + bool state = qdict_get_bool(qdict, "state"); + Error *err = NULL; + MigrationCapabilityStatusList *caps = g_malloc0(sizeof(*caps)); + int i; + + for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) { + if (strcmp(cap, MigrationCapability_lookup[i]) == 0) { + caps->value = g_malloc0(sizeof(*caps->value)); + caps->value->capability = i; + caps->value->state = state; + caps->next = NULL; + qmp_migrate_set_capabilities(caps, &err); + break; + } + } + + if (i == MIGRATION_CAPABILITY_MAX) { + error_set(&err, QERR_INVALID_PARAMETER, cap); + } + + qapi_free_MigrationCapabilityStatusList(caps); + + if (err) { + monitor_printf(mon, "migrate_set_parameter: %s\n", + error_get_pretty(err)); + error_free(err); + } +} + void hmp_set_password(Monitor *mon, const QDict *qdict) { const char *protocol = qdict_get_str(qdict, "protocol"); @@ -25,6 +25,8 @@ void hmp_info_uuid(Monitor *mon); void hmp_info_chardev(Monitor *mon); void hmp_info_mice(Monitor *mon); void hmp_info_migrate(Monitor *mon); +void hmp_info_migrate_capabilities(Monitor *mon); +void hmp_info_migrate_cache_size(Monitor *mon); void hmp_info_cpus(Monitor *mon); void hmp_info_block(Monitor *mon); void hmp_info_blockstats(Monitor *mon); @@ -51,6 +53,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict); void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); void hmp_set_password(Monitor *mon, const QDict *qdict); void hmp_expire_password(Monitor *mon, const QDict *qdict); void hmp_eject(Monitor *mon, const QDict *qdict); diff --git a/include/qemu/page_cache.h b/include/qemu/page_cache.h new file mode 100644 index 0000000000..3839ac7726 --- /dev/null +++ b/include/qemu/page_cache.h @@ -0,0 +1,79 @@ +/* + * Page cache for QEMU + * The cache is base on a hash of the page address + * + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Orit Wasserman <owasserm@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef PAGE_CACHE_H +#define PAGE_CACHE_H + +/* Page cache for storing guest pages */ +typedef struct PageCache PageCache; + +/** + * cache_init: Initialize the page cache + * + * + * Returns new allocated cache or NULL on error + * + * @cache pointer to the PageCache struct + * @num_pages: cache maximal number of cached pages + * @page_size: cache page size + */ +PageCache *cache_init(int64_t num_pages, unsigned int page_size); + +/** + * cache_fini: free all cache resources + * @cache pointer to the PageCache struct + */ +void cache_fini(PageCache *cache); + +/** + * cache_is_cached: Checks to see if the page is cached + * + * Returns %true if page is cached + * + * @cache pointer to the PageCache struct + * @addr: page addr + */ +bool cache_is_cached(const PageCache *cache, uint64_t addr); + +/** + * get_cached_data: Get the data cached for an addr + * + * Returns pointer to the data cached or NULL if not cached + * + * @cache pointer to the PageCache struct + * @addr: page addr + */ +uint8_t *get_cached_data(const PageCache *cache, uint64_t addr); + +/** + * cache_insert: insert the page into the cache. the previous value will be overwritten + * + * @cache pointer to the PageCache struct + * @addr: page address + * @pdata: pointer to the page + */ +void cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata); + +/** + * cache_resize: resize the page cache. In case of size reduction the extra + * pages will be freed + * + * Returns -1 on error new cache size on success + * + * @cache pointer to the PageCache struct + * @num_pages: new page cache size (in pages) + */ +int64_t cache_resize(PageCache *cache, int64_t num_pages); + +#endif diff --git a/migration.c b/migration.c index 8db1b433f0..653a3c1a88 100644 --- a/migration.c +++ b/migration.c @@ -43,6 +43,9 @@ enum { #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ +/* Migration XBZRLE default cache size */ +#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) + static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -55,6 +58,7 @@ static MigrationState *migrate_get_current(void) static MigrationState current_migration = { .state = MIG_STATE_SETUP, .bandwidth_limit = MAX_THROTTLE, + .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE, }; return ¤t_migration; @@ -113,6 +117,43 @@ uint64_t migrate_max_downtime(void) return max_downtime; } +MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) +{ + MigrationCapabilityStatusList *head = NULL; + MigrationCapabilityStatusList *caps; + MigrationState *s = migrate_get_current(); + int i; + + for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) { + if (head == NULL) { + head = g_malloc0(sizeof(*caps)); + caps = head; + } else { + caps->next = g_malloc0(sizeof(*caps)); + caps = caps->next; + } + caps->value = + g_malloc(sizeof(*caps->value)); + caps->value->capability = i; + caps->value->state = s->enabled_capabilities[i]; + } + + return head; +} + +static void get_xbzrle_cache_stats(MigrationInfo *info) +{ + if (migrate_use_xbzrle()) { + info->has_xbzrle_cache = true; + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); + info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); + info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred(); + info->xbzrle_cache->pages = xbzrle_mig_pages_transferred(); + info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss(); + info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow(); + } +} + MigrationInfo *qmp_query_migrate(Error **errp) { MigrationInfo *info = g_malloc0(sizeof(*info)); @@ -133,6 +174,9 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->total = ram_bytes_total(); info->ram->total_time = qemu_get_clock_ms(rt_clock) - s->total_time; + info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->normal = norm_mig_pages_transferred(); + info->ram->normal_bytes = norm_mig_bytes_transferred(); if (blk_mig_active()) { info->has_disk = true; @@ -141,8 +185,12 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->disk->remaining = blk_mig_bytes_remaining(); info->disk->total = blk_mig_bytes_total(); } + + get_xbzrle_cache_stats(info); break; case MIG_STATE_COMPLETED: + get_xbzrle_cache_stats(info); + info->has_status = true; info->status = g_strdup("completed"); @@ -152,6 +200,9 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->remaining = 0; info->ram->total = ram_bytes_total(); info->ram->total_time = s->total_time; + info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->normal = norm_mig_pages_transferred(); + info->ram->normal_bytes = norm_mig_bytes_transferred(); break; case MIG_STATE_ERROR: info->has_status = true; @@ -166,6 +217,22 @@ MigrationInfo *qmp_query_migrate(Error **errp) return info; } +void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + MigrationCapabilityStatusList *cap; + + if (s->state == MIG_STATE_ACTIVE) { + error_set(errp, QERR_MIGRATION_ACTIVE); + return; + } + + for (cap = params; cap; cap = cap->next) { + s->enabled_capabilities[cap->value->capability] = cap->value->state; + } +} + /* shared migration helpers */ static int migrate_fd_cleanup(MigrationState *s) @@ -375,10 +442,18 @@ static MigrationState *migrate_init(const MigrationParams *params) { MigrationState *s = migrate_get_current(); int64_t bandwidth_limit = s->bandwidth_limit; + bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; + int64_t xbzrle_cache_size = s->xbzrle_cache_size; + + memcpy(enabled_capabilities, s->enabled_capabilities, + sizeof(enabled_capabilities)); memset(s, 0, sizeof(*s)); s->bandwidth_limit = bandwidth_limit; s->params = *params; + memcpy(s->enabled_capabilities, enabled_capabilities, + sizeof(enabled_capabilities)); + s->xbzrle_cache_size = xbzrle_cache_size; s->bandwidth_limit = bandwidth_limit; s->state = MIG_STATE_SETUP; @@ -459,6 +534,25 @@ void qmp_migrate_cancel(Error **errp) migrate_fd_cancel(migrate_get_current()); } +void qmp_migrate_set_cache_size(int64_t value, Error **errp) +{ + MigrationState *s = migrate_get_current(); + + /* Check for truncation */ + if (value != (size_t)value) { + error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "exceeding address space"); + return; + } + + s->xbzrle_cache_size = xbzrle_cache_resize(value); +} + +int64_t qmp_query_migrate_cache_size(Error **errp) +{ + return migrate_xbzrle_cache_size(); +} + void qmp_migrate_set_speed(int64_t value, Error **errp) { MigrationState *s; @@ -478,3 +572,21 @@ void qmp_migrate_set_downtime(double value, Error **errp) value = MAX(0, MIN(UINT64_MAX, value)); max_downtime = (uint64_t)value; } + +int migrate_use_xbzrle(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; +} + +int64_t migrate_xbzrle_cache_size(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->xbzrle_cache_size; +} diff --git a/migration.h b/migration.h index 57572a61e9..a9852fcae0 100644 --- a/migration.h +++ b/migration.h @@ -19,6 +19,7 @@ #include "notify.h" #include "error.h" #include "vmstate.h" +#include "qapi-types.h" struct MigrationParams { bool blk; @@ -39,6 +40,8 @@ struct MigrationState void *opaque; MigrationParams params; int64_t total_time; + bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; + int64_t xbzrle_cache_size; }; void process_incoming_migration(QEMUFile *f); @@ -84,6 +87,15 @@ uint64_t ram_bytes_total(void); extern SaveVMHandlers savevm_ram_handlers; +uint64_t dup_mig_bytes_transferred(void); +uint64_t dup_mig_pages_transferred(void); +uint64_t norm_mig_bytes_transferred(void); +uint64_t norm_mig_pages_transferred(void); +uint64_t xbzrle_mig_bytes_transferred(void); +uint64_t xbzrle_mig_pages_transferred(void); +uint64_t xbzrle_mig_pages_overflow(void); +uint64_t xbzrle_mig_pages_cache_miss(void); + /** * @migrate_add_blocker - prevent migration from proceeding * @@ -98,4 +110,13 @@ void migrate_add_blocker(Error *reason); */ void migrate_del_blocker(Error *reason); +int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen); +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); + +int migrate_use_xbzrle(void); +int64_t migrate_xbzrle_cache_size(void); + +int64_t xbzrle_cache_resize(int64_t new_size); + #endif @@ -2655,6 +2655,20 @@ static mon_cmd_t info_cmds[] = { .mhandler.info = hmp_info_migrate, }, { + .name = "migrate_capabilities", + .args_type = "", + .params = "", + .help = "show current migration capabilities", + .mhandler.info = hmp_info_migrate_capabilities, + }, + { + .name = "migrate_cache_size", + .args_type = "", + .params = "", + .help = "show current migration xbzrle cache size", + .mhandler.info = hmp_info_migrate_cache_size, + }, + { .name = "balloon", .args_type = "", .params = "", diff --git a/page_cache.c b/page_cache.c new file mode 100644 index 0000000000..0294f7e9f6 --- /dev/null +++ b/page_cache.c @@ -0,0 +1,218 @@ +/* + * Page cache for QEMU + * The cache is base on a hash of the page address + * + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Orit Wasserman <owasserm@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <string.h> +#include <sys/time.h> +#include <sys/types.h> +#include <stdbool.h> +#include <glib.h> +#include <strings.h> + +#include "qemu-common.h" +#include "qemu/page_cache.h" + +#ifdef DEBUG_CACHE +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, "cache: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +typedef struct CacheItem CacheItem; + +struct CacheItem { + uint64_t it_addr; + uint64_t it_age; + uint8_t *it_data; +}; + +struct PageCache { + CacheItem *page_cache; + unsigned int page_size; + int64_t max_num_items; + uint64_t max_item_age; + int64_t num_items; +}; + +PageCache *cache_init(int64_t num_pages, unsigned int page_size) +{ + int64_t i; + + PageCache *cache; + + if (num_pages <= 0) { + DPRINTF("invalid number of pages\n"); + return NULL; + } + + cache = g_malloc(sizeof(*cache)); + + /* round down to the nearest power of 2 */ + if (!is_power_of_2(num_pages)) { + num_pages = pow2floor(num_pages); + DPRINTF("rounding down to %" PRId64 "\n", num_pages); + } + cache->page_size = page_size; + cache->num_items = 0; + cache->max_item_age = 0; + cache->max_num_items = num_pages; + + DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items); + + cache->page_cache = g_malloc((cache->max_num_items) * + sizeof(*cache->page_cache)); + + for (i = 0; i < cache->max_num_items; i++) { + cache->page_cache[i].it_data = NULL; + cache->page_cache[i].it_age = 0; + cache->page_cache[i].it_addr = -1; + } + + return cache; +} + +void cache_fini(PageCache *cache) +{ + int64_t i; + + g_assert(cache); + g_assert(cache->page_cache); + + for (i = 0; i < cache->max_num_items; i++) { + g_free(cache->page_cache[i].it_data); + } + + g_free(cache->page_cache); + cache->page_cache = NULL; +} + +static size_t cache_get_cache_pos(const PageCache *cache, + uint64_t address) +{ + size_t pos; + + g_assert(cache->max_num_items); + pos = (address / cache->page_size) & (cache->max_num_items - 1); + return pos; +} + +bool cache_is_cached(const PageCache *cache, uint64_t addr) +{ + size_t pos; + + g_assert(cache); + g_assert(cache->page_cache); + + pos = cache_get_cache_pos(cache, addr); + + return (cache->page_cache[pos].it_addr == addr); +} + +static CacheItem *cache_get_by_addr(const PageCache *cache, uint64_t addr) +{ + size_t pos; + + g_assert(cache); + g_assert(cache->page_cache); + + pos = cache_get_cache_pos(cache, addr); + + return &cache->page_cache[pos]; +} + +uint8_t *get_cached_data(const PageCache *cache, uint64_t addr) +{ + return cache_get_by_addr(cache, addr)->it_data; +} + +void cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata) +{ + + CacheItem *it = NULL; + + g_assert(cache); + g_assert(cache->page_cache); + + /* actual update of entry */ + it = cache_get_by_addr(cache, addr); + + if (!it->it_data) { + cache->num_items++; + } + + it->it_data = pdata; + it->it_age = ++cache->max_item_age; + it->it_addr = addr; +} + +int64_t cache_resize(PageCache *cache, int64_t new_num_pages) +{ + PageCache *new_cache; + int64_t i; + + CacheItem *old_it, *new_it; + + g_assert(cache); + + /* cache was not inited */ + if (cache->page_cache == NULL) { + return -1; + } + + /* same size */ + if (pow2floor(new_num_pages) == cache->max_num_items) { + return cache->max_num_items; + } + + new_cache = cache_init(new_num_pages, cache->page_size); + if (!(new_cache)) { + DPRINTF("Error creating new cache\n"); + return -1; + } + + /* move all data from old cache */ + for (i = 0; i < cache->max_num_items; i++) { + old_it = &cache->page_cache[i]; + if (old_it->it_addr != -1) { + /* check for collision, if there is, keep MRU page */ + new_it = cache_get_by_addr(new_cache, old_it->it_addr); + if (new_it->it_data) { + /* keep the MRU page */ + if (new_it->it_age >= old_it->it_age) { + g_free(old_it->it_data); + } else { + g_free(new_it->it_data); + new_it->it_data = old_it->it_data; + new_it->it_age = old_it->it_age; + new_it->it_addr = old_it->it_addr; + } + } else { + cache_insert(new_cache, old_it->it_addr, old_it->it_data); + } + } + } + + cache->page_cache = new_cache->page_cache; + cache->max_num_items = new_cache->max_num_items; + cache->num_items = new_cache->num_items; + + g_free(new_cache); + + return cache->max_num_items; +} diff --git a/qapi-schema.json b/qapi-schema.json index bd9c450029..56d9d7b0e2 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -260,15 +260,43 @@ # # @total: total amount of bytes involved in the migration process # -# @total_time: tota0l amount of ms since migration started. If +# @total-time: total amount of ms since migration started. If # migration has ended, it returns the total migration # time. (since 1.2) # -# Since: 0.14.0. +# @duplicate: number of duplicate pages (since 1.2) +# +# @normal : number of normal pages (since 1.2) +# +# @normal-bytes : number of normal bytes sent (since 1.2) +# +# Since: 0.14.0 ## { 'type': 'MigrationStats', 'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' , - 'total_time': 'int' } } + 'total-time': 'int', 'duplicate': 'int', 'normal': 'int', + 'normal-bytes': 'int' } } + +## +# @XBZRLECacheStats +# +# Detailed XBZRLE migration cache statistics +# +# @cache-size: XBZRLE cache size +# +# @bytes: amount of bytes already transferred to the target VM +# +# @pages: amount of pages transferred to the target VM +# +# @cache-miss: number of cache miss +# +# @overflow: number of overflows +# +# Since: 1.2 +## +{ 'type': 'XBZRLECacheStats', + 'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int', + 'cache-miss': 'int', 'overflow': 'int' } } ## # @MigrationInfo @@ -288,11 +316,16 @@ # status, only returned if status is 'active' and it is a block # migration # +# @xbzrle-cache: #optional @XBZRLECacheStats containing detailed XBZRLE +# migration statistics, only returned if XBZRLE feature is on and +# status is 'active' or 'completed' (since 1.2) +# # Since: 0.14.0 ## { 'type': 'MigrationInfo', 'data': {'*status': 'str', '*ram': 'MigrationStats', - '*disk': 'MigrationStats'} } + '*disk': 'MigrationStats', + '*xbzrle-cache': 'XBZRLECacheStats'} } ## # @query-migrate @@ -306,6 +339,57 @@ { 'command': 'query-migrate', 'returns': 'MigrationInfo' } ## +# @MigrationCapability +# +# Migration capabilities enumeration +# +# @xbzrle: Migration supports xbzrle (Xor Based Zero Run Length Encoding). +# This feature allows us to minimize migration traffic for certain work +# loads, by sending compressed difference of the pages +# +# Since: 1.2 +## +{ 'enum': 'MigrationCapability', + 'data': ['xbzrle'] } + +## +# @MigrationCapabilityStatus +# +# Migration capability information +# +# @capability: capability enum +# +# @state: capability state bool +# +# Since: 1.2 +## +{ 'type': 'MigrationCapabilityStatus', + 'data': { 'capability' : 'MigrationCapability', 'state' : 'bool' } } + +## +# @migrate-set-capabilities +# +# Enable/Disable the following migration capabilities (like xbzrle) +# +# @capabilities: json array of capability modifications to make +# +# Since: 1.2 +## +{ 'command': 'migrate-set-capabilities', + 'data': { 'capabilities': ['MigrationCapabilityStatus'] } } + +## +# @query-migrate-capabilities +# +# Returns information about the current migration capabilities status +# +# Returns: @MigrationCapabilitiesStatus +# +# Since: 1.2 +## +{ 'command': 'query-migrate-capabilities', 'returns': ['MigrationCapabilityStatus']} + +## # @MouseInfo: # # Information about a mouse device. @@ -1344,6 +1428,33 @@ { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } ## +# @migrate-set-cache-size +# +# Set XBZRLE cache size +# +# @value: cache size in bytes +# +# The size will be rounded down to the nearest power of 2. +# The cache size can be modified before and during ongoing migration +# +# Returns: nothing on success +# +# Since: 1.2 +## +{ 'command': 'migrate-set-cache-size', 'data': {'value': 'int'} } + +## +# @query-migrate-cache-size +# +# query XBZRLE cache size +# +# Returns: XBZRLE cache size in bytes +# +# Since: 1.2 +## +{ 'command': 'query-migrate-cache-size', 'returns': 'int' } + +## # @ObjectPropertyInfo: # # @name: the name of the property diff --git a/qemu-common.h b/qemu-common.h index f9deca6f86..095e28d89a 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -1,3 +1,4 @@ + /* Common header file that is included by all of qemu. */ #ifndef QEMU_COMMON_H #define QEMU_COMMON_H @@ -429,6 +430,26 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) /* Round number up to multiple */ #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) +static inline bool is_power_of_2(uint64_t value) +{ + if (!value) { + return 0; + } + + return !(value & (value - 1)); +} + +/* round down to the nearest power of 2*/ +int64_t pow2floor(int64_t value); + #include "module.h" +/* + * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) + * Input is limited to 14-bit numbers + */ + +int uleb128_encode_small(uint8_t *out, uint32_t n); +int uleb128_decode_small(const uint8_t *in, uint32_t *n); + #endif diff --git a/qmp-commands.hx b/qmp-commands.hx index ac466382c0..0363d7cca9 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -520,6 +520,50 @@ Example: <- { "return": {} } EQMP +{ + .name = "migrate-set-cache-size", + .args_type = "value:o", + .mhandler.cmd_new = qmp_marshal_input_migrate_set_cache_size, + }, + +SQMP +migrate-set-cache-size +--------------------- + +Set cache size to be used by XBZRLE migration, the cache size will be rounded +down to the nearest power of 2 + +Arguments: + +- "value": cache size in bytes (json-int) + +Example: + +-> { "execute": "migrate-set-cache-size", "arguments": { "value": 536870912 } } +<- { "return": {} } + +EQMP + { + .name = "query-migrate-cache-size", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_migrate_cache_size, + }, + +SQMP +query-migrate-cache-size +--------------------- + +Show cache size to be used by XBZRLE migration + +returns a json-object with the following information: +- "size" : json-int + +Example: + +-> { "execute": "query-migrate-cache-size" } +<- { "return": 67108864 } + +EQMP { .name = "migrate_set_speed", @@ -2078,12 +2122,24 @@ The main json-object contains the following: - "transferred": amount transferred (json-int) - "remaining": amount remaining (json-int) - "total": total (json-int) + - "total-time": total amount of ms since migration started. If + migration has ended, it returns the total migration time + (json-int) + - "duplicate": number of duplicated pages (json-int) + - "normal" : number of normal pages transferred (json-int) + - "normal-bytes" : number of normal bytes transferred (json-int) - "disk": only present if "status" is "active" and it is a block migration, it is a json-object with the following disk information (in bytes): - "transferred": amount transferred (json-int) - "remaining": amount remaining (json-int) - "total": total (json-int) - +- "xbzrle-cache": only present if XBZRLE is active. + It is a json-object with the following XBZRLE information: + - "cache-size": XBZRLE cache size + - "bytes": total XBZRLE bytes transferred + - "pages": number of XBZRLE compressed pages + - "cache-miss": number of cache misses + - "overflow": number of XBZRLE overflows Examples: 1. Before the first migration @@ -2094,7 +2150,19 @@ Examples: 2. Migration is done and has succeeded -> { "execute": "query-migrate" } -<- { "return": { "status": "completed" } } +<- { "return": { + "status": "completed", + "ram":{ + "transferred":123, + "remaining":123, + "total":246, + "total-time":12345, + "duplicate":123, + "normal":123, + "normal-bytes":123456 + } + } + } 3. Migration is done and has failed @@ -2110,7 +2178,11 @@ Examples: "ram":{ "transferred":123, "remaining":123, - "total":246 + "total":246, + "total-time":12345, + "duplicate":123, + "normal":123, + "normal-bytes":123456 } } } @@ -2124,7 +2196,11 @@ Examples: "ram":{ "total":1057024, "remaining":1053304, - "transferred":3720 + "transferred":3720, + "total-time":12345, + "duplicate":123, + "normal":123, + "normal-bytes":123456 }, "disk":{ "total":20971520, @@ -2134,6 +2210,32 @@ Examples: } } +6. Migration is being performed and XBZRLE is active: + +-> { "execute": "query-migrate" } +<- { + "return":{ + "status":"active", + "capabilities" : [ { "capability": "xbzrle", "state" : true } ], + "ram":{ + "total":1057024, + "remaining":1053304, + "transferred":3720, + "total-time":12345, + "duplicate":10, + "normal":3333, + "normal-bytes":3412992 + }, + "xbzrle-cache":{ + "cache-size":67108864, + "bytes":20971520, + "pages":2444343, + "cache-miss":2244, + "overflow":34434 + } + } + } + EQMP { @@ -2143,6 +2245,55 @@ EQMP }, SQMP +migrate-set-capabilities +------- + +Enable/Disable migration capabilities + +- "xbzrle": xbzrle support + +Arguments: + +Example: + +-> { "execute": "migrate-set-capabilities" , "arguments": + { "capabilities": [ { "capability": "xbzrle", "state": true } ] } } + +EQMP + + { + .name = "migrate-set-capabilities", + .args_type = "capabilities:O", + .params = "capability:s,state:b", + .mhandler.cmd_new = qmp_marshal_input_migrate_set_capabilities, + }, +SQMP +query-migrate-capabilities +------- + +Query current migration capabilities + +- "capabilities": migration capabilities state + - "xbzrle" : XBZRLE state (json-bool) + +Arguments: + +Example: + +-> { "execute": "query-migrate-capabilities" } +<- { "return": { + "capabilities" : [ { "capability" : "xbzrle", "state" : false } ] + } + } +EQMP + + { + .name = "query-migrate-capabilities", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_migrate_capabilities, + }, + +SQMP query-balloon ------------- @@ -2392,3 +2392,162 @@ void vmstate_register_ram_global(MemoryRegion *mr) { vmstate_register_ram(mr, NULL); } + +/* + page = zrun nzrun + | zrun nzrun page + + zrun = length + + nzrun = length byte... + + length = uleb128 encoded integer + */ +int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen) +{ + uint32_t zrun_len = 0, nzrun_len = 0; + int d = 0, i = 0; + long res, xor; + uint8_t *nzrun_start = NULL; + + g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) % + sizeof(long))); + + while (i < slen) { + /* overflow */ + if (d + 2 > dlen) { + return -1; + } + + /* not aligned to sizeof(long) */ + res = (slen - i) % sizeof(long); + while (res && old_buf[i] == new_buf[i]) { + zrun_len++; + i++; + res--; + } + + /* word at a time for speed */ + if (!res) { + while (i < slen && + (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) { + i += sizeof(long); + zrun_len += sizeof(long); + } + + /* go over the rest */ + while (i < slen && old_buf[i] == new_buf[i]) { + zrun_len++; + i++; + } + } + + /* buffer unchanged */ + if (zrun_len == slen) { + return 0; + } + + /* skip last zero run */ + if (i == slen) { + return d; + } + + d += uleb128_encode_small(dst + d, zrun_len); + + zrun_len = 0; + nzrun_start = new_buf + i; + + /* overflow */ + if (d + 2 > dlen) { + return -1; + } + /* not aligned to sizeof(long) */ + res = (slen - i) % sizeof(long); + while (res && old_buf[i] != new_buf[i]) { + i++; + nzrun_len++; + res--; + } + + /* word at a time for speed, use of 32-bit long okay */ + if (!res) { + /* truncation to 32-bit long okay */ + long mask = 0x0101010101010101ULL; + while (i < slen) { + xor = *(long *)(old_buf + i) ^ *(long *)(new_buf + i); + if ((xor - mask) & ~xor & (mask << 7)) { + /* found the end of an nzrun within the current long */ + while (old_buf[i] != new_buf[i]) { + nzrun_len++; + i++; + } + break; + } else { + i += sizeof(long); + nzrun_len += sizeof(long); + } + } + } + + d += uleb128_encode_small(dst + d, nzrun_len); + /* overflow */ + if (d + nzrun_len > dlen) { + return -1; + } + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + nzrun_len = 0; + } + + return d; +} + +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) +{ + int i = 0, d = 0; + int ret; + uint32_t count = 0; + + while (i < slen) { + + /* zrun */ + if ((slen - i) < 2) { + return -1; + } + + ret = uleb128_decode_small(src + i, &count); + if (ret < 0 || (i && !count)) { + return -1; + } + i += ret; + d += count; + + /* overflow */ + if (d > dlen) { + return -1; + } + + /* nzrun */ + if ((slen - i) < 2) { + return -1; + } + + ret = uleb128_decode_small(src + i, &count); + if (ret < 0 || !count) { + return -1; + } + i += ret; + + /* overflow */ + if (d + count > dlen || i + count > slen) { + return -1; + } + + memcpy(dst + d, src + i, count); + d += count; + i += count; + } + + return d; +} |