diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-03-26 13:38:00 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-03-26 13:38:00 -0500 |
commit | 18501ae6e825d8da72369fd091018ef71071bd87 (patch) | |
tree | 780fd5af5f8a5bcb13da79962107b5364e7f29aa | |
parent | fad5593ca646010d3cb83f7926d78dd30c20c610 (diff) | |
parent | 500f0061d628b52220038939728f0d7aee634468 (diff) |
Merge remote-tracking branch 'quintela/migration.next' into staging
# By Peter Lieven (9) and others
# Via Juan Quintela
* quintela/migration.next: (22 commits)
Use qemu_put_buffer_async for guest memory pages
Add qemu_put_buffer_async
Use writev ops if available
Store the data to send also in iovec
Update bytes_xfer in qemu_put_byte
Add socket_writev_buffer function
Add QemuFileWritevBuffer QemuFileOps
migration: use XBZRLE only after bulk stage
migration: do not search dirty pages in bulk stage
migration: do not sent zero pages in bulk stage
migration: add an indicator for bulk state of ram migration
migration: search for zero instead of dup pages
bitops: unroll while loop in find_next_bit()
buffer_is_zero: use vector optimizations if possible
cutils: add a function to find non-zero content in a buffer
move vector definitions to qemu-common.h
savevm: Fix bugs in the VMSTATE_VBUFFER_MULTIPLY definition
savevm: Add VMSTATE_STRUCT_VARRAY_POINTER_UINT32
savevm: Add VMSTATE_FLOAT64 helpers
savevm: Add VMSTATE_UINTTL_EQUAL helper
...
-rw-r--r-- | arch_init.c | 76 | ||||
-rw-r--r-- | hmp.c | 2 | ||||
-rw-r--r-- | hw/hw.h | 6 | ||||
-rw-r--r-- | include/migration/migration.h | 2 | ||||
-rw-r--r-- | include/migration/qemu-file.h | 12 | ||||
-rw-r--r-- | include/migration/vmstate.h | 43 | ||||
-rw-r--r-- | include/qemu-common.h | 31 | ||||
-rw-r--r-- | migration.c | 3 | ||||
-rw-r--r-- | qapi-schema.json | 8 | ||||
-rw-r--r-- | qmp-commands.hx | 55 | ||||
-rw-r--r-- | savevm.c | 146 | ||||
-rw-r--r-- | util/bitops.c | 18 | ||||
-rw-r--r-- | util/cutils.c | 60 |
13 files changed, 375 insertions, 87 deletions
diff --git a/arch_init.c b/arch_init.c index e8ade9e639..4ef5a15a6e 100644 --- a/arch_init.c +++ b/arch_init.c @@ -116,26 +116,6 @@ const uint32_t arch_type = QEMU_ARCH; #define RAM_SAVE_FLAG_CONTINUE 0x20 #define RAM_SAVE_FLAG_XBZRLE 0x40 -#ifdef __ALTIVEC__ -#include <altivec.h> -#define VECTYPE vector unsigned char -#define SPLAT(p) vec_splat(vec_ld(0, p), 0) -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) -/* altivec.h may redefine the bool macro as vector type. - * Reset it to POSIX semantics. */ -#undef bool -#define bool _Bool -#elif defined __SSE2__ -#include <emmintrin.h> -#define VECTYPE __m128i -#define SPLAT(p) _mm_set1_epi8(*(p)) -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) -#else -#define VECTYPE unsigned long -#define SPLAT(p) (*(p) * (~0UL / 255)) -#define ALL_EQ(v1, v2) ((v1) == (v2)) -#endif - static struct defconfig_file { const char *filename; @@ -166,19 +146,10 @@ int qemu_read_default_config_files(bool userconfig) return 0; } -static int is_dup_page(uint8_t *page) +static inline bool is_zero_page(uint8_t *p) { - VECTYPE *p = (VECTYPE *)page; - VECTYPE val = SPLAT(page); - int i; - - for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { - if (!ALL_EQ(val, p[i])) { - return 0; - } - } - - return 1; + return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) == + TARGET_PAGE_SIZE; } /* struct contains XBZRLE cache and a static page @@ -212,6 +183,7 @@ int64_t xbzrle_cache_resize(int64_t new_size) /* accounting for migration statistics */ typedef struct AccountingInfo { uint64_t dup_pages; + uint64_t skipped_pages; uint64_t norm_pages; uint64_t iterations; uint64_t xbzrle_bytes; @@ -237,6 +209,16 @@ uint64_t dup_mig_pages_transferred(void) return acct_info.dup_pages; } +uint64_t skipped_mig_bytes_transferred(void) +{ + return acct_info.skipped_pages * TARGET_PAGE_SIZE; +} + +uint64_t skipped_mig_pages_transferred(void) +{ + return acct_info.skipped_pages; +} + uint64_t norm_mig_bytes_transferred(void) { return 
acct_info.norm_pages * TARGET_PAGE_SIZE; @@ -348,6 +330,7 @@ static ram_addr_t last_offset; static unsigned long *migration_bitmap; static uint64_t migration_dirty_pages; static uint32_t last_version; +static bool ram_bulk_stage; static inline ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, @@ -357,7 +340,13 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, unsigned long nr = base + (start >> TARGET_PAGE_BITS); unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); - unsigned long next = find_next_bit(migration_bitmap, size, nr); + unsigned long next; + + if (ram_bulk_stage && nr > base) { + next = nr + 1; + } else { + next = find_next_bit(migration_bitmap, size, nr); + } if (next < size) { clear_bit(next, migration_bitmap); @@ -455,6 +444,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) if (!block) { block = QTAILQ_FIRST(&ram_list.blocks); complete_round = true; + ram_bulk_stage = false; } } else { uint8_t *p; @@ -465,13 +455,18 @@ static int ram_save_block(QEMUFile *f, bool last_stage) /* In doubt sent page as normal */ bytes_sent = -1; - if (is_dup_page(p)) { + if (is_zero_page(p)) { acct_info.dup_pages++; - bytes_sent = save_block_hdr(f, block, offset, cont, - RAM_SAVE_FLAG_COMPRESS); - qemu_put_byte(f, *p); - bytes_sent += 1; - } else if (migrate_use_xbzrle()) { + if (!ram_bulk_stage) { + bytes_sent = save_block_hdr(f, block, offset, cont, + RAM_SAVE_FLAG_COMPRESS); + qemu_put_byte(f, 0); + bytes_sent++; + } else { + acct_info.skipped_pages++; + bytes_sent = 0; + } + } else if (!ram_bulk_stage && migrate_use_xbzrle()) { current_addr = block->offset + offset; bytes_sent = save_xbzrle_page(f, p, current_addr, block, offset, cont, last_stage); @@ -483,7 +478,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) /* XBZRLE overflow or normal page */ if (bytes_sent == -1) { bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); + 
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); bytes_sent += TARGET_PAGE_SIZE; acct_info.norm_pages++; } @@ -558,6 +553,7 @@ static void reset_ram_globals(void) last_sent_block = NULL; last_offset = 0; last_version = ram_list.version; + ram_bulk_stage = true; } #define MAX_WAIT 50 /* ms, half buffered_file limit */ @@ -173,6 +173,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->ram->total >> 10); monitor_printf(mon, "duplicate: %" PRIu64 " pages\n", info->ram->duplicate); + monitor_printf(mon, "skipped: %" PRIu64 " pages\n", + info->ram->skipped); monitor_printf(mon, "normal: %" PRIu64 " pages\n", info->ram->normal); monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n", @@ -52,16 +52,22 @@ int qemu_boot_set(const char *boot_devices); #if TARGET_LONG_BITS == 64 #define VMSTATE_UINTTL_V(_f, _s, _v) \ VMSTATE_UINT64_V(_f, _s, _v) +#define VMSTATE_UINTTL_EQUAL_V(_f, _s, _v) \ + VMSTATE_UINT64_EQUAL_V(_f, _s, _v) #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) #else #define VMSTATE_UINTTL_V(_f, _s, _v) \ VMSTATE_UINT32_V(_f, _s, _v) +#define VMSTATE_UINTTL_EQUAL_V(_f, _s, _v) \ + VMSTATE_UINT32_EQUAL_V(_f, _s, _v) #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) #endif #define VMSTATE_UINTTL(_f, _s) \ VMSTATE_UINTTL_V(_f, _s, 0) +#define VMSTATE_UINTTL_EQUAL(_f, _s) \ + VMSTATE_UINTTL_EQUAL_V(_f, _s, 0) #define VMSTATE_UINTTL_ARRAY(_f, _s, _n) \ VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, 0) diff --git a/include/migration/migration.h b/include/migration/migration.h index bb617fdacf..e2acec64c0 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -96,6 +96,8 @@ extern SaveVMHandlers savevm_ram_handlers; uint64_t dup_mig_bytes_transferred(void); uint64_t dup_mig_pages_transferred(void); +uint64_t skipped_mig_bytes_transferred(void); +uint64_t skipped_mig_pages_transferred(void); uint64_t norm_mig_bytes_transferred(void); uint64_t 
norm_mig_pages_transferred(void); uint64_t xbzrle_mig_bytes_transferred(void); diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index df812617f8..623c434b15 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -51,11 +51,18 @@ typedef int (QEMUFileCloseFunc)(void *opaque); */ typedef int (QEMUFileGetFD)(void *opaque); +/* + * This function writes an iovec to file. + */ +typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov, + int iovcnt); + typedef struct QEMUFileOps { QEMUFilePutBufferFunc *put_buffer; QEMUFileGetBufferFunc *get_buffer; QEMUFileCloseFunc *close; QEMUFileGetFD *get_fd; + QEMUFileWritevBufferFunc *writev_buffer; } QEMUFileOps; QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); @@ -68,6 +75,11 @@ int qemu_fclose(QEMUFile *f); int64_t qemu_ftell(QEMUFile *f); void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size); void qemu_put_byte(QEMUFile *f, int v); +/* + * put_buffer without copying the buffer. + * The buffer should be available till it is sent asynchronously. 
+ */ +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size); static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v) { diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 6666d27b25..65918a9abe 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -149,6 +149,7 @@ extern const VMStateInfo vmstate_info_uint8_equal; extern const VMStateInfo vmstate_info_uint16_equal; extern const VMStateInfo vmstate_info_int32_equal; extern const VMStateInfo vmstate_info_uint32_equal; +extern const VMStateInfo vmstate_info_uint64_equal; extern const VMStateInfo vmstate_info_int32_le; extern const VMStateInfo vmstate_info_uint8; @@ -156,6 +157,8 @@ extern const VMStateInfo vmstate_info_uint16; extern const VMStateInfo vmstate_info_uint32; extern const VMStateInfo vmstate_info_uint64; +extern const VMStateInfo vmstate_info_float64; + extern const VMStateInfo vmstate_info_timer; extern const VMStateInfo vmstate_info_buffer; extern const VMStateInfo vmstate_info_unused_buffer; @@ -340,6 +343,16 @@ extern const VMStateInfo vmstate_info_bitmap; .offset = vmstate_offset_pointer(_state, _field, _type), \ } +#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = 0, \ + .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\ + .size = sizeof(_type), \ + .vmsd = &(_vmsd), \ + .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + #define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \ .name = (stringify(_field)), \ .version_id = 0, \ @@ -380,14 +393,14 @@ extern const VMStateInfo vmstate_info_bitmap; .offset = vmstate_offset_buffer(_state, _field) + _start, \ } -#define VMSTATE_BUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \ +#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, 
_start, _field_size, _multiply) { \ .name = (stringify(_field)), \ .version_id = (_version), \ .field_exists = (_test), \ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\ .size = (_multiply), \ .info = &vmstate_info_buffer, \ - .flags = VMS_VBUFFER|VMS_MULTIPLY, \ + .flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \ .offset = offsetof(_state, _field), \ .start = (_start), \ } @@ -518,8 +531,17 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_INT32_EQUAL(_f, _s) \ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t) -#define VMSTATE_UINT32_EQUAL(_f, _s) \ - VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint32_equal, uint32_t) +#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t) + +#define VMSTATE_UINT32_EQUAL(_f, _s) \ + VMSTATE_UINT32_EQUAL_V(_f, _s, 0) + +#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t) + +#define VMSTATE_UINT64_EQUAL(_f, _s) \ + VMSTATE_UINT64_EQUAL_V(_f, _s, 0) #define VMSTATE_INT32_LE(_f, _s) \ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t) @@ -533,6 +555,13 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_UINT32_TEST(_f, _s, _t) \ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t) + +#define VMSTATE_FLOAT64_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64) + +#define VMSTATE_FLOAT64(_f, _s) \ + VMSTATE_FLOAT64_V(_f, _s, 0) + #define VMSTATE_TIMER_TEST(_f, _s, _test) \ VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *) @@ -599,6 +628,12 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_INT64_ARRAY(_f, _s, _n) \ VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0) +#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64) + +#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n) \ + VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0) + #define VMSTATE_BUFFER_V(_f, _s, _v) \ 
VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) diff --git a/include/qemu-common.h b/include/qemu-common.h index 2371132c11..31fff22f32 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -448,4 +448,35 @@ int uleb128_decode_small(const uint8_t *in, uint32_t *n); void hexdump(const char *buf, FILE *fp, const char *prefix, size_t size); +/* vector definitions */ +#ifdef __ALTIVEC__ +#include <altivec.h> +#define VECTYPE vector unsigned char +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) +/* altivec.h may redefine the bool macro as vector type. + * Reset it to POSIX semantics. */ +#undef bool +#define bool _Bool +#elif defined __SSE2__ +#include <emmintrin.h> +#define VECTYPE __m128i +#define SPLAT(p) _mm_set1_epi8(*(p)) +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) +#else +#define VECTYPE unsigned long +#define SPLAT(p) (*(p) * (~0UL / 255)) +#define ALL_EQ(v1, v2) ((v1) == (v2)) +#endif + +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 +static inline bool +can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); +} +size_t buffer_find_nonzero_offset(const void *buf, size_t len); + #endif diff --git a/migration.c b/migration.c index 185d11260d..7fb2147391 100644 --- a/migration.c +++ b/migration.c @@ -197,11 +197,11 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->remaining = ram_bytes_remaining(); info->ram->total = ram_bytes_total(); info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->skipped = skipped_mig_pages_transferred(); info->ram->normal = norm_mig_pages_transferred(); info->ram->normal_bytes = norm_mig_bytes_transferred(); info->ram->dirty_pages_rate = s->dirty_pages_rate; - if (blk_mig_active()) { info->has_disk = true; info->disk = g_malloc0(sizeof(*info->disk)); @@ -227,6 
+227,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->remaining = 0; info->ram->total = ram_bytes_total(); info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->skipped = skipped_mig_pages_transferred(); info->ram->normal = norm_mig_pages_transferred(); info->ram->normal_bytes = norm_mig_bytes_transferred(); break; diff --git a/qapi-schema.json b/qapi-schema.json index af499bd926..6494787714 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -496,7 +496,9 @@ # # @total: total amount of bytes involved in the migration process # -# @duplicate: number of duplicate pages (since 1.2) +# @duplicate: number of duplicate (zero) pages (since 1.2) +# +# @skipped: number of skipped zero pages (since 1.5) # # @normal : number of normal pages (since 1.2) # @@ -509,8 +511,8 @@ ## { 'type': 'MigrationStats', 'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' , - 'duplicate': 'int', 'normal': 'int', 'normal-bytes': 'int', - 'dirty-pages-rate' : 'int' } } + 'duplicate': 'int', 'skipped': 'int', 'normal': 'int', + 'normal-bytes': 'int', 'dirty-pages-rate' : 'int' } } ## # @XBZRLECacheStats diff --git a/qmp-commands.hx b/qmp-commands.hx index 2051fcb49c..1e0e11ee32 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -666,7 +666,7 @@ EQMP SQMP migrate-set-cache-size ---------------------- +---------------------- Set cache size to be used by XBZRLE migration, the cache size will be rounded down to the nearest power of 2 @@ -689,7 +689,7 @@ EQMP SQMP query-migrate-cache-size ---------------------- +------------------------ Show cache size to be used by XBZRLE migration @@ -2453,32 +2453,43 @@ The main json-object contains the following: - Possible values: "active", "completed", "failed", "cancelled" - "total-time": total amount of ms since migration started. 
If migration has ended, it returns the total migration - time (json-int) + time (json-int) - "downtime": only present when migration has finished correctly total amount in ms for downtime that happened (json-int) - "expected-downtime": only present while migration is active total amount in ms for downtime that was calculated on - the last bitmap round (json-int) + the last bitmap round (json-int) - "ram": only present if "status" is "active", it is a json-object with the - following RAM information (in bytes): - - "transferred": amount transferred (json-int) - - "remaining": amount remaining (json-int) - - "total": total (json-int) - - "duplicate": number of duplicated pages (json-int) - - "normal" : number of normal pages transferred (json-int) - - "normal-bytes" : number of normal bytes transferred (json-int) + following RAM information: + - "transferred": amount transferred in bytes (json-int) + - "remaining": amount remaining to transfer in bytes (json-int) + - "total": total amount of memory in bytes (json-int) + - "duplicate": number of pages filled entirely with the same + byte (json-int) + These are sent over the wire much more efficiently. + - "skipped": number of skipped zero pages (json-int) + - "normal" : number of whole pages transferred. I.e. they + were not sent as duplicate or xbzrle pages (json-int) + - "normal-bytes" : number of bytes transferred in whole + pages. 
This is just normal pages times size of one page, + but this way upper levels don't need to care about page + size (json-int) - "disk": only present if "status" is "active" and it is a block migration, - it is a json-object with the following disk information (in bytes): - - "transferred": amount transferred (json-int) - - "remaining": amount remaining (json-int) - - "total": total (json-int) + it is a json-object with the following disk information: + - "transferred": amount transferred in bytes (json-int) + - "remaining": amount remaining to transfer in bytes (json-int) + - "total": total disk size in bytes (json-int) - "xbzrle-cache": only present if XBZRLE is active. It is a json-object with the following XBZRLE information: - - "cache-size": XBZRLE cache size - - "bytes": total XBZRLE bytes transferred + - "cache-size": XBZRLE cache size in bytes + - "bytes": number of bytes transferred for XBZRLE compressed pages - "pages": number of XBZRLE compressed pages - - "cache-miss": number of cache misses - - "overflow": number of XBZRLE overflows + - "cache-miss": number of XBZRLE page cache misses + - "overflow": number of times XBZRLE overflows. This means + that the XBZRLE encoding was bigger than just sending the + whole page, and then we sent the whole page instead (as a + normal page). + Examples: 1. 
Before the first migration @@ -2589,11 +2600,11 @@ EQMP SQMP migrate-set-capabilities -------- +------------------------ Enable/Disable migration capabilities -- "xbzrle": xbzrle support +- "xbzrle": XBZRLE support Arguments: @@ -2612,7 +2623,7 @@ EQMP }, SQMP query-migrate-capabilities -------- +-------------------------- Query current migration capabilities @@ -39,6 +39,7 @@ #include "qmp-commands.h" #include "trace.h" #include "qemu/bitops.h" +#include "qemu/iov.h" #define SELF_ANNOUNCE_ROUNDS 5 @@ -113,6 +114,7 @@ void qemu_announce_self(void) /* savevm/loadvm support */ #define IO_BUF_SIZE 32768 +#define MAX_IOV_SIZE MIN(IOV_MAX, 64) struct QEMUFile { const QEMUFileOps *ops; @@ -128,6 +130,9 @@ struct QEMUFile { int buf_size; /* 0 when writing */ uint8_t buf[IO_BUF_SIZE]; + struct iovec iov[MAX_IOV_SIZE]; + unsigned int iovcnt; + int last_error; }; @@ -171,6 +176,19 @@ static void coroutine_fn yield_until_fd_readable(int fd) qemu_coroutine_yield(); } +static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt) +{ + QEMUFileSocket *s = opaque; + ssize_t len; + ssize_t size = iov_size(iov, iovcnt); + + len = iov_send(s->fd, iov, iovcnt, 0, size); + if (len < size) { + len = -socket_error(); + } + return len; +} + static int socket_get_fd(void *opaque) { QEMUFileSocket *s = opaque; @@ -275,7 +293,7 @@ static int stdio_fclose(void *opaque) QEMUFileStdio *s = opaque; int ret = 0; - if (s->file->ops->put_buffer) { + if (s->file->ops->put_buffer || s->file->ops->writev_buffer) { int fd = fileno(s->stdio_file); struct stat st; @@ -387,6 +405,7 @@ static const QEMUFileOps socket_read_ops = { static const QEMUFileOps socket_write_ops = { .get_fd = socket_get_fd, .put_buffer = socket_put_buffer, + .writev_buffer = socket_writev_buffer, .close = socket_close }; @@ -497,22 +516,38 @@ static void qemu_file_set_error(QEMUFile *f, int ret) } } -/** Flushes QEMUFile buffer +/** + * Flushes QEMUFile buffer * + * If there is writev_buffer QEMUFileOps it uses 
it otherwise uses + * put_buffer ops. */ static void qemu_fflush(QEMUFile *f) { - int ret = 0; + ssize_t ret = 0; + int i = 0; - if (!f->ops->put_buffer) { + if (!f->ops->writev_buffer && !f->ops->put_buffer) { return; } - if (f->is_write && f->buf_index > 0) { - ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index); - if (ret >= 0) { - f->pos += f->buf_index; + + if (f->is_write && f->iovcnt > 0) { + if (f->ops->writev_buffer) { + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt); + if (ret >= 0) { + f->pos += ret; + } + } else { + for (i = 0; i < f->iovcnt && ret >= 0; i++) { + ret = f->ops->put_buffer(f->opaque, f->iov[i].iov_base, f->pos, + f->iov[i].iov_len); + if (ret >= 0) { + f->pos += ret; + } + } } f->buf_index = 0; + f->iovcnt = 0; } if (ret < 0) { qemu_file_set_error(f, ret); @@ -586,6 +621,40 @@ int qemu_fclose(QEMUFile *f) return ret; } +static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size) +{ + /* check for adjacent buffer and coalesce them */ + if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + + f->iov[f->iovcnt - 1].iov_len) { + f->iov[f->iovcnt - 1].iov_len += size; + } else { + f->iov[f->iovcnt].iov_base = (uint8_t *)buf; + f->iov[f->iovcnt++].iov_len = size; + } +} + +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size) +{ + if (f->last_error) { + return; + } + + if (f->is_write == 0 && f->buf_index > 0) { + fprintf(stderr, + "Attempted to write to buffer while read buffer is not empty\n"); + abort(); + } + + add_to_iovec(f, buf, size); + + f->is_write = 1; + f->bytes_xfer += size; + + if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) { + qemu_fflush(f); + } +} + void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) { int l; @@ -607,15 +676,12 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) memcpy(f->buf + f->buf_index, buf, l); f->is_write = 1; f->buf_index += l; - f->bytes_xfer += l; + qemu_put_buffer_async(f, f->buf + (f->buf_index - l), l); 
+ if (qemu_file_get_error(f)) { + break; + } buf += l; size -= l; - if (f->buf_index >= IO_BUF_SIZE) { - qemu_fflush(f); - if (qemu_file_get_error(f)) { - break; - } - } } } @@ -633,7 +699,11 @@ void qemu_put_byte(QEMUFile *f, int v) f->buf[f->buf_index++] = v; f->is_write = 1; - if (f->buf_index >= IO_BUF_SIZE) { + f->bytes_xfer++; + + add_to_iovec(f, f->buf + (f->buf_index - 1), 1); + + if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) { qemu_fflush(f); } } @@ -1072,6 +1142,27 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +/* 64 bit unsigned int. See that the received value is the same than the one + in the field */ + +static int get_uint64_equal(QEMUFile *f, void *pv, size_t size) +{ + uint64_t *v = pv; + uint64_t v2; + qemu_get_be64s(f, &v2); + + if (*v == v2) { + return 0; + } + return -EINVAL; +} + +const VMStateInfo vmstate_info_uint64_equal = { + .name = "int64 equal", + .get = get_uint64_equal, + .put = put_uint64, +}; + /* 8 bit int. See that the received value is the same than the one in the field */ @@ -1112,6 +1203,29 @@ const VMStateInfo vmstate_info_uint16_equal = { .put = put_uint16, }; +/* floating point */ + +static int get_float64(QEMUFile *f, void *pv, size_t size) +{ + float64 *v = pv; + + *v = make_float64(qemu_get_be64(f)); + return 0; +} + +static void put_float64(QEMUFile *f, void *pv, size_t size) +{ + uint64_t *v = pv; + + qemu_put_be64(f, float64_val(*v)); +} + +const VMStateInfo vmstate_info_float64 = { + .name = "float64", + .get = get_float64, + .put = put_float64, +}; + /* timers */ static int get_timer(QEMUFile *f, void *pv, size_t size) diff --git a/util/bitops.c b/util/bitops.c index e72237ab2b..227c38b883 100644 --- a/util/bitops.c +++ b/util/bitops.c @@ -42,7 +42,23 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, size -= BITS_PER_LONG; result += BITS_PER_LONG; } - while (size & ~(BITS_PER_LONG-1)) { + while (size >= 4*BITS_PER_LONG) { + unsigned long d1, d2, d3; + 
tmp = *p; + d1 = *(p+1); + d2 = *(p+2); + d3 = *(p+3); + if (tmp) { + goto found_middle; + } + if (d1 | d2 | d3) { + break; + } + p += 4; + result += 4*BITS_PER_LONG; + size -= 4*BITS_PER_LONG; + } + while (size >= BITS_PER_LONG) { if ((tmp = *(p++))) { goto found_middle; } diff --git a/util/cutils.c b/util/cutils.c index 1439da4f99..5024253405 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -143,6 +143,61 @@ int qemu_fdatasync(int fd) } /* + * Searches for an area with non-zero content in a buffer + * + * Attention! The len must be a multiple of + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * and addr must be a multiple of sizeof(VECTYPE) due to + * restriction of optimizations in this function. + * + * can_use_buffer_find_nonzero_offset() can be used to check + * these requirements. + * + * The return value is the offset of the non-zero area rounded + * down to a multiple of sizeof(VECTYPE) for the first + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * afterwards. + * + * If the buffer is all zero the return value is equal to len. 
+ */ + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + const VECTYPE *p = buf; + const VECTYPE zero = (VECTYPE){0}; + size_t i; + + assert(can_use_buffer_find_nonzero_offset(buf, len)); + + if (!len) { + return 0; + } + + for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { + if (!ALL_EQ(p[i], zero)) { + return i * sizeof(VECTYPE); + } + } + + for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; + i < len / sizeof(VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + VECTYPE tmp0 = p[i + 0] | p[i + 1]; + VECTYPE tmp1 = p[i + 2] | p[i + 3]; + VECTYPE tmp2 = p[i + 4] | p[i + 5]; + VECTYPE tmp3 = p[i + 6] | p[i + 7]; + VECTYPE tmp01 = tmp0 | tmp1; + VECTYPE tmp23 = tmp2 | tmp3; + if (!ALL_EQ(tmp01 | tmp23, zero)) { + break; + } + } + + return i * sizeof(VECTYPE); +} + +/* * Checks if a buffer is all zeroes * * Attention! The len must be a multiple of 4 * sizeof(long) due to @@ -160,6 +215,11 @@ bool buffer_is_zero(const void *buf, size_t len) long d0, d1, d2, d3; const long * const data = buf; + /* use vector optimized zero check if possible */ + if (can_use_buffer_find_nonzero_offset(buf, len)) { + return buffer_find_nonzero_offset(buf, len) == len; + } + assert(len % (4 * sizeof(long)) == 0); len /= sizeof(long); |