diff options
| field | value | date |
|---|---|---|
| author | Peter Maydell <peter.maydell@linaro.org> | 2015-01-16 10:16:14 +0000 |
| committer | Peter Maydell <peter.maydell@linaro.org> | 2015-01-16 10:16:14 +0000 |
| commit | e68cba36360a2ab5bf0576b66df4d0eb0d822f8d (patch) | |
| tree | 933ecff5cc8e380fe38693dc0cca2d7b8e479dfc | |
| parent | df58887b20fab8fe8a6dcca4db30cd4e4077d53a (diff) | |
| parent | ea987c2c21d4326bb58ee28f6888fdcf8fbda067 (diff) | |
Merge remote-tracking branch 'remotes/amit-migration/tags/mig-2.3-1' into staging
A set of patches collected over the holidays. Mix of optimizations and
fixes.
# gpg: Signature made Fri 16 Jan 2015 07:42:00 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg: aka "Amit Shah <amit@kernel.org>"
# gpg: aka "Amit Shah <amitshah@gmx.net>"
* remotes/amit-migration/tags/mig-2.3-1:
vmstate: type-check sub-arrays
migration_cancel: shutdown migration socket
Handle bi-directional communication for fd migration
socket shutdown
Tests: QEMUSizedBuffer/QEMUBuffer
QEMUSizedBuffer: only free qsb that qemu_bufopen allocated
xbzrle: rebuild the cache_is_cached function
xbzrle: optimize XBZRLE to decrease the cache misses
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | arch_init.c | 8 |
-rw-r--r-- | docs/xbzrle.txt | 8 |
-rw-r--r-- | include/migration/page_cache.h | 10 |
-rw-r--r-- | include/migration/qemu-file.h | 10 |
-rw-r--r-- | include/migration/vmstate.h | 2 |
-rw-r--r-- | include/qemu/sockets.h | 7 |
-rw-r--r-- | migration/fd.c | 24 |
-rw-r--r-- | migration/migration.c | 12 |
-rw-r--r-- | migration/qemu-file-buf.c | 10 |
-rw-r--r-- | migration/qemu-file-unix.c | 23 |
-rw-r--r-- | migration/qemu-file.c | 12 |
-rw-r--r-- | page_cache.c | 43 |
-rw-r--r-- | tests/test-vmstate.c | 20 |
13 files changed, 143 insertions, 46 deletions
diff --git a/arch_init.c b/arch_init.c index cfedbf08af..89c8fa46bb 100644 --- a/arch_init.c +++ b/arch_init.c @@ -346,7 +346,8 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr) /* We don't care if this fails to allocate a new cache page * as long as it updated an old one */ - cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE); + cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, + bitmap_sync_count); } #define ENCODING_FLAG_XBZRLE 0x1 @@ -358,10 +359,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, int encoded_len = 0, bytes_sent = -1; uint8_t *prev_cached_page; - if (!cache_is_cached(XBZRLE.cache, current_addr)) { + if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { acct_info.xbzrle_cache_miss++; if (!last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, *current_data) == -1) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data, + bitmap_sync_count) == -1) { return -1; } else { /* update *current_data when the page has been diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt index cc3a26a91d..52c8511a4c 100644 --- a/docs/xbzrle.txt +++ b/docs/xbzrle.txt @@ -71,6 +71,14 @@ encoded buffer: encoded length 24 e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69 +Cache update strategy +===================== +Keeping the hot pages in the cache is effective for decreased cache +misses. XBZRLE uses a counter as the age of each page. The counter will +increase after each ram dirty bitmap sync. When a cache conflict is +detected, XBZRLE will only evict pages in the cache that are older than +a threshold. + Usage ====================== 1. Verify the destination QEMU version is able to decode the new format. 
diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h index 2d5ce2dd7a..10ed53274c 100644 --- a/include/migration/page_cache.h +++ b/include/migration/page_cache.h @@ -43,8 +43,10 @@ void cache_fini(PageCache *cache); * * @cache pointer to the PageCache struct * @addr: page addr + * @current_age: current bitmap generation */ -bool cache_is_cached(const PageCache *cache, uint64_t addr); +bool cache_is_cached(const PageCache *cache, uint64_t addr, + uint64_t current_age); /** * get_cached_data: Get the data cached for an addr @@ -60,13 +62,15 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr); * cache_insert: insert the page into the cache. the page cache * will dup the data on insert. the previous value will be overwritten * - * Returns -1 on error + * Returns -1 when the page isn't inserted into cache * * @cache pointer to the PageCache struct * @addr: page address * @pdata: pointer to the page + * @current_age: current bitmap generation */ -int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata); +int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata, + uint64_t current_age); /** * cache_resize: resize the page cache. In case of size reduction the extra diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 401676bf4d..d843c0010c 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -84,6 +84,14 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque, size_t size, int *bytes_sent); +/* + * Stop any read or write (depending on flags) on the underlying + * transport on the QEMUFile. 
+ * Existing blocking reads/writes must be woken + * Returns 0 on success, -err on error + */ +typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr); + typedef struct QEMUFileOps { QEMUFilePutBufferFunc *put_buffer; QEMUFileGetBufferFunc *get_buffer; @@ -94,6 +102,7 @@ typedef struct QEMUFileOps { QEMURamHookFunc *after_ram_iterate; QEMURamHookFunc *hook_ram_load; QEMURamSaveFunc *save_page; + QEMUFileShutdownFunc *shut_down; } QEMUFileOps; struct QEMUSizedBuffer { @@ -177,6 +186,7 @@ void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); void qemu_file_set_error(QEMUFile *f, int ret); +int qemu_file_shutdown(QEMUFile *f); void qemu_fflush(QEMUFile *f); static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index e45fc49cb1..d712a651ca 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -189,7 +189,7 @@ extern const VMStateInfo vmstate_info_bitmap; type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2)) #define vmstate_offset_sub_array(_state, _field, _type, _start) \ - (offsetof(_state, _field[_start])) + vmstate_offset_value(_state, _field[_start], _type) #define vmstate_offset_buffer(_state, _field) \ vmstate_offset_array(_state, _field, uint8_t, \ diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index f47dae614a..7992ece72a 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -44,6 +44,13 @@ int socket_set_fast_reuse(int fd); int send_all(int fd, const void *buf, int len1); int recv_all(int fd, void *buf, int len1, bool single_read); +#ifdef WIN32 +/* Windows has different names for the same constants with the same values */ +#define SHUT_RD 0 +#define SHUT_WR 1 +#define SHUT_RDWR 2 +#endif + /* callback function for nonblocking connect * valid fd on success, negative error code on failure */ diff --git 
a/migration/fd.c b/migration/fd.c index d2e523af74..129da9910b 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -31,13 +31,29 @@ do { } while (0) #endif +static bool fd_is_socket(int fd) +{ + struct stat stat; + int ret = fstat(fd, &stat); + if (ret == -1) { + /* When in doubt say no */ + return false; + } + return S_ISSOCK(stat.st_mode); +} + void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp) { int fd = monitor_get_fd(cur_mon, fdname, errp); if (fd == -1) { return; } - s->file = qemu_fdopen(fd, "wb"); + + if (fd_is_socket(fd)) { + s->file = qemu_fopen_socket(fd, "wb"); + } else { + s->file = qemu_fdopen(fd, "wb"); + } migrate_fd_connect(s); } @@ -58,7 +74,11 @@ void fd_start_incoming_migration(const char *infd, Error **errp) DPRINTF("Attempting to start an incoming migration via fd\n"); fd = strtol(infd, NULL, 0); - f = qemu_fdopen(fd, "rb"); + if (fd_is_socket(fd)) { + f = qemu_fopen_socket(fd, "rb"); + } else { + f = qemu_fdopen(fd, "rb"); + } if(f == NULL) { error_setg_errno(errp, errno, "failed to open the source descriptor"); return; diff --git a/migration/migration.c b/migration/migration.c index c49a05a165..b3adbc653a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -330,6 +330,7 @@ void migrate_fd_error(MigrationState *s) static void migrate_fd_cancel(MigrationState *s) { int old_state ; + QEMUFile *f = migrate_get_current()->file; trace_migrate_fd_cancel(); do { @@ -339,6 +340,17 @@ static void migrate_fd_cancel(MigrationState *s) } migrate_set_state(s, old_state, MIG_STATE_CANCELLING); } while (s->state != MIG_STATE_CANCELLING); + + /* + * If we're unlucky the migration code might be stuck somewhere in a + * send/write while the network has failed and is waiting to timeout; + * if we've got shutdown(2) available then we can force it to quit. + * The outgoing qemu file gets closed in migrate_fd_cleanup that is + * called in a bh, so there is no race against this cancel. 
+ */ + if (s->state == MIG_STATE_CANCELLING && f) { + qemu_file_shutdown(f); + } } void add_migration_state_change_notifier(Notifier *notify) diff --git a/migration/qemu-file-buf.c b/migration/qemu-file-buf.c index d33dd44747..e97e0bd655 100644 --- a/migration/qemu-file-buf.c +++ b/migration/qemu-file-buf.c @@ -395,6 +395,7 @@ QEMUSizedBuffer *qsb_clone(const QEMUSizedBuffer *qsb) typedef struct QEMUBuffer { QEMUSizedBuffer *qsb; QEMUFile *file; + bool qsb_allocated; } QEMUBuffer; static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) @@ -424,7 +425,9 @@ static int buf_close(void *opaque) { QEMUBuffer *s = opaque; - qsb_free(s->qsb); + if (s->qsb_allocated) { + qsb_free(s->qsb); + } g_free(s); @@ -463,12 +466,11 @@ QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input) } s = g_malloc0(sizeof(QEMUBuffer)); - if (mode[0] == 'r') { - s->qsb = input; - } + s->qsb = input; if (s->qsb == NULL) { s->qsb = qsb_create(NULL, 0); + s->qsb_allocated = true; } if (!s->qsb) { g_free(s); diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c index 9682396d97..bfbc0861ab 100644 --- a/migration/qemu-file-unix.c +++ b/migration/qemu-file-unix.c @@ -26,6 +26,7 @@ #include "qemu/sockets.h" #include "block/coroutine.h" #include "migration/qemu-file.h" +#include "migration/qemu-file-internal.h" typedef struct QEMUFileSocket { int fd; @@ -84,6 +85,17 @@ static int socket_close(void *opaque) return 0; } +static int socket_shutdown(void *opaque, bool rd, bool wr) +{ + QEMUFileSocket *s = opaque; + + if (shutdown(s->fd, rd ? (wr ? 
SHUT_RDWR : SHUT_RD) : SHUT_WR)) { + return -errno; + } else { + return 0; + } +} + static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, int64_t pos) { @@ -192,15 +204,18 @@ QEMUFile *qemu_fdopen(int fd, const char *mode) } static const QEMUFileOps socket_read_ops = { - .get_fd = socket_get_fd, + .get_fd = socket_get_fd, .get_buffer = socket_get_buffer, - .close = socket_close + .close = socket_close, + .shut_down = socket_shutdown + }; static const QEMUFileOps socket_write_ops = { - .get_fd = socket_get_fd, + .get_fd = socket_get_fd, .writev_buffer = socket_writev_buffer, - .close = socket_close + .close = socket_close, + .shut_down = socket_shutdown }; QEMUFile *qemu_fopen_socket(int fd, const char *mode) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index a7f2a34430..edc283073a 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -30,6 +30,18 @@ #include "migration/qemu-file-internal.h" #include "trace.h" +/* + * Stop a file from being read/written - not all backing files can do this + * typically only sockets can. 
+ */ +int qemu_file_shutdown(QEMUFile *f) +{ + if (!f->ops->shut_down) { + return -ENOSYS; + } + return f->ops->shut_down(f->opaque, true, true); +} + bool qemu_file_mode_is_not_valid(const char *mode) { if (mode == NULL || diff --git a/page_cache.c b/page_cache.c index 89bb1ec3a0..cf8878d1d7 100644 --- a/page_cache.c +++ b/page_cache.c @@ -33,6 +33,9 @@ do { } while (0) #endif +/* the page in cache will not be replaced in two cycles */ +#define CACHED_PAGE_LIFETIME 2 + typedef struct CacheItem CacheItem; struct CacheItem { @@ -122,18 +125,6 @@ static size_t cache_get_cache_pos(const PageCache *cache, return pos; } -bool cache_is_cached(const PageCache *cache, uint64_t addr) -{ - size_t pos; - - g_assert(cache); - g_assert(cache->page_cache); - - pos = cache_get_cache_pos(cache, addr); - - return (cache->page_cache[pos].it_addr == addr); -} - static CacheItem *cache_get_by_addr(const PageCache *cache, uint64_t addr) { size_t pos; @@ -151,17 +142,35 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr) return cache_get_by_addr(cache, addr)->it_data; } -int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata) +bool cache_is_cached(const PageCache *cache, uint64_t addr, + uint64_t current_age) { + CacheItem *it; - CacheItem *it = NULL; + it = cache_get_by_addr(cache, addr); - g_assert(cache); - g_assert(cache->page_cache); + if (it->it_addr == addr) { + /* update the it_age when the cache hit */ + it->it_age = current_age; + return true; + } + return false; +} + +int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata, + uint64_t current_age) +{ + + CacheItem *it; /* actual update of entry */ it = cache_get_by_addr(cache, addr); + if (it->it_data && it->it_addr != addr && + it->it_age + CACHED_PAGE_LIFETIME > current_age) { + /* the cache page is fresh, don't replace it */ + return -1; + } /* allocate page */ if (!it->it_data) { it->it_data = g_try_malloc(cache->page_size); @@ -174,7 +183,7 @@ int cache_insert(PageCache 
*cache, uint64_t addr, const uint8_t *pdata) memcpy(it->it_data, pdata, cache->page_size); - it->it_age = ++cache->max_item_age; + it->it_age = current_age; it->it_addr = addr; return 0; diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 5e0fd13cc4..39b7b01734 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -60,16 +60,6 @@ static QEMUFile *open_test_file(bool write) return qemu_fdopen(fd, write ? "wb" : "rb"); } -/* Open a read-only qemu-file from an existing memory block */ -static QEMUFile *open_mem_file_read(const void *data, size_t len) -{ - /* The qsb gets freed by qemu_fclose */ - QEMUSizedBuffer *qsb = qsb_create(data, len); - g_assert(qsb); - - return qemu_bufopen("r", qsb); -} - /* * Check that the contents of the memory-buffered file f match * the given size/data. @@ -450,7 +440,9 @@ static void test_load_noskip(void) QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ }; - QEMUFile *loading = open_mem_file_read(buf, sizeof(buf)); + QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf)); + g_assert(qsb); + QEMUFile *loading = qemu_bufopen("r", qsb); TestStruct obj = { .skip_c_e = false }; vmstate_load_state(loading, &vmstate_skipping, &obj, 2); g_assert(!qemu_file_get_error(loading)); @@ -461,6 +453,7 @@ static void test_load_noskip(void) g_assert_cmpint(obj.e, ==, 50); g_assert_cmpint(obj.f, ==, 60); qemu_fclose(loading); + qsb_free(qsb); } static void test_load_skip(void) @@ -473,7 +466,9 @@ static void test_load_skip(void) QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ }; - QEMUFile *loading = open_mem_file_read(buf, sizeof(buf)); + QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf)); + g_assert(qsb); + QEMUFile *loading = qemu_bufopen("r", qsb); TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 }; vmstate_load_state(loading, &vmstate_skipping, &obj, 2); g_assert(!qemu_file_get_error(loading)); @@ -484,6 +479,7 @@ static void test_load_skip(void) g_assert_cmpint(obj.e, ==, 
500); g_assert_cmpint(obj.f, ==, 60); qemu_fclose(loading); + qsb_free(qsb); } int main(int argc, char **argv) |