aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/migration/migration.h1
-rw-r--r--include/migration/qemu-file.h3
-rw-r--r--migration/migration.c9
-rw-r--r--migration/qemu-file.c59
-rw-r--r--migration/ram.c22
-rw-r--r--qapi-schema.json5
6 files changed, 89 insertions, 10 deletions
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 7528cc2fbc..b9b706a7e3 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -304,6 +304,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
+bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index abedd466c9..0cd648a733 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
* put_buffer without copying the buffer.
* The buffer should be available till it is sent asynchronously.
*/
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free);
bool qemu_file_mode_is_not_valid(const char *mode);
bool qemu_file_is_writable(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index 2b179c69fa..68afc07016 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1297,6 +1297,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
qmp_migrate_set_parameters(&p, errp);
}
+/*
+ * migrate_release_ram: query the release-ram migration capability
+ *
+ * Returns the MIGRATION_CAPABILITY_RELEASE_RAM entry of the current
+ * migration state's enabled_capabilities array.
+ */
+bool migrate_release_ram(void)
+{
+    MigrationState *state = migrate_get_current();
+
+    return state->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
+}
+
bool migrate_postcopy_ram(void)
{
MigrationState *s;
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e9fae31158..195fa94fcf 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -49,6 +49,7 @@ struct QEMUFile {
int buf_size; /* 0 when writing */
uint8_t buf[IO_BUF_SIZE];
+ DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
unsigned int iovcnt;
@@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
return f->ops->writev_buffer;
}
+/*
+ * Advise the kernel that one memory range is no longer needed.
+ *
+ * A failing madvise() is reported through error_report() and is otherwise
+ * ignored; the caller carries on with the remaining ranges.
+ */
+static void qemu_iovec_madvise_dontneed(const struct iovec *range)
+{
+    if (qemu_madvise(range->iov_base, range->iov_len, QEMU_MADV_DONTNEED) < 0) {
+        /* iov_len is a size_t, hence %zu rather than the signed %zd */
+        error_report("migrate: madvise DONTNEED failed %p %zu: %s",
+                     range->iov_base, range->iov_len, strerror(errno));
+    }
+}
+
+/*
+ * qemu_iovec_release_ram: release the buffers queued with may_free set
+ *
+ * Walks the bits set in f->may_free below f->iovcnt and issues
+ * madvise(QEMU_MADV_DONTNEED) on the matching f->iov entries.  Marked
+ * entries that directly follow one another in memory are merged so that a
+ * single madvise() call covers the whole contiguous range.  If at least one
+ * entry was marked, the whole may_free bitmap is cleared afterwards.
+ *
+ * @f: the QEMUFile whose iovec array and may_free bitmap are processed
+ */
+static void qemu_iovec_release_ram(QEMUFile *f)
+{
+    struct iovec iov;
+    unsigned long idx;
+
+    idx = find_next_bit(f->may_free, f->iovcnt, 0);
+    if (idx >= f->iovcnt) {
+        /* no entry is marked: nothing to release, nothing to clear */
+        return;
+    }
+    /* iov accumulates the contiguous range that is pending release */
+    iov = f->iov[idx];
+
+    while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
+        /* check for adjacent buffer and coalesce them */
+        if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
+            iov.iov_len += f->iov[idx].iov_len;
+            continue;
+        }
+        /* gap found: release the finished range and start a new one */
+        qemu_iovec_madvise_dontneed(&iov);
+        iov = f->iov[idx];
+    }
+    /* the loop always leaves one range pending */
+    qemu_iovec_madvise_dontneed(&iov);
+
+    memset(f->may_free, 0, sizeof(f->may_free));
+}
+
/**
* Flushes QEMUFile buffer
*
@@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
if (f->iovcnt > 0) {
expect = iov_size(f->iov, f->iovcnt);
ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+
+ qemu_iovec_release_ram(f);
}
if (ret >= 0) {
@@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
return ret;
}
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free)
{
/* check for adjacent buffer and coalesce them */
if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
- f->iov[f->iovcnt - 1].iov_len) {
+ f->iov[f->iovcnt - 1].iov_len &&
+ may_free == test_bit(f->iovcnt - 1, f->may_free))
+ {
f->iov[f->iovcnt - 1].iov_len += size;
} else {
+ if (may_free) {
+ set_bit(f->iovcnt, f->may_free);
+ }
f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
f->iov[f->iovcnt++].iov_len = size;
}
@@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
}
}
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free)
{
if (f->last_error) {
return;
}
f->bytes_xfer += size;
- add_to_iovec(f, buf, size);
+ add_to_iovec(f, buf, size, may_free);
}
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
@@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
memcpy(f->buf + f->buf_index, buf, l);
f->bytes_xfer += l;
- add_to_iovec(f, f->buf + f->buf_index, l);
+ add_to_iovec(f, f->buf + f->buf_index, l, false);
f->buf_index += l;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
@@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
f->buf[f->buf_index] = v;
f->bytes_xfer++;
- add_to_iovec(f, f->buf + f->buf_index, 1);
+ add_to_iovec(f, f->buf + f->buf_index, 1, false);
f->buf_index++;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
@@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
}
qemu_put_be32(f, blen);
if (f->ops->writev_buffer) {
- add_to_iovec(f, f->buf + f->buf_index, blen);
+ add_to_iovec(f, f->buf + f->buf_index, blen, false);
}
f->buf_index += blen;
if (f->buf_index == IO_BUF_SIZE) {
diff --git a/migration/ram.c b/migration/ram.c
index 91443b3961..c22209db30 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -705,6 +705,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return pages;
}
+/*
+ * ram_release_pages: discard a run of pages of a RAM block
+ *
+ * Does nothing unless the release-ram capability is enabled and the
+ * migration is in postcopy.  Otherwise hands the range to
+ * ram_discard_range(); its return value is not checked here.
+ *
+ * @ms: current migration state
+ * @block_name: name of the RAM block the pages belong to
+ * @offset: start of the range, passed through unchanged to
+ *          ram_discard_range()
+ * @pages: number of target pages to release; the length passed on is
+ *         pages << TARGET_PAGE_BITS
+ */
+static void ram_release_pages(MigrationState *ms, const char *block_name,
+                              uint64_t offset, int pages)
+{
+    if (pages <= 0 || !migrate_release_ram() || !migration_in_postcopy(ms)) {
+        return;
+    }
+
+    /*
+     * Widen before shifting: "pages << TARGET_PAGE_BITS" evaluated in int
+     * can overflow for large page counts.
+     */
+    ram_discard_range(NULL, block_name, offset,
+                      (uint64_t)pages << TARGET_PAGE_BITS);
+}
+
/**
* ram_save_page: Send the given page to the stream
*
@@ -765,6 +775,7 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
* page would be stale
*/
xbzrle_cache_zero_page(current_addr);
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
} else if (!ram_bulk_stage &&
!migration_in_postcopy(ms) && migrate_use_xbzrle()) {
pages = save_xbzrle_page(f, &p, current_addr, block,
@@ -783,7 +794,9 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
*bytes_transferred += save_page_header(f, block,
offset | RAM_SAVE_FLAG_PAGE);
if (send_async) {
- qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+ qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
+ migrate_release_ram() &
+ migration_in_postcopy(ms));
} else {
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
}
@@ -813,6 +826,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
error_report("compressed data failed!");
} else {
bytes_sent += blen;
+ ram_release_pages(migrate_get_current(), block->idstr,
+ offset & TARGET_PAGE_MASK, 1);
}
return bytes_sent;
@@ -952,12 +967,17 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
error_report("compressed data failed!");
}
}
+ if (pages > 0) {
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
+ }
} else {
offset |= RAM_SAVE_FLAG_CONTINUE;
pages = save_zero_page(f, block, offset, p, bytes_transferred);
if (pages == -1) {
pages = compress_page_with_multi_thread(f, block, offset,
bytes_transferred);
+ } else {
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
}
}
}
diff --git a/qapi-schema.json b/qapi-schema.json
index 61151f34d0..93305412dd 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -865,11 +865,14 @@
# side, this process is called COarse-Grain LOck Stepping (COLO) for
# Non-stop Service. (since 2.8)
#
+# @release-ram: if enabled, qemu will free the migrated ram pages on the source
+# during postcopy-ram migration. (since 2.9)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'postcopy-ram', 'x-colo'] }
+ 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
##
# @MigrationCapabilityStatus: