aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-03-13 10:33:04 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-03-13 10:33:04 +0000
commit61c265f0660ee476985808c8aa7915617c44fd53 (patch)
tree1a2f200018dc1a9cf070f27e5071252f386a375c
parentd4f7d56759f7c75270c13d5f3f5f736a9558929c (diff)
parent19dd408a479cae3027ae9ff9ef3f509ad3e681e5 (diff)
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20200313a' into staging
Migration pull 2020-03-13 zstd build fix A new auto-converge parameter Some COLO improvements # gpg: Signature made Fri 13 Mar 2020 10:29:34 GMT # gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7 # gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full] # Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20200313a: migration: recognize COLO as part of activating process ram/colo: only record bitmap of dirty pages in COLO stage COLO: Optimize memory back-up process migration/throttle: Add throttle-trig-thres migration parameter configure: Improve zstd test Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-xconfigure3
-rw-r--r--migration/colo.c3
-rw-r--r--migration/migration.c25
-rw-r--r--migration/ram.c123
-rw-r--r--migration/ram.h1
-rw-r--r--monitor/hmp-cmds.c7
-rw-r--r--qapi/migration.json16
7 files changed, 135 insertions, 43 deletions
diff --git a/configure b/configure
index 3c7470096f..eb49bb6680 100755
--- a/configure
+++ b/configure
@@ -2475,7 +2475,8 @@ fi
# zstd check
if test "$zstd" != "no" ; then
- if $pkg_config --exist libzstd ; then
+ libzstd_minver="1.4.0"
+ if $pkg_config --atleast-version=$libzstd_minver libzstd ; then
zstd_cflags="$($pkg_config --cflags libzstd)"
zstd_libs="$($pkg_config --libs libzstd)"
LIBS="$zstd_libs $LIBS"
diff --git a/migration/colo.c b/migration/colo.c
index 93c5a452fb..44942c4e23 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -26,6 +26,7 @@
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
#include "migration/failover.h"
+#include "migration/ram.h"
#ifdef CONFIG_REPLICATION
#include "replication.h"
#endif
@@ -845,6 +846,8 @@ void *colo_process_incoming_thread(void *opaque)
*/
qemu_file_set_blocking(mis->from_src_file, true);
+ colo_incoming_start_dirty_log();
+
bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
diff --git a/migration/migration.c b/migration/migration.c
index 0b2045ccbd..c1d88ace7f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -78,6 +78,7 @@
/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
+#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99
@@ -778,6 +779,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->compress_wait_thread = s->parameters.compress_wait_thread;
params->has_decompress_threads = true;
params->decompress_threads = s->parameters.decompress_threads;
+ params->has_throttle_trigger_threshold = true;
+ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
params->has_cpu_throttle_initial = true;
params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
params->has_cpu_throttle_increment = true;
@@ -851,6 +854,7 @@ bool migration_is_setup_or_active(int state)
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
case MIGRATION_STATUS_WAIT_UNPLUG:
+ case MIGRATION_STATUS_COLO:
return true;
default:
@@ -1169,6 +1173,15 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
return false;
}
+ if (params->has_throttle_trigger_threshold &&
+ (params->throttle_trigger_threshold < 1 ||
+ params->throttle_trigger_threshold > 100)) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+ "throttle_trigger_threshold",
+ "an integer in the range of 1 to 100");
+ return false;
+ }
+
if (params->has_cpu_throttle_initial &&
(params->cpu_throttle_initial < 1 ||
params->cpu_throttle_initial > 99)) {
@@ -1298,6 +1311,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
dest->decompress_threads = params->decompress_threads;
}
+ if (params->has_throttle_trigger_threshold) {
+ dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
+ }
+
if (params->has_cpu_throttle_initial) {
dest->cpu_throttle_initial = params->cpu_throttle_initial;
}
@@ -1382,6 +1399,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
s->parameters.decompress_threads = params->decompress_threads;
}
+ if (params->has_throttle_trigger_threshold) {
+ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
+ }
+
if (params->has_cpu_throttle_initial) {
s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
}
@@ -3558,6 +3579,9 @@ static Property migration_properties[] = {
DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
parameters.decompress_threads,
DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
+ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
+ parameters.throttle_trigger_threshold,
+ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
parameters.cpu_throttle_initial,
DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
@@ -3667,6 +3691,7 @@ static void migration_instance_init(Object *obj)
params->has_compress_level = true;
params->has_compress_threads = true;
params->has_decompress_threads = true;
+ params->has_throttle_trigger_threshold = true;
params->has_cpu_throttle_initial = true;
params->has_cpu_throttle_increment = true;
params->has_max_bandwidth = true;
diff --git a/migration/ram.c b/migration/ram.c
index 0ef68798d2..c12cfdbe26 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -896,11 +896,38 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
}
}
+static void migration_trigger_throttle(RAMState *rs)
+{
+ MigrationState *s = migrate_get_current();
+ uint64_t threshold = s->parameters.throttle_trigger_threshold;
+
+ uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
+ uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
+ uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
+
+ /* During block migration the auto-converge logic incorrectly detects
+ * that ram migration makes no progress. Avoid this by disabling the
+ * throttling logic during the bulk phase of block migration. */
+ if (migrate_auto_converge() && !blk_mig_bulk_active()) {
+ /* The following detection logic can be refined later. For now:
+ Check to see if the ratio between dirtied bytes and the approx.
+ amount of bytes that just got transferred since the last time
+ we were in this routine reaches the threshold. If that happens
+ twice, start or increase throttling. */
+
+ if ((bytes_dirty_period > bytes_dirty_threshold) &&
+ (++rs->dirty_rate_high_cnt >= 2)) {
+ trace_migration_throttle();
+ rs->dirty_rate_high_cnt = 0;
+ mig_throttle_guest_down();
+ }
+ }
+}
+
static void migration_bitmap_sync(RAMState *rs)
{
RAMBlock *block;
int64_t end_time;
- uint64_t bytes_xfer_now;
ram_counters.dirty_sync_count++;
@@ -927,26 +954,7 @@ static void migration_bitmap_sync(RAMState *rs)
/* more than 1 second = 1000 millisecons */
if (end_time > rs->time_last_bitmap_sync + 1000) {
- bytes_xfer_now = ram_counters.transferred;
-
- /* During block migration the auto-converge logic incorrectly detects
- * that ram migration makes no progress. Avoid this by disabling the
- * throttling logic during the bulk phase of block migration. */
- if (migrate_auto_converge() && !blk_mig_bulk_active()) {
- /* The following detection logic can be refined later. For now:
- Check to see if the dirtied bytes is 50% more than the approx.
- amount of bytes that just got transferred since the last time we
- were in this routine. If that happens twice, start or increase
- throttling */
-
- if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
- (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
- (++rs->dirty_rate_high_cnt >= 2)) {
- trace_migration_throttle();
- rs->dirty_rate_high_cnt = 0;
- mig_throttle_guest_down();
- }
- }
+ migration_trigger_throttle(rs);
migration_update_rates(rs, end_time);
@@ -955,7 +963,7 @@ static void migration_bitmap_sync(RAMState *rs)
/* reset period counters */
rs->time_last_bitmap_sync = end_time;
rs->num_dirty_pages_period = 0;
- rs->bytes_xfer_prev = bytes_xfer_now;
+ rs->bytes_xfer_prev = ram_counters.transferred;
}
if (migrate_use_events()) {
qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
@@ -2734,7 +2742,7 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
}
static inline void *colo_cache_from_block_offset(RAMBlock *block,
- ram_addr_t offset)
+ ram_addr_t offset, bool record_bitmap)
{
if (!offset_in_ramblock(block, offset)) {
return NULL;
@@ -2750,7 +2758,8 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
* It help us to decide which pages in ram cache should be flushed
* into VM's RAM later.
*/
- if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
+ if (record_bitmap &&
+ !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
ram_state->migration_dirty_pages++;
}
return block->colo_cache + offset;
@@ -2986,7 +2995,6 @@ int colo_init_ram_cache(void)
}
return -errno;
}
- memcpy(block->colo_cache, block->host, block->used_length);
}
}
@@ -3000,19 +3008,36 @@ int colo_init_ram_cache(void)
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
-
block->bmap = bitmap_new(pages);
- bitmap_set(block->bmap, 0, pages);
}
}
- ram_state = g_new0(RAMState, 1);
- ram_state->migration_dirty_pages = 0;
- qemu_mutex_init(&ram_state->bitmap_mutex);
- memory_global_dirty_log_start();
+ ram_state_init(&ram_state);
return 0;
}
+/* TODO: duplicated with ram_init_bitmaps */
+void colo_incoming_start_dirty_log(void)
+{
+ RAMBlock *block = NULL;
+ /* For memory_global_dirty_log_start below. */
+ qemu_mutex_lock_iothread();
+ qemu_mutex_lock_ramlist();
+
+ memory_global_dirty_log_sync();
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ ramblock_sync_dirty_bitmap(ram_state, block);
+ /* Discard this dirty bitmap record */
+ bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
+ }
+ memory_global_dirty_log_start();
+ }
+ ram_state->migration_dirty_pages = 0;
+ qemu_mutex_unlock_ramlist();
+ qemu_mutex_unlock_iothread();
+}
+
/* It is need to hold the global lock to call this helper */
void colo_release_ram_cache(void)
{
@@ -3032,9 +3057,7 @@ void colo_release_ram_cache(void)
}
}
}
- qemu_mutex_destroy(&ram_state->bitmap_mutex);
- g_free(ram_state);
- ram_state = NULL;
+ ram_state_cleanup(&ram_state);
}
/**
@@ -3348,7 +3371,7 @@ static int ram_load_precopy(QEMUFile *f)
while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
ram_addr_t addr, total_ram_bytes;
- void *host = NULL;
+ void *host = NULL, *host_bak = NULL;
uint8_t ch;
/*
@@ -3379,20 +3402,35 @@ static int ram_load_precopy(QEMUFile *f)
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
RAMBlock *block = ram_block_from_stream(f, flags);
+ host = host_from_ram_block_offset(block, addr);
/*
- * After going into COLO, we should load the Page into colo_cache.
+ * After going into COLO stage, we should not load the page
+ * into SVM's memory directly, we put them into colo_cache firstly.
+ * NOTE: We need to keep a copy of SVM's ram in colo_cache.
+ * Previously, we copied all these memory in preparing stage of COLO
+ * while we need to stop VM, which is a time-consuming process.
+ * Here we optimize it by a trick, back-up every page while in
+ * migration process while COLO is enabled, though it affects the
+ * speed of the migration, but it obviously reduce the downtime of
+ * back-up all SVM'S memory in COLO preparing stage.
*/
- if (migration_incoming_in_colo_state()) {
- host = colo_cache_from_block_offset(block, addr);
- } else {
- host = host_from_ram_block_offset(block, addr);
+ if (migration_incoming_colo_enabled()) {
+ if (migration_incoming_in_colo_state()) {
+ /* In COLO stage, put all pages into cache temporarily */
+ host = colo_cache_from_block_offset(block, addr, true);
+ } else {
+ /*
+ * In migration stage but before COLO stage,
+ * Put all pages into both cache and SVM's memory.
+ */
+ host_bak = colo_cache_from_block_offset(block, addr, false);
+ }
}
if (!host) {
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
ret = -EINVAL;
break;
}
-
if (!migration_incoming_in_colo_state()) {
ramblock_recv_bitmap_set(block, host);
}
@@ -3506,6 +3544,9 @@ static int ram_load_precopy(QEMUFile *f)
if (!ret) {
ret = qemu_file_get_error(f);
}
+ if (!ret && host_bak) {
+ memcpy(host_bak, host, TARGET_PAGE_SIZE);
+ }
}
ret |= wait_for_decompress_done();
diff --git a/migration/ram.h b/migration/ram.h
index a553d40751..5ceaff7cb4 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -66,5 +66,6 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
/* ram cache */
int colo_init_ram_cache(void);
void colo_release_ram_cache(void);
+void colo_incoming_start_dirty_log(void);
#endif
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 1c69d51b97..58724031ea 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -407,6 +407,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: %u\n",
MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS),
params->decompress_threads);
+ assert(params->has_throttle_trigger_threshold);
+ monitor_printf(mon, "%s: %u\n",
+ MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD),
+ params->throttle_trigger_threshold);
assert(params->has_cpu_throttle_initial);
monitor_printf(mon, "%s: %u\n",
MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL),
@@ -1254,6 +1258,9 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_decompress_threads = true;
visit_type_int(v, param, &p->decompress_threads, &err);
break;
+ case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD:
+ p->has_throttle_trigger_threshold = true;
+ visit_type_int(v, param, &p->throttle_trigger_threshold, &err);
case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL:
p->has_cpu_throttle_initial = true;
visit_type_int(v, param, &p->cpu_throttle_initial, &err);
diff --git a/qapi/migration.json b/qapi/migration.json
index d44d99cd78..0d1c0712ca 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -540,6 +540,10 @@
# compression, so set the decompress-threads to the number about 1/4
# of compress-threads is adequate.
#
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+# to trigger throttling. It is expressed as percentage.
+# The default value is 50. (Since 5.0)
+#
# @cpu-throttle-initial: Initial percentage of time guest cpus are throttled
# when migration auto-converge is activated. The
# default value is 20. (Since 2.7)
@@ -625,7 +629,7 @@
'data': ['announce-initial', 'announce-max',
'announce-rounds', 'announce-step',
'compress-level', 'compress-threads', 'decompress-threads',
- 'compress-wait-thread',
+ 'compress-wait-thread', 'throttle-trigger-threshold',
'cpu-throttle-initial', 'cpu-throttle-increment',
'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
@@ -660,6 +664,10 @@
#
# @decompress-threads: decompression thread count
#
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+# to trigger throttling. It is expressed as percentage.
+# The default value is 50. (Since 5.0)
+#
# @cpu-throttle-initial: Initial percentage of time guest cpus are
# throttled when migration auto-converge is activated.
# The default value is 20. (Since 2.7)
@@ -752,6 +760,7 @@
'*compress-threads': 'int',
'*compress-wait-thread': 'bool',
'*decompress-threads': 'int',
+ '*throttle-trigger-threshold': 'int',
'*cpu-throttle-initial': 'int',
'*cpu-throttle-increment': 'int',
'*tls-creds': 'StrOrNull',
@@ -813,6 +822,10 @@
#
# @decompress-threads: decompression thread count
#
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+# to trigger throttling. It is expressed as percentage.
+# The default value is 50. (Since 5.0)
+#
# @cpu-throttle-initial: Initial percentage of time guest cpus are
# throttled when migration auto-converge is activated.
# (Since 2.7)
@@ -905,6 +918,7 @@
'*compress-threads': 'uint8',
'*compress-wait-thread': 'bool',
'*decompress-threads': 'uint8',
+ '*throttle-trigger-threshold': 'uint8',
'*cpu-throttle-initial': 'uint8',
'*cpu-throttle-increment': 'uint8',
'*tls-creds': 'str',