| author | Philippe Mathieu-Daudé <philmd@linaro.org> | 2023-10-04 11:06:28 +0200 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-10-08 21:08:08 +0200 |
| commit | 8d7f2e767d8cd058c817dbe31430b89f2e11535d (patch) | |
| tree | 5b7c25cddf7e6c1b9dfc93c5169541e8c14e2901 /system/dirtylimit.c | |
| parent | 01c85e60a4d0675f0ad203fe1fe119381e7ad15b (diff) | |
system: Rename softmmu/ directory as system/
The softmmu/ directory contains files specific to system
emulation. Rename it as system/. Update meson rules, the
MAINTAINERS file and all the documentation and comments.
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20231004090629.37473-14-philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'system/dirtylimit.c')
-rw-r--r-- | system/dirtylimit.c | 678
1 file changed, 678 insertions, 0 deletions
diff --git a/system/dirtylimit.c b/system/dirtylimit.c
new file mode 100644
index 0000000000..fa959d7743
--- /dev/null
+++ b/system/dirtylimit.c
@@ -0,0 +1,678 @@
+/*
+ * Dirty page rate limit implementation code
+ *
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
+ *
+ * Authors:
+ *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+#include "sysemu/dirtyrate.h"
+#include "sysemu/dirtylimit.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "exec/memory.h"
+#include "exec/target_page.h"
+#include "hw/boards.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "migration/options.h"
+
+/*
+ * Dirtylimit stop working if dirty page rate error
+ * value less than DIRTYLIMIT_TOLERANCE_RANGE
+ */
+#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
+/*
+ * Plus or minus vcpu sleep time linearly if dirty
+ * page rate error value percentage over
+ * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
+ * Otherwise, plus or minus a fixed vcpu sleep time.
+ */
+#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT  50
+/*
+ * Max vcpu sleep time percentage during a cycle
+ * composed of dirty ring full and sleep time.
+ */
+#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
+
+struct {
+    VcpuStat stat;
+    bool running;
+    QemuThread thread;
+} *vcpu_dirty_rate_stat;
+
+typedef struct VcpuDirtyLimitState {
+    int cpu_index;
+    bool enabled;
+    /*
+     * Quota dirty page rate, unit is MB/s
+     * zero if not enabled.
+     */
+    uint64_t quota;
+} VcpuDirtyLimitState;
+
+struct {
+    VcpuDirtyLimitState *states;
+    /* Max cpus number configured by user */
+    int max_cpus;
+    /* Number of vcpu under dirtylimit */
+    int limited_nvcpu;
+} *dirtylimit_state;
+
+/* protect dirtylimit_state */
+static QemuMutex dirtylimit_mutex;
+
+/* dirtylimit thread quit if dirtylimit_quit is true */
+static bool dirtylimit_quit;
+
+static void vcpu_dirty_rate_stat_collect(void)
+{
+    MigrationState *s = migrate_get_current();
+    VcpuStat stat;
+    int i = 0;
+    int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+    if (migrate_dirty_limit() &&
+        migration_is_active(s)) {
+        period = s->parameters.x_vcpu_dirty_limit_period;
+    }
+
+    /* calculate vcpu dirtyrate */
+    vcpu_calculate_dirtyrate(period,
+                             &stat,
+                             GLOBAL_DIRTY_LIMIT,
+                             false);
+
+    for (i = 0; i < stat.nvcpu; i++) {
+        vcpu_dirty_rate_stat->stat.rates[i].id = i;
+        vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
+            stat.rates[i].dirty_rate;
+    }
+
+    g_free(stat.rates);
+}
+
+static void *vcpu_dirty_rate_stat_thread(void *opaque)
+{
+    rcu_register_thread();
+
+    /* start log sync */
+    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
+
+    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
+        vcpu_dirty_rate_stat_collect();
+        if (dirtylimit_in_service()) {
+            dirtylimit_process();
+        }
+    }
+
+    /* stop log sync */
+    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
+
+    rcu_unregister_thread();
+    return NULL;
+}
+
+int64_t vcpu_dirty_rate_get(int cpu_index)
+{
+    DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
+    return qatomic_read_i64(&rates[cpu_index].dirty_rate);
+}
+
+void vcpu_dirty_rate_stat_start(void)
+{
+    if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
+        return;
+    }
+
+    qatomic_set(&vcpu_dirty_rate_stat->running, 1);
+    qemu_thread_create(&vcpu_dirty_rate_stat->thread,
+                       "dirtyrate-stat",
+                       vcpu_dirty_rate_stat_thread,
+                       NULL,
+                       QEMU_THREAD_JOINABLE);
+}
+
+void vcpu_dirty_rate_stat_stop(void)
+{
+    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
+    dirtylimit_state_unlock();
+    qemu_mutex_unlock_iothread();
+    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
+    qemu_mutex_lock_iothread();
+    dirtylimit_state_lock();
+}
+
+void vcpu_dirty_rate_stat_initialize(void)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    int max_cpus = ms->smp.max_cpus;
+
+    vcpu_dirty_rate_stat =
+        g_malloc0(sizeof(*vcpu_dirty_rate_stat));
+
+    vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
+    vcpu_dirty_rate_stat->stat.rates =
+        g_new0(DirtyRateVcpu, max_cpus);
+
+    vcpu_dirty_rate_stat->running = false;
+}
+
+void vcpu_dirty_rate_stat_finalize(void)
+{
+    g_free(vcpu_dirty_rate_stat->stat.rates);
+    vcpu_dirty_rate_stat->stat.rates = NULL;
+
+    g_free(vcpu_dirty_rate_stat);
+    vcpu_dirty_rate_stat = NULL;
+}
+
+void dirtylimit_state_lock(void)
+{
+    qemu_mutex_lock(&dirtylimit_mutex);
+}
+
+void dirtylimit_state_unlock(void)
+{
+    qemu_mutex_unlock(&dirtylimit_mutex);
+}
+
+static void
+__attribute__((__constructor__)) dirtylimit_mutex_init(void)
+{
+    qemu_mutex_init(&dirtylimit_mutex);
+}
+
+static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
+{
+    return &dirtylimit_state->states[cpu_index];
+}
+
+void dirtylimit_state_initialize(void)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    int max_cpus = ms->smp.max_cpus;
+    int i;
+
+    dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
+
+    dirtylimit_state->states =
+        g_new0(VcpuDirtyLimitState, max_cpus);
+
+    for (i = 0; i < max_cpus; i++) {
+        dirtylimit_state->states[i].cpu_index = i;
+    }
+
+    dirtylimit_state->max_cpus = max_cpus;
+    trace_dirtylimit_state_initialize(max_cpus);
+}
+
+void dirtylimit_state_finalize(void)
+{
+    g_free(dirtylimit_state->states);
+    dirtylimit_state->states = NULL;
+
+    g_free(dirtylimit_state);
+    dirtylimit_state = NULL;
+
+    trace_dirtylimit_state_finalize();
+}
+
+bool dirtylimit_in_service(void)
+{
+    return !!dirtylimit_state;
+}
+
+bool dirtylimit_vcpu_index_valid(int cpu_index)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+
+    return !(cpu_index < 0 ||
+             cpu_index >= ms->smp.max_cpus);
+}
+
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
+{
+    static uint64_t max_dirtyrate;
+    uint64_t dirty_ring_size_MiB;
+
+    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());
+
+    if (max_dirtyrate < dirtyrate) {
+        max_dirtyrate = dirtyrate;
+    }
+
+    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
+}
+
+static inline bool dirtylimit_done(uint64_t quota,
+                                   uint64_t current)
+{
+    uint64_t min, max;
+
+    min = MIN(quota, current);
+    max = MAX(quota, current);
+
+    return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ?
+            true : false;
+}
+
+static inline bool
+dirtylimit_need_linear_adjustment(uint64_t quota,
+                                  uint64_t current)
+{
+    uint64_t min, max;
+
+    min = MIN(quota, current);
+    max = MAX(quota, current);
+
+    return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
+}
+
+static void dirtylimit_set_throttle(CPUState *cpu,
+                                    uint64_t quota,
+                                    uint64_t current)
+{
+    int64_t ring_full_time_us = 0;
+    uint64_t sleep_pct = 0;
+    uint64_t throttle_us = 0;
+
+    if (current == 0) {
+        cpu->throttle_us_per_full = 0;
+        return;
+    }
+
+    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
+
+    if (dirtylimit_need_linear_adjustment(quota, current)) {
+        if (quota < current) {
+            sleep_pct = (current - quota) * 100 / current;
+            throttle_us =
+                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
+            cpu->throttle_us_per_full += throttle_us;
+        } else {
+            sleep_pct = (quota - current) * 100 / quota;
+            throttle_us =
+                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
+            cpu->throttle_us_per_full -= throttle_us;
+        }
+
+        trace_dirtylimit_throttle_pct(cpu->cpu_index,
+                                      sleep_pct,
+                                      throttle_us);
+    } else {
+        if (quota < current) {
+            cpu->throttle_us_per_full += ring_full_time_us / 10;
+        } else {
+            cpu->throttle_us_per_full -= ring_full_time_us / 10;
+        }
+    }
+
+    /*
+     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
+     *       current dirty page rate may never reach the quota, we should stop
+     *       increasing sleep time?
+     */
+    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
+        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
+
+    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
+}
+
+static void dirtylimit_adjust_throttle(CPUState *cpu)
+{
+    uint64_t quota = 0;
+    uint64_t current = 0;
+    int cpu_index = cpu->cpu_index;
+
+    quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
+    current = vcpu_dirty_rate_get(cpu_index);
+
+    if (!dirtylimit_done(quota, current)) {
+        dirtylimit_set_throttle(cpu, quota, current);
+    }
+
+    return;
+}
+
+void dirtylimit_process(void)
+{
+    CPUState *cpu;
+
+    if (!qatomic_read(&dirtylimit_quit)) {
+        dirtylimit_state_lock();
+
+        if (!dirtylimit_in_service()) {
+            dirtylimit_state_unlock();
+            return;
+        }
+
+        CPU_FOREACH(cpu) {
+            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
+                continue;
+            }
+            dirtylimit_adjust_throttle(cpu);
+        }
+        dirtylimit_state_unlock();
+    }
+}
+
+void dirtylimit_change(bool start)
+{
+    if (start) {
+        qatomic_set(&dirtylimit_quit, 0);
+    } else {
+        qatomic_set(&dirtylimit_quit, 1);
+    }
+}
+
+void dirtylimit_set_vcpu(int cpu_index,
+                         uint64_t quota,
+                         bool enable)
+{
+    trace_dirtylimit_set_vcpu(cpu_index, quota);
+
+    if (enable) {
+        dirtylimit_state->states[cpu_index].quota = quota;
+        if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
+            dirtylimit_state->limited_nvcpu++;
+        }
+    } else {
+        dirtylimit_state->states[cpu_index].quota = 0;
+        if (dirtylimit_state->states[cpu_index].enabled) {
+            dirtylimit_state->limited_nvcpu--;
+        }
+    }
+
+    dirtylimit_state->states[cpu_index].enabled = enable;
+}
+
+void dirtylimit_set_all(uint64_t quota,
+                        bool enable)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    int max_cpus = ms->smp.max_cpus;
+    int i;
+
+    for (i = 0; i < max_cpus; i++) {
+        dirtylimit_set_vcpu(i, quota, enable);
+    }
+}
+
+void dirtylimit_vcpu_execute(CPUState *cpu)
+{
+    if (dirtylimit_in_service() &&
+        dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
+        cpu->throttle_us_per_full) {
+        trace_dirtylimit_vcpu_execute(cpu->cpu_index,
+                                      cpu->throttle_us_per_full);
+        usleep(cpu->throttle_us_per_full);
+    }
+}
+
+static void dirtylimit_init(void)
+{
+    dirtylimit_state_initialize();
+    dirtylimit_change(true);
+    vcpu_dirty_rate_stat_initialize();
+    vcpu_dirty_rate_stat_start();
+}
+
+static void dirtylimit_cleanup(void)
+{
+    vcpu_dirty_rate_stat_stop();
+    vcpu_dirty_rate_stat_finalize();
+    dirtylimit_change(false);
+    dirtylimit_state_finalize();
+}
+
+/*
+ * dirty page rate limit is not allowed to set if migration
+ * is running with dirty-limit capability enabled.
+ */
+static bool dirtylimit_is_allowed(void)
+{
+    MigrationState *ms = migrate_get_current();
+
+    if (migration_is_running(ms->state) &&
+        (!qemu_thread_is_self(&ms->thread)) &&
+        migrate_dirty_limit() &&
+        dirtylimit_in_service()) {
+        return false;
+    }
+    return true;
+}
+
+void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
+                                 int64_t cpu_index,
+                                 Error **errp)
+{
+    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+        return;
+    }
+
+    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
+        error_setg(errp, "incorrect cpu index specified");
+        return;
+    }
+
+    if (!dirtylimit_is_allowed()) {
+        error_setg(errp, "can't cancel dirty page rate limit while"
+                   " migration is running");
+        return;
+    }
+
+    if (!dirtylimit_in_service()) {
+        return;
+    }
+
+    dirtylimit_state_lock();
+
+    if (has_cpu_index) {
+        dirtylimit_set_vcpu(cpu_index, 0, false);
+    } else {
+        dirtylimit_set_all(0, false);
+    }
+
+    if (!dirtylimit_state->limited_nvcpu) {
+        dirtylimit_cleanup();
+    }
+
+    dirtylimit_state_unlock();
+}
+
+void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
+    Error *err = NULL;
+
+    qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
+                   "dirty limit for virtual CPU]\n");
+}
+
+void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
+                              int64_t cpu_index,
+                              uint64_t dirty_rate,
+                              Error **errp)
+{
+    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+        error_setg(errp, "dirty page limit feature requires KVM with"
+                   " accelerator property 'dirty-ring-size' set'");
+        return;
+    }
+
+    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
+        error_setg(errp, "incorrect cpu index specified");
+        return;
+    }
+
+    if (!dirtylimit_is_allowed()) {
+        error_setg(errp, "can't set dirty page rate limit while"
+                   " migration is running");
+        return;
+    }
+
+    if (!dirty_rate) {
+        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
+        return;
+    }
+
+    dirtylimit_state_lock();
+
+    if (!dirtylimit_in_service()) {
+        dirtylimit_init();
+    }
+
+    if (has_cpu_index) {
+        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
+    } else {
+        dirtylimit_set_all(dirty_rate, true);
+    }
+
+    dirtylimit_state_unlock();
+}
+
+void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
+    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
+    Error *err = NULL;
+
+    if (dirty_rate < 0) {
+        error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
+        goto out;
+    }
+
+    qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
+
+out:
+    hmp_handle_error(mon, err);
+}
+
+/* Return the max throttle time of each virtual CPU */
+uint64_t dirtylimit_throttle_time_per_round(void)
+{
+    CPUState *cpu;
+    int64_t max = 0;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->throttle_us_per_full > max) {
+            max = cpu->throttle_us_per_full;
+        }
+    }
+
+    return max;
+}
+
+/*
+ * Estimate average dirty ring full time of each virtaul CPU.
+ * Return 0 if guest doesn't dirty memory.
+ */
+uint64_t dirtylimit_ring_full_time(void)
+{
+    CPUState *cpu;
+    uint64_t curr_rate = 0;
+    int nvcpus = 0;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->running) {
+            nvcpus++;
+            curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
+        }
+    }
+
+    if (!curr_rate || !nvcpus) {
+        return 0;
+    }
+
+    return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
+}
+
+static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
+{
+    DirtyLimitInfo *info = NULL;
+
+    info = g_malloc0(sizeof(*info));
+    info->cpu_index = cpu_index;
+    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
+    info->current_rate = vcpu_dirty_rate_get(cpu_index);
+
+    return info;
+}
+
+static struct DirtyLimitInfoList *dirtylimit_query_all(void)
+{
+    int i, index;
+    DirtyLimitInfo *info = NULL;
+    DirtyLimitInfoList *head = NULL, **tail = &head;
+
+    dirtylimit_state_lock();
+
+    if (!dirtylimit_in_service()) {
+        dirtylimit_state_unlock();
+        return NULL;
+    }
+
+    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
+        index = dirtylimit_state->states[i].cpu_index;
+        if (dirtylimit_vcpu_get_state(index)->enabled) {
+            info = dirtylimit_query_vcpu(index);
+            QAPI_LIST_APPEND(tail, info);
+        }
+    }
+
+    dirtylimit_state_unlock();
+
+    return head;
+}
+
+struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
+{
+    if (!dirtylimit_in_service()) {
+        return NULL;
+    }
+
+    return dirtylimit_query_all();
+}
+
+void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+    DirtyLimitInfoList *info;
+    g_autoptr(DirtyLimitInfoList) head = NULL;
+    Error *err = NULL;
+
+    if (!dirtylimit_in_service()) {
+        monitor_printf(mon, "Dirty page limit not enabled!\n");
+        return;
+    }
+
+    head = qmp_query_vcpu_dirty_limit(&err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    for (info = head; info != NULL; info = info->next) {
+        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
+                       " current rate %"PRIi64 " (MB/s)\n",
+                       info->value->cpu_index,
+                       info->value->limit_rate,
+                       info->value->current_rate);
+    }
+}
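
The throttle value that dirtylimit_set_throttle() accumulates is easier to follow with concrete numbers. The sketch below is not part of the commit: it is a standalone C program that replays the two formulas visible in the diff above (the dirty-ring full-time estimate and the linear sleep-percentage adjustment) with made-up values for the ring size, the quota and the measured dirty page rate; the helper name ring_full_time_us is invented for the example.

```c
/*
 * Standalone illustration (not part of the commit) of the arithmetic in
 * dirtylimit_dirty_ring_full_time() and dirtylimit_set_throttle().
 * The ring size, quota and measured dirty rate are example values; QEMU
 * derives them from the KVM dirty ring size and per-vCPU dirty stats.
 */
#include <stdio.h>
#include <inttypes.h>

#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50

/* Time (us) needed to fill the dirty ring at the given dirty rate (MB/s). */
static uint64_t ring_full_time_us(uint64_t ring_size_MiB, uint64_t rate_MBps)
{
    return ring_size_MiB * 1000000 / rate_MBps;
}

int main(void)
{
    uint64_t ring_size_MiB = 16;  /* e.g. 4096 ring entries x 4 KiB pages */
    uint64_t quota = 100;         /* requested limit, MB/s */
    uint64_t current = 400;       /* measured dirty page rate, MB/s */
    uint64_t full_us = ring_full_time_us(ring_size_MiB, current);

    /* Error is (400 - 100) * 100 / 400 = 75% of the measured rate. */
    uint64_t err_pct = (current - quota) * 100 / current;

    if (err_pct > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT) {
        /* Linear branch: sleep long enough to cancel the excess rate. */
        uint64_t throttle_us = full_us * err_pct / (double)(100 - err_pct);
        printf("linear: +%" PRIu64 " us sleep per ring-full "
               "(ring-full time %" PRIu64 " us)\n", throttle_us, full_us);
    } else {
        /* Small error: nudge by a tenth of the ring-full time. */
        printf("fixed step: +%" PRIu64 " us sleep per ring-full\n",
               full_us / 10);
    }
    return 0;
}
```

With a 16 MiB ring, a 100 MB/s quota and a measured 400 MB/s dirty rate, the error is 75%, so the linear branch applies and roughly 120 ms of extra sleep is added each time the dirty ring fills; for errors at or below 50%, the code in the diff instead nudges throttle_us_per_full by one tenth of the ring-full time.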