cpu-timers, icount: new modules

refactoring of cpus.c continues with cpu timer state extraction. cpu-timers: responsible for the softmmu cpu timers state, including cpu clocks and ticks. icount: counts the TCG instructions executed. As such it is specific to the TCG accelerator. Therefore, it is built only under CONFIG_TCG. One complication is due to qtest, which uses an icount field to warp time as part of qtest (qtest_clock_warp). In order to solve this problem, provide a separate counter for qtest. This requires fixing assumptions scattered in the code that qtest_enabled() implies icount_enabled(), checking each specific case. Signed-off-by: Claudio Fontana <cfontana@suse.de> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> [remove redundant initialization with qemu_spice_init] Reviewed-by: Alex Bennée <alex.bennee@linaro.org> [fix lingering calls to icount_get] Signed-off-by: Claudio Fontana <cfontana@suse.de> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Claudio Fontana <cfontana@suse.de> 2020-08-19 13:17:19 +0200
committer: Paolo Bonzini <pbonzini@redhat.com> 2020-10-05 16:41:22 +0200
commit: 740b175973427bcfa32ad894bb1f83b96d184c28 (patch)
tree: 3613e83be5fd8620b369479b98e6ede5d5746a9a /softmmu
parent: 0ac0b47c44b4be6cbce26777a1a5968cc8f025a5 (diff)
7 files changed, 897 insertions, 742 deletions
diff --git a/softmmu/cpu-timers.c b/softmmu/cpu-timers.c
new file mode 100644
index 0000000000..6c6c56090f
--- /dev/null
+++ b/softmmu/cpu-timers.c
@@ -0,0 +1,284 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "exec/exec-all.h"
+#include "sysemu/cpus.h"
+#include "sysemu/qtest.h"
+#include "qemu/main-loop.h"
+#include "qemu/option.h"
+#include "qemu/seqlock.h"
+#include "sysemu/replay.h"
+#include "sysemu/runstate.h"
+#include "hw/core/cpu.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/cpu-throttle.h"
+#include "timers-state.h"
+
+/* clock and ticks */
+
+static int64_t cpu_get_ticks_locked(void)
+{
+    int64_t ticks = timers_state.cpu_ticks_offset;
+    if (timers_state.cpu_ticks_enabled) {
+        ticks += cpu_get_host_ticks();
+    }
+
+    if (timers_state.cpu_ticks_prev > ticks) {
+        /* Non increasing ticks may happen if the host uses software suspend. */
+        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
+        ticks = timers_state.cpu_ticks_prev;
+    }
+
+    timers_state.cpu_ticks_prev = ticks;
+    return ticks;
+}
+
+/*
+ * return the time elapsed in VM between vm_start and vm_stop.  Unless
+ * icount is active, cpu_get_ticks() uses units of the host CPU cycle
+ * counter.
+ */
+int64_t cpu_get_ticks(void)
+{
+    int64_t ticks;
+
+    if (icount_enabled()) {
+        return cpu_get_icount();
+    }
+
+    qemu_spin_lock(&timers_state.vm_clock_lock);
+    ticks = cpu_get_ticks_locked();
+    qemu_spin_unlock(&timers_state.vm_clock_lock);
+    return ticks;
+}
+
+int64_t cpu_get_clock_locked(void)
+{
+    int64_t time;
+
+    time = timers_state.cpu_clock_offset;
+    if (timers_state.cpu_ticks_enabled) {
+        time += get_clock();
+    }
+
+    return time;
+}
+
+/*
+ * Return the monotonic time elapsed in VM, i.e.,
+ * the time between vm_start and vm_stop
+ */
+int64_t cpu_get_clock(void)
+{
+    int64_t ti;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        ti = cpu_get_clock_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return ti;
+}
+
+/*
+ * enable cpu_get_ticks()
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
+void cpu_enable_ticks(void)
+{
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+    if (!timers_state.cpu_ticks_enabled) {
+        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
+        timers_state.cpu_clock_offset -= get_clock();
+        timers_state.cpu_ticks_enabled = 1;
+    }
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+}
+
+/*
+ * disable cpu_get_ticks() : the clock is stopped. You must not call
+ * cpu_get_ticks() after that.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
+void cpu_disable_ticks(void)
+{
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+    if (timers_state.cpu_ticks_enabled) {
+        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
+        timers_state.cpu_clock_offset = cpu_get_clock_locked();
+        timers_state.cpu_ticks_enabled = 0;
+    }
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
+}
+
+static bool icount_state_needed(void *opaque)
+{
+    return icount_enabled();
+}
+
+static bool warp_timer_state_needed(void *opaque)
+{
+    TimersState *s = opaque;
+    return s->icount_warp_timer != NULL;
+}
+
+static bool adjust_timers_state_needed(void *opaque)
+{
+    TimersState *s = opaque;
+    return s->icount_rt_timer != NULL;
+}
+
+static bool shift_state_needed(void *opaque)
+{
+    return icount_enabled() == 2;
+}
+
+/*
+ * Subsection for warp timer migration is optional, because may not be created
+ */
+static const VMStateDescription icount_vmstate_warp_timer = {
+    .name = "timer/icount/warp_timer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = warp_timer_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT64(vm_clock_warp_start, TimersState),
+        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription icount_vmstate_adjust_timers = {
+    .name = "timer/icount/timers",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = adjust_timers_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
+        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription icount_vmstate_shift = {
+    .name = "timer/icount/shift",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = shift_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT16(icount_time_shift, TimersState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * This is a subsection for icount migration.
+ */
+static const VMStateDescription icount_vmstate_timers = {
+    .name = "timer/icount",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = icount_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT64(qemu_icount_bias, TimersState),
+        VMSTATE_INT64(qemu_icount, TimersState),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &icount_vmstate_warp_timer,
+        &icount_vmstate_adjust_timers,
+        &icount_vmstate_shift,
+        NULL
+    }
+};
+
+static const VMStateDescription vmstate_timers = {
+    .name = "timer",
+    .version_id = 2,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT64(cpu_ticks_offset, TimersState),
+        VMSTATE_UNUSED(8),
+        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &icount_vmstate_timers,
+        NULL
+    }
+};
+
+static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
+{
+}
+
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
+{
+    if (!icount_enabled() || type != QEMU_CLOCK_VIRTUAL) {
+        qemu_notify_event();
+        return;
+    }
+
+    if (qemu_in_vcpu_thread()) {
+        /*
+         * A CPU is currently running; kick it back out to the
+         * tcg_cpu_exec() loop so it will recalculate its
+         * icount deadline immediately.
+         */
+        qemu_cpu_kick(current_cpu);
+    } else if (first_cpu) {
+        /*
+         * qemu_cpu_kick is not enough to kick a halted CPU out of
+         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
+         * causes cpu_thread_is_idle to return false.  This way,
+         * handle_icount_deadline can run.
+         * If we have no CPUs at all for some reason, we don't
+         * need to do anything.
+         */
+        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
+    }
+}
+
+TimersState timers_state;
+
+/* initialize timers state and the cpu throttle for convenience */
+void cpu_timers_init(void)
+{
+    seqlock_init(&timers_state.vm_clock_seqlock);
+    qemu_spin_init(&timers_state.vm_clock_lock);
+    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
+
+    cpu_throttle_init();
+}
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index ac8940d52e..4f2777a738 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -58,11 +58,10 @@
 #include "hw/nmi.h"
 #include "sysemu/replay.h"
 #include "sysemu/runstate.h"
+#include "sysemu/cpu-timers.h"
 #include "hw/boards.h"
 #include "hw/hw.h"
 
-#include "sysemu/cpu-throttle.h"
-
 #ifdef CONFIG_LINUX
 
 #include <sys/prctl.h>
@@ -83,9 +82,6 @@
 
 static QemuMutex qemu_global_mutex;
 
-int64_t max_delay;
-int64_t max_advance;
-
 bool cpu_is_stopped(CPUState *cpu)
 {
     return cpu->stopped || !runstate_is_running();
@@ -116,7 +112,7 @@ static bool cpu_thread_is_idle(CPUState *cpu)
     return true;
 }
 
-static bool all_cpu_threads_idle(void)
+bool all_cpu_threads_idle(void)
 {
     CPUState *cpu;
 
@@ -128,688 +124,9 @@ static bool all_cpu_threads_idle(void)
     return true;
 }
 
-/***********************************************************/
-/* guest cycle counter */
-
-/* Protected by TimersState seqlock */
-
-static bool icount_sleep = true;
-/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
-#define MAX_ICOUNT_SHIFT 10
-
-typedef struct TimersState {
-    /* Protected by BQL.  */
-    int64_t cpu_ticks_prev;
-    int64_t cpu_ticks_offset;
-
-    /* Protect fields that can be respectively read outside the
-     * BQL, and written from multiple threads.
-     */
-    QemuSeqLock vm_clock_seqlock;
-    QemuSpin vm_clock_lock;
-
-    int16_t cpu_ticks_enabled;
-
-    /* Conversion factor from emulated instructions to virtual clock ticks.  */
-    int16_t icount_time_shift;
-
-    /* Compensate for varying guest execution speed.  */
-    int64_t qemu_icount_bias;
-
-    int64_t vm_clock_warp_start;
-    int64_t cpu_clock_offset;
-
-    /* Only written by TCG thread */
-    int64_t qemu_icount;
-
-    /* for adjusting icount */
-    QEMUTimer *icount_rt_timer;
-    QEMUTimer *icount_vm_timer;
-    QEMUTimer *icount_warp_timer;
-} TimersState;
-
-static TimersState timers_state;
 bool mttcg_enabled;
 
 
-/* The current number of executed instructions is based on what we
- * originally budgeted minus the current state of the decrementing
- * icount counters in extra/u16.low.
- */
-static int64_t cpu_get_icount_executed(CPUState *cpu)
-{
-    return (cpu->icount_budget -
-            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
-}
-
-/*
- * Update the global shared timer_state.qemu_icount to take into
- * account executed instructions. This is done by the TCG vCPU
- * thread so the main-loop can see time has moved forward.
- */
-static void cpu_update_icount_locked(CPUState *cpu)
-{
-    int64_t executed = cpu_get_icount_executed(cpu);
-    cpu->icount_budget -= executed;
-
-    qatomic_set_i64(&timers_state.qemu_icount,
-                   timers_state.qemu_icount + executed);
-}
-
-/*
- * Update the global shared timer_state.qemu_icount to take into
- * account executed instructions. This is done by the TCG vCPU
- * thread so the main-loop can see time has moved forward.
- */
-void cpu_update_icount(CPUState *cpu)
-{
-    seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-    cpu_update_icount_locked(cpu);
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                         &timers_state.vm_clock_lock);
-}
-
-static int64_t cpu_get_icount_raw_locked(void)
-{
-    CPUState *cpu = current_cpu;
-
-    if (cpu && cpu->running) {
-        if (!cpu->can_do_io) {
-            error_report("Bad icount read");
-            exit(1);
-        }
-        /* Take into account what has run */
-        cpu_update_icount_locked(cpu);
-    }
-    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
-    return qatomic_read_i64(&timers_state.qemu_icount);
-}
-
-static int64_t cpu_get_icount_locked(void)
-{
-    int64_t icount = cpu_get_icount_raw_locked();
-    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
-        cpu_icount_to_ns(icount);
-}
-
-int64_t cpu_get_icount_raw(void)
-{
-    int64_t icount;
-    unsigned start;
-
-    do {
-        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
-        icount = cpu_get_icount_raw_locked();
-    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
-
-    return icount;
-}
-
-/* Return the virtual CPU time, based on the instruction counter.  */
-int64_t cpu_get_icount(void)
-{
-    int64_t icount;
-    unsigned start;
-
-    do {
-        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
-        icount = cpu_get_icount_locked();
-    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
-
-    return icount;
-}
-
-int64_t cpu_icount_to_ns(int64_t icount)
-{
-    return icount << qatomic_read(&timers_state.icount_time_shift);
-}
-
-static int64_t cpu_get_ticks_locked(void)
-{
-    int64_t ticks = timers_state.cpu_ticks_offset;
-    if (timers_state.cpu_ticks_enabled) {
-        ticks += cpu_get_host_ticks();
-    }
-
-    if (timers_state.cpu_ticks_prev > ticks) {
-        /* Non increasing ticks may happen if the host uses software suspend.  */
-        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
-        ticks = timers_state.cpu_ticks_prev;
-    }
-
-    timers_state.cpu_ticks_prev = ticks;
-    return ticks;
-}
-
-/* return the time elapsed in VM between vm_start and vm_stop.  Unless
- * icount is active, cpu_get_ticks() uses units of the host CPU cycle
- * counter.
- */
-int64_t cpu_get_ticks(void)
-{
-    int64_t ticks;
-
-    if (use_icount) {
-        return cpu_get_icount();
-    }
-
-    qemu_spin_lock(&timers_state.vm_clock_lock);
-    ticks = cpu_get_ticks_locked();
-    qemu_spin_unlock(&timers_state.vm_clock_lock);
-    return ticks;
-}
-
-static int64_t cpu_get_clock_locked(void)
-{
-    int64_t time;
-
-    time = timers_state.cpu_clock_offset;
-    if (timers_state.cpu_ticks_enabled) {
-        time += get_clock();
-    }
-
-    return time;
-}
-
-/* Return the monotonic time elapsed in VM, i.e.,
- * the time between vm_start and vm_stop
- */
-int64_t cpu_get_clock(void)
-{
-    int64_t ti;
-    unsigned start;
-
-    do {
-        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
-        ti = cpu_get_clock_locked();
-    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
-
-    return ti;
-}
-
-/* enable cpu_get_ticks()
- * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
- */
-void cpu_enable_ticks(void)
-{
-    seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-    if (!timers_state.cpu_ticks_enabled) {
-        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
-        timers_state.cpu_clock_offset -= get_clock();
-        timers_state.cpu_ticks_enabled = 1;
-    }
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-}
-
-/* disable cpu_get_ticks() : the clock is stopped. You must not call
- * cpu_get_ticks() after that.
- * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
- */
-void cpu_disable_ticks(void)
-{
-    seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-    if (timers_state.cpu_ticks_enabled) {
-        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
-        timers_state.cpu_clock_offset = cpu_get_clock_locked();
-        timers_state.cpu_ticks_enabled = 0;
-    }
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                         &timers_state.vm_clock_lock);
-}
-
-/* Correlation between real and virtual time is always going to be
-   fairly approximate, so ignore small variation.
-   When the guest is idle real and virtual time will be aligned in
-   the IO wait loop.  */
-#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
-
-static void icount_adjust(void)
-{
-    int64_t cur_time;
-    int64_t cur_icount;
-    int64_t delta;
-
-    /* Protected by TimersState mutex.  */
-    static int64_t last_delta;
-
-    /* If the VM is not running, then do nothing.  */
-    if (!runstate_is_running()) {
-        return;
-    }
-
-    seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
-                                   cpu_get_clock_locked());
-    cur_icount = cpu_get_icount_locked();
-
-    delta = cur_icount - cur_time;
-    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
-    if (delta > 0
-        && last_delta + ICOUNT_WOBBLE < delta * 2
-        && timers_state.icount_time_shift > 0) {
-        /* The guest is getting too far ahead.  Slow time down.  */
-        qatomic_set(&timers_state.icount_time_shift,
-                   timers_state.icount_time_shift - 1);
-    }
-    if (delta < 0
-        && last_delta - ICOUNT_WOBBLE > delta * 2
-        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
-        /* The guest is getting too far behind.  Speed time up.  */
-        qatomic_set(&timers_state.icount_time_shift,
-                   timers_state.icount_time_shift + 1);
-    }
-    last_delta = delta;
-    qatomic_set_i64(&timers_state.qemu_icount_bias,
-                   cur_icount - (timers_state.qemu_icount
-                                 << timers_state.icount_time_shift));
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                         &timers_state.vm_clock_lock);
-}
-
-static void icount_adjust_rt(void *opaque)
-{
-    timer_mod(timers_state.icount_rt_timer,
-              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
-    icount_adjust();
-}
-
-static void icount_adjust_vm(void *opaque)
-{
-    timer_mod(timers_state.icount_vm_timer,
-                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   NANOSECONDS_PER_SECOND / 10);
-    icount_adjust();
-}
-
-static int64_t qemu_icount_round(int64_t count)
-{
-    int shift = qatomic_read(&timers_state.icount_time_shift);
-    return (count + (1 << shift) - 1) >> shift;
-}
-
-static void icount_warp_rt(void)
-{
-    unsigned seq;
-    int64_t warp_start;
-
-    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
-     * changes from -1 to another value, so the race here is okay.
-     */
-    do {
-        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
-        warp_start = timers_state.vm_clock_warp_start;
-    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
-
-    if (warp_start == -1) {
-        return;
-    }
-
-    seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-    if (runstate_is_running()) {
-        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
-                                            cpu_get_clock_locked());
-        int64_t warp_delta;
-
-        warp_delta = clock - timers_state.vm_clock_warp_start;
-        if (use_icount == 2) {
-            /*
-             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
-             * far ahead of real time.
-             */
-            int64_t cur_icount = cpu_get_icount_locked();
-            int64_t delta = clock - cur_icount;
-            warp_delta = MIN(warp_delta, delta);
-        }
-        qatomic_set_i64(&timers_state.qemu_icount_bias,
-                       timers_state.qemu_icount_bias + warp_delta);
-    }
-    timers_state.vm_clock_warp_start = -1;
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                       &timers_state.vm_clock_lock);
-
-    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
-        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-    }
-}
-
-static void icount_timer_cb(void *opaque)
-{
-    /* No need for a checkpoint because the timer already synchronizes
-     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
-     */
-    icount_warp_rt();
-}
-
-void qtest_clock_warp(int64_t dest)
-{
-    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    AioContext *aio_context;
-    assert(qtest_enabled());
-    aio_context = qemu_get_aio_context();
-    while (clock < dest) {
-        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
-                                                      QEMU_TIMER_ATTR_ALL);
-        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
-
-        seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                           &timers_state.vm_clock_lock);
-        qatomic_set_i64(&timers_state.qemu_icount_bias,
-                       timers_state.qemu_icount_bias + warp);
-        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                             &timers_state.vm_clock_lock);
-
-        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
-        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
-        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    }
-    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-}
-
-void qemu_start_warp_timer(void)
-{
-    int64_t clock;
-    int64_t deadline;
-
-    if (!use_icount) {
-        return;
-    }
-
-    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
-     * do not fire, so computing the deadline does not make sense.
-     */
-    if (!runstate_is_running()) {
-        return;
-    }
-
-    if (replay_mode != REPLAY_MODE_PLAY) {
-        if (!all_cpu_threads_idle()) {
-            return;
-        }
-
-        if (qtest_enabled()) {
-            /* When testing, qtest commands advance icount.  */
-            return;
-        }
-
-        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
-    } else {
-        /* warp clock deterministically in record/replay mode */
-        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
-            /* vCPU is sleeping and warp can't be started.
-               It is probably a race condition: notification sent
-               to vCPU was processed in advance and vCPU went to sleep.
-               Therefore we have to wake it up for doing someting. */
-            if (replay_has_checkpoint()) {
-                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-            }
-            return;
-        }
-    }
-
-    /* We want to use the earliest deadline from ALL vm_clocks */
-    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
-    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
-                                          ~QEMU_TIMER_ATTR_EXTERNAL);
-    if (deadline < 0) {
-        static bool notified;
-        if (!icount_sleep && !notified) {
-            warn_report("icount sleep disabled and no active timers");
-            notified = true;
-        }
-        return;
-    }
-
-    if (deadline > 0) {
-        /*
-         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
-         * sleep.  Otherwise, the CPU might be waiting for a future timer
-         * interrupt to wake it up, but the interrupt never comes because
-         * the vCPU isn't running any insns and thus doesn't advance the
-         * QEMU_CLOCK_VIRTUAL.
-         */
-        if (!icount_sleep) {
-            /*
-             * We never let VCPUs sleep in no sleep icount mode.
-             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
-             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
-             * It is useful when we want a deterministic execution time,
-             * isolated from host latencies.
-             */
-            seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                               &timers_state.vm_clock_lock);
-            qatomic_set_i64(&timers_state.qemu_icount_bias,
-                           timers_state.qemu_icount_bias + deadline);
-            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                                 &timers_state.vm_clock_lock);
-            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-        } else {
-            /*
-             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
-             * "real" time, (related to the time left until the next event) has
-             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
-             * This avoids that the warps are visible externally; for example,
-             * you will not be sending network packets continuously instead of
-             * every 100ms.
-             */
-            seqlock_write_lock(&timers_state.vm_clock_seqlock,
-                               &timers_state.vm_clock_lock);
-            if (timers_state.vm_clock_warp_start == -1
-                || timers_state.vm_clock_warp_start > clock) {
-                timers_state.vm_clock_warp_start = clock;
-            }
-            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
-                                 &timers_state.vm_clock_lock);
-            timer_mod_anticipate(timers_state.icount_warp_timer,
-                                 clock + deadline);
-        }
-    } else if (deadline == 0) {
-        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-    }
-}
-
-static void qemu_account_warp_timer(void)
-{
-    if (!use_icount || !icount_sleep) {
-        return;
-    }
-
-    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
-     * do not fire, so computing the deadline does not make sense.
-     */
-    if (!runstate_is_running()) {
-        return;
-    }
-
-    /* warp clock deterministically in record/replay mode */
-    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
-        return;
-    }
-
-    timer_del(timers_state.icount_warp_timer);
-    icount_warp_rt();
-}
-
-static bool icount_state_needed(void *opaque)
-{
-    return use_icount;
-}
-
-static bool warp_timer_state_needed(void *opaque)
-{
-    TimersState *s = opaque;
-    return s->icount_warp_timer != NULL;
-}
-
-static bool adjust_timers_state_needed(void *opaque)
-{
-    TimersState *s = opaque;
-    return s->icount_rt_timer != NULL;
-}
-
-static bool shift_state_needed(void *opaque)
-{
-    return use_icount == 2;
-}
-
-/*
- * Subsection for warp timer migration is optional, because may not be created
- */
-static const VMStateDescription icount_vmstate_warp_timer = {
-    .name = "timer/icount/warp_timer",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = warp_timer_state_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT64(vm_clock_warp_start, TimersState),
-        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-static const VMStateDescription icount_vmstate_adjust_timers = {
-    .name = "timer/icount/timers",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = adjust_timers_state_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
-        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-static const VMStateDescription icount_vmstate_shift = {
-    .name = "timer/icount/shift",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = shift_state_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT16(icount_time_shift, TimersState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-/*
- * This is a subsection for icount migration.
- */
-static const VMStateDescription icount_vmstate_timers = {
-    .name = "timer/icount",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = icount_state_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT64(qemu_icount_bias, TimersState),
-        VMSTATE_INT64(qemu_icount, TimersState),
-        VMSTATE_END_OF_LIST()
-    },
-    .subsections = (const VMStateDescription*[]) {
-        &icount_vmstate_warp_timer,
-        &icount_vmstate_adjust_timers,
-        &icount_vmstate_shift,
-        NULL
-    }
-};
-
-static const VMStateDescription vmstate_timers = {
-    .name = "timer",
-    .version_id = 2,
-    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT64(cpu_ticks_offset, TimersState),
-        VMSTATE_UNUSED(8),
-        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
-        VMSTATE_END_OF_LIST()
-    },
-    .subsections = (const VMStateDescription*[]) {
-        &icount_vmstate_timers,
-        NULL
-    }
-};
-
-void cpu_ticks_init(void)
-{
-    seqlock_init(&timers_state.vm_clock_seqlock);
-    qemu_spin_init(&timers_state.vm_clock_lock);
-    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
-    cpu_throttle_init();
-}
-
-void configure_icount(QemuOpts *opts, Error **errp)
-{
-    const char *option = qemu_opt_get(opts, "shift");
-    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
-    bool align = qemu_opt_get_bool(opts, "align", false);
-    long time_shift = -1;
-
-    if (!option) {
-        if (qemu_opt_get(opts, "align") != NULL) {
-            error_setg(errp, "Please specify shift option when using align");
-        }
-        return;
-    }
-
-    if (align && !sleep) {
-        error_setg(errp, "align=on and sleep=off are incompatible");
-        return;
-    }
-
-    if (strcmp(option, "auto") != 0) {
-        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
-            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
-            error_setg(errp, "icount: Invalid shift value");
-            return;
-        }
-    } else if (icount_align_option) {
-        error_setg(errp, "shift=auto and align=on are incompatible");
-        return;
-    } else if (!icount_sleep) {
-        error_setg(errp, "shift=auto and sleep=off are incompatible");
-        return;
-    }
-
-    icount_sleep = sleep;
-    if (icount_sleep) {
-        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
-                                         icount_timer_cb, NULL);
-    }
-
-    icount_align_option = align;
-
-    if (time_shift >= 0) {
-        timers_state.icount_time_shift = time_shift;
-        use_icount = 1;
-        return;
-    }
-
-    use_icount = 2;
-
-    /* 125MIPS seems a reasonable initial guess at the guest speed.
-       It will be corrected fairly quickly anyway.  */
-    timers_state.icount_time_shift = 3;
-
-    /* Have both realtime and virtual time triggers for speed adjustment.
-       The realtime trigger catches emulated time passing too slowly,
-       the virtual time trigger catches emulated time passing too fast.
-       Realtime triggers occur even when idle, so use them less frequently
-       than VM triggers.  */
-    timers_state.vm_clock_warp_start = -1;
-    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
-                                   icount_adjust_rt, NULL);
-    timer_mod(timers_state.icount_rt_timer,
-                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
-    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                        icount_adjust_vm, NULL);
-    timer_mod(timers_state.icount_vm_timer,
-                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   NANOSECONDS_PER_SECOND / 10);
-}
-
 /***********************************************************/
 /* TCG vCPU kick timer
  *
@@ -854,35 +171,6 @@ static void qemu_cpu_kick_rr_cpus(void)
     };
 }
 
-static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
-{
-}
-
-void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
-{
-    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
-        qemu_notify_event();
-        return;
-    }
-
-    if (qemu_in_vcpu_thread()) {
-        /* A CPU is currently running; kick it back out to the
-         * tcg_cpu_exec() loop so it will recalculate its
-         * icount deadline immediately.
-         */
-        qemu_cpu_kick(current_cpu);
-    } else if (first_cpu) {
-        /* qemu_cpu_kick is not enough to kick a halted CPU out of
-         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
-         * causes cpu_thread_is_idle to return false.  This way,
-         * handle_icount_deadline can run.
-         * If we have no CPUs at all for some reason, we don't
-         * need to do anything.
-         */
-        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
-    }
-}
-
 static void kick_tcg_thread(void *opaque)
 {
     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
@@ -1288,7 +576,7 @@ static void notify_aio_contexts(void)
 static void handle_icount_deadline(void)
 {
     assert(qemu_in_vcpu_thread());
-    if (use_icount) {
+    if (icount_enabled()) {
         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                       QEMU_TIMER_ATTR_ALL);
 
@@ -1300,7 +588,7 @@ static void handle_icount_deadline(void)
 
 static void prepare_icount_for_run(CPUState *cpu)
 {
-    if (use_icount) {
+    if (icount_enabled()) {
         int insns_left;
 
         /* These should always be cleared by process_icount_data after
@@ -1325,7 +613,7 @@ static void prepare_icount_for_run(CPUState *cpu)
 
 static void process_icount_data(CPUState *cpu)
 {
-    if (use_icount) {
+    if (icount_enabled()) {
         /* Account for executed instructions */
         cpu_update_icount(cpu);
 
@@ -1486,7 +774,7 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
             qatomic_mb_set(&cpu->exit_request, 0);
         }
 
-        if (use_icount && all_cpu_threads_idle()) {
+        if (icount_enabled() && all_cpu_threads_idle()) {
             /*
              * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
              * in the main_loop, wake it up in order to start the warp timer.
@@ -1639,7 +927,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
     CPUState *cpu = arg;
 
     assert(tcg_enabled());
-    g_assert(!use_icount);
+    g_assert(!icount_enabled());
 
     rcu_register_thread();
     tcg_register_thread();
@@ -2227,21 +1515,3 @@ void qmp_inject_nmi(Error **errp)
     nmi_monitor_handle(monitor_get_cpu_index(), errp);
 }
 
-void dump_drift_info(void)
-{
-    if (!use_icount) {
-        return;
-    }
-
-    qemu_printf("Host - Guest clock  %"PRIi64" ms\n",
-                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
-    if (icount_align_option) {
-        qemu_printf("Max guest delay     %"PRIi64" ms\n",
-                    -max_delay / SCALE_MS);
-        qemu_printf("Max guest advance   %"PRIi64" ms\n",
-                    max_advance / SCALE_MS);
-    } else {
-        qemu_printf("Max guest delay     NA\n");
-        qemu_printf("Max guest advance   NA\n");
-    }
-}
diff --git a/softmmu/icount.c b/softmmu/icount.c
new file mode 100644
index 0000000000..0b815fe88f
--- /dev/null
+++ b/softmmu/icount.c
@@ -0,0 +1,492 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "exec/exec-all.h"
+#include "sysemu/cpus.h"
+#include "sysemu/qtest.h"
+#include "qemu/main-loop.h"
+#include "qemu/option.h"
+#include "qemu/seqlock.h"
+#include "sysemu/replay.h"
+#include "sysemu/runstate.h"
+#include "hw/core/cpu.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/cpu-throttle.h"
+#include "timers-state.h"
+
+/*
+ * ICOUNT: Instruction Counter
+ *
+ * this module is split off from cpu-timers because the icount part
+ * is TCG-specific, and does not need to be built for other accels.
+ */
+static bool icount_sleep = true;
+/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
+#define MAX_ICOUNT_SHIFT 10
+
+/*
+ * 0 = Do not count executed instructions.
+ * 1 = Fixed conversion of insn to ns via "shift" option
+ * 2 = Runtime adaptive algorithm to compute shift
+ */
+int use_icount;
+
+static void icount_enable_precise(void)
+{
+    use_icount = 1;
+}
+
+static void icount_enable_adaptive(void)
+{
+    use_icount = 2;
+}
+
+/*
+ * The current number of executed instructions is based on what we
+ * originally budgeted minus the current state of the decrementing
+ * icount counters in extra/u16.low.
+ */
+static int64_t cpu_get_icount_executed(CPUState *cpu)
+{
+    return (cpu->icount_budget -
+            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
+}
+
+/*
+ * Update the global shared timer_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+static void cpu_update_icount_locked(CPUState *cpu)
+{
+    int64_t executed = cpu_get_icount_executed(cpu);
+    cpu->icount_budget -= executed;
+
+    qatomic_set_i64(&timers_state.qemu_icount,
+                    timers_state.qemu_icount + executed);
+}
+
+/*
+ * Update the global shared timer_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+void cpu_update_icount(CPUState *cpu)
+{
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+    cpu_update_icount_locked(cpu);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
+}
+
+static int64_t cpu_get_icount_raw_locked(void)
+{
+    CPUState *cpu = current_cpu;
+
+    if (cpu && cpu->running) {
+        if (!cpu->can_do_io) {
+            error_report("Bad icount read");
+            exit(1);
+        }
+        /* Take into account what has run */
+        cpu_update_icount_locked(cpu);
+    }
+    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
+    return qatomic_read_i64(&timers_state.qemu_icount);
+}
+
+static int64_t cpu_get_icount_locked(void)
+{
+    int64_t icount = cpu_get_icount_raw_locked();
+    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
+        cpu_icount_to_ns(icount);
+}
+
+int64_t cpu_get_icount_raw(void)
+{
+    int64_t icount;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        icount = cpu_get_icount_raw_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return icount;
+}
+
+/* Return the virtual CPU time, based on the instruction counter.  */
+int64_t cpu_get_icount(void)
+{
+    int64_t icount;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        icount = cpu_get_icount_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return icount;
+}
+
+int64_t cpu_icount_to_ns(int64_t icount)
+{
+    return icount << qatomic_read(&timers_state.icount_time_shift);
+}
+
+/*
+ * Correlation between real and virtual time is always going to be
+ * fairly approximate, so ignore small variation.
+ * When the guest is idle real and virtual time will be aligned in
+ * the IO wait loop.
+ */
+#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
+
+static void icount_adjust(void)
+{
+    int64_t cur_time;
+    int64_t cur_icount;
+    int64_t delta;
+
+    /* Protected by TimersState mutex.  */
+    static int64_t last_delta;
+
+    /* If the VM is not running, then do nothing.  */
+    if (!runstate_is_running()) {
+        return;
+    }
+
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
+                                   cpu_get_clock_locked());
+    cur_icount = cpu_get_icount_locked();
+
+    delta = cur_icount - cur_time;
+    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
+    if (delta > 0
+        && last_delta + ICOUNT_WOBBLE < delta * 2
+        && timers_state.icount_time_shift > 0) {
+        /* The guest is getting too far ahead.  Slow time down.  */
+        qatomic_set(&timers_state.icount_time_shift,
+                    timers_state.icount_time_shift - 1);
+    }
+    if (delta < 0
+        && last_delta - ICOUNT_WOBBLE > delta * 2
+        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
+        /* The guest is getting too far behind.  Speed time up.  */
+        qatomic_set(&timers_state.icount_time_shift,
+                    timers_state.icount_time_shift + 1);
+    }
+    last_delta = delta;
+    qatomic_set_i64(&timers_state.qemu_icount_bias,
+                    cur_icount - (timers_state.qemu_icount
+                                  << timers_state.icount_time_shift));
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
+}
+
+static void icount_adjust_rt(void *opaque)
+{
+    timer_mod(timers_state.icount_rt_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
+    icount_adjust();
+}
+
+static void icount_adjust_vm(void *opaque)
+{
+    timer_mod(timers_state.icount_vm_timer,
+                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                   NANOSECONDS_PER_SECOND / 10);
+    icount_adjust();
+}
+
+int64_t qemu_icount_round(int64_t count)
+{
+    int shift = qatomic_read(&timers_state.icount_time_shift);
+    return (count + (1 << shift) - 1) >> shift;
+}
+
+static void icount_warp_rt(void)
+{
+    unsigned seq;
+    int64_t warp_start;
+
+    /*
+     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
+     * changes from -1 to another value, so the race here is okay.
+     */
+    do {
+        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        warp_start = timers_state.vm_clock_warp_start;
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
+
+    if (warp_start == -1) {
+        return;
+    }
+
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+    if (runstate_is_running()) {
+        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
+                                            cpu_get_clock_locked());
+        int64_t warp_delta;
+
+        warp_delta = clock - timers_state.vm_clock_warp_start;
+        if (icount_enabled() == 2) {
+            /*
+             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
+             * far ahead of real time.
+             */
+            int64_t cur_icount = cpu_get_icount_locked();
+            int64_t delta = clock - cur_icount;
+            warp_delta = MIN(warp_delta, delta);
+        }
+        qatomic_set_i64(&timers_state.qemu_icount_bias,
+                        timers_state.qemu_icount_bias + warp_delta);
+    }
+    timers_state.vm_clock_warp_start = -1;
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+
+    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
+        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    }
+}
+
+static void icount_timer_cb(void *opaque)
+{
+    /*
+     * No need for a checkpoint because the timer already synchronizes
+     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
+     */
+    icount_warp_rt();
+}
+
+void qemu_start_warp_timer(void)
+{
+    int64_t clock;
+    int64_t deadline;
+
+    assert(icount_enabled());
+
+    /*
+     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
+     * do not fire, so computing the deadline does not make sense.
+     */
+    if (!runstate_is_running()) {
+        return;
+    }
+
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        if (!all_cpu_threads_idle()) {
+            return;
+        }
+
+        if (qtest_enabled()) {
+            /* When testing, qtest commands advance icount.  */
+            return;
+        }
+
+        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
+    } else {
+        /* warp clock deterministically in record/replay mode */
+        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
+            /*
+             * vCPU is sleeping and warp can't be started.
+             * It is probably a race condition: notification sent
+             * to vCPU was processed in advance and vCPU went to sleep.
+             * Therefore we have to wake it up for doing someting.
+             */
+            if (replay_has_checkpoint()) {
+                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+            }
+            return;
+        }
+    }
+
+    /* We want to use the earliest deadline from ALL vm_clocks */
+    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
+    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                          ~QEMU_TIMER_ATTR_EXTERNAL);
+    if (deadline < 0) {
+        static bool notified;
+        if (!icount_sleep && !notified) {
+            warn_report("icount sleep disabled and no active timers");
+            notified = true;
+        }
+        return;
+    }
+
+    if (deadline > 0) {
+        /*
+         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
+         * sleep.  Otherwise, the CPU might be waiting for a future timer
+         * interrupt to wake it up, but the interrupt never comes because
+         * the vCPU isn't running any insns and thus doesn't advance the
+         * QEMU_CLOCK_VIRTUAL.
+         */
+        if (!icount_sleep) {
+            /*
+             * We never let VCPUs sleep in no sleep icount mode.
+             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
+             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
+             * It is useful when we want a deterministic execution time,
+             * isolated from host latencies.
+             */
+            seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                               &timers_state.vm_clock_lock);
+            qatomic_set_i64(&timers_state.qemu_icount_bias,
+                            timers_state.qemu_icount_bias + deadline);
+            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                                 &timers_state.vm_clock_lock);
+            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+        } else {
+            /*
+             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
+             * "real" time, (related to the time left until the next event) has
+             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
+             * This avoids that the warps are visible externally; for example,
+             * you will not be sending network packets continuously instead of
+             * every 100ms.
+             */
+            seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                               &timers_state.vm_clock_lock);
+            if (timers_state.vm_clock_warp_start == -1
+                || timers_state.vm_clock_warp_start > clock) {
+                timers_state.vm_clock_warp_start = clock;
+            }
+            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                                 &timers_state.vm_clock_lock);
+            timer_mod_anticipate(timers_state.icount_warp_timer,
+                                 clock + deadline);
+        }
+    } else if (deadline == 0) {
+        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    }
+}
+
+void qemu_account_warp_timer(void)
+{
+    if (!icount_enabled() || !icount_sleep) {
+        return;
+    }
+
+    /*
+     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
+     * do not fire, so computing the deadline does not make sense.
+     */
+    if (!runstate_is_running()) {
+        return;
+    }
+
+    /* warp clock deterministically in record/replay mode */
+    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
+        return;
+    }
+
+    timer_del(timers_state.icount_warp_timer);
+    icount_warp_rt();
+}
+
+void configure_icount(QemuOpts *opts, Error **errp)
+{
+    const char *option = qemu_opt_get(opts, "shift");
+    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
+    bool align = qemu_opt_get_bool(opts, "align", false);
+    long time_shift = -1;
+
+    if (!option) {
+        if (qemu_opt_get(opts, "align") != NULL) {
+            error_setg(errp, "Please specify shift option when using align");
+        }
+        return;
+    }
+
+    if (align && !sleep) {
+        error_setg(errp, "align=on and sleep=off are incompatible");
+        return;
+    }
+
+    if (strcmp(option, "auto") != 0) {
+        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
+            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
+            error_setg(errp, "icount: Invalid shift value");
+            return;
+        }
+    } else if (icount_align_option) {
+        error_setg(errp, "shift=auto and align=on are incompatible");
+        return;
+    } else if (!icount_sleep) {
+        error_setg(errp, "shift=auto and sleep=off are incompatible");
+        return;
+    }
+
+    icount_sleep = sleep;
+    if (icount_sleep) {
+        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+                                         icount_timer_cb, NULL);
+    }
+
+    icount_align_option = align;
+
+    if (time_shift >= 0) {
+        timers_state.icount_time_shift = time_shift;
+        icount_enable_precise();
+        return;
+    }
+
+    icount_enable_adaptive();
+
+    /*
+     * 125MIPS seems a reasonable initial guess at the guest speed.
+     * It will be corrected fairly quickly anyway.
+     */
+    timers_state.icount_time_shift = 3;
+
+    /*
+     * Have both realtime and virtual time triggers for speed adjustment.
+     * The realtime trigger catches emulated time passing too slowly,
+     * the virtual time trigger catches emulated time passing too fast.
+     * Realtime triggers occur even when idle, so use them less frequently
+     * than VM triggers.
+     */
+    timers_state.vm_clock_warp_start = -1;
+    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
+                                   icount_adjust_rt, NULL);
+    timer_mod(timers_state.icount_rt_timer,
+                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
+    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                        icount_adjust_vm, NULL);
+    timer_mod(timers_state.icount_vm_timer,
+                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                   NANOSECONDS_PER_SECOND / 10);
+}
diff --git a/softmmu/meson.build b/softmmu/meson.build
index 95d38df259..36c96e7b15 100644
--- a/softmmu/meson.build
+++ b/softmmu/meson.build
@@ -1,4 +1,4 @@
-specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: files(
+specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files(
   'arch_init.c',
   'balloon.c',
   'cpus.c',
@@ -7,4 +7,10 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: files(
   'memory.c',
   'memory_mapping.c',
   'qtest.c',
-  'vl.c'))
+  'vl.c',
+  'cpu-timers.c',
+)])
+
+specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files(
+  'icount.c'
+)])
diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index 4e439caec7..0d43cf8883 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -21,7 +21,7 @@
 #include "exec/memory.h"
 #include "hw/irq.h"
 #include "sysemu/accel.h"
-#include "sysemu/cpus.h"
+#include "sysemu/cpu-timers.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/error-report.h"
@@ -273,6 +273,38 @@ static void qtest_irq_handler(void *opaque, int n, int level)
     }
 }
 
+static int64_t qtest_clock_counter;
+
+int64_t qtest_get_virtual_clock(void)
+{
+    return qatomic_read_i64(&qtest_clock_counter);
+}
+
+static void qtest_set_virtual_clock(int64_t count)
+{
+    qatomic_set_i64(&qtest_clock_counter, count);
+}
+
+static void qtest_clock_warp(int64_t dest)
+{
+    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    AioContext *aio_context;
+    assert(qtest_enabled());
+    aio_context = qemu_get_aio_context();
+    while (clock < dest) {
+        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                                      QEMU_TIMER_ATTR_ALL);
+        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
+
+        qtest_set_virtual_clock(qtest_get_virtual_clock() + warp);
+
+        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
+        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    }
+    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+}
+
 static void qtest_process_command(CharBackend *chr, gchar **words)
 {
     const gchar *command;
diff --git a/softmmu/timers-state.h b/softmmu/timers-state.h
new file mode 100644
index 0000000000..db4e60f18f
--- /dev/null
+++ b/softmmu/timers-state.h
@@ -0,0 +1,69 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TIMERS_STATE_H
+#define TIMERS_STATE_H
+
+/* timers state, for sharing between icount and cpu-timers */
+
+typedef struct TimersState {
+    /* Protected by BQL.  */
+    int64_t cpu_ticks_prev;
+    int64_t cpu_ticks_offset;
+
+    /*
+     * Protect fields that can be respectively read outside the
+     * BQL, and written from multiple threads.
+     */
+    QemuSeqLock vm_clock_seqlock;
+    QemuSpin vm_clock_lock;
+
+    int16_t cpu_ticks_enabled;
+
+    /* Conversion factor from emulated instructions to virtual clock ticks.  */
+    int16_t icount_time_shift;
+
+    /* Compensate for varying guest execution speed.  */
+    int64_t qemu_icount_bias;
+
+    int64_t vm_clock_warp_start;
+    int64_t cpu_clock_offset;
+
+    /* Only written by TCG thread */
+    int64_t qemu_icount;
+
+    /* for adjusting icount */
+    QEMUTimer *icount_rt_timer;
+    QEMUTimer *icount_vm_timer;
+    QEMUTimer *icount_warp_timer;
+} TimersState;
+
+extern TimersState timers_state;
+
+/*
+ * icount needs this internal from cpu-timers when adjusting the icount shift.
+ */
+int64_t cpu_get_clock_locked(void);
+
+#endif /* TIMERS_STATE_H */
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 22bc570df4..6a1ee531db 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -74,6 +74,7 @@
 #include "hw/audio/soundhw.h"
 #include "audio/audio.h"
 #include "sysemu/cpus.h"
+#include "sysemu/cpu-timers.h"
 #include "migration/colo.h"
 #include "migration/postcopy-ram.h"
 #include "sysemu/kvm.h"
@@ -2804,7 +2805,7 @@ static void configure_accelerators(const char *progname)
         error_report("falling back to %s", ac->name);
     }
 
-    if (use_icount && !(tcg_enabled() || qtest_enabled())) {
+    if (icount_enabled() && !tcg_enabled()) {
         error_report("-icount is not allowed with hardware virtualization");
         exit(1);
     }
@@ -4254,7 +4255,8 @@ void qemu_init(int argc, char **argv, char **envp)
         semihosting_arg_fallback(kernel_filename, kernel_cmdline);
     }
 
-    cpu_ticks_init();
+    /* initialize cpu timers and VCPU throttle modules */
+    cpu_timers_init();
 
     if (default_net) {
         QemuOptsList *net = qemu_find_opts("net");
author	Claudio Fontana <cfontana@suse.de>	2020-08-19 13:17:19 +0200
committer	Paolo Bonzini <pbonzini@redhat.com>	2020-10-05 16:41:22 +0200
commit	740b175973427bcfa32ad894bb1f83b96d184c28 (patch)
tree	3613e83be5fd8620b369479b98e6ede5d5746a9a /softmmu
parent	0ac0b47c44b4be6cbce26777a1a5968cc8f025a5 (diff)