enable vm_clock to "warp" in the iothread+icount case

The previous patch however is not enough, because if the virtual CPU goes to sleep waiting for a future timer interrupt to wake it up, qemu deadlocks. The timer interrupt never comes because time is driven by icount, but the vCPU doesn't run any insns. You could say that VCPUs should never go to sleep in icount mode if there is a pending vm_clock timer; rather time should just warp to the next vm_clock event with no sleep ever taking place. Even better, you can sleep for some time related to the time left until the next event, to avoid that the warps are too visible externally; for example, you could be sending network packets continously instead of every 100ms. This is what this patch implements. qemu_clock_warp is called: 1) whenever a vm_clock timer is adjusted, to ensure the warp_timer is synchronized; 2) at strategic points in the CPU thread, to make sure the insn counter is synchronized before the CPU starts running. In any case, the warp_timer is disabled while the CPU is running, because the insn counter will then be making progress on its own. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Tested-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>
author: Paolo Bonzini <pbonzini@redhat.com> 2011-04-13 10:03:44 +0200
committer: Edgar E. Iglesias <edgar.iglesias@gmail.com> 2011-04-15 09:27:56 +0200
commit: ab33fcda9f96b9195dfb3fcf5bd9bb5383caeaea (patch)
tree: 97a1a32a03b69a15fb4f23f7a5eb5e880d036980
parent: 3b2319a30b5ae528787bf3769b1a28a863b53252 (diff)
4 files changed, 102 insertions, 2 deletions
diff --git a/cpus.c b/cpus.c
index cbeac7a40e..6a50199039 100644
--- a/cpus.c
+++ b/cpus.c
@@ -155,7 +155,7 @@ static bool cpu_thread_is_idle(CPUState *env)
     return true;
 }
 
-static bool all_cpu_threads_idle(void)
+bool all_cpu_threads_idle(void)
 {
     CPUState *env;
 
@@ -739,6 +739,9 @@ static void qemu_tcg_wait_io_event(void)
     CPUState *env;
 
     while (all_cpu_threads_idle()) {
+       /* Start accounting real time to the virtual clock if the CPUs
+          are idle.  */
+        qemu_clock_warp(vm_clock);
         qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
     }
 
@@ -1073,6 +1076,9 @@ bool cpu_exec_all(void)
 {
     int r;
 
+    /* Account partial waits to the vm_clock.  */
+    qemu_clock_warp(vm_clock);
+
     if (next_cpu == NULL) {
         next_cpu = first_cpu;
     }
diff --git a/qemu-common.h b/qemu-common.h
index 82e27c18d3..4f6037bab9 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -298,6 +298,7 @@ void qemu_notify_event(void);
 void qemu_cpu_kick(void *env);
 void qemu_cpu_kick_self(void);
 int qemu_cpu_is_self(void *env);
+bool all_cpu_threads_idle(void);
 
 /* work queue */
 struct qemu_work_item {
diff --git a/qemu-timer.c b/qemu-timer.c
index 50f1943afd..4959688895 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -153,6 +153,8 @@ void cpu_disable_ticks(void)
 struct QEMUClock {
     int type;
     int enabled;
+
+    QEMUTimer *warp_timer;
 };
 
 struct QEMUTimer {
@@ -386,6 +388,90 @@ void qemu_clock_enable(QEMUClock *clock, int enabled)
     clock->enabled = enabled;
 }
 
+static int64_t vm_clock_warp_start;
+
+static void icount_warp_rt(void *opaque)
+{
+    if (vm_clock_warp_start == -1) {
+        return;
+    }
+
+    if (vm_running) {
+        int64_t clock = qemu_get_clock_ns(rt_clock);
+        int64_t warp_delta = clock - vm_clock_warp_start;
+        if (use_icount == 1) {
+            qemu_icount_bias += warp_delta;
+        } else {
+            /*
+             * In adaptive mode, do not let the vm_clock run too
+             * far ahead of real time.
+             */
+            int64_t cur_time = cpu_get_clock();
+            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
+            int64_t delta = cur_time - cur_icount;
+            qemu_icount_bias += MIN(warp_delta, delta);
+        }
+        if (qemu_timer_expired(active_timers[QEMU_CLOCK_VIRTUAL],
+                               qemu_get_clock_ns(vm_clock))) {
+            qemu_notify_event();
+        }
+    }
+    vm_clock_warp_start = -1;
+}
+
+void qemu_clock_warp(QEMUClock *clock)
+{
+    int64_t deadline;
+
+    if (!clock->warp_timer) {
+        return;
+    }
+
+    /*
+     * There are too many global variables to make the "warp" behavior
+     * applicable to other clocks.  But a clock argument removes the
+     * need for if statements all over the place.
+     */
+    assert(clock == vm_clock);
+
+    /*
+     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
+     * ensures that the deadline for the timer is computed correctly below.
+     * This also makes sure that the insn counter is synchronized before the
+     * CPU starts running, in case the CPU is woken by an event other than
+     * the earliest vm_clock timer.
+     */
+    icount_warp_rt(NULL);
+    if (!all_cpu_threads_idle() || !active_timers[clock->type]) {
+        qemu_del_timer(clock->warp_timer);
+        return;
+    }
+
+    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
+    deadline = qemu_next_deadline();
+    if (deadline > 0) {
+        /*
+         * Ensure the vm_clock proceeds even when the virtual CPU goes to
+         * sleep.  Otherwise, the CPU might be waiting for a future timer
+         * interrupt to wake it up, but the interrupt never comes because
+         * the vCPU isn't running any insns and thus doesn't advance the
+         * vm_clock.
+         *
+         * An extreme solution for this problem would be to never let VCPUs
+         * sleep in icount mode if there is a pending vm_clock timer; rather
+         * time could just advance to the next vm_clock event.  Instead, we
+         * do stop VCPUs and only advance vm_clock after some "real" time,
+         * (related to the time left until the next event) has passed.  This
+         * rt_clock timer will do this.  This avoids that the warps are too
+         * visible externally---for example, you will not be sending network
+         * packets continously instead of every 100ms.
+         */
+        qemu_mod_timer(clock->warp_timer, vm_clock_warp_start + deadline);
+    } else {
+        qemu_notify_event();
+    }
+}
+
 QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
                           QEMUTimerCB *cb, void *opaque)
 {
@@ -454,8 +540,10 @@ static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time)
             qemu_rearm_alarm_timer(alarm_timer);
         }
         /* Interrupt execution to force deadline recalculation.  */
-        if (use_icount)
+        qemu_clock_warp(ts->clock);
+        if (use_icount) {
             qemu_notify_event();
+        }
     }
 }
 
@@ -576,6 +664,10 @@ void configure_icount(const char *option)
     if (!option)
         return;
 
+#ifdef CONFIG_IOTHREAD
+    vm_clock->warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
+#endif
+
     if (strcmp(option, "auto") != 0) {
         icount_time_shift = strtol(option, NULL, 0);
         use_icount = 1;
diff --git a/qemu-timer.h b/qemu-timer.h
index 75d567578b..c01bcaba66 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -39,6 +39,7 @@ extern QEMUClock *host_clock;
 
 int64_t qemu_get_clock_ns(QEMUClock *clock);
 void qemu_clock_enable(QEMUClock *clock, int enabled);
+void qemu_clock_warp(QEMUClock *clock);
 
 QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
                           QEMUTimerCB *cb, void *opaque);
author	Paolo Bonzini <pbonzini@redhat.com>	2011-04-13 10:03:44 +0200
committer	Edgar E. Iglesias <edgar.iglesias@gmail.com>	2011-04-15 09:27:56 +0200
commit	ab33fcda9f96b9195dfb3fcf5bd9bb5383caeaea (patch)
tree	97a1a32a03b69a15fb4f23f7a5eb5e880d036980
parent	3b2319a30b5ae528787bf3769b1a28a863b53252 (diff)