diff options
Diffstat (limited to 'cpus.c')
-rw-r--r-- | cpus.c | 345 |
1 files changed, 274 insertions, 71 deletions
@@ -25,6 +25,7 @@ /* Needed early for CONFIG_BSD etc. */ #include "qemu/osdep.h" #include "qemu-common.h" +#include "qemu/config-file.h" #include "cpu.h" #include "monitor/monitor.h" #include "qapi/qmp/qerror.h" @@ -45,6 +46,7 @@ #include "qemu/main-loop.h" #include "qemu/bitmap.h" #include "qemu/seqlock.h" +#include "tcg.h" #include "qapi-event.h" #include "hw/nmi.h" #include "sysemu/replay.h" @@ -150,6 +152,77 @@ typedef struct TimersState { } TimersState; static TimersState timers_state; +bool mttcg_enabled; + +/* + * We default to false if we know other options have been enabled + * which are currently incompatible with MTTCG. Otherwise when each + * guest (target) has been updated to support: + * - atomic instructions + * - memory ordering primitives (barriers) + * they can set the appropriate CONFIG flags in ${target}-softmmu.mak + * + * Once a guest architecture has been converted to the new primitives + * there are two remaining limitations to check. + * + * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) + * - The host must have a stronger memory order than the guest + * + * It may be possible in future to support strong guests on weak hosts + * but that will require tagging all load/stores in a guest with their + * implicit memory order requirements which would likely slow things + * down a lot. + */ + +static bool check_tcg_memory_orders_compatible(void) +{ +#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO) + return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0; +#else + return false; +#endif +} + +static bool default_mttcg_enabled(void) +{ + QemuOpts *icount_opts = qemu_find_opts_singleton("icount"); + const char *rr = qemu_opt_get(icount_opts, "rr"); + + if (rr || TCG_OVERSIZED_GUEST) { + return false; + } else { +#ifdef TARGET_SUPPORTS_MTTCG + return check_tcg_memory_orders_compatible(); +#else + return false; +#endif + } +} + +void qemu_tcg_configure(QemuOpts *opts, Error **errp) +{ + const char *t = qemu_opt_get(opts, "thread"); + if (t) { + if (strcmp(t, "multi") == 0) { + if (TCG_OVERSIZED_GUEST) { + error_setg(errp, "No MTTCG when guest word size > hosts"); + } else { + if (!check_tcg_memory_orders_compatible()) { + error_report("Guest expects a stronger memory ordering " + "than the host provides"); + error_printf("This may cause strange/hard to debug errors"); + } + mttcg_enabled = true; + } + } else if (strcmp(t, "single") == 0) { + mttcg_enabled = false; + } else { + error_setg(errp, "Invalid 'thread' setting %s", t); + } + } else { + mttcg_enabled = default_mttcg_enabled(); + } +} int64_t cpu_get_icount_raw(void) { @@ -695,6 +768,63 @@ void configure_icount(QemuOpts *opts, Error **errp) } /***********************************************************/ +/* TCG vCPU kick timer + * + * The kick timer is responsible for moving single threaded vCPU + * emulation on to the next vCPU. If more than one vCPU is running a + * timer event with force a cpu->exit so the next vCPU can get + * scheduled. + * + * The timer is removed if all vCPUs are idle and restarted again once + * idleness is complete. + */ + +static QEMUTimer *tcg_kick_vcpu_timer; +static CPUState *tcg_current_rr_cpu; + +#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) + +static inline int64_t qemu_tcg_next_kick(void) +{ + return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; +} + +/* Kick the currently round-robin scheduled vCPU */ +static void qemu_cpu_kick_rr_cpu(void) +{ + CPUState *cpu; + do { + cpu = atomic_mb_read(&tcg_current_rr_cpu); + if (cpu) { + cpu_exit(cpu); + } + } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); +} + +static void kick_tcg_thread(void *opaque) +{ + timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); + qemu_cpu_kick_rr_cpu(); +} + +static void start_tcg_kick_timer(void) +{ + if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) { + tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + kick_tcg_thread, NULL); + timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); + } +} + +static void stop_tcg_kick_timer(void) +{ + if (tcg_kick_vcpu_timer) { + timer_del(tcg_kick_vcpu_timer); + tcg_kick_vcpu_timer = NULL; + } +} + +/***********************************************************/ void hw_error(const char *fmt, ...) { va_list ap; @@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) #endif /* _WIN32 */ static QemuMutex qemu_global_mutex; -static QemuCond qemu_io_proceeded_cond; -static unsigned iothread_requesting_mutex; static QemuThread io_thread; @@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void) qemu_init_sigbus(); qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); - qemu_cond_init(&qemu_io_proceeded_cond); qemu_mutex_init(&qemu_global_mutex); qemu_thread_get_self(&io_thread); @@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) static void qemu_wait_io_event_common(CPUState *cpu) { + atomic_mb_set(&cpu->thread_kicked, false); if (cpu->stop) { cpu->stop = false; cpu->stopped = true; qemu_cond_broadcast(&qemu_pause_cond); } process_queued_cpu_work(cpu); - cpu->thread_kicked = false; +} + +static bool qemu_tcg_should_sleep(CPUState *cpu) +{ + if (mttcg_enabled) { + return cpu_thread_is_idle(cpu); + } else { + return all_cpu_threads_idle(); + } } static void qemu_tcg_wait_io_event(CPUState *cpu) { - while (all_cpu_threads_idle()) { + while (qemu_tcg_should_sleep(cpu)) { + stop_tcg_kick_timer(); qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - while (iothread_requesting_mutex) { - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); - } + start_tcg_kick_timer(); - CPU_FOREACH(cpu) { - qemu_wait_io_event_common(cpu); - } + qemu_wait_io_event_common(cpu); } static void qemu_kvm_wait_io_event(CPUState *cpu) @@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->can_do_io = 1; + current_cpu = cpu; sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); @@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) cpu->created = true; qemu_cond_signal(&qemu_cpu_cond); - current_cpu = cpu; while (1) { - current_cpu = NULL; qemu_mutex_unlock_iothread(); do { int sig; @@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) exit(1); } qemu_mutex_lock_iothread(); - current_cpu = cpu; qemu_wait_io_event_common(cpu); } @@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu) cpu->icount_decr.u16.low = decr; cpu->icount_extra = count; } + qemu_mutex_unlock_iothread(); cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); + qemu_mutex_lock_iothread(); #ifdef CONFIG_PROFILER tcg_time += profile_getclock() - ti; #endif @@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void) } } -static void *qemu_tcg_cpu_thread_fn(void *arg) +/* Single-threaded TCG + * + * In the single-threaded case each vCPU is simulated in turn. If + * there is more than a single vCPU we create a simple timer to kick + * the vCPU and ensure we don't get stuck in a tight loop in one vCPU. + * This is done explicitly rather than relying on side-effects + * elsewhere. + */ + +static void *qemu_tcg_rr_cpu_thread_fn(void *arg) { CPUState *cpu = arg; @@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* process any pending work */ CPU_FOREACH(cpu) { + current_cpu = cpu; qemu_wait_io_event_common(cpu); } } - /* process any pending work */ - atomic_mb_set(&exit_request, 1); + start_tcg_kick_timer(); cpu = first_cpu; + /* process any pending work */ + cpu->exit_request = 1; + while (1) { /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ qemu_account_warp_timer(); @@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) cpu = first_cpu; } - for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) { + while (cpu && !cpu->queued_work_first && !cpu->exit_request) { + + atomic_mb_set(&tcg_current_rr_cpu, cpu); + current_cpu = cpu; qemu_clock_enable(QEMU_CLOCK_VIRTUAL, (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); @@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); break; + } else if (r == EXCP_ATOMIC) { + qemu_mutex_unlock_iothread(); + cpu_exec_step_atomic(cpu); + qemu_mutex_lock_iothread(); + break; } - } else if (cpu->stop || cpu->stopped) { + } else if (cpu->stop) { if (cpu->unplug) { cpu = CPU_NEXT(cpu); } break; } - } /* for cpu.. */ + cpu = CPU_NEXT(cpu); + } /* while (cpu && !cpu->exit_request).. */ + + /* Does not need atomic_mb_set because a spurious wakeup is okay. */ + atomic_set(&tcg_current_rr_cpu, NULL); - /* Pairs with smp_wmb in qemu_cpu_kick. */ - atomic_mb_set(&exit_request, 0); + if (cpu && cpu->exit_request) { + atomic_mb_set(&cpu->exit_request, 0); + } handle_icount_deadline(); - qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus)); + qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus)); deal_with_unplugged_cpus(); } @@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused) } #endif +/* Multi-threaded TCG + * + * In the multi-threaded case each vCPU has its own thread. The TLS + * variable current_cpu can be used deep in the code to find the + * current CPUState for a given thread. + */ + +static void *qemu_tcg_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + + cpu->thread_id = qemu_get_thread_id(); + cpu->created = true; + cpu->can_do_io = 1; + current_cpu = cpu; + qemu_cond_signal(&qemu_cpu_cond); + + /* process any pending work */ + cpu->exit_request = 1; + + while (1) { + if (cpu_can_run(cpu)) { + int r; + r = tcg_cpu_exec(cpu); + switch (r) { + case EXCP_DEBUG: + cpu_handle_guest_debug(cpu); + break; + case EXCP_HALTED: + /* during start-up the vCPU is reset and the thread is + * kicked several times. If we don't ensure we go back + * to sleep in the halted state we won't cleanly + * start-up when the vCPU is enabled. + * + * cpu->halted should ensure we sleep in wait_io_event + */ + g_assert(cpu->halted); + break; + case EXCP_ATOMIC: + qemu_mutex_unlock_iothread(); + cpu_exec_step_atomic(cpu); + qemu_mutex_lock_iothread(); + default: + /* Ignore everything else? */ + break; + } + } + + handle_icount_deadline(); + + atomic_mb_set(&cpu->exit_request, 0); + qemu_tcg_wait_io_event(cpu); + } + + return NULL; +} + static void qemu_cpu_kick_thread(CPUState *cpu) { #ifndef _WIN32 @@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu) #endif } -static void qemu_cpu_kick_no_halt(void) -{ - CPUState *cpu; - /* Ensure whatever caused the exit has reached the CPU threads before - * writing exit_request. - */ - atomic_mb_set(&exit_request, 1); - cpu = atomic_mb_read(&tcg_current_cpu); - if (cpu) { - cpu_exit(cpu); - } -} - void qemu_cpu_kick(CPUState *cpu) { qemu_cond_broadcast(cpu->halt_cond); if (tcg_enabled()) { - qemu_cpu_kick_no_halt(); + cpu_exit(cpu); + /* NOP unless doing single-thread RR */ + qemu_cpu_kick_rr_cpu(); } else { if (hax_enabled()) { /* @@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void) void qemu_mutex_lock_iothread(void) { - atomic_inc(&iothread_requesting_mutex); - /* In the simple case there is no need to bump the VCPU thread out of - * TCG code execution. - */ - if (!tcg_enabled() || qemu_in_vcpu_thread() || - !first_cpu || !first_cpu->created) { - qemu_mutex_lock(&qemu_global_mutex); - atomic_dec(&iothread_requesting_mutex); - } else { - if (qemu_mutex_trylock(&qemu_global_mutex)) { - qemu_cpu_kick_no_halt(); - qemu_mutex_lock(&qemu_global_mutex); - } - atomic_dec(&iothread_requesting_mutex); - qemu_cond_broadcast(&qemu_io_proceeded_cond); - } + g_assert(!qemu_mutex_iothread_locked()); + qemu_mutex_lock(&qemu_global_mutex); iothread_locked = true; } void qemu_mutex_unlock_iothread(void) { + g_assert(qemu_mutex_iothread_locked()); iothread_locked = false; qemu_mutex_unlock(&qemu_global_mutex); } @@ -1392,13 +1588,6 @@ void pause_all_vcpus(void) if (qemu_in_vcpu_thread()) { cpu_stop_current(); - if (!kvm_enabled()) { - CPU_FOREACH(cpu) { - cpu->stop = false; - cpu->stopped = true; - } - return; - } } while (!all_vcpus_paused()) { @@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu) static void qemu_tcg_init_vcpu(CPUState *cpu) { char thread_name[VCPU_THREAD_NAME_SIZE]; - static QemuCond *tcg_halt_cond; - static QemuThread *tcg_cpu_thread; + static QemuCond *single_tcg_halt_cond; + static QemuThread *single_tcg_cpu_thread; - /* share a single thread for all cpus with TCG */ - if (!tcg_cpu_thread) { + if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) { cpu->thread = g_malloc0(sizeof(QemuThread)); cpu->halt_cond = g_malloc0(sizeof(QemuCond)); qemu_cond_init(cpu->halt_cond); - tcg_halt_cond = cpu->halt_cond; - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG", + + if (qemu_tcg_mttcg_enabled()) { + /* create a thread per vCPU with TCG (MTTCG) */ + parallel_cpus = true; + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG", cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); + + qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); + + } else { + /* share a single thread for all cpus with TCG */ + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG"); + qemu_thread_create(cpu->thread, thread_name, + qemu_tcg_rr_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); + + single_tcg_halt_cond = cpu->halt_cond; + single_tcg_cpu_thread = cpu->thread; + } #ifdef _WIN32 cpu->hThread = qemu_thread_get_handle(cpu->thread); #endif while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); } - tcg_cpu_thread = cpu->thread; } else { - cpu->thread = tcg_cpu_thread; - cpu->halt_cond = tcg_halt_cond; + /* For non-MTTCG cases we share the thread */ + cpu->thread = single_tcg_cpu_thread; + cpu->halt_cond = single_tcg_halt_cond; } } |