diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2011-10-24 10:51:12 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2011-10-24 10:51:12 -0500 |
commit | 952e849c150b4f1b89f8728cba00f925c1d6e75b (patch) | |
tree | 93e950b81e84d1d20a915d8b1da5c75d3911f553 | |
parent | db418a0a7ef5887ea0f3d167584e6f500bb0c4c5 (diff) | |
parent | 99435906cc92a45d6c2f7e18221d31349836f90e (diff) |
Merge remote-tracking branch 'bonzini/split-main-loop-for-anthony' into staging
-rw-r--r-- | Makefile.objs | 2 | ||||
-rw-r--r-- | async.c | 1 | ||||
-rw-r--r-- | cpus.c | 497 | ||||
-rw-r--r-- | cpus.h | 3 | ||||
-rw-r--r-- | exec-all.h | 14 | ||||
-rw-r--r-- | exec.c | 3 | ||||
-rw-r--r-- | hw/mac_dbdma.c | 5 | ||||
-rw-r--r-- | hw/mac_dbdma.h | 1 | ||||
-rw-r--r-- | iohandler.c | 55 | ||||
-rw-r--r-- | main-loop.c | 495 | ||||
-rw-r--r-- | main-loop.h | 351 | ||||
-rw-r--r-- | os-win32.c | 123 | ||||
-rw-r--r-- | qemu-char.h | 12 | ||||
-rw-r--r-- | qemu-common.h | 37 | ||||
-rw-r--r-- | qemu-coroutine-lock.c | 1 | ||||
-rw-r--r-- | qemu-os-posix.h | 4 | ||||
-rw-r--r-- | qemu-os-win32.h | 17 | ||||
-rw-r--r-- | qemu-timer.c | 489 | ||||
-rw-r--r-- | qemu-timer.h | 31 | ||||
-rw-r--r-- | savevm.c | 25 | ||||
-rw-r--r-- | slirp/libslirp.h | 11 | ||||
-rw-r--r-- | sysemu.h | 3 | ||||
-rw-r--r-- | vl.c | 189 |
23 files changed, 1309 insertions, 1060 deletions
diff --git a/Makefile.objs b/Makefile.objs index 9e2077833e..01587c8f8f 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -81,7 +81,7 @@ common-obj-y += $(oslib-obj-y) common-obj-$(CONFIG_WIN32) += os-win32.o common-obj-$(CONFIG_POSIX) += os-posix.o -common-obj-y += tcg-runtime.o host-utils.o +common-obj-y += tcg-runtime.o host-utils.o main-loop.o common-obj-y += irq.o input.o common-obj-$(CONFIG_PTIMER) += ptimer.o common-obj-$(CONFIG_MAX7310) += max7310.o @@ -24,6 +24,7 @@ #include "qemu-common.h" #include "qemu-aio.h" +#include "main-loop.h" /* Anchor of the list of Bottom Halves belonging to the context */ static struct QEMUBH *first_bh; @@ -33,17 +33,12 @@ #include "qemu-thread.h" #include "cpus.h" +#include "main-loop.h" #ifndef _WIN32 #include "compatfd.h" #endif -#ifdef SIGRTMIN -#define SIG_IPI (SIGRTMIN+4) -#else -#define SIG_IPI SIGUSR1 -#endif - #ifdef CONFIG_LINUX #include <sys/prctl.h> @@ -65,6 +60,281 @@ static CPUState *next_cpu; /***********************************************************/ +/* guest cycle counter */ + +/* Conversion factor from emulated instructions to virtual clock ticks. */ +static int icount_time_shift; +/* Arbitrarily pick 1MIPS as the minimum allowable speed. */ +#define MAX_ICOUNT_SHIFT 10 +/* Compensate for varying guest execution speed. */ +static int64_t qemu_icount_bias; +static QEMUTimer *icount_rt_timer; +static QEMUTimer *icount_vm_timer; +static QEMUTimer *icount_warp_timer; +static int64_t vm_clock_warp_start; +static int64_t qemu_icount; + +typedef struct TimersState { + int64_t cpu_ticks_prev; + int64_t cpu_ticks_offset; + int64_t cpu_clock_offset; + int32_t cpu_ticks_enabled; + int64_t dummy; +} TimersState; + +TimersState timers_state; + +/* Return the virtual CPU time, based on the instruction counter. */ +int64_t cpu_get_icount(void) +{ + int64_t icount; + CPUState *env = cpu_single_env;; + + icount = qemu_icount; + if (env) { + if (!can_do_io(env)) { + fprintf(stderr, "Bad clock read\n"); + } + icount -= (env->icount_decr.u16.low + env->icount_extra); + } + return qemu_icount_bias + (icount << icount_time_shift); +} + +/* return the host CPU cycle counter and handle stop/restart */ +int64_t cpu_get_ticks(void) +{ + if (use_icount) { + return cpu_get_icount(); + } + if (!timers_state.cpu_ticks_enabled) { + return timers_state.cpu_ticks_offset; + } else { + int64_t ticks; + ticks = cpu_get_real_ticks(); + if (timers_state.cpu_ticks_prev > ticks) { + /* Note: non increasing ticks may happen if the host uses + software suspend */ + timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; + } + timers_state.cpu_ticks_prev = ticks; + return ticks + timers_state.cpu_ticks_offset; + } +} + +/* return the host CPU monotonic timer and handle stop/restart */ +int64_t cpu_get_clock(void) +{ + int64_t ti; + if (!timers_state.cpu_ticks_enabled) { + return timers_state.cpu_clock_offset; + } else { + ti = get_clock(); + return ti + timers_state.cpu_clock_offset; + } +} + +/* enable cpu_get_ticks() */ +void cpu_enable_ticks(void) +{ + if (!timers_state.cpu_ticks_enabled) { + timers_state.cpu_ticks_offset -= cpu_get_real_ticks(); + timers_state.cpu_clock_offset -= get_clock(); + timers_state.cpu_ticks_enabled = 1; + } +} + +/* disable cpu_get_ticks() : the clock is stopped. You must not call + cpu_get_ticks() after that. */ +void cpu_disable_ticks(void) +{ + if (timers_state.cpu_ticks_enabled) { + timers_state.cpu_ticks_offset = cpu_get_ticks(); + timers_state.cpu_clock_offset = cpu_get_clock(); + timers_state.cpu_ticks_enabled = 0; + } +} + +/* Correlation between real and virtual time is always going to be + fairly approximate, so ignore small variation. + When the guest is idle real and virtual time will be aligned in + the IO wait loop. */ +#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10) + +static void icount_adjust(void) +{ + int64_t cur_time; + int64_t cur_icount; + int64_t delta; + static int64_t last_delta; + /* If the VM is not running, then do nothing. */ + if (!runstate_is_running()) { + return; + } + cur_time = cpu_get_clock(); + cur_icount = qemu_get_clock_ns(vm_clock); + delta = cur_icount - cur_time; + /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ + if (delta > 0 + && last_delta + ICOUNT_WOBBLE < delta * 2 + && icount_time_shift > 0) { + /* The guest is getting too far ahead. Slow time down. */ + icount_time_shift--; + } + if (delta < 0 + && last_delta - ICOUNT_WOBBLE > delta * 2 + && icount_time_shift < MAX_ICOUNT_SHIFT) { + /* The guest is getting too far behind. Speed time up. */ + icount_time_shift++; + } + last_delta = delta; + qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift); +} + +static void icount_adjust_rt(void *opaque) +{ + qemu_mod_timer(icount_rt_timer, + qemu_get_clock_ms(rt_clock) + 1000); + icount_adjust(); +} + +static void icount_adjust_vm(void *opaque) +{ + qemu_mod_timer(icount_vm_timer, + qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10); + icount_adjust(); +} + +static int64_t qemu_icount_round(int64_t count) +{ + return (count + (1 << icount_time_shift) - 1) >> icount_time_shift; +} + +static void icount_warp_rt(void *opaque) +{ + if (vm_clock_warp_start == -1) { + return; + } + + if (runstate_is_running()) { + int64_t clock = qemu_get_clock_ns(rt_clock); + int64_t warp_delta = clock - vm_clock_warp_start; + if (use_icount == 1) { + qemu_icount_bias += warp_delta; + } else { + /* + * In adaptive mode, do not let the vm_clock run too + * far ahead of real time. + */ + int64_t cur_time = cpu_get_clock(); + int64_t cur_icount = qemu_get_clock_ns(vm_clock); + int64_t delta = cur_time - cur_icount; + qemu_icount_bias += MIN(warp_delta, delta); + } + if (qemu_clock_expired(vm_clock)) { + qemu_notify_event(); + } + } + vm_clock_warp_start = -1; +} + +void qemu_clock_warp(QEMUClock *clock) +{ + int64_t deadline; + + /* + * There are too many global variables to make the "warp" behavior + * applicable to other clocks. But a clock argument removes the + * need for if statements all over the place. + */ + if (clock != vm_clock || !use_icount) { + return; + } + + /* + * If the CPUs have been sleeping, advance the vm_clock timer now. This + * ensures that the deadline for the timer is computed correctly below. + * This also makes sure that the insn counter is synchronized before the + * CPU starts running, in case the CPU is woken by an event other than + * the earliest vm_clock timer. + */ + icount_warp_rt(NULL); + if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) { + qemu_del_timer(icount_warp_timer); + return; + } + + vm_clock_warp_start = qemu_get_clock_ns(rt_clock); + deadline = qemu_clock_deadline(vm_clock); + if (deadline > 0) { + /* + * Ensure the vm_clock proceeds even when the virtual CPU goes to + * sleep. Otherwise, the CPU might be waiting for a future timer + * interrupt to wake it up, but the interrupt never comes because + * the vCPU isn't running any insns and thus doesn't advance the + * vm_clock. + * + * An extreme solution for this problem would be to never let VCPUs + * sleep in icount mode if there is a pending vm_clock timer; rather + * time could just advance to the next vm_clock event. Instead, we + * do stop VCPUs and only advance vm_clock after some "real" time, + * (related to the time left until the next event) has passed. This + * rt_clock timer will do this. This avoids that the warps are too + * visible externally---for example, you will not be sending network + * packets continously instead of every 100ms. + */ + qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline); + } else { + qemu_notify_event(); + } +} + +static const VMStateDescription vmstate_timers = { + .name = "timer", + .version_id = 2, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_INT64(cpu_ticks_offset, TimersState), + VMSTATE_INT64(dummy, TimersState), + VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), + VMSTATE_END_OF_LIST() + } +}; + +void configure_icount(const char *option) +{ + vmstate_register(NULL, 0, &vmstate_timers, &timers_state); + if (!option) { + return; + } + + icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL); + if (strcmp(option, "auto") != 0) { + icount_time_shift = strtol(option, NULL, 0); + use_icount = 1; + return; + } + + use_icount = 2; + + /* 125MIPS seems a reasonable initial guess at the guest speed. + It will be corrected fairly quickly anyway. */ + icount_time_shift = 3; + + /* Have both realtime and virtual time triggers for speed adjustment. + The realtime trigger catches emulated time passing too slowly, + the virtual time trigger catches emulated time passing too fast. + Realtime triggers occur even when idle, so use them less frequently + than VM triggers. */ + icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL); + qemu_mod_timer(icount_rt_timer, + qemu_get_clock_ms(rt_clock) + 1000); + icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL); + qemu_mod_timer(icount_vm_timer, + qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10); +} + +/***********************************************************/ void hw_error(const char *fmt, ...) { va_list ap; @@ -272,143 +542,10 @@ static void qemu_kvm_eat_signals(CPUState *env) #endif /* !CONFIG_LINUX */ #ifndef _WIN32 -static int io_thread_fd = -1; - -static void qemu_event_increment(void) -{ - /* Write 8 bytes to be compatible with eventfd. */ - static const uint64_t val = 1; - ssize_t ret; - - if (io_thread_fd == -1) { - return; - } - do { - ret = write(io_thread_fd, &val, sizeof(val)); - } while (ret < 0 && errno == EINTR); - - /* EAGAIN is fine, a read must be pending. */ - if (ret < 0 && errno != EAGAIN) { - fprintf(stderr, "qemu_event_increment: write() failed: %s\n", - strerror(errno)); - exit (1); - } -} - -static void qemu_event_read(void *opaque) -{ - int fd = (intptr_t)opaque; - ssize_t len; - char buffer[512]; - - /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */ - do { - len = read(fd, buffer, sizeof(buffer)); - } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); -} - -static int qemu_event_init(void) -{ - int err; - int fds[2]; - - err = qemu_eventfd(fds); - if (err == -1) { - return -errno; - } - err = fcntl_setfl(fds[0], O_NONBLOCK); - if (err < 0) { - goto fail; - } - err = fcntl_setfl(fds[1], O_NONBLOCK); - if (err < 0) { - goto fail; - } - qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL, - (void *)(intptr_t)fds[0]); - - io_thread_fd = fds[1]; - return 0; - -fail: - close(fds[0]); - close(fds[1]); - return err; -} - static void dummy_signal(int sig) { } -/* If we have signalfd, we mask out the signals we want to handle and then - * use signalfd to listen for them. We rely on whatever the current signal - * handler is to dispatch the signals when we receive them. - */ -static void sigfd_handler(void *opaque) -{ - int fd = (intptr_t)opaque; - struct qemu_signalfd_siginfo info; - struct sigaction action; - ssize_t len; - - while (1) { - do { - len = read(fd, &info, sizeof(info)); - } while (len == -1 && errno == EINTR); - - if (len == -1 && errno == EAGAIN) { - break; - } - - if (len != sizeof(info)) { - printf("read from sigfd returned %zd: %m\n", len); - return; - } - - sigaction(info.ssi_signo, NULL, &action); - if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { - action.sa_sigaction(info.ssi_signo, - (siginfo_t *)&info, NULL); - } else if (action.sa_handler) { - action.sa_handler(info.ssi_signo); - } - } -} - -static int qemu_signal_init(void) -{ - int sigfd; - sigset_t set; - - /* - * SIG_IPI must be blocked in the main thread and must not be caught - * by sigwait() in the signal thread. Otherwise, the cpu thread will - * not catch it reliably. - */ - sigemptyset(&set); - sigaddset(&set, SIG_IPI); - pthread_sigmask(SIG_BLOCK, &set, NULL); - - sigemptyset(&set); - sigaddset(&set, SIGIO); - sigaddset(&set, SIGALRM); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_BLOCK, &set, NULL); - - sigfd = qemu_signalfd(&set); - if (sigfd == -1) { - fprintf(stderr, "failed to create signalfd\n"); - return -errno; - } - - fcntl_setfl(sigfd, O_NONBLOCK); - - qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL, - (void *)(intptr_t)sigfd); - - return 0; -} - static void qemu_kvm_init_cpu_signals(CPUState *env) { int r; @@ -452,38 +589,6 @@ static void qemu_tcg_init_cpu_signals(void) } #else /* _WIN32 */ - -HANDLE qemu_event_handle; - -static void dummy_event_handler(void *opaque) -{ -} - -static int qemu_event_init(void) -{ - qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL); - if (!qemu_event_handle) { - fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError()); - return -1; - } - qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL); - return 0; -} - -static void qemu_event_increment(void) -{ - if (!SetEvent(qemu_event_handle)) { - fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n", - GetLastError()); - exit (1); - } -} - -static int qemu_signal_init(void) -{ - return 0; -} - static void qemu_kvm_init_cpu_signals(CPUState *env) { abort(); @@ -509,38 +614,16 @@ static QemuCond qemu_cpu_cond; static QemuCond qemu_pause_cond; static QemuCond qemu_work_cond; -int qemu_init_main_loop(void) +void qemu_init_cpu_loop(void) { - int ret; - qemu_init_sigbus(); - - ret = qemu_signal_init(); - if (ret) { - return ret; - } - - /* Note eventfd must be drained before signalfd handlers run */ - ret = qemu_event_init(); - if (ret) { - return ret; - } - qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); qemu_cond_init(&qemu_work_cond); qemu_cond_init(&qemu_io_proceeded_cond); qemu_mutex_init(&qemu_global_mutex); - qemu_mutex_lock(&qemu_global_mutex); qemu_thread_get_self(&io_thread); - - return 0; -} - -void qemu_main_loop_start(void) -{ - resume_all_vcpus(); } void run_on_cpu(CPUState *env, void (*func)(void *data), void *data) @@ -686,7 +769,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) while (1) { cpu_exec_all(); - if (use_icount && qemu_next_icount_deadline() <= 0) { + if (use_icount && qemu_clock_deadline(vm_clock) <= 0) { qemu_notify_event(); } qemu_tcg_wait_io_event(); @@ -784,6 +867,7 @@ void pause_all_vcpus(void) { CPUState *penv = first_cpu; + qemu_clock_enable(vm_clock, false); while (penv) { penv->stop = 1; qemu_cpu_kick(penv); @@ -858,11 +942,6 @@ void qemu_init_vcpu(void *_env) } } -void qemu_notify_event(void) -{ - qemu_event_increment(); -} - void cpu_stop_current(void) { if (cpu_single_env) { @@ -914,7 +993,7 @@ static int tcg_cpu_exec(CPUState *env) qemu_icount -= (env->icount_decr.u16.low + env->icount_extra); env->icount_decr.u16.low = 0; env->icount_extra = 0; - count = qemu_icount_round(qemu_next_icount_deadline()); + count = qemu_icount_round(qemu_clock_deadline(vm_clock)); qemu_icount += count; decr = (count > 0xffff) ? 0xffff : count; count -= decr; @@ -1006,22 +1085,6 @@ void set_cpu_log_filename(const char *optarg) cpu_set_log_filename(optarg); } -/* Return the virtual CPU time, based on the instruction counter. */ -int64_t cpu_get_icount(void) -{ - int64_t icount; - CPUState *env = cpu_single_env;; - - icount = qemu_icount; - if (env) { - if (!can_do_io(env)) { - fprintf(stderr, "Bad clock read\n"); - } - icount -= (env->icount_decr.u16.low + env->icount_extra); - } - return qemu_icount_bias + (icount << icount_time_shift); -} - void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) { /* XXX: implement xxx_cpu_list for targets that still miss it */ @@ -2,8 +2,7 @@ #define QEMU_CPUS_H /* cpus.c */ -int qemu_init_main_loop(void); -void qemu_main_loop_start(void); +void qemu_init_cpu_loop(void); void resume_all_vcpus(void); void pause_all_vcpus(void); void cpu_stop_current(void); diff --git a/exec-all.h b/exec-all.h index 1120f84661..72ef246793 100644 --- a/exec-all.h +++ b/exec-all.h @@ -356,4 +356,18 @@ extern int singlestep; /* cpu-exec.c */ extern volatile sig_atomic_t exit_request; +/* Deterministic execution requires that IO only be performed on the last + instruction of a TB so that interrupts take effect immediately. */ +static inline int can_do_io(CPUState *env) +{ + if (!use_icount) { + return 1; + } + /* If not executing code then assume we are ok. */ + if (!env->current_tb) { + return 1; + } + return env->can_do_io != 0; +} + #endif @@ -125,9 +125,6 @@ CPUState *cpu_single_env; 1 = Precise instruction counting. 2 = Adaptive rate instruction counting. */ int use_icount = 0; -/* Current instruction counter. While executing translated code this may - include some instructions that have not yet been executed. */ -int64_t qemu_icount; typedef struct PageDesc { /* list of TBs intersecting this ram page */ diff --git a/hw/mac_dbdma.c b/hw/mac_dbdma.c index 5affdd18a5..1791ec12e1 100644 --- a/hw/mac_dbdma.c +++ b/hw/mac_dbdma.c @@ -661,11 +661,6 @@ void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, ch->io.channel = ch; } -void DBDMA_schedule(void) -{ - qemu_notify_event(); -} - static void dbdma_control_write(DBDMA_channel *ch) { diff --git a/hw/mac_dbdma.h b/hw/mac_dbdma.h index 933e17c5b9..6d1abe6aae 100644 --- a/hw/mac_dbdma.h +++ b/hw/mac_dbdma.h @@ -41,5 +41,4 @@ struct DBDMA_io { void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, DBDMA_rw rw, DBDMA_flush flush, void *opaque); -void DBDMA_schedule(void); void* DBDMA_init (MemoryRegion **dbdma_mem); diff --git a/iohandler.c b/iohandler.c index 4cc1c5ade6..5640d49388 100644 --- a/iohandler.c +++ b/iohandler.c @@ -26,6 +26,7 @@ #include "qemu-common.h" #include "qemu-char.h" #include "qemu-queue.h" +#include "main-loop.h" #ifndef _WIN32 #include <sys/wait.h> @@ -80,64 +81,12 @@ int qemu_set_fd_handler2(int fd, return 0; } -typedef struct IOTrampoline -{ - GIOChannel *chan; - IOHandler *fd_read; - IOHandler *fd_write; - void *opaque; - guint tag; -} IOTrampoline; - -static gboolean fd_trampoline(GIOChannel *chan, GIOCondition cond, gpointer opaque) -{ - IOTrampoline *tramp = opaque; - - if ((cond & G_IO_IN) && tramp->fd_read) { - tramp->fd_read(tramp->opaque); - } - - if ((cond & G_IO_OUT) && tramp->fd_write) { - tramp->fd_write(tramp->opaque); - } - - return TRUE; -} - int qemu_set_fd_handler(int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque) { - static IOTrampoline fd_trampolines[FD_SETSIZE]; - IOTrampoline *tramp = &fd_trampolines[fd]; - - if (tramp->tag != 0) { - g_io_channel_unref(tramp->chan); - g_source_remove(tramp->tag); - tramp->tag = 0; - } - - if (fd_read || fd_write || opaque) { - GIOCondition cond = 0; - - tramp->fd_read = fd_read; - tramp->fd_write = fd_write; - tramp->opaque = opaque; - - if (fd_read) { - cond |= G_IO_IN | G_IO_ERR; - } - - if (fd_write) { - cond |= G_IO_OUT | G_IO_ERR; - } - - tramp->chan = g_io_channel_unix_new(fd); - tramp->tag = g_io_add_watch(tramp->chan, cond, fd_trampoline, tramp); - } - - return 0; + return qemu_set_fd_handler2(fd, NULL, fd_read, fd_write, opaque); } void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds) diff --git a/main-loop.c b/main-loop.c new file mode 100644 index 0000000000..bfecdb7769 --- /dev/null +++ b/main-loop.c @@ -0,0 +1,495 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "config-host.h" +#include <unistd.h> +#include <signal.h> +#include <time.h> +#include <errno.h> +#include <sys/time.h> +#include <stdbool.h> + +#ifdef _WIN32 +#include <windows.h> +#include <winsock2.h> +#include <ws2tcpip.h> +#else +#include <sys/socket.h> +#include <netinet/in.h> +#include <net/if.h> +#include <arpa/inet.h> +#include <sys/select.h> +#include <sys/stat.h> +#include "compatfd.h" +#endif + +#include <glib.h> + +#include "main-loop.h" +#include "qemu-timer.h" +#include "slirp/libslirp.h" + +#ifndef _WIN32 + +static int io_thread_fd = -1; + +void qemu_notify_event(void) +{ + /* Write 8 bytes to be compatible with eventfd. */ + static const uint64_t val = 1; + ssize_t ret; + + if (io_thread_fd == -1) { + return; + } + do { + ret = write(io_thread_fd, &val, sizeof(val)); + } while (ret < 0 && errno == EINTR); + + /* EAGAIN is fine, a read must be pending. */ + if (ret < 0 && errno != EAGAIN) { + fprintf(stderr, "qemu_notify_event: write() failed: %s\n", + strerror(errno)); + exit(1); + } +} + +static void qemu_event_read(void *opaque) +{ + int fd = (intptr_t)opaque; + ssize_t len; + char buffer[512]; + + /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */ + do { + len = read(fd, buffer, sizeof(buffer)); + } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); +} + +static int qemu_event_init(void) +{ + int err; + int fds[2]; + + err = qemu_eventfd(fds); + if (err == -1) { + return -errno; + } + err = fcntl_setfl(fds[0], O_NONBLOCK); + if (err < 0) { + goto fail; + } + err = fcntl_setfl(fds[1], O_NONBLOCK); + if (err < 0) { + goto fail; + } + qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL, + (void *)(intptr_t)fds[0]); + + io_thread_fd = fds[1]; + return 0; + +fail: + close(fds[0]); + close(fds[1]); + return err; +} + +/* If we have signalfd, we mask out the signals we want to handle and then + * use signalfd to listen for them. We rely on whatever the current signal + * handler is to dispatch the signals when we receive them. + */ +static void sigfd_handler(void *opaque) +{ + int fd = (intptr_t)opaque; + struct qemu_signalfd_siginfo info; + struct sigaction action; + ssize_t len; + + while (1) { + do { + len = read(fd, &info, sizeof(info)); + } while (len == -1 && errno == EINTR); + + if (len == -1 && errno == EAGAIN) { + break; + } + + if (len != sizeof(info)) { + printf("read from sigfd returned %zd: %m\n", len); + return; + } + + sigaction(info.ssi_signo, NULL, &action); + if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { + action.sa_sigaction(info.ssi_signo, + (siginfo_t *)&info, NULL); + } else if (action.sa_handler) { + action.sa_handler(info.ssi_signo); + } + } +} + +static int qemu_signal_init(void) +{ + int sigfd; + sigset_t set; + + /* + * SIG_IPI must be blocked in the main thread and must not be caught + * by sigwait() in the signal thread. Otherwise, the cpu thread will + * not catch it reliably. + */ + sigemptyset(&set); + sigaddset(&set, SIG_IPI); + pthread_sigmask(SIG_BLOCK, &set, NULL); + + sigemptyset(&set); + sigaddset(&set, SIGIO); + sigaddset(&set, SIGALRM); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_BLOCK, &set, NULL); + + sigfd = qemu_signalfd(&set); + if (sigfd == -1) { + fprintf(stderr, "failed to create signalfd\n"); + return -errno; + } + + fcntl_setfl(sigfd, O_NONBLOCK); + + qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL, + (void *)(intptr_t)sigfd); + + return 0; +} + +#else /* _WIN32 */ + +HANDLE qemu_event_handle; + +static void dummy_event_handler(void *opaque) +{ +} + +static int qemu_event_init(void) +{ + qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!qemu_event_handle) { + fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError()); + return -1; + } + qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL); + return 0; +} + +void qemu_notify_event(void) +{ + if (!SetEvent(qemu_event_handle)) { + fprintf(stderr, "qemu_notify_event: SetEvent failed: %ld\n", + GetLastError()); + exit(1); + } +} + +static int qemu_signal_init(void) +{ + return 0; +} +#endif + +int qemu_init_main_loop(void) +{ + int ret; + + qemu_mutex_lock_iothread(); + ret = qemu_signal_init(); + if (ret) { + return ret; + } + + /* Note eventfd must be drained before signalfd handlers run */ + ret = qemu_event_init(); + if (ret) { + return ret; + } + + return 0; +} + + +static GPollFD poll_fds[1024 * 2]; /* this is probably overkill */ +static int n_poll_fds; +static int max_priority; + +static void glib_select_fill(int *max_fd, fd_set *rfds, fd_set *wfds, + fd_set *xfds, struct timeval *tv) +{ + GMainContext *context = g_main_context_default(); + int i; + int timeout = 0, cur_timeout; + + g_main_context_prepare(context, &max_priority); + + n_poll_fds = g_main_context_query(context, max_priority, &timeout, + poll_fds, ARRAY_SIZE(poll_fds)); + g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds)); + + for (i = 0; i < n_poll_fds; i++) { + GPollFD *p = &poll_fds[i]; + + if ((p->events & G_IO_IN)) { + FD_SET(p->fd, rfds); + *max_fd = MAX(*max_fd, p->fd); + } + if ((p->events & G_IO_OUT)) { + FD_SET(p->fd, wfds); + *max_fd = MAX(*max_fd, p->fd); + } + if ((p->events & G_IO_ERR)) { + FD_SET(p->fd, xfds); + *max_fd = MAX(*max_fd, p->fd); + } + } + + cur_timeout = (tv->tv_sec * 1000) + ((tv->tv_usec + 500) / 1000); + if (timeout >= 0 && timeout < cur_timeout) { + tv->tv_sec = timeout / 1000; + tv->tv_usec = (timeout % 1000) * 1000; + } +} + +static void glib_select_poll(fd_set *rfds, fd_set *wfds, fd_set *xfds, + bool err) +{ + GMainContext *context = g_main_context_default(); + + if (!err) { + int i; + + for (i = 0; i < n_poll_fds; i++) { + GPollFD *p = &poll_fds[i]; + + if ((p->events & G_IO_IN) && FD_ISSET(p->fd, rfds)) { + p->revents |= G_IO_IN; + } + if ((p->events & G_IO_OUT) && FD_ISSET(p->fd, wfds)) { + p->revents |= G_IO_OUT; + } + if ((p->events & G_IO_ERR) && FD_ISSET(p->fd, xfds)) { + p->revents |= G_IO_ERR; + } + } + } + + if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) { + g_main_context_dispatch(context); + } +} + +#ifdef _WIN32 +/***********************************************************/ +/* Polling handling */ + +typedef struct PollingEntry { + PollingFunc *func; + void *opaque; + struct PollingEntry *next; +} PollingEntry; + +static PollingEntry *first_polling_entry; + +int qemu_add_polling_cb(PollingFunc *func, void *opaque) +{ + PollingEntry **ppe, *pe; + pe = g_malloc0(sizeof(PollingEntry)); + pe->func = func; + pe->opaque = opaque; + for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next); + *ppe = pe; + return 0; +} + +void qemu_del_polling_cb(PollingFunc *func, void *opaque) +{ + PollingEntry **ppe, *pe; + for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) { + pe = *ppe; + if (pe->func == func && pe->opaque == opaque) { + *ppe = pe->next; + g_free(pe); + break; + } + } +} + +/***********************************************************/ +/* Wait objects support */ +typedef struct WaitObjects { + int num; + HANDLE events[MAXIMUM_WAIT_OBJECTS + 1]; + WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1]; + void *opaque[MAXIMUM_WAIT_OBJECTS + 1]; +} WaitObjects; + +static WaitObjects wait_objects = {0}; + +int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque) +{ + WaitObjects *w = &wait_objects; + if (w->num >= MAXIMUM_WAIT_OBJECTS) { + return -1; + } + w->events[w->num] = handle; + w->func[w->num] = func; + w->opaque[w->num] = opaque; + w->num++; + return 0; +} + +void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque) +{ + int i, found; + WaitObjects *w = &wait_objects; + + found = 0; + for (i = 0; i < w->num; i++) { + if (w->events[i] == handle) { + found = 1; + } + if (found) { + w->events[i] = w->events[i + 1]; + w->func[i] = w->func[i + 1]; + w->opaque[i] = w->opaque[i + 1]; + } + } + if (found) { + w->num--; + } +} + +static void os_host_main_loop_wait(int *timeout) +{ + int ret, ret2, i; + PollingEntry *pe; + + /* XXX: need to suppress polling by better using win32 events */ + ret = 0; + for (pe = first_polling_entry; pe != NULL; pe = pe->next) { + ret |= pe->func(pe->opaque); + } + if (ret == 0) { + int err; + WaitObjects *w = &wait_objects; + + qemu_mutex_unlock_iothread(); + ret = WaitForMultipleObjects(w->num, w->events, FALSE, *timeout); + qemu_mutex_lock_iothread(); + if (WAIT_OBJECT_0 + 0 <= ret && ret <= WAIT_OBJECT_0 + w->num - 1) { + if (w->func[ret - WAIT_OBJECT_0]) { + w->func[ret - WAIT_OBJECT_0](w->opaque[ret - WAIT_OBJECT_0]); + } + + /* Check for additional signaled events */ + for (i = (ret - WAIT_OBJECT_0 + 1); i < w->num; i++) { + /* Check if event is signaled */ + ret2 = WaitForSingleObject(w->events[i], 0); + if (ret2 == WAIT_OBJECT_0) { + if (w->func[i]) { + w->func[i](w->opaque[i]); + } + } else if (ret2 != WAIT_TIMEOUT) { + err = GetLastError(); + fprintf(stderr, "WaitForSingleObject error %d %d\n", i, err); + } + } + } else if (ret != WAIT_TIMEOUT) { + err = GetLastError(); + fprintf(stderr, "WaitForMultipleObjects error %d %d\n", ret, err); + } + } + + *timeout = 0; +} +#else +static inline void os_host_main_loop_wait(int *timeout) +{ +} +#endif + +int main_loop_wait(int nonblocking) +{ + fd_set rfds, wfds, xfds; + int ret, nfds; + struct timeval tv; + int timeout; + + if (nonblocking) { + timeout = 0; + } else { + timeout = qemu_calculate_timeout(); + qemu_bh_update_timeout(&timeout); + } + + os_host_main_loop_wait(&timeout); + + tv.tv_sec = timeout / 1000; + tv.tv_usec = (timeout % 1000) * 1000; + + /* poll any events */ + /* XXX: separate device handlers from system ones */ + nfds = -1; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&xfds); + +#ifdef CONFIG_SLIRP + slirp_select_fill(&nfds, &rfds, &wfds, &xfds); +#endif + qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds); + glib_select_fill(&nfds, &rfds, &wfds, &xfds, &tv); + + if (timeout > 0) { + qemu_mutex_unlock_iothread(); + } + + ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv); + + if (timeout > 0) { + qemu_mutex_lock_iothread(); + } + + glib_select_poll(&rfds, &wfds, &xfds, (ret < 0)); + qemu_iohandler_poll(&rfds, &wfds, &xfds, ret); +#ifdef CONFIG_SLIRP + slirp_select_poll(&rfds, &wfds, &xfds, (ret < 0)); +#endif + + qemu_run_all_timers(); + + /* Check bottom-halves last in case any of the earlier events triggered + them. */ + qemu_bh_poll(); + + return ret; +} diff --git a/main-loop.h b/main-loop.h new file mode 100644 index 0000000000..8a716b133f --- /dev/null +++ b/main-loop.h @@ -0,0 +1,351 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_MAIN_LOOP_H +#define QEMU_MAIN_LOOP_H 1 + +#ifdef SIGRTMIN +#define SIG_IPI (SIGRTMIN+4) +#else +#define SIG_IPI SIGUSR1 +#endif + +/** + * qemu_init_main_loop: Set up the process so that it can run the main loop. + * + * This includes setting up signal handlers. It should be called before + * any other threads are created. In addition, threads other than the + * main one should block signals that are trapped by the main loop. + * For simplicity, you can consider these signals to be safe: SIGUSR1, + * SIGUSR2, thread signals (SIGFPE, SIGILL, SIGSEGV, SIGBUS) and real-time + * signals if available. Remember that Windows in practice does not have + * signals, though. + */ +int qemu_init_main_loop(void); + +/** + * main_loop_wait: Run one iteration of the main loop. + * + * If @nonblocking is true, poll for events, otherwise suspend until + * one actually occurs. The main loop usually consists of a loop that + * repeatedly calls main_loop_wait(false). + * + * Main loop services include file descriptor callbacks, bottom halves + * and timers (defined in qemu-timer.h). Bottom halves are similar to timers + * that execute immediately, but have a lower overhead and scheduling them + * is wait-free, thread-safe and signal-safe. + * + * It is sometimes useful to put a whole program in a coroutine. In this + * case, the coroutine actually should be started from within the main loop, + * so that the main loop can run whenever the coroutine yields. To do this, + * you can use a bottom half to enter the coroutine as soon as the main loop + * starts: + * + * void enter_co_bh(void *opaque) { + * QEMUCoroutine *co = opaque; + * qemu_coroutine_enter(co, NULL); + * } + * + * ... + * QEMUCoroutine *co = qemu_coroutine_create(coroutine_entry); + * QEMUBH *start_bh = qemu_bh_new(enter_co_bh, co); + * qemu_bh_schedule(start_bh); + * while (...) { + * main_loop_wait(false); + * } + * + * (In the future we may provide a wrapper for this). + * + * @nonblocking: Whether the caller should block until an event occurs. + */ +int main_loop_wait(int nonblocking); + +/** + * qemu_notify_event: Force processing of pending events. + * + * Similar to signaling a condition variable, qemu_notify_event forces + * main_loop_wait to look at pending events and exit. The caller of + * main_loop_wait will usually call it again very soon, so qemu_notify_event + * also has the side effect of recalculating the sets of file descriptors + * that the main loop waits for. + * + * Calling qemu_notify_event is rarely necessary, because main loop + * services (bottom halves and timers) call it themselves. One notable + * exception occurs when using qemu_set_fd_handler2 (see below). + */ +void qemu_notify_event(void); + +#ifdef _WIN32 +/* return TRUE if no sleep should be done afterwards */ +typedef int PollingFunc(void *opaque); + +/** + * qemu_add_polling_cb: Register a Windows-specific polling callback + * + * Currently, under Windows some events are polled rather than waited for. + * Polling callbacks do not ensure that @func is called timely, because + * the main loop might wait for an arbitrarily long time. If possible, + * you should instead create a separate thread that does a blocking poll + * and set a Win32 event object. The event can then be passed to + * qemu_add_wait_object. + * + * Polling callbacks really have nothing Windows specific in them, but + * as they are a hack and are currenly not necessary under POSIX systems, + * they are only available when QEMU is running under Windows. + * + * @func: The function that does the polling, and returns 1 to force + * immediate completion of main_loop_wait. + * @opaque: A pointer-size value that is passed to @func. + */ +int qemu_add_polling_cb(PollingFunc *func, void *opaque); + +/** + * qemu_del_polling_cb: Unregister a Windows-specific polling callback + * + * This function removes a callback that was registered with + * qemu_add_polling_cb. + * + * @func: The function that was passed to qemu_add_polling_cb. + * @opaque: A pointer-size value that was passed to qemu_add_polling_cb. + */ +void qemu_del_polling_cb(PollingFunc *func, void *opaque); + +/* Wait objects handling */ +typedef void WaitObjectFunc(void *opaque); + +/** + * qemu_add_wait_object: Register a callback for a Windows handle + * + * Under Windows, the iohandler mechanism can only be used with sockets. + * QEMU must use the WaitForMultipleObjects API to wait on other handles. + * This function registers a #HANDLE with QEMU, so that it will be included + * in the main loop's calls to WaitForMultipleObjects. When the handle + * is in a signaled state, QEMU will call @func. + * + * @handle: The Windows handle to be observed. + * @func: A function to be called when @handle is in a signaled state. + * @opaque: A pointer-size value that is passed to @func. + */ +int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); + +/** + * qemu_del_wait_object: Unregister a callback for a Windows handle + * + * This function removes a callback that was registered with + * qemu_add_wait_object. + * + * @func: The function that was passed to qemu_add_wait_object. + * @opaque: A pointer-size value that was passed to qemu_add_wait_object. + */ +void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); +#endif + +/* async I/O support */ + +typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size); +typedef int IOCanReadHandler(void *opaque); +typedef void IOHandler(void *opaque); + +/** + * qemu_set_fd_handler2: Register a file descriptor with the main loop + * + * This function tells the main loop to wake up whenever one of the + * following conditions is true: + * + * 1) if @fd_write is not %NULL, when the file descriptor is writable; + * + * 2) if @fd_read is not %NULL, when the file descriptor is readable. + * + * @fd_read_poll can be used to disable the @fd_read callback temporarily. + * This is useful to avoid calling qemu_set_fd_handler2 every time the + * client becomes interested in reading (or dually, stops being interested). + * A typical example is when @fd is a listening socket and you want to bound + * the number of active clients. Remember to call qemu_notify_event whenever + * the condition may change from %false to %true. + * + * The callbacks that are set up by qemu_set_fd_handler2 are level-triggered. + * If @fd_read does not read from @fd, or @fd_write does not write to @fd + * until its buffers are full, they will be called again on the next + * iteration. + * + * @fd: The file descriptor to be observed. Under Windows it must be + * a #SOCKET. + * + * @fd_read_poll: A function that returns 1 if the @fd_read callback + * should be fired. If the function returns 0, the main loop will not + * end its iteration even if @fd becomes readable. + * + * @fd_read: A level-triggered callback that is fired if @fd is readable + * at the beginning of a main loop iteration, or if it becomes readable + * during one. + * + * @fd_write: A level-triggered callback that is fired when @fd is writable + * at the beginning of a main loop iteration, or if it becomes writable + * during one. + * + * @opaque: A pointer-sized value that is passed to @fd_read_poll, + * @fd_read and @fd_write. + */ +int qemu_set_fd_handler2(int fd, + IOCanReadHandler *fd_read_poll, + IOHandler *fd_read, + IOHandler *fd_write, + void *opaque); + +/** + * qemu_set_fd_handler: Register a file descriptor with the main loop + * + * This function tells the main loop to wake up whenever one of the + * following conditions is true: + * + * 1) if @fd_write is not %NULL, when the file descriptor is writable; + * + * 2) if @fd_read is not %NULL, when the file descriptor is readable. + * + * The callbacks that are set up by qemu_set_fd_handler are level-triggered. + * If @fd_read does not read from @fd, or @fd_write does not write to @fd + * until its buffers are full, they will be called again on the next + * iteration. + * + * @fd: The file descriptor to be observed. Under Windows it must be + * a #SOCKET. + * + * @fd_read: A level-triggered callback that is fired if @fd is readable + * at the beginning of a main loop iteration, or if it becomes readable + * during one. + * + * @fd_write: A level-triggered callback that is fired when @fd is writable + * at the beginning of a main loop iteration, or if it becomes writable + * during one. + * + * @opaque: A pointer-sized value that is passed to @fd_read and @fd_write. + */ +int qemu_set_fd_handler(int fd, + IOHandler *fd_read, + IOHandler *fd_write, + void *opaque); + +typedef struct QEMUBH QEMUBH; +typedef void QEMUBHFunc(void *opaque); + +/** + * qemu_bh_new: Allocate a new bottom half structure. + * + * Bottom halves are lightweight callbacks whose invocation is guaranteed + * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure + * is opaque and must be allocated prior to its use. + */ +QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); + +/** + * qemu_bh_schedule: Schedule a bottom half. + * + * Scheduling a bottom half interrupts the main loop and causes the + * execution of the callback that was passed to qemu_bh_new. + * + * Bottom halves that are scheduled from a bottom half handler are instantly + * invoked. This can create an infinite loop if a bottom half handler + * schedules itself. + * + * @bh: The bottom half to be scheduled. + */ +void qemu_bh_schedule(QEMUBH *bh); + +/** + * qemu_bh_cancel: Cancel execution of a bottom half. + * + * Canceling execution of a bottom half undoes the effect of calls to + * qemu_bh_schedule without freeing its resources yet. While cancellation + * itself is also wait-free and thread-safe, it can of course race with the + * loop that executes bottom halves unless you are holding the iothread + * mutex. This makes it mostly useless if you are not holding the mutex. + * + * @bh: The bottom half to be canceled. + */ +void qemu_bh_cancel(QEMUBH *bh); + +/** + *qemu_bh_delete: Cancel execution of a bottom half and free its resources. + * + * Deleting a bottom half frees the memory that was allocated for it by + * qemu_bh_new. It also implies canceling the bottom half if it was + * scheduled. + * + * @bh: The bottom half to be deleted. + */ +void qemu_bh_delete(QEMUBH *bh); + +#ifdef CONFIG_POSIX +/** + * qemu_add_child_watch: Register a child process for reaping. + * + * Under POSIX systems, a parent process must read the exit status of + * its child processes using waitpid, or the operating system will not + * free some of the resources attached to that process. + * + * This function directs the QEMU main loop to observe a child process + * and call waitpid as soon as it exits; the watch is then removed + * automatically. It is useful whenever QEMU forks a child process + * but will find out about its termination by other means such as a + * "broken pipe". + * + * @pid: The pid that QEMU should observe. + */ +int qemu_add_child_watch(pid_t pid); +#endif + +/** + * qemu_mutex_lock_iothread: Lock the main loop mutex. + * + * This function locks the main loop mutex. The mutex is taken by + * qemu_init_main_loop and always taken except while waiting on + * external events (such as with select). The mutex should be taken + * by threads other than the main loop thread when calling + * qemu_bh_new(), qemu_set_fd_handler() and basically all other + * functions documented in this file. + */ +void qemu_mutex_lock_iothread(void); + +/** + * qemu_mutex_unlock_iothread: Unlock the main loop mutex. + * + * This function unlocks the main loop mutex. The mutex is taken by + * qemu_init_main_loop and always taken except while waiting on + * external events (such as with select). The mutex should be unlocked + * as soon as possible by threads other than the main loop thread, + * because it prevents the main loop from processing callbacks, + * including timers and bottom halves. + */ +void qemu_mutex_unlock_iothread(void); + +/* internal interfaces */ + +void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds); +void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc); + +void qemu_bh_schedule_idle(QEMUBH *bh); +int qemu_bh_poll(void); +void qemu_bh_update_timeout(int *timeout); + +#endif diff --git a/os-win32.c b/os-win32.c index f09f01fc49..79094016f1 100644 --- a/os-win32.c +++ b/os-win32.c @@ -48,129 +48,6 @@ int setenv(const char *name, const char *value, int overwrite) return result; } -/***********************************************************/ -/* Polling handling */ - -typedef struct PollingEntry { - PollingFunc *func; - void *opaque; - struct PollingEntry *next; -} PollingEntry; - -static PollingEntry *first_polling_entry; - -int qemu_add_polling_cb(PollingFunc *func, void *opaque) -{ - PollingEntry **ppe, *pe; - pe = g_malloc0(sizeof(PollingEntry)); - pe->func = func; - pe->opaque = opaque; - for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next); - *ppe = pe; - return 0; -} - -void qemu_del_polling_cb(PollingFunc *func, void *opaque) -{ - PollingEntry **ppe, *pe; - for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) { - pe = *ppe; - if (pe->func == func && pe->opaque == opaque) { - *ppe = pe->next; - g_free(pe); - break; - } - } -} - -/***********************************************************/ -/* Wait objects support */ -typedef struct WaitObjects { - int num; - HANDLE events[MAXIMUM_WAIT_OBJECTS + 1]; - WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1]; - void *opaque[MAXIMUM_WAIT_OBJECTS + 1]; -} WaitObjects; - -static WaitObjects wait_objects = {0}; - -int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque) -{ - WaitObjects *w = &wait_objects; - - if (w->num >= MAXIMUM_WAIT_OBJECTS) - return -1; - w->events[w->num] = handle; - w->func[w->num] = func; - w->opaque[w->num] = opaque; - w->num++; - return 0; -} - -void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque) -{ - int i, found; - WaitObjects *w = &wait_objects; - - found = 0; - for (i = 0; i < w->num; i++) { - if (w->events[i] == handle) - found = 1; - if (found) { - w->events[i] = w->events[i + 1]; - w->func[i] = w->func[i + 1]; - w->opaque[i] = w->opaque[i + 1]; - } - } - if (found) - w->num--; -} - -void os_host_main_loop_wait(int *timeout) -{ - int ret, ret2, i; - PollingEntry *pe; - - /* XXX: need to suppress polling by better using win32 events */ - ret = 0; - for(pe = first_polling_entry; pe != NULL; pe = pe->next) { - ret |= pe->func(pe->opaque); - } - if (ret == 0) { - int err; - WaitObjects *w = &wait_objects; - - qemu_mutex_unlock_iothread(); - ret = WaitForMultipleObjects(w->num, w->events, FALSE, *timeout); - qemu_mutex_lock_iothread(); - if (WAIT_OBJECT_0 + 0 <= ret && ret <= WAIT_OBJECT_0 + w->num - 1) { - if (w->func[ret - WAIT_OBJECT_0]) - w->func[ret - WAIT_OBJECT_0](w->opaque[ret - WAIT_OBJECT_0]); - - /* Check for additional signaled events */ - for(i = (ret - WAIT_OBJECT_0 + 1); i < w->num; i++) { - - /* Check if event is signaled */ - ret2 = WaitForSingleObject(w->events[i], 0); - if(ret2 == WAIT_OBJECT_0) { - if (w->func[i]) - w->func[i](w->opaque[i]); - } else if (ret2 == WAIT_TIMEOUT) { - } else { - err = GetLastError(); - fprintf(stderr, "WaitForSingleObject error %d %d\n", i, err); - } - } - } else if (ret == WAIT_TIMEOUT) { - } else { - err = GetLastError(); - fprintf(stderr, "WaitForMultipleObjects error %d %d\n", ret, err); - } - } - - *timeout = 0; -} - static BOOL WINAPI qemu_ctrl_handler(DWORD type) { exit(STATUS_CONTROL_C_EXIT); diff --git a/qemu-char.h b/qemu-char.h index eebbdd8f01..7efcf99f53 100644 --- a/qemu-char.h +++ b/qemu-char.h @@ -7,6 +7,7 @@ #include "qemu-config.h" #include "qobject.h" #include "qstring.h" +#include "main-loop.h" /* character device */ @@ -237,15 +238,4 @@ void qemu_chr_close_mem(CharDriverState *chr); QString *qemu_chr_mem_to_qs(CharDriverState *chr); size_t qemu_chr_mem_osize(const CharDriverState *chr); -/* async I/O support */ - -int qemu_set_fd_handler2(int fd, - IOCanReadHandler *fd_read_poll, - IOHandler *fd_read, - IOHandler *fd_write, - void *opaque); -int qemu_set_fd_handler(int fd, - IOHandler *fd_read, - IOHandler *fd_write, - void *opaque); #endif diff --git a/qemu-common.h b/qemu-common.h index 5e87bdf2f2..1c15cb17a7 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -13,7 +13,6 @@ typedef struct QEMUTimer QEMUTimer; typedef struct QEMUFile QEMUFile; -typedef struct QEMUBH QEMUBH; typedef struct DeviceState DeviceState; struct Monitor; @@ -96,6 +95,10 @@ static inline char *realpath(const char *path, char *resolved_path) } #endif +/* icount */ +void configure_icount(const char *option); +extern int use_icount; + /* FIXME: Remove NEED_CPU_H. */ #ifndef NEED_CPU_H @@ -113,23 +116,6 @@ static inline char *realpath(const char *path, char *resolved_path) int qemu_main(int argc, char **argv, char **envp); #endif -/* bottom halves */ -typedef void QEMUBHFunc(void *opaque); - -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); -void qemu_bh_schedule(QEMUBH *bh); -/* Bottom halfs that are scheduled from a bottom half handler are instantly - * invoked. This can create an infinite loop if a bottom half handler - * schedules itself. qemu_bh_schedule_idle() avoids this infinite loop by - * ensuring that the bottom half isn't executed until the next main loop - * iteration. - */ -void qemu_bh_schedule_idle(QEMUBH *bh); -void qemu_bh_cancel(QEMUBH *bh); -void qemu_bh_delete(QEMUBH *bh); -int qemu_bh_poll(void); -void qemu_bh_update_timeout(int *timeout); - void qemu_get_timedate(struct tm *tm, int offset); int qemu_timedate_diff(struct tm *tm); @@ -183,16 +169,12 @@ const char *path(const char *pathname); void *qemu_oom_check(void *ptr); -void qemu_mutex_lock_iothread(void); -void qemu_mutex_unlock_iothread(void); - int qemu_open(const char *name, int flags, ...); ssize_t qemu_write_full(int fd, const void *buf, size_t count) QEMU_WARN_UNUSED_RESULT; void qemu_set_cloexec(int fd); #ifndef _WIN32 -int qemu_add_child_watch(pid_t pid); int qemu_eventfd(int pipefd[2]); int qemu_pipe(int pipefd[2]); #endif @@ -207,14 +189,6 @@ int qemu_pipe(int pipefd[2]); void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2); -/* IO callbacks. */ -typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size); -typedef int IOCanReadHandler(void *opaque); -typedef void IOHandler(void *opaque); - -void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds); -void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc); - struct ParallelIOArg { void *buffer; int count; @@ -276,9 +250,6 @@ void cpu_exec_init_all(void); void cpu_save(QEMUFile *f, void *opaque); int cpu_load(QEMUFile *f, void *opaque, int version_id); -/* Force QEMU to process pending events */ -void qemu_notify_event(void); - /* Unblock cpu */ void qemu_cpu_kick(void *env); void qemu_cpu_kick_self(void); diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c index 2a385a3bb8..6b58160058 100644 --- a/qemu-coroutine-lock.c +++ b/qemu-coroutine-lock.c @@ -26,6 +26,7 @@ #include "qemu-coroutine.h" #include "qemu-coroutine-int.h" #include "qemu-queue.h" +#include "main-loop.h" #include "trace.h" static QTAILQ_HEAD(, Coroutine) unlock_bh_queue = diff --git a/qemu-os-posix.h b/qemu-os-posix.h index 81fd9ab389..920499d836 100644 --- a/qemu-os-posix.h +++ b/qemu-os-posix.h @@ -26,10 +26,6 @@ #ifndef QEMU_OS_POSIX_H #define QEMU_OS_POSIX_H -static inline void os_host_main_loop_wait(int *timeout) -{ -} - void os_set_line_buffering(void); void os_set_proc_name(const char *s); void os_setup_signal_handling(void); diff --git a/qemu-os-win32.h b/qemu-os-win32.h index 8a069d7fb6..8eda4bdc20 100644 --- a/qemu-os-win32.h +++ b/qemu-os-win32.h @@ -28,26 +28,11 @@ #include <windows.h> #include <winsock2.h> +#include "main-loop.h" /* Declaration of ffs() is missing in MinGW's strings.h. */ int ffs(int i); -/* Polling handling */ - -/* return TRUE if no sleep should be done afterwards */ -typedef int PollingFunc(void *opaque); - -int qemu_add_polling_cb(PollingFunc *func, void *opaque); -void qemu_del_polling_cb(PollingFunc *func, void *opaque); - -/* Wait objects handling */ -typedef void WaitObjectFunc(void *opaque); - -int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); -void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); - -void os_host_main_loop_wait(int *timeout); - static inline void os_setup_signal_handling(void) {} static inline void os_daemonize(void) {} static inline void os_setup_post(void) {} diff --git a/qemu-timer.c b/qemu-timer.c index ad1fc8b871..f11a28dd03 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -46,82 +46,6 @@ #include "qemu-timer.h" -/* Conversion factor from emulated instructions to virtual clock ticks. */ -int icount_time_shift; -/* Arbitrarily pick 1MIPS as the minimum allowable speed. */ -#define MAX_ICOUNT_SHIFT 10 -/* Compensate for varying guest execution speed. */ -int64_t qemu_icount_bias; -static QEMUTimer *icount_rt_timer; -static QEMUTimer *icount_vm_timer; - -/***********************************************************/ -/* guest cycle counter */ - -typedef struct TimersState { - int64_t cpu_ticks_prev; - int64_t cpu_ticks_offset; - int64_t cpu_clock_offset; - int32_t cpu_ticks_enabled; - int64_t dummy; -} TimersState; - -TimersState timers_state; - -/* return the host CPU cycle counter and handle stop/restart */ -int64_t cpu_get_ticks(void) -{ - if (use_icount) { - return cpu_get_icount(); - } - if (!timers_state.cpu_ticks_enabled) { - return timers_state.cpu_ticks_offset; - } else { - int64_t ticks; - ticks = cpu_get_real_ticks(); - if (timers_state.cpu_ticks_prev > ticks) { - /* Note: non increasing ticks may happen if the host uses - software suspend */ - timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; - } - timers_state.cpu_ticks_prev = ticks; - return ticks + timers_state.cpu_ticks_offset; - } -} - -/* return the host CPU monotonic timer and handle stop/restart */ -static int64_t cpu_get_clock(void) -{ - int64_t ti; - if (!timers_state.cpu_ticks_enabled) { - return timers_state.cpu_clock_offset; - } else { - ti = get_clock(); - return ti + timers_state.cpu_clock_offset; - } -} - -/* enable cpu_get_ticks() */ -void cpu_enable_ticks(void) -{ - if (!timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset -= cpu_get_real_ticks(); - timers_state.cpu_clock_offset -= get_clock(); - timers_state.cpu_ticks_enabled = 1; - } -} - -/* disable cpu_get_ticks() : the clock is stopped. You must not call - cpu_get_ticks() after that. */ -void cpu_disable_ticks(void) -{ - if (timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset = cpu_get_ticks(); - timers_state.cpu_clock_offset = cpu_get_clock(); - timers_state.cpu_ticks_enabled = 0; - } -} - /***********************************************************/ /* timers */ @@ -133,7 +57,7 @@ struct QEMUClock { int type; int enabled; - QEMUTimer *warp_timer; + QEMUTimer *active_timers; NotifierList reset_notifiers; int64_t last; @@ -152,7 +76,7 @@ struct qemu_alarm_timer { char const *name; int (*start)(struct qemu_alarm_timer *t); void (*stop)(struct qemu_alarm_timer *t); - void (*rearm)(struct qemu_alarm_timer *t); + void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns); #if defined(__linux__) int fd; timer_t timer; @@ -180,12 +104,46 @@ static inline int alarm_has_dynticks(struct qemu_alarm_timer *t) return !!t->rearm; } +static int64_t qemu_next_alarm_deadline(void) +{ + int64_t delta; + int64_t rtdelta; + + if (!use_icount && vm_clock->active_timers) { + delta = vm_clock->active_timers->expire_time - + qemu_get_clock_ns(vm_clock); + } else { + delta = INT32_MAX; + } + if (host_clock->active_timers) { + int64_t hdelta = host_clock->active_timers->expire_time - + qemu_get_clock_ns(host_clock); + if (hdelta < delta) { + delta = hdelta; + } + } + if (rt_clock->active_timers) { + rtdelta = (rt_clock->active_timers->expire_time - + qemu_get_clock_ns(rt_clock)); + if (rtdelta < delta) { + delta = rtdelta; + } + } + + return delta; +} + static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) { - if (!alarm_has_dynticks(t)) + int64_t nearest_delta_ns; + assert(alarm_has_dynticks(t)); + if (!rt_clock->active_timers && + !vm_clock->active_timers && + !host_clock->active_timers) { return; - - t->rearm(t); + } + nearest_delta_ns = qemu_next_alarm_deadline(); + t->rearm(t, nearest_delta_ns); } /* TODO: MIN_TIMER_REARM_NS should be optimized */ @@ -195,83 +153,28 @@ static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) static int mm_start_timer(struct qemu_alarm_timer *t); static void mm_stop_timer(struct qemu_alarm_timer *t); -static void mm_rearm_timer(struct qemu_alarm_timer *t); +static void mm_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); static int win32_start_timer(struct qemu_alarm_timer *t); static void win32_stop_timer(struct qemu_alarm_timer *t); -static void win32_rearm_timer(struct qemu_alarm_timer *t); +static void win32_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); #else static int unix_start_timer(struct qemu_alarm_timer *t); static void unix_stop_timer(struct qemu_alarm_timer *t); -static void unix_rearm_timer(struct qemu_alarm_timer *t); +static void unix_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); #ifdef __linux__ static int dynticks_start_timer(struct qemu_alarm_timer *t); static void dynticks_stop_timer(struct qemu_alarm_timer *t); -static void dynticks_rearm_timer(struct qemu_alarm_timer *t); +static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); #endif /* __linux__ */ #endif /* _WIN32 */ -/* Correlation between real and virtual time is always going to be - fairly approximate, so ignore small variation. - When the guest is idle real and virtual time will be aligned in - the IO wait loop. */ -#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10) - -static void icount_adjust(void) -{ - int64_t cur_time; - int64_t cur_icount; - int64_t delta; - static int64_t last_delta; - /* If the VM is not running, then do nothing. */ - if (!runstate_is_running()) - return; - - cur_time = cpu_get_clock(); - cur_icount = qemu_get_clock_ns(vm_clock); - delta = cur_icount - cur_time; - /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ - if (delta > 0 - && last_delta + ICOUNT_WOBBLE < delta * 2 - && icount_time_shift > 0) { - /* The guest is getting too far ahead. Slow time down. */ - icount_time_shift--; - } - if (delta < 0 - && last_delta - ICOUNT_WOBBLE > delta * 2 - && icount_time_shift < MAX_ICOUNT_SHIFT) { - /* The guest is getting too far behind. Speed time up. */ - icount_time_shift++; - } - last_delta = delta; - qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift); -} - -static void icount_adjust_rt(void * opaque) -{ - qemu_mod_timer(icount_rt_timer, - qemu_get_clock_ms(rt_clock) + 1000); - icount_adjust(); -} - -static void icount_adjust_vm(void * opaque) -{ - qemu_mod_timer(icount_vm_timer, - qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10); - icount_adjust(); -} - -int64_t qemu_icount_round(int64_t count) -{ - return (count + (1 << icount_time_shift) - 1) >> icount_time_shift; -} - static struct qemu_alarm_timer alarm_timers[] = { #ifndef _WIN32 #ifdef __linux__ @@ -352,14 +255,10 @@ next: } } -#define QEMU_NUM_CLOCKS 3 - QEMUClock *rt_clock; QEMUClock *vm_clock; QEMUClock *host_clock; -static QEMUTimer *active_timers[QEMU_NUM_CLOCKS]; - static QEMUClock *qemu_new_clock(int type) { QEMUClock *clock; @@ -367,101 +266,43 @@ static QEMUClock *qemu_new_clock(int type) clock = g_malloc0(sizeof(QEMUClock)); clock->type = type; clock->enabled = 1; + clock->last = INT64_MIN; notifier_list_init(&clock->reset_notifiers); - /* required to detect & report backward jumps */ - if (type == QEMU_CLOCK_HOST) { - clock->last = get_clock_realtime(); - } return clock; } void qemu_clock_enable(QEMUClock *clock, int enabled) { + bool old = clock->enabled; clock->enabled = enabled; + if (enabled && !old) { + qemu_rearm_alarm_timer(alarm_timer); + } } -static int64_t vm_clock_warp_start; - -static void icount_warp_rt(void *opaque) +int64_t qemu_clock_has_timers(QEMUClock *clock) { - if (vm_clock_warp_start == -1) { - return; - } - - if (runstate_is_running()) { - int64_t clock = qemu_get_clock_ns(rt_clock); - int64_t warp_delta = clock - vm_clock_warp_start; - if (use_icount == 1) { - qemu_icount_bias += warp_delta; - } else { - /* - * In adaptive mode, do not let the vm_clock run too - * far ahead of real time. - */ - int64_t cur_time = cpu_get_clock(); - int64_t cur_icount = qemu_get_clock_ns(vm_clock); - int64_t delta = cur_time - cur_icount; - qemu_icount_bias += MIN(warp_delta, delta); - } - if (qemu_timer_expired(active_timers[QEMU_CLOCK_VIRTUAL], - qemu_get_clock_ns(vm_clock))) { - qemu_notify_event(); - } - } - vm_clock_warp_start = -1; + return !!clock->active_timers; } -void qemu_clock_warp(QEMUClock *clock) +int64_t qemu_clock_expired(QEMUClock *clock) { - int64_t deadline; + return (clock->active_timers && + clock->active_timers->expire_time < qemu_get_clock_ns(clock)); +} - if (!clock->warp_timer) { - return; - } +int64_t qemu_clock_deadline(QEMUClock *clock) +{ + /* To avoid problems with overflow limit this to 2^32. */ + int64_t delta = INT32_MAX; - /* - * There are too many global variables to make the "warp" behavior - * applicable to other clocks. But a clock argument removes the - * need for if statements all over the place. - */ - assert(clock == vm_clock); - - /* - * If the CPUs have been sleeping, advance the vm_clock timer now. This - * ensures that the deadline for the timer is computed correctly below. - * This also makes sure that the insn counter is synchronized before the - * CPU starts running, in case the CPU is woken by an event other than - * the earliest vm_clock timer. - */ - icount_warp_rt(NULL); - if (!all_cpu_threads_idle() || !active_timers[clock->type]) { - qemu_del_timer(clock->warp_timer); - return; + if (clock->active_timers) { + delta = clock->active_timers->expire_time - qemu_get_clock_ns(clock); } - - vm_clock_warp_start = qemu_get_clock_ns(rt_clock); - deadline = qemu_next_icount_deadline(); - if (deadline > 0) { - /* - * Ensure the vm_clock proceeds even when the virtual CPU goes to - * sleep. Otherwise, the CPU might be waiting for a future timer - * interrupt to wake it up, but the interrupt never comes because - * the vCPU isn't running any insns and thus doesn't advance the - * vm_clock. - * - * An extreme solution for this problem would be to never let VCPUs - * sleep in icount mode if there is a pending vm_clock timer; rather - * time could just advance to the next vm_clock event. Instead, we - * do stop VCPUs and only advance vm_clock after some "real" time, - * (related to the time left until the next event) has passed. This - * rt_clock timer will do this. This avoids that the warps are too - * visible externally---for example, you will not be sending network - * packets continously instead of every 100ms. - */ - qemu_mod_timer(clock->warp_timer, vm_clock_warp_start + deadline); - } else { - qemu_notify_event(); + if (delta < 0) { + delta = 0; } + return delta; } QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, @@ -489,7 +330,7 @@ void qemu_del_timer(QEMUTimer *ts) /* NOTE: this code must be signal safe because qemu_timer_expired() can be called from a signal. */ - pt = &active_timers[ts->clock->type]; + pt = &ts->clock->active_timers; for(;;) { t = *pt; if (!t) @@ -504,7 +345,7 @@ void qemu_del_timer(QEMUTimer *ts) /* modify the current timer so that it will be fired when current_time >= expire_time. The corresponding callback will be called. */ -static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time) +void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time) { QEMUTimer **pt, *t; @@ -513,7 +354,7 @@ static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time) /* add the timer in the sorted list */ /* NOTE: this code must be signal safe because qemu_timer_expired() can be called from a signal. */ - pt = &active_timers[ts->clock->type]; + pt = &ts->clock->active_timers; for(;;) { t = *pt; if (!qemu_timer_expired_ns(t, expire_time)) { @@ -526,7 +367,7 @@ static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time) *pt = ts; /* Rearm if necessary */ - if (pt == &active_timers[ts->clock->type]) { + if (pt == &ts->clock->active_timers) { if (!alarm_timer->pending) { qemu_rearm_alarm_timer(alarm_timer); } @@ -538,8 +379,6 @@ static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time) } } -/* modify the current timer so that it will be fired when current_time - >= expire_time. The corresponding callback will be called. */ void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time) { qemu_mod_timer_ns(ts, expire_time * ts->scale); @@ -548,7 +387,7 @@ void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time) int qemu_timer_pending(QEMUTimer *ts) { QEMUTimer *t; - for(t = active_timers[ts->clock->type]; t != NULL; t = t->next) { + for (t = ts->clock->active_timers; t != NULL; t = t->next) { if (t == ts) return 1; } @@ -569,7 +408,7 @@ static void qemu_run_timers(QEMUClock *clock) return; current_time = qemu_get_clock_ns(clock); - ptimer_head = &active_timers[clock->type]; + ptimer_head = &clock->active_timers; for(;;) { ts = *ptimer_head; if (!qemu_timer_expired_ns(ts, current_time)) { @@ -624,79 +463,11 @@ void init_clocks(void) rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME); vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL); host_clock = qemu_new_clock(QEMU_CLOCK_HOST); - - rtc_clock = host_clock; } -/* save a timer */ -void qemu_put_timer(QEMUFile *f, QEMUTimer *ts) +uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts) { - uint64_t expire_time; - - if (qemu_timer_pending(ts)) { - expire_time = ts->expire_time; - } else { - expire_time = -1; - } - qemu_put_be64(f, expire_time); -} - -void qemu_get_timer(QEMUFile *f, QEMUTimer *ts) -{ - uint64_t expire_time; - - expire_time = qemu_get_be64(f); - if (expire_time != -1) { - qemu_mod_timer_ns(ts, expire_time); - } else { - qemu_del_timer(ts); - } -} - -static const VMStateDescription vmstate_timers = { - .name = "timer", - .version_id = 2, - .minimum_version_id = 1, - .minimum_version_id_old = 1, - .fields = (VMStateField []) { - VMSTATE_INT64(cpu_ticks_offset, TimersState), - VMSTATE_INT64(dummy, TimersState), - VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), - VMSTATE_END_OF_LIST() - } -}; - -void configure_icount(const char *option) -{ - vmstate_register(NULL, 0, &vmstate_timers, &timers_state); - if (!option) - return; - - vm_clock->warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL); - - if (strcmp(option, "auto") != 0) { - icount_time_shift = strtol(option, NULL, 0); - use_icount = 1; - return; - } - - use_icount = 2; - - /* 125MIPS seems a reasonable initial guess at the guest speed. - It will be corrected fairly quickly anyway. */ - icount_time_shift = 3; - - /* Have both realtime and virtual time triggers for speed adjustment. - The realtime trigger catches emulated time passing too slowly, - the virtual time trigger catches emulated time passing too fast. - Realtime triggers occur even when idle, so use them less frequently - than VM triggers. */ - icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL); - qemu_mod_timer(icount_rt_timer, - qemu_get_clock_ms(rt_clock) + 1000); - icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL); - qemu_mod_timer(icount_vm_timer, - qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10); + return qemu_timer_pending(ts) ? ts->expire_time : -1; } void qemu_run_all_timers(void) @@ -710,16 +481,11 @@ void qemu_run_all_timers(void) } /* vm time timers */ - if (runstate_is_running()) { - qemu_run_timers(vm_clock); - } - + qemu_run_timers(vm_clock); qemu_run_timers(rt_clock); qemu_run_timers(host_clock); } -static int64_t qemu_next_alarm_deadline(void); - #ifdef _WIN32 static void CALLBACK host_alarm_handler(PVOID lpParam, BOOLEAN unused) #else @@ -767,50 +533,6 @@ static void host_alarm_handler(int host_signum) } } -int64_t qemu_next_icount_deadline(void) -{ - /* To avoid problems with overflow limit this to 2^32. */ - int64_t delta = INT32_MAX; - - assert(use_icount); - if (active_timers[QEMU_CLOCK_VIRTUAL]) { - delta = active_timers[QEMU_CLOCK_VIRTUAL]->expire_time - - qemu_get_clock_ns(vm_clock); - } - - if (delta < 0) - delta = 0; - - return delta; -} - -static int64_t qemu_next_alarm_deadline(void) -{ - int64_t delta; - int64_t rtdelta; - - if (!use_icount && active_timers[QEMU_CLOCK_VIRTUAL]) { - delta = active_timers[QEMU_CLOCK_VIRTUAL]->expire_time - - qemu_get_clock_ns(vm_clock); - } else { - delta = INT32_MAX; - } - if (active_timers[QEMU_CLOCK_HOST]) { - int64_t hdelta = active_timers[QEMU_CLOCK_HOST]->expire_time - - qemu_get_clock_ns(host_clock); - if (hdelta < delta) - delta = hdelta; - } - if (active_timers[QEMU_CLOCK_REALTIME]) { - rtdelta = (active_timers[QEMU_CLOCK_REALTIME]->expire_time - - qemu_get_clock_ns(rt_clock)); - if (rtdelta < delta) - delta = rtdelta; - } - - return delta; -} - #if defined(__linux__) #include "compatfd.h" @@ -863,20 +585,13 @@ static void dynticks_stop_timer(struct qemu_alarm_timer *t) timer_delete(host_timer); } -static void dynticks_rearm_timer(struct qemu_alarm_timer *t) +static void dynticks_rearm_timer(struct qemu_alarm_timer *t, + int64_t nearest_delta_ns) { timer_t host_timer = t->timer; struct itimerspec timeout; - int64_t nearest_delta_ns = INT64_MAX; int64_t current_ns; - assert(alarm_has_dynticks(t)); - if (!active_timers[QEMU_CLOCK_REALTIME] && - !active_timers[QEMU_CLOCK_VIRTUAL] && - !active_timers[QEMU_CLOCK_HOST]) - return; - - nearest_delta_ns = qemu_next_alarm_deadline(); if (nearest_delta_ns < MIN_TIMER_REARM_NS) nearest_delta_ns = MIN_TIMER_REARM_NS; @@ -918,19 +633,12 @@ static int unix_start_timer(struct qemu_alarm_timer *t) return 0; } -static void unix_rearm_timer(struct qemu_alarm_timer *t) +static void unix_rearm_timer(struct qemu_alarm_timer *t, + int64_t nearest_delta_ns) { struct itimerval itv; - int64_t nearest_delta_ns = INT64_MAX; int err; - assert(alarm_has_dynticks(t)); - if (!active_timers[QEMU_CLOCK_REALTIME] && - !active_timers[QEMU_CLOCK_VIRTUAL] && - !active_timers[QEMU_CLOCK_HOST]) - return; - - nearest_delta_ns = qemu_next_alarm_deadline(); if (nearest_delta_ns < MIN_TIMER_REARM_NS) nearest_delta_ns = MIN_TIMER_REARM_NS; @@ -1017,23 +725,14 @@ static void mm_stop_timer(struct qemu_alarm_timer *t) timeEndPeriod(mm_period); } -static void mm_rearm_timer(struct qemu_alarm_timer *t) +static void mm_rearm_timer(struct qemu_alarm_timer *t, int64_t delta) { - int nearest_delta_ms; - - assert(alarm_has_dynticks(t)); - if (!active_timers[QEMU_CLOCK_REALTIME] && - !active_timers[QEMU_CLOCK_VIRTUAL] && - !active_timers[QEMU_CLOCK_HOST]) { - return; - } - - timeKillEvent(mm_timer); - - nearest_delta_ms = (qemu_next_alarm_deadline() + 999999) / 1000000; + int nearest_delta_ms = (delta + 999999) / 1000000; if (nearest_delta_ms < 1) { nearest_delta_ms = 1; } + + timeKillEvent(mm_timer); mm_timer = timeSetEvent(nearest_delta_ms, mm_period, mm_alarm_handler, @@ -1085,19 +784,14 @@ static void win32_stop_timer(struct qemu_alarm_timer *t) } } -static void win32_rearm_timer(struct qemu_alarm_timer *t) +static void win32_rearm_timer(struct qemu_alarm_timer *t, + int64_t nearest_delta_ns) { HANDLE hTimer = t->timer; int nearest_delta_ms; BOOLEAN success; - assert(alarm_has_dynticks(t)); - if (!active_timers[QEMU_CLOCK_REALTIME] && - !active_timers[QEMU_CLOCK_VIRTUAL] && - !active_timers[QEMU_CLOCK_HOST]) - return; - - nearest_delta_ms = (qemu_next_alarm_deadline() + 999999) / 1000000; + nearest_delta_ms = (nearest_delta_ns + 999999) / 1000000; if (nearest_delta_ms < 1) { nearest_delta_ms = 1; } @@ -1116,11 +810,11 @@ static void win32_rearm_timer(struct qemu_alarm_timer *t) #endif /* _WIN32 */ -static void alarm_timer_on_change_state_rearm(void *opaque, int running, - RunState state) +static void quit_timers(void) { - if (running) - qemu_rearm_alarm_timer((struct qemu_alarm_timer *) opaque); + struct qemu_alarm_timer *t = alarm_timer; + alarm_timer = NULL; + t->stop(t); } int init_timer_alarm(void) @@ -1142,9 +836,9 @@ int init_timer_alarm(void) } /* first event is at time 0 */ + atexit(quit_timers); t->pending = 1; alarm_timer = t; - qemu_add_vm_change_state_handler(alarm_timer_on_change_state_rearm, t); return 0; @@ -1152,13 +846,6 @@ fail: return err; } -void quit_timers(void) -{ - struct qemu_alarm_timer *t = alarm_timer; - alarm_timer = NULL; - t->stop(t); -} - int qemu_calculate_timeout(void) { return 1000; diff --git a/qemu-timer.h b/qemu-timer.h index 0a43469847..67ca72e045 100644 --- a/qemu-timer.h +++ b/qemu-timer.h @@ -2,6 +2,7 @@ #define QEMU_TIMER_H #include "qemu-common.h" +#include "main-loop.h" #include "notify.h" #include <time.h> #include <sys/time.h> @@ -38,6 +39,9 @@ extern QEMUClock *vm_clock; extern QEMUClock *host_clock; int64_t qemu_get_clock_ns(QEMUClock *clock); +int64_t qemu_clock_has_timers(QEMUClock *clock); +int64_t qemu_clock_expired(QEMUClock *clock); +int64_t qemu_clock_deadline(QEMUClock *clock); void qemu_clock_enable(QEMUClock *clock, int enabled); void qemu_clock_warp(QEMUClock *clock); @@ -49,19 +53,18 @@ QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, QEMUTimerCB *cb, void *opaque); void qemu_free_timer(QEMUTimer *ts); void qemu_del_timer(QEMUTimer *ts); +void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time); void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time); int qemu_timer_pending(QEMUTimer *ts); int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time); +uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts); void qemu_run_all_timers(void); int qemu_alarm_pending(void); -int64_t qemu_next_icount_deadline(void); void configure_alarms(char const *opt); -void configure_icount(const char *option); int qemu_calculate_timeout(void); void init_clocks(void); int init_timer_alarm(void); -void quit_timers(void); int64_t cpu_get_ticks(void); void cpu_enable_ticks(void); @@ -150,12 +153,8 @@ void ptimer_run(ptimer_state *s, int oneshot); void ptimer_stop(ptimer_state *s); /* icount */ -int64_t qemu_icount_round(int64_t count); -extern int64_t qemu_icount; -extern int use_icount; -extern int icount_time_shift; -extern int64_t qemu_icount_bias; int64_t cpu_get_icount(void); +int64_t cpu_get_clock(void); /*******************************************/ /* host CPU ticks (if available) */ @@ -311,22 +310,6 @@ static inline int64_t cpu_get_real_ticks (void) } #endif -#ifdef NEED_CPU_H -/* Deterministic execution requires that IO only be performed on the last - instruction of a TB so that interrupts take effect immediately. */ -static inline int can_do_io(CPUState *env) -{ - if (!use_icount) - return 1; - - /* If not executing code then assume we are ok. */ - if (!env->current_tb) - return 1; - - return env->can_do_io != 0; -} -#endif - #ifdef CONFIG_PROFILER static inline int64_t profile_getclock(void) { @@ -81,6 +81,7 @@ #include "migration.h" #include "qemu_socket.h" #include "qemu-queue.h" +#include "qemu-timer.h" #include "cpus.h" #define SELF_ANNOUNCE_ROUNDS 5 @@ -712,6 +713,30 @@ uint64_t qemu_get_be64(QEMUFile *f) return v; } + +/* timer */ + +void qemu_put_timer(QEMUFile *f, QEMUTimer *ts) +{ + uint64_t expire_time; + + expire_time = qemu_timer_expire_time_ns(ts); + qemu_put_be64(f, expire_time); +} + +void qemu_get_timer(QEMUFile *f, QEMUTimer *ts) +{ + uint64_t expire_time; + + expire_time = qemu_get_be64(f); + if (expire_time != -1) { + qemu_mod_timer_ns(ts, expire_time); + } else { + qemu_del_timer(ts); + } +} + + /* bool */ static int get_bool(QEMUFile *f, void *pv, size_t size) diff --git a/slirp/libslirp.h b/slirp/libslirp.h index a7551235e2..890fd86c3c 100644 --- a/slirp/libslirp.h +++ b/slirp/libslirp.h @@ -3,8 +3,6 @@ #include "qemu-common.h" -#ifdef CONFIG_SLIRP - struct Slirp; typedef struct Slirp Slirp; @@ -44,13 +42,4 @@ void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port); -#else /* !CONFIG_SLIRP */ - -static inline void slirp_select_fill(int *pnfds, fd_set *readfds, - fd_set *writefds, fd_set *xfds) { } - -static inline void slirp_select_poll(fd_set *readfds, fd_set *writefds, - fd_set *xfds, int select_error) { } -#endif /* !CONFIG_SLIRP */ - #endif @@ -8,6 +8,7 @@ #include "qemu-timer.h" #include "qapi-types.h" #include "notify.h" +#include "main-loop.h" /* vl.c */ @@ -64,8 +65,6 @@ void do_info_snapshots(Monitor *mon); void qemu_announce_self(void); -int main_loop_wait(int nonblocking); - bool qemu_savevm_state_blocked(Monitor *mon); int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable, int shared); @@ -148,6 +148,7 @@ int main(int argc, char **argv) #include "qemu-objects.h" #include "qemu-options.h" #include "qmp-commands.h" +#include "main-loop.h" #ifdef CONFIG_VIRTFS #include "fsdev/qemu-fsdev.h" #endif @@ -1425,142 +1426,51 @@ void qemu_system_vmstop_request(RunState state) qemu_notify_event(); } -static GPollFD poll_fds[1024 * 2]; /* this is probably overkill */ -static int n_poll_fds; -static int max_priority; +qemu_irq qemu_system_powerdown; -static void glib_select_fill(int *max_fd, fd_set *rfds, fd_set *wfds, - fd_set *xfds, struct timeval *tv) +static bool main_loop_should_exit(void) { - GMainContext *context = g_main_context_default(); - int i; - int timeout = 0, cur_timeout; - - g_main_context_prepare(context, &max_priority); - - n_poll_fds = g_main_context_query(context, max_priority, &timeout, - poll_fds, ARRAY_SIZE(poll_fds)); - g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds)); - - for (i = 0; i < n_poll_fds; i++) { - GPollFD *p = &poll_fds[i]; - - if ((p->events & G_IO_IN)) { - FD_SET(p->fd, rfds); - *max_fd = MAX(*max_fd, p->fd); - } - if ((p->events & G_IO_OUT)) { - FD_SET(p->fd, wfds); - *max_fd = MAX(*max_fd, p->fd); - } - if ((p->events & G_IO_ERR)) { - FD_SET(p->fd, xfds); - *max_fd = MAX(*max_fd, p->fd); + RunState r; + if (qemu_debug_requested()) { + vm_stop(RUN_STATE_DEBUG); + } + if (qemu_shutdown_requested()) { + qemu_kill_report(); + monitor_protocol_event(QEVENT_SHUTDOWN, NULL); + if (no_shutdown) { + vm_stop(RUN_STATE_SHUTDOWN); + } else { + return true; } } - - cur_timeout = (tv->tv_sec * 1000) + ((tv->tv_usec + 500) / 1000); - if (timeout >= 0 && timeout < cur_timeout) { - tv->tv_sec = timeout / 1000; - tv->tv_usec = (timeout % 1000) * 1000; - } -} - -static void glib_select_poll(fd_set *rfds, fd_set *wfds, fd_set *xfds, - bool err) -{ - GMainContext *context = g_main_context_default(); - - if (!err) { - int i; - - for (i = 0; i < n_poll_fds; i++) { - GPollFD *p = &poll_fds[i]; - - if ((p->events & G_IO_IN) && FD_ISSET(p->fd, rfds)) { - p->revents |= G_IO_IN; - } - if ((p->events & G_IO_OUT) && FD_ISSET(p->fd, wfds)) { - p->revents |= G_IO_OUT; - } - if ((p->events & G_IO_ERR) && FD_ISSET(p->fd, xfds)) { - p->revents |= G_IO_ERR; - } + if (qemu_reset_requested()) { + pause_all_vcpus(); + cpu_synchronize_all_states(); + qemu_system_reset(VMRESET_REPORT); + resume_all_vcpus(); + if (runstate_check(RUN_STATE_INTERNAL_ERROR) || + runstate_check(RUN_STATE_SHUTDOWN)) { + runstate_set(RUN_STATE_PAUSED); } } - - if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) { - g_main_context_dispatch(context); - } -} - -int main_loop_wait(int nonblocking) -{ - fd_set rfds, wfds, xfds; - int ret, nfds; - struct timeval tv; - int timeout; - - if (nonblocking) - timeout = 0; - else { - timeout = qemu_calculate_timeout(); - qemu_bh_update_timeout(&timeout); - } - - os_host_main_loop_wait(&timeout); - - tv.tv_sec = timeout / 1000; - tv.tv_usec = (timeout % 1000) * 1000; - - /* poll any events */ - /* XXX: separate device handlers from system ones */ - nfds = -1; - FD_ZERO(&rfds); - FD_ZERO(&wfds); - FD_ZERO(&xfds); - - qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds); - slirp_select_fill(&nfds, &rfds, &wfds, &xfds); - glib_select_fill(&nfds, &rfds, &wfds, &xfds, &tv); - - if (timeout > 0) { - qemu_mutex_unlock_iothread(); + if (qemu_powerdown_requested()) { + monitor_protocol_event(QEVENT_POWERDOWN, NULL); + qemu_irq_raise(qemu_system_powerdown); } - - ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv); - - if (timeout > 0) { - qemu_mutex_lock_iothread(); + if (qemu_vmstop_requested(&r)) { + vm_stop(r); } - - qemu_iohandler_poll(&rfds, &wfds, &xfds, ret); - slirp_select_poll(&rfds, &wfds, &xfds, (ret < 0)); - glib_select_poll(&rfds, &wfds, &xfds, (ret < 0)); - - qemu_run_all_timers(); - - /* Check bottom-halves last in case any of the earlier events triggered - them. */ - qemu_bh_poll(); - - return ret; + return false; } -qemu_irq qemu_system_powerdown; - static void main_loop(void) { bool nonblocking; - int last_io __attribute__ ((unused)) = 0; + int last_io = 0; #ifdef CONFIG_PROFILER int64_t ti; #endif - RunState r; - - qemu_main_loop_start(); - - for (;;) { + do { nonblocking = !kvm_enabled() && last_io > 0; #ifdef CONFIG_PROFILER ti = profile_getclock(); @@ -1569,38 +1479,7 @@ static void main_loop(void) #ifdef CONFIG_PROFILER dev_time += profile_getclock() - ti; #endif - - if (qemu_debug_requested()) { - vm_stop(RUN_STATE_DEBUG); - } - if (qemu_shutdown_requested()) { - qemu_kill_report(); - monitor_protocol_event(QEVENT_SHUTDOWN, NULL); - if (no_shutdown) { - vm_stop(RUN_STATE_SHUTDOWN); - } else - break; - } - if (qemu_reset_requested()) { - pause_all_vcpus(); - cpu_synchronize_all_states(); - qemu_system_reset(VMRESET_REPORT); - resume_all_vcpus(); - if (runstate_check(RUN_STATE_INTERNAL_ERROR) || - runstate_check(RUN_STATE_SHUTDOWN)) { - runstate_set(RUN_STATE_PAUSED); - } - } - if (qemu_powerdown_requested()) { - monitor_protocol_event(QEVENT_POWERDOWN, NULL); - qemu_irq_raise(qemu_system_powerdown); - } - if (qemu_vmstop_requested(&r)) { - vm_stop(r); - } - } - bdrv_close_all(); - pause_all_vcpus(); + } while (!main_loop_should_exit()); } static void version(void) @@ -2311,6 +2190,7 @@ int main(int argc, char **argv, char **envp) runstate_init(); init_clocks(); + rtc_clock = host_clock; qemu_cache_utils_init(envp); @@ -3298,6 +3178,7 @@ int main(int argc, char **argv, char **envp) configure_accelerator(); + qemu_init_cpu_loop(); if (qemu_init_main_loop()) { fprintf(stderr, "qemu_init_main_loop failed\n"); exit(1); @@ -3564,8 +3445,10 @@ int main(int argc, char **argv, char **envp) os_setup_post(); + resume_all_vcpus(); main_loop(); - quit_timers(); + bdrv_close_all(); + pause_all_vcpus(); net_cleanup(); res_free(); |