aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2011-02-16 08:47:07 -0600
committerAnthony Liguori <aliguori@us.ibm.com>2011-02-16 08:47:07 -0600
commitc5d69e6bbf37bf5e3882060764b15e018e6a5321 (patch)
treee246e96bdfffbc5e07682c5a39ea7a65482a5935
parent630ecca0da959e65acad1ee0bf3a631bbb7ee052 (diff)
parent0ec329dab938e2d97d12a91f8ed15fec27b325e0 (diff)
Merge remote branch 'qemu-kvm/uq/master' into staging
-rw-r--r--Makefile.objs2
-rw-r--r--Makefile.target4
-rwxr-xr-xconfigure6
-rw-r--r--cpu-all.h6
-rw-r--r--cpu-common.h4
-rw-r--r--cpu-defs.h1
-rw-r--r--cpu-exec.c43
-rw-r--r--cpus.c725
-rw-r--r--cpus.h3
-rw-r--r--exec.c30
-rw-r--r--gdbstub.c19
-rw-r--r--hw/cirrus_vga.c1
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/kvmclock.c125
-rw-r--r--hw/kvmclock.h14
-rw-r--r--hw/pc_piix.c32
-rw-r--r--hw/scsi-disk.c2
-rw-r--r--hw/vga.c31
-rw-r--r--hw/vhost.c2
-rw-r--r--hw/virtio-blk.c2
-rw-r--r--hw/watchdog.c2
-rw-r--r--kvm-all.c89
-rw-r--r--kvm-stub.c15
-rw-r--r--kvm.h16
-rw-r--r--migration.c2
-rw-r--r--monitor.c4
-rw-r--r--qemu-common.h1
-rw-r--r--savevm.c4
-rw-r--r--sysemu.h12
-rw-r--r--target-i386/cpu.h1
-rw-r--r--target-i386/kvm.c112
-rw-r--r--target-ppc/kvm.c16
-rw-r--r--target-s390x/kvm.c16
-rw-r--r--vl.c62
34 files changed, 921 insertions, 485 deletions
diff --git a/Makefile.objs b/Makefile.objs
index adb584234e..b21f9d3894 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -141,7 +141,7 @@ common-obj-y += $(addprefix ui/, $(ui-obj-y))
common-obj-y += iov.o acl.o
common-obj-$(CONFIG_THREAD) += qemu-thread.o
-common-obj-$(CONFIG_IOTHREAD) += compatfd.o
+common-obj-$(CONFIG_POSIX) += compatfd.o
common-obj-y += notify.o event_notifier.o
common-obj-y += qemu-timer.o qemu-timer-common.o
diff --git a/Makefile.target b/Makefile.target
index a6c30ddc23..5a0fd40696 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -37,7 +37,7 @@ ifndef CONFIG_HAIKU
LIBS+=-lm
endif
-kvm.o kvm-all.o vhost.o vhost_net.o: QEMU_CFLAGS+=$(KVM_CFLAGS)
+kvm.o kvm-all.o vhost.o vhost_net.o kvmclock.o: QEMU_CFLAGS+=$(KVM_CFLAGS)
config-target.h: config-target.h-timestamp
config-target.h-timestamp: config-target.mak
@@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
obj-i386-y += vmport.o applesmc.o
obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
obj-i386-y += debugcon.o multiboot.o
-obj-i386-y += pc_piix.o
+obj-i386-y += pc_piix.o kvmclock.o
obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
# shared objects
diff --git a/configure b/configure
index 598e8e1ebb..a3f53456f4 100755
--- a/configure
+++ b/configure
@@ -2057,6 +2057,12 @@ EOF
if compile_prog "" "" ; then
signalfd=yes
+elif test "$kvm" = "yes" -a "$io_thread" != "yes"; then
+ echo
+ echo "ERROR: Host kernel lacks signalfd() support,"
+ echo "but KVM depends on it when the IO thread is disabled."
+ echo
+ exit 1
fi
# check if eventfd is supported
diff --git a/cpu-all.h b/cpu-all.h
index ffbd6a4df8..87b0f86667 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -959,6 +959,12 @@ int cpu_physical_memory_get_dirty_tracking(void);
int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
target_phys_addr_t end_addr);
+int cpu_physical_log_start(target_phys_addr_t start_addr,
+ ram_addr_t size);
+
+int cpu_physical_log_stop(target_phys_addr_t start_addr,
+ ram_addr_t size);
+
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
#endif /* !CONFIG_USER_ONLY */
diff --git a/cpu-common.h b/cpu-common.h
index 6d4a898ad1..54d21d4717 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -96,6 +96,10 @@ struct CPUPhysMemoryClient {
target_phys_addr_t end_addr);
int (*migration_log)(struct CPUPhysMemoryClient *client,
int enable);
+ int (*log_start)(struct CPUPhysMemoryClient *client,
+ target_phys_addr_t phys_addr, ram_addr_t size);
+ int (*log_stop)(struct CPUPhysMemoryClient *client,
+ target_phys_addr_t phys_addr, ram_addr_t size);
QLIST_ENTRY(CPUPhysMemoryClient) list;
};
diff --git a/cpu-defs.h b/cpu-defs.h
index 8d4bf86c53..db809ed465 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -205,6 +205,7 @@ typedef struct CPUWatchpoint {
uint32_t stopped; /* Artificially stopped */ \
struct QemuThread *thread; \
struct QemuCond *halt_cond; \
+ int thread_kicked; \
struct qemu_work_item *queued_work_first, *queued_work_last; \
const char *cpu_model_str; \
struct KVMState *kvm_state; \
diff --git a/cpu-exec.c b/cpu-exec.c
index 8c9fb8b1a2..b03b3a749a 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -196,28 +196,6 @@ static inline TranslationBlock *tb_find_fast(void)
return tb;
}
-static CPUDebugExcpHandler *debug_excp_handler;
-
-CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
-{
- CPUDebugExcpHandler *old_handler = debug_excp_handler;
-
- debug_excp_handler = handler;
- return old_handler;
-}
-
-static void cpu_handle_debug_exception(CPUState *env)
-{
- CPUWatchpoint *wp;
-
- if (!env->watchpoint_hit)
- QTAILQ_FOREACH(wp, &env->watchpoints, entry)
- wp->flags &= ~BP_WATCHPOINT_HIT;
-
- if (debug_excp_handler)
- debug_excp_handler(env);
-}
-
/* main execution loop */
volatile sig_atomic_t exit_request;
@@ -248,13 +226,11 @@ int cpu_exec(CPUState *env1)
}
#if defined(TARGET_I386)
- if (!kvm_enabled()) {
- /* put eflags in CPU temporary format */
- CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
- DF = 1 - (2 * ((env->eflags >> 10) & 1));
- CC_OP = CC_OP_EFLAGS;
- env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
- }
+ /* put eflags in CPU temporary format */
+ CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+ DF = 1 - (2 * ((env->eflags >> 10) & 1));
+ CC_OP = CC_OP_EFLAGS;
+ env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
#elif defined(TARGET_SPARC)
#elif defined(TARGET_M68K)
env->cc_op = CC_OP_FLAGS;
@@ -279,7 +255,7 @@ int cpu_exec(CPUState *env1)
if (setjmp(env->jmp_env) == 0) {
#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
#undef env
- env = cpu_single_env;
+ env = cpu_single_env;
#define env cpu_single_env
#endif
/* if an exception is pending, we execute it here */
@@ -287,8 +263,6 @@ int cpu_exec(CPUState *env1)
if (env->exception_index >= EXCP_INTERRUPT) {
/* exit request from the cpu execution loop */
ret = env->exception_index;
- if (ret == EXCP_DEBUG)
- cpu_handle_debug_exception(env);
break;
} else {
#if defined(CONFIG_USER_ONLY)
@@ -340,11 +314,6 @@ int cpu_exec(CPUState *env1)
}
}
- if (kvm_enabled()) {
- kvm_cpu_exec(env);
- longjmp(env->jmp_env, 1);
- }
-
next_tb = 0; /* force lookup of first TB */
for(;;) {
interrupt_request = env->interrupt_request;
diff --git a/cpus.c b/cpus.c
index 2d920907e0..0f339459a5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -34,9 +34,6 @@
#include "cpus.h"
#include "compatfd.h"
-#ifdef CONFIG_LINUX
-#include <sys/prctl.h>
-#endif
#ifdef SIGRTMIN
#define SIG_IPI (SIGRTMIN+4)
@@ -44,10 +41,24 @@
#define SIG_IPI SIGUSR1
#endif
+#ifdef CONFIG_LINUX
+
+#include <sys/prctl.h>
+
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif
+#ifndef PR_MCE_KILL_SET
+#define PR_MCE_KILL_SET 1
+#endif
+
+#ifndef PR_MCE_KILL_EARLY
+#define PR_MCE_KILL_EARLY 1
+#endif
+
+#endif /* CONFIG_LINUX */
+
static CPUState *next_cpu;
/***********************************************************/
@@ -119,45 +130,117 @@ static void do_vm_stop(int reason)
static int cpu_can_run(CPUState *env)
{
- if (env->stop)
+ if (env->stop) {
return 0;
- if (env->stopped || !vm_running)
+ }
+ if (env->stopped || !vm_running) {
return 0;
+ }
return 1;
}
-static int cpu_has_work(CPUState *env)
+static bool cpu_thread_is_idle(CPUState *env)
{
- if (env->stop)
- return 1;
- if (env->queued_work_first)
- return 1;
- if (env->stopped || !vm_running)
- return 0;
- if (!env->halted)
- return 1;
- if (qemu_cpu_has_work(env))
- return 1;
- return 0;
+ if (env->stop || env->queued_work_first) {
+ return false;
+ }
+ if (env->stopped || !vm_running) {
+ return true;
+ }
+ if (!env->halted || qemu_cpu_has_work(env)) {
+ return false;
+ }
+ return true;
}
-static int any_cpu_has_work(void)
+static bool all_cpu_threads_idle(void)
{
CPUState *env;
- for (env = first_cpu; env != NULL; env = env->next_cpu)
- if (cpu_has_work(env))
- return 1;
- return 0;
+ for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ if (!cpu_thread_is_idle(env)) {
+ return false;
+ }
+ }
+ return true;
}
-static void cpu_debug_handler(CPUState *env)
+static CPUDebugExcpHandler *debug_excp_handler;
+
+CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
+{
+ CPUDebugExcpHandler *old_handler = debug_excp_handler;
+
+ debug_excp_handler = handler;
+ return old_handler;
+}
+
+static void cpu_handle_debug_exception(CPUState *env)
{
+ CPUWatchpoint *wp;
+
+ if (!env->watchpoint_hit) {
+ QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+ wp->flags &= ~BP_WATCHPOINT_HIT;
+ }
+ }
+ if (debug_excp_handler) {
+ debug_excp_handler(env);
+ }
+
gdb_set_stop_cpu(env);
- debug_requested = EXCP_DEBUG;
- vm_stop(EXCP_DEBUG);
+ qemu_system_debug_request();
+#ifdef CONFIG_IOTHREAD
+ env->stopped = 1;
+#endif
}
+#ifdef CONFIG_LINUX
+static void sigbus_reraise(void)
+{
+ sigset_t set;
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = SIG_DFL;
+ if (!sigaction(SIGBUS, &action, NULL)) {
+ raise(SIGBUS);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ }
+ perror("Failed to re-raise SIGBUS!\n");
+ abort();
+}
+
+static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
+ void *ctx)
+{
+ if (kvm_on_sigbus(siginfo->ssi_code,
+ (void *)(intptr_t)siginfo->ssi_addr)) {
+ sigbus_reraise();
+ }
+}
+
+static void qemu_init_sigbus(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_flags = SA_SIGINFO;
+ action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+
+ prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
+}
+
+#else /* !CONFIG_LINUX */
+
+static void qemu_init_sigbus(void)
+{
+}
+#endif /* !CONFIG_LINUX */
+
#ifndef _WIN32
static int io_thread_fd = -1;
@@ -167,9 +250,9 @@ static void qemu_event_increment(void)
static const uint64_t val = 1;
ssize_t ret;
- if (io_thread_fd == -1)
+ if (io_thread_fd == -1) {
return;
-
+ }
do {
ret = write(io_thread_fd, &val, sizeof(val));
} while (ret < 0 && errno == EINTR);
@@ -200,17 +283,17 @@ static int qemu_event_init(void)
int fds[2];
err = qemu_eventfd(fds);
- if (err == -1)
+ if (err == -1) {
return -errno;
-
+ }
err = fcntl_setfl(fds[0], O_NONBLOCK);
- if (err < 0)
+ if (err < 0) {
goto fail;
-
+ }
err = fcntl_setfl(fds[1], O_NONBLOCK);
- if (err < 0)
+ if (err < 0) {
goto fail;
-
+ }
qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
(void *)(unsigned long)fds[0]);
@@ -222,7 +305,109 @@ fail:
close(fds[1]);
return err;
}
-#else
+
+static void dummy_signal(int sig)
+{
+}
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them. We rely on whatever the current signal
+ * handler is to dispatch the signals when we receive them.
+ */
+static void sigfd_handler(void *opaque)
+{
+ int fd = (unsigned long) opaque;
+ struct qemu_signalfd_siginfo info;
+ struct sigaction action;
+ ssize_t len;
+
+ while (1) {
+ do {
+ len = read(fd, &info, sizeof(info));
+ } while (len == -1 && errno == EINTR);
+
+ if (len == -1 && errno == EAGAIN) {
+ break;
+ }
+
+ if (len != sizeof(info)) {
+ printf("read from sigfd returned %zd: %m\n", len);
+ return;
+ }
+
+ sigaction(info.ssi_signo, NULL, &action);
+ if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
+ action.sa_sigaction(info.ssi_signo,
+ (siginfo_t *)&info, NULL);
+ } else if (action.sa_handler) {
+ action.sa_handler(info.ssi_signo);
+ }
+ }
+}
+
+static int qemu_signalfd_init(sigset_t mask)
+{
+ int sigfd;
+
+ sigfd = qemu_signalfd(&mask);
+ if (sigfd == -1) {
+ fprintf(stderr, "failed to create signalfd\n");
+ return -errno;
+ }
+
+ fcntl_setfl(sigfd, O_NONBLOCK);
+
+ qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
+ (void *)(unsigned long) sigfd);
+
+ return 0;
+}
+
+static void qemu_kvm_eat_signals(CPUState *env)
+{
+ struct timespec ts = { 0, 0 };
+ siginfo_t siginfo;
+ sigset_t waitset;
+ sigset_t chkset;
+ int r;
+
+ sigemptyset(&waitset);
+ sigaddset(&waitset, SIG_IPI);
+ sigaddset(&waitset, SIGBUS);
+
+ do {
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
+ perror("sigtimedwait");
+ exit(1);
+ }
+
+ switch (r) {
+ case SIGBUS:
+ if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
+ sigbus_reraise();
+ }
+ break;
+ default:
+ break;
+ }
+
+ r = sigpending(&chkset);
+ if (r == -1) {
+ perror("sigpending");
+ exit(1);
+ }
+ } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
+
+#ifndef CONFIG_IOTHREAD
+ if (sigismember(&chkset, SIGIO) || sigismember(&chkset, SIGALRM)) {
+ qemu_notify_event();
+ }
+#endif
+}
+
+#else /* _WIN32 */
+
HANDLE qemu_event_handle;
static void dummy_event_handler(void *opaque)
@@ -248,12 +433,78 @@ static void qemu_event_increment(void)
exit (1);
}
}
-#endif
+
+static void qemu_kvm_eat_signals(CPUState *env)
+{
+}
+#endif /* _WIN32 */
#ifndef CONFIG_IOTHREAD
+static void qemu_kvm_init_cpu_signals(CPUState *env)
+{
+#ifndef _WIN32
+ int r;
+ sigset_t set;
+ struct sigaction sigact;
+
+ memset(&sigact, 0, sizeof(sigact));
+ sigact.sa_handler = dummy_signal;
+ sigaction(SIG_IPI, &sigact, NULL);
+
+ sigemptyset(&set);
+ sigaddset(&set, SIG_IPI);
+ sigaddset(&set, SIGIO);
+ sigaddset(&set, SIGALRM);
+ pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+ pthread_sigmask(SIG_BLOCK, NULL, &set);
+ sigdelset(&set, SIG_IPI);
+ sigdelset(&set, SIGBUS);
+ sigdelset(&set, SIGIO);
+ sigdelset(&set, SIGALRM);
+ r = kvm_set_signal_mask(env, &set);
+ if (r) {
+ fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
+ exit(1);
+ }
+#endif
+}
+
+#ifndef _WIN32
+static sigset_t block_synchronous_signals(void)
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ if (kvm_enabled()) {
+ /*
+ * We need to process timer signals synchronously to avoid a race
+ * between exit_request check and KVM vcpu entry.
+ */
+ sigaddset(&set, SIGIO);
+ sigaddset(&set, SIGALRM);
+ }
+
+ return set;
+}
+#endif
+
int qemu_init_main_loop(void)
{
- cpu_set_debug_excp_handler(cpu_debug_handler);
+#ifndef _WIN32
+ sigset_t blocked_signals;
+ int ret;
+
+ blocked_signals = block_synchronous_signals();
+
+ ret = qemu_signalfd_init(blocked_signals);
+ if (ret) {
+ return ret;
+ }
+#endif
+
+ qemu_init_sigbus();
return qemu_event_init();
}
@@ -265,12 +516,19 @@ void qemu_main_loop_start(void)
void qemu_init_vcpu(void *_env)
{
CPUState *env = _env;
+ int r;
env->nr_cores = smp_cores;
env->nr_threads = smp_threads;
- if (kvm_enabled())
- kvm_init_vcpu(env);
- return;
+
+ if (kvm_enabled()) {
+ r = kvm_init_vcpu(env);
+ if (r < 0) {
+ fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
+ exit(1);
+ }
+ qemu_kvm_init_cpu_signals(env);
+ }
}
int qemu_cpu_self(void *env)
@@ -293,7 +551,17 @@ void pause_all_vcpus(void)
void qemu_cpu_kick(void *env)
{
- return;
+}
+
+void qemu_cpu_kick_self(void)
+{
+#ifndef _WIN32
+ assert(cpu_single_env);
+
+ raise(SIG_IPI);
+#else
+ abort();
+#endif
}
void qemu_notify_event(void)
@@ -307,11 +575,16 @@ void qemu_notify_event(void)
if (next_cpu && env != next_cpu) {
cpu_exit(next_cpu);
}
+ exit_request = 1;
}
void qemu_mutex_lock_iothread(void) {}
void qemu_mutex_unlock_iothread(void) {}
+void cpu_stop_current(void)
+{
+}
+
void vm_stop(int reason)
{
do_vm_stop(reason);
@@ -337,61 +610,65 @@ static QemuCond qemu_system_cond;
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;
-static void tcg_init_ipi(void);
-static void kvm_init_ipi(CPUState *env);
-static sigset_t block_io_signals(void);
-
-/* If we have signalfd, we mask out the signals we want to handle and then
- * use signalfd to listen for them. We rely on whatever the current signal
- * handler is to dispatch the signals when we receive them.
- */
-static void sigfd_handler(void *opaque)
+static void cpu_signal(int sig)
{
- int fd = (unsigned long) opaque;
- struct qemu_signalfd_siginfo info;
- struct sigaction action;
- ssize_t len;
-
- while (1) {
- do {
- len = read(fd, &info, sizeof(info));
- } while (len == -1 && errno == EINTR);
+ if (cpu_single_env) {
+ cpu_exit(cpu_single_env);
+ }
+ exit_request = 1;
+}
- if (len == -1 && errno == EAGAIN) {
- break;
- }
+static void qemu_kvm_init_cpu_signals(CPUState *env)
+{
+ int r;
+ sigset_t set;
+ struct sigaction sigact;
- if (len != sizeof(info)) {
- printf("read from sigfd returned %zd: %m\n", len);
- return;
- }
+ memset(&sigact, 0, sizeof(sigact));
+ sigact.sa_handler = dummy_signal;
+ sigaction(SIG_IPI, &sigact, NULL);
- sigaction(info.ssi_signo, NULL, &action);
- if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
- action.sa_sigaction(info.ssi_signo,
- (siginfo_t *)&info, NULL);
- } else if (action.sa_handler) {
- action.sa_handler(info.ssi_signo);
- }
+ pthread_sigmask(SIG_BLOCK, NULL, &set);
+ sigdelset(&set, SIG_IPI);
+ sigdelset(&set, SIGBUS);
+ r = kvm_set_signal_mask(env, &set);
+ if (r) {
+ fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
+ exit(1);
}
}
-static int qemu_signalfd_init(sigset_t mask)
+static void qemu_tcg_init_cpu_signals(void)
{
- int sigfd;
+ sigset_t set;
+ struct sigaction sigact;
- sigfd = qemu_signalfd(&mask);
- if (sigfd == -1) {
- fprintf(stderr, "failed to create signalfd\n");
- return -errno;
- }
+ memset(&sigact, 0, sizeof(sigact));
+ sigact.sa_handler = cpu_signal;
+ sigaction(SIG_IPI, &sigact, NULL);
- fcntl_setfl(sigfd, O_NONBLOCK);
+ sigemptyset(&set);
+ sigaddset(&set, SIG_IPI);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+}
- qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
- (void *)(unsigned long) sigfd);
+static sigset_t block_io_signals(void)
+{
+ sigset_t set;
- return 0;
+ /* SIGUSR2 used by posix-aio-compat.c */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR2);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGIO);
+ sigaddset(&set, SIGALRM);
+ sigaddset(&set, SIG_IPI);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+ return set;
}
int qemu_init_main_loop(void)
@@ -399,18 +676,20 @@ int qemu_init_main_loop(void)
int ret;
sigset_t blocked_signals;
- cpu_set_debug_excp_handler(cpu_debug_handler);
+ qemu_init_sigbus();
blocked_signals = block_io_signals();
ret = qemu_signalfd_init(blocked_signals);
- if (ret)
+ if (ret) {
return ret;
+ }
/* Note eventfd must be drained before signalfd handlers run */
ret = qemu_event_init();
- if (ret)
+ if (ret) {
return ret;
+ }
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_system_cond);
@@ -442,10 +721,11 @@ void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
wi.func = func;
wi.data = data;
- if (!env->queued_work_first)
+ if (!env->queued_work_first) {
env->queued_work_first = &wi;
- else
+ } else {
env->queued_work_last->next = &wi;
+ }
env->queued_work_last = &wi;
wi.next = NULL;
wi.done = false;
@@ -463,8 +743,9 @@ static void flush_queued_work(CPUState *env)
{
struct qemu_work_item *wi;
- if (!env->queued_work_first)
+ if (!env->queued_work_first) {
return;
+ }
while ((wi = env->queued_work_first)) {
env->queued_work_first = wi->next;
@@ -483,14 +764,16 @@ static void qemu_wait_io_event_common(CPUState *env)
qemu_cond_signal(&qemu_pause_cond);
}
flush_queued_work(env);
+ env->thread_kicked = false;
}
static void qemu_tcg_wait_io_event(void)
{
CPUState *env;
- while (!any_cpu_has_work())
+ while (all_cpu_threads_idle()) {
qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000);
+ }
qemu_mutex_unlock(&qemu_global_mutex);
@@ -509,134 +792,72 @@ static void qemu_tcg_wait_io_event(void)
}
}
-static void sigbus_reraise(void)
-{
- sigset_t set;
- struct sigaction action;
-
- memset(&action, 0, sizeof(action));
- action.sa_handler = SIG_DFL;
- if (!sigaction(SIGBUS, &action, NULL)) {
- raise(SIGBUS);
- sigemptyset(&set);
- sigaddset(&set, SIGBUS);
- sigprocmask(SIG_UNBLOCK, &set, NULL);
- }
- perror("Failed to re-raise SIGBUS!\n");
- abort();
-}
-
-static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
- void *ctx)
-{
-#if defined(TARGET_I386)
- if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
-#endif
- sigbus_reraise();
-}
-
-static void qemu_kvm_eat_signal(CPUState *env, int timeout)
-{
- struct timespec ts;
- int r, e;
- siginfo_t siginfo;
- sigset_t waitset;
- sigset_t chkset;
-
- ts.tv_sec = timeout / 1000;
- ts.tv_nsec = (timeout % 1000) * 1000000;
-
- sigemptyset(&waitset);
- sigaddset(&waitset, SIG_IPI);
- sigaddset(&waitset, SIGBUS);
-
- do {
- qemu_mutex_unlock(&qemu_global_mutex);
-
- r = sigtimedwait(&waitset, &siginfo, &ts);
- e = errno;
-
- qemu_mutex_lock(&qemu_global_mutex);
-
- if (r == -1 && !(e == EAGAIN || e == EINTR)) {
- fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
- exit(1);
- }
-
- switch (r) {
- case SIGBUS:
-#ifdef TARGET_I386
- if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
-#endif
- sigbus_reraise();
- break;
- default:
- break;
- }
-
- r = sigpending(&chkset);
- if (r == -1) {
- fprintf(stderr, "sigpending: %s\n", strerror(e));
- exit(1);
- }
- } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
-}
-
static void qemu_kvm_wait_io_event(CPUState *env)
{
- while (!cpu_has_work(env))
+ while (cpu_thread_is_idle(env)) {
qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
+ }
- qemu_kvm_eat_signal(env, 0);
+ qemu_kvm_eat_signals(env);
qemu_wait_io_event_common(env);
}
-static int qemu_cpu_exec(CPUState *env);
-
-static void *kvm_cpu_thread_fn(void *arg)
+static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUState *env = arg;
+ int r;
qemu_mutex_lock(&qemu_global_mutex);
qemu_thread_self(env->thread);
- if (kvm_enabled())
- kvm_init_vcpu(env);
- kvm_init_ipi(env);
+ r = kvm_init_vcpu(env);
+ if (r < 0) {
+ fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
+ exit(1);
+ }
+
+ qemu_kvm_init_cpu_signals(env);
/* signal CPU creation */
env->created = 1;
qemu_cond_signal(&qemu_cpu_cond);
/* and wait for machine initialization */
- while (!qemu_system_ready)
+ while (!qemu_system_ready) {
qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+ }
while (1) {
- if (cpu_can_run(env))
- qemu_cpu_exec(env);
+ if (cpu_can_run(env)) {
+ r = kvm_cpu_exec(env);
+ if (r == EXCP_DEBUG) {
+ cpu_handle_debug_exception(env);
+ }
+ }
qemu_kvm_wait_io_event(env);
}
return NULL;
}
-static void *tcg_cpu_thread_fn(void *arg)
+static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *env = arg;
- tcg_init_ipi();
+ qemu_tcg_init_cpu_signals();
qemu_thread_self(env->thread);
/* signal CPU creation */
qemu_mutex_lock(&qemu_global_mutex);
- for (env = first_cpu; env != NULL; env = env->next_cpu)
+ for (env = first_cpu; env != NULL; env = env->next_cpu) {
env->created = 1;
+ }
qemu_cond_signal(&qemu_cpu_cond);
/* and wait for machine initialization */
- while (!qemu_system_ready)
+ while (!qemu_system_ready) {
qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+ }
while (1) {
cpu_exec_all();
@@ -649,89 +870,32 @@ static void *tcg_cpu_thread_fn(void *arg)
void qemu_cpu_kick(void *_env)
{
CPUState *env = _env;
- qemu_cond_broadcast(env->halt_cond);
- qemu_thread_signal(env->thread, SIG_IPI);
-}
-
-int qemu_cpu_self(void *_env)
-{
- CPUState *env = _env;
- QemuThread this;
-
- qemu_thread_self(&this);
-
- return qemu_thread_equal(&this, env->thread);
-}
-
-static void cpu_signal(int sig)
-{
- if (cpu_single_env)
- cpu_exit(cpu_single_env);
- exit_request = 1;
-}
-
-static void tcg_init_ipi(void)
-{
- sigset_t set;
- struct sigaction sigact;
-
- memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = cpu_signal;
- sigaction(SIG_IPI, &sigact, NULL);
- sigemptyset(&set);
- sigaddset(&set, SIG_IPI);
- pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-}
-
-static void dummy_signal(int sig)
-{
+ qemu_cond_broadcast(env->halt_cond);
+ if (!env->thread_kicked) {
+ qemu_thread_signal(env->thread, SIG_IPI);
+ env->thread_kicked = true;
+ }
}
-static void kvm_init_ipi(CPUState *env)
+void qemu_cpu_kick_self(void)
{
- int r;
- sigset_t set;
- struct sigaction sigact;
-
- memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = dummy_signal;
- sigaction(SIG_IPI, &sigact, NULL);
+ assert(cpu_single_env);
- pthread_sigmask(SIG_BLOCK, NULL, &set);
- sigdelset(&set, SIG_IPI);
- sigdelset(&set, SIGBUS);
- r = kvm_set_signal_mask(env, &set);
- if (r) {
- fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
- exit(1);
+ if (!cpu_single_env->thread_kicked) {
+ qemu_thread_signal(cpu_single_env->thread, SIG_IPI);
+ cpu_single_env->thread_kicked = true;
}
}
-static sigset_t block_io_signals(void)
+int qemu_cpu_self(void *_env)
{
- sigset_t set;
- struct sigaction action;
-
- /* SIGUSR2 used by posix-aio-compat.c */
- sigemptyset(&set);
- sigaddset(&set, SIGUSR2);
- pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
- sigemptyset(&set);
- sigaddset(&set, SIGIO);
- sigaddset(&set, SIGALRM);
- sigaddset(&set, SIG_IPI);
- sigaddset(&set, SIGBUS);
- pthread_sigmask(SIG_BLOCK, &set, NULL);
+ CPUState *env = _env;
+ QemuThread this;
- memset(&action, 0, sizeof(action));
- action.sa_flags = SA_SIGINFO;
- action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
- sigaction(SIGBUS, &action, NULL);
- prctl(PR_MCE_KILL, 1, 1, 0, 0);
+ qemu_thread_self(&this);
- return set;
+ return qemu_thread_equal(&this, env->thread);
}
void qemu_mutex_lock_iothread(void)
@@ -758,8 +922,9 @@ static int all_vcpus_paused(void)
CPUState *penv = first_cpu;
while (penv) {
- if (!penv->stopped)
+ if (!penv->stopped) {
return 0;
+ }
penv = (CPUState *)penv->next_cpu;
}
@@ -798,17 +963,19 @@ void resume_all_vcpus(void)
}
}
-static void tcg_init_vcpu(void *_env)
+static void qemu_tcg_init_vcpu(void *_env)
{
CPUState *env = _env;
+
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
env->thread = qemu_mallocz(sizeof(QemuThread));
env->halt_cond = qemu_mallocz(sizeof(QemuCond));
qemu_cond_init(env->halt_cond);
- qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
- while (env->created == 0)
+ qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
+ while (env->created == 0) {
qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
+ }
tcg_cpu_thread = env->thread;
tcg_halt_cond = env->halt_cond;
} else {
@@ -817,14 +984,15 @@ static void tcg_init_vcpu(void *_env)
}
}
-static void kvm_start_vcpu(CPUState *env)
+static void qemu_kvm_start_vcpu(CPUState *env)
{
env->thread = qemu_mallocz(sizeof(QemuThread));
env->halt_cond = qemu_mallocz(sizeof(QemuCond));
qemu_cond_init(env->halt_cond);
- qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
- while (env->created == 0)
+ qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
+ while (env->created == 0) {
qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
+ }
}
void qemu_init_vcpu(void *_env)
@@ -833,10 +1001,11 @@ void qemu_init_vcpu(void *_env)
env->nr_cores = smp_cores;
env->nr_threads = smp_threads;
- if (kvm_enabled())
- kvm_start_vcpu(env);
- else
- tcg_init_vcpu(env);
+ if (kvm_enabled()) {
+ qemu_kvm_start_vcpu(env);
+ } else {
+ qemu_tcg_init_vcpu(env);
+ }
}
void qemu_notify_event(void)
@@ -844,10 +1013,12 @@ void qemu_notify_event(void)
qemu_event_increment();
}
-static void qemu_system_vmstop_request(int reason)
+void cpu_stop_current(void)
{
- vmstop_requested = reason;
- qemu_notify_event();
+ if (cpu_single_env) {
+ cpu_single_env->stopped = 1;
+ cpu_exit(cpu_single_env);
+ }
}
void vm_stop(int reason)
@@ -861,10 +1032,7 @@ void vm_stop(int reason)
* FIXME: should not return to device code in case
* vm_stop() has been requested.
*/
- if (cpu_single_env) {
- cpu_exit(cpu_single_env);
- cpu_single_env->stop = 1;
- }
+ cpu_stop_current();
return;
}
do_vm_stop(reason);
@@ -872,7 +1040,7 @@ void vm_stop(int reason)
#endif
-static int qemu_cpu_exec(CPUState *env)
+static int tcg_cpu_exec(CPUState *env)
{
int ret;
#ifdef CONFIG_PROFILER
@@ -912,18 +1080,29 @@ static int qemu_cpu_exec(CPUState *env)
bool cpu_exec_all(void)
{
- if (next_cpu == NULL)
+ int r;
+
+ if (next_cpu == NULL) {
next_cpu = first_cpu;
+ }
for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
CPUState *env = next_cpu;
qemu_clock_enable(vm_clock,
(env->singlestep_enabled & SSTEP_NOTIMER) == 0);
- if (qemu_alarm_pending())
+ if (qemu_alarm_pending()) {
break;
+ }
if (cpu_can_run(env)) {
- if (qemu_cpu_exec(env) == EXCP_DEBUG) {
+ if (kvm_enabled()) {
+ r = kvm_cpu_exec(env);
+ qemu_kvm_eat_signals(env);
+ } else {
+ r = tcg_cpu_exec(env);
+ }
+ if (r == EXCP_DEBUG) {
+ cpu_handle_debug_exception(env);
break;
}
} else if (env->stop) {
@@ -931,7 +1110,7 @@ bool cpu_exec_all(void)
}
}
exit_request = 0;
- return any_cpu_has_work();
+ return !all_cpu_threads_idle();
}
void set_numa_modes(void)
diff --git a/cpus.h b/cpus.h
index bf4d9bb87a..e0211260c3 100644
--- a/cpus.h
+++ b/cpus.h
@@ -6,12 +6,11 @@ int qemu_init_main_loop(void);
void qemu_main_loop_start(void);
void resume_all_vcpus(void);
void pause_all_vcpus(void);
+void cpu_stop_current(void);
/* vl.c */
extern int smp_cores;
extern int smp_threads;
-extern int debug_requested;
-extern int vmstop_requested;
void vm_state_notify(int running, int reason);
bool cpu_exec_all(void);
void set_numa_modes(void);
diff --git a/exec.c b/exec.c
index 1b70dc1b5e..d611100dc3 100644
--- a/exec.c
+++ b/exec.c
@@ -2078,6 +2078,36 @@ int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
return ret;
}
+int cpu_physical_log_start(target_phys_addr_t start_addr,
+ ram_addr_t size)
+{
+ CPUPhysMemoryClient *client;
+ QLIST_FOREACH(client, &memory_client_list, list) {
+ if (client->log_start) {
+ int r = client->log_start(client, start_addr, size);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ return 0;
+}
+
+int cpu_physical_log_stop(target_phys_addr_t start_addr,
+ ram_addr_t size)
+{
+ CPUPhysMemoryClient *client;
+ QLIST_FOREACH(client, &memory_client_list, list) {
+ if (client->log_stop) {
+ int r = client->log_stop(client, start_addr, size);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ return 0;
+}
+
static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
{
ram_addr_t ram_addr;
diff --git a/gdbstub.c b/gdbstub.c
index d6556c9a2f..ed51a8a5bb 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2194,14 +2194,14 @@ static void gdb_vm_state_change(void *opaque, int running, int reason)
const char *type;
int ret;
- if (running || (reason != EXCP_DEBUG && reason != EXCP_INTERRUPT) ||
- s->state == RS_INACTIVE || s->state == RS_SYSCALL)
+ if (running || (reason != VMSTOP_DEBUG && reason != VMSTOP_USER) ||
+ s->state == RS_INACTIVE || s->state == RS_SYSCALL) {
return;
-
+ }
/* disable single step if it was enable */
cpu_single_step(env, 0);
- if (reason == EXCP_DEBUG) {
+ if (reason == VMSTOP_DEBUG) {
if (env->watchpoint_hit) {
switch (env->watchpoint_hit->flags & BP_MEM_ACCESS) {
case BP_MEM_READ:
@@ -2252,7 +2252,7 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...)
gdb_current_syscall_cb = cb;
s->state = RS_SYSCALL;
#ifndef CONFIG_USER_ONLY
- vm_stop(EXCP_DEBUG);
+ vm_stop(VMSTOP_DEBUG);
#endif
s->state = RS_IDLE;
va_start(va, fmt);
@@ -2326,7 +2326,7 @@ static void gdb_read_byte(GDBState *s, int ch)
if (vm_running) {
/* when the CPU is running, we cannot do anything except stop
it when receiving a char */
- vm_stop(EXCP_INTERRUPT);
+ vm_stop(VMSTOP_USER);
} else
#endif
{
@@ -2588,7 +2588,7 @@ static void gdb_chr_event(void *opaque, int event)
{
switch (event) {
case CHR_EVENT_OPENED:
- vm_stop(EXCP_INTERRUPT);
+ vm_stop(VMSTOP_USER);
gdb_has_xml = 0;
break;
default:
@@ -2628,8 +2628,9 @@ static int gdb_monitor_write(CharDriverState *chr, const uint8_t *buf, int len)
#ifndef _WIN32
static void gdb_sigterm_handler(int signal)
{
- if (vm_running)
- vm_stop(EXCP_INTERRUPT);
+ if (vm_running) {
+ vm_stop(VMSTOP_USER);
+ }
}
#endif
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 5f45b5dee7..2724f7b480 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -31,7 +31,6 @@
#include "pci.h"
#include "console.h"
#include "vga_int.h"
-#include "kvm.h"
#include "loader.h"
/*
diff --git a/hw/ide/core.c b/hw/ide/core.c
index dd63664c0d..9c91a49767 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -465,7 +465,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op)
s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
s->bus->dma->ops->add_status(s->bus->dma, op);
bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
- vm_stop(0);
+ vm_stop(VMSTOP_DISKFULL);
} else {
if (op & BM_STATUS_DMA_RETRY) {
dma_buf_commit(s, 0);
diff --git a/hw/kvmclock.c b/hw/kvmclock.c
new file mode 100644
index 0000000000..b6ceddfba6
--- /dev/null
+++ b/hw/kvmclock.c
@@ -0,0 +1,125 @@
+/*
+ * QEMU KVM support, paravirtual clock device
+ *
+ * Copyright (C) 2011 Siemens AG
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "sysemu.h"
+#include "sysbus.h"
+#include "kvm.h"
+#include "kvmclock.h"
+
+#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ADJUST_CLOCK)
+
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+
+typedef struct KVMClockState {
+ SysBusDevice busdev;
+ uint64_t clock;
+ bool clock_valid;
+} KVMClockState;
+
+static void kvmclock_pre_save(void *opaque)
+{
+ KVMClockState *s = opaque;
+ struct kvm_clock_data data;
+ int ret;
+
+ if (s->clock_valid) {
+ return;
+ }
+ ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+ data.clock = 0;
+ }
+ s->clock = data.clock;
+ /*
+ * If the VM is stopped, declare the clock state valid to avoid re-reading
+ * it on next vmsave (which would return a different value). Will be reset
+ * when the VM is continued.
+ */
+ s->clock_valid = !vm_running;
+}
+
+static int kvmclock_post_load(void *opaque, int version_id)
+{
+ KVMClockState *s = opaque;
+ struct kvm_clock_data data;
+
+ data.clock = s->clock;
+ data.flags = 0;
+ return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
+}
+
+static void kvmclock_vm_state_change(void *opaque, int running, int reason)
+{
+ KVMClockState *s = opaque;
+
+ if (running) {
+ s->clock_valid = false;
+ }
+}
+
+static int kvmclock_init(SysBusDevice *dev)
+{
+ KVMClockState *s = FROM_SYSBUS(KVMClockState, dev);
+
+ qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
+ return 0;
+}
+
+static const VMStateDescription kvmclock_vmsd = {
+ .name = "kvmclock",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .pre_save = kvmclock_pre_save,
+ .post_load = kvmclock_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(clock, KVMClockState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static SysBusDeviceInfo kvmclock_info = {
+ .qdev.name = "kvmclock",
+ .qdev.size = sizeof(KVMClockState),
+ .qdev.vmsd = &kvmclock_vmsd,
+ .qdev.no_user = 1,
+ .init = kvmclock_init,
+};
+
+/* Note: Must be called after VCPU initialization. */
+void kvmclock_create(void)
+{
+ if (kvm_enabled() &&
+ first_cpu->cpuid_kvm_features & (1ULL << KVM_FEATURE_CLOCKSOURCE)) {
+ sysbus_create_simple("kvmclock", -1, NULL);
+ }
+}
+
+static void kvmclock_register_device(void)
+{
+ if (kvm_enabled()) {
+ sysbus_register_withprop(&kvmclock_info);
+ }
+}
+
+device_init(kvmclock_register_device);
+
+#else /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */
+
+void kvmclock_create(void)
+{
+}
+#endif /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */
diff --git a/hw/kvmclock.h b/hw/kvmclock.h
new file mode 100644
index 0000000000..7a83cbe8f6
--- /dev/null
+++ b/hw/kvmclock.h
@@ -0,0 +1,14 @@
+/*
+ * QEMU KVM support, paravirtual clock device
+ *
+ * Copyright (C) 2011 Siemens AG
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+void kvmclock_create(void);
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index d0bd0cd1fa..291845478d 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -32,6 +32,7 @@
#include "boards.h"
#include "ide.h"
#include "kvm.h"
+#include "kvmclock.h"
#include "sysemu.h"
#include "sysbus.h"
#include "arch_init.h"
@@ -66,7 +67,8 @@ static void pc_init1(ram_addr_t ram_size,
const char *kernel_cmdline,
const char *initrd_filename,
const char *cpu_model,
- int pci_enabled)
+ int pci_enabled,
+ int kvmclock_enabled)
{
int i;
ram_addr_t below_4g_mem_size, above_4g_mem_size;
@@ -86,6 +88,10 @@ static void pc_init1(ram_addr_t ram_size,
pc_cpus_init(cpu_model);
+ if (kvmclock_enabled) {
+ kvmclock_create();
+ }
+
/* allocate ram and load rom/bios */
pc_memory_init(ram_size, kernel_filename, kernel_cmdline, initrd_filename,
&below_4g_mem_size, &above_4g_mem_size);
@@ -193,7 +199,19 @@ static void pc_init_pci(ram_addr_t ram_size,
{
pc_init1(ram_size, boot_device,
kernel_filename, kernel_cmdline,
- initrd_filename, cpu_model, 1);
+ initrd_filename, cpu_model, 1, 1);
+}
+
+static void pc_init_pci_no_kvmclock(ram_addr_t ram_size,
+ const char *boot_device,
+ const char *kernel_filename,
+ const char *kernel_cmdline,
+ const char *initrd_filename,
+ const char *cpu_model)
+{
+ pc_init1(ram_size, boot_device,
+ kernel_filename, kernel_cmdline,
+ initrd_filename, cpu_model, 1, 0);
}
static void pc_init_isa(ram_addr_t ram_size,
@@ -207,7 +225,7 @@ static void pc_init_isa(ram_addr_t ram_size,
cpu_model = "486";
pc_init1(ram_size, boot_device,
kernel_filename, kernel_cmdline,
- initrd_filename, cpu_model, 0);
+ initrd_filename, cpu_model, 0, 1);
}
static QEMUMachine pc_machine = {
@@ -222,7 +240,7 @@ static QEMUMachine pc_machine = {
static QEMUMachine pc_machine_v0_13 = {
.name = "pc-0.13",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_no_kvmclock,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
{
@@ -249,7 +267,7 @@ static QEMUMachine pc_machine_v0_13 = {
static QEMUMachine pc_machine_v0_12 = {
.name = "pc-0.12",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_no_kvmclock,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
{
@@ -280,7 +298,7 @@ static QEMUMachine pc_machine_v0_12 = {
static QEMUMachine pc_machine_v0_11 = {
.name = "pc-0.11",
.desc = "Standard PC, qemu 0.11",
- .init = pc_init_pci,
+ .init = pc_init_pci_no_kvmclock,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
{
@@ -319,7 +337,7 @@ static QEMUMachine pc_machine_v0_11 = {
static QEMUMachine pc_machine_v0_10 = {
.name = "pc-0.10",
.desc = "Standard PC, qemu 0.10",
- .init = pc_init_pci,
+ .init = pc_init_pci_no_kvmclock,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
{
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 488eedd2cd..b05e6547df 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -239,7 +239,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
r->status |= SCSI_REQ_STATUS_RETRY | type;
bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
- vm_stop(0);
+ vm_stop(VMSTOP_DISKFULL);
} else {
if (type == SCSI_REQ_STATUS_RETRY_READ) {
r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, 0);
diff --git a/hw/vga.c b/hw/vga.c
index e2151a2458..c22b8af833 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -28,7 +28,6 @@
#include "vga_int.h"
#include "pixel_ops.h"
#include "qemu-timer.h"
-#include "kvm.h"
//#define DEBUG_VGA
//#define DEBUG_VGA_MEM
@@ -1573,34 +1572,36 @@ static void vga_sync_dirty_bitmap(VGACommonState *s)
void vga_dirty_log_start(VGACommonState *s)
{
- if (kvm_enabled() && s->map_addr)
- kvm_log_start(s->map_addr, s->map_end - s->map_addr);
+ if (s->map_addr) {
+ cpu_physical_log_start(s->map_addr, s->map_end - s->map_addr);
+ }
- if (kvm_enabled() && s->lfb_vram_mapped) {
- kvm_log_start(isa_mem_base + 0xa0000, 0x8000);
- kvm_log_start(isa_mem_base + 0xa8000, 0x8000);
+ if (s->lfb_vram_mapped) {
+ cpu_physical_log_start(isa_mem_base + 0xa0000, 0x8000);
+ cpu_physical_log_start(isa_mem_base + 0xa8000, 0x8000);
}
#ifdef CONFIG_BOCHS_VBE
- if (kvm_enabled() && s->vbe_mapped) {
- kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
+ if (s->vbe_mapped) {
+ cpu_physical_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
}
#endif
}
void vga_dirty_log_stop(VGACommonState *s)
{
- if (kvm_enabled() && s->map_addr)
- kvm_log_stop(s->map_addr, s->map_end - s->map_addr);
+ if (s->map_addr) {
+ cpu_physical_log_stop(s->map_addr, s->map_end - s->map_addr);
+ }
- if (kvm_enabled() && s->lfb_vram_mapped) {
- kvm_log_stop(isa_mem_base + 0xa0000, 0x8000);
- kvm_log_stop(isa_mem_base + 0xa8000, 0x8000);
+ if (s->lfb_vram_mapped) {
+ cpu_physical_log_stop(isa_mem_base + 0xa0000, 0x8000);
+ cpu_physical_log_stop(isa_mem_base + 0xa8000, 0x8000);
}
#ifdef CONFIG_BOCHS_VBE
- if (kvm_enabled() && s->vbe_mapped) {
- kvm_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
+ if (s->vbe_mapped) {
+ cpu_physical_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
}
#endif
}
diff --git a/hw/vhost.c b/hw/vhost.c
index 38cc3b365b..0ca3507f44 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -607,6 +607,8 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
hdev->client.set_memory = vhost_client_set_memory;
hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
hdev->client.migration_log = vhost_client_migration_log;
+ hdev->client.log_start = NULL;
+ hdev->client.log_stop = NULL;
hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
hdev->log = NULL;
hdev->log_size = 0;
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index ffac5a4d8f..b14fb995e8 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -78,7 +78,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
req->next = s->rq;
s->rq = req;
bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
- vm_stop(0);
+ vm_stop(VMSTOP_DISKFULL);
} else {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read);
diff --git a/hw/watchdog.c b/hw/watchdog.c
index e9dd56e229..1c900a1189 100644
--- a/hw/watchdog.c
+++ b/hw/watchdog.c
@@ -132,7 +132,7 @@ void watchdog_perform_action(void)
case WDT_PAUSE: /* same as 'stop' command in monitor */
watchdog_mon_event("pause");
- vm_stop(0);
+ vm_stop(VMSTOP_WATCHDOG);
break;
case WDT_DEBUG:
diff --git a/kvm-all.c b/kvm-all.c
index 2ec9e0980d..e6a7de4722 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -78,7 +78,7 @@ struct KVMState
int many_ioeventfds;
};
-static KVMState *kvm_state;
+KVMState *kvm_state;
static const KVMCapabilityInfo kvm_required_capabilites[] = {
KVM_CAP_INFO(USER_MEMORY),
@@ -91,10 +91,6 @@ static KVMSlot *kvm_alloc_slot(KVMState *s)
int i;
for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- /* KVM private memory slots */
- if (i >= 8 && i < 12) {
- continue;
- }
if (s->slots[i].memory_size == 0) {
return &s->slots[i];
}
@@ -199,7 +195,6 @@ int kvm_pit_in_kernel(void)
return kvm_state->pit_in_kernel;
}
-
int kvm_init_vcpu(CPUState *env)
{
KVMState *s = kvm_state;
@@ -219,6 +214,7 @@ int kvm_init_vcpu(CPUState *env)
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
+ ret = mmap_size;
DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
goto err;
}
@@ -278,13 +274,15 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
return kvm_set_user_memory_region(s, mem);
}
-int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
+static int kvm_log_start(CPUPhysMemoryClient *client,
+ target_phys_addr_t phys_addr, ram_addr_t size)
{
return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES,
KVM_MEM_LOG_DIRTY_PAGES);
}
-int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
+static int kvm_log_stop(CPUPhysMemoryClient *client,
+ target_phys_addr_t phys_addr, ram_addr_t size)
{
return kvm_dirty_pages_log_change(phys_addr, size, 0,
KVM_MEM_LOG_DIRTY_PAGES);
@@ -648,6 +646,8 @@ static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
.set_memory = kvm_client_set_memory,
.sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
.migration_log = kvm_client_migration_log,
+ .log_start = kvm_log_start,
+ .log_stop = kvm_log_stop,
};
int kvm_init(void)
@@ -774,8 +774,8 @@ err:
return ret;
}
-static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
- uint32_t count)
+static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
+ uint32_t count)
{
int i;
uint8_t *ptr = data;
@@ -809,8 +809,6 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
ptr += size;
}
-
- return 1;
}
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
@@ -895,29 +893,34 @@ int kvm_cpu_exec(CPUState *env)
DPRINTF("kvm_cpu_exec()\n");
- do {
-#ifndef CONFIG_IOTHREAD
- if (env->exit_request) {
- DPRINTF("interrupt exit requested\n");
- ret = 0;
- break;
- }
-#endif
+ if (kvm_arch_process_irqchip_events(env)) {
+ env->exit_request = 0;
+ return EXCP_HLT;
+ }
- if (kvm_arch_process_irqchip_events(env)) {
- ret = 0;
- break;
- }
+ cpu_single_env = env;
+ do {
if (env->kvm_vcpu_dirty) {
kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
env->kvm_vcpu_dirty = 0;
}
kvm_arch_pre_run(env, run);
+ if (env->exit_request) {
+ DPRINTF("interrupt exit requested\n");
+ /*
+ * KVM requires us to reenter the kernel after IO exits to complete
+ * instruction emulation. This self-signal will ensure that we
+ * leave ASAP again.
+ */
+ qemu_cpu_kick_self();
+ }
cpu_single_env = NULL;
qemu_mutex_unlock_iothread();
+
ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
+
qemu_mutex_lock_iothread();
cpu_single_env = env;
kvm_arch_post_run(env, run);
@@ -925,7 +928,6 @@ int kvm_cpu_exec(CPUState *env)
kvm_flush_coalesced_mmio_buffer();
if (ret == -EINTR || ret == -EAGAIN) {
- cpu_exit(env);
DPRINTF("io window exit\n");
ret = 0;
break;
@@ -940,11 +942,12 @@ int kvm_cpu_exec(CPUState *env)
switch (run->exit_reason) {
case KVM_EXIT_IO:
DPRINTF("handle_io\n");
- ret = kvm_handle_io(run->io.port,
- (uint8_t *)run + run->io.data_offset,
- run->io.direction,
- run->io.size,
- run->io.count);
+ kvm_handle_io(run->io.port,
+ (uint8_t *)run + run->io.data_offset,
+ run->io.direction,
+ run->io.size,
+ run->io.count);
+ ret = 1;
break;
case KVM_EXIT_MMIO:
DPRINTF("handle_mmio\n");
@@ -960,7 +963,6 @@ int kvm_cpu_exec(CPUState *env)
case KVM_EXIT_SHUTDOWN:
DPRINTF("shutdown\n");
qemu_system_reset_request();
- ret = 1;
break;
case KVM_EXIT_UNKNOWN:
fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
@@ -976,8 +978,8 @@ int kvm_cpu_exec(CPUState *env)
DPRINTF("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
if (kvm_arch_debug(&run->debug.arch)) {
- env->exception_index = EXCP_DEBUG;
- return 0;
+ ret = EXCP_DEBUG;
+ goto out;
}
/* re-enter, this exception was guest-internal */
ret = 1;
@@ -992,14 +994,13 @@ int kvm_cpu_exec(CPUState *env)
if (ret < 0) {
cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
- vm_stop(0);
- env->exit_request = 1;
- }
- if (env->exit_request) {
- env->exit_request = 0;
- env->exception_index = EXCP_INTERRUPT;
+ vm_stop(VMSTOP_PANIC);
}
+ ret = EXCP_INTERRUPT;
+out:
+ env->exit_request = 0;
+ cpu_single_env = NULL;
return ret;
}
@@ -1365,3 +1366,13 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
return -ENOSYS;
#endif
}
+
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+ return kvm_arch_on_sigbus_vcpu(env, code, addr);
+}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+ return kvm_arch_on_sigbus(code, addr);
+}
diff --git a/kvm-stub.c b/kvm-stub.c
index 88682f288b..30f6ec3956 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -33,16 +33,6 @@ int kvm_init_vcpu(CPUState *env)
return -ENOSYS;
}
-int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
-{
- return -ENOSYS;
-}
-
-int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
-{
- return -ENOSYS;
-}
-
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
return -ENOSYS;
@@ -147,6 +137,11 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign)
return -ENOSYS;
}
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+ return 1;
+}
+
int kvm_on_sigbus(int code, void *addr)
{
return 1;
diff --git a/kvm.h b/kvm.h
index ca57517af2..59b2c29fd9 100644
--- a/kvm.h
+++ b/kvm.h
@@ -58,9 +58,6 @@ int kvm_init_vcpu(CPUState *env);
int kvm_cpu_exec(CPUState *env);
#if !defined(CONFIG_USER_ONLY)
-int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size);
-int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size);
-
void kvm_setup_guest_memory(void *start, size_t size);
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
@@ -81,10 +78,14 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset);
int kvm_pit_in_kernel(void);
int kvm_irqchip_in_kernel(void);
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
/* internal API */
struct KVMState;
typedef struct KVMState KVMState;
+extern KVMState *kvm_state;
int kvm_ioctl(KVMState *s, int type, ...);
@@ -96,12 +97,11 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...);
extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run);
+void kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run);
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run);
-int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
-
int kvm_arch_process_irqchip_events(CPUState *env);
int kvm_arch_get_registers(CPUState *env);
@@ -121,8 +121,8 @@ int kvm_arch_init_vcpu(CPUState *env);
void kvm_arch_reset_vcpu(CPUState *env);
-int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
-int kvm_on_sigbus(int code, void *addr);
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_arch_on_sigbus(int code, void *addr);
struct kvm_guest_debug;
struct kvm_debug_exit_arch;
diff --git a/migration.c b/migration.c
index 36125720a4..af3a1f2702 100644
--- a/migration.c
+++ b/migration.c
@@ -378,7 +378,7 @@ void migrate_fd_put_ready(void *opaque)
int old_vm_running = vm_running;
DPRINTF("done iterating\n");
- vm_stop(0);
+ vm_stop(VMSTOP_MIGRATE);
if ((qemu_savevm_state_complete(s->mon, s->file)) < 0) {
if (old_vm_running) {
diff --git a/monitor.c b/monitor.c
index 7fc311d720..22ae3bbff0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1255,7 +1255,7 @@ static void do_singlestep(Monitor *mon, const QDict *qdict)
*/
static int do_stop(Monitor *mon, const QDict *qdict, QObject **ret_data)
{
- vm_stop(EXCP_INTERRUPT);
+ vm_stop(VMSTOP_USER);
return 0;
}
@@ -2783,7 +2783,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
int saved_vm_running = vm_running;
const char *name = qdict_get_str(qdict, "name");
- vm_stop(0);
+ vm_stop(VMSTOP_LOADVM);
if (load_vmstate(name) == 0 && saved_vm_running) {
vm_start();
diff --git a/qemu-common.h b/qemu-common.h
index c7ff280b95..a4d9c21a39 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -288,6 +288,7 @@ void qemu_notify_event(void);
/* Unblock cpu */
void qemu_cpu_kick(void *env);
+void qemu_cpu_kick_self(void);
int qemu_cpu_self(void *env);
/* work queue */
diff --git a/savevm.c b/savevm.c
index 6d83b0f4dc..a50fd31154 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1575,7 +1575,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)
int ret;
saved_vm_running = vm_running;
- vm_stop(0);
+ vm_stop(VMSTOP_SAVEVM);
if (qemu_savevm_state_blocked(mon)) {
ret = -EINVAL;
@@ -1904,7 +1904,7 @@ void do_savevm(Monitor *mon, const QDict *qdict)
}
saved_vm_running = vm_running;
- vm_stop(0);
+ vm_stop(VMSTOP_SAVEVM);
memset(sn, 0, sizeof(*sn));
diff --git a/sysemu.h b/sysemu.h
index 23ae17e2e9..0a83ab9e56 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -37,6 +37,16 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
void *opaque);
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
+#define VMSTOP_USER 0
+#define VMSTOP_DEBUG 1
+#define VMSTOP_SHUTDOWN 2
+#define VMSTOP_DISKFULL 3
+#define VMSTOP_WATCHDOG 4
+#define VMSTOP_PANIC 5
+#define VMSTOP_SAVEVM 6
+#define VMSTOP_LOADVM 7
+#define VMSTOP_MIGRATE 8
+
void vm_start(void);
void vm_stop(int reason);
@@ -51,6 +61,8 @@ void cpu_disable_ticks(void);
void qemu_system_reset_request(void);
void qemu_system_shutdown_request(void);
void qemu_system_powerdown_request(void);
+void qemu_system_debug_request(void);
+void qemu_system_vmstop_request(int reason);
int qemu_shutdown_requested(void);
int qemu_reset_requested(void);
int qemu_powerdown_requested(void);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index af701a4412..5f1df8b4d3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -734,6 +734,7 @@ typedef struct CPUX86State {
uint32_t sipi_vector;
uint32_t cpuid_kvm_features;
uint32_t cpuid_svm_features;
+ bool tsc_valid;
/* in order to simplify APIC support, we leave this pointer to the
user */
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 05010bbc38..0aa0a410da 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -301,6 +301,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
#endif
}
+static void cpu_update_state(void *opaque, int running, int reason)
+{
+ CPUState *env = opaque;
+
+ if (running) {
+ env->tsc_valid = false;
+ }
+}
+
int kvm_arch_init_vcpu(CPUState *env)
{
struct {
@@ -434,6 +443,8 @@ int kvm_arch_init_vcpu(CPUState *env)
}
#endif
+ qemu_add_vm_change_state_handler(cpu_update_state, env);
+
return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
}
@@ -1061,7 +1072,12 @@ static int kvm_get_msrs(CPUState *env)
if (has_msr_hsave_pa) {
msrs[n++].index = MSR_VM_HSAVE_PA;
}
- msrs[n++].index = MSR_IA32_TSC;
+
+ if (!env->tsc_valid) {
+ msrs[n++].index = MSR_IA32_TSC;
+ env->tsc_valid = !vm_running;
+ }
+
#ifdef TARGET_X86_64
if (lm_capable_kernel) {
msrs[n++].index = MSR_CSTAR;
@@ -1424,49 +1440,65 @@ int kvm_arch_get_registers(CPUState *env)
return 0;
}
-int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
+ int ret;
+
/* Inject NMI */
if (env->interrupt_request & CPU_INTERRUPT_NMI) {
env->interrupt_request &= ~CPU_INTERRUPT_NMI;
DPRINTF("injected NMI\n");
- kvm_vcpu_ioctl(env, KVM_NMI);
- }
-
- /* Try to inject an interrupt if the guest can accept it */
- if (run->ready_for_interrupt_injection &&
- (env->interrupt_request & CPU_INTERRUPT_HARD) &&
- (env->eflags & IF_MASK)) {
- int irq;
-
- env->interrupt_request &= ~CPU_INTERRUPT_HARD;
- irq = cpu_get_pic_interrupt(env);
- if (irq >= 0) {
- struct kvm_interrupt intr;
- intr.irq = irq;
- /* FIXME: errors */
- DPRINTF("injected interrupt %d\n", irq);
- kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+ ret = kvm_vcpu_ioctl(env, KVM_NMI);
+ if (ret < 0) {
+ fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
+ strerror(-ret));
}
}
- /* If we have an interrupt but the guest is not ready to receive an
- * interrupt, request an interrupt window exit. This will
- * cause a return to userspace as soon as the guest is ready to
- * receive interrupts. */
- if ((env->interrupt_request & CPU_INTERRUPT_HARD)) {
- run->request_interrupt_window = 1;
- } else {
- run->request_interrupt_window = 0;
- }
+ if (!kvm_irqchip_in_kernel()) {
+ /* Force the VCPU out of its inner loop to process the INIT request */
+ if (env->interrupt_request & CPU_INTERRUPT_INIT) {
+ env->exit_request = 1;
+ }
- DPRINTF("setting tpr\n");
- run->cr8 = cpu_get_apic_tpr(env->apic_state);
+ /* Try to inject an interrupt if the guest can accept it */
+ if (run->ready_for_interrupt_injection &&
+ (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+ (env->eflags & IF_MASK)) {
+ int irq;
+
+ env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+ irq = cpu_get_pic_interrupt(env);
+ if (irq >= 0) {
+ struct kvm_interrupt intr;
+
+ intr.irq = irq;
+ DPRINTF("injected interrupt %d\n", irq);
+ ret = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+ if (ret < 0) {
+ fprintf(stderr,
+ "KVM: injection failed, interrupt lost (%s)\n",
+ strerror(-ret));
+ }
+ }
+ }
- return 0;
+ /* If we have an interrupt but the guest is not ready to receive an
+ * interrupt, request an interrupt window exit. This will
+ * cause a return to userspace as soon as the guest is ready to
+ * receive interrupts. */
+ if ((env->interrupt_request & CPU_INTERRUPT_HARD)) {
+ run->request_interrupt_window = 1;
+ } else {
+ run->request_interrupt_window = 0;
+ }
+
+ DPRINTF("setting tpr\n");
+ run->cr8 = cpu_get_apic_tpr(env->apic_state);
+ }
}
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
if (run->if_flag) {
env->eflags |= IF_MASK;
@@ -1475,18 +1507,21 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
}
cpu_set_apic_tpr(env->apic_state, run->cr8);
cpu_set_apic_base(env->apic_state, run->apic_base);
-
- return 0;
}
int kvm_arch_process_irqchip_events(CPUState *env)
{
+ if (kvm_irqchip_in_kernel()) {
+ return 0;
+ }
+
+ if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI)) {
+ env->halted = 0;
+ }
if (env->interrupt_request & CPU_INTERRUPT_INIT) {
kvm_cpu_synchronize_state(env);
do_cpu_init(env);
- env->exception_index = EXCP_HALTED;
}
-
if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
kvm_cpu_synchronize_state(env);
do_cpu_sipi(env);
@@ -1501,7 +1536,6 @@ static int kvm_handle_halt(CPUState *env)
(env->eflags & IF_MASK)) &&
!(env->interrupt_request & CPU_INTERRUPT_NMI)) {
env->halted = 1;
- env->exception_index = EXCP_HLT;
return 0;
}
@@ -1839,7 +1873,7 @@ static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr)
#endif
-int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
#if defined(KVM_CAP_MCE)
void *vaddr;
@@ -1889,7 +1923,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
return 0;
}
-int kvm_on_sigbus(int code, void *addr)
+int kvm_arch_on_sigbus(int code, void *addr)
{
#if defined(KVM_CAP_MCE)
if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 710eca1dca..bd4012a4ec 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -256,14 +256,12 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
return 0;
}
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
- return 0;
}
-int kvm_arch_process_irqchip_events(CPUState *env)
+void kvm_arch_process_irqchip_events(CPUState *env)
{
- return 0;
}
static int kvmppc_handle_halt(CPUState *env)
@@ -404,3 +402,13 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
return true;
}
+
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+ return 1;
+}
+
+int kvm_arch_on_sigbus(int code, void *addr)
+{
+ return 1;
+}
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 38823f54f7..b349812dba 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -169,14 +169,12 @@ int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
return 0;
}
-int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
- return 0;
}
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
- return 0;
}
int kvm_arch_process_irqchip_events(CPUState *env)
@@ -505,3 +503,13 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
return true;
}
+
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+ return 1;
+}
+
+int kvm_arch_on_sigbus(int code, void *addr)
+{
+ return 1;
+}
diff --git a/vl.c b/vl.c
index ed2cdfae42..b436952d6c 100644
--- a/vl.c
+++ b/vl.c
@@ -1217,8 +1217,8 @@ static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
static int reset_requested;
static int shutdown_requested;
static int powerdown_requested;
-int debug_requested;
-int vmstop_requested;
+static int debug_requested;
+static int vmstop_requested;
int qemu_shutdown_requested(void)
{
@@ -1296,6 +1296,7 @@ void qemu_system_reset_request(void)
} else {
reset_requested = 1;
}
+ cpu_stop_current();
qemu_notify_event();
}
@@ -1311,6 +1312,18 @@ void qemu_system_powerdown_request(void)
qemu_notify_event();
}
+void qemu_system_debug_request(void)
+{
+ debug_requested = 1;
+ qemu_notify_event();
+}
+
+void qemu_system_vmstop_request(int reason)
+{
+ vmstop_requested = reason;
+ qemu_notify_event();
+}
+
void main_loop_wait(int nonblocking)
{
IOHandlerRecord *ioh;
@@ -1388,52 +1401,51 @@ void main_loop_wait(int nonblocking)
}
-static int vm_can_run(void)
+#ifndef CONFIG_IOTHREAD
+static int vm_request_pending(void)
{
- if (powerdown_requested)
- return 0;
- if (reset_requested)
- return 0;
- if (shutdown_requested)
- return 0;
- if (debug_requested)
- return 0;
- return 1;
+ return powerdown_requested ||
+ reset_requested ||
+ shutdown_requested ||
+ debug_requested ||
+ vmstop_requested;
}
+#endif
qemu_irq qemu_system_powerdown;
static void main_loop(void)
{
+ bool nonblocking = false;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
int r;
qemu_main_loop_start();
for (;;) {
- do {
- bool nonblocking = false;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
#ifndef CONFIG_IOTHREAD
- nonblocking = cpu_exec_all();
+ nonblocking = cpu_exec_all();
+ if (vm_request_pending()) {
+ nonblocking = true;
+ }
#endif
#ifdef CONFIG_PROFILER
- ti = profile_getclock();
+ ti = profile_getclock();
#endif
- main_loop_wait(nonblocking);
+ main_loop_wait(nonblocking);
#ifdef CONFIG_PROFILER
- dev_time += profile_getclock() - ti;
+ dev_time += profile_getclock() - ti;
#endif
- } while (vm_can_run());
- if ((r = qemu_debug_requested())) {
- vm_stop(r);
+ if (qemu_debug_requested()) {
+ vm_stop(VMSTOP_DEBUG);
}
if (qemu_shutdown_requested()) {
monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
if (no_shutdown) {
- vm_stop(0);
+ vm_stop(VMSTOP_SHUTDOWN);
no_shutdown = 0;
} else
break;