about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2017-03-03 16:41:09 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2017-03-03 16:41:09 +0000
commit 5febe7671f5ec0a6842d64edfb920feb7bbb5f1e (patch)
tree 4983379122a9105f30b12f57c0449e3d1e684b5c
parent 5b10b94bd53229540b088342015d69bc5ef2cc1d (diff)
parent f6eb0b319e4bad3d01d74d71e3a6cf40f0ede720 (diff)
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* kernel header update (requested by David and Vijay)
* GuestPanicInformation fixups (Anton)
* record/replay icount fixes (Pavel)
* cpu-exec cleanup, unification of icount_decr with tcg_exit_req (me)
* KVM_CAP_IMMEDIATE_EXIT support (me)
* vmxcap update (me)
* iscsi locking fix (me)
* VFIO ram device fix (Yongji)
* scsi-hd vs. default CD-ROM (Hervé)
* SMI migration fix (Dave)
* spice-char segfault (Li Qiang)
* improved "info mtree -f" (me)

# gpg: Signature made Fri 03 Mar 2017 15:43:04 GMT
# gpg: using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (21 commits)
  iscsi: fix missing unlock
  memory: show region offset and ROM/RAM type in "info mtree -f"
  x86: Work around SMI migration breakages
  spice-char: fix segfault in char_spice_finalize
  vl: disable default cdrom when using explicitely scsi-hd
  memory: Introduce DEVICE_HOST_ENDIAN for ram device
  qmp-events: fix GUEST_PANICKED description formatting
  qapi: flatten GuestPanicInformation union
  vmxcap: update for September 2016 SDM
  vmxcap: port to Python 3
  KVM: use KVM_CAP_IMMEDIATE_EXIT
  kvm: use atomic_read/atomic_set to access cpu->exit_request
  KVM: move SIG_IPI handling to kvm-all.c
  KVM: do not use sigtimedwait to catch SIGBUS
  KVM: remove kvm_arch_on_sigbus
  cpus: reorganize signal handling code
  KVM: x86: cleanup SIGBUS handlers
  cpus: remove ugly cast on sigbus_handler
  cpu-exec: remove unnecessary check of cpu->exit_request
  replay: check icount in cpu exec loop
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- block/iscsi.c 4
-rw-r--r-- cpu-exec.c 93
-rw-r--r-- cpus.c 102
-rw-r--r-- include/exec/cpu-common.h 6
-rw-r--r-- include/exec/gen-icount.h 53
-rw-r--r-- include/hw/i386/pc.h 4
-rw-r--r-- include/qemu/compatfd.h 42
-rw-r--r-- include/qemu/osdep.h 37
-rw-r--r-- include/qom/cpu.h 15
-rw-r--r-- include/sysemu/kvm.h 11
-rw-r--r-- kvm-all.c 150
-rw-r--r-- kvm-stub.c 12
-rw-r--r-- memory.c 27
-rw-r--r-- qapi-schema.json 12
-rw-r--r-- qapi/event.json 4
-rw-r--r-- qom/cpu.c 2
-rwxr-xr-x scripts/kvm/vmxcap 23
-rw-r--r-- spice-qemu-char.c 5
-rw-r--r-- target/arm/kvm.c 10
-rw-r--r-- target/i386/cpu.c 17
-rw-r--r-- target/i386/cpu.h 3
-rw-r--r-- target/i386/kvm.c 88
-rw-r--r-- target/mips/kvm.c 12
-rw-r--r-- target/ppc/kvm.c 10
-rw-r--r-- target/s390x/kvm.c 10
-rw-r--r-- tcg/tcg.h 1
-rw-r--r-- translate-all.c 2
-rw-r--r-- translate-common.c 13
-rw-r--r-- util/compatfd.c 1
-rw-r--r-- util/main-loop.c 5
-rw-r--r-- util/oslib-posix.c 33
-rw-r--r-- vl.c 13
32 files changed, 428 insertions, 392 deletions
diff --git a/block/iscsi.c b/block/iscsi.c
index 76319a1a6e..75d890538e 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -637,6 +637,7 @@ retry:
}
#endif
if (iTask.task == NULL) {
+ qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
#if LIBISCSI_API_VERSION < (20160603)
@@ -864,6 +865,7 @@ retry:
}
#endif
if (iTask.task == NULL) {
+ qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
#if LIBISCSI_API_VERSION < (20160603)
@@ -904,6 +906,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
0, iscsi_co_generic_cb, &iTask) == NULL) {
+ qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
@@ -1237,6 +1240,7 @@ retry:
0, 0, iscsi_co_generic_cb, &iTask);
}
if (iTask.task == NULL) {
+ qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
diff --git a/cpu-exec.c b/cpu-exec.c
index 1a5ad4889d..d04dd91ebd 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -186,12 +186,6 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
cc->set_pc(cpu, last_tb->pc);
}
}
- if (tb_exit == TB_EXIT_REQUESTED) {
- /* We were asked to stop executing TBs (probably a pending
- * interrupt. We've now stopped, so clear the flag.
- */
- atomic_set(&cpu->tcg_exit_req, 0);
- }
return ret;
}
@@ -560,8 +554,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
qemu_mutex_unlock_iothread();
}
-
- if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
+ /* Finally, check if we need to exit to the main loop. */
+ if (unlikely(atomic_read(&cpu->exit_request)
+ || (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
return true;
@@ -571,62 +566,54 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
}
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
- TranslationBlock **last_tb, int *tb_exit,
- SyncClocks *sc)
+ TranslationBlock **last_tb, int *tb_exit)
{
uintptr_t ret;
-
- if (unlikely(atomic_read(&cpu->exit_request))) {
- return;
- }
+ int32_t insns_left;
trace_exec_tb(tb, tb->pc);
ret = cpu_tb_exec(cpu, tb);
tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
*tb_exit = ret & TB_EXIT_MASK;
- switch (*tb_exit) {
- case TB_EXIT_REQUESTED:
+ if (*tb_exit != TB_EXIT_REQUESTED) {
+ *last_tb = tb;
+ return;
+ }
+
+ *last_tb = NULL;
+ insns_left = atomic_read(&cpu->icount_decr.u32);
+ atomic_set(&cpu->icount_decr.u16.high, 0);
+ if (insns_left < 0) {
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
- * will also have set something else (eg interrupt_request)
- * which we will handle next time around the loop. But we
- * need to ensure the tcg_exit_req read in generated code
- * comes before the next read of cpu->exit_request or
- * cpu->interrupt_request.
+ * will also have set something else (eg exit_request or
+ * interrupt_request) which we will handle next time around
+ * the loop. But we need to ensure the zeroing of icount_decr
+ * comes before the next read of cpu->exit_request
+ * or cpu->interrupt_request.
*/
smp_mb();
- *last_tb = NULL;
- break;
- case TB_EXIT_ICOUNT_EXPIRED:
- {
- /* Instruction counter expired. */
-#ifdef CONFIG_USER_ONLY
- abort();
-#else
- int insns_left = cpu->icount_decr.u32;
- *last_tb = NULL;
- if (cpu->icount_extra && insns_left >= 0) {
- /* Refill decrementer and continue execution. */
- cpu->icount_extra += insns_left;
- insns_left = MIN(0xffff, cpu->icount_extra);
- cpu->icount_extra -= insns_left;
- cpu->icount_decr.u16.low = insns_left;
- } else {
- if (insns_left > 0) {
- /* Execute remaining instructions. */
- cpu_exec_nocache(cpu, insns_left, tb, false);
- align_clocks(sc, cpu);
- }
- cpu->exception_index = EXCP_INTERRUPT;
- cpu_loop_exit(cpu);
- }
- break;
-#endif
+ return;
}
- default:
- *last_tb = tb;
- break;
+
+ /* Instruction counter expired. */
+ assert(use_icount);
+#ifndef CONFIG_USER_ONLY
+ if (cpu->icount_extra) {
+ /* Refill decrementer and continue execution. */
+ cpu->icount_extra += insns_left;
+ insns_left = MIN(0xffff, cpu->icount_extra);
+ cpu->icount_extra -= insns_left;
+ cpu->icount_decr.u16.low = insns_left;
+ } else {
+ /* Execute any remaining instructions, then let the main loop
+ * handle the next event.
+ */
+ if (insns_left > 0) {
+ cpu_exec_nocache(cpu, insns_left, tb, false);
+ }
}
+#endif
}
/* main execution loop */
@@ -635,7 +622,7 @@ int cpu_exec(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int ret;
- SyncClocks sc;
+ SyncClocks sc = { 0 };
/* replay_interrupt may need current_cpu */
current_cpu = cpu;
@@ -683,7 +670,7 @@ int cpu_exec(CPUState *cpu)
while (!cpu_handle_interrupt(cpu, &last_tb)) {
TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
- cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
+ cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
diff --git a/cpus.c b/cpus.c
index 8200ac6b75..c857ad2957 100644
--- a/cpus.c
+++ b/cpus.c
@@ -51,10 +51,6 @@
#include "hw/nmi.h"
#include "sysemu/replay.h"
-#ifndef _WIN32
-#include "qemu/compatfd.h"
-#endif
-
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
@@ -924,13 +920,23 @@ static void sigbus_reraise(void)
abort();
}
-static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
- void *ctx)
+static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
- if (kvm_on_sigbus(siginfo->ssi_code,
- (void *)(intptr_t)siginfo->ssi_addr)) {
+ if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
+
+ if (current_cpu) {
+ /* Called asynchronously in VCPU thread. */
+ if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
+ sigbus_reraise();
+ }
+ } else {
+ /* Called synchronously (via signalfd) in main thread. */
+ if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
+ sigbus_reraise();
+ }
+ }
}
static void qemu_init_sigbus(void)
@@ -939,92 +945,17 @@ static void qemu_init_sigbus(void)
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
- action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
-
-static void qemu_kvm_eat_signals(CPUState *cpu)
-{
- struct timespec ts = { 0, 0 };
- siginfo_t siginfo;
- sigset_t waitset;
- sigset_t chkset;
- int r;
-
- sigemptyset(&waitset);
- sigaddset(&waitset, SIG_IPI);
- sigaddset(&waitset, SIGBUS);
-
- do {
- r = sigtimedwait(&waitset, &siginfo, &ts);
- if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
- perror("sigtimedwait");
- exit(1);
- }
-
- switch (r) {
- case SIGBUS:
- if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
- sigbus_reraise();
- }
- break;
- default:
- break;
- }
-
- r = sigpending(&chkset);
- if (r == -1) {
- perror("sigpending");
- exit(1);
- }
- } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
-}
-
#else /* !CONFIG_LINUX */
-
static void qemu_init_sigbus(void)
{
}
-
-static void qemu_kvm_eat_signals(CPUState *cpu)
-{
-}
#endif /* !CONFIG_LINUX */
-#ifndef _WIN32
-static void dummy_signal(int sig)
-{
-}
-
-static void qemu_kvm_init_cpu_signals(CPUState *cpu)
-{
- int r;
- sigset_t set;
- struct sigaction sigact;
-
- memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = dummy_signal;
- sigaction(SIG_IPI, &sigact, NULL);
-
- pthread_sigmask(SIG_BLOCK, NULL, &set);
- sigdelset(&set, SIG_IPI);
- sigdelset(&set, SIGBUS);
- r = kvm_set_signal_mask(cpu, &set);
- if (r) {
- fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
- exit(1);
- }
-}
-
-#else /* _WIN32 */
-static void qemu_kvm_init_cpu_signals(CPUState *cpu)
-{
- abort();
-}
-#endif /* _WIN32 */
-
static QemuMutex qemu_global_mutex;
static QemuThread io_thread;
@@ -1099,7 +1030,6 @@ static void qemu_kvm_wait_io_event(CPUState *cpu)
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
- qemu_kvm_eat_signals(cpu);
qemu_wait_io_event_common(cpu);
}
@@ -1122,7 +1052,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
exit(1);
}
- qemu_kvm_init_cpu_signals(cpu);
+ kvm_init_cpu_signals(cpu);
/* signal CPU creation */
cpu->created = true;
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 8c305aa4fa..b62f0d82e4 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -36,6 +36,12 @@ enum device_endian {
DEVICE_LITTLE_ENDIAN,
};
+#if defined(HOST_WORDS_BIGENDIAN)
+#define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN
+#else
+#define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN
+#endif
+
/* address in the RAM (different from a physical address) */
#if defined(CONFIG_XEN_BACKEND)
typedef uint64_t ram_addr_t;
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 050de59b38..62d462e494 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -6,58 +6,55 @@
/* Helpers for instruction counting code generation. */
static int icount_start_insn_idx;
-static TCGLabel *icount_label;
static TCGLabel *exitreq_label;
static inline void gen_tb_start(TranslationBlock *tb)
{
- TCGv_i32 count, flag, imm;
+ TCGv_i32 count, imm;
exitreq_label = gen_new_label();
- flag = tcg_temp_new_i32();
- tcg_gen_ld_i32(flag, cpu_env,
- offsetof(CPUState, tcg_exit_req) - ENV_OFFSET);
- tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label);
- tcg_temp_free_i32(flag);
-
- if (!(tb->cflags & CF_USE_ICOUNT)) {
- return;
+ if (tb->cflags & CF_USE_ICOUNT) {
+ count = tcg_temp_local_new_i32();
+ } else {
+ count = tcg_temp_new_i32();
}
- icount_label = gen_new_label();
- count = tcg_temp_local_new_i32();
tcg_gen_ld_i32(count, cpu_env,
-ENV_OFFSET + offsetof(CPUState, icount_decr.u32));
- imm = tcg_temp_new_i32();
- /* We emit a movi with a dummy immediate argument. Keep the insn index
- * of the movi so that we later (when we know the actual insn count)
- * can update the immediate argument with the actual insn count. */
- icount_start_insn_idx = tcg_op_buf_count();
- tcg_gen_movi_i32(imm, 0xdeadbeef);
+ if (tb->cflags & CF_USE_ICOUNT) {
+ imm = tcg_temp_new_i32();
+ /* We emit a movi with a dummy immediate argument. Keep the insn index
+ * of the movi so that we later (when we know the actual insn count)
+ * can update the immediate argument with the actual insn count. */
+ icount_start_insn_idx = tcg_op_buf_count();
+ tcg_gen_movi_i32(imm, 0xdeadbeef);
+
+ tcg_gen_sub_i32(count, count, imm);
+ tcg_temp_free_i32(imm);
+ }
+
+ tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, exitreq_label);
- tcg_gen_sub_i32(count, count, imm);
- tcg_temp_free_i32(imm);
+ if (tb->cflags & CF_USE_ICOUNT) {
+ tcg_gen_st16_i32(count, cpu_env,
+ -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low));
+ }
- tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label);
- tcg_gen_st16_i32(count, cpu_env,
- -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low));
tcg_temp_free_i32(count);
}
static void gen_tb_end(TranslationBlock *tb, int num_insns)
{
- gen_set_label(exitreq_label);
- tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED);
-
if (tb->cflags & CF_USE_ICOUNT) {
/* Update the num_insn immediate parameter now that we know
* the actual insn count. */
tcg_set_insn_param(icount_start_insn_idx, 1, num_insns);
- gen_set_label(icount_label);
- tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED);
}
+ gen_set_label(exitreq_label);
+ tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED);
+
/* Terminate the linked list. */
tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0;
}
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index d1f45540a1..ab303c7fee 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -623,6 +623,10 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
.driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\
.property = "xlevel",\
.value = stringify(0x8000000a),\
+ },{\
+ .driver = TYPE_X86_CPU,\
+ .property = "kvm-no-smi-migration",\
+ .value = "on",\
},
#define PC_COMPAT_2_2 \
diff --git a/include/qemu/compatfd.h b/include/qemu/compatfd.h
deleted file mode 100644
index aa12ee9364..0000000000
--- a/include/qemu/compatfd.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * signalfd/eventfd compatibility
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_COMPATFD_H
-#define QEMU_COMPATFD_H
-
-
-struct qemu_signalfd_siginfo {
- uint32_t ssi_signo; /* Signal number */
- int32_t ssi_errno; /* Error number (unused) */
- int32_t ssi_code; /* Signal code */
- uint32_t ssi_pid; /* PID of sender */
- uint32_t ssi_uid; /* Real UID of sender */
- int32_t ssi_fd; /* File descriptor (SIGIO) */
- uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */
- uint32_t ssi_band; /* Band event (SIGIO) */
- uint32_t ssi_overrun; /* POSIX timer overrun count */
- uint32_t ssi_trapno; /* Trap number that caused signal */
- int32_t ssi_status; /* Exit status or signal (SIGCHLD) */
- int32_t ssi_int; /* Integer sent by sigqueue(2) */
- uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */
- uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */
- uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */
- uint64_t ssi_addr; /* Address that generated signal
- (for hardware-generated signals) */
- uint8_t pad[48]; /* Pad size to 128 bytes (allow for
- additional fields in the future) */
-};
-
-int qemu_signalfd(const sigset_t *mask);
-
-#endif
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 56c9e22405..af37195fef 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -284,6 +284,15 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#endif
+#if defined(CONFIG_LINUX)
+#ifndef BUS_MCEERR_AR
+#define BUS_MCEERR_AR 4
+#endif
+#ifndef BUS_MCEERR_AO
+#define BUS_MCEERR_AO 5
+#endif
+#endif
+
#if defined(__linux__) && \
(defined(__x86_64__) || defined(__arm__) || defined(__aarch64__))
/* Use 2 MiB alignment so transparent hugepages can be used by KVM.
@@ -297,6 +306,34 @@ void qemu_anon_ram_free(void *ptr, size_t size);
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
+#ifdef CONFIG_POSIX
+struct qemu_signalfd_siginfo {
+ uint32_t ssi_signo; /* Signal number */
+ int32_t ssi_errno; /* Error number (unused) */
+ int32_t ssi_code; /* Signal code */
+ uint32_t ssi_pid; /* PID of sender */
+ uint32_t ssi_uid; /* Real UID of sender */
+ int32_t ssi_fd; /* File descriptor (SIGIO) */
+ uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */
+ uint32_t ssi_band; /* Band event (SIGIO) */
+ uint32_t ssi_overrun; /* POSIX timer overrun count */
+ uint32_t ssi_trapno; /* Trap number that caused signal */
+ int32_t ssi_status; /* Exit status or signal (SIGCHLD) */
+ int32_t ssi_int; /* Integer sent by sigqueue(2) */
+ uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */
+ uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */
+ uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */
+ uint64_t ssi_addr; /* Address that generated signal
+ (for hardware-generated signals) */
+ uint8_t pad[48]; /* Pad size to 128 bytes (allow for
+ additional fields in the future) */
+};
+
+int qemu_signalfd(const sigset_t *mask);
+void sigaction_invoke(struct sigaction *action,
+ struct qemu_signalfd_siginfo *info);
+#endif
+
int qemu_madvise(void *addr, size_t len, int advice);
int qemu_open(const char *name, int flags, ...);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 3e61c880da..c3292efe1c 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -275,11 +275,11 @@ struct qemu_work_item;
* @stopped: Indicates the CPU has been artificially stopped.
* @unplug: Indicates a pending CPU unplug request.
* @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
- * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
- * CPU and return to its top level loop.
* @singlestep_enabled: Flags for single-stepping.
* @icount_extra: Instructions until next timer event.
- * @icount_decr: Number of cycles left, with interrupt flag in high bit.
+ * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode.
+ * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this
+ * CPU and return to its top level loop (even in non-icount mode).
* This allows a single read-compare-cbranch-write sequence to test
* for both decrementer underflow and exceptions.
* @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution
@@ -382,10 +382,6 @@ struct CPUState {
/* TODO Move common fields from CPUArchState here. */
int cpu_index; /* used by alpha TCG */
uint32_t halted; /* used by alpha, cris, ppc TCG */
- union {
- uint32_t u32;
- icount_decr_u16 u16;
- } icount_decr;
uint32_t can_do_io;
int32_t exception_index; /* used by m68k TCG */
@@ -398,7 +394,10 @@ struct CPUState {
offset from AREG0. Leave this field at the end so as to make the
(absolute value) offset as small as possible. This reduces code
size, especially for hosts without large memory offsets. */
- uint32_t tcg_exit_req;
+ union {
+ uint32_t u32;
+ icount_decr_u16 u16;
+ } icount_decr;
bool hax_vcpu_dirty;
struct hax_vcpu_state *hax_vcpu;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 3045ee7678..24281fc7f8 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -238,9 +238,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
target_ulong len, int type);
void kvm_remove_all_breakpoints(CPUState *cpu);
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap);
-#ifndef _WIN32
-int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset);
-#endif
int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
int kvm_on_sigbus(int code, void *addr);
@@ -357,8 +354,10 @@ bool kvm_vcpu_id_is_valid(int vcpu_id);
/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
unsigned long kvm_arch_vcpu_id(CPUState *cpu);
-int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
-int kvm_arch_on_sigbus(int code, void *addr);
+#ifdef TARGET_I386
+#define KVM_HAVE_MCE_INJECTION 1
+void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+#endif
void kvm_arch_init_irq_routing(KVMState *s);
@@ -461,6 +460,8 @@ void kvm_cpu_synchronize_state(CPUState *cpu);
void kvm_cpu_synchronize_post_reset(CPUState *cpu);
void kvm_cpu_synchronize_post_init(CPUState *cpu);
+void kvm_init_cpu_signals(CPUState *cpu);
+
/**
* kvm_irqchip_add_msi_route - Add MSI route for specific vector
* @s: KVM state
diff --git a/kvm-all.c b/kvm-all.c
index 0c94637c46..9040bd50a4 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -120,6 +120,7 @@ bool kvm_vm_attributes_allowed;
bool kvm_direct_msi_allowed;
bool kvm_ioeventfd_any_length_allowed;
bool kvm_msi_use_devid;
+static bool kvm_immediate_exit;
static const KVMCapabilityInfo kvm_required_capabilites[] = {
KVM_CAP_INFO(USER_MEMORY),
@@ -1619,6 +1620,7 @@ static int kvm_init(MachineState *ms)
goto err;
}
+ kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
/* If unspecified, use the default value */
@@ -1893,6 +1895,61 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu)
run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
+#ifdef KVM_HAVE_MCE_INJECTION
+static __thread void *pending_sigbus_addr;
+static __thread int pending_sigbus_code;
+static __thread bool have_sigbus_pending;
+#endif
+
+static void kvm_cpu_kick(CPUState *cpu)
+{
+ atomic_set(&cpu->kvm_run->immediate_exit, 1);
+}
+
+static void kvm_cpu_kick_self(void)
+{
+ if (kvm_immediate_exit) {
+ kvm_cpu_kick(current_cpu);
+ } else {
+ qemu_cpu_kick_self();
+ }
+}
+
+static void kvm_eat_signals(CPUState *cpu)
+{
+ struct timespec ts = { 0, 0 };
+ siginfo_t siginfo;
+ sigset_t waitset;
+ sigset_t chkset;
+ int r;
+
+ if (kvm_immediate_exit) {
+ atomic_set(&cpu->kvm_run->immediate_exit, 0);
+ /* Write kvm_run->immediate_exit before the cpu->exit_request
+ * write in kvm_cpu_exec.
+ */
+ smp_wmb();
+ return;
+ }
+
+ sigemptyset(&waitset);
+ sigaddset(&waitset, SIG_IPI);
+
+ do {
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
+ perror("sigtimedwait");
+ exit(1);
+ }
+
+ r = sigpending(&chkset);
+ if (r == -1) {
+ perror("sigpending");
+ exit(1);
+ }
+ } while (sigismember(&chkset, SIG_IPI));
+}
+
int kvm_cpu_exec(CPUState *cpu)
{
struct kvm_run *run = cpu->kvm_run;
@@ -1901,7 +1958,7 @@ int kvm_cpu_exec(CPUState *cpu)
DPRINTF("kvm_cpu_exec()\n");
if (kvm_arch_process_async_events(cpu)) {
- cpu->exit_request = 0;
+ atomic_set(&cpu->exit_request, 0);
return EXCP_HLT;
}
@@ -1916,23 +1973,39 @@ int kvm_cpu_exec(CPUState *cpu)
}
kvm_arch_pre_run(cpu, run);
- if (cpu->exit_request) {
+ if (atomic_read(&cpu->exit_request)) {
DPRINTF("interrupt exit requested\n");
/*
* KVM requires us to reenter the kernel after IO exits to complete
* instruction emulation. This self-signal will ensure that we
* leave ASAP again.
*/
- qemu_cpu_kick_self();
+ kvm_cpu_kick_self();
}
+ /* Read cpu->exit_request before KVM_RUN reads run->immediate_exit.
+ * Matching barrier in kvm_eat_signals.
+ */
+ smp_rmb();
+
run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
attrs = kvm_arch_post_run(cpu, run);
+#ifdef KVM_HAVE_MCE_INJECTION
+ if (unlikely(have_sigbus_pending)) {
+ qemu_mutex_lock_iothread();
+ kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code,
+ pending_sigbus_addr);
+ have_sigbus_pending = false;
+ qemu_mutex_unlock_iothread();
+ }
+#endif
+
if (run_ret < 0) {
if (run_ret == -EINTR || run_ret == -EAGAIN) {
DPRINTF("io window exit\n");
+ kvm_eat_signals(cpu);
ret = EXCP_INTERRUPT;
break;
}
@@ -2026,7 +2099,7 @@ int kvm_cpu_exec(CPUState *cpu)
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
- cpu->exit_request = 0;
+ atomic_set(&cpu->exit_request, 0);
return ret;
}
@@ -2372,16 +2445,12 @@ void kvm_remove_all_breakpoints(CPUState *cpu)
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
-int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
+static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
{
KVMState *s = kvm_state;
struct kvm_signal_mask *sigmask;
int r;
- if (!sigset) {
- return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL);
- }
-
sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
sigmask->len = s->sigmask_len;
@@ -2391,14 +2460,73 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
return r;
}
+
+static void kvm_ipi_signal(int sig)
+{
+ if (current_cpu) {
+ assert(kvm_immediate_exit);
+ kvm_cpu_kick(current_cpu);
+ }
+}
+
+void kvm_init_cpu_signals(CPUState *cpu)
+{
+ int r;
+ sigset_t set;
+ struct sigaction sigact;
+
+ memset(&sigact, 0, sizeof(sigact));
+ sigact.sa_handler = kvm_ipi_signal;
+ sigaction(SIG_IPI, &sigact, NULL);
+
+ pthread_sigmask(SIG_BLOCK, NULL, &set);
+#if defined KVM_HAVE_MCE_INJECTION
+ sigdelset(&set, SIGBUS);
+ pthread_sigmask(SIG_SETMASK, &set, NULL);
+#endif
+ sigdelset(&set, SIG_IPI);
+ if (kvm_immediate_exit) {
+ r = pthread_sigmask(SIG_SETMASK, &set, NULL);
+ } else {
+ r = kvm_set_signal_mask(cpu, &set);
+ }
+ if (r) {
+ fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
+ exit(1);
+ }
+}
+
+/* Called asynchronously in VCPU thread. */
int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
- return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
+#ifdef KVM_HAVE_MCE_INJECTION
+ if (have_sigbus_pending) {
+ return 1;
+ }
+ have_sigbus_pending = true;
+ pending_sigbus_addr = addr;
+ pending_sigbus_code = code;
+ atomic_set(&cpu->exit_request, 1);
+ return 0;
+#else
+ return 1;
+#endif
}
+/* Called synchronously (via signalfd) in main thread. */
int kvm_on_sigbus(int code, void *addr)
{
- return kvm_arch_on_sigbus(code, addr);
+#ifdef KVM_HAVE_MCE_INJECTION
+ /* Action required MCE kills the process if SIGBUS is blocked. Because
+ * that's what happens in the I/O thread, where we handle MCE via signalfd,
+ * we can only get action optional here.
+ */
+ assert(code != BUS_MCEERR_AR);
+ kvm_arch_on_sigbus_vcpu(first_cpu, code, addr);
+ return 0;
+#else
+ return 1;
+#endif
}
int kvm_create_device(KVMState *s, uint64_t type, bool test)
diff --git a/kvm-stub.c b/kvm-stub.c
index b1b6b96c96..ef0c7346af 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -95,13 +95,6 @@ void kvm_remove_all_breakpoints(CPUState *cpu)
{
}
-#ifndef _WIN32
-int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
-{
- abort();
-}
-#endif
-
int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
return 1;
@@ -157,4 +150,9 @@ bool kvm_has_free_slot(MachineState *ms)
{
return false;
}
+
+void kvm_init_cpu_signals(CPUState *cpu)
+{
+ abort();
+}
#endif
diff --git a/memory.c b/memory.c
index d61caee867..284894b135 100644
--- a/memory.c
+++ b/memory.c
@@ -1182,7 +1182,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr,
static const MemoryRegionOps ram_device_mem_ops = {
.read = memory_region_ram_device_read,
.write = memory_region_ram_device_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_HOST_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
@@ -2588,13 +2588,24 @@ static void mtree_print_flatview(fprintf_function p, void *f,
while (n--) {
mr = range->mr;
- p(f, MTREE_INDENT TARGET_FMT_plx "-"
- TARGET_FMT_plx " (prio %d, %s): %s\n",
- int128_get64(range->addr.start),
- int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
- mr->priority,
- memory_region_type(mr),
- memory_region_name(mr));
+ if (range->offset_in_region) {
+ p(f, MTREE_INDENT TARGET_FMT_plx "-"
+ TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx "\n",
+ int128_get64(range->addr.start),
+ int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
+ mr->priority,
+ range->readonly ? "rom" : memory_region_type(mr),
+ memory_region_name(mr),
+ range->offset_in_region);
+ } else {
+ p(f, MTREE_INDENT TARGET_FMT_plx "-"
+ TARGET_FMT_plx " (prio %d, %s): %s\n",
+ int128_get64(range->addr.start),
+ int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
+ mr->priority,
+ range->readonly ? "rom" : memory_region_type(mr),
+ memory_region_name(mr));
+ }
range++;
}
diff --git a/qapi-schema.json b/qapi-schema.json
index fb39d1dc11..6febfa7b90 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -5915,6 +5915,16 @@
'data': [ 'pause', 'poweroff' ] }
##
+# @GuestPanicInformationType:
+#
+# An enumeration of the guest panic information types
+#
+# Since: 2.9
+##
+{ 'enum': 'GuestPanicInformationType',
+ 'data': [ 'hyper-v'] }
+
+##
# @GuestPanicInformation:
#
# Information about a guest panic
@@ -5922,6 +5932,8 @@
# Since: 2.9
##
{'union': 'GuestPanicInformation',
+ 'base': {'type': 'GuestPanicInformationType'},
+ 'discriminator': 'type',
'data': { 'hyper-v': 'GuestPanicInformationHyperV' } }
##
diff --git a/qapi/event.json b/qapi/event.json
index 970ff0255a..e02852cd8a 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -488,9 +488,9 @@
#
# @action: action that has been taken, currently always "pause"
#
-# @info: optional information about a panic
+# @info: #optional information about a panic (since 2.9)
#
-# Since: 1.5 (@info since 2.9)
+# Since: 1.5
#
# Example:
#
diff --git a/qom/cpu.c b/qom/cpu.c
index 58784bcbea..f02e9c0fae 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -133,7 +133,7 @@ void cpu_exit(CPUState *cpu)
atomic_set(&cpu->exit_request, 1);
/* Ensure cpu_exec will see the exit request after TCG has exited. */
smp_wmb();
- atomic_set(&cpu->tcg_exit_req, 1);
+ atomic_set(&cpu->icount_decr.u16.high, -1);
}
int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index 222025525b..d9a6db0bb7 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -27,9 +27,9 @@ MSR_IA32_VMX_VMFUNC = 0x491
class msr(object):
def __init__(self):
try:
- self.f = open('/dev/cpu/0/msr', 'r', 0)
+ self.f = open('/dev/cpu/0/msr', 'rb', 0)
except:
- self.f = open('/dev/msr0', 'r', 0)
+ self.f = open('/dev/msr0', 'rb', 0)
def read(self, index, default = None):
import struct
self.f.seek(index)
@@ -49,7 +49,7 @@ class Control(object):
val = m.read(nr, 0)
return (val & 0xffffffff, val >> 32)
def show(self):
- print self.name
+ print(self.name)
mbz, mb1 = self.read2(self.cap_msr)
tmbz, tmb1 = 0, 0
if self.true_cap_msr:
@@ -69,7 +69,7 @@ class Control(object):
s = 'forced'
elif one and zero:
s = 'yes'
- print ' %-40s %s' % (self.bits[bit], s)
+ print(' %-40s %s' % (self.bits[bit], s))
class Misc(object):
def __init__(self, name, bits, msr):
@@ -77,9 +77,9 @@ class Misc(object):
self.bits = bits
self.msr = msr
def show(self):
- print self.name
+ print(self.name)
value = msr().read(self.msr, 0)
- print ' Hex: 0x%x' % (value)
+ print(' Hex: 0x%x' % (value))
def first_bit(key):
if type(key) is tuple:
return key[0]
@@ -94,7 +94,7 @@ class Misc(object):
def fmt(x):
return { True: 'yes', False: 'no' }[x]
v = (value >> lo) & ((1 << (hi - lo + 1)) - 1)
- print ' %-40s %s' % (self.bits[bits], fmt(v))
+ print(' %-40s %s' % (self.bits[bits], fmt(v)))
controls = [
Misc(
@@ -170,9 +170,13 @@ controls = [
12: 'Enable INVPCID',
13: 'Enable VM functions',
14: 'VMCS shadowing',
+ 15: 'Enable ENCLS exiting',
16: 'RDSEED exiting',
+ 17: 'Enable PML',
18: 'EPT-violation #VE',
+ 19: 'Conceal non-root operation from PT',
20: 'Enable XSAVES/XRSTORS',
+ 22: 'Mode-based execute control (XS/XU)',
25: 'TSC scaling',
},
cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -190,6 +194,8 @@ controls = [
20: 'Save IA32_EFER',
21: 'Load IA32_EFER',
22: 'Save VMX-preemption timer value',
+ 23: 'Clear IA32_BNDCFGS',
+ 24: 'Conceal VM exits from PT',
},
cap_msr = MSR_IA32_VMX_EXIT_CTLS,
true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS,
@@ -205,6 +211,8 @@ controls = [
13: 'Load IA32_PERF_GLOBAL_CTRL',
14: 'Load IA32_PAT',
15: 'Load IA32_EFER',
+ 16: 'Load IA32_BNDCFGS',
+ 17: 'Conceal VM entries from PT',
},
cap_msr = MSR_IA32_VMX_ENTRY_CTLS,
true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS,
@@ -223,6 +231,7 @@ controls = [
(25,27): 'MSR-load/store count recommendation',
28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1',
29: 'VMWRITE to VM-exit information fields',
+ 30: 'Inject event with insn length=0',
(32,63): 'MSEG revision identifier',
},
msr = MSR_IA32_VMX_MISC_CTLS,
diff --git a/spice-qemu-char.c b/spice-qemu-char.c
index 6f46f46b25..4d1c76e8a4 100644
--- a/spice-qemu-char.c
+++ b/spice-qemu-char.c
@@ -215,7 +215,10 @@ static void char_spice_finalize(Object *obj)
SpiceChardev *s = SPICE_CHARDEV(obj);
vmc_unregister_interface(s);
- QLIST_REMOVE(s, next);
+
+ if (s->next.le_prev) {
+ QLIST_REMOVE(s, next);
+ }
g_free((char *)s->sin.subtype);
#if SPICE_SERVER_VERSION >= 0x000c02
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 395e986973..45554682f2 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -560,16 +560,6 @@ int kvm_arch_process_async_events(CPUState *cs)
return 0;
}
-int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr)
-{
- return 1;
-}
-
-int kvm_arch_on_sigbus(int code, void *addr)
-{
- return 1;
-}
-
/* The #ifdef protections are until 32bit headers are imported and can
* be removed once both 32 and 64 bit reach feature parity.
*/
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 89421c893b..fba92125ab 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3778,19 +3778,16 @@ static GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs)
GuestPanicInformation *panic_info = NULL;
if (env->features[FEAT_HYPERV_EDX] & HV_X64_GUEST_CRASH_MSR_AVAILABLE) {
- GuestPanicInformationHyperV *panic_info_hv =
- g_malloc0(sizeof(GuestPanicInformationHyperV));
panic_info = g_malloc0(sizeof(GuestPanicInformation));
- panic_info->type = GUEST_PANIC_INFORMATION_KIND_HYPER_V;
- panic_info->u.hyper_v.data = panic_info_hv;
+ panic_info->type = GUEST_PANIC_INFORMATION_TYPE_HYPER_V;
assert(HV_X64_MSR_CRASH_PARAMS >= 5);
- panic_info_hv->arg1 = env->msr_hv_crash_params[0];
- panic_info_hv->arg2 = env->msr_hv_crash_params[1];
- panic_info_hv->arg3 = env->msr_hv_crash_params[2];
- panic_info_hv->arg4 = env->msr_hv_crash_params[3];
- panic_info_hv->arg5 = env->msr_hv_crash_params[4];
+ panic_info->u.hyper_v.arg1 = env->msr_hv_crash_params[0];
+ panic_info->u.hyper_v.arg2 = env->msr_hv_crash_params[1];
+ panic_info->u.hyper_v.arg3 = env->msr_hv_crash_params[2];
+ panic_info->u.hyper_v.arg4 = env->msr_hv_crash_params[3];
+ panic_info->u.hyper_v.arg5 = env->msr_hv_crash_params[4];
}
return panic_info;
@@ -3986,6 +3983,8 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true),
+ DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration,
+ false),
DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 12a39d590f..ac2ad6d443 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1255,6 +1255,9 @@ struct X86CPU {
/* if true override the phys_bits value with a value read from the host */
bool host_phys_bits;
+ /* Stop SMI delivery for migration compatibility with old machines */
+ bool kvm_no_smi_migration;
+
/* Number of physical address bits supported */
uint32_t phys_bits;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 27fd0505df..887a81268f 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -64,13 +64,6 @@
* 255 kvm_msr_entry structs */
#define MSR_BUF_SIZE 4096
-#ifndef BUS_MCEERR_AR
-#define BUS_MCEERR_AR 4
-#endif
-#ifndef BUS_MCEERR_AO
-#define BUS_MCEERR_AO 5
-#endif
-
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
KVM_CAP_INFO(SET_TSS_ADDR),
KVM_CAP_INFO(EXT_CPUID),
@@ -462,70 +455,38 @@ static void hardware_memory_error(void)
exit(1);
}
-int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
+void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
X86CPU *cpu = X86_CPU(c);
CPUX86State *env = &cpu->env;
ram_addr_t ram_addr;
hwaddr paddr;
- if ((env->mcg_cap & MCG_SER_P) && addr
- && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
+ /* If we get an action required MCE, it has been injected by KVM
+ * while the VM was running. An action optional MCE instead should
+ * be coming from the main thread, which qemu_init_sigbus identifies
+ * as the "early kill" thread.
+ */
+ assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
+
+ if ((env->mcg_cap & MCG_SER_P) && addr) {
ram_addr = qemu_ram_addr_from_host(addr);
- if (ram_addr == RAM_ADDR_INVALID ||
- !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
- fprintf(stderr, "Hardware memory error for memory used by "
- "QEMU itself instead of guest system!\n");
- /* Hope we are lucky for AO MCE */
- if (code == BUS_MCEERR_AO) {
- return 0;
- } else {
- hardware_memory_error();
- }
- }
- kvm_hwpoison_page_add(ram_addr);
- kvm_mce_inject(cpu, paddr, code);
- } else {
- if (code == BUS_MCEERR_AO) {
- return 0;
- } else if (code == BUS_MCEERR_AR) {
- hardware_memory_error();
- } else {
- return 1;
+ if (ram_addr != RAM_ADDR_INVALID &&
+ kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ kvm_hwpoison_page_add(ram_addr);
+ kvm_mce_inject(cpu, paddr, code);
+ return;
}
- }
- return 0;
-}
-int kvm_arch_on_sigbus(int code, void *addr)
-{
- X86CPU *cpu = X86_CPU(first_cpu);
-
- if ((cpu->env.mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
- ram_addr_t ram_addr;
- hwaddr paddr;
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!\n");
+ }
- /* Hope we are lucky for AO MCE */
- ram_addr = qemu_ram_addr_from_host(addr);
- if (ram_addr == RAM_ADDR_INVALID ||
- !kvm_physical_memory_addr_from_host(first_cpu->kvm_state,
- addr, &paddr)) {
- fprintf(stderr, "Hardware memory error for memory used by "
- "QEMU itself instead of guest system!: %p\n", addr);
- return 0;
- }
- kvm_hwpoison_page_add(ram_addr);
- kvm_mce_inject(X86_CPU(first_cpu), paddr, code);
- } else {
- if (code == BUS_MCEERR_AO) {
- return 0;
- } else if (code == BUS_MCEERR_AR) {
- hardware_memory_error();
- } else {
- return 1;
- }
+ if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
}
- return 0;
+
+ /* Hope we are lucky for AO MCE */
}
static int kvm_inject_mce_oldstyle(X86CPU *cpu)
@@ -2531,7 +2492,12 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
events.smi.pending = 0;
events.smi.latched_init = 0;
}
- events.flags |= KVM_VCPUEVENT_VALID_SMM;
+ /* Stop SMI delivery on old machine types to avoid a reboot
+ * on an inward migration of an old VM.
+ */
+ if (!cpu->kvm_no_smi_migration) {
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+ }
}
if (level >= KVM_PUT_RESET_STATE) {
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 998c3412c3..0982e874bb 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -180,18 +180,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
return true;
}
-int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr)
-{
- DPRINTF("%s\n", __func__);
- return 1;
-}
-
-int kvm_arch_on_sigbus(int code, void *addr)
-{
- DPRINTF("%s\n", __func__);
- return 1;
-}
-
void kvm_arch_init_irq_routing(KVMState *s)
{
}
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index acc40ece65..03f5097eab 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2582,16 +2582,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
return true;
}
-int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
-{
- return 1;
-}
-
-int kvm_arch_on_sigbus(int code, void *addr)
-{
- return 1;
-}
-
void kvm_arch_init_irq_routing(KVMState *s)
{
}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 5ec050cf89..ac47154b83 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2140,16 +2140,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
return true;
}
-int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
-{
- return 1;
-}
-
-int kvm_arch_on_sigbus(int code, void *addr)
-{
- return 1;
-}
-
void kvm_s390_io_interrupt(uint16_t subchannel_id,
uint16_t subchannel_nr, uint32_t io_int_parm,
uint32_t io_int_word)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4c7f258220..6c216bb73f 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1101,7 +1101,6 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi)
#define TB_EXIT_MASK 3
#define TB_EXIT_IDX0 0
#define TB_EXIT_IDX1 1
-#define TB_EXIT_ICOUNT_EXPIRED 2
#define TB_EXIT_REQUESTED 3
#ifdef HAVE_TCG_QEMU_TB_EXEC
diff --git a/translate-all.c b/translate-all.c
index 9bac061c9b..d42d003e67 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1930,7 +1930,7 @@ void cpu_interrupt(CPUState *cpu, int mask)
{
g_assert(qemu_mutex_iothread_locked());
cpu->interrupt_request |= mask;
- cpu->tcg_exit_req = 1;
+ cpu->icount_decr.u16.high = -1;
}
/*
diff --git a/translate-common.c b/translate-common.c
index d504dd0d33..40fe5a19bb 100644
--- a/translate-common.c
+++ b/translate-common.c
@@ -43,14 +43,11 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
} else {
- if (use_icount) {
- cpu->icount_decr.u16.high = 0xffff;
- if (!cpu->can_do_io
- && (mask & ~old_mask) != 0) {
- cpu_abort(cpu, "Raised interrupt while not in I/O function");
- }
- } else {
- cpu->tcg_exit_req = 1;
+ cpu->icount_decr.u16.high = -1;
+ if (use_icount &&
+ !cpu->can_do_io
+ && (mask & ~old_mask) != 0) {
+ cpu_abort(cpu, "Raised interrupt while not in I/O function");
}
}
}
diff --git a/util/compatfd.c b/util/compatfd.c
index 9a43042ae6..980bd33e52 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -15,7 +15,6 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
-#include "qemu/compatfd.h"
#include "qemu/thread.h"
#include <sys/syscall.h>
diff --git a/util/main-loop.c b/util/main-loop.c
index ad10bca211..ca7bb072f9 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -34,8 +34,6 @@
#ifndef _WIN32
-#include "qemu/compatfd.h"
-
/* If we have signalfd, we mask out the signals we want to handle and then
* use signalfd to listen for them. We rely on whatever the current signal
* handler is to dispatch the signals when we receive them.
@@ -63,8 +61,7 @@ static void sigfd_handler(void *opaque)
sigaction(info.ssi_signo, NULL, &action);
if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
- action.sa_sigaction(info.ssi_signo,
- (siginfo_t *)&info, NULL);
+ sigaction_invoke(&action, &info);
} else if (action.sa_handler) {
action.sa_handler(info.ssi_signo);
}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index f63146407f..cd686aae3d 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -603,3 +603,36 @@ void qemu_free_stack(void *stack, size_t sz)
munmap(stack, sz);
}
+
+void sigaction_invoke(struct sigaction *action,
+ struct qemu_signalfd_siginfo *info)
+{
+ siginfo_t si = { 0 };
+ si.si_signo = info->ssi_signo;
+ si.si_errno = info->ssi_errno;
+ si.si_code = info->ssi_code;
+
+ /* Convert the minimal set of fields defined by POSIX.
+ * Positive si_code values are reserved for kernel-generated
+ * signals, where the valid siginfo fields are determined by
+ * the signal number. But according to POSIX, it is unspecified
+ * whether SI_USER and SI_QUEUE have values less than or equal to
+ * zero.
+ */
+ if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
+ info->ssi_code <= 0) {
+ /* SIGTERM, etc. */
+ si.si_pid = info->ssi_pid;
+ si.si_uid = info->ssi_uid;
+ } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
+ info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
+ si.si_addr = (void *)(uintptr_t)info->ssi_addr;
+ } else if (info->ssi_signo == SIGCHLD) {
+ si.si_pid = info->ssi_pid;
+ si.si_status = info->ssi_status;
+ si.si_uid = info->ssi_uid;
+ } else if (info->ssi_signo == SIGIO) {
+ si.si_band = info->ssi_band;
+ }
+ action->sa_sigaction(info->ssi_signo, &si, NULL);
+}
diff --git a/vl.c b/vl.c
index e10a27bdd6..16a3b5ed8b 100644
--- a/vl.c
+++ b/vl.c
@@ -227,6 +227,7 @@ static struct {
{ .driver = "ide-hd", .flag = &default_cdrom },
{ .driver = "ide-drive", .flag = &default_cdrom },
{ .driver = "scsi-cd", .flag = &default_cdrom },
+ { .driver = "scsi-hd", .flag = &default_cdrom },
{ .driver = "virtio-serial-pci", .flag = &default_virtcon },
{ .driver = "virtio-serial", .flag = &default_virtcon },
{ .driver = "VGA", .flag = &default_vga },
@@ -1717,14 +1718,14 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
}
if (info) {
- if (info->type == GUEST_PANIC_INFORMATION_KIND_HYPER_V) {
+ if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) {
qemu_log_mask(LOG_GUEST_ERROR, "HV crash parameters: (%#"PRIx64
" %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n",
- info->u.hyper_v.data->arg1,
- info->u.hyper_v.data->arg2,
- info->u.hyper_v.data->arg3,
- info->u.hyper_v.data->arg4,
- info->u.hyper_v.data->arg5);
+ info->u.hyper_v.arg1,
+ info->u.hyper_v.arg2,
+ info->u.hyper_v.arg3,
+ info->u.hyper_v.arg4,
+ info->u.hyper_v.arg5);
}
qapi_free_GuestPanicInformation(info);
}