aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stefan Hajnoczi <stefanha@redhat.com> 2022-11-03 10:54:37 -0400
committer Stefan Hajnoczi <stefanha@redhat.com> 2022-11-03 10:54:37 -0400
commit 7f5acfcb662d32a736d0db41211cc7f340193bdd (patch)
tree d00c7ca999519b539c5874e6021192050ba6d405
parent 5e82102268f5a72b2880f9cb638e9719177a95f9 (diff)
parent 03a60ae9cac546d05b076676491ed1606f9d9066 (diff)
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* bug fixes * reduced memory footprint for IPI virtualization on Intel processors * asynchronous teardown support (Linux only) # -----BEGIN PGP SIGNATURE----- # # iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmNiVykUHHBib256aW5p # QHJlZGhhdC5jb20ACgkQv/vSX3jHroN0Swf/YxjphCtFgYYSO14WP+7jAnfRZLhm # 0xWChWP8rco5I352OBFeFU64Av5XoLGNn6SZLl8lcg86lQ/G0D27jxu6wOcDDHgw # 0yTDO1gevj51UKsbxoC66OWSZwKTEo398/BHPDcI2W41yOFycSdtrPgspOrFRVvf # 7M3nNjuNPsQorZeuu8NGr3jakqbt99ZDXcyDEWbrEAcmy2JBRMbGgT0Kdnc6aZfW # CvL+1ljxzldNwGeNBbQW2QgODbfHx5cFZcy4Daze35l5Ra7K/FrgAzr6o/HXptya # 9fEs5LJQ1JWI6JtpaWwFy7fcIIOsJ0YW/hWWQZSDt9JdAJFE5/+vF+Kz5Q== # =CgrO # -----END PGP SIGNATURE----- # gpg: Signature made Wed 02 Nov 2022 07:40:25 EDT # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "pbonzini@redhat.com" # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full] # gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full] # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * tag 'for-upstream' of https://gitlab.com/bonzini/qemu: target/i386: Fix test for paging enabled util/log: Close per-thread log file on thread termination target/i386: Set maximum APIC ID to KVM prior to vCPU creation os-posix: asynchronous teardown for shutdown on Linux target/i386: Fix calculation of LOCK NEG eflags Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- hw/i386/x86.c 4
-rw-r--r-- include/qemu/async-teardown.h 22
-rw-r--r-- meson.build 1
-rw-r--r-- os-posix.c 6
-rw-r--r-- qemu-options.hx 19
-rw-r--r-- target/i386/kvm/kvm-stub.c 5
-rw-r--r-- target/i386/kvm/kvm.c 5
-rw-r--r-- target/i386/kvm/kvm_i386.h 2
-rw-r--r-- target/i386/tcg/sysemu/excp_helper.c 10
-rw-r--r-- target/i386/tcg/translate.c 2
-rw-r--r-- util/async-teardown.c 150
-rw-r--r-- util/log.c 9
-rw-r--r-- util/meson.build 1
13 files changed, 230 insertions, 6 deletions
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index bd50a064a3..78cc131926 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -140,6 +140,10 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
exit(EXIT_FAILURE);
}
+ if (kvm_enabled()) {
+ kvm_set_max_apic_id(x86ms->apic_id_limit);
+ }
+
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < ms->smp.cpus; i++) {
x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/include/qemu/async-teardown.h b/include/qemu/async-teardown.h
new file mode 100644
index 0000000000..092e7a37e7
--- /dev/null
+++ b/include/qemu/async-teardown.h
@@ -0,0 +1,22 @@
+/*
+ * Asynchronous teardown
+ *
+ * Copyright IBM, Corp. 2022
+ *
+ * Authors:
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version. See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_ASYNC_TEARDOWN_H
+#define QEMU_ASYNC_TEARDOWN_H
+
+#include "config-host.h"
+
+#ifdef CONFIG_LINUX
+void init_async_teardown(void);
+#endif
+
+#endif
diff --git a/meson.build b/meson.build
index d809d51791..1d448272ab 100644
--- a/meson.build
+++ b/meson.build
@@ -1955,6 +1955,7 @@ if targetos == 'windows'
endif
# has_function
+config_host_data.set('CONFIG_CLOSE_RANGE', cc.has_function('close_range'))
config_host_data.set('CONFIG_ACCEPT4', cc.has_function('accept4'))
config_host_data.set('CONFIG_CLOCK_ADJTIME', cc.has_function('clock_adjtime'))
config_host_data.set('CONFIG_DUP3', cc.has_function('dup3'))
diff --git a/os-posix.c b/os-posix.c
index 321fc4bd13..4858650c3e 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -39,6 +39,7 @@
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
+#include "qemu/async-teardown.h"
#endif
/*
@@ -150,6 +151,11 @@ int os_parse_cmd_args(int index, const char *optarg)
case QEMU_OPTION_daemonize:
daemonize = 1;
break;
+#if defined(CONFIG_LINUX)
+ case QEMU_OPTION_asyncteardown:
+ init_async_teardown();
+ break;
+#endif
default:
return -1;
}
diff --git a/qemu-options.hx b/qemu-options.hx
index ceee0ddc25..911d82afa5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4790,6 +4790,25 @@ HXCOMM Internal use
DEF("qtest", HAS_ARG, QEMU_OPTION_qtest, "", QEMU_ARCH_ALL)
DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL)
+#ifdef __linux__
+DEF("async-teardown", 0, QEMU_OPTION_asyncteardown,
+ "-async-teardown enable asynchronous teardown\n",
+ QEMU_ARCH_ALL)
+#endif
+SRST
+``-async-teardown``
+ Enable asynchronous teardown. A new process called "cleanup/<QEMU_PID>"
+ will be created at startup sharing the address space with the main qemu
+ process, using clone. It will wait for the main qemu process to
+ terminate completely, and then exit.
+ This allows qemu to terminate very quickly even if the guest was
+ huge, leaving the teardown of the address space to the cleanup
+ process. Since the cleanup process shares the same cgroups as the
+ main qemu process, accounting is performed correctly. This only
+ works if the cleanup process is not forcefully killed with SIGKILL
+ before the main qemu process has terminated completely.
+ERST
+
DEF("msg", HAS_ARG, QEMU_OPTION_msg,
"-msg [timestamp[=on|off]][,guest-name=[on|off]]\n"
" control error message format\n"
diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c
index f6e7e4466e..e052f1c7b0 100644
--- a/target/i386/kvm/kvm-stub.c
+++ b/target/i386/kvm/kvm-stub.c
@@ -44,3 +44,8 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
{
abort();
}
+
+/* Stub variant of kvm_set_max_apic_id(): intentionally does nothing. */
+void kvm_set_max_apic_id(uint32_t max_apic_id)
+{
+}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 4df0428089..a213209379 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5723,3 +5723,8 @@ void kvm_arch_accel_class_init(ObjectClass *oc)
"Clock cycles without an event window "
"after which a notification VM exit occurs");
}
+
+/*
+ * Pass @max_apic_id to KVM by enabling KVM_CAP_MAX_VCPU_ID on the VM
+ * referenced by kvm_state.
+ *
+ * The return value of kvm_vm_enable_cap() is not checked here.
+ * NOTE(review): presumably best-effort for kernels lacking the capability
+ * -- confirm.
+ */
+void kvm_set_max_apic_id(uint32_t max_apic_id)
+{
+ kvm_vm_enable_cap(kvm_state, KVM_CAP_MAX_VCPU_ID, 0, max_apic_id);
+}
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index b7c38ba2c4..6a5c24e3dc 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -66,4 +66,6 @@ typedef struct kvm_msr_handlers {
bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
QEMUWRMSRHandler *wrmsr);
+void kvm_set_max_apic_id(uint32_t max_apic_id);
+
#endif
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index d51b5d7431..405a5d414a 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -553,12 +553,12 @@ static bool get_physical_address(CPUX86State *env, vaddr addr,
break;
default:
- in.cr3 = env->cr[3];
- in.mmu_idx = mmu_idx;
- in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
- in.pg_mode = get_pg_mode(env);
+ if (likely(env->cr[0] & CR0_PG_MASK)) {
+ in.cr3 = env->cr[3];
+ in.mmu_idx = mmu_idx;
+ in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
+ in.pg_mode = get_pg_mode(env);
- if (likely(in.pg_mode)) {
if (in.pg_mode & PG_MODE_LMA) {
/* test virtual address sign extension */
int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0ee548ce56..28a4e6dc1d 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3320,7 +3320,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
tcg_temp_free(t2);
tcg_temp_free(a0);
- tcg_gen_mov_tl(s->T0, t0);
+ tcg_gen_neg_tl(s->T0, t0);
tcg_temp_free(t0);
} else {
tcg_gen_neg_tl(s->T0, s->T0);
diff --git a/util/async-teardown.c b/util/async-teardown.c
new file mode 100644
index 0000000000..62bfce1b3c
--- /dev/null
+++ b/util/async-teardown.c
@@ -0,0 +1,150 @@
+/*
+ * Asynchronous teardown
+ *
+ * Copyright IBM, Corp. 2022
+ *
+ * Authors:
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version. See the COPYING file in the top-level directory.
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <sched.h>
+#include <unistd.h>
+
+#include "qemu/osdep.h"
+#include "qemu/async-teardown.h"
+
+#ifdef _SC_THREAD_STACK_MIN
+#define CLONE_STACK_SIZE sysconf(_SC_THREAD_STACK_MIN)
+#else
+#define CLONE_STACK_SIZE 16384
+#endif
+
+static pid_t the_ppid;
+
+/*
+ * Close every file descriptor that is open in the calling process.
+ *
+ * close_range() is tried first when the build detected it
+ * (CONFIG_CLOSE_RANGE).  If it is unavailable or fails, the descriptors
+ * listed in /proc/self/fd are closed one by one.  If that directory cannot
+ * be opened either, the function returns without closing anything.
+ */
+static void close_all_open_fd(void)
+{
+    struct dirent *de;
+    int fd, dfd;
+    DIR *dir;
+    char *end;
+
+#ifdef CONFIG_CLOSE_RANGE
+    int r = close_range(0, ~0U, 0);
+    if (!r) {
+        /* Success, no need to try other ways. */
+        return;
+    }
+#endif
+
+    dir = opendir("/proc/self/fd");
+    if (!dir) {
+        /* If /proc is not mounted, there is nothing that can be done. */
+        return;
+    }
+    /* Avoid closing the directory. */
+    dfd = dirfd(dir);
+
+    for (de = readdir(dir); de; de = readdir(dir)) {
+        /*
+         * Only purely numeric names are file descriptors.  "." and ".."
+         * are skipped explicitly; atoi() would have turned them into 0.
+         */
+        fd = (int)strtol(de->d_name, &end, 10);
+        if (end == de->d_name || *end != '\0') {
+            continue;
+        }
+        if (fd != dfd) {
+            close(fd);
+        }
+    }
+    closedir(dir);
+}
+
+/*
+ * SIGHUP handler: poll getppid() until it no longer returns the pid stored
+ * in the_ppid, then terminate this process with _exit(0).  Never returns.
+ */
+static void hup_handler(int signal)
+{
+    for (;;) {
+        if (getppid() != the_ppid) {
+            /* Reparented: the parent process has terminated completely. */
+            _exit(0);
+        }
+        /* Re-check once per second; sleep() is safe in a signal handler. */
+        sleep(1);
+    }
+}
+
+/*
+ * Entry point of the cleanup process started with clone().
+ *
+ * @arg is unused.  The function does not return to its caller: it always
+ * ends in _exit(0), either at the bottom of this function or from
+ * hup_handler().
+ */
+static int async_teardown_fn(void *arg)
+{
+    struct sigaction sa = { .sa_handler = hup_handler };
+    sigset_t hup_signal;
+    char name[16];
+
+    /* Set a meaningful name for this process. */
+    snprintf(name, sizeof(name), "cleanup/%d", the_ppid);
+    prctl(PR_SET_NAME, (unsigned long)name);
+
+    /*
+     * Close all file descriptors that might have been inherited from the
+     * main qemu process when doing clone, needed to make libvirt happy.
+     */
+    close_all_open_fd();
+
+    /* Set up a handler for SIGHUP and unblock SIGHUP. */
+    sigaction(SIGHUP, &sa, NULL);
+    sigemptyset(&hup_signal);
+    sigaddset(&hup_signal, SIGHUP);
+    sigprocmask(SIG_UNBLOCK, &hup_signal, NULL);
+
+    /* Ask to receive SIGHUP when the parent dies. */
+    prctl(PR_SET_PDEATHSIG, SIGHUP);
+
+    /*
+     * Sleep forever, unless the parent process has already terminated. The
+     * only interruption can come from the SIGHUP signal, which in normal
+     * operation is received when the parent process dies.
+     */
+    if (the_ppid == getppid()) {
+        pause();
+    }
+
+    /* At this point the parent process has terminated completely. */
+    _exit(0);
+}
+
+/*
+ * Allocate a stack of CLONE_STACK_SIZE bytes (as adjusted by
+ * qemu_alloc_stack()) and return the address to hand to clone(): the end
+ * of the area, except on HPPA where the start of the area is returned.
+ */
+static void *new_stack_for_clone(void)
+{
+    size_t size = CLONE_STACK_SIZE;
+    char *base = qemu_alloc_stack(&size);
+
+#if defined(HOST_HPPA)
+    /* On HPPA the top of the stack is the start of the area. */
+    return base;
+#else
+    /* Everywhere else the top of the stack is the end of the area. */
+    return base + size;
+#endif
+}
+
+/*
+ * Start the cleanup process: with every signal blocked, clone() a child
+ * that shares this process's address space (CLONE_VM) and runs
+ * async_teardown_fn(), then put the previous signal mask back.
+ */
+void init_async_teardown(void)
+{
+    sigset_t block_all, saved_mask;
+    void *child_stack;
+
+    /* Record our pid first: the child compares it against getppid(). */
+    the_ppid = getpid();
+
+    sigfillset(&block_all);
+    sigprocmask(SIG_BLOCK, &block_all, &saved_mask);
+
+    child_stack = new_stack_for_clone();
+    clone(async_teardown_fn, child_stack, CLONE_VM, NULL);
+
+    sigprocmask(SIG_SETMASK, &saved_mask, NULL);
+}
diff --git a/util/log.c b/util/log.c
index d6eb0378c3..39866bdaf2 100644
--- a/util/log.c
+++ b/util/log.c
@@ -42,6 +42,7 @@ static QemuMutex global_mutex;
static char *global_filename;
static FILE *global_file;
static __thread FILE *thread_file;
+static __thread Notifier qemu_log_thread_cleanup_notifier;
int qemu_loglevel;
static bool log_append;
@@ -77,6 +78,12 @@ static int log_thread_id(void)
#endif
}
+/*
+ * Notifier callback registered through qemu_thread_atexit_add(): close the
+ * calling thread's log file and reset the thread-local thread_file pointer.
+ * Neither @n nor @unused is used.
+ */
+static void qemu_log_thread_cleanup(Notifier *n, void *unused)
+{
+ fclose(thread_file);
+ thread_file = NULL;
+}
+
/* Lock/unlock output. */
FILE *qemu_log_trylock(void)
@@ -93,6 +100,8 @@ FILE *qemu_log_trylock(void)
return NULL;
}
thread_file = logfile;
+ qemu_log_thread_cleanup_notifier.notify = qemu_log_thread_cleanup;
+ qemu_thread_atexit_add(&qemu_log_thread_cleanup_notifier);
} else {
rcu_read_lock();
/*
diff --git a/util/meson.build b/util/meson.build
index c0a7bc54d4..59c1f467bb 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -3,6 +3,7 @@ util_ss.add(files('thread-context.c'), numa)
if not config_host_data.get('CONFIG_ATOMIC64')
util_ss.add(files('atomic64.c'))
endif
+util_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
if config_host_data.get('CONFIG_EPOLL_CREATE1')