aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorEmilio G. Cota <cota@braap.org>2017-08-08 13:53:15 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2018-08-23 18:46:25 +0200
commitfe9959a275fc7b4744e49201a390627b3adda597 (patch)
treeb6d4d8e7b58866e9fa1ffd3a76ec1a1581065cc0 /include
parentc04649eeeaf5f84ba9e43d0c4ffbe1719b0d940c (diff)
qsp: QEMU's Synchronization Profiler
The goal of this module is to profile synchronization primitives (i.e. mutexes, recursive mutexes and condition variables) so that scalability issues can be quickly diagnosed. Sync primitives are profiled by QSP based on the vaddr of the object accessed as well as the call site (file:line_nr). That means the same object called from two different call sites will be tracked in separate entries, which might be reported together or separately (see subsequent commit on call site coalescing). Some perf numbers: Host: Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz Command: taskset -c 0 tests/atomic_add-bench -d 5 -m - Before: 54.80 Mops/s - After: 54.75 Mops/s That is, a negligible slowdown due to the now indirect call to qemu_mutex_lock. Note that using a branch instead of an indirect call introduces a more severe slowdown (53.65 Mops/s, i.e. 2% slowdown). Enabling the profiler (with -p, added in this series) is more interesting: - No profiling: 54.75 Mops/s - W/ profiling: 12.53 Mops/s That is, a 4.36X slowdown. We can break down this slowdown by removing the get_clock calls or the entry lookup: - No profiling: 54.75 Mops/s - W/o get_clock: 25.37 Mops/s - W/o entry lookup: 19.30 Mops/s - W/ profiling: 12.53 Mops/s Signed-off-by: Emilio G. Cota <cota@braap.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'include')
-rw-r--r--include/qemu/qht.h1
-rw-r--r--include/qemu/qsp.h22
-rw-r--r--include/qemu/thread-posix.h4
-rw-r--r--include/qemu/thread-win32.h5
-rw-r--r--include/qemu/thread.h65
5 files changed, 86 insertions, 11 deletions
diff --git a/include/qemu/qht.h b/include/qemu/qht.h
index 1fb9116fa0..c9a11cc29a 100644
--- a/include/qemu/qht.h
+++ b/include/qemu/qht.h
@@ -46,6 +46,7 @@ typedef bool (*qht_lookup_func_t)(const void *obj, const void *userp);
typedef void (*qht_iter_func_t)(struct qht *ht, void *p, uint32_t h, void *up);
#define QHT_MODE_AUTO_RESIZE 0x1 /* auto-resize when heavily loaded */
+#define QHT_MODE_RAW_MUTEXES 0x2 /* bypass the profiler (QSP) */
/**
* qht_init - Initialize a QHT
diff --git a/include/qemu/qsp.h b/include/qemu/qsp.h
new file mode 100644
index 0000000000..9c2bb60ff0
--- /dev/null
+++ b/include/qemu/qsp.h
@@ -0,0 +1,22 @@
+/*
+ * qsp.c - QEMU Synchronization Profiler
+ *
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Note: this header file can *only* be included from thread.h.
+ */
+#ifndef QEMU_QSP_H
+#define QEMU_QSP_H
+
+#include "qemu/fprintf-fn.h"
+
+void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max);
+
+bool qsp_is_enabled(void);
+void qsp_enable(void);
+void qsp_disable(void);
+
+#endif /* QEMU_QSP_H */
diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index fd27b34128..c903525062 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -6,8 +6,8 @@
typedef QemuMutex QemuRecMutex;
#define qemu_rec_mutex_destroy qemu_mutex_destroy
-#define qemu_rec_mutex_lock qemu_mutex_lock
-#define qemu_rec_mutex_trylock qemu_mutex_trylock
+#define qemu_rec_mutex_lock_impl qemu_mutex_lock_impl
+#define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl
#define qemu_rec_mutex_unlock qemu_mutex_unlock
struct QemuMutex {
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index d668d789b4..50af5dd7ab 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -19,8 +19,9 @@ struct QemuRecMutex {
};
void qemu_rec_mutex_destroy(QemuRecMutex *mutex);
-void qemu_rec_mutex_lock(QemuRecMutex *mutex);
-int qemu_rec_mutex_trylock(QemuRecMutex *mutex);
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line);
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file,
+ int line);
void qemu_rec_mutex_unlock(QemuRecMutex *mutex);
struct QemuCond {
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index ef7bd16123..c90ea4783f 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -16,6 +16,9 @@ typedef struct QemuThread QemuThread;
#include "qemu/thread-posix.h"
#endif
+/* include QSP header once QemuMutex, QemuCond etc. are defined */
+#include "qemu/qsp.h"
+
#define QEMU_THREAD_JOINABLE 0
#define QEMU_THREAD_DETACHED 1
@@ -25,10 +28,51 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line);
void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line);
void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line);
-#define qemu_mutex_lock(mutex) \
- qemu_mutex_lock_impl(mutex, __FILE__, __LINE__)
-#define qemu_mutex_trylock(mutex) \
- qemu_mutex_trylock_impl(mutex, __FILE__, __LINE__)
+typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l);
+typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l);
+typedef void (*QemuRecMutexLockFunc)(QemuRecMutex *m, const char *f, int l);
+typedef int (*QemuRecMutexTrylockFunc)(QemuRecMutex *m, const char *f, int l);
+typedef void (*QemuCondWaitFunc)(QemuCond *c, QemuMutex *m, const char *f,
+ int l);
+
+extern QemuMutexLockFunc qemu_mutex_lock_func;
+extern QemuMutexTrylockFunc qemu_mutex_trylock_func;
+extern QemuRecMutexLockFunc qemu_rec_mutex_lock_func;
+extern QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func;
+extern QemuCondWaitFunc qemu_cond_wait_func;
+
+/* convenience macros to bypass the profiler */
+#define qemu_mutex_lock__raw(m) \
+ qemu_mutex_lock_impl(m, __FILE__, __LINE__)
+#define qemu_mutex_trylock__raw(m) \
+ qemu_mutex_trylock_impl(m, __FILE__, __LINE__)
+
+#define qemu_mutex_lock(m) ({ \
+ QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func); \
+ _f(m, __FILE__, __LINE__); \
+ })
+
+#define qemu_mutex_trylock(m) ({ \
+ QemuMutexTrylockFunc _f = atomic_read(&qemu_mutex_trylock_func); \
+ _f(m, __FILE__, __LINE__); \
+ })
+
+#define qemu_rec_mutex_lock(m) ({ \
+ QemuRecMutexLockFunc _f = atomic_read(&qemu_rec_mutex_lock_func); \
+ _f(m, __FILE__, __LINE__); \
+ })
+
+#define qemu_rec_mutex_trylock(m) ({ \
+ QemuRecMutexTrylockFunc _f; \
+ _f = atomic_read(&qemu_rec_mutex_trylock_func); \
+ _f(m, __FILE__, __LINE__); \
+ })
+
+#define qemu_cond_wait(c, m) ({ \
+ QemuCondWaitFunc _f = atomic_read(&qemu_cond_wait_func); \
+ _f(c, m, __FILE__, __LINE__); \
+ })
+
#define qemu_mutex_unlock(mutex) \
qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__)
@@ -47,6 +91,16 @@ static inline void (qemu_mutex_unlock)(QemuMutex *mutex)
qemu_mutex_unlock(mutex);
}
+static inline void (qemu_rec_mutex_lock)(QemuRecMutex *mutex)
+{
+ qemu_rec_mutex_lock(mutex);
+}
+
+static inline int (qemu_rec_mutex_trylock)(QemuRecMutex *mutex)
+{
+ return qemu_rec_mutex_trylock(mutex);
+}
+
/* Prototypes for other functions are in thread-posix.h/thread-win32.h. */
void qemu_rec_mutex_init(QemuRecMutex *mutex);
@@ -63,9 +117,6 @@ void qemu_cond_broadcast(QemuCond *cond);
void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
const char *file, const int line);
-#define qemu_cond_wait(cond, mutex) \
- qemu_cond_wait_impl(cond, mutex, __FILE__, __LINE__)
-
static inline void (qemu_cond_wait)(QemuCond *cond, QemuMutex *mutex)
{
qemu_cond_wait(cond, mutex);