author     Anthony Liguori <aliguori@amazon.com>   2013-10-18 10:01:49 -0700
committer  Anthony Liguori <aliguori@amazon.com>   2013-10-18 10:01:49 -0700
commit     989644915c281ac83f06f65923d716272ede1ed8
tree       574ac7adcfdb572fd735b7f509c6ba95aabe4b95
parent     1cb9b64df380f232bcd142ab27c085cff0add1d8
parent     041603fe5d4537cd165941f96bd76a31f7f662fd
Merge remote-tracking branch 'bonzini/iommu-for-anthony' into staging
# By Paolo Bonzini (10) and others
# Via Paolo Bonzini
* bonzini/iommu-for-anthony:
exec: remove qemu_safe_ram_ptr
icount: make it thread-safe
icount: document (future) locking rules for icount
icount: prepare the code for future races in calling qemu_clock_warp
icount: reorganize icount_warp_rt
icount: use cpu_get_icount() directly
timer: add timer_mod_anticipate and timer_mod_anticipate_ns
timer: extract timer_mod_ns_locked and timerlist_rearm
timer: make qemu_clock_enable sync between disable and timer's cb
qemu-thread: add QemuEvent
timer: protect timers_state's clock with seqlock
seqlock: introduce read-write seqlock
vga: Mark relevant portio lists regions as coalesced MMIO flushing
cirrus: Mark vga io region as coalesced MMIO flushing
portio: Allow to mark portio lists as coalesced MMIO flushing
compatfd: switch to QemuThread
memory: fix 128 arithmetic in info mtree
Message-id: 1382024935-28297-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
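
The heart of this pull is the new read-write seqlock (include/qemu/seqlock.h in the diff below), which lets cpu_get_icount() and cpu_get_clock() be read outside the BQL. A minimal sketch of the intended read/write pattern, mirroring the cpu_get_icount() loop in the cpus.c hunks; the shared_value field and the example_* helpers are illustrative, not part of the series:

    #include "qemu/seqlock.h"
    #include "qemu/thread.h"

    static QemuMutex writer_mutex;   /* serializes writers */
    static QemuSeqLock seq;
    static int64_t shared_value;     /* the field the seqlock protects */

    static void example_init(void)
    {
        qemu_mutex_init(&writer_mutex);
        /* A NULL mutex is also allowed when writers are already
         * serialized externally, e.g. by the BQL, which is what
         * configure_icount() does for vm_clock_seqlock. */
        seqlock_init(&seq, &writer_mutex);
    }

    /* Writer: the sequence is odd while the update is in flight. */
    static void example_write(int64_t v)
    {
        seqlock_write_lock(&seq);
        shared_value = v;
        seqlock_write_unlock(&seq);
    }

    /* Reader: lock-free; retries if a writer ran during the read. */
    static int64_t example_read(void)
    {
        int64_t v;
        unsigned start;

        do {
            start = seqlock_read_begin(&seq);
            v = shared_value;
        } while (seqlock_read_retry(&seq, start));

        return v;
    }

Readers never block writers and may loop instead, so the pattern fits rarely-written, often-read state such as the clock offsets in TimersState.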
-rw-r--r--  cpus.c                       | 144
-rw-r--r--  exec.c                       |  97
-rw-r--r--  hw/display/cirrus_vga.c      |   3
-rw-r--r--  hw/display/qxl.c             |   1
-rw-r--r--  hw/display/vga.c             |   5
-rw-r--r--  include/exec/ioport.h        |   2
-rw-r--r--  include/qemu/seqlock.h       |  72
-rw-r--r--  include/qemu/thread-posix.h  |   8
-rw-r--r--  include/qemu/thread-win32.h  |   4
-rw-r--r--  include/qemu/thread.h        |   7
-rw-r--r--  include/qemu/timer.h         |  34
-rw-r--r--  ioport.c                     |   9
-rw-r--r--  memory.c                     |   4
-rw-r--r--  qemu-timer.c                 |  97
-rw-r--r--  util/compatfd.c              |  16
-rw-r--r--  util/qemu-thread-posix.c     | 116
-rw-r--r--  util/qemu-thread-win32.c     |  26
17 files changed, 502 insertions, 143 deletions
diff --git a/cpus.c b/cpus.c
--- a/cpus.c
+++ b/cpus.c
@@ -37,6 +37,7 @@
 #include "sysemu/qtest.h"
 #include "qemu/main-loop.h"
 #include "qemu/bitmap.h"
+#include "qemu/seqlock.h"

 #ifndef _WIN32
 #include "qemu/compatfd.h"
@@ -97,21 +98,32 @@ static bool all_cpu_threads_idle(void)
 /***********************************************************/
 /* guest cycle counter */

+/* Protected by TimersState seqlock */
+
+/* Compensate for varying guest execution speed.  */
+static int64_t qemu_icount_bias;
+static int64_t vm_clock_warp_start;
+
 /* Conversion factor from emulated instructions to virtual clock ticks.  */
 static int icount_time_shift;
 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 #define MAX_ICOUNT_SHIFT 10
-/* Compensate for varying guest execution speed.  */
-static int64_t qemu_icount_bias;
+
+/* Only written by TCG thread */
+static int64_t qemu_icount;
+
 static QEMUTimer *icount_rt_timer;
 static QEMUTimer *icount_vm_timer;
 static QEMUTimer *icount_warp_timer;
-static int64_t vm_clock_warp_start;
-static int64_t qemu_icount;

 typedef struct TimersState {
+    /* Protected by BQL.  */
     int64_t cpu_ticks_prev;
     int64_t cpu_ticks_offset;
+
+    /* cpu_clock_offset can be read out of BQL, so protect it with
+     * this lock.
+     */
+    QemuSeqLock vm_clock_seqlock;
     int64_t cpu_clock_offset;
     int32_t cpu_ticks_enabled;
     int64_t dummy;
@@ -120,7 +132,7 @@ typedef struct TimersState {
 static TimersState timers_state;

 /* Return the virtual CPU time, based on the instruction counter.  */
-int64_t cpu_get_icount(void)
+static int64_t cpu_get_icount_locked(void)
 {
     int64_t icount;
     CPUState *cpu = current_cpu;
@@ -136,7 +148,21 @@ int64_t cpu_get_icount(void)
     return qemu_icount_bias + (icount << icount_time_shift);
 }

+int64_t cpu_get_icount(void)
+{
+    int64_t icount;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        icount = cpu_get_icount_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return icount;
+}
+
 /* return the host CPU cycle counter and handle stop/restart */
+/* Caller must hold the BQL */
 int64_t cpu_get_ticks(void)
 {
     if (use_icount) {
@@ -157,37 +183,63 @@ int64_t cpu_get_ticks(void)
     }
 }

-/* return the host CPU monotonic timer and handle stop/restart */
-int64_t cpu_get_clock(void)
+static int64_t cpu_get_clock_locked(void)
 {
     int64_t ti;
+
     if (!timers_state.cpu_ticks_enabled) {
-        return timers_state.cpu_clock_offset;
+        ti = timers_state.cpu_clock_offset;
     } else {
         ti = get_clock();
-        return ti + timers_state.cpu_clock_offset;
+        ti += timers_state.cpu_clock_offset;
     }
+
+    return ti;
 }

-/* enable cpu_get_ticks() */
+/* return the host CPU monotonic timer and handle stop/restart */
+int64_t cpu_get_clock(void)
+{
+    int64_t ti;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        ti = cpu_get_clock_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return ti;
+}
+
+/* enable cpu_get_ticks()
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
 void cpu_enable_ticks(void)
 {
+    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (!timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }

 /* disable cpu_get_ticks() : the clock is stopped.  You must not call
-   cpu_get_ticks() after that.  */
+ * cpu_get_ticks() after that.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
 void cpu_disable_ticks(void)
 {
+    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset = cpu_get_ticks();
-        timers_state.cpu_clock_offset = cpu_get_clock();
+        timers_state.cpu_clock_offset = cpu_get_clock_locked();
         timers_state.cpu_ticks_enabled = 0;
     }
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }

 /* Correlation between real and virtual time is always going to be
@@ -201,13 +253,19 @@ static void icount_adjust(void)
     int64_t cur_time;
     int64_t cur_icount;
     int64_t delta;
+
+    /* Protected by TimersState mutex.  */
     static int64_t last_delta;
+
     /* If the VM is not running, then do nothing.  */
     if (!runstate_is_running()) {
         return;
     }
-    cur_time = cpu_get_clock();
-    cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
+    cur_time = cpu_get_clock_locked();
+    cur_icount = cpu_get_icount_locked();
+
     delta = cur_icount - cur_time;
     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
     if (delta > 0
@@ -224,6 +282,7 @@ static void icount_adjust(void)
     }
     last_delta = delta;
     qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }

 static void icount_adjust_rt(void *opaque)
@@ -248,30 +307,37 @@ static int64_t qemu_icount_round(int64_t count)

 static void icount_warp_rt(void *opaque)
 {
-    if (vm_clock_warp_start == -1) {
+    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
+     * changes from -1 to another value, so the race here is okay.
+     */
+    if (atomic_read(&vm_clock_warp_start) == -1) {
         return;
     }

+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (runstate_is_running()) {
         int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-        int64_t warp_delta = clock - vm_clock_warp_start;
-        if (use_icount == 1) {
-            qemu_icount_bias += warp_delta;
-        } else {
+        int64_t warp_delta;
+
+        warp_delta = clock - vm_clock_warp_start;
+        if (use_icount == 2) {
             /*
              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
              * far ahead of real time.
              */
-            int64_t cur_time = cpu_get_clock();
-            int64_t cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            int64_t cur_time = cpu_get_clock_locked();
+            int64_t cur_icount = cpu_get_icount_locked();
             int64_t delta = cur_time - cur_icount;
-            qemu_icount_bias += MIN(warp_delta, delta);
-        }
-        if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
-            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+            warp_delta = MIN(warp_delta, delta);
         }
+        qemu_icount_bias += warp_delta;
     }
     vm_clock_warp_start = -1;
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+
+    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
+        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    }
 }

 void qtest_clock_warp(int64_t dest)
@@ -281,7 +347,10 @@ void qtest_clock_warp(int64_t dest)
     while (clock < dest) {
         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
         int64_t warp = MIN(dest - clock, deadline);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock);
         qemu_icount_bias += warp;
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+
         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     }
@@ -290,6 +359,7 @@ void qtest_clock_warp(int64_t dest)

 void qemu_clock_warp(QEMUClockType type)
 {
+    int64_t clock;
     int64_t deadline;

     /*
@@ -309,8 +379,8 @@ void qemu_clock_warp(QEMUClockType type)
      * the earliest QEMU_CLOCK_VIRTUAL timer.
      */
     icount_warp_rt(NULL);
-    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) {
-        timer_del(icount_warp_timer);
+    timer_del(icount_warp_timer);
+    if (!all_cpu_threads_idle()) {
         return;
     }

@@ -319,17 +389,11 @@ void qemu_clock_warp(QEMUClockType type)
         return;
     }

-    vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     /* We want to use the earliest deadline from ALL vm_clocks */
+    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
-    /* Maintain prior (possibly buggy) behaviour where if no deadline
-     * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
-     * INT32_MAX nanoseconds ahead, we still use INT32_MAX
-     * nanoseconds.
-     */
-    if ((deadline < 0) || (deadline > INT32_MAX)) {
-        deadline = INT32_MAX;
+    if (deadline < 0) {
+        return;
     }

     if (deadline > 0) {
@@ -350,7 +414,12 @@ void qemu_clock_warp(QEMUClockType type)
          * you will not be sending network packets continuously instead of
          * every 100ms.
          */
-        timer_mod(icount_warp_timer, vm_clock_warp_start + deadline);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock);
+        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
+            vm_clock_warp_start = clock;
+        }
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+        timer_mod_anticipate(icount_warp_timer, clock + deadline);
     } else if (deadline == 0) {
         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
     }
@@ -371,6 +440,7 @@ static const VMStateDescription vmstate_timers = {

 void configure_icount(const char *option)
 {
+    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
     if (!option) {
         return;
diff --git a/exec.c b/exec.c
--- a/exec.c
+++ b/exec.c
@@ -129,7 +129,6 @@ static PhysPageMap next_map;

 static void io_mem_init(void);
 static void memory_map_init(void);
-static void *qemu_safe_ram_ptr(ram_addr_t addr);

 static MemoryRegion io_mem_watch;
 #endif
@@ -626,22 +625,39 @@ void cpu_abort(CPUArchState *env, const char *fmt, ...)
 }

 #if !defined(CONFIG_USER_ONLY)
+static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
+{
+    RAMBlock *block;
+
+    /* The list is protected by the iothread lock here.  */
+    block = ram_list.mru_block;
+    if (block && addr - block->offset < block->length) {
+        goto found;
+    }
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (addr - block->offset < block->length) {
+            goto found;
+        }
+    }
+
+    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+    abort();
+
+found:
+    ram_list.mru_block = block;
+    return block;
+}
+
 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                       uintptr_t length)
 {
-    uintptr_t start1;
+    RAMBlock *block;
+    ram_addr_t start1;

-    /* we modify the TLB cache so that the dirty bit will be set again
-       when accessing the range */
-    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
-    /* Check that we don't span multiple blocks - this breaks the
-       address comparisons below.  */
-    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
-            != (end - 1) - start) {
-        abort();
-    }
+    block = qemu_get_ram_block(start);
+    assert(block == qemu_get_ram_block(end - 1));
+    start1 = (uintptr_t)block->host + (start - block->offset);
     cpu_tlb_reset_dirty_all(start1, length);
-
 }

 /* Note: start and end must be within the same ram block.  */
@@ -1269,29 +1285,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 }
 #endif /* !_WIN32 */

-static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
-{
-    RAMBlock *block;
-
-    /* The list is protected by the iothread lock here.  */
-    block = ram_list.mru_block;
-    if (block && addr - block->offset < block->length) {
-        goto found;
-    }
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
-            goto found;
-        }
-    }
-
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
-
-found:
-    ram_list.mru_block = block;
-    return block;
-}
-
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
    With the exception of the softmmu code in this file, this should
    only be used for local memory (e.g. video ram) that the device owns,
@@ -1319,40 +1312,6 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
     return block->host + (addr - block->offset);
 }

-/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
- * qemu_get_ram_ptr but do not touch ram_list.mru_block.
- *
- * ??? Is this still necessary?
- */
-static void *qemu_safe_ram_ptr(ram_addr_t addr)
-{
-    RAMBlock *block;
-
-    /* The list is protected by the iothread lock here.  */
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
-            if (xen_enabled()) {
-                /* We need to check if the requested address is in the RAM
-                 * because we don't want to map the entire memory in QEMU.
-                 * In that case just map until the end of the page.
-                 */
-                if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
-                } else if (block->host == NULL) {
-                    block->host =
-                        xen_map_cache(block->offset, block->length, 1);
-                }
-            }
-            return block->host + (addr - block->offset);
-        }
-    }
-
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
-
-    return NULL;
-}
-
 /* Return a host pointer to guest's ram.  Similar to qemu_get_ram_ptr
  * but takes a size argument */
 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index dbd1f4a47b..e4c345fa82 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2447,7 +2447,6 @@ static uint64_t cirrus_vga_ioport_read(void *opaque, hwaddr addr,
     VGACommonState *s = &c->vga;
     int val, index;

-    qemu_flush_coalesced_mmio_buffer();
     addr += 0x3b0;

     if (vga_ioport_invalid(s, addr)) {
@@ -2544,7 +2543,6 @@ static void cirrus_vga_ioport_write(void *opaque, hwaddr addr, uint64_t val,
     VGACommonState *s = &c->vga;
     int index;

-    qemu_flush_coalesced_mmio_buffer();
     addr += 0x3b0;

     /* check port range access depending on color/monochrome mode */
@@ -2843,6 +2841,7 @@ static void cirrus_init_common(CirrusVGAState *s, Object *owner,
     /* Register ioport 0x3b0 - 0x3df */
     memory_region_init_io(&s->cirrus_vga_io, owner, &cirrus_vga_io_ops, s,
                           "cirrus-io", 0x30);
+    memory_region_set_flush_coalesced(&s->cirrus_vga_io);
     memory_region_add_subregion(system_io, 0x3b0, &s->cirrus_vga_io);

     memory_region_init(&s->low_mem_container, owner,
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index c2cea1ce88..de835d6af8 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2073,6 +2073,7 @@ static int qxl_init_primary(PCIDevice *dev)
                      pci_address_space(dev), pci_address_space_io(dev), false);
     portio_list_init(qxl_vga_port_list, OBJECT(dev), qxl_vga_portio_list,
                      vga, "vga");
+    portio_list_set_flush_coalesced(qxl_vga_port_list);
     portio_list_add(qxl_vga_port_list, pci_address_space_io(dev), 0x3b0);

     vga->con = graphic_console_init(DEVICE(dev), &qxl_ops, qxl);
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 7b91d9c54e..b5e22849ab 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -359,8 +359,6 @@ uint32_t vga_ioport_read(void *opaque, uint32_t addr)
     VGACommonState *s = opaque;
     int val, index;

-    qemu_flush_coalesced_mmio_buffer();
-
     if (vga_ioport_invalid(s, addr)) {
         val = 0xff;
     } else {
@@ -453,8 +451,6 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VGACommonState *s = opaque;
     int index;

-    qemu_flush_coalesced_mmio_buffer();
-
     /* check port range access depending on color/monochrome mode */
     if (vga_ioport_invalid(s, addr)) {
         return;
@@ -2373,6 +2369,7 @@ void vga_init(VGACommonState *s, Object *obj, MemoryRegion *address_space,
     memory_region_set_coalescing(vga_io_memory);
     if (init_vga_ports) {
         portio_list_init(vga_port_list, obj, vga_ports, s, "vga");
+        portio_list_set_flush_coalesced(vga_port_list);
         portio_list_add(vga_port_list, address_space_io, 0x3b0);
     }
     if (vbe_ports) {
diff --git a/include/exec/ioport.h b/include/exec/ioport.h
index b3848be684..3bd6722627 100644
--- a/include/exec/ioport.h
+++ b/include/exec/ioport.h
@@ -64,11 +64,13 @@ typedef struct PortioList {
     struct MemoryRegion **regions;
     void *opaque;
     const char *name;
+    bool flush_coalesced_mmio;
 } PortioList;

 void portio_list_init(PortioList *piolist, Object *owner,
                       const struct MemoryRegionPortio *callbacks,
                       void *opaque, const char *name);
+void portio_list_set_flush_coalesced(PortioList *piolist);
 void portio_list_destroy(PortioList *piolist);
 void portio_list_add(PortioList *piolist,
                      struct MemoryRegion *address_space,
diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
new file mode 100644
index 0000000000..3ff118a1a1
--- /dev/null
+++ b/include/qemu/seqlock.h
@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include <qemu/atomic.h>
+#include <qemu/thread.h>
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+    QemuMutex *mutex;
+    unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+    sl->mutex = mutex;
+    sl->sequence = 0;
+}
+
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+    if (sl->mutex) {
+        qemu_mutex_lock(sl->mutex);
+    }
+    ++sl->sequence;
+
+    /* Write sequence before updating other fields.  */
+    smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+    /* Write other fields before finalizing sequence.  */
+    smp_wmb();
+
+    ++sl->sequence;
+    if (sl->mutex) {
+        qemu_mutex_unlock(sl->mutex);
+    }
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+    /* Always fail if a write is in progress.  */
+    unsigned ret = sl->sequence & ~1;
+
+    /* Read sequence before reading other fields.  */
+    smp_rmb();
+    return ret;
+}
+
+static int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+    /* Read other fields before reading final sequence.  */
+    smp_rmb();
+    return unlikely(sl->sequence != start);
+}
+
+#endif
diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 361566abc4..eb5c7a1da1 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -21,6 +21,14 @@ struct QemuSemaphore {
 #endif
 };

+struct QemuEvent {
+#ifndef __linux__
+    pthread_mutex_t lock;
+    pthread_cond_t cond;
+#endif
+    unsigned value;
+};
+
 struct QemuThread {
     pthread_t thread;
 };
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index 13adb958f0..3d58081bed 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -17,6 +17,10 @@ struct QemuSemaphore {
     HANDLE sema;
 };

+struct QemuEvent {
+    HANDLE event;
+};
+
 typedef struct QemuThreadData QemuThreadData;
 struct QemuThread {
     QemuThreadData *data;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index c02404b9fb..3e32c6531c 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -7,6 +7,7 @@
 typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuEvent QemuEvent;
 typedef struct QemuThread QemuThread;

 #ifdef _WIN32
@@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
 void qemu_sem_destroy(QemuSemaphore *sem);

+void qemu_event_init(QemuEvent *ev, bool init);
+void qemu_event_set(QemuEvent *ev);
+void qemu_event_reset(QemuEvent *ev);
+void qemu_event_wait(QemuEvent *ev);
+void qemu_event_destroy(QemuEvent *ev);
+
 void qemu_thread_create(QemuThread *thread,
                         void *(*start_routine)(void *),
                         void *arg, int mode);
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index b58903bef5..5afcffc3f9 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -189,6 +189,12 @@ void qemu_clock_notify(QEMUClockType type);
 * @enabled: true to enable, false to disable
 *
 * Enable or disable a clock
+ * Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this function should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
 */
 void qemu_clock_enable(QEMUClockType type, bool enabled);

@@ -539,6 +545,19 @@ void timer_del(QEMUTimer *ts);
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);

 /**
+ * timer_mod_anticipate_ns:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time,
+ * whichever comes earlier.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time);
+
+/**
 * timer_mod:
 * @ts: the timer
 * @expire_time: the expire time in the units associated with the timer
@@ -552,6 +571,19 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
 void timer_mod(QEMUTimer *ts, int64_t expire_timer);

 /**
+ * timer_mod_anticipate:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time, whichever
+ * comes earlier, taking into account the scale associated with the timer.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time);
+
+/**
 * timer_pending:
 * @ts: the timer
 *
@@ -653,7 +685,9 @@ static inline int64_t qemu_soonest_timeout(int64_t timeout1, int64_t timeout2)
 void init_clocks(void);

 int64_t cpu_get_ticks(void);
+/* Caller must hold BQL */
 void cpu_enable_ticks(void);
+/* Caller must hold BQL */
 void cpu_disable_ticks(void);

 static inline int64_t get_ticks_per_sec(void)
diff --git a/ioport.c b/ioport.c
--- a/ioport.c
+++ b/ioport.c
@@ -139,6 +139,12 @@ void portio_list_init(PortioList *piolist,
     piolist->opaque = opaque;
     piolist->owner = owner;
     piolist->name = name;
+    piolist->flush_coalesced_mmio = false;
+}
+
+void portio_list_set_flush_coalesced(PortioList *piolist)
+{
+    piolist->flush_coalesced_mmio = true;
 }

 void portio_list_destroy(PortioList *piolist)
@@ -231,6 +237,9 @@ static void portio_list_add_1(PortioList *piolist,
      */
     memory_region_init_io(&mrpio->mr, piolist->owner, &portio_ops, mrpio,
                           piolist->name, off_high - off_low);
+    if (piolist->flush_coalesced_mmio) {
+        memory_region_set_flush_coalesced(&mrpio->mr);
+    }
     memory_region_add_subregion(piolist->address_space,
                                 start + off_low, &mrpio->mr);
     piolist->regions[piolist->nr] = &mrpio->mr;
diff --git a/memory.c b/memory.c
--- a/memory.c
+++ b/memory.c
@@ -1809,7 +1809,9 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
                    mr->alias->name,
                    mr->alias_offset,
                    mr->alias_offset
-                   + (hwaddr)int128_get64(mr->size) - 1);
+                   + (int128_nz(mr->size) ?
+                      (hwaddr)int128_get64(int128_sub(mr->size,
+                                                      int128_one())) : 0));
     } else {
         mon_printf(f,
                    TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s\n",
diff --git a/qemu-timer.c b/qemu-timer.c
index 6b62e88669..e15ce477cc 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -45,6 +45,7 @@
 /* timers */

 typedef struct QEMUClock {
+    /* We rely on BQL to protect the timerlists */
     QLIST_HEAD(, QEMUTimerList) timerlists;

     NotifierList reset_notifiers;
@@ -71,6 +72,9 @@ struct QEMUTimerList {
     QLIST_ENTRY(QEMUTimerList) list;
     QEMUTimerListNotifyCB *notify_cb;
     void *notify_opaque;
+
+    /* lightweight method to mark the end of timerlist's running */
+    QemuEvent timers_done_ev;
 };

 /**
@@ -99,6 +103,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
     QEMUClock *clock = qemu_clock_ptr(type);

     timer_list = g_malloc0(sizeof(QEMUTimerList));
+    qemu_event_init(&timer_list->timers_done_ev, false);
     timer_list->clock = clock;
     timer_list->notify_cb = cb;
     timer_list->notify_opaque = opaque;
@@ -143,13 +148,25 @@ void qemu_clock_notify(QEMUClockType type)
     }
 }

+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this function should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
+ */
 void qemu_clock_enable(QEMUClockType type, bool enabled)
 {
     QEMUClock *clock = qemu_clock_ptr(type);
+    QEMUTimerList *tl;
     bool old = clock->enabled;
     clock->enabled = enabled;
     if (enabled && !old) {
         qemu_clock_notify(type);
+    } else if (!enabled && old) {
+        QLIST_FOREACH(tl, &clock->timerlists, list) {
+            qemu_event_wait(&tl->timers_done_ev);
+        }
     }
 }

@@ -338,6 +355,34 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
     }
 }

+static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
+                                QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimer **pt, *t;
+
+    /* add the timer in the sorted list */
+    pt = &timer_list->active_timers;
+    for (;;) {
+        t = *pt;
+        if (!timer_expired_ns(t, expire_time)) {
+            break;
+        }
+        pt = &t->next;
+    }
+    ts->expire_time = MAX(expire_time, 0);
+    ts->next = *pt;
+    *pt = ts;
+
+    return pt == &timer_list->active_timers;
+}
+
+static void timerlist_rearm(QEMUTimerList *timer_list)
+{
+    /* Interrupt execution to force deadline recalculation.  */
+    qemu_clock_warp(timer_list->clock->type);
+    timerlist_notify(timer_list);
+}
+
 /* stop a timer, but do not dealloc it */
 void timer_del(QEMUTimer *ts)
 {
@@ -353,30 +398,39 @@ void timer_del(QEMUTimer *ts)
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 {
     QEMUTimerList *timer_list = ts->timer_list;
-    QEMUTimer **pt, *t;
+    bool rearm;

     qemu_mutex_lock(&timer_list->active_timers_lock);
     timer_del_locked(timer_list, ts);
+    rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+    qemu_mutex_unlock(&timer_list->active_timers_lock);

-    /* add the timer in the sorted list */
-    pt = &timer_list->active_timers;
-    for(;;) {
-        t = *pt;
-        if (!timer_expired_ns(t, expire_time)) {
-            break;
+    if (rearm) {
+        timerlist_rearm(timer_list);
+    }
+}
+
+/* modify the current timer so that it will be fired when current_time
+   >= expire_time or the current deadline, whichever comes earlier.
+   The corresponding callback will be called.  */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimerList *timer_list = ts->timer_list;
+    bool rearm;
+
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    if (ts->expire_time == -1 || ts->expire_time > expire_time) {
+        if (ts->expire_time != -1) {
+            timer_del_locked(timer_list, ts);
         }
-        pt = &t->next;
+        rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+    } else {
+        rearm = false;
     }
-    ts->expire_time = MAX(expire_time, 0);
-    ts->next = *pt;
-    *pt = ts;
     qemu_mutex_unlock(&timer_list->active_timers_lock);

-    /* Rearm if necessary  */
-    if (pt == &timer_list->active_timers) {
-        /* Interrupt execution to force deadline recalculation.  */
-        qemu_clock_warp(timer_list->clock->type);
-        timerlist_notify(timer_list);
+    if (rearm) {
+        timerlist_rearm(timer_list);
     }
 }

@@ -385,6 +439,11 @@ void timer_mod(QEMUTimer *ts, int64_t expire_time)
     timer_mod_ns(ts, expire_time * ts->scale);
 }

+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
+{
+    timer_mod_anticipate_ns(ts, expire_time * ts->scale);
+}
+
 bool timer_pending(QEMUTimer *ts)
 {
     return ts->expire_time >= 0;
@@ -403,8 +462,9 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
     QEMUTimerCB *cb;
     void *opaque;

+    qemu_event_reset(&timer_list->timers_done_ev);
     if (!timer_list->clock->enabled) {
-        return progress;
+        goto out;
     }

     current_time = qemu_clock_get_ns(timer_list->clock->type);
@@ -428,6 +488,9 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
         cb(opaque);
         progress = true;
     }
+
+out:
+    qemu_event_set(&timer_list->timers_done_ev);
     return progress;
 }

diff --git a/util/compatfd.c b/util/compatfd.c
index 9cf3f2834d..430a41c855 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -15,9 +15,9 @@

 #include "qemu-common.h"
 #include "qemu/compatfd.h"
+#include "qemu/thread.h"

 #include <sys/syscall.h>
-#include <pthread.h>

 struct sigfd_compat_info
 {
@@ -28,10 +28,6 @@ struct sigfd_compat_info
 static void *sigwait_compat(void *opaque)
 {
     struct sigfd_compat_info *info = opaque;
-    sigset_t all;
-
-    sigfillset(&all);
-    pthread_sigmask(SIG_BLOCK, &all, NULL);

     while (1) {
         int sig;
@@ -71,9 +67,8 @@ static void *sigwait_compat(void *opaque)

 static int qemu_signalfd_compat(const sigset_t *mask)
 {
-    pthread_attr_t attr;
-    pthread_t tid;
     struct sigfd_compat_info *info;
+    QemuThread thread;
     int fds[2];

     info = malloc(sizeof(*info));
@@ -93,12 +88,7 @@ static int qemu_signalfd_compat(const sigset_t *mask)
     memcpy(&info->mask, mask, sizeof(*mask));
     info->fd = fds[1];

-    pthread_attr_init(&attr);
-    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-
-    pthread_create(&tid, &attr, sigwait_compat, info);
-
-    pthread_attr_destroy(&attr);
+    qemu_thread_create(&thread, sigwait_compat, info, QEMU_THREAD_DETACHED);

     return fds[0];
 }
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 4de133e7b2..37dd298631 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -20,7 +20,12 @@
 #include <limits.h>
 #include <unistd.h>
 #include <sys/time.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#include <linux/futex.h>
+#endif
 #include "qemu/thread.h"
+#include "qemu/atomic.h"

 static void error_exit(int err, const char *msg)
 {
@@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
 #endif
 }

+#ifdef __linux__
+#define futex(...)              syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+    futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+    futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+    if (n == 1) {
+        pthread_cond_signal(&ev->cond);
+    } else {
+        pthread_cond_broadcast(&ev->cond);
+    }
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+    pthread_mutex_lock(&ev->lock);
+    if (ev->value == val) {
+        pthread_cond_wait(&ev->cond, &ev->lock);
+    }
+    pthread_mutex_unlock(&ev->lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+    pthread_mutex_init(&ev->lock, NULL);
+    pthread_cond_init(&ev->cond, NULL);
+#endif
+
+    ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+#ifndef __linux__
+    pthread_mutex_destroy(&ev->lock);
+    pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) != EV_SET) {
+        if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            futex_wake(ev, INT_MAX);
+        }
+    }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) == EV_SET) {
+        /*
+         * If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        atomic_or(&ev->value, EV_FREE);
+    }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    unsigned value;
+
+    value = atomic_mb_read(&ev->value);
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /*
+             * Leave the event reset and tell qemu_event_set that there
+             * are waiters.  No need to retry, because there cannot be
+             * a concurrent busy->free transition.  After the CAS, the
+             * event will be either set or busy.
+             */
+            if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                return;
+            }
+        }
+        futex_wait(ev, EV_BUSY);
+    }
+}
+
+
 void qemu_thread_create(QemuThread *thread,
                        void *(*start_routine)(void*),
                        void *arg, int mode)
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 517878dcc1..27a5217769 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -227,6 +227,32 @@ void qemu_sem_wait(QemuSemaphore *sem)
     }
 }

+void qemu_event_init(QemuEvent *ev, bool init)
+{
+    /* Manual reset.  */
+    ev->event = CreateEvent(NULL, TRUE, init, NULL);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+    CloseHandle(ev->event);
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    SetEvent(ev->event);
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    ResetEvent(ev->event);
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    WaitForSingleObject(ev->event, INFINITE);
+}
+
 struct QemuThreadData {
     /* Passed to win32_start_routine.  */
     void *(*start_routine)(void *);
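
The QemuEvent added above is a binary event in the style of a Win32 manual-reset event (the win32 backend wraps one directly; the POSIX backend uses a futex on Linux and a mutex/condvar elsewhere). A sketch of the handshake that qemu_clock_enable() and timerlist_run_timers() build on; run_timers_once() and wait_until_idle() are illustrative names, not part of the series:

    #include "qemu/thread.h"

    static QemuEvent timers_done_ev;   /* cf. QEMUTimerList.timers_done_ev */

    static void example_setup(void)
    {
        qemu_event_init(&timers_done_ev, true);   /* start out "set" */
    }

    /* Timer thread: mark the list busy around the callbacks. */
    static void run_timers_once(void)
    {
        qemu_event_reset(&timers_done_ev);   /* set -> free */
        /* ... run expired timer callbacks ... */
        qemu_event_set(&timers_done_ev);     /* free/busy -> set, wake waiters */
    }

    /* Disabling thread: returns immediately if no run is in flight,
     * otherwise blocks until the current run calls qemu_event_set(). */
    static void wait_until_idle(void)
    {
        qemu_event_wait(&timers_done_ev);
    }

Unlike a semaphore, setting an already-set event is a no-op, so run/wait cycles never need to balance out.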
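
Likewise, timer_mod_anticipate() differs from timer_mod() in that it only ever moves a timer earlier. Two threads racing with timer_mod() could leave the later deadline installed; with timer_mod_anticipate() the earliest proposal always wins regardless of interleaving, which is why qemu_clock_warp() uses it for icount_warp_timer. An illustrative fragment (the helper name is hypothetical):

    #include "qemu/timer.h"

    /* Each caller proposes a deadline; the earliest one sticks.
     * This is a no-op when the timer is already pending earlier. */
    static void propose_deadline(QEMUTimer *warp_timer, int64_t expire_ns)
    {
        timer_mod_anticipate_ns(warp_timer, expire_ns);
    }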