80 files changed, 1831 insertions, 395 deletions
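A recurring pattern in the hunks below: walks of ctx->aio_handlers stop
relying on the walking_handlers counter and move to QLIST_FOREACH_RCU /
QLIST_INSERT_HEAD_RCU (from include/qemu/rcu_queue.h), so every list
pointer must be published and fetched with ordered accesses. As a rough
orientation, here is a standalone C11 sketch of that discipline;
AioHandlerModel and both function names are made up for illustration,
not QEMU APIs:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct AioHandlerModel AioHandlerModel;
    struct AioHandlerModel {
        _Atomic(AioHandlerModel *) next;
        atomic_int deleted;             /* set by writers, never cleared */
    };

    static _Atomic(AioHandlerModel *) handler_list;

    /* Reader side (the role of QLIST_FOREACH_RCU): fetch every pointer
     * with an acquire load, so a concurrently inserted node is seen
     * fully initialized or not at all. */
    int count_live_handlers(void)
    {
        int n = 0;
        for (AioHandlerModel *h = atomic_load_explicit(&handler_list,
                                                       memory_order_acquire);
             h != NULL;
             h = atomic_load_explicit(&h->next, memory_order_acquire)) {
            n += !h->deleted;
        }
        return n;
    }

    /* Writer side (the role of QLIST_INSERT_HEAD_RCU, called with the
     * list lock held): fill in the node first, then publish it with a
     * release store so readers never observe a half-built node. */
    void insert_handler(AioHandlerModel *node)
    {
        atomic_store_explicit(&node->next,
                              atomic_load_explicit(&handler_list,
                                                   memory_order_relaxed),
                              memory_order_relaxed);
        atomic_store_explicit(&handler_list, node, memory_order_release);
    }

Ordered reads make unlocked walks safe; actually freeing a node still
requires that no walk is in progress, which is what the new QemuLockCnt
introduced below keeps track of.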
diff --git a/aio-posix.c b/aio-posix.c index 15855715d4..9453d83743 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -16,7 +16,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block.h" -#include "qemu/queue.h" +#include "qemu/rcu_queue.h" #include "qemu/sockets.h" #include "qemu/cutils.h" #include "trace.h" @@ -66,7 +66,7 @@ static bool aio_epoll_try_enable(AioContext *ctx) AioHandler *node; struct epoll_event event; - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { int r; if (node->deleted || !node->pfd.events) { continue; @@ -212,24 +212,27 @@ void aio_set_fd_handler(AioContext *ctx, bool is_new = false; bool deleted = false; + qemu_lockcnt_lock(&ctx->list_lock); + node = find_aio_handler(ctx, fd); /* Are we deleting the fd handler? */ if (!io_read && !io_write && !io_poll) { if (node == NULL) { + qemu_lockcnt_unlock(&ctx->list_lock); return; } g_source_remove_poll(&ctx->source, &node->pfd); /* If the lock is held, just mark the node as deleted */ - if (ctx->walking_handlers) { + if (qemu_lockcnt_count(&ctx->list_lock)) { node->deleted = 1; node->pfd.revents = 0; } else { /* Otherwise, delete it for real. We can't just mark it as - * deleted because deleted nodes are only cleaned up after - * releasing the walking_handlers lock. + * deleted because deleted nodes are only cleaned up while + * no one is walking the handlers list. */ QLIST_REMOVE(node, node); deleted = true; @@ -243,7 +246,7 @@ void aio_set_fd_handler(AioContext *ctx, /* Alloc and insert if it's not already there */ node = g_new0(AioHandler, 1); node->pfd.fd = fd; - QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); + QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node); g_source_add_poll(&ctx->source, &node->pfd); is_new = true; @@ -265,6 +268,7 @@ void aio_set_fd_handler(AioContext *ctx, } aio_epoll_update(ctx, node, is_new); + qemu_lockcnt_unlock(&ctx->list_lock); aio_notify(ctx); if (deleted) { @@ -316,8 +320,8 @@ static void poll_set_started(AioContext *ctx, bool started) ctx->poll_started = started; - ctx->walking_handlers++; - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + qemu_lockcnt_inc(&ctx->list_lock); + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { IOHandler *fn; if (node->deleted) { @@ -334,7 +338,7 @@ static void poll_set_started(AioContext *ctx, bool started) fn(node->opaque); } } - ctx->walking_handlers--; + qemu_lockcnt_dec(&ctx->list_lock); } @@ -349,54 +353,47 @@ bool aio_prepare(AioContext *ctx) bool aio_pending(AioContext *ctx) { AioHandler *node; + bool result = false; - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + /* + * We have to walk very carefully in case aio_set_fd_handler is + * called while we're walking. + */ + qemu_lockcnt_inc(&ctx->list_lock); + + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { int revents; revents = node->pfd.revents & node->pfd.events; if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read && aio_node_check(ctx, node->is_external)) { - return true; + result = true; + break; } if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write && aio_node_check(ctx, node->is_external)) { - return true; + result = true; + break; } } + qemu_lockcnt_dec(&ctx->list_lock); - return false; + return result; } -/* - * Note that dispatch_fds == false has the side-effect of post-poning the - * freeing of deleted handlers. 
- */ -bool aio_dispatch(AioContext *ctx, bool dispatch_fds) +static bool aio_dispatch_handlers(AioContext *ctx) { - AioHandler *node = NULL; + AioHandler *node, *tmp; bool progress = false; /* - * If there are callbacks left that have been queued, we need to call them. - * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for aio_poll loops). - */ - if (aio_bh_poll(ctx)) { - progress = true; - } - - /* * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. */ - if (dispatch_fds) { - node = QLIST_FIRST(&ctx->aio_handlers); - } - while (node) { - AioHandler *tmp; - int revents; + qemu_lockcnt_inc(&ctx->list_lock); - ctx->walking_handlers++; + QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) { + int revents; revents = node->pfd.revents & node->pfd.events; node->pfd.revents = 0; @@ -420,15 +417,36 @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds) progress = true; } - tmp = node; - node = QLIST_NEXT(node, node); + if (node->deleted) { + if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { + QLIST_REMOVE(node, node); + g_free(node); + qemu_lockcnt_inc_and_unlock(&ctx->list_lock); + } + } + } - ctx->walking_handlers--; + qemu_lockcnt_dec(&ctx->list_lock); + return progress; +} - if (!ctx->walking_handlers && tmp->deleted) { - QLIST_REMOVE(tmp, node); - g_free(tmp); - } +/* + * Note that dispatch_fds == false has the side-effect of post-poning the + * freeing of deleted handlers. + */ +bool aio_dispatch(AioContext *ctx, bool dispatch_fds) +{ + bool progress; + + /* + * If there are callbacks left that have been queued, we need to call them. + * Do not call select in this case, because it is possible that the caller + * does not need a complete flush (as is the case for aio_poll loops). + */ + progress = aio_bh_poll(ctx); + + if (dispatch_fds) { + progress |= aio_dispatch_handlers(ctx); } /* Run our timers */ @@ -488,7 +506,7 @@ static bool run_poll_handlers_once(AioContext *ctx) bool progress = false; AioHandler *node; - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (!node->deleted && node->io_poll && node->io_poll(node->opaque)) { progress = true; @@ -509,7 +527,7 @@ static bool run_poll_handlers_once(AioContext *ctx) * Note that ctx->notify_me must be non-zero so this function can detect * aio_notify(). * - * Note that the caller must have incremented ctx->walking_handlers. + * Note that the caller must have incremented ctx->list_lock. * * Returns: true if progress was made, false otherwise */ @@ -519,7 +537,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) int64_t end_time; assert(ctx->notify_me); - assert(ctx->walking_handlers > 0); + assert(qemu_lockcnt_count(&ctx->list_lock) > 0); assert(ctx->poll_disable_cnt == 0); trace_run_poll_handlers_begin(ctx, max_ns); @@ -541,7 +559,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) * * ctx->notify_me must be non-zero so this function can detect aio_notify(). * - * Note that the caller must have incremented ctx->walking_handlers. + * Note that the caller must have incremented ctx->list_lock. 
* * Returns: true if progress was made, false otherwise */ @@ -592,7 +610,7 @@ bool aio_poll(AioContext *ctx, bool blocking) atomic_add(&ctx->notify_me, 2); } - ctx->walking_handlers++; + qemu_lockcnt_inc(&ctx->list_lock); if (ctx->poll_max_ns) { start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -606,7 +624,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* fill pollfds */ if (!aio_epoll_enabled(ctx)) { - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (!node->deleted && node->pfd.events && aio_node_check(ctx, node->is_external)) { add_pollfd(node); @@ -691,7 +709,7 @@ bool aio_poll(AioContext *ctx, bool blocking) } npfd = 0; - ctx->walking_handlers--; + qemu_lockcnt_dec(&ctx->list_lock); /* Run dispatch even if there were no readable fds to run timers */ if (aio_dispatch(ctx, ret > 0)) { diff --git a/aio-win32.c b/aio-win32.c index d19dc429d8..900524c9c2 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -21,6 +21,7 @@ #include "qemu/queue.h" #include "qemu/sockets.h" #include "qapi/error.h" +#include "qemu/rcu_queue.h" struct AioHandler { EventNotifier *e; @@ -45,6 +46,7 @@ void aio_set_fd_handler(AioContext *ctx, /* fd is a SOCKET in our case */ AioHandler *node; + qemu_lockcnt_lock(&ctx->list_lock); QLIST_FOREACH(node, &ctx->aio_handlers, node) { if (node->pfd.fd == fd && !node->deleted) { break; @@ -54,14 +56,14 @@ void aio_set_fd_handler(AioContext *ctx, /* Are we deleting the fd handler? */ if (!io_read && !io_write) { if (node) { - /* If the lock is held, just mark the node as deleted */ - if (ctx->walking_handlers) { + /* If aio_poll is in progress, just mark the node as deleted */ + if (qemu_lockcnt_count(&ctx->list_lock)) { node->deleted = 1; node->pfd.revents = 0; } else { /* Otherwise, delete it for real. We can't just mark it as * deleted because deleted nodes are only cleaned up after - * releasing the walking_handlers lock. + * releasing the list_lock. */ QLIST_REMOVE(node, node); g_free(node); @@ -74,7 +76,7 @@ void aio_set_fd_handler(AioContext *ctx, /* Alloc and insert if it's not already there */ node = g_new0(AioHandler, 1); node->pfd.fd = fd; - QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); + QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node); } node->pfd.events = 0; @@ -99,6 +101,7 @@ void aio_set_fd_handler(AioContext *ctx, FD_CONNECT | FD_WRITE | FD_OOB); } + qemu_lockcnt_unlock(&ctx->list_lock); aio_notify(ctx); } @@ -117,6 +120,7 @@ void aio_set_event_notifier(AioContext *ctx, { AioHandler *node; + qemu_lockcnt_lock(&ctx->list_lock); QLIST_FOREACH(node, &ctx->aio_handlers, node) { if (node->e == e && !node->deleted) { break; @@ -128,14 +132,14 @@ void aio_set_event_notifier(AioContext *ctx, if (node) { g_source_remove_poll(&ctx->source, &node->pfd); - /* If the lock is held, just mark the node as deleted */ - if (ctx->walking_handlers) { + /* aio_poll is in progress, just mark the node as deleted */ + if (qemu_lockcnt_count(&ctx->list_lock)) { node->deleted = 1; node->pfd.revents = 0; } else { /* Otherwise, delete it for real. We can't just mark it as * deleted because deleted nodes are only cleaned up after - * releasing the walking_handlers lock. + * releasing the list_lock. 
*/ QLIST_REMOVE(node, node); g_free(node); @@ -149,7 +153,7 @@ void aio_set_event_notifier(AioContext *ctx, node->pfd.fd = (uintptr_t)event_notifier_get_handle(e); node->pfd.events = G_IO_IN; node->is_external = is_external; - QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); + QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node); g_source_add_poll(&ctx->source, &node->pfd); } @@ -157,6 +161,7 @@ void aio_set_event_notifier(AioContext *ctx, node->io_notify = io_notify; } + qemu_lockcnt_unlock(&ctx->list_lock); aio_notify(ctx); } @@ -175,10 +180,16 @@ bool aio_prepare(AioContext *ctx) bool have_select_revents = false; fd_set rfds, wfds; + /* + * We have to walk very carefully in case aio_set_fd_handler is + * called while we're walking. + */ + qemu_lockcnt_inc(&ctx->list_lock); + /* fill fd sets */ FD_ZERO(&rfds); FD_ZERO(&wfds); - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (node->io_read) { FD_SET ((SOCKET)node->pfd.fd, &rfds); } @@ -188,7 +199,7 @@ bool aio_prepare(AioContext *ctx) } if (select(0, &rfds, &wfds, NULL, &tv0) > 0) { - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { node->pfd.revents = 0; if (FD_ISSET(node->pfd.fd, &rfds)) { node->pfd.revents |= G_IO_IN; @@ -202,45 +213,55 @@ bool aio_prepare(AioContext *ctx) } } + qemu_lockcnt_dec(&ctx->list_lock); return have_select_revents; } bool aio_pending(AioContext *ctx) { AioHandler *node; + bool result = false; - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + /* + * We have to walk very carefully in case aio_set_fd_handler is + * called while we're walking. + */ + qemu_lockcnt_inc(&ctx->list_lock); + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (node->pfd.revents && node->io_notify) { - return true; + result = true; + break; } if ((node->pfd.revents & G_IO_IN) && node->io_read) { - return true; + result = true; + break; } if ((node->pfd.revents & G_IO_OUT) && node->io_write) { - return true; + result = true; + break; } } - return false; + qemu_lockcnt_dec(&ctx->list_lock); + return result; } static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event) { AioHandler *node; bool progress = false; + AioHandler *tmp; + + qemu_lockcnt_inc(&ctx->list_lock); /* * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. 
*/ - node = QLIST_FIRST(&ctx->aio_handlers); - while (node) { - AioHandler *tmp; + QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) { int revents = node->pfd.revents; - ctx->walking_handlers++; - if (!node->deleted && (revents || event_notifier_get_handle(node->e) == event) && node->io_notify) { @@ -275,17 +296,16 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event) } } - tmp = node; - node = QLIST_NEXT(node, node); - - ctx->walking_handlers--; - - if (!ctx->walking_handlers && tmp->deleted) { - QLIST_REMOVE(tmp, node); - g_free(tmp); + if (node->deleted) { + if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { + QLIST_REMOVE(node, node); + g_free(node); + qemu_lockcnt_inc_and_unlock(&ctx->list_lock); + } } } + qemu_lockcnt_dec(&ctx->list_lock); return progress; } @@ -323,20 +343,19 @@ bool aio_poll(AioContext *ctx, bool blocking) atomic_add(&ctx->notify_me, 2); } + qemu_lockcnt_inc(&ctx->list_lock); have_select_revents = aio_prepare(ctx); - ctx->walking_handlers++; - /* fill fd sets */ count = 0; - QLIST_FOREACH(node, &ctx->aio_handlers, node) { + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (!node->deleted && node->io_notify && aio_node_check(ctx, node->is_external)) { events[count++] = event_notifier_get_handle(node->e); } } - ctx->walking_handlers--; + qemu_lockcnt_dec(&ctx->list_lock); first = true; /* ctx->notifier is always registered. */ @@ -53,14 +53,14 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) .cb = cb, .opaque = opaque, }; - qemu_mutex_lock(&ctx->bh_lock); + qemu_lockcnt_lock(&ctx->list_lock); bh->next = ctx->first_bh; bh->scheduled = 1; bh->deleted = 1; /* Make sure that the members are ready before putting bh into list */ smp_wmb(); ctx->first_bh = bh; - qemu_mutex_unlock(&ctx->bh_lock); + qemu_lockcnt_unlock(&ctx->list_lock); aio_notify(ctx); } @@ -73,12 +73,12 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) .cb = cb, .opaque = opaque, }; - qemu_mutex_lock(&ctx->bh_lock); + qemu_lockcnt_lock(&ctx->list_lock); bh->next = ctx->first_bh; /* Make sure that the members are ready before putting bh into list */ smp_wmb(); ctx->first_bh = bh; - qemu_mutex_unlock(&ctx->bh_lock); + qemu_lockcnt_unlock(&ctx->list_lock); return bh; } @@ -92,14 +92,13 @@ int aio_bh_poll(AioContext *ctx) { QEMUBH *bh, **bhp, *next; int ret; + bool deleted = false; - ctx->walking_bh++; + qemu_lockcnt_inc(&ctx->list_lock); ret = 0; - for (bh = ctx->first_bh; bh; bh = next) { - /* Make sure that fetching bh happens before accessing its members */ - smp_read_barrier_depends(); - next = bh->next; + for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) { + next = atomic_rcu_read(&bh->next); /* The atomic_xchg is paired with the one in qemu_bh_schedule. The * implicit memory barrier ensures that the callback sees all writes * done by the scheduling thread. 
It also ensures that the scheduling @@ -114,13 +113,18 @@ int aio_bh_poll(AioContext *ctx) bh->idle = 0; aio_bh_call(bh); } + if (bh->deleted) { + deleted = true; + } } - ctx->walking_bh--; - /* remove deleted bhs */ - if (!ctx->walking_bh) { - qemu_mutex_lock(&ctx->bh_lock); + if (!deleted) { + qemu_lockcnt_dec(&ctx->list_lock); + return ret; + } + + if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) { bhp = &ctx->first_bh; while (*bhp) { bh = *bhp; @@ -131,9 +135,8 @@ int aio_bh_poll(AioContext *ctx) bhp = &bh->next; } } - qemu_mutex_unlock(&ctx->bh_lock); + qemu_lockcnt_unlock(&ctx->list_lock); } - return ret; } @@ -187,7 +190,8 @@ aio_compute_timeout(AioContext *ctx) int timeout = -1; QEMUBH *bh; - for (bh = ctx->first_bh; bh; bh = bh->next) { + for (bh = atomic_rcu_read(&ctx->first_bh); bh; + bh = atomic_rcu_read(&bh->next)) { if (bh->scheduled) { if (bh->idle) { /* idle bottom halves will be polled at least @@ -270,7 +274,8 @@ aio_ctx_finalize(GSource *source) } #endif - qemu_mutex_lock(&ctx->bh_lock); + qemu_lockcnt_lock(&ctx->list_lock); + assert(!qemu_lockcnt_count(&ctx->list_lock)); while (ctx->first_bh) { QEMUBH *next = ctx->first_bh->next; @@ -280,12 +285,12 @@ aio_ctx_finalize(GSource *source) g_free(ctx->first_bh); ctx->first_bh = next; } - qemu_mutex_unlock(&ctx->bh_lock); + qemu_lockcnt_unlock(&ctx->list_lock); aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL); event_notifier_cleanup(&ctx->notifier); qemu_rec_mutex_destroy(&ctx->lock); - qemu_mutex_destroy(&ctx->bh_lock); + qemu_lockcnt_destroy(&ctx->list_lock); timerlistgroup_deinit(&ctx->tlg); } @@ -372,6 +377,7 @@ AioContext *aio_context_new(Error **errp) goto fail; } g_source_set_can_recurse(&ctx->source, true); + qemu_lockcnt_init(&ctx->list_lock); aio_set_event_notifier(ctx, &ctx->notifier, false, (EventNotifierHandler *) @@ -381,7 +387,6 @@ AioContext *aio_context_new(Error **errp) ctx->linux_aio = NULL; #endif ctx->thread_pool = NULL; - qemu_mutex_init(&ctx->bh_lock); qemu_rec_mutex_init(&ctx->lock); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); diff --git a/block/io.c b/block/io.c index 4f005623f7..c42b34a965 100644 --- a/block/io.c +++ b/block/io.c @@ -228,9 +228,7 @@ void bdrv_drained_begin(BlockDriverState *bs) bdrv_parent_drained_begin(bs); } - bdrv_io_unplugged_begin(bs); bdrv_drain_recurse(bs); - bdrv_io_unplugged_end(bs); } void bdrv_drained_end(BlockDriverState *bs) @@ -302,7 +300,6 @@ void bdrv_drain_all_begin(void) aio_context_acquire(aio_context); bdrv_parent_drained_begin(bs); - bdrv_io_unplugged_begin(bs); aio_disable_external(aio_context); aio_context_release(aio_context); @@ -347,7 +344,6 @@ void bdrv_drain_all_end(void) aio_context_acquire(aio_context); aio_enable_external(aio_context); - bdrv_io_unplugged_end(bs); bdrv_parent_drained_end(bs); aio_context_release(aio_context); } @@ -2650,7 +2646,7 @@ void bdrv_io_plug(BlockDriverState *bs) bdrv_io_plug(child->bs); } - if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) { + if (bs->io_plugged++ == 0) { BlockDriver *drv = bs->drv; if (drv && drv->bdrv_io_plug) { drv->bdrv_io_plug(bs); @@ -2663,7 +2659,7 @@ void bdrv_io_unplug(BlockDriverState *bs) BdrvChild *child; assert(bs->io_plugged); - if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) { + if (--bs->io_plugged == 0) { BlockDriver *drv = bs->drv; if (drv && drv->bdrv_io_unplug) { drv->bdrv_io_unplug(bs); @@ -2674,36 +2670,3 @@ void bdrv_io_unplug(BlockDriverState *bs) bdrv_io_unplug(child->bs); } } - -void bdrv_io_unplugged_begin(BlockDriverState *bs) -{ - BdrvChild 
*child; - - if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0) { - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_io_unplug) { - drv->bdrv_io_unplug(bs); - } - } - - QLIST_FOREACH(child, &bs->children, next) { - bdrv_io_unplugged_begin(child->bs); - } -} - -void bdrv_io_unplugged_end(BlockDriverState *bs) -{ - BdrvChild *child; - - assert(bs->io_plug_disabled); - QLIST_FOREACH(child, &bs->children, next) { - bdrv_io_unplugged_end(child->bs); - } - - if (--bs->io_plug_disabled == 0 && bs->io_plugged > 0) { - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_io_plug) { - drv->bdrv_io_plug(bs); - } - } -} @@ -60,24 +60,15 @@ /* statistics */ int tlb_flush_count; -/* NOTE: - * If flush_global is true (the usual case), flush all tlb entries. - * If flush_global is false, flush (at least) all tlb entries not - * marked global. - * - * Since QEMU doesn't currently implement a global/not-global flag - * for tlb entries, at the moment tlb_flush() will also flush all - * tlb entries in the flush_global == false case. This is OK because - * CPU architectures generally permit an implementation to drop - * entries from the TLB at any time, so flushing more entries than - * required is only an efficiency issue, not a correctness issue. +/* This is OK because CPU architectures generally permit an + * implementation to drop entries from the TLB at any time, so + * flushing more entries than required is only an efficiency issue, + * not a correctness issue. */ -void tlb_flush(CPUState *cpu, int flush_global) +void tlb_flush(CPUState *cpu) { CPUArchState *env = cpu->env_ptr; - tlb_debug("(%d)\n", flush_global); - memset(env->tlb_table, -1, sizeof(env->tlb_table)); memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); @@ -144,7 +135,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", env->tlb_flush_addr, env->tlb_flush_mask); - tlb_flush(cpu, 1); + tlb_flush(cpu); return; } diff --git a/default-configs/m68k-softmmu.mak b/default-configs/m68k-softmmu.mak index d9552df076..60f7cdfbf2 100644 --- a/default-configs/m68k-softmmu.mak +++ b/default-configs/m68k-softmmu.mak @@ -1,6 +1,4 @@ # Default configuration for m68k-softmmu -include pci.mak -include usb.mak CONFIG_COLDFIRE=y CONFIG_PTIMER=y diff --git a/docs/lockcnt.txt b/docs/lockcnt.txt new file mode 100644 index 0000000000..2a79b3205b --- /dev/null +++ b/docs/lockcnt.txt @@ -0,0 +1,277 @@ +DOCUMENTATION FOR LOCKED COUNTERS (aka QemuLockCnt) +=================================================== + +QEMU often uses reference counts to track data structures that are being +accessed and should not be freed. For example, a loop that invokes +callbacks like this is not safe: + + QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) { + if (ioh->revents & G_IO_OUT) { + ioh->fd_write(ioh->opaque); + } + } + +QLIST_FOREACH_SAFE protects against deletion of the current node (ioh) +by stashing away its "next" pointer. However, ioh->fd_write could +actually delete the next node from the list. The simplest way to +avoid this is to mark the node as deleted, and remove it from the +list in the above loop: + + QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) { + if (ioh->deleted) { + QLIST_REMOVE(ioh, next); + g_free(ioh); + } else { + if (ioh->revents & G_IO_OUT) { + ioh->fd_write(ioh->opaque); + } + } + } + +If however this loop must also be reentrant, i.e.
it is possible that +ioh->fd_write invokes the loop again, some kind of counting is needed: + + walking_handlers++; + QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) { + if (ioh->deleted) { + if (walking_handlers == 1) { + QLIST_REMOVE(ioh, next); + g_free(ioh); + } + } else { + if (ioh->revents & G_IO_OUT) { + ioh->fd_write(ioh->opaque); + } + } + } + walking_handlers--; + +One may think of using the RCU primitives, rcu_read_lock() and +rcu_read_unlock(); effectively, the RCU nesting count would take +the place of the walking_handlers global variable. Indeed, +reference counting and RCU have similar purposes, but their usage in +general is complementary: + +- reference counting is fine-grained and limited to a single data + structure; RCU delays reclamation of *all* RCU-protected data + structures; + +- reference counting works even in the presence of code that keeps + a reference for a long time; RCU critical sections in principle + should be kept short; + +- reference counting is often applied to code that is not thread-safe + but is reentrant; in fact, usage of reference counting in QEMU predates + the introduction of threads by many years. RCU is generally used to + protect readers from other threads freeing memory after concurrent + modifications to a data structure. + +- reclaiming data can be done by a separate thread in the case of RCU; + this can improve performance, but also delay reclamation undesirably. + With reference counting, reclamation is deterministic. + +This file documents QemuLockCnt, an abstraction for using reference +counting in code that has to be both thread-safe and reentrant. + + +QemuLockCnt concepts +-------------------- + +A QemuLockCnt comprises both a counter and a mutex; it has primitives +to increment and decrement the counter, and to take and release the +mutex. The counter notes how many visits to the data structures are +taking place (the visits could be from different threads, or there could +be multiple reentrant visits from the same thread). The basic rules +governing the counter/mutex pair then are the following: + +- Data protected by the QemuLockCnt must not be freed unless the + counter is zero and the mutex is taken. + +- A new visit cannot be started while the counter is zero and the + mutex is taken. + +Most of the time, the mutex protects all writes to the data structure, +not just frees, though there could be cases where this is not necessary. + +Reads, instead, can be done without taking the mutex, as long as the +readers and writers use the same macros that are used for RCU, for +example atomic_rcu_read, atomic_rcu_set, QLIST_FOREACH_RCU, etc. This is +because the reads are done outside a lock and a set or QLIST_INSERT_HEAD +can happen concurrently with the read. The RCU API ensures that the +processor and the compiler see all required memory barriers. + +This could be implemented simply by protecting the counter with the +mutex, for example: + + // (1) + qemu_mutex_lock(&walking_handlers_mutex); + walking_handlers++; + qemu_mutex_unlock(&walking_handlers_mutex); + + ... + + // (2) + qemu_mutex_lock(&walking_handlers_mutex); + if (--walking_handlers == 0) { + QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) { + if (ioh->deleted) { + QLIST_REMOVE(ioh, next); + g_free(ioh); + } + } + } + qemu_mutex_unlock(&walking_handlers_mutex); + +Here, no frees can happen in the code represented by the ellipsis. 
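Spelled out as a compilable unit (a pthread-based sketch; the IOHandler
fields are hypothetical, and deletion elsewhere must only set
ioh->deleted under the mutex, never free the node directly), the
pattern is:

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct IOHandler {
        struct IOHandler *next;
        int deleted;
        int revents;
        void (*fd_write)(void *opaque);
        void *opaque;
    } IOHandler;

    static IOHandler *io_handlers;
    static pthread_mutex_t walking_handlers_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int walking_handlers;

    void walk_and_reap(void)
    {
        /* (1) */
        pthread_mutex_lock(&walking_handlers_mutex);
        walking_handlers++;
        pthread_mutex_unlock(&walking_handlers_mutex);

        /* The visit; callbacks may mark nodes deleted and may even
         * re-enter walk_and_reap(), but nothing is freed here. */
        for (IOHandler *ioh = io_handlers; ioh; ioh = ioh->next) {
            if (!ioh->deleted && ioh->fd_write) {
                ioh->fd_write(ioh->opaque);
            }
        }

        /* (2) */
        pthread_mutex_lock(&walking_handlers_mutex);
        if (--walking_handlers == 0) {
            IOHandler **pioh = &io_handlers;
            while (*pioh) {
                IOHandler *ioh = *pioh;
                if (ioh->deleted) {
                    *pioh = ioh->next;  /* unlink, then reclaim */
                    free(ioh);
                } else {
                    pioh = &ioh->next;
                }
            }
        }
        pthread_mutex_unlock(&walking_handlers_mutex);
    }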
+If another thread is executing critical section (2), that part of +the code cannot be entered, because the thread will not be able +to increment the walking_handlers variable. And of course +during the visit any other thread will see a nonzero value for +walking_handlers, as in the single-threaded code. + +Note that it is possible for multiple concurrent accesses to delay +the cleanup arbitrarily; in other words, for the walking_handlers +counter to never become zero. For this reason, this technique is +more easily applicable if concurrent access to the structure is rare. + +However, critical sections are easy to forget since you have to do +them for each modification of the counter. QemuLockCnt ensures that +all modifications of the counter take the lock appropriately, and it +can also be more efficient in two ways: + +- it avoids taking the lock for many operations (for example + incrementing the counter while it is non-zero); + +- on some platforms, one can implement QemuLockCnt to hold the lock + and the mutex in a single word, making the fast path no more expensive + than simply managing a counter using atomic operations (see + docs/atomics.txt). This can be very helpful if concurrent access to + the data structure is expected to be rare. + + +Using the same mutex for frees and writes can still incur some small +inefficiencies; for example, a visit can never start if the counter is +zero and the mutex is taken---even if the mutex is taken by a write, +which in principle need not block a visit of the data structure. +However, these are usually not a problem if any of the following +assumptions are valid: + +- concurrent access is possible but rare + +- writes are rare + +- writes are frequent, but this kind of write (e.g. appending to a + list) has a very small critical section. + +For example, QEMU uses QemuLockCnt to manage an AioContext's list of +bottom halves and file descriptor handlers. Modifications to the list +of file descriptor handlers are rare. Creation of a new bottom half is +frequent and can happen on a fast path; however: 1) it is almost never +concurrent with a visit to the list of bottom halves; 2) it only has +three instructions in the critical path, two assignments and a smp_wmb(). + + +QemuLockCnt API +--------------- + +The QemuLockCnt API is described in include/qemu/thread.h. + + +QemuLockCnt usage +----------------- + +This section explains the typical usage patterns for QemuLockCnt functions. + +Setting a variable to a non-NULL value can be done between +qemu_lockcnt_lock and qemu_lockcnt_unlock: + + qemu_lockcnt_lock(&xyz_lockcnt); + if (!xyz) { + new_xyz = g_new(XYZ, 1); + ... + atomic_rcu_set(&xyz, new_xyz); + } + qemu_lockcnt_unlock(&xyz_lockcnt); + +Accessing the value can be done between qemu_lockcnt_inc and +qemu_lockcnt_dec: + + qemu_lockcnt_inc(&xyz_lockcnt); + if (xyz) { + XYZ *p = atomic_rcu_read(&xyz); + ... + /* Accesses can now be done through "p". */ + } + qemu_lockcnt_dec(&xyz_lockcnt); + +Freeing the object can similarly use qemu_lockcnt_lock and +qemu_lockcnt_unlock, but you also need to ensure that the count +is zero (i.e. there is no concurrent visit). Because qemu_lockcnt_inc +takes the QemuLockCnt's lock, the count cannot become non-zero while +the object is being freed. 
Freeing an object looks like this: + + qemu_lockcnt_lock(&xyz_lockcnt); + if (!qemu_lockcnt_count(&xyz_lockcnt)) { + g_free(xyz); + xyz = NULL; + } + qemu_lockcnt_unlock(&xyz_lockcnt); + +If an object has to be freed right after a visit, you can combine +the decrement, the locking and the check on count as follows: + + qemu_lockcnt_inc(&xyz_lockcnt); + if (xyz) { + XYZ *p = atomic_rcu_read(&xyz); + ... + /* Accesses can now be done through "p". */ + } + if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) { + g_free(xyz); + xyz = NULL; + qemu_lockcnt_unlock(&xyz_lockcnt); + } + +QemuLockCnt can also be used to access a list as follows: + + qemu_lockcnt_inc(&io_handlers_lockcnt); + QLIST_FOREACH_RCU(ioh, &io_handlers, pioh) { + if (ioh->revents & G_IO_OUT) { + ioh->fd_write(ioh->opaque); + } + } + + if (qemu_lockcnt_dec_and_lock(&io_handlers_lockcnt)) { + QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) { + if (ioh->deleted) { + QLIST_REMOVE(ioh, next); + g_free(ioh); + } + } + qemu_lockcnt_unlock(&io_handlers_lockcnt); + } + +Again, the RCU primitives are used because new items can be added to the +list during the walk. QLIST_FOREACH_RCU ensures that the processor and +the compiler see the appropriate memory barriers. + +An alternative pattern uses qemu_lockcnt_dec_if_lock: + + qemu_lockcnt_inc(&io_handlers_lockcnt); + QLIST_FOREACH_SAFE_RCU(ioh, &io_handlers, next, pioh) { + if (ioh->deleted) { + if (qemu_lockcnt_dec_if_lock(&io_handlers_lockcnt)) { + QLIST_REMOVE(ioh, next); + g_free(ioh); + qemu_lockcnt_inc_and_unlock(&io_handlers_lockcnt); + } + } else { + if (ioh->revents & G_IO_OUT) { + ioh->fd_write(ioh->opaque); + } + } + } + qemu_lockcnt_dec(&io_handlers_lockcnt); + +Here you can use qemu_lockcnt_dec instead of qemu_lockcnt_dec_and_lock, +because there is no special task to do if the count goes from 1 to 0. diff --git a/docs/multiple-iothreads.txt b/docs/multiple-iothreads.txt index 0e7cdb2c28..e4d340bbb7 100644 --- a/docs/multiple-iothreads.txt +++ b/docs/multiple-iothreads.txt @@ -84,9 +84,8 @@ How to synchronize with an IOThread AioContext is not thread-safe so some rules must be followed when using file descriptors, event notifiers, timers, or BHs across threads: -1. AioContext functions can be called safely from file descriptor, event -notifier, timer, or BH callbacks invoked by the AioContext. No locking is -necessary. +1. AioContext functions can always be called safely. They handle their +own locking internally. 2. Other threads wishing to access the AioContext must use aio_context_acquire()/aio_context_release() for mutual exclusion. Once the @@ -94,16 +93,14 @@ context is acquired no other thread can access it or run event loop iterations in this AioContext. aio_context_acquire()/aio_context_release() calls may be nested. This -means you can call them if you're not sure whether #1 applies. +means you can call them if you're not sure whether #2 applies. There is currently no lock ordering rule if a thread needs to acquire multiple AioContexts simultaneously. Therefore, it is only safe for code holding the QEMU global mutex to acquire other AioContexts. -Side note: the best way to schedule a function call across threads is to create -a BH in the target AioContext beforehand and then call qemu_bh_schedule(). No -acquire/release or locking is needed for the qemu_bh_schedule() call. But be -sure to acquire the AioContext for aio_bh_new() if necessary. +Side note: the best way to schedule a function call across threads is to call +aio_bh_schedule_oneshot(). 
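For instance (a sketch against the aio_bh_schedule_oneshot() prototype
visible earlier in this diff, in async.c; MyRequest and both functions
are hypothetical, and this builds only inside the QEMU tree):

    #include "qemu/osdep.h"
    #include "block/aio.h"

    typedef struct MyRequest {
        int ret;
    } MyRequest;

    /* Runs once, in the thread that owns @ctx. */
    static void finish_request_bh(void *opaque)
    {
        MyRequest *req = opaque;
        req->ret = 0;
    }

    /* Callable from any thread. */
    static void complete_in_iothread(AioContext *ctx, MyRequest *req)
    {
        aio_bh_schedule_oneshot(ctx, finish_request_bh, req);
    }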
No acquire/release or locking is needed. AioContext and the block layer ------------------------------ @@ -544,7 +544,7 @@ static int cpu_common_post_load(void *opaque, int version_id) /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the version_id is increased. */ cpu->interrupt_request &= ~0x01; - tlb_flush(cpu, 1); + tlb_flush(cpu); return 0; } @@ -2426,7 +2426,7 @@ static void tcg_commit(MemoryListener *listener) */ d = atomic_rcu_read(&cpuas->as->dispatch); atomic_rcu_set(&cpuas->memory_dispatch, d); - tlb_flush(cpuas->cpu, 1); + tlb_flush(cpuas->cpu); } void address_space_init_dispatch(AddressSpace *as) diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c index 3132d559d7..166e4bd947 100644 --- a/hw/sh4/sh7750.c +++ b/hw/sh4/sh7750.c @@ -417,7 +417,7 @@ static void sh7750_mem_writel(void *opaque, hwaddr addr, case SH7750_PTEH_A7: /* If asid changes, clear all registered tlb entries. */ if ((s->cpu->env.pteh & 0xff) != (mem_value & 0xff)) { - tlb_flush(CPU(s->cpu), 1); + tlb_flush(CPU(s->cpu)); } s->cpu->env.pteh = mem_value; return; diff --git a/include/block/aio.h b/include/block/aio.h index 4dca54d9c7..7df271d2b9 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -53,18 +53,12 @@ struct LinuxAioState; struct AioContext { GSource source; - /* Protects all fields from multi-threaded access */ + /* Used by AioContext users to protect from multi-threaded access. */ QemuRecMutex lock; - /* The list of registered AIO handlers */ + /* The list of registered AIO handlers. Protected by ctx->list_lock. */ QLIST_HEAD(, AioHandler) aio_handlers; - /* This is a simple lock used to protect the aio_handlers list. - * Specifically, it's used to ensure that no callbacks are removed while - * we're walking and dispatching callbacks. - */ - int walking_handlers; - /* Used to avoid unnecessary event_notifier_set calls in aio_notify; * accessed with atomic primitives. If this field is 0, everything * (file descriptors, bottom halves, timers) will be re-evaluated @@ -90,17 +84,15 @@ struct AioContext { */ uint32_t notify_me; - /* lock to protect between bh's adders and deleter */ - QemuMutex bh_lock; + /* A lock to protect between QEMUBH and AioHandler adders and deleter, + * and to ensure that no callbacks are removed while we're walking and + * dispatching them. + */ + QemuLockCnt list_lock; /* Anchor of the list of Bottom Halves belonging to the context */ struct QEMUBH *first_bh; - /* A simple lock used to protect the first_bh list, and ensure that - * no callbacks are removed while we're walking and dispatching callbacks. - */ - int walking_bh; - /* Used by aio_notify. * * "notified" is used to avoid expensive event_notifier_test_and_clear @@ -116,7 +108,9 @@ struct AioContext { bool notified; EventNotifier notifier; - /* Thread pool for performing work and receiving completion callbacks */ + /* Thread pool for performing work and receiving completion callbacks. + * Has its own locking. + */ struct ThreadPool *thread_pool; #ifdef CONFIG_LINUX_AIO @@ -126,7 +120,9 @@ struct AioContext { struct LinuxAioState *linux_aio; #endif - /* TimerLists for calling timers - one per clock type */ + /* TimerLists for calling timers - one per clock type. Has its own + * locking. + */ QEMUTimerListGroup tlg; int external_disable_cnt; @@ -180,9 +176,11 @@ void aio_context_unref(AioContext *ctx); * automatically takes care of calling aio_context_acquire and * aio_context_release. * - * Access to timers and BHs from a thread that has not acquired AioContext - * is possible. 
Access to callbacks for now must be done while the AioContext - * is owned by the thread (FIXME). + * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A + * thread still has to call those to avoid being interrupted by the guest. + * + * Bottom halves, timers and callbacks can be created or removed without + * acquiring the AioContext. */ void aio_context_acquire(AioContext *ctx); diff --git a/include/block/block.h b/include/block/block.h index 49bb0b239a..8b0dcdaa70 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -526,8 +526,6 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); void bdrv_io_plug(BlockDriverState *bs); void bdrv_io_unplug(BlockDriverState *bs); -void bdrv_io_unplugged_begin(BlockDriverState *bs); -void bdrv_io_unplugged_end(BlockDriverState *bs); /** * bdrv_drained_begin: diff --git a/include/block/block_int.h b/include/block/block_int.h index 4e4562d444..2d92d7edfe 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -526,9 +526,8 @@ struct BlockDriverState { uint64_t write_threshold_offset; NotifierWithReturn write_threshold_notifier; - /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */ + /* counter for nested bdrv_io_plug */ unsigned io_plugged; - unsigned io_plug_disabled; int quiesce_counter; }; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index a8c13cee66..bbc9478a50 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -95,15 +95,13 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr); /** * tlb_flush: * @cpu: CPU whose TLB should be flushed - * @flush_global: ignored * - * Flush the entire TLB for the specified CPU. - * The flush_global flag is in theory an indicator of whether the whole - * TLB should be flushed, or only those entries not marked global. - * In practice QEMU does not implement any global/not global flag for - * TLB entries, and the argument is ignored. + * Flush the entire TLB for the specified CPU. Most CPU architectures + * allow the implementation to drop entries from the TLB at any time + * so this is generally safe. If more selective flushing is required + * use one of the other functions for efficiency. */ -void tlb_flush(CPUState *cpu, int flush_global); +void tlb_flush(CPUState *cpu); /** * tlb_flush_page_by_mmuidx: * @cpu: CPU whose TLB should be flushed @@ -165,7 +163,7 @@ static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) { } -static inline void tlb_flush(CPUState *cpu, int flush_global) +static inline void tlb_flush(CPUState *cpu) { } diff --git a/include/qemu/futex.h b/include/qemu/futex.h new file mode 100644 index 0000000000..bb7dc9e296 --- /dev/null +++ b/include/qemu/futex.h @@ -0,0 +1,36 @@ +/* + * Wrappers around Linux futex syscall + * + * Copyright Red Hat, Inc. 2017 + * + * Author: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <sys/syscall.h> +#include <linux/futex.h> + +#define qemu_futex(...) 
syscall(__NR_futex, __VA_ARGS__) + +static inline void qemu_futex_wake(void *f, int n) +{ + qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0); +} + +static inline void qemu_futex_wait(void *f, unsigned val) +{ + while (qemu_futex(f, FUTEX_WAIT, (int) val, NULL, NULL, 0)) { + switch (errno) { + case EWOULDBLOCK: + return; + case EINTR: + break; /* get out of switch and retry */ + default: + abort(); + } + } +} diff --git a/include/qemu/thread.h b/include/qemu/thread.h index e8e665f020..9910f49b3a 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -8,6 +8,7 @@ typedef struct QemuMutex QemuMutex; typedef struct QemuCond QemuCond; typedef struct QemuSemaphore QemuSemaphore; typedef struct QemuEvent QemuEvent; +typedef struct QemuLockCnt QemuLockCnt; typedef struct QemuThread QemuThread; #ifdef _WIN32 @@ -98,4 +99,115 @@ static inline void qemu_spin_unlock(QemuSpin *spin) __sync_lock_release(&spin->value); } +struct QemuLockCnt { +#ifndef CONFIG_LINUX + QemuMutex mutex; +#endif + unsigned count; +}; + +/** + * qemu_lockcnt_init: initialize a QemuLockcnt + * @lockcnt: the lockcnt to initialize + * + * Initialize lockcnt's counter to zero and prepare its mutex + * for usage. + */ +void qemu_lockcnt_init(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_destroy: destroy a QemuLockcnt + * @lockcnt: the lockcnt to destruct + * + * Destroy lockcnt's mutex. + */ +void qemu_lockcnt_destroy(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_inc: increment a QemuLockCnt's counter + * @lockcnt: the lockcnt to operate on + * + * If the lockcnt's count is zero, wait for critical sections + * to finish and increment lockcnt's count to 1. If the count + * is not zero, just increment it. + * + * Because this function can wait on the mutex, it must not be + * called while the lockcnt's mutex is held by the current thread. + * For the same reason, qemu_lockcnt_inc can also contribute to + * AB-BA deadlocks. This is a sample deadlock scenario: + * + * thread 1 thread 2 + * ------------------------------------------------------- + * qemu_lockcnt_lock(&lc1); + * qemu_lockcnt_lock(&lc2); + * qemu_lockcnt_inc(&lc2); + * qemu_lockcnt_inc(&lc1); + */ +void qemu_lockcnt_inc(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_dec: decrement a QemuLockCnt's counter + * @lockcnt: the lockcnt to operate on + */ +void qemu_lockcnt_dec(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_dec_and_lock: decrement a QemuLockCnt's counter and + * possibly lock it. + * @lockcnt: the lockcnt to operate on + * + * Decrement lockcnt's count. If the new count is zero, lock + * the mutex and return true. Otherwise, return false. + */ +bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_dec_if_lock: possibly decrement a QemuLockCnt's counter and + * lock it. + * @lockcnt: the lockcnt to operate on + * + * If the count is 1, decrement the count to zero, lock + * the mutex and return true. Otherwise, return false. + */ +bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_lock: lock a QemuLockCnt's mutex. + * @lockcnt: the lockcnt to operate on + * + * Remember that concurrent visits are not blocked unless the count is + * also zero. You can use qemu_lockcnt_count to check for this inside a + * critical section. + */ +void qemu_lockcnt_lock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_unlock: release a QemuLockCnt's mutex. + * @lockcnt: the lockcnt to operate on. 
*/ +void qemu_lockcnt_unlock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_inc_and_unlock: combined unlock/increment on a QemuLockCnt. + * @lockcnt: the lockcnt to operate on. + * + * This is the same as + * + * qemu_lockcnt_unlock(lockcnt); + * qemu_lockcnt_inc(lockcnt); + * + * but more efficient. + */ +void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt); + +/** + * qemu_lockcnt_count: query a LockCnt's count. + * @lockcnt: the lockcnt to query. + * + * Note that the count can change at any time. Still, while the + * lockcnt is locked, one can usefully check whether the count + * is non-zero. + */ +unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt); + #endif @@ -270,8 +270,14 @@ static void cpu_common_reset(CPUState *cpu) cpu->exception_index = -1; cpu->crash_occurred = false; - for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { - atomic_set(&cpu->tb_jmp_cache[i], NULL); + if (tcg_enabled()) { + for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { + atomic_set(&cpu->tb_jmp_cache[i], NULL); + } + +#ifdef CONFIG_SOFTMMU + tlb_flush(cpu); +#endif } } diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 30d77ce71c..b4f97983e5 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -273,7 +273,7 @@ static void alpha_cpu_initfn(Object *obj) CPUAlphaState *env = &cpu->env; cs->env_ptr = env; - tlb_flush(cs, 1); + tlb_flush(cs); alpha_translate_init(); diff --git a/target/alpha/sys_helper.c b/target/alpha/sys_helper.c index bec1e178be..652195de6f 100644 --- a/target/alpha/sys_helper.c +++ b/target/alpha/sys_helper.c @@ -44,7 +44,7 @@ uint64_t helper_load_pcc(CPUAlphaState *env) #ifndef CONFIG_USER_ONLY void helper_tbia(CPUAlphaState *env) { - tlb_flush(CPU(alpha_env_get_cpu(env)), 1); + tlb_flush(CPU(alpha_env_get_cpu(env))); } void helper_tbis(CPUAlphaState *env, uint64_t p) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index f5cb30af6c..91046111d9 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -122,7 +122,8 @@ static void arm_cpu_reset(CPUState *s) acc->parent_reset(s); - memset(env, 0, offsetof(CPUARMState, features)); + memset(env, 0, offsetof(CPUARMState, end_reset_fields)); + g_hash_table_foreach(cpu->cp_regs, cp_reg_reset, cpu); g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu); @@ -226,8 +227,6 @@ static void arm_cpu_reset(CPUState *s) &env->vfp.fp_status); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.standard_fp_status); - tlb_flush(s, 1); - #ifndef CONFIG_USER_ONLY if (kvm_enabled()) { kvm_arm_reset_vcpu(cpu); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index ab119e62ab..7bd16eec18 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -491,9 +491,12 @@ typedef struct CPUARMState { struct CPUBreakpoint *cpu_breakpoint[16]; struct CPUWatchpoint *cpu_watchpoint[16]; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON - /* These fields after the common ones so they are preserved on reset. */ + /* Fields after CPU_COMMON are preserved across CPU reset. */ /* Internal CPU feature flags.
*/ uint64_t features; diff --git a/target/arm/helper.c b/target/arm/helper.c index 6c5c7ec811..b3875c7c6e 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -464,7 +464,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) ARMCPU *cpu = arm_env_get_cpu(env); raw_write(env, ri, value); - tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */ + tlb_flush(CPU(cpu)); /* Flush TLB as domain not tracked in TLB */ } static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -475,7 +475,7 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Unlike real hardware the qemu TLB uses virtual addresses, * not modified virtual addresses, so this causes a TLB flush. */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); raw_write(env, ri, value); } } @@ -491,7 +491,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, * format) this register includes the ASID, so do a TLB flush. * For PMSA it is purely a process ID and no action is needed. */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } raw_write(env, ri, value); } @@ -502,7 +502,7 @@ static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate all (TLBIALL) */ ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -520,7 +520,7 @@ static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate by ASID (TLBIASID) */ ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush(CPU(cpu), value == 0); + tlb_flush(CPU(cpu)); } static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -539,7 +539,7 @@ static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *other_cs; CPU_FOREACH(other_cs) { - tlb_flush(other_cs, 1); + tlb_flush(other_cs); } } @@ -549,7 +549,7 @@ static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *other_cs; CPU_FOREACH(other_cs) { - tlb_flush(other_cs, value == 0); + tlb_flush(other_cs); } } @@ -2304,7 +2304,7 @@ static void pmsav7_write(CPUARMState *env, const ARMCPRegInfo *ri, } u32p += env->cp15.c6_rgnr; - tlb_flush(CPU(cpu), 1); /* Mappings may have changed - purge! */ + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ *u32p = value; } @@ -2449,7 +2449,7 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* With LPAE the TTBCR could result in a change of ASID * via the TTBCR.A1 bit, so do a TLB flush. */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } vmsa_ttbcr_raw_write(env, ri, value); } @@ -2473,7 +2473,7 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, TCR *tcr = raw_ptr(env, ri); /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); tcr->raw_tcr = value; } @@ -2486,7 +2486,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, if (cpreg_field_is_64bit(ri)) { ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } raw_write(env, ri, value); } @@ -3154,7 +3154,7 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); /* ??? Lots of these bits are not implemented. */ /* This may enable/disable the MMU, so do a TLB flush. 
*/ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } static CPAccessResult fpexc32_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3622,7 +3622,7 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) * HCR_DC Disables stage1 and enables stage2 translation */ if ((raw_read(env, ri) ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } raw_write(env, ri, value); } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 4f09dfb95a..d0352e2045 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3217,7 +3217,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) tcg_tmp = read_cpu_reg(s, rn, 1); /* Recognize simple(r) extractions. */ - if (si <= ri) { + if (si >= ri) { /* Wd<s-r:0> = Wn<s:r> */ len = (si - ri) + 1; if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */ diff --git a/target/cris/cpu.c b/target/cris/cpu.c index 2e9ab9700e..5f766f09d6 100644 --- a/target/cris/cpu.c +++ b/target/cris/cpu.c @@ -52,9 +52,8 @@ static void cris_cpu_reset(CPUState *s) ccc->parent_reset(s); vr = env->pregs[PR_VR]; - memset(env, 0, offsetof(CPUCRISState, load_info)); + memset(env, 0, offsetof(CPUCRISState, end_reset_fields)); env->pregs[PR_VR] = vr; - tlb_flush(s, 1); #if defined(CONFIG_USER_ONLY) /* start in user mode with interrupts enabled. */ diff --git a/target/cris/cpu.h b/target/cris/cpu.h index 43d5f9d1da..920e1c33ba 100644 --- a/target/cris/cpu.h +++ b/target/cris/cpu.h @@ -167,10 +167,13 @@ typedef struct CPUCRISState { */ TLBSet tlbsets[2][4][16]; - CPU_COMMON + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; - /* Members from load_info on are preserved across resets. */ - void *load_info; + CPU_COMMON + + /* Members from load_info on are preserved across resets. */ + void *load_info; } CPUCRISState; /** diff --git a/target/i386/cpu.c b/target/i386/cpu.c index a149c8dc42..aba11ae171 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2820,8 +2820,6 @@ static void x86_cpu_reset(CPUState *s) memset(env, 0, offsetof(CPUX86State, end_reset_fields)); - tlb_flush(s, 1); - env->old_exception = -1; /* init to reset state */ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index a04e46b166..6c1902b36e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1123,10 +1123,12 @@ typedef struct CPUX86State { uint8_t nmi_injected; uint8_t nmi_pending; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON - /* Fields from here on are preserved across CPU reset. */ - struct {} end_reset_fields; + /* Fields after CPU_COMMON are preserved across CPU reset. */ /* processor features (e.g. 
for CPUID insn) */ /* Minimum level/xlevel/xlevel2, based on CPU model + features */ diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c index 2049a8c01d..66474ad98e 100644 --- a/target/i386/fpu_helper.c +++ b/target/i386/fpu_helper.c @@ -1465,7 +1465,7 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) } if (env->pkru != old_pkru) { CPUState *cs = CPU(x86_env_get_cpu(env)); - tlb_flush(cs, 1); + tlb_flush(cs); } } } diff --git a/target/i386/helper.c b/target/i386/helper.c index 43e87ddba0..c86272efab 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -586,7 +586,7 @@ void x86_cpu_set_a20(X86CPU *cpu, int a20_state) /* when a20 is changed, all the MMU mappings are invalid, so we must flush everything */ - tlb_flush(cs, 1); + tlb_flush(cs); env->a20_mask = ~(1 << 20) | (a20_state << 20); } } @@ -599,7 +599,7 @@ void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) qemu_log_mask(CPU_LOG_MMU, "CR0 update: CR0=0x%08x\n", new_cr0); if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) != (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } #ifdef TARGET_X86_64 @@ -641,7 +641,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) if (env->cr[0] & CR0_PG_MASK) { qemu_log_mask(CPU_LOG_MMU, "CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3); - tlb_flush(CPU(cpu), 0); + tlb_flush(CPU(cpu)); } } @@ -656,7 +656,7 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) if ((new_cr4 ^ env->cr[4]) & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } /* Clear bits we're going to recompute. */ diff --git a/target/i386/machine.c b/target/i386/machine.c index 760f82b6c7..e002b4fc6d 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -387,7 +387,7 @@ static int cpu_post_load(void *opaque, int version_id) env->dr[7] = dr7 & ~(DR7_GLOBAL_BP_MASK | DR7_LOCAL_BP_MASK); cpu_x86_update_dr7(env, dr7); } - tlb_flush(cs, 1); + tlb_flush(cs); if (tcg_enabled()) { cpu_smm_update(cpu); diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c index 3f666b4b87..5029efef47 100644 --- a/target/i386/misc_helper.c +++ b/target/i386/misc_helper.c @@ -635,5 +635,5 @@ void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val) } env->pkru = val; - tlb_flush(cs, 1); + tlb_flush(cs); } diff --git a/target/i386/svm_helper.c b/target/i386/svm_helper.c index 782b3f12f0..210f6aa7b5 100644 --- a/target/i386/svm_helper.c +++ b/target/i386/svm_helper.c @@ -289,7 +289,7 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) break; case TLB_CONTROL_FLUSH_ALL_ASID: /* FIXME: this is not 100% correct but should work for now */ - tlb_flush(cs, 1); + tlb_flush(cs); break; } diff --git a/target/lm32/cpu.c b/target/lm32/cpu.c index 8d939a7779..2b8c36b6d0 100644 --- a/target/lm32/cpu.c +++ b/target/lm32/cpu.c @@ -128,10 +128,9 @@ static void lm32_cpu_reset(CPUState *s) lcc->parent_reset(s); /* reset cpu state */ - memset(env, 0, offsetof(CPULM32State, eba)); + memset(env, 0, offsetof(CPULM32State, end_reset_fields)); lm32_cpu_init_cfg_reg(cpu); - tlb_flush(s, 1); } static void lm32_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) diff --git a/target/lm32/cpu.h b/target/lm32/cpu.h index d8a3515244..1d972cb26b 100644 --- a/target/lm32/cpu.h +++ b/target/lm32/cpu.h @@ -165,6 +165,9 @@ struct CPULM32State { struct CPUBreakpoint *cpu_breakpoint[4]; struct CPUWatchpoint 
*cpu_watchpoint[4]; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* Fields from here on are preserved across CPU reset. */ diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index ba17480098..fa10b6e4cd 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -52,7 +52,7 @@ static void m68k_cpu_reset(CPUState *s) mcc->parent_reset(s); - memset(env, 0, offsetof(CPUM68KState, features)); + memset(env, 0, offsetof(CPUM68KState, end_reset_fields)); #if !defined(CONFIG_USER_ONLY) env->sr = 0x2700; #endif @@ -61,7 +61,6 @@ static void m68k_cpu_reset(CPUState *s) cpu_m68k_set_ccr(env, 0); /* TODO: We should set PC from the interrupt vector. */ env->pc = 0; - tlb_flush(s, 1); } static void m68k_cpu_disas_set_info(CPUState *s, disassemble_info *info) diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h index 0b4ed7b8a6..809582212d 100644 --- a/target/m68k/cpu.h +++ b/target/m68k/cpu.h @@ -37,6 +37,7 @@ #define OS_DOUBLE 4 #define OS_EXTENDED 5 #define OS_PACKED 6 +#define OS_UNSIZED 7 #define MAX_QREGS 32 @@ -111,6 +112,9 @@ typedef struct CPUM68KState { uint32_t qregs[MAX_QREGS]; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* Fields from here on are preserved across CPU reset. */ diff --git a/target/m68k/helper.h b/target/m68k/helper.h index 17ec342346..d7a4bf1db5 100644 --- a/target/m68k/helper.h +++ b/target/m68k/helper.h @@ -50,3 +50,13 @@ DEF_HELPER_2(flush_flags, void, env, i32) DEF_HELPER_2(set_ccr, void, env, i32) DEF_HELPER_FLAGS_1(get_ccr, TCG_CALL_NO_WG_SE, i32, env) DEF_HELPER_2(raise_exception, void, env, i32) + +DEF_HELPER_FLAGS_3(bfffo_reg, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) + +DEF_HELPER_FLAGS_4(bfexts_mem, TCG_CALL_NO_WG, i32, env, i32, s32, i32) +DEF_HELPER_FLAGS_4(bfextu_mem, TCG_CALL_NO_WG, i64, env, i32, s32, i32) +DEF_HELPER_FLAGS_5(bfins_mem, TCG_CALL_NO_WG, i32, env, i32, i32, s32, i32) +DEF_HELPER_FLAGS_4(bfchg_mem, TCG_CALL_NO_WG, i32, env, i32, s32, i32) +DEF_HELPER_FLAGS_4(bfclr_mem, TCG_CALL_NO_WG, i32, env, i32, s32, i32) +DEF_HELPER_FLAGS_4(bfset_mem, TCG_CALL_NO_WG, i32, env, i32, s32, i32) +DEF_HELPER_FLAGS_4(bfffo_mem, TCG_CALL_NO_WG, i64, env, i32, s32, i32) diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c index e56b815d73..7b5126c88d 100644 --- a/target/m68k/op_helper.c +++ b/target/m68k/op_helper.c @@ -469,3 +469,209 @@ void HELPER(cas2l)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2) env->dregs[Dc1] = l1; env->dregs[Dc2] = l2; } + +struct bf_data { + uint32_t addr; + uint32_t bofs; + uint32_t blen; + uint32_t len; +}; + +static struct bf_data bf_prep(uint32_t addr, int32_t ofs, uint32_t len) +{ + int bofs, blen; + + /* Bound length; map 0 to 32. */ + len = ((len - 1) & 31) + 1; + + /* Note that ofs is signed. */ + addr += ofs / 8; + bofs = ofs % 8; + if (bofs < 0) { + bofs += 8; + addr -= 1; + } + + /* Compute the number of bytes required (minus one) to + satisfy the bitfield. */ + blen = (bofs + len - 1) / 8; + + /* Canonicalize the bit offset for data loaded into a 64-bit big-endian + word. For the cases where BLEN is not a power of 2, adjust ADDR so + that we can use the next power of two sized load without crossing a + page boundary, unless the field itself crosses the boundary. 
*/ + switch (blen) { + case 0: + bofs += 56; + break; + case 1: + bofs += 48; + break; + case 2: + if (addr & 1) { + bofs += 8; + addr -= 1; + } + /* fallthru */ + case 3: + bofs += 32; + break; + case 4: + if (addr & 3) { + bofs += 8 * (addr & 3); + addr &= -4; + } + break; + default: + g_assert_not_reached(); + } + + return (struct bf_data){ + .addr = addr, + .bofs = bofs, + .blen = blen, + .len = len, + }; +} + +static uint64_t bf_load(CPUM68KState *env, uint32_t addr, int blen, + uintptr_t ra) +{ + switch (blen) { + case 0: + return cpu_ldub_data_ra(env, addr, ra); + case 1: + return cpu_lduw_data_ra(env, addr, ra); + case 2: + case 3: + return cpu_ldl_data_ra(env, addr, ra); + case 4: + return cpu_ldq_data_ra(env, addr, ra); + default: + g_assert_not_reached(); + } +} + +static void bf_store(CPUM68KState *env, uint32_t addr, int blen, + uint64_t data, uintptr_t ra) +{ + switch (blen) { + case 0: + cpu_stb_data_ra(env, addr, data, ra); + break; + case 1: + cpu_stw_data_ra(env, addr, data, ra); + break; + case 2: + case 3: + cpu_stl_data_ra(env, addr, data, ra); + break; + case 4: + cpu_stq_data_ra(env, addr, data, ra); + break; + default: + g_assert_not_reached(); + } +} + +uint32_t HELPER(bfexts_mem)(CPUM68KState *env, uint32_t addr, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + + return (int64_t)(data << d.bofs) >> (64 - d.len); +} + +uint64_t HELPER(bfextu_mem)(CPUM68KState *env, uint32_t addr, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + + /* Put CC_N at the top of the high word; put the zero-extended value + at the bottom of the low word. */ + data <<= d.bofs; + data >>= 64 - d.len; + data |= data << (64 - d.len); + + return data; +} + +uint32_t HELPER(bfins_mem)(CPUM68KState *env, uint32_t addr, uint32_t val, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + uint64_t mask = -1ull << (64 - d.len) >> d.bofs; + + data = (data & ~mask) | (((uint64_t)val << (64 - d.len)) >> d.bofs); + + bf_store(env, d.addr, d.blen, data, ra); + + /* The field at the top of the word is also CC_N for CC_OP_LOGIC. 
*/ + return val << (32 - d.len); +} + +uint32_t HELPER(bfchg_mem)(CPUM68KState *env, uint32_t addr, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + uint64_t mask = -1ull << (64 - d.len) >> d.bofs; + + bf_store(env, d.addr, d.blen, data ^ mask, ra); + + return ((data & mask) << d.bofs) >> 32; +} + +uint32_t HELPER(bfclr_mem)(CPUM68KState *env, uint32_t addr, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + uint64_t mask = -1ull << (64 - d.len) >> d.bofs; + + bf_store(env, d.addr, d.blen, data & ~mask, ra); + + return ((data & mask) << d.bofs) >> 32; +} + +uint32_t HELPER(bfset_mem)(CPUM68KState *env, uint32_t addr, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + uint64_t mask = -1ull << (64 - d.len) >> d.bofs; + + bf_store(env, d.addr, d.blen, data | mask, ra); + + return ((data & mask) << d.bofs) >> 32; +} + +uint32_t HELPER(bfffo_reg)(uint32_t n, uint32_t ofs, uint32_t len) +{ + return (n ? clz32(n) : len) + ofs; +} + +uint64_t HELPER(bfffo_mem)(CPUM68KState *env, uint32_t addr, + int32_t ofs, uint32_t len) +{ + uintptr_t ra = GETPC(); + struct bf_data d = bf_prep(addr, ofs, len); + uint64_t data = bf_load(env, d.addr, d.blen, ra); + uint64_t mask = -1ull << (64 - d.len) >> d.bofs; + uint64_t n = (data & mask) << d.bofs; + uint32_t ffo = helper_bfffo_reg(n >> 32, ofs, d.len); + + /* Return FFO in the low word and N in the high word. + Note that because of MASK and the shift, the low word + is already zero. */ + return n | ffo; +} diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 53293173c5..9f60fbc0db 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -595,18 +595,19 @@ static void gen_flush_flags(DisasContext *s) case CC_OP_DYNAMIC: gen_helper_flush_flags(cpu_env, QREG_CC_OP); + s->cc_op_synced = 1; break; default: t0 = tcg_const_i32(s->cc_op); gen_helper_flush_flags(cpu_env, t0); tcg_temp_free(t0); + s->cc_op_synced = 1; break; } /* Note that flush_flags also assigned to env->cc_op. */ s->cc_op = CC_OP_FLAGS; - s->cc_op_synced = 1; } static inline TCGv gen_extend(TCGv val, int opsize, int sign) @@ -711,13 +712,25 @@ static TCGv gen_lea_mode(CPUM68KState *env, DisasContext *s, case 0: /* Data register direct. */ case 1: /* Address register direct. */ return NULL_QREG; - case 2: /* Indirect register */ case 3: /* Indirect postincrement. */ + if (opsize == OS_UNSIZED) { + return NULL_QREG; + } + /* fallthru */ + case 2: /* Indirect register */ return get_areg(s, reg0); case 4: /* Indirect predecrememnt. */ + if (opsize == OS_UNSIZED) { + return NULL_QREG; + } reg = get_areg(s, reg0); tmp = tcg_temp_new(); - tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize)); + if (reg0 == 7 && opsize == OS_BYTE && + m68k_feature(s->env, M68K_FEATURE_M68000)) { + tcg_gen_subi_i32(tmp, reg, 2); + } else { + tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize)); + } return tmp; case 5: /* Indirect displacement. 
*/ reg = get_areg(s, reg0); @@ -793,7 +806,12 @@ static TCGv gen_ea_mode(CPUM68KState *env, DisasContext *s, int mode, int reg0, result = gen_ldst(s, opsize, reg, val, what); if (what == EA_STORE || !addrp) { TCGv tmp = tcg_temp_new(); - tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize)); + if (reg0 == 7 && opsize == OS_BYTE && + m68k_feature(s->env, M68K_FEATURE_M68000)) { + tcg_gen_addi_i32(tmp, reg, 2); + } else { + tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize)); + } delay_set_areg(s, reg0, tmp, true); } return result; @@ -1794,9 +1812,16 @@ DISAS_INSN(bitop_im) op = (insn >> 6) & 3; bitnum = read_im16(env, s); - if (bitnum & 0xff00) { - disas_undef(env, s, insn); - return; + if (m68k_feature(s->env, M68K_FEATURE_M68000)) { + if (bitnum & 0xfe00) { + disas_undef(env, s, insn); + return; + } + } else { + if (bitnum & 0xff00) { + disas_undef(env, s, insn); + return; + } } SRC_EA(env, src1, opsize, 0, op ? &addr: NULL); @@ -1919,7 +1944,6 @@ DISAS_INSN(cas) default: g_assert_not_reached(); } - opc |= MO_ALIGN; ext = read_im16(env, s); @@ -1948,6 +1972,15 @@ DISAS_INSN(cas) gen_partset_reg(opsize, DREG(ext, 0), load); tcg_temp_free(load); + + switch (extract32(insn, 3, 3)) { + case 3: /* Indirect postincrement. */ + tcg_gen_addi_i32(AREG(insn, 0), addr, opsize_bytes(opsize)); + break; + case 4: /* Indirect predecrememnt. */ + tcg_gen_mov_i32(AREG(insn, 0), addr); + break; + } } DISAS_INSN(cas2w) @@ -3504,6 +3537,370 @@ DISAS_INSN(rotate_mem) set_cc_op(s, CC_OP_FLAGS); } +DISAS_INSN(bfext_reg) +{ + int ext = read_im16(env, s); + int is_sign = insn & 0x200; + TCGv src = DREG(insn, 0); + TCGv dst = DREG(ext, 12); + int len = ((extract32(ext, 0, 5) - 1) & 31) + 1; + int ofs = extract32(ext, 6, 5); /* big bit-endian */ + int pos = 32 - ofs - len; /* little bit-endian */ + TCGv tmp = tcg_temp_new(); + TCGv shift; + + /* In general, we're going to rotate the field so that it's at the + top of the word and then right-shift by the compliment of the + width to extend the field. */ + if (ext & 0x20) { + /* Variable width. */ + if (ext & 0x800) { + /* Variable offset. */ + tcg_gen_andi_i32(tmp, DREG(ext, 6), 31); + tcg_gen_rotl_i32(tmp, src, tmp); + } else { + tcg_gen_rotli_i32(tmp, src, ofs); + } + + shift = tcg_temp_new(); + tcg_gen_neg_i32(shift, DREG(ext, 0)); + tcg_gen_andi_i32(shift, shift, 31); + tcg_gen_sar_i32(QREG_CC_N, tmp, shift); + if (is_sign) { + tcg_gen_mov_i32(dst, QREG_CC_N); + } else { + tcg_gen_shr_i32(dst, tmp, shift); + } + tcg_temp_free(shift); + } else { + /* Immediate width. */ + if (ext & 0x800) { + /* Variable offset */ + tcg_gen_andi_i32(tmp, DREG(ext, 6), 31); + tcg_gen_rotl_i32(tmp, src, tmp); + src = tmp; + pos = 32 - len; + } else { + /* Immediate offset. If the field doesn't wrap around the + end of the word, rely on (s)extract completely. 
*/ + if (pos < 0) { + tcg_gen_rotli_i32(tmp, src, ofs); + src = tmp; + pos = 32 - len; + } + } + + tcg_gen_sextract_i32(QREG_CC_N, src, pos, len); + if (is_sign) { + tcg_gen_mov_i32(dst, QREG_CC_N); + } else { + tcg_gen_extract_i32(dst, src, pos, len); + } + } + + tcg_temp_free(tmp); + set_cc_op(s, CC_OP_LOGIC); +} + +DISAS_INSN(bfext_mem) +{ + int ext = read_im16(env, s); + int is_sign = insn & 0x200; + TCGv dest = DREG(ext, 12); + TCGv addr, len, ofs; + + addr = gen_lea(env, s, insn, OS_UNSIZED); + if (IS_NULL_QREG(addr)) { + gen_addr_fault(s); + return; + } + + if (ext & 0x20) { + len = DREG(ext, 0); + } else { + len = tcg_const_i32(extract32(ext, 0, 5)); + } + if (ext & 0x800) { + ofs = DREG(ext, 6); + } else { + ofs = tcg_const_i32(extract32(ext, 6, 5)); + } + + if (is_sign) { + gen_helper_bfexts_mem(dest, cpu_env, addr, ofs, len); + tcg_gen_mov_i32(QREG_CC_N, dest); + } else { + TCGv_i64 tmp = tcg_temp_new_i64(); + gen_helper_bfextu_mem(tmp, cpu_env, addr, ofs, len); + tcg_gen_extr_i64_i32(dest, QREG_CC_N, tmp); + tcg_temp_free_i64(tmp); + } + set_cc_op(s, CC_OP_LOGIC); + + if (!(ext & 0x20)) { + tcg_temp_free(len); + } + if (!(ext & 0x800)) { + tcg_temp_free(ofs); + } +} + +DISAS_INSN(bfop_reg) +{ + int ext = read_im16(env, s); + TCGv src = DREG(insn, 0); + int len = ((extract32(ext, 0, 5) - 1) & 31) + 1; + int ofs = extract32(ext, 6, 5); /* big bit-endian */ + TCGv mask, tofs, tlen; + + TCGV_UNUSED(tofs); + TCGV_UNUSED(tlen); + if ((insn & 0x0f00) == 0x0d00) { /* bfffo */ + tofs = tcg_temp_new(); + tlen = tcg_temp_new(); + } + + if ((ext & 0x820) == 0) { + /* Immediate width and offset. */ + uint32_t maski = 0x7fffffffu >> (len - 1); + if (ofs + len <= 32) { + tcg_gen_shli_i32(QREG_CC_N, src, ofs); + } else { + tcg_gen_rotli_i32(QREG_CC_N, src, ofs); + } + tcg_gen_andi_i32(QREG_CC_N, QREG_CC_N, ~maski); + mask = tcg_const_i32(ror32(maski, ofs)); + if (!TCGV_IS_UNUSED(tofs)) { + tcg_gen_movi_i32(tofs, ofs); + tcg_gen_movi_i32(tlen, len); + } + } else { + TCGv tmp = tcg_temp_new(); + if (ext & 0x20) { + /* Variable width */ + tcg_gen_subi_i32(tmp, DREG(ext, 0), 1); + tcg_gen_andi_i32(tmp, tmp, 31); + mask = tcg_const_i32(0x7fffffffu); + tcg_gen_shr_i32(mask, mask, tmp); + if (!TCGV_IS_UNUSED(tlen)) { + tcg_gen_addi_i32(tlen, tmp, 1); + } + } else { + /* Immediate width */ + mask = tcg_const_i32(0x7fffffffu >> (len - 1)); + if (!TCGV_IS_UNUSED(tlen)) { + tcg_gen_movi_i32(tlen, len); + } + } + if (ext & 0x800) { + /* Variable offset */ + tcg_gen_andi_i32(tmp, DREG(ext, 6), 31); + tcg_gen_rotl_i32(QREG_CC_N, src, tmp); + tcg_gen_andc_i32(QREG_CC_N, QREG_CC_N, mask); + tcg_gen_rotr_i32(mask, mask, tmp); + if (!TCGV_IS_UNUSED(tofs)) { + tcg_gen_mov_i32(tofs, tmp); + } + } else { + /* Immediate offset (and variable width) */ + tcg_gen_rotli_i32(QREG_CC_N, src, ofs); + tcg_gen_andc_i32(QREG_CC_N, QREG_CC_N, mask); + tcg_gen_rotri_i32(mask, mask, ofs); + if (!TCGV_IS_UNUSED(tofs)) { + tcg_gen_movi_i32(tofs, ofs); + } + } + tcg_temp_free(tmp); + } + set_cc_op(s, CC_OP_LOGIC); + + switch (insn & 0x0f00) { + case 0x0a00: /* bfchg */ + tcg_gen_eqv_i32(src, src, mask); + break; + case 0x0c00: /* bfclr */ + tcg_gen_and_i32(src, src, mask); + break; + case 0x0d00: /* bfffo */ + gen_helper_bfffo_reg(DREG(ext, 12), QREG_CC_N, tofs, tlen); + tcg_temp_free(tlen); + tcg_temp_free(tofs); + break; + case 0x0e00: /* bfset */ + tcg_gen_orc_i32(src, src, mask); + break; + case 0x0800: /* bftst */ + /* flags already set; no other work to do. 
*/ + break; + default: + g_assert_not_reached(); + } + tcg_temp_free(mask); +} + +DISAS_INSN(bfop_mem) +{ + int ext = read_im16(env, s); + TCGv addr, len, ofs; + TCGv_i64 t64; + + addr = gen_lea(env, s, insn, OS_UNSIZED); + if (IS_NULL_QREG(addr)) { + gen_addr_fault(s); + return; + } + + if (ext & 0x20) { + len = DREG(ext, 0); + } else { + len = tcg_const_i32(extract32(ext, 0, 5)); + } + if (ext & 0x800) { + ofs = DREG(ext, 6); + } else { + ofs = tcg_const_i32(extract32(ext, 6, 5)); + } + + switch (insn & 0x0f00) { + case 0x0a00: /* bfchg */ + gen_helper_bfchg_mem(QREG_CC_N, cpu_env, addr, ofs, len); + break; + case 0x0c00: /* bfclr */ + gen_helper_bfclr_mem(QREG_CC_N, cpu_env, addr, ofs, len); + break; + case 0x0d00: /* bfffo */ + t64 = tcg_temp_new_i64(); + gen_helper_bfffo_mem(t64, cpu_env, addr, ofs, len); + tcg_gen_extr_i64_i32(DREG(ext, 12), QREG_CC_N, t64); + tcg_temp_free_i64(t64); + break; + case 0x0e00: /* bfset */ + gen_helper_bfset_mem(QREG_CC_N, cpu_env, addr, ofs, len); + break; + case 0x0800: /* bftst */ + gen_helper_bfexts_mem(QREG_CC_N, cpu_env, addr, ofs, len); + break; + default: + g_assert_not_reached(); + } + set_cc_op(s, CC_OP_LOGIC); + + if (!(ext & 0x20)) { + tcg_temp_free(len); + } + if (!(ext & 0x800)) { + tcg_temp_free(ofs); + } +} + +DISAS_INSN(bfins_reg) +{ + int ext = read_im16(env, s); + TCGv dst = DREG(insn, 0); + TCGv src = DREG(ext, 12); + int len = ((extract32(ext, 0, 5) - 1) & 31) + 1; + int ofs = extract32(ext, 6, 5); /* big bit-endian */ + int pos = 32 - ofs - len; /* little bit-endian */ + TCGv tmp; + + tmp = tcg_temp_new(); + + if (ext & 0x20) { + /* Variable width */ + tcg_gen_neg_i32(tmp, DREG(ext, 0)); + tcg_gen_andi_i32(tmp, tmp, 31); + tcg_gen_shl_i32(QREG_CC_N, src, tmp); + } else { + /* Immediate width */ + tcg_gen_shli_i32(QREG_CC_N, src, 32 - len); + } + set_cc_op(s, CC_OP_LOGIC); + + /* Immediate width and offset */ + if ((ext & 0x820) == 0) { + /* Check for suitability for deposit. 
*/ + if (pos >= 0) { + tcg_gen_deposit_i32(dst, dst, src, pos, len); + } else { + uint32_t maski = -2U << (len - 1); + uint32_t roti = (ofs + len) & 31; + tcg_gen_andi_i32(tmp, src, ~maski); + tcg_gen_rotri_i32(tmp, tmp, roti); + tcg_gen_andi_i32(dst, dst, ror32(maski, roti)); + tcg_gen_or_i32(dst, dst, tmp); + } + } else { + TCGv mask = tcg_temp_new(); + TCGv rot = tcg_temp_new(); + + if (ext & 0x20) { + /* Variable width */ + tcg_gen_subi_i32(rot, DREG(ext, 0), 1); + tcg_gen_andi_i32(rot, rot, 31); + tcg_gen_movi_i32(mask, -2); + tcg_gen_shl_i32(mask, mask, rot); + tcg_gen_mov_i32(rot, DREG(ext, 0)); + tcg_gen_andc_i32(tmp, src, mask); + } else { + /* Immediate width (variable offset) */ + uint32_t maski = -2U << (len - 1); + tcg_gen_andi_i32(tmp, src, ~maski); + tcg_gen_movi_i32(mask, maski); + tcg_gen_movi_i32(rot, len & 31); + } + if (ext & 0x800) { + /* Variable offset */ + tcg_gen_add_i32(rot, rot, DREG(ext, 6)); + } else { + /* Immediate offset (variable width) */ + tcg_gen_addi_i32(rot, rot, ofs); + } + tcg_gen_andi_i32(rot, rot, 31); + tcg_gen_rotr_i32(mask, mask, rot); + tcg_gen_rotr_i32(tmp, tmp, rot); + tcg_gen_and_i32(dst, dst, mask); + tcg_gen_or_i32(dst, dst, tmp); + + tcg_temp_free(rot); + tcg_temp_free(mask); + } + tcg_temp_free(tmp); +} + +DISAS_INSN(bfins_mem) +{ + int ext = read_im16(env, s); + TCGv src = DREG(ext, 12); + TCGv addr, len, ofs; + + addr = gen_lea(env, s, insn, OS_UNSIZED); + if (IS_NULL_QREG(addr)) { + gen_addr_fault(s); + return; + } + + if (ext & 0x20) { + len = DREG(ext, 0); + } else { + len = tcg_const_i32(extract32(ext, 0, 5)); + } + if (ext & 0x800) { + ofs = DREG(ext, 6); + } else { + ofs = tcg_const_i32(extract32(ext, 6, 5)); + } + + gen_helper_bfins_mem(QREG_CC_N, cpu_env, addr, src, ofs, len); + set_cc_op(s, CC_OP_LOGIC); + + if (!(ext & 0x20)) { + tcg_temp_free(len); + } + if (!(ext & 0x800)) { + tcg_temp_free(ofs); + } +} + DISAS_INSN(ff1) { TCGv reg; @@ -4595,6 +4992,20 @@ void register_m68k_insns (CPUM68KState *env) INSN(rotate8_reg, e030, f0f0, M68000); INSN(rotate16_reg, e070, f0f0, M68000); INSN(rotate_mem, e4c0, fcc0, M68000); + INSN(bfext_mem, e9c0, fdc0, BITFIELD); /* bfextu & bfexts */ + INSN(bfext_reg, e9c0, fdf8, BITFIELD); + INSN(bfins_mem, efc0, ffc0, BITFIELD); + INSN(bfins_reg, efc0, fff8, BITFIELD); + INSN(bfop_mem, eac0, ffc0, BITFIELD); /* bfchg */ + INSN(bfop_reg, eac0, fff8, BITFIELD); /* bfchg */ + INSN(bfop_mem, ecc0, ffc0, BITFIELD); /* bfclr */ + INSN(bfop_reg, ecc0, fff8, BITFIELD); /* bfclr */ + INSN(bfop_mem, edc0, ffc0, BITFIELD); /* bfffo */ + INSN(bfop_reg, edc0, fff8, BITFIELD); /* bfffo */ + INSN(bfop_mem, eec0, ffc0, BITFIELD); /* bfset */ + INSN(bfop_reg, eec0, fff8, BITFIELD); /* bfset */ + INSN(bfop_mem, e8c0, ffc0, BITFIELD); /* bftst */ + INSN(bfop_reg, e8c0, fff8, BITFIELD); /* bftst */ INSN(undef_fpu, f000, f000, CF_ISA_A); INSN(fpu, f200, ffc0, CF_FPU); INSN(fbcc, f280, ffc0, CF_FPU); diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 389c7b691e..3d58869716 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -103,9 +103,8 @@ static void mb_cpu_reset(CPUState *s) mcc->parent_reset(s); - memset(env, 0, offsetof(CPUMBState, pvr)); + memset(env, 0, offsetof(CPUMBState, end_reset_fields)); env->res_addr = RES_ADDR_NONE; - tlb_flush(s, 1); /* Disable stack protector. 
*/ env->shr = ~0; diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index beb75ffd26..bf6963bcb7 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -267,6 +267,9 @@ struct CPUMBState { struct microblaze_mmu mmu; #endif + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* These fields are preserved on reset. */ diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c index a22a496ebb..a0f06758f8 100644 --- a/target/microblaze/mmu.c +++ b/target/microblaze/mmu.c @@ -255,7 +255,7 @@ void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v) /* Changes to the zone protection reg flush the QEMU TLB. Fortunately, these are very uncommon. */ if (v != env->mmu.regs[rn]) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } env->mmu.regs[rn] = v; break; diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 65ca607f88..1bb66b7a5a 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -100,8 +100,7 @@ static void mips_cpu_reset(CPUState *s) mcc->parent_reset(s); - memset(env, 0, offsetof(CPUMIPSState, mvp)); - tlb_flush(s, 1); + memset(env, 0, offsetof(CPUMIPSState, end_reset_fields)); cpu_state_reset(env); diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 5182dc74ff..e1c78f55ec 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -607,6 +607,9 @@ struct CPUMIPSState { uint32_t CP0_TCStatus_rw_bitmask; /* Read/write bits in CP0_TCStatus */ int insn_flags; /* Supported instruction set */ + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* Fields from here on are preserved across CPU reset. */ @@ -1051,7 +1054,7 @@ static inline void compute_hflags(CPUMIPSState *env) } } -void cpu_mips_tlb_flush(CPUMIPSState *env, int flush_global); +void cpu_mips_tlb_flush(CPUMIPSState *env); void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc); void cpu_mips_store_status(CPUMIPSState *env, target_ulong val); void cpu_mips_store_cause(CPUMIPSState *env, target_ulong val); diff --git a/target/mips/helper.c b/target/mips/helper.c index c864b15b97..d2e77958fd 100644 --- a/target/mips/helper.c +++ b/target/mips/helper.c @@ -223,12 +223,12 @@ static int get_physical_address (CPUMIPSState *env, hwaddr *physical, return ret; } -void cpu_mips_tlb_flush(CPUMIPSState *env, int flush_global) +void cpu_mips_tlb_flush(CPUMIPSState *env) { MIPSCPU *cpu = mips_env_get_cpu(env); /* Flush qemu's TLB and discard all shadowed entries. */ - tlb_flush(CPU(cpu), flush_global); + tlb_flush(CPU(cpu)); env->tlb->tlb_in_use = env->tlb->nb_tlb; } @@ -290,7 +290,7 @@ void cpu_mips_store_status(CPUMIPSState *env, target_ulong val) #if defined(TARGET_MIPS64) if ((env->CP0_Status ^ old) & (old & (7 << CP0St_UX))) { /* Access to at least one of the 64-bit segments has been disabled */ - cpu_mips_tlb_flush(env, 1); + cpu_mips_tlb_flush(env); } #endif if (env->CP0_Config3 & (1 << CP0C3_MT)) { diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c index 11d781fc91..b683fcb025 100644 --- a/target/mips/op_helper.c +++ b/target/mips/op_helper.c @@ -1409,7 +1409,7 @@ void helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1) /* If the ASID changes, flush qemu's TLB. 
*/ if ((old & env->CP0_EntryHi_ASID_mask) != (val & env->CP0_EntryHi_ASID_mask)) { - cpu_mips_tlb_flush(env, 1); + cpu_mips_tlb_flush(env); } } @@ -1999,7 +1999,7 @@ void r4k_helper_tlbinv(CPUMIPSState *env) tlb->EHINV = 1; } } - cpu_mips_tlb_flush(env, 1); + cpu_mips_tlb_flush(env); } void r4k_helper_tlbinvf(CPUMIPSState *env) @@ -2009,7 +2009,7 @@ void r4k_helper_tlbinvf(CPUMIPSState *env) for (idx = 0; idx < env->tlb->nb_tlb; idx++) { env->tlb->mmu.r4k.tlb[idx].EHINV = 1; } - cpu_mips_tlb_flush(env, 1); + cpu_mips_tlb_flush(env); } void r4k_helper_tlbwi(CPUMIPSState *env) @@ -2123,7 +2123,7 @@ void r4k_helper_tlbr(CPUMIPSState *env) /* If this will change the current ASID, flush qemu's TLB. */ if (ASID != tlb->ASID) - cpu_mips_tlb_flush (env, 1); + cpu_mips_tlb_flush(env); r4k_mips_tlb_flush_extra(env, env->tlb->nb_tlb); diff --git a/target/moxie/cpu.c b/target/moxie/cpu.c index b0be4a7551..927b1a1e44 100644 --- a/target/moxie/cpu.c +++ b/target/moxie/cpu.c @@ -45,10 +45,8 @@ static void moxie_cpu_reset(CPUState *s) mcc->parent_reset(s); - memset(env, 0, sizeof(CPUMoxieState)); + memset(env, 0, offsetof(CPUMoxieState, end_reset_fields)); env->pc = 0x1000; - - tlb_flush(s, 1); } static void moxie_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) diff --git a/target/moxie/cpu.h b/target/moxie/cpu.h index 3e880facf4..8991aaef9a 100644 --- a/target/moxie/cpu.h +++ b/target/moxie/cpu.h @@ -56,6 +56,9 @@ typedef struct CPUMoxieState { void *irq[8]; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON } CPUMoxieState; diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index 698e87bb25..422139d29f 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -44,14 +44,7 @@ static void openrisc_cpu_reset(CPUState *s) occ->parent_reset(s); -#ifndef CONFIG_USER_ONLY - memset(&cpu->env, 0, offsetof(CPUOpenRISCState, tlb)); -#else - memset(&cpu->env, 0, offsetof(CPUOpenRISCState, irq)); -#endif - - tlb_flush(s, 1); - /*tb_flush(&cpu->env); FIXME: Do we need it? */ + memset(&cpu->env, 0, offsetof(CPUOpenRISCState, end_reset_fields)); cpu->env.pc = 0x100; cpu->env.sr = SR_FO | SR_SM; diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index aaf153579a..508ef568b4 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -300,6 +300,9 @@ typedef struct CPUOpenRISCState { in solt so far. */ uint32_t btaken; /* the SR_F bit */ + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* Fields from here on are preserved across CPU reset. */ diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c index 5fe3f11ffc..e43fc84ef7 100644 --- a/target/openrisc/interrupt.c +++ b/target/openrisc/interrupt.c @@ -45,7 +45,7 @@ void openrisc_cpu_do_interrupt(CPUState *cs) /* For machine-state changed between user-mode and supervisor mode, we need flush TLB when we enter&exit EXCP. 
*/ - tlb_flush(cs, 1); + tlb_flush(cs); env->esr = env->sr; env->sr &= ~SR_DME; diff --git a/target/openrisc/interrupt_helper.c b/target/openrisc/interrupt_helper.c index 116f9109a7..0ed5146e8d 100644 --- a/target/openrisc/interrupt_helper.c +++ b/target/openrisc/interrupt_helper.c @@ -53,7 +53,7 @@ void HELPER(rfe)(CPUOpenRISCState *env) } if (need_flush_tlb) { - tlb_flush(cs, 1); + tlb_flush(cs); } #endif cs->interrupt_request |= CPU_INTERRUPT_EXITTB; diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index a719e452be..daea902856 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -47,7 +47,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, case TO_SPR(0, 17): /* SR */ if ((env->sr & (SR_IME | SR_DME | SR_SM)) ^ (rb & (SR_IME | SR_DME | SR_SM))) { - tlb_flush(cs, 1); + tlb_flush(cs); } env->sr = rb; env->sr |= SR_FO; /* FO is const equal to 1 */ diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h index 62138163a5..2627a70176 100644 --- a/target/ppc/helper_regs.h +++ b/target/ppc/helper_regs.h @@ -161,7 +161,7 @@ static inline void check_tlb_flush(CPUPPCState *env, bool global) { CPUState *cs = CPU(ppc_env_get_cpu(env)); if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) { - tlb_flush(cs, 1); + tlb_flush(cs); env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; } @@ -176,7 +176,7 @@ static inline void check_tlb_flush(CPUPPCState *env, bool global) CPUPPCState *other_env = &cpu->env; other_env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; - tlb_flush(other_cs, 1); + tlb_flush(other_cs); } } env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH; diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 1e6e705a4e..ab432bafaf 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -85,7 +85,7 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val) if (!env->external_htab) { if (env->spr[SPR_SDR1] != val) { ppc_store_sdr1(env, val); - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } } } @@ -114,7 +114,7 @@ void helper_store_403_pbr(CPUPPCState *env, uint32_t num, target_ulong value) if (likely(env->pb[num] != value)) { env->pb[num] = value; /* Should be optimized */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } } diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index d09fc0a85f..f746f53615 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -248,7 +248,7 @@ static inline void ppc6xx_tlb_invalidate_all(CPUPPCState *env) tlb = &env->tlb.tlb6[nr]; pte_invalidate(&tlb->pte0); } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } static inline void ppc6xx_tlb_invalidate_virt2(CPUPPCState *env, @@ -661,7 +661,7 @@ static inline void ppc4xx_tlb_invalidate_all(CPUPPCState *env) tlb = &env->tlb.tlbe[i]; tlb->prot &= ~PAGE_VALID; } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } static int mmu40x_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, @@ -863,7 +863,7 @@ static void booke206_flush_tlb(CPUPPCState *env, int flags, tlb += booke206_tlb_size(env, i); } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } static hwaddr booke206_tlb_to_page_size(CPUPPCState *env, @@ -1769,7 +1769,7 @@ void helper_store_ibatu(CPUPPCState *env, uint32_t nr, target_ulong value) #if !defined(FLUSH_ALL_TLBS) do_invalidate_BAT(env, env->IBAT[0][nr], mask); #else - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); #endif } } @@ -1804,7 +1804,7 @@ void helper_store_dbatu(CPUPPCState *env, uint32_t nr, target_ulong value) #if !defined(FLUSH_ALL_TLBS) do_invalidate_BAT(env, env->DBAT[0][nr], mask); #else - 
tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); #endif } } @@ -1852,7 +1852,7 @@ void helper_store_601_batu(CPUPPCState *env, uint32_t nr, target_ulong value) } #if defined(FLUSH_ALL_TLBS) if (do_inval) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } #endif } @@ -1892,7 +1892,7 @@ void helper_store_601_batl(CPUPPCState *env, uint32_t nr, target_ulong value) env->DBAT[1][nr] = value; #if defined(FLUSH_ALL_TLBS) if (do_inval) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } #endif } @@ -1921,7 +1921,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) cpu_abort(CPU(cpu), "MPC8xx MMU model is not implemented\n"); break; case POWERPC_MMU_BOOKE: - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); break; case POWERPC_MMU_BOOKE206: booke206_flush_tlb(env, -1, 0); @@ -1937,7 +1937,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) case POWERPC_MMU_2_07a: #endif /* defined(TARGET_PPC64) */ env->tlb_need_flush = 0; - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); break; default: /* XXX: TODO */ @@ -2433,13 +2433,13 @@ void helper_440_tlbwe(CPUPPCState *env, uint32_t word, target_ulong entry, } tlb->PID = env->spr[SPR_440_MMUCR] & 0x000000FF; if (do_flush_tlbs) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } break; case 1: RPN = value & 0xFFFFFC0F; if ((tlb->prot & PAGE_VALID) && tlb->RPN != RPN) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } tlb->RPN = RPN; break; @@ -2555,7 +2555,7 @@ void helper_booke_setpid(CPUPPCState *env, uint32_t pidn, target_ulong pid) env->spr[pidn] = pid; /* changing PIDs mean we're in a different address space now */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } void helper_booke206_tlbwe(CPUPPCState *env) @@ -2650,7 +2650,7 @@ void helper_booke206_tlbwe(CPUPPCState *env) if (booke206_tlb_to_page_size(env, tlb) == TARGET_PAGE_SIZE) { tlb_flush_page(CPU(cpu), tlb->mas2 & MAS2_EPN_MASK); } else { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } } @@ -2775,7 +2775,7 @@ void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address) /* flush TLB1 entries */ booke206_invalidate_ea_tlb(env, 1, address); CPU_FOREACH(cs) { - tlb_flush(cs, 1); + tlb_flush(cs); } } else { /* flush TLB0 entries */ @@ -2811,7 +2811,7 @@ void helper_booke206_tlbilx1(CPUPPCState *env, target_ulong address) } tlb += booke206_tlb_size(env, i); } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } void helper_booke206_tlbilx3(CPUPPCState *env, target_ulong address) @@ -2852,7 +2852,7 @@ void helper_booke206_tlbilx3(CPUPPCState *env, target_ulong address) tlb->mas1 &= ~MAS1_VALID; } } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type) diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index 19ef2505e4..e6a835c14c 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -10416,9 +10416,6 @@ static void ppc_cpu_reset(CPUState *s) } env->spr[i] = spr->default_value; } - - /* Flush all TLBs */ - tlb_flush(s, 1); } #ifndef CONFIG_USER_ONLY diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 0a39d31237..066dcd17df 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -82,7 +82,6 @@ static void s390_cpu_reset(CPUState *s) scc->parent_reset(s); cpu->env.sigp_order = 0; s390_cpu_set_state(CPU_STATE_STOPPED, cpu); - tlb_flush(s, 1); } /* S390CPUClass::initial_reset() */ @@ -94,7 +93,7 @@ static void s390_cpu_initial_reset(CPUState *s) s390_cpu_reset(s); /* initial reset does not touch regs,fregs and aregs */ - memset(&env->fpc, 0, offsetof(CPUS390XState, cpu_num) - + 
memset(&env->fpc, 0, offsetof(CPUS390XState, end_reset_fields) - offsetof(CPUS390XState, fpc)); /* architectured initial values for CR 0 and 14 */ @@ -118,7 +117,6 @@ static void s390_cpu_initial_reset(CPUState *s) if (kvm_enabled()) { kvm_s390_reset_vcpu(cpu); } - tlb_flush(s, 1); } /* CPUClass:reset() */ @@ -133,7 +131,7 @@ static void s390_cpu_full_reset(CPUState *s) cpu->env.sigp_order = 0; s390_cpu_set_state(CPU_STATE_STOPPED, cpu); - memset(env, 0, offsetof(CPUS390XState, cpu_num)); + memset(env, 0, offsetof(CPUS390XState, end_reset_fields)); /* architectured initial values for CR 0 and 14 */ env->cregs[0] = CR0_RESET; @@ -156,7 +154,6 @@ static void s390_cpu_full_reset(CPUState *s) if (kvm_enabled()) { kvm_s390_reset_vcpu(cpu); } - tlb_flush(s, 1); } #if !defined(CONFIG_USER_ONLY) diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index fd36a25cf5..058ddad83a 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -139,9 +139,10 @@ typedef struct CPUS390XState { uint8_t riccb[64]; - CPU_COMMON + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; - /* reset does memset(0) up to here */ + CPU_COMMON uint32_t cpu_num; uint32_t machine_type; diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c index 3d223dec97..ea4dc22eeb 100644 --- a/target/s390x/gdbstub.c +++ b/target/s390x/gdbstub.c @@ -199,7 +199,7 @@ static int cpu_write_c_reg(CPUS390XState *env, uint8_t *mem_buf, int n) case S390_C0_REGNUM ... S390_C15_REGNUM: env->cregs[n] = ldtul_p(mem_buf); if (tcg_enabled()) { - tlb_flush(ENV_GET_CPU(env), 1); + tlb_flush(ENV_GET_CPU(env)); } cpu_synchronize_post_init(ENV_GET_CPU(env)); return 8; diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 99bc5e2834..675aba2e44 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -872,7 +872,7 @@ void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) s390_cpu_recompute_watchpoints(CPU(cpu)); } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) @@ -900,7 +900,7 @@ void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) s390_cpu_recompute_watchpoints(CPU(cpu)); } - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) @@ -1036,7 +1036,7 @@ uint32_t HELPER(csp)(CPUS390XState *env, uint32_t r1, uint64_t r2) cpu_stl_data(env, a2, env->regs[(r1 + 1) & 15]); if (r2 & 0x3) { /* flush TLB / ALB */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } cc = 0; } else { @@ -1121,7 +1121,7 @@ void HELPER(ptlb)(CPUS390XState *env) { S390CPU *cpu = s390_env_get_cpu(env); - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } /* load using real address */ diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index a38f6a6ded..9a481c35dc 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -56,8 +56,7 @@ static void superh_cpu_reset(CPUState *s) scc->parent_reset(s); - memset(env, 0, offsetof(CPUSH4State, id)); - tlb_flush(s, 1); + memset(env, 0, offsetof(CPUSH4State, end_reset_fields)); env->pc = 0xA0000000; #if defined(CONFIG_USER_ONLY) diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 478ab55868..cad8989f7e 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -175,6 +175,9 @@ typedef struct CPUSH4State { uint32_t ldst; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* Fields from here on are preserved over CPU reset. 
*/ diff --git a/target/sh4/helper.c b/target/sh4/helper.c index a33ac697c5..036c5ca56c 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -583,7 +583,7 @@ void cpu_load_tlb(CPUSH4State * env) entry->v = 0; } - tlb_flush(CPU(sh_env_get_cpu(s)), 1); + tlb_flush(CPU(sh_env_get_cpu(s))); } uint32_t cpu_sh4_read_mmaped_itlb_addr(CPUSH4State *s, diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 4e07b92fbd..d6583f1c2a 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -36,8 +36,7 @@ static void sparc_cpu_reset(CPUState *s) scc->parent_reset(s); - memset(env, 0, offsetof(CPUSPARCState, version)); - tlb_flush(s, 1); + memset(env, 0, offsetof(CPUSPARCState, end_reset_fields)); env->cwp = 0; #ifndef TARGET_SPARC64 env->wim = 1; diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 5fb0ed1aad..601c018a05 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -419,6 +419,9 @@ struct CPUSPARCState { /* NOTE: we allow 8 more registers to handle wrapping */ target_ulong regbase[MAX_NWINDOWS * 16 + 8]; + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON /* Fields from here on are preserved across CPU reset. */ diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c index de7d53ae20..a0171f73f7 100644 --- a/target/sparc/ldst_helper.c +++ b/target/sparc/ldst_helper.c @@ -816,7 +816,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, case 2: /* flush region (16M) */ case 3: /* flush context (4G) */ case 4: /* flush entire */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); break; default: break; @@ -841,7 +841,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, are invalid in normal mode. */ if ((oldreg ^ env->mmuregs[reg]) & (MMU_NF | env->def->mmu_bm)) { - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } break; case 1: /* Context Table Pointer Register */ @@ -852,7 +852,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, if (oldreg != env->mmuregs[reg]) { /* we flush when the MMU context changes because QEMU has no MMU context support */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } break; case 3: /* Synchronous Fault Status Register with Clear */ @@ -1509,13 +1509,13 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, env->dmmu.mmu_primary_context = val; /* can be optimized to only flush MMU_USER_IDX and MMU_KERNEL_IDX entries */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); break; case 2: /* Secondary context */ env->dmmu.mmu_secondary_context = val; /* can be optimized to only flush MMU_USER_SECONDARY_IDX and MMU_KERNEL_SECONDARY_IDX entries */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); break; case 5: /* TSB access */ DPRINTF_MMU("dmmu TSB write: 0x%016" PRIx64 " -> 0x%016" @@ -1654,7 +1654,7 @@ void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr, /* flush neverland mappings created during no-fault mode, so the sequential MMU faults report proper fault types */ if (env->mmuregs[0] & MMU_NF) { - tlb_flush(cs, 1); + tlb_flush(cs); } } #else diff --git a/target/tilegx/cpu.c b/target/tilegx/cpu.c index 454793f94a..d90e38e88c 100644 --- a/target/tilegx/cpu.c +++ b/target/tilegx/cpu.c @@ -84,8 +84,7 @@ static void tilegx_cpu_reset(CPUState *s) tcc->parent_reset(s); - memset(env, 0, sizeof(CPUTLGState)); - tlb_flush(s, 1); + memset(env, 0, offsetof(CPUTLGState, end_reset_fields)); } static void tilegx_cpu_realizefn(DeviceState *dev, Error **errp) diff --git a/target/tilegx/cpu.h 
b/target/tilegx/cpu.h index 1735427233..f32be49f65 100644 --- a/target/tilegx/cpu.h +++ b/target/tilegx/cpu.h @@ -97,6 +97,9 @@ typedef struct CPUTLGState { uint32_t sigcode; /* Signal code */ #endif + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + CPU_COMMON } CPUTLGState; diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index 785b76bd3a..08f50e2ba7 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -53,8 +53,6 @@ static void tricore_cpu_reset(CPUState *s) tcc->parent_reset(s); - tlb_flush(s, 1); - cpu_state_reset(env); } diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c index c169972b59..c9b78ce68e 100644 --- a/target/unicore32/cpu.c +++ b/target/unicore32/cpu.c @@ -133,7 +133,7 @@ static void uc32_cpu_initfn(Object *obj) env->regs[31] = 0x03000000; #endif - tlb_flush(cs, 1); + tlb_flush(cs); if (tcg_enabled() && !inited) { inited = true; diff --git a/target/unicore32/helper.c b/target/unicore32/helper.c index 7a5613e776..f9239dc7b8 100644 --- a/target/unicore32/helper.c +++ b/target/unicore32/helper.c @@ -106,7 +106,7 @@ void helper_cp0_set(CPUUniCore32State *env, uint32_t val, uint32_t creg, case 6: if ((cop <= 6) && (cop >= 2)) { /* invalid all tlb */ - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); return; } break; diff --git a/target/xtensa/op_helper.c b/target/xtensa/op_helper.c index dc25625d0d..dc0dd351bb 100644 --- a/target/xtensa/op_helper.c +++ b/target/xtensa/op_helper.c @@ -479,7 +479,7 @@ void HELPER(wsr_rasid)(CPUXtensaState *env, uint32_t v) v = (v & 0xffffff00) | 0x1; if (v != env->sregs[RASID]) { env->sregs[RASID] = v; - tlb_flush(CPU(cpu), 1); + tlb_flush(CPU(cpu)); } } diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 585b0d6234..6d227a5a6a 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -580,11 +580,9 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, tcg_target_long value) { - AArch64Insn insn; int i, wantinv, shift; tcg_target_long svalue = value; tcg_target_long ivalue = ~value; - tcg_target_long imask; /* For 32-bit values, discard potential garbage in value. For 64-bit values within [2**31, 2**32-1], we can create smaller sequences by @@ -630,42 +628,35 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, /* Would it take fewer insns to begin with MOVN? For the value and its inverse, count the number of 16-bit lanes that are 0. */ - for (i = wantinv = imask = 0; i < 64; i += 16) { + for (i = wantinv = 0; i < 64; i += 16) { tcg_target_long mask = 0xffffull << i; - if ((value & mask) == 0) { - wantinv -= 1; - } - if ((ivalue & mask) == 0) { - wantinv += 1; - imask |= mask; - } - } - - /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ - insn = I3405_MOVZ; - if (wantinv > 0) { - value = ivalue; - insn = I3405_MOVN; - } - - /* Find the lowest lane that is not 0x0000. */ - shift = ctz64(value) & (63 & -16); - tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); - - if (wantinv > 0) { - /* Re-invert the value, so MOVK sees non-inverted bits. */ - value = ~value; - /* Clear out all the 0xffff lanes. */ - value ^= imask; + wantinv -= ((value & mask) == 0); + wantinv += ((ivalue & mask) == 0); } - /* Clear out the lane that we just set. */ - value &= ~(0xffffUL << shift); - /* Iterate until all lanes have been set, and thus cleared from VALUE. 
*/ - while (value) { + if (wantinv <= 0) { + /* Find the lowest lane that is not 0x0000. */ shift = ctz64(value) & (63 & -16); - tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); + tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift); + /* Clear out the lane that we just set. */ value &= ~(0xffffUL << shift); + /* Iterate until all non-zero lanes have been processed. */ + while (value) { + shift = ctz64(value) & (63 & -16); + tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); + value &= ~(0xffffUL << shift); + } + } else { + /* Like above, but with the inverted value and MOVN to start. */ + shift = ctz64(ivalue) & (63 & -16); + tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift); + ivalue &= ~(0xffffUL << shift); + while (ivalue) { + shift = ctz64(ivalue) & (63 & -16); + /* Provide MOVK with the non-inverted value. */ + tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift); + ivalue &= ~(0xffffUL << shift); + } } } @@ -964,6 +955,15 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, insn = I3401_SUBSI; bl = -bl; } + if (unlikely(al == TCG_REG_XZR)) { + /* ??? We want to allow al to be zero for the benefit of + negation via subtraction. However, that leaves open the + possibility of adding 0+const in the low part, and the + immediate add instructions encode XSP not XZR. Don't try + anything more elaborate here than loading another zero. */ + al = TCG_REG_TMP; + tcg_out_movi(s, ext, al, 0); + } tcg_out_insn_3401(s, insn, ext, rl, al, bl); } else { tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 0682d01a4e..a679280b92 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -1096,7 +1096,7 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, /* If we only got here because of load-and-test, and we couldn't use that, then we need to load the constant into a register. */ - if (!(facilities & FACILITY_EXT_IMM)) { + if (!(s390_facilities & FACILITY_EXT_IMM)) { c2 = TCG_TMP0; tcg_out_movi(s, type, c2, 0); goto do_reg; diff --git a/util/Makefile.objs b/util/Makefile.objs index ad0f9c7fe4..c1f247d675 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,5 +1,6 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o util-obj-y += bufferiszero.o +util-obj-y += lockcnt.o util-obj-$(CONFIG_POSIX) += compatfd.o util-obj-$(CONFIG_POSIX) += event_notifier-posix.o util-obj-$(CONFIG_POSIX) += mmap-alloc.o diff --git a/util/lockcnt.c b/util/lockcnt.c new file mode 100644 index 0000000000..4f88dcf8b8 --- /dev/null +++ b/util/lockcnt.c @@ -0,0 +1,397 @@ +/* + * QemuLockCnt implementation + * + * Copyright Red Hat, Inc. 2017 + * + * Author: + * Paolo Bonzini <pbonzini@redhat.com> + */ +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/atomic.h" +#include "trace.h" + +#ifdef CONFIG_LINUX +#include "qemu/futex.h" + +/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter. + * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok, + * this is not the most relaxing citation I could make...). It is similar + * to mutex2 in the paper. 
+ */ + +#define QEMU_LOCKCNT_STATE_MASK 3 +#define QEMU_LOCKCNT_STATE_FREE 0 /* free, uncontended */ +#define QEMU_LOCKCNT_STATE_LOCKED 1 /* locked, uncontended */ +#define QEMU_LOCKCNT_STATE_WAITING 2 /* locked, contended */ + +#define QEMU_LOCKCNT_COUNT_STEP 4 +#define QEMU_LOCKCNT_COUNT_SHIFT 2 + +void qemu_lockcnt_init(QemuLockCnt *lockcnt) +{ + lockcnt->count = 0; +} + +void qemu_lockcnt_destroy(QemuLockCnt *lockcnt) +{ +} + +/* *val is the current value of lockcnt->count. + * + * If the lock is free, try a cmpxchg from *val to new_if_free; return + * true and set *val to the old value found by the cmpxchg in + * lockcnt->count. + * + * If the lock is taken, wait for it to be released and return false + * *without trying again to take the lock*. Again, set *val to the + * new value of lockcnt->count. + * + * If *waited is true on return, new_if_free's bottom two bits must not + * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller + * does not know if there are other waiters. Furthermore, after *waited + * is set the caller has effectively acquired the lock. If it returns + * with the lock not taken, it must wake another futex waiter. + */ +static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val, + int new_if_free, bool *waited) +{ + /* Fast path for when the lock is free. */ + if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) { + int expected = *val; + + trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free); + *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free); + if (*val == expected) { + trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free); + *val = new_if_free; + return true; + } + } + + /* The slow path moves from locked to waiting if necessary, then + * does a futex wait. Both steps can be repeated ad nauseam, + * only getting out of the loop if we can have another shot at the + * fast path. Once we can, get out to compute the new destination + * value for the fast path. + */ + while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) { + if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) { + int expected = *val; + int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING; + + trace_lockcnt_futex_wait_prepare(lockcnt, expected, new); + *val = atomic_cmpxchg(&lockcnt->count, expected, new); + if (*val == expected) { + *val = new; + } + continue; + } + + if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) { + *waited = true; + trace_lockcnt_futex_wait(lockcnt, *val); + qemu_futex_wait(&lockcnt->count, *val); + *val = atomic_read(&lockcnt->count); + trace_lockcnt_futex_wait_resume(lockcnt, *val); + continue; + } + + abort(); + } + return false; +} + +static void lockcnt_wake(QemuLockCnt *lockcnt) +{ + trace_lockcnt_futex_wake(lockcnt); + qemu_futex_wake(&lockcnt->count, 1); +} + +void qemu_lockcnt_inc(QemuLockCnt *lockcnt) +{ + int val = atomic_read(&lockcnt->count); + bool waited = false; + + for (;;) { + if (val >= QEMU_LOCKCNT_COUNT_STEP) { + int expected = val; + val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP); + if (val == expected) { + break; + } + } else { + /* The fast path is (0, unlocked)->(1, unlocked). */ + if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP, + &waited)) { + break; + } + } + } + + /* If we were woken by another thread, we should also wake one because + * we are effectively releasing the lock that was given to us. 
This is + * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING + * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and + * wake someone. + */ + if (waited) { + lockcnt_wake(lockcnt); + } +} + +void qemu_lockcnt_dec(QemuLockCnt *lockcnt) +{ + atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP); +} + +/* Decrement a counter, and return locked if it is decremented to zero. + * If the function returns true, it is impossible for the counter to + * become nonzero until the next qemu_lockcnt_unlock. + */ +bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt) +{ + int val = atomic_read(&lockcnt->count); + int locked_state = QEMU_LOCKCNT_STATE_LOCKED; + bool waited = false; + + for (;;) { + if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) { + int expected = val; + val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP); + if (val == expected) { + break; + } + } else { + /* If count is going 1->0, take the lock. The fast path is + * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting). + */ + if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) { + return true; + } + + if (waited) { + /* At this point we do not know if there are more waiters. Assume + * there are. + */ + locked_state = QEMU_LOCKCNT_STATE_WAITING; + } + } + } + + /* If we were woken by another thread, but we're returning in unlocked + * state, we should also wake a thread because we are effectively + * releasing the lock that was given to us. This is the case where + * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low + * bits, and qemu_lockcnt_unlock would find it and wake someone. + */ + if (waited) { + lockcnt_wake(lockcnt); + } + return false; +} + +/* If the counter is one, decrement it and return locked. Otherwise do + * nothing. + * + * If the function returns true, it is impossible for the counter to + * become nonzero until the next qemu_lockcnt_unlock. + */ +bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt) +{ + int val = atomic_read(&lockcnt->count); + int locked_state = QEMU_LOCKCNT_STATE_LOCKED; + bool waited = false; + + while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) { + /* If count is going 1->0, take the lock. The fast path is + * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting). + */ + if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) { + return true; + } + + if (waited) { + /* At this point we do not know if there are more waiters. Assume + * there are. + */ + locked_state = QEMU_LOCKCNT_STATE_WAITING; + } + } + + /* If we were woken by another thread, but we're returning in unlocked + * state, we should also wake a thread because we are effectively + * releasing the lock that was given to us. This is the case where + * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low + * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone. + */ + if (waited) { + lockcnt_wake(lockcnt); + } + return false; +} + +void qemu_lockcnt_lock(QemuLockCnt *lockcnt) +{ + int val = atomic_read(&lockcnt->count); + int step = QEMU_LOCKCNT_STATE_LOCKED; + bool waited = false; + + /* The third argument is only used if the low bits of val are 0 + * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired + * state. + */ + while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) { + if (waited) { + /* At this point we do not know if there are more waiters. Assume + * there are. 
+ */ + step = QEMU_LOCKCNT_STATE_WAITING; + } + } +} + +void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt) +{ + int expected, new, val; + + val = atomic_read(&lockcnt->count); + do { + expected = val; + new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK; + trace_lockcnt_unlock_attempt(lockcnt, val, new); + val = atomic_cmpxchg(&lockcnt->count, val, new); + } while (val != expected); + + trace_lockcnt_unlock_success(lockcnt, val, new); + if (val & QEMU_LOCKCNT_STATE_WAITING) { + lockcnt_wake(lockcnt); + } +} + +void qemu_lockcnt_unlock(QemuLockCnt *lockcnt) +{ + int expected, new, val; + + val = atomic_read(&lockcnt->count); + do { + expected = val; + new = val & ~QEMU_LOCKCNT_STATE_MASK; + trace_lockcnt_unlock_attempt(lockcnt, val, new); + val = atomic_cmpxchg(&lockcnt->count, val, new); + } while (val != expected); + + trace_lockcnt_unlock_success(lockcnt, val, new); + if (val & QEMU_LOCKCNT_STATE_WAITING) { + lockcnt_wake(lockcnt); + } +} + +unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt) +{ + return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT; +} +#else +void qemu_lockcnt_init(QemuLockCnt *lockcnt) +{ + qemu_mutex_init(&lockcnt->mutex); + lockcnt->count = 0; +} + +void qemu_lockcnt_destroy(QemuLockCnt *lockcnt) +{ + qemu_mutex_destroy(&lockcnt->mutex); +} + +void qemu_lockcnt_inc(QemuLockCnt *lockcnt) +{ + int old; + for (;;) { + old = atomic_read(&lockcnt->count); + if (old == 0) { + qemu_lockcnt_lock(lockcnt); + qemu_lockcnt_inc_and_unlock(lockcnt); + return; + } else { + if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) { + return; + } + } + } +} + +void qemu_lockcnt_dec(QemuLockCnt *lockcnt) +{ + atomic_dec(&lockcnt->count); +} + +/* Decrement a counter, and return locked if it is decremented to zero. + * It is impossible for the counter to become nonzero while the mutex + * is taken. + */ +bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt) +{ + int val = atomic_read(&lockcnt->count); + while (val > 1) { + int old = atomic_cmpxchg(&lockcnt->count, val, val - 1); + if (old != val) { + val = old; + continue; + } + + return false; + } + + qemu_lockcnt_lock(lockcnt); + if (atomic_fetch_dec(&lockcnt->count) == 1) { + return true; + } + + qemu_lockcnt_unlock(lockcnt); + return false; +} + +/* Decrement a counter and return locked if it is decremented to zero. + * Otherwise do nothing. + * + * It is impossible for the counter to become nonzero while the mutex + * is taken. + */ +bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt) +{ + /* No need for acquire semantics if we return false. 
*/ + int val = atomic_read(&lockcnt->count); + if (val > 1) { + return false; + } + + qemu_lockcnt_lock(lockcnt); + if (atomic_fetch_dec(&lockcnt->count) == 1) { + return true; + } + + qemu_lockcnt_inc_and_unlock(lockcnt); + return false; +} + +void qemu_lockcnt_lock(QemuLockCnt *lockcnt) +{ + qemu_mutex_lock(&lockcnt->mutex); +} + +void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt) +{ + atomic_inc(&lockcnt->count); + qemu_mutex_unlock(&lockcnt->mutex); +} + +void qemu_lockcnt_unlock(QemuLockCnt *lockcnt) +{ + qemu_mutex_unlock(&lockcnt->mutex); +} + +unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt) +{ + return atomic_read(&lockcnt->count); +} +#endif diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index d20cddec0c..37cd8ba3fe 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -11,10 +11,6 @@ * */ #include "qemu/osdep.h" -#ifdef __linux__ -#include <sys/syscall.h> -#include <linux/futex.h> -#endif #include "qemu/thread.h" #include "qemu/atomic.h" #include "qemu/notify.h" @@ -294,28 +290,9 @@ void qemu_sem_wait(QemuSemaphore *sem) } #ifdef __linux__ -#define futex(...) syscall(__NR_futex, __VA_ARGS__) - -static inline void futex_wake(QemuEvent *ev, int n) -{ - futex(ev, FUTEX_WAKE, n, NULL, NULL, 0); -} - -static inline void futex_wait(QemuEvent *ev, unsigned val) -{ - while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) { - switch (errno) { - case EWOULDBLOCK: - return; - case EINTR: - break; /* get out of switch and retry */ - default: - abort(); - } - } -} +#include "qemu/futex.h" #else -static inline void futex_wake(QemuEvent *ev, int n) +static inline void qemu_futex_wake(QemuEvent *ev, int n) { pthread_mutex_lock(&ev->lock); if (n == 1) { @@ -326,7 +303,7 @@ static inline void futex_wake(QemuEvent *ev, int n) pthread_mutex_unlock(&ev->lock); } -static inline void futex_wait(QemuEvent *ev, unsigned val) +static inline void qemu_futex_wait(QemuEvent *ev, unsigned val) { pthread_mutex_lock(&ev->lock); if (ev->value == val) { @@ -338,7 +315,7 @@ static inline void futex_wait(QemuEvent *ev, unsigned val) /* Valid transitions: * - free->set, when setting the event - * - busy->set, when setting the event, followed by futex_wake + * - busy->set, when setting the event, followed by qemu_futex_wake * - set->free, when resetting the event * - free->busy, when waiting * @@ -381,7 +358,7 @@ void qemu_event_set(QemuEvent *ev) if (atomic_read(&ev->value) != EV_SET) { if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { /* There were waiters, wake them up. 
*/ - futex_wake(ev, INT_MAX); + qemu_futex_wake(ev, INT_MAX); } } } @@ -419,7 +396,7 @@ void qemu_event_wait(QemuEvent *ev) return; } } - futex_wait(ev, EV_BUSY); + qemu_futex_wait(ev, EV_BUSY); } } diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index 728e76b5b2..178e0168a1 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -269,7 +269,7 @@ void qemu_sem_wait(QemuSemaphore *sem) * * Valid transitions: * - free->set, when setting the event - * - busy->set, when setting the event, followed by futex_wake + * - busy->set, when setting the event, followed by SetEvent * - set->free, when resetting the event * - free->busy, when waiting * diff --git a/util/trace-events b/util/trace-events index ed06aee2ec..2b8aa30739 100644 --- a/util/trace-events +++ b/util/trace-events @@ -30,3 +30,13 @@ qemu_anon_ram_free(void *ptr, size_t size) "ptr %p size %zu" hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx" hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 + +# util/lockcnt.c +lockcnt_fast_path_attempt(const void *lockcnt, int expected, int new) "lockcnt %p fast path %d->%d" +lockcnt_fast_path_success(const void *lockcnt, int expected, int new) "lockcnt %p fast path %d->%d succeeded" +lockcnt_unlock_attempt(const void *lockcnt, int expected, int new) "lockcnt %p unlock %d->%d" +lockcnt_unlock_success(const void *lockcnt, int expected, int new) "lockcnt %p unlock %d->%d succeeded" +lockcnt_futex_wait_prepare(const void *lockcnt, int expected, int new) "lockcnt %p preparing slow path %d->%d" +lockcnt_futex_wait(const void *lockcnt, int val) "lockcnt %p waiting on %d" +lockcnt_futex_wait_resume(const void *lockcnt, int new) "lockcnt %p after wait: %d" +lockcnt_futex_wake(const void *lockcnt) "lockcnt %p waking up one waiter" |
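
The most mechanical change in the series is the tlb_flush() signature: the flush_global flag disappears from every call in target/. The generic cputlb code always ignored that flag and flushed the whole TLB regardless, so tlb_flush(cs, 0) and tlb_flush(cs, 1) did identical work and the parameter only invited guessing. Every call site shrinks the same way, and wrappers such as cpu_mips_tlb_flush() drop the forwarded argument:

    - tlb_flush(cs, 1);
    + tlb_flush(cs);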
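
The CPU-reset cleanup follows a single template. Each CPUXXXState gains a zero-size marker, struct {} end_reset_fields; (a GNU C extension), placed just before CPU_COMMON, and the reset handler zeroes memset(env, 0, offsetof(CPUXXXState, end_reset_fields)). Previously each target named the first preserved field in the offsetof (eba, features, pvr, mvp, cpu_num, ...), which broke silently whenever the struct was reordered, and moxie and tilegx even cleared the entire struct, CPU_COMMON included. The per-target tlb_flush(s, 1) calls vanish from the reset handlers at the same time, presumably because the flush now happens once in the common CPU reset path (that hunk is outside this digest). A minimal standalone sketch of the idiom, using a made-up DemoCPUState rather than a real CPU state:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Sketch only: DemoCPUState stands in for a real CPUXXXState. */
    typedef struct DemoCPUState {
        int regs[8];                 /* cleared by reset */
        int status;                  /* cleared by reset */
        struct {} end_reset_fields;  /* zero-size marker (GNU C) */
        int version;                 /* preserved, like the CPU_COMMON tail */
    } DemoCPUState;

    static void demo_reset(DemoCPUState *env)
    {
        /* Clears everything declared above the marker, nothing after it. */
        memset(env, 0, offsetof(DemoCPUState, end_reset_fields));
    }

    int main(void)
    {
        DemoCPUState s = { .status = 7, .version = 42 };
        demo_reset(&s);
        printf("status=%d version=%d\n", s.status, s.version);  /* 0 42 */
        return 0;
    }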
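
Two smaller m68k fixes travel with the bitfield work. First, gen_lea_mode() and gen_ea_mode() now step the stack pointer by 2 instead of 1 for byte-sized -(A7) and (A7)+ accesses on 68000-family CPUs, matching the hardware's insistence on keeping A7 word-aligned. Second, DISAS_INSN(cas) gains the missing address-register writeback for its postincrement and predecrement modes, and stops forcing MO_ALIGN on the memory operation (apparently the instruction does not require aligned operands). A toy model of the A7 quirk, hypothetical code rather than QEMU internals:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the step computed in gen_lea_mode(). */
    static uint32_t areg_step(int regno, int size_bytes, int is_68000_family)
    {
        /* Byte-sized -(A7)/(A7)+ moves SP by 2 on 68000-family parts. */
        if (regno == 7 && size_bytes == 1 && is_68000_family) {
            return 2;
        }
        return size_bytes;
    }

    int main(void)
    {
        uint32_t a7 = 0x2000;
        a7 -= areg_step(7, 1, 1);                 /* byte push via -(A7) */
        printf("A7 after byte push: 0x%x\n", a7); /* 0x1ffe, still even */
        return 0;
    }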
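
The bitfield memory helpers all funnel through bf_prep(), which normalizes a (base address, signed bit offset, length) triple so the whole field can be fetched with a single 1-, 2-, 4- or 8-byte load: blen encodes the load size, and bofs is rebased so the field lands at a predictable position inside a 64-bit word. Worked through for one sample input, relying on C99's truncating division and remainder exactly as the helper does (values are illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of bf_prep()'s normalization for one sample input;
     * no CPU state or guest memory involved. */
    int main(void)
    {
        uint32_t addr = 0x1000;
        int32_t ofs = -3;
        uint32_t len = 8;
        int bofs, blen;

        len = ((len - 1) & 31) + 1;   /* 0 encodes 32 */
        addr += ofs / 8;              /* truncates toward zero: +0 */
        bofs = ofs % 8;               /* -3 in C99 */
        if (bofs < 0) {
            bofs += 8;                /* 5 */
            addr -= 1;                /* 0xfff */
        }
        blen = (bofs + len - 1) / 8;  /* 1 -> one 16-bit load suffices */
        bofs += 48;                   /* rebase into a 64-bit word: 53 */

        printf("addr=0x%x bofs=%d blen=%d\n", addr, bofs, blen);
        return 0;
    }

On this input bfexts_mem then evaluates (int64_t)(data << 53) >> 56, sign-extending the 8-bit field, and the read-modify-write helpers (bfchg/bfclr/bfset/bfffo) build their field mask over the same loaded word as -1ull << (64 - len) >> bofs.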
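
In tcg/aarch64, tcg_out_movi keeps its old heuristic, counting the zero 16-bit lanes of value and of ~value to decide whether a MOVZ- or MOVN-seeded sequence needs fewer instructions, but the rewrite splits emission into two straight-line loops: the MOVN path now works on the inverted value throughout and only un-inverts each 16-bit chunk as it is handed to MOVK. A quick host-side harness (ordinary C, not TCG) that mirrors the lane selection and prints the sequence the MOVN path would emit for one sample constant:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t value = 0xffffffff00001234ull, ivalue = ~value;
        int i, shift, wantinv = 0;

        for (i = 0; i < 64; i += 16) {
            uint64_t mask = 0xffffull << i;
            wantinv -= ((value & mask) == 0);
            wantinv += ((ivalue & mask) == 0);
        }
        /* Here wantinv == 1, so the inverted sequence is shorter. */
        printf("wantinv=%d -> %s path\n", wantinv,
               wantinv <= 0 ? "MOVZ" : "MOVN");

        shift = __builtin_ctzll(ivalue) & (63 & -16);
        printf("MOVN #0x%04llx, lsl %d\n",      /* 0xedcb, lsl 0 */
               (unsigned long long)((ivalue >> shift) & 0xffff), shift);
        ivalue &= ~(0xffffull << shift);
        while (ivalue) {
            shift = __builtin_ctzll(ivalue) & (63 & -16);
            /* MOVK gets the non-inverted chunk: 0x0000, lsl 16. */
            printf("MOVK #0x%04llx, lsl %d\n",
                   (unsigned long long)(~(ivalue >> shift) & 0xffff), shift);
            ivalue &= ~(0xffffull << shift);
        }
        return 0;
    }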
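
Finally, util/lockcnt.c's Linux fast path packs the whole primitive into a single int: bits 0-1 hold the futex-based mutex state (0 free, 1 locked, 2 contended, as in the Drepper paper cited above) and bits 2-31 hold the visitor count, which is why the increment step is 4 and qemu_lockcnt_count() shifts right by 2. The combined operations adjust both halves in one cmpxchg; qemu_lockcnt_inc_and_unlock(), for instance, computes (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK. A standalone sketch of the arithmetic on plain ints (no atomics or futexes, which are the whole point of the real code, but the encoding is easier to see without them):

    #include <assert.h>
    #include <stdio.h>

    #define STATE_MASK  3   /* bits 0-1: free/locked/waiting */
    #define COUNT_STEP  4   /* bit 2 is the lowest count bit */

    int main(void)
    {
        int val = 0;                            /* count 0, free */

        val += COUNT_STEP;                      /* lockcnt_inc: count 1 */
        assert((val >> 2) == 1);                /* lockcnt_count */

        val |= 1;                               /* lockcnt_lock fast path */
        assert((val & STATE_MASK) == 1 && (val >> 2) == 1);

        val = (val + COUNT_STEP) & ~STATE_MASK; /* lockcnt_inc_and_unlock */
        assert((val >> 2) == 2 && (val & STATE_MASK) == 0);

        printf("final count: %d\n", val >> 2);  /* 2, lock released */
        return 0;
    }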