40 files changed, 1941 insertions, 312 deletions
diff --git a/cpu-exec.c b/cpu-exec.c index a4f0effaf4..fa506e628a 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -61,8 +61,7 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu) sleep_delay.tv_sec = sc->diff_clk / 1000000000LL; sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL; if (nanosleep(&sleep_delay, &rem_delay) < 0) { - sc->diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL; - sc->diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec; + sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec; } else { sc->diff_clk = 0; } @@ -101,10 +100,8 @@ static void init_delay_params(SyncClocks *sc, if (!icount_align_option) { return; } - sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - - sc->realtime_clock + - cpu_get_clock_offset(); + sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); + sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock; sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; if (sc->diff_clk < max_delay) { max_delay = sc->diff_clk; @@ -229,23 +229,6 @@ int64_t cpu_get_clock(void) return ti; } -/* return the offset between the host clock and virtual CPU clock */ -int64_t cpu_get_clock_offset(void) -{ - int64_t ti; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - ti = timers_state.cpu_clock_offset; - if (!timers_state.cpu_ticks_enabled) { - ti -= get_clock(); - } - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return -ti; -} - /* enable cpu_get_ticks() * Caller must hold BQL which server as mutex for vm_clock_seqlock. */ diff --git a/disas/s390.c b/disas/s390.c index 25499ba419..974460c814 100644 --- a/disas/s390.c +++ b/disas/s390.c @@ -106,10 +106,6 @@ struct s390_opcode static const struct s390_opcode s390_opcodes[]; static const int s390_num_opcodes; -/* A opcode format table for the .insn pseudo mnemonic. */ -static const struct s390_opcode s390_opformats[]; -static const int s390_num_opformats; - /* Values defined for the flags field of a struct powerpc_opcode. */ /* The operands table is an array of struct s390_operand. */ @@ -844,37 +840,6 @@ static const struct s390_operand s390_operands[] = #define MASK_SIY_DRI { 0xff, 0x00, 0x00, 0x00, 0x00, 0xff } /* QEMU-END */ -/* The opcode formats table (blueprints for .insn pseudo mnemonic). 
*/ - -static const struct s390_opcode s390_opformats[] = - { - { "e", OP8(0x00LL), MASK_E, INSTR_E, 3, 0 }, - { "ri", OP8(0x00LL), MASK_RI_RI, INSTR_RI_RI, 3, 0 }, - { "rie", OP8(0x00LL), MASK_RIE_RRP, INSTR_RIE_RRP, 3, 0 }, - { "ril", OP8(0x00LL), MASK_RIL_RP, INSTR_RIL_RP, 3, 0 }, - { "rilu", OP8(0x00LL), MASK_RIL_RU, INSTR_RIL_RU, 3, 0 }, - { "rr", OP8(0x00LL), MASK_RR_RR, INSTR_RR_RR, 3, 0 }, - { "rre", OP8(0x00LL), MASK_RRE_RR, INSTR_RRE_RR, 3, 0 }, - { "rrf", OP8(0x00LL), MASK_RRF_RURR, INSTR_RRF_RURR, 3, 0 }, - { "rs", OP8(0x00LL), MASK_RS_RRRD, INSTR_RS_RRRD, 3, 0 }, - { "rse", OP8(0x00LL), MASK_RSE_RRRD, INSTR_RSE_RRRD, 3, 0 }, - { "rsi", OP8(0x00LL), MASK_RSI_RRP, INSTR_RSI_RRP, 3, 0 }, - { "rsy", OP8(0x00LL), MASK_RSY_RRRD, INSTR_RSY_RRRD, 3, 3 }, - { "rx", OP8(0x00LL), MASK_RX_RRRD, INSTR_RX_RRRD, 3, 0 }, - { "rxe", OP8(0x00LL), MASK_RXE_RRRD, INSTR_RXE_RRRD, 3, 0 }, - { "rxf", OP8(0x00LL), MASK_RXF_RRRDR, INSTR_RXF_RRRDR,3, 0 }, - { "rxy", OP8(0x00LL), MASK_RXY_RRRD, INSTR_RXY_RRRD, 3, 3 }, - { "s", OP8(0x00LL), MASK_S_RD, INSTR_S_RD, 3, 0 }, - { "si", OP8(0x00LL), MASK_SI_URD, INSTR_SI_URD, 3, 0 }, - { "siy", OP8(0x00LL), MASK_SIY_URD, INSTR_SIY_URD, 3, 3 }, - { "ss", OP8(0x00LL), MASK_SS_RRRDRD, INSTR_SS_RRRDRD,3, 0 }, - { "sse", OP8(0x00LL), MASK_SSE_RDRD, INSTR_SSE_RDRD, 3, 0 }, - { "ssf", OP8(0x00LL), MASK_SSF_RRDRD, INSTR_SSF_RRDRD,3, 0 }, -}; - -static const int s390_num_opformats = - sizeof (s390_opformats) / sizeof (s390_opformats[0]); - /* include "s390-opc.tab" generated from opcodes/s390-opc.txt rev 1.17 */ /* The opcode table. This file was generated by s390-mkopc. diff --git a/docs/rcu.txt b/docs/rcu.txt new file mode 100644 index 0000000000..61752b93ab --- /dev/null +++ b/docs/rcu.txt @@ -0,0 +1,387 @@ +Using RCU (Read-Copy-Update) for synchronization +================================================ + +Read-copy update (RCU) is a synchronization mechanism that is used to +protect read-mostly data structures. RCU is very efficient and scalable +on the read side (it is wait-free), and thus can make the read paths +extremely fast. + +RCU supports concurrency between a single writer and multiple readers, +thus it is not used alone. Typically, the write-side will use a lock to +serialize multiple updates, but other approaches are possible (e.g., +restricting updates to a single task). In QEMU, when a lock is used, +this will often be the "iothread mutex", also known as the "big QEMU +lock" (BQL). Also, restricting updates to a single task is done in +QEMU using the "bottom half" API. + +RCU is fundamentally a "wait-to-finish" mechanism. The read side marks +sections of code with "critical sections", and the update side will wait +for the execution of all *currently running* critical sections before +proceeding, or before asynchronously executing a callback. + +The key point here is that only the currently running critical sections +are waited for; critical sections that are started _after_ the beginning +of the wait do not extend the wait, despite running concurrently with +the updater. This is the reason why RCU is more scalable than, +for example, reader-writer locks. It is so much more scalable that +the system will have a single instance of the RCU mechanism; a single +mechanism can be used for an arbitrary number of "things", without +having to worry about things such as contention or deadlocks. + +How is this possible? The basic idea is to split updates in two phases, +"removal" and "reclamation". 
During removal, we ensure that subsequent +readers will not be able to get a reference to the old data. After +removal has completed, a critical section will not be able to access +the old data. Therefore, critical sections that begin after removal +do not matter; as soon as all previous critical sections have finished, +there cannot be any readers who hold references to the data structure, +and these can now be safely reclaimed (e.g., freed or unref'ed). + +Here is a picture: + + thread 1 thread 2 thread 3 + ------------------- ------------------------ ------------------- + enter RCU crit.sec. + | finish removal phase + | begin wait + | | enter RCU crit.sec. + exit RCU crit.sec | | + complete wait | + begin reclamation phase | + exit RCU crit.sec. + + +Note how thread 3 is still executing its critical section when thread 2 +starts reclaiming data. This is possible, because the old version of the +data structure was not accessible at the time thread 3 began executing +that critical section. + + +RCU API +======= + +The core RCU API is small: + + void rcu_read_lock(void); + + Used by a reader to inform the reclaimer that the reader is + entering an RCU read-side critical section. + + void rcu_read_unlock(void); + + Used by a reader to inform the reclaimer that the reader is + exiting an RCU read-side critical section. Note that RCU + read-side critical sections may be nested and/or overlapping. + + void synchronize_rcu(void); + + Blocks until all pre-existing RCU read-side critical sections + on all threads have completed. This marks the end of the removal + phase and the beginning of the reclamation phase. + + Note that it would be valid for another update to come while + synchronize_rcu is running. Because of this, it is better that + the updater releases any locks it may hold before calling + synchronize_rcu. If this is not possible (for example, because + the updater is protected by the BQL), you can use call_rcu. + + void call_rcu1(struct rcu_head * head, + void (*func)(struct rcu_head *head)); + + This function invokes func(head) after all pre-existing RCU + read-side critical sections on all threads have completed. This + marks the end of the removal phase, with func taking care + asynchronously of the reclamation phase. + + The foo struct needs to have an rcu_head structure added, + perhaps as follows: + + struct foo { + struct rcu_head rcu; + int a; + char b; + long c; + }; + + so that the reclaimer function can fetch the struct foo address + and free it: + + call_rcu1(&foo.rcu, foo_reclaim); + + void foo_reclaim(struct rcu_head *rp) + { + struct foo *fp = container_of(rp, struct foo, rcu); + g_free(fp); + } + + For the common case where the rcu_head member is the first of the + struct, you can use the following macro. + + void call_rcu(T *p, + void (*func)(T *p), + field-name); + + call_rcu1 is typically used through this macro, in the common case + where the "struct rcu_head" is the first field in the struct. In + the above case, one could have written simply: + + call_rcu(&foo, g_free, rcu); + + typeof(*p) atomic_rcu_read(p); + + atomic_rcu_read() is similar to atomic_mb_read(), but it makes + some assumptions on the code that calls it. This allows a more + optimized implementation. + + atomic_rcu_read assumes that whenever a single RCU critical + section reads multiple shared data, these reads are either + data-dependent or need no ordering. This is almost always the + case when using RCU, because read-side critical sections typically + navigate one or more pointers (the pointers that are changed on + every update) until reaching a data structure of interest, + and then read from there. + + RCU read-side critical sections must use atomic_rcu_read() to + read data, unless concurrent writes are prevented by another + synchronization mechanism. + + Furthermore, RCU read-side critical sections should traverse the + data structure in a single direction, opposite to the direction + in which the updater initializes it.
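As a minimal sketch of these rules in action (the list type and the names struct item, item_head and item_lookup are hypothetical, not part of this API):

    struct item {
        struct rcu_head rcu;    /* first field, so call_rcu() can be used */
        struct item *next;      /* published by updaters with atomic_rcu_set() */
        int key;
        int val;
    };

    static struct item *item_head;   /* RCU-protected list head */

    static int item_lookup(int key)
    {
        struct item *p;
        int val = -1;

        rcu_read_lock();
        /* Every pointer is fetched with atomic_rcu_read(); the reads are
         * data-dependent, so they satisfy the ordering assumption above,
         * and the list is walked in a single direction. */
        for (p = atomic_rcu_read(&item_head); p;
             p = atomic_rcu_read(&p->next)) {
            if (p->key == key) {
                val = p->val;
                break;
            }
        }
        rcu_read_unlock();
        return val;
    }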
+ + void atomic_rcu_set(p, typeof(*p) v); + + atomic_rcu_set() is also similar to atomic_mb_set(), and it also + makes assumptions on the code that calls it in order to allow a more + optimized implementation. + + In particular, atomic_rcu_set() suffices for synchronization + with readers, if the updater never mutates a field within a + data item that is already accessible to readers. This is the + case when initializing a new copy of the RCU-protected data + structure; just ensure that initialization of *p is carried out + before atomic_rcu_set() makes the data item visible to readers. + If this rule is observed, writes will happen in the opposite + order as reads in the RCU read-side critical sections (or if + there is just one update), and there will be no need for any other + synchronization mechanism to coordinate the accesses. + +The following APIs must be used before RCU is used in a thread: + + void rcu_register_thread(void); + + Mark a thread as taking part in the RCU mechanism. Such a thread + will have to report quiescent points regularly, either manually + or through the QemuCond/QemuSemaphore/QemuEvent APIs. + + void rcu_unregister_thread(void); + + Mark a thread as not taking part anymore in the RCU mechanism. + It is not a problem if such a thread reports quiescent points, + either manually or by using the QemuCond/QemuSemaphore/QemuEvent + APIs. + +Note that these APIs are relatively heavyweight, and should _not_ be +nested. + + +DIFFERENCES WITH LINUX +====================== + +- Waiting on a mutex is possible, though discouraged, within an RCU critical + section. This is because spinlocks are rarely (if ever) used in userspace + programming; not allowing this would prevent upgrading an RCU read-side + critical section to become an updater. + +- atomic_rcu_read and atomic_rcu_set replace rcu_dereference and + rcu_assign_pointer. They take a _pointer_ to the variable being accessed. + +- call_rcu is a macro that has an extra argument (the name of the first + field in the struct, which must be a struct rcu_head), and expects the + type of the callback's argument to be the type of the first argument. + call_rcu1 is the same as Linux's call_rcu. + + +RCU PATTERNS +============ + +Many patterns using reader-writer locks translate directly to RCU, with +the advantages of higher scalability and deadlock immunity. + +In general, RCU can be used whenever it is possible to create a new +"version" of a data structure every time the updater runs. This may +sound like a very strict restriction, however: + +- the updater does not mean "everything that writes to a data structure", + but rather "everything that involves a reclamation step". See the + array example below. + +- in some cases, creating a new version of a data structure may actually + be very cheap. For example, modifying the "next" pointer of a singly + linked list is effectively creating a new version of the list, as the + sketch below shows.
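To make that last point concrete, here is a minimal update-side sketch, reusing the hypothetical struct item list from the earlier sketch (item_mutex and item_free are equally hypothetical):

    static QemuMutex item_mutex;     /* serializes updaters */

    static void item_free(struct item *p)
    {
        g_free(p);
    }

    /* Unlink 'victim', whose predecessor is 'prev' (NULL if 'victim' is
     * the list head).  A single atomic_rcu_set() of one "next" pointer
     * publishes the new version of the list; readers still traversing
     * the old version keep a valid 'victim' until a grace period has
     * elapsed, at which point call_rcu() reclaims it. */
    static void item_remove(struct item *prev, struct item *victim)
    {
        qemu_mutex_lock(&item_mutex);
        if (prev) {
            atomic_rcu_set(&prev->next, victim->next);
        } else {
            atomic_rcu_set(&item_head, victim->next);
        }
        qemu_mutex_unlock(&item_mutex);
        call_rcu(victim, item_free, rcu);
    }

The nodes that survive the update are shared between the old and the new version of the list; the only write is to a single "next" pointer (or to item_head), and reclamation of the unlinked node is deferred.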
+ +Here are some frequently-used RCU idioms that are worth noting. + + +RCU list processing +------------------- + +TBD (not yet used in QEMU) + + +RCU reference counting +---------------------- + +Because grace periods are not allowed to complete while there is an RCU +read-side critical section in progress, the RCU read-side primitives +may be used as a restricted reference-counting mechanism. For example, +consider the following code fragment: + + rcu_read_lock(); + p = atomic_rcu_read(&foo); + /* do something with p. */ + rcu_read_unlock(); + +The RCU read-side critical section ensures that the value of "p" remains +valid until after the rcu_read_unlock(). In some sense, it is acquiring +a reference to p that is later released when the critical section ends. +The write side looks simply like this (with appropriate locking): + + qemu_mutex_lock(&foo_mutex); + old = foo; + atomic_rcu_set(&foo, new); + qemu_mutex_unlock(&foo_mutex); + synchronize_rcu(); + free(old); + +If the processing cannot be done purely within the critical section, it +is possible to combine this idiom with a "real" reference count: + + rcu_read_lock(); + p = atomic_rcu_read(&foo); + foo_ref(p); + rcu_read_unlock(); + /* do something with p. */ + foo_unref(p); + +The write side can be like this: + + qemu_mutex_lock(&foo_mutex); + old = foo; + atomic_rcu_set(&foo, new); + qemu_mutex_unlock(&foo_mutex); + synchronize_rcu(); + foo_unref(old); + +or with call_rcu: + + qemu_mutex_lock(&foo_mutex); + old = foo; + atomic_rcu_set(&foo, new); + qemu_mutex_unlock(&foo_mutex); + call_rcu(old, foo_unref, rcu); + +In both cases, the write side only performs removal. Reclamation +happens when the last reference to a "foo" object is dropped. +Using synchronize_rcu() is undesirably expensive, because the +last reference may be dropped on the read side. Hence you can +use call_rcu() instead: + + void foo_unref(struct foo *p) { + if (atomic_fetch_dec(&p->refcount) == 1) { + call_rcu(p, foo_destroy, rcu); + } + } + + +Note that the same idioms would be possible with reader/writer +locks: + + read_lock(&foo_rwlock); write_mutex_lock(&foo_rwlock); + p = foo; p = foo; + /* do something with p. */ foo = new; + read_unlock(&foo_rwlock); free(p); + write_mutex_unlock(&foo_rwlock); + free(p); + + ------------------------------------------------------------------ + + read_lock(&foo_rwlock); write_mutex_lock(&foo_rwlock); + p = foo; old = foo; + foo_ref(p); foo = new; + read_unlock(&foo_rwlock); foo_unref(old); + /* do something with p. */ write_mutex_unlock(&foo_rwlock); + read_lock(&foo_rwlock); + foo_unref(p); + read_unlock(&foo_rwlock); + +foo_unref could use a mechanism such as bottom halves to move deallocation +out of the write-side critical section. + + +RCU resizable arrays +-------------------- + +Resizable arrays can be used with RCU. The expensive RCU synchronization +(or call_rcu) only needs to take place when the array is resized. +The two items to take care of are: + +- ensuring that the old version of the array is available between removal + and reclamation; + +- avoiding mismatches in the read side between the array data and the + array size. + +The first problem is avoided simply by not using realloc. Instead, +each resize will allocate a new array and copy the old data into it. +The second problem would arise if the size and the data pointers were +two members of a larger struct: + + struct mystuff { + ... + int data_size; + int data_alloc; + T *data; + ...
+ }; + +Instead, we store the size of the array with the array itself: + + struct arr { + int size; + int alloc; + T data[]; + }; + struct arr *global_array; + + read side: + rcu_read_lock(); + struct arr *array = atomic_rcu_read(&global_array); + x = i < array->size ? array->data[i] : -1; + rcu_read_unlock(); + return x; + + write side (running under a lock): + if (global_array->size == global_array->alloc) { + /* Creating a new version. */ + new_array = g_malloc(sizeof(struct arr) + + global_array->alloc * 2 * sizeof(T)); + new_array->size = global_array->size; + new_array->alloc = global_array->alloc * 2; + memcpy(new_array->data, global_array->data, + global_array->alloc * sizeof(T)); + + /* Removal phase. */ + old_array = global_array; + atomic_rcu_set(&global_array, new_array); + synchronize_rcu(); + + /* Reclamation phase. */ + g_free(old_array); + } + + +SOURCES +======= + +* Documentation/RCU/ from the Linux kernel diff --git a/fpu/softfloat-macros.h b/fpu/softfloat-macros.h index 0dcda93f72..5e030cd8e5 100644 --- a/fpu/softfloat-macros.h +++ b/fpu/softfloat-macros.h @@ -1,13 +1,24 @@ /* * QEMU float support macros * - * Derived from SoftFloat. + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. */ -/*============================================================================ - +/* +=============================================================================== This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -16,24 +27,57 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR.
USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ -=============================================================================*/ +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ /*---------------------------------------------------------------------------- | This macro tests for minimum version of the GNU C compiler. @@ -107,10 +151,10 @@ static inline void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t | 63 bits of the extra result are all zero if and only if _all_but_the_last_ | bits shifted off were all zero. This extra result is stored in the location | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. -| (This routine makes more sense if `a0' and `a1' are considered to form -| a fixed-point value with binary point between `a0' and `a1'. This fixed- -| point value is shifted right by the number of bits given in `count', and -| the integer part of the result is returned at the location pointed to by +| (This routine makes more sense if `a0' and `a1' are considered to form a +| fixed-point value with binary point between `a0' and `a1'. 
This fixed-point +| value is shifted right by the number of bits given in `count', and the +| integer part of the result is returned at the location pointed to by | `z0Ptr'. The fractional part of the result may be slightly corrupted as | described above, and is returned at the location pointed to by `z1Ptr'.) *----------------------------------------------------------------------------*/ diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h index 518f694a68..23d73788ed 100644 --- a/fpu/softfloat-specialize.h +++ b/fpu/softfloat-specialize.h @@ -1,13 +1,24 @@ /* * QEMU float support * - * Derived from SoftFloat. + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. */ -/*============================================================================ - +/* +=============================================================================== This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -16,29 +27,66 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. 
+(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ -=============================================================================*/ +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ +/* Does the target distinguish signaling NaNs from non-signaling NaNs + * by setting the most significant bit of the mantissa for a signaling NaN? + * (The more common choice is to have it be zero for SNaN and one for QNaN.) 
+ */ #if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) -#define SNAN_BIT_IS_ONE 1 +#define SNAN_BIT_IS_ONE 1 #else -#define SNAN_BIT_IS_ONE 0 +#define SNAN_BIT_IS_ONE 0 #endif #if defined(TARGET_XTENSA) @@ -81,7 +129,7 @@ const float64 float64_default_nan = const_float64(LIT64( 0x7FFFFFFFFFFFFFFF )); #elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) const float64 float64_default_nan = const_float64(LIT64( 0x7FF8000000000000 )); #elif SNAN_BIT_IS_ONE -const float64 float64_default_nan = const_float64(LIT64( 0x7FF7FFFFFFFFFFFF )); +const float64 float64_default_nan = const_float64(LIT64(0x7FF7FFFFFFFFFFFF)); #else const float64 float64_default_nan = const_float64(LIT64( 0xFFF8000000000000 )); #endif @@ -91,7 +139,7 @@ const float64 float64_default_nan = const_float64(LIT64( 0xFFF8000000000000 )); *----------------------------------------------------------------------------*/ #if SNAN_BIT_IS_ONE #define floatx80_default_nan_high 0x7FFF -#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF ) +#define floatx80_default_nan_low LIT64(0xBFFFFFFFFFFFFFFF) #else #define floatx80_default_nan_high 0xFFFF #define floatx80_default_nan_low LIT64( 0xC000000000000000 ) @@ -105,8 +153,8 @@ const floatx80 floatx80_default_nan | `low' values hold the most- and least-significant bits, respectively. *----------------------------------------------------------------------------*/ #if SNAN_BIT_IS_ONE -#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF ) -#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) +#define float128_default_nan_high LIT64(0x7FFF7FFFFFFFFFFF) +#define float128_default_nan_low LIT64(0xFFFFFFFFFFFFFFFF) #else #define float128_default_nan_high LIT64( 0xFFFF800000000000 ) #define float128_default_nan_low LIT64( 0x0000000000000000 ) @@ -257,9 +305,9 @@ int float32_is_quiet_nan( float32 a_ ) { uint32_t a = float32_val(a_); #if SNAN_BIT_IS_ONE - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + return (((a >> 22) & 0x1ff) == 0x1fe) && (a & 0x003fffff); #else - return ( 0xFF800000 <= (uint32_t) ( a<<1 ) ); + return ((uint32_t)(a << 1) >= 0xff800000); #endif } @@ -272,7 +320,7 @@ int float32_is_signaling_nan( float32 a_ ) { uint32_t a = float32_val(a_); #if SNAN_BIT_IS_ONE - return ( 0xFF800000 <= (uint32_t) ( a<<1 ) ); + return ((uint32_t)(a << 1) >= 0xff800000); #else return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); #endif @@ -665,11 +713,10 @@ int float64_is_quiet_nan( float64 a_ ) { uint64_t a = float64_val(a_); #if SNAN_BIT_IS_ONE - return - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); + return (((a >> 51) & 0xfff) == 0xffe) + && (a & 0x0007ffffffffffffULL); #else - return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) ); + return ((a << 1) >= 0xfff0000000000000ULL); #endif } @@ -682,7 +729,7 @@ int float64_is_signaling_nan( float64 a_ ) { uint64_t a = float64_val(a_); #if SNAN_BIT_IS_ONE - return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) ); + return ((a << 1) >= 0xfff0000000000000ULL); #else return ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) @@ -866,11 +913,10 @@ int floatx80_is_quiet_nan( floatx80 a ) #if SNAN_BIT_IS_ONE uint64_t aLow; - aLow = a.low & ~ LIT64( 0x4000000000000000 ); - return - ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (uint64_t) ( aLow<<1 ) - && ( a.low == aLow ); + aLow = a.low & ~0x4000000000000000ULL; + return ((a.high & 0x7fff) == 0x7fff) + && (aLow << 1) + && (a.low == aLow); #else return ( ( a.high & 0x7FFF ) == 0x7FFF ) && 
(LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 ))); @@ -886,8 +932,8 @@ int floatx80_is_quiet_nan( floatx80 a ) int floatx80_is_signaling_nan( floatx80 a ) { #if SNAN_BIT_IS_ONE - return ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 ))); + return ((a.high & 0x7fff) == 0x7fff) + && ((a.low << 1) >= 0x8000000000000000ULL); #else uint64_t aLow; @@ -1031,13 +1077,12 @@ int float128_is_signaling_nan(float128 a_) int float128_is_quiet_nan( float128 a ) { #if SNAN_BIT_IS_ONE - return - ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) - && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); + return (((a.high >> 47) & 0xffff) == 0xfffe) + && (a.low || (a.high & 0x00007fffffffffffULL)); #else return - ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) ) - && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); + ((a.high << 1) >= 0xffff000000000000ULL) + && (a.low || (a.high & 0x0000ffffffffffffULL)); #endif } @@ -1050,8 +1095,8 @@ int float128_is_signaling_nan( float128 a ) { #if SNAN_BIT_IS_ONE return - ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) ) - && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); + ((a.high << 1) >= 0xffff000000000000ULL) + && (a.low || (a.high & 0x0000ffffffffffffULL)); #else return ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 16b21ebe61..a1f1cb33b4 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1,13 +1,24 @@ /* * QEMU float support * - * Derived from SoftFloat. + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. */ -/*============================================================================ - -This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. +/* +=============================================================================== +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -16,24 +27,57 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. 
USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ -=============================================================================*/ +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ /* softfloat (and in particular the code in softfloat-specialize.h) is * target-dependent and needs the TARGET_* macros. @@ -529,9 +573,9 @@ static inline float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig) | the inexact exception raised if the abstract input cannot be represented | exactly. 
However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is -| returned. If the abstract value is too small, the input value is rounded -| to a subnormal number, and the underflow and inexact exceptions are raised -| if the abstract input cannot be represented exactly as a subnormal double- +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal double- | precision floating-point number. | The input significand `zSig' has its binary point between bits 62 | and 61, which is 10 bits to the left of the usual location. This shifted @@ -1304,27 +1348,6 @@ float32 int64_to_float32(int64_t a STATUS_PARAM) } -float32 uint64_to_float32(uint64_t a STATUS_PARAM) -{ - int8 shiftCount; - - if ( a == 0 ) return float32_zero; - shiftCount = countLeadingZeros64( a ) - 40; - if ( 0 <= shiftCount ) { - return packFloat32(0, 0x95 - shiftCount, a<<shiftCount); - } - else { - shiftCount += 7; - if ( shiftCount < 0 ) { - shift64RightJamming( a, - shiftCount, &a ); - } - else { - a <<= shiftCount; - } - return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR); - } -} - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit two's complement integer `a' | to the double-precision floating-point format. The conversion is performed @@ -1344,20 +1367,6 @@ float64 int64_to_float64(int64_t a STATUS_PARAM) } -float64 uint64_to_float64(uint64_t a STATUS_PARAM) -{ - int exp = 0x43C; - - if (a == 0) { - return float64_zero; - } - if ((int64_t)a < 0) { - shift64RightJamming(a, 1, &a); - exp += 1; - } - return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR); -} - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit two's complement integer `a' | to the extended double-precision floating-point format. The conversion @@ -1412,6 +1421,71 @@ float128 int64_to_float128(int64_t a STATUS_PARAM) } +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit unsigned integer `a' +| to the single-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 uint64_to_float32(uint64_t a STATUS_PARAM) +{ + int shiftcount; + + if (a == 0) { + return float32_zero; + } + + /* Determine (left) shift needed to put first set bit into bit posn 23 + * (since packFloat32() expects the binary point between bits 23 and 22); + * this is the fast case for smallish numbers. + */ + shiftcount = countLeadingZeros64(a) - 40; + if (shiftcount >= 0) { + return packFloat32(0, 0x95 - shiftcount, a << shiftcount); + } + /* Otherwise we need to do a round-and-pack. roundAndPackFloat32() + * expects the binary point between bits 30 and 29, hence the + 7. + */ + shiftcount += 7; + if (shiftcount < 0) { + shift64RightJamming(a, -shiftcount, &a); + } else { + a <<= shiftcount; + } + + return roundAndPackFloat32(0, 0x9c - shiftcount, a STATUS_VAR); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit unsigned integer `a' +| to the double-precision floating-point format. 
The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 uint64_to_float64(uint64_t a STATUS_PARAM) +{ + int exp = 0x43C; + int shiftcount; + + if (a == 0) { + return float64_zero; + } + + shiftcount = countLeadingZeros64(a) - 1; + if (shiftcount < 0) { + shift64RightJamming(a, -shiftcount, &a); + } else { + a <<= shiftcount; + } + return roundAndPackFloat64(0, exp - shiftcount, a STATUS_VAR); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit unsigned integer `a' +| to the quadruple-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + float128 uint64_to_float128(uint64_t a STATUS_PARAM) { if (a == 0) { diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c index 71262bccd2..e75aa8772e 100644 --- a/hw/9pfs/virtio-9p-synth.c +++ b/hw/9pfs/virtio-9p-synth.c @@ -17,6 +17,7 @@ #include "virtio-9p-xattr.h" #include "fsdev/qemu-fsdev.h" #include "virtio-9p-synth.h" +#include "qemu/rcu.h" #include <sys/stat.h> diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 3b77c9a227..4ba8409668 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -62,6 +62,7 @@ typedef struct S390IPLState { static int s390_ipl_init(SysBusDevice *dev) { S390IPLState *ipl = S390_IPL(dev); + uint64_t pentry = KERN_IMAGE_START; int kernel_size; if (!ipl->kernel) { @@ -94,31 +95,31 @@ static int s390_ipl_init(SysBusDevice *dev) hw_error("could not load bootloader '%s'\n", bios_name); } return 0; + } + + kernel_size = load_elf(ipl->kernel, NULL, NULL, &pentry, NULL, + NULL, 1, ELF_MACHINE, 0); + if (kernel_size < 0) { + kernel_size = load_image_targphys(ipl->kernel, 0, ram_size); + } + if (kernel_size < 0) { + fprintf(stderr, "could not load kernel '%s'\n", ipl->kernel); + return -1; + } + /* + * Is it a Linux kernel (starting at 0x10000)? If yes, we fill in the + * kernel parameters here as well. Note: For old kernels (up to 3.2) + * we can not rely on the ELF entry point - it was 0x800 (the SALIPL + * loader) and it won't work. For this case we force it to 0x10000, too. + */ + if (pentry == KERN_IMAGE_START || pentry == 0x800) { + ipl->start_addr = KERN_IMAGE_START; + /* Overwrite parameters in the kernel image, which are "rom" */ + strcpy(rom_ptr(KERN_PARM_AREA), ipl->cmdline); } else { - uint64_t pentry = KERN_IMAGE_START; - kernel_size = load_elf(ipl->kernel, NULL, NULL, &pentry, NULL, - NULL, 1, ELF_MACHINE, 0); - if (kernel_size < 0) { - kernel_size = load_image_targphys(ipl->kernel, 0, ram_size); - } - if (kernel_size < 0) { - fprintf(stderr, "could not load kernel '%s'\n", ipl->kernel); - return -1; - } - /* - * Is it a Linux kernel (starting at 0x10000)? If yes, we fill in the - * kernel parameters here as well. Note: For old kernels (up to 3.2) - * we can not rely on the ELF entry point - it was 0x800 (the SALIPL - * loader) and it won't work. For this case we force it to 0x10000, too. 
- */ - if (pentry == KERN_IMAGE_START || pentry == 0x800) { - ipl->start_addr = KERN_IMAGE_START; - /* Overwrite parameters in the kernel image, which are "rom" */ - strcpy(rom_ptr(KERN_PARM_AREA), ipl->cmdline); - } else { - ipl->start_addr = pentry; - } + ipl->start_addr = pentry; } + if (ipl->initrd) { ram_addr_t initrd_offset; int initrd_size; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 1201b8d57c..dc455a2bb7 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -170,7 +170,7 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh) S390pciState *s = S390_PCI_HOST_BRIDGE( object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); - if (!s) { + if (!s || !fh) { return NULL; } @@ -187,7 +187,7 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh) static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh, uint32_t fid, uint64_t faddr, uint32_t e) { - SeiContainer *sei_cont = g_malloc0(sizeof(SeiContainer)); + SeiContainer *sei_cont; S390pciState *s = S390_PCI_HOST_BRIDGE( object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); @@ -195,6 +195,7 @@ static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh, return; } + sei_cont = g_malloc0(sizeof(SeiContainer)); sei_cont->fh = fh; sei_cont->fid = fid; sei_cont->cc = cc; diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 5ea13e5d79..9e5bc5b899 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -487,7 +487,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) CPUS390XState *env = &cpu->env; uint32_t fh; S390PCIBusDevice *pbdev; - ram_addr_t size; + hwaddr start, end; IOMMUTLBEntry entry; MemoryRegion *mr; @@ -504,7 +504,8 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) } fh = env->regs[r1] >> 32; - size = env->regs[r2 + 1]; + start = env->regs[r2]; + end = start + env->regs[r2 + 1]; pbdev = s390_pci_find_dev_by_fh(fh); @@ -515,15 +516,18 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) } mr = pci_device_iommu_address_space(pbdev->pdev)->root; - entry = mr->iommu_ops->translate(mr, env->regs[r2], 0); + while (start < end) { + entry = mr->iommu_ops->translate(mr, start, 0); - if (!entry.translated_addr) { - setcc(cpu, ZPCI_PCI_LS_ERR); - goto out; + if (!entry.translated_addr) { + setcc(cpu, ZPCI_PCI_LS_ERR); + goto out; + } + + memory_region_notify_iommu(mr, entry); + start += entry.addr_mask + 1; } - entry.addr_mask = size - 1; - memory_region_notify_iommu(mr, entry); setcc(cpu, ZPCI_PCI_LS_OK); out: return 0; @@ -784,10 +788,10 @@ int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba) stq_p(&fib.aisb, pbdev->routes.adapter.summary_addr); stq_p(&fib.fmb_addr, pbdev->fmb_addr); - data = (pbdev->isc << 28) | (pbdev->noi << 16) | - (pbdev->routes.adapter.ind_offset << 8) | (pbdev->sum << 7) | - pbdev->routes.adapter.summary_offset; - stw_p(&fib.data, data); + data = ((uint32_t)pbdev->isc << 28) | ((uint32_t)pbdev->noi << 16) | + ((uint32_t)pbdev->routes.adapter.ind_offset << 8) | + ((uint32_t)pbdev->sum << 7) | pbdev->routes.adapter.summary_offset; + stl_p(&fib.data, data); if (pbdev->fh >> ENABLE_BIT_OFFSET) { fib.fc |= 0x80; diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 9b740a3cfa..db39ae0e23 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1756,6 +1756,8 @@ void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier) req->io_canceled = true; if (req->aiocb) { blk_aio_cancel_async(req->aiocb); + } else { + scsi_req_cancel_complete(req); } } diff --git 
a/hw/vfio/common.c b/hw/vfio/common.c index cf483fffa9..e71385e4fe 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -32,7 +32,7 @@ #include "trace.h" struct vfio_group_head vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_address_spaces); + QLIST_HEAD_INITIALIZER(vfio_group_list); struct vfio_as_head vfio_address_spaces = QLIST_HEAD_INITIALIZER(vfio_address_spaces); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 014a92ce5f..29caabc149 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3065,6 +3065,7 @@ static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); if (vdev->msix) { + object_unparent(OBJECT(&vdev->msix->mmap_mem)); g_free(vdev->msix); vdev->msix = NULL; } diff --git a/include/exec/memory.h b/include/exec/memory.h index 0cd96b152e..06ffa1d185 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -33,6 +33,7 @@ #include "qemu/notify.h" #include "qapi/error.h" #include "qom/object.h" +#include "qemu/rcu.h" #define MAX_PHYS_ADDR_SPACE_BITS 62 #define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1) @@ -207,9 +208,13 @@ struct MemoryListener { */ struct AddressSpace { /* All fields are private. */ + struct rcu_head rcu; char *name; MemoryRegion *root; + + /* Accessed via RCU. */ struct FlatView *current_map; + int ioeventfd_nb; struct MemoryRegionIoeventfd *ioeventfds; struct AddressSpaceDispatch *dispatch; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index e32e25d547..35019c9bf3 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -1,13 +1,24 @@ /* * QEMU float support * - * Derived from SoftFloat. + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. */ -/*============================================================================ - -This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. +/* +=============================================================================== +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -16,24 +27,57 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. 
USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ -=============================================================================*/ +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. 
+ */ #ifndef SOFTFLOAT_H #define SOFTFLOAT_H @@ -275,11 +319,11 @@ float64 uint32_to_float64(uint32_t STATUS_PARAM); floatx80 int32_to_floatx80(int32_t STATUS_PARAM); float128 int32_to_float128(int32_t STATUS_PARAM); float32 int64_to_float32(int64_t STATUS_PARAM); -float32 uint64_to_float32(uint64_t STATUS_PARAM); float64 int64_to_float64(int64_t STATUS_PARAM); -float64 uint64_to_float64(uint64_t STATUS_PARAM); floatx80 int64_to_floatx80(int64_t STATUS_PARAM); float128 int64_to_float128(int64_t STATUS_PARAM); +float32 uint64_to_float32(uint64_t STATUS_PARAM); +float64 uint64_to_float64(uint64_t STATUS_PARAM); float128 uint64_to_float128(uint64_t STATUS_PARAM); /* We provide the int16 versions for symmetry of API with float-to-int */ diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 93c2ae2f37..98e05ca875 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -129,6 +129,67 @@ #define atomic_set(ptr, i) ((*(__typeof__(*ptr) volatile*) (ptr)) = (i)) #endif +/** + * atomic_rcu_read - reads an RCU-protected pointer into a local variable, + * inside an RCU read-side critical section. The pointer can later be safely + * dereferenced within the critical section. + * + * This ensures that the pointer copy is invariant throughout the whole critical + * section. + * + * Inserts memory barriers on architectures that require them (currently only + * Alpha) and documents which pointers are protected by RCU. + * + * Unless the __ATOMIC_CONSUME memory order is available, atomic_rcu_read also + * includes a compiler barrier to ensure that value-speculative optimizations + * (e.g. VSS: Value Speculation Scheduling) do not perform the data read + * before the pointer read by speculating the value of the pointer. On new + * enough compilers, atomic_load takes care of such concerns about + * dependency-breaking optimizations. + * + * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg(). + */ +#ifndef atomic_rcu_read +#ifdef __ATOMIC_CONSUME +#define atomic_rcu_read(ptr) ({ \ + typeof(*ptr) _val; \ + __atomic_load(ptr, &_val, __ATOMIC_CONSUME); \ + _val; \ +}) +#else +#define atomic_rcu_read(ptr) ({ \ + typeof(*ptr) _val = atomic_read(ptr); \ + smp_read_barrier_depends(); \ + _val; \ +}) +#endif +#endif + +/** + * atomic_rcu_set - assigns (publicizes) a pointer to a new data structure + * meant to be read by RCU read-side critical sections. + * + * Documents which pointers will be dereferenced by RCU read-side critical + * sections and adds the required memory barriers on architectures requiring + * them. It also makes sure the compiler does not reorder code initializing the + * data structure before its publication. + * + * Should match atomic_rcu_read(). + */ +#ifndef atomic_rcu_set +#ifdef __ATOMIC_RELEASE +#define atomic_rcu_set(ptr, i) do { \ + typeof(*ptr) _val = (i); \ + __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \ +} while (0) +#else +#define atomic_rcu_set(ptr, i) do { \ + smp_wmb(); \ + atomic_set(ptr, i); \ +} while (0) +#endif +#endif + /* These have the same semantics as Java volatile variables. * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html: * "1. Issue a StoreStore barrier (wmb) before each volatile store."
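Since the two macros above take a pointer to the RCU-protected variable (unlike Linux's rcu_dereference() and rcu_assign_pointer(), which take the variable itself), a minimal usage sketch may help; struct config, global_cfg and the values used are hypothetical:

    /* Hypothetical RCU-protected global, written by updaters under a lock. */
    struct config { int value; };
    static struct config *global_cfg;

    /* Read side (inside rcu_read_lock()/rcu_read_unlock()): note the '&'. */
    struct config *cfg = atomic_rcu_read(&global_cfg);
    int v = cfg ? cfg->value : 0;

    /* Write side: initialize the new object completely *before*
     * atomic_rcu_set() publishes it; the macro orders the stores. */
    struct config *new_cfg = g_new0(struct config, 1);
    new_cfg->value = 42;
    atomic_rcu_set(&global_cfg, new_cfg);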
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index a98eb3ad79..c602797652 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -104,6 +104,19 @@ struct { \
 (head)->lh_first = NULL; \
 } while (/*CONSTCOND*/0)

+#define QLIST_SWAP(dstlist, srclist, field) do { \
+    void *tmplist; \
+    tmplist = (srclist)->lh_first; \
+    (srclist)->lh_first = (dstlist)->lh_first; \
+    if ((srclist)->lh_first != NULL) { \
+        (srclist)->lh_first->field.le_prev = &(srclist)->lh_first; \
+    } \
+    (dstlist)->lh_first = tmplist; \
+    if ((dstlist)->lh_first != NULL) { \
+        (dstlist)->lh_first->field.le_prev = &(dstlist)->lh_first; \
+    } \
+} while (/*CONSTCOND*/0)
+
 #define QLIST_INSERT_AFTER(listelm, elm, field) do { \
 if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
 (listelm)->field.le_next->field.le_prev = \
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
new file mode 100644
index 0000000000..068a279a79
--- /dev/null
+++ b/include/qemu/rcu.h
@@ -0,0 +1,147 @@
+#ifndef QEMU_RCU_H
+#define QEMU_RCU_H
+
+/*
+ * urcu-mb.h
+ *
+ * Userspace RCU header with explicit memory barrier.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <glib.h>
+
+#include "qemu/compiler.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+#include "qemu/atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Important!
+ *
+ * Each thread containing read-side critical sections must be registered
+ * with rcu_register_thread() before calling rcu_read_lock().
+ * rcu_unregister_thread() should be called before the thread exits.
+ */
+
+#ifdef DEBUG_RCU
+#define rcu_assert(args...)    assert(args)
+#else
+#define rcu_assert(args...)
+#endif
+
+/*
+ * Global quiescent period counter with low-order bits unused.
+ * Using an int rather than a char to eliminate false register dependencies
+ * causing stalls on some architectures.
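[Editor's note: one requirement in the header above deserves emphasis: rcu_read_lock() is only safe in threads that have added themselves to the readers registry. A sketch of the expected lifecycle of a reader thread; the shutdown flag is a hypothetical stand-in for real termination logic:

    static int stop;    /* hypothetical shutdown flag */

    static void *reader_thread(void *opaque)
    {
        rcu_register_thread();         /* before the first rcu_read_lock() */

        while (!atomic_read(&stop)) {
            rcu_read_lock();
            /* ... dereference RCU-protected pointers here ... */
            rcu_read_unlock();
        }

        rcu_unregister_thread();       /* before the thread exits */
        return NULL;
    }
]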
+ */ +extern unsigned long rcu_gp_ctr; + +extern QemuEvent rcu_gp_event; + +struct rcu_reader_data { + /* Data used by both reader and synchronize_rcu() */ + unsigned long ctr; + bool waiting; + + /* Data used by reader only */ + unsigned depth; + + /* Data used for registry, protected by rcu_gp_lock */ + QLIST_ENTRY(rcu_reader_data) node; +}; + +extern __thread struct rcu_reader_data rcu_reader; + +static inline void rcu_read_lock(void) +{ + struct rcu_reader_data *p_rcu_reader = &rcu_reader; + unsigned ctr; + + if (p_rcu_reader->depth++ > 0) { + return; + } + + ctr = atomic_read(&rcu_gp_ctr); + atomic_xchg(&p_rcu_reader->ctr, ctr); + if (atomic_read(&p_rcu_reader->waiting)) { + atomic_set(&p_rcu_reader->waiting, false); + qemu_event_set(&rcu_gp_event); + } +} + +static inline void rcu_read_unlock(void) +{ + struct rcu_reader_data *p_rcu_reader = &rcu_reader; + + assert(p_rcu_reader->depth != 0); + if (--p_rcu_reader->depth > 0) { + return; + } + + atomic_xchg(&p_rcu_reader->ctr, 0); + if (atomic_read(&p_rcu_reader->waiting)) { + atomic_set(&p_rcu_reader->waiting, false); + qemu_event_set(&rcu_gp_event); + } +} + +extern void synchronize_rcu(void); + +/* + * Reader thread registration. + */ +extern void rcu_register_thread(void); +extern void rcu_unregister_thread(void); + +struct rcu_head; +typedef void RCUCBFunc(struct rcu_head *head); + +struct rcu_head { + struct rcu_head *next; + RCUCBFunc *func; +}; + +extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); + +/* The operands of the minus operator must have the same type, + * which must be the one that we specify in the cast. + */ +#define call_rcu(head, func, field) \ + call_rcu1(({ \ + char __attribute__((unused)) \ + offset_must_be_zero[-offsetof(typeof(*(head)), field)], \ + func_type_invalid = (func) - (void (*)(typeof(head)))(func); \ + &(head)->field; \ + }), \ + (RCUCBFunc *)(func)) + +#ifdef __cplusplus +} +#endif + +#endif /* QEMU_RCU_H */ diff --git a/include/qemu/thread.h b/include/qemu/thread.h index e89fdc9785..5114ec8e79 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -25,9 +25,6 @@ void qemu_mutex_lock(QemuMutex *mutex); int qemu_mutex_trylock(QemuMutex *mutex); void qemu_mutex_unlock(QemuMutex *mutex); -#define rcu_read_lock() do { } while (0) -#define rcu_read_unlock() do { } while (0) - void qemu_cond_init(QemuCond *cond); void qemu_cond_destroy(QemuCond *cond); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index ca5befba0e..eba8b2109c 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -838,7 +838,6 @@ static inline int64_t get_clock(void) int64_t cpu_get_icount_raw(void); int64_t cpu_get_icount(void); int64_t cpu_get_clock(void); -int64_t cpu_get_clock_offset(void); int64_t cpu_icount_to_ns(int64_t icount); /*******************************************/ @@ -33,26 +33,12 @@ static bool memory_region_update_pending; static bool ioeventfd_update_pending; static bool global_dirty_log = false; -/* flat_view_mutex is taken around reading as->current_map; the critical - * section is extremely short, so I'm using a single mutex for every AS. - * We could also RCU for the read-side. - * - * The BQL is taken around transaction commits, hence both locks are taken - * while writing to as->current_map (with the BQL taken outside). 
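[Editor's note: the call_rcu() macro above buys two compile-time checks over calling call_rcu1() directly: the negative-size array rejects any rcu_head field that is not at offset zero of its struct, and the dummy pointer subtraction rejects callbacks whose argument type does not match the enclosing struct. A sketch of a conforming user; names are hypothetical:

    typedef struct Foo {
        struct rcu_head rcu;    /* must be the first member: offset zero */
        char *name;
    } Foo;

    static void foo_free(Foo *foo)      /* argument type must be Foo * */
    {
        g_free(foo->name);
        g_free(foo);
    }

    static void foo_unpublish(Foo *foo)
    {
        /* ... first remove every RCU-visible pointer to foo ... */
        call_rcu(foo, foo_free, rcu);   /* => call_rcu1(&foo->rcu, ...) */
    }
]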
- */ -static QemuMutex flat_view_mutex; - static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners = QTAILQ_HEAD_INITIALIZER(memory_listeners); static QTAILQ_HEAD(, AddressSpace) address_spaces = QTAILQ_HEAD_INITIALIZER(address_spaces); -static void memory_init(void) -{ - qemu_mutex_init(&flat_view_mutex); -} - typedef struct AddrRange AddrRange; /* @@ -242,6 +228,7 @@ struct FlatRange { * order. */ struct FlatView { + struct rcu_head rcu; unsigned ref; FlatRange *ranges; unsigned nr; @@ -654,10 +641,10 @@ static FlatView *address_space_get_flatview(AddressSpace *as) { FlatView *view; - qemu_mutex_lock(&flat_view_mutex); - view = as->current_map; + rcu_read_lock(); + view = atomic_rcu_read(&as->current_map); flatview_ref(view); - qemu_mutex_unlock(&flat_view_mutex); + rcu_read_unlock(); return view; } @@ -766,10 +753,9 @@ static void address_space_update_topology(AddressSpace *as) address_space_update_topology_pass(as, old_view, new_view, false); address_space_update_topology_pass(as, old_view, new_view, true); - qemu_mutex_lock(&flat_view_mutex); - flatview_unref(as->current_map); - as->current_map = new_view; - qemu_mutex_unlock(&flat_view_mutex); + /* Writes are protected by the BQL. */ + atomic_rcu_set(&as->current_map, new_view); + call_rcu(old_view, flatview_unref, rcu); /* Note that all the old MemoryRegions are still alive up to this * point. This relieves most MemoryListeners from the need to @@ -1263,7 +1249,6 @@ static void memory_region_finalize(Object *obj) MemoryRegion *mr = MEMORY_REGION(obj); assert(QTAILQ_EMPTY(&mr->subregions)); - assert(memory_region_transaction_depth == 0); mr->destructor(mr); memory_region_clear_coalescing(mr); g_free((char *)mr->name); @@ -1843,11 +1828,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, } range = addrrange_make(int128_make64(addr), int128_make64(size)); - view = address_space_get_flatview(as); + rcu_read_lock(); + view = atomic_rcu_read(&as->current_map); fr = flatview_lookup(view, range); if (!fr) { - flatview_unref(view); - return ret; + goto out; } while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) { @@ -1864,8 +1849,8 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, ret.offset_within_address_space = int128_get64(range.start); ret.readonly = fr->readonly; memory_region_ref(ret.mr); - - flatview_unref(view); +out: + rcu_read_unlock(); return ret; } @@ -1958,10 +1943,6 @@ void memory_listener_unregister(MemoryListener *listener) void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) { - if (QTAILQ_EMPTY(&address_spaces)) { - memory_init(); - } - memory_region_transaction_begin(); as->root = root; as->current_map = g_new(FlatView, 1); @@ -1975,15 +1956,10 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) memory_region_transaction_commit(); } -void address_space_destroy(AddressSpace *as) +static void do_address_space_destroy(AddressSpace *as) { MemoryListener *listener; - /* Flush out anything from MemoryListeners listening in on this */ - memory_region_transaction_begin(); - as->root = NULL; - memory_region_transaction_commit(); - QTAILQ_REMOVE(&address_spaces, as, address_spaces_link); address_space_destroy_dispatch(as); QTAILQ_FOREACH(listener, &memory_listeners, link) { @@ -1995,6 +1971,21 @@ void address_space_destroy(AddressSpace *as) g_free(as->ioeventfds); } +void address_space_destroy(AddressSpace *as) +{ + /* Flush out anything from MemoryListeners listening in on this */ + memory_region_transaction_begin(); + 
as->root = NULL;
+    memory_region_transaction_commit();
+    QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
+
+    /* At this point, as->dispatch and as->current_map are dummy
+     * entries that the guest should never use.  Wait for the old
+     * values to expire before freeing the data.
+     */
+    call_rcu(as, do_address_space_destroy, rcu);
+}
+
 bool io_mem_read(MemoryRegion *mr, hwaddr addr, uint64_t *pval, unsigned size)
 {
     return memory_region_dispatch_read(mr, addr, pval, size);
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
Binary files differ
index 44873ad181..dbe5a38262 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 115d8bbac6..b678d5ebb8 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -33,7 +33,7 @@ typedef struct ResetInfo {
     uint32_t ipl_continue;
 } ResetInfo;

-ResetInfo save;
+static ResetInfo save;

 static void jump_to_IPL_2(void)
 {
@@ -80,7 +80,7 @@ static void jump_to_IPL_code(uint64_t address)
  */
 static unsigned char _bprs[8*1024]; /* guessed "max" ECKD sector size */
-const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr);
+static const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr);

 static inline void verify_boot_info(BootInfo *bip)
 {
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
index 6a4823d544..ab132e3579 100644
--- a/pc-bios/s390-ccw/bootmap.h
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -15,7 +15,7 @@
 #include "virtio.h"

 typedef uint64_t block_number_t;
-#define NULL_BLOCK_NR 0xffffffffffffffff
+#define NULL_BLOCK_NR 0xffffffffffffffffULL

 #define FREE_SPACE_FILLER '\xAA'
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index f9ec2157ad..6f707bbcd4 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -13,7 +13,7 @@
 char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 uint64_t boot_value;
-struct subchannel_id blk_schid = { .one = 1 };
+static struct subchannel_id blk_schid = { .one = 1 };

 /*
  * Principles of Operations (SA22-7832-09) chapter 17 requires that
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 2b773deafa..ceb7418a50 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -51,6 +51,8 @@ void disabled_wait(void);
 /* main.c */
 void virtio_panic(const char *string);
 void write_subsystem_identification(void);
+extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
+extern uint64_t boot_value;

 /* sclp-ascii.c */
 void sclp_print(const char *string);
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index c0540d1cd4..4dc91a7c43 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -11,7 +11,7 @@
 #include "s390-ccw.h"
 #include "virtio.h"

-struct vring block;
+static struct vring block;

 static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
diff --git a/target-s390x/cc_helper.c b/target-s390x/cc_helper.c
index 373eb176a1..00bc883a8a 100644
--- a/target-s390x/cc_helper.c
+++ b/target-s390x/cc_helper.c
@@ -179,16 +179,11 @@ static uint32_t cc_calc_subu_64(uint64_t a1, uint64_t a2, uint64_t ar)

 static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
 {
-    /* We had borrow-in if normal subtraction isn't equal. */
-    int borrow_in = ar - (a1 - a2);
     int borrow_out;

-    /* If a2 was ULONG_MAX, and borrow_in, then a2 is logically 65 bits,
-       and we must have had borrow out.
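[Editor's note: the cc_calc_subb_* rewrite that starts here, and continues just below, rests on a small identity: with ar computed as a1 - a2 - borrow_in modulo 2^N, a borrow-in happened exactly when ar != a1 - a2, and in that case the effective subtrahend is a2 + 1, so borrow-out occurs iff a2 >= a1 (without borrow-in, iff a2 > a1). A self-contained check of that reasoning, not part of the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Reference: a1 - a2 - b underflows iff a1 < a2 + b (wide integers). */
    static int borrow_out_ref(uint64_t a1, uint64_t a2, int b)
    {
        return a1 < a2 || (b && a1 == a2);
    }

    /* The patch's formulation, driven only by a1, a2 and the result ar. */
    static int borrow_out_new(uint64_t a1, uint64_t a2, uint64_t ar)
    {
        return (ar != a1 - a2) ? (a2 >= a1) : (a2 > a1);
    }

    int main(void)
    {
        uint64_t s[] = { 0, 1, 5, UINT64_MAX - 1, UINT64_MAX };
        for (int i = 0; i < 5; i++) {
            for (int j = 0; j < 5; j++) {
                for (int b = 0; b <= 1; b++) {
                    uint64_t ar = s[i] - s[j] - b;
                    assert(borrow_out_new(s[i], s[j], ar) ==
                           borrow_out_ref(s[i], s[j], b));
                }
            }
        }
        return 0;
    }
]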
*/ - if (borrow_in && a2 == (uint64_t)-1) { - borrow_out = 1; + if (ar != a1 - a2) { /* difference means borrow-in */ + borrow_out = (a2 >= a1); } else { - a2 += borrow_in; borrow_out = (a2 > a1); } @@ -285,16 +280,11 @@ static uint32_t cc_calc_subu_32(uint32_t a1, uint32_t a2, uint32_t ar) static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar) { - /* We had borrow-in if normal subtraction isn't equal. */ - int borrow_in = ar - (a1 - a2); int borrow_out; - /* If a2 was UINT_MAX, and borrow_in, then a2 is logically 65 bits, - and we must have had borrow out. */ - if (borrow_in && a2 == (uint32_t)-1) { - borrow_out = 1; + if (ar != a1 - a2) { /* difference means borrow-in */ + borrow_out = (a2 >= a1); } else { - a2 += borrow_in; borrow_out = (a2 > a1); } diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index c123b6f023..2e2554c4b3 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -133,7 +133,9 @@ typedef struct CPUS390XState { /* reset does memset(0) up to here */ - int cpu_num; + uint32_t cpu_num; + uint32_t machine_type; + uint8_t *storage_keys; uint64_t tod_offset; diff --git a/target-s390x/helper.h b/target-s390x/helper.h index faebfd96aa..8d2c8596bb 100644 --- a/target-s390x/helper.h +++ b/target-s390x/helper.h @@ -111,5 +111,8 @@ DEF_HELPER_FLAGS_2(sacf, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(ipte, TCG_CALL_NO_RWG, void, env, i64, i64) DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env) DEF_HELPER_2(lra, i64, env, i64) +DEF_HELPER_FLAGS_2(lura, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(lurag, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_3(stura, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(sturg, TCG_CALL_NO_WG, void, env, i64, i64) #endif diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def index 4d2feb6977..8d8e47e0bf 100644 --- a/target-s390x/insn-data.def +++ b/target-s390x/insn-data.def @@ -285,8 +285,12 @@ /* EXTRACT ACCESS */ C(0xb24f, EAR, RRE, Z, 0, 0, new, r1_32, ear, 0) +/* EXTRACT CPU ATTRIBUTE */ + C(0xeb4c, ECAG, RSY_a, GIE, 0, a2, r1, 0, ecag, 0) /* EXTRACT FPC */ C(0xb38c, EFPC, RRE, Z, 0, 0, new, r1_32, efpc, 0) +/* EXTRACT PSW */ + C(0xb98d, EPSW, RRE, Z, 0, 0, 0, 0, epsw, 0) /* FIND LEFTMOST ONE */ C(0xb983, FLOGR, RRE, EI, 0, r2_o, r1_P, 0, flogr, 0) @@ -566,6 +570,10 @@ /* SET ACCESS */ C(0xb24e, SAR, RRE, Z, 0, r2_o, 0, 0, sar, 0) +/* SET ADDRESSING MODE */ + D(0x010c, SAM24, E, Z, 0, 0, 0, 0, sam, 0, 0) + D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1) + D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3) /* SET FPC */ C(0xb384, SFPC, RRE, Z, 0, r1_o, 0, 0, sfpc, 0) /* SET FPC AND SIGNAL */ @@ -733,6 +741,9 @@ C(0xb100, LRA, RX_a, Z, 0, a2, r1, 0, lra, 0) C(0xe313, LRAY, RXY_a, LD, 0, a2, r1, 0, lra, 0) C(0xe303, LRAG, RXY_a, Z, 0, a2, r1, 0, lra, 0) +/* LOAD USING REAL ADDRESS */ + C(0xb24b, LURA, RRE, Z, 0, r2, new, r1_32, lura, 0) + C(0xb905, LURAG, RRE, Z, 0, r2, r1, 0, lurag, 0) /* MOVE TO PRIMARY */ C(0xda00, MVCP, SS_d, Z, la1, a2, 0, 0, mvcp, 0) /* MOVE TO SECONDARY */ @@ -743,10 +754,6 @@ C(0xb22a, RRBE, RRE, Z, 0, r2_o, 0, 0, rrbe, 0) /* SERVICE CALL LOGICAL PROCESSOR (PV hypercall) */ C(0xb220, SERVC, RRE, Z, r1_o, r2_o, 0, 0, servc, 0) -/* SET ADDRESSING MODE */ - D(0x010c, SAM24, E, Z, 0, 0, 0, 0, sam, 0, 0) - D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1) - D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3) /* SET ADDRESS SPACE CONTROL FAST */ C(0xb279, SACF, S, Z, 0, a2, 0, 0, sacf, 0) /* SET CLOCK */ @@ -794,6 +801,7 @@ C(0xad00, STOSM, SI, Z, la1, 0, 0, 0, stnosm, 0) /* STORE 
USING REAL ADDRESS */ C(0xb246, STURA, RRE, Z, r1_o, r2_o, 0, 0, stura, 0) + C(0xb925, STURG, RRE, Z, r1_o, r2_o, 0, 0, sturg, 0) /* TEST PROTECTION */ C(0xe501, TPROT, SSE, Z, la1, a2, 0, 0, tprot, 0) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index dcd75055c1..6f2d5b4924 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -1046,7 +1046,7 @@ static void kvm_handle_diag_308(S390CPU *cpu, struct kvm_run *run) uint64_t r1, r3; cpu_synchronize_state(CPU(cpu)); - r1 = (run->s390_sieic.ipa & 0x00f0) >> 8; + r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; r3 = run->s390_sieic.ipa & 0x000f; handle_diag_308(&cpu->env, r1, r3); } @@ -1091,7 +1091,7 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) break; default: DPRINTF("KVM: unknown DIAG: 0x%x\n", func_code); - r = -1; + enter_pgmcheck(cpu, PGM_SPECIFICATION); break; } diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c index 5a55de86a1..d67b345ad1 100644 --- a/target-s390x/mem_helper.c +++ b/target-s390x/mem_helper.c @@ -490,10 +490,18 @@ uint32_t HELPER(ex)(CPUS390XState *env, uint32_t cc, uint64_t v1, helper_mvc(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); break; + case 0x400: + cc = helper_nc(env, l, get_address(env, 0, b1, d1), + get_address(env, 0, b2, d2)); + break; case 0x500: cc = helper_clc(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); break; + case 0x600: + cc = helper_oc(env, l, get_address(env, 0, b1, d1), + get_address(env, 0, b2, d2)); + break; case 0x700: cc = helper_xc(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); @@ -1034,12 +1042,34 @@ void HELPER(ptlb)(CPUS390XState *env) tlb_flush(CPU(cpu), 1); } +/* load using real address */ +uint64_t HELPER(lura)(CPUS390XState *env, uint64_t addr) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + return (uint32_t)ldl_phys(cs->as, get_address(env, 0, 0, addr)); +} + +uint64_t HELPER(lurag)(CPUS390XState *env, uint64_t addr) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + return ldq_phys(cs->as, get_address(env, 0, 0, addr)); +} + /* store using real address */ void HELPER(stura)(CPUS390XState *env, uint64_t addr, uint64_t v1) { CPUState *cs = CPU(s390_env_get_cpu(env)); - stw_phys(cs->as, get_address(env, 0, 0, addr), (uint32_t)v1); + stl_phys(cs->as, get_address(env, 0, 0, addr), (uint32_t)v1); +} + +void HELPER(sturg)(CPUS390XState *env, uint64_t addr, uint64_t v1) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + stq_phys(cs->as, get_address(env, 0, 0, addr), v1); } /* load real address */ diff --git a/target-s390x/translate.c b/target-s390x/translate.c index ab01bc004e..8b36eca718 100644 --- a/target-s390x/translate.c +++ b/target-s390x/translate.c @@ -317,12 +317,14 @@ static inline void gen_illegal_opcode(DisasContext *s) gen_program_exception(s, PGM_SPECIFICATION); } -static inline void check_privileged(DisasContext *s) +#ifndef CONFIG_USER_ONLY +static void check_privileged(DisasContext *s) { if (s->tb->flags & (PSW_MASK_PSTATE >> 32)) { gen_program_exception(s, PGM_PRIVILEGED); } } +#endif static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2) { @@ -2045,12 +2047,37 @@ static ExitStatus op_ear(DisasContext *s, DisasOps *o) return NO_EXIT; } +static ExitStatus op_ecag(DisasContext *s, DisasOps *o) +{ + /* No cache information provided. 
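[Editor's note: the kvm.c change above is more than cosmetic: (ipa & 0x00f0) is at most 0xf0, so the old ">> 8" always yielded r1 = 0; the r1 nibble lives in bits 4-7 of the IPA halfword and needs a shift by 4. With a hypothetical DIAG 0x308 encoding r1 = 5, r3 = 0:

    uint16_t ipa = 0x8350;          /* 0x83 = DIAG, register byte 0x50 */
    int r1 = (ipa & 0x00f0) >> 4;   /* == 5 */
    int r3 = ipa & 0x000f;          /* == 0 */
    /* old code: (ipa & 0x00f0) >> 8 == 0 for every possible ipa */
]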
 */
+    tcg_gen_movi_i64(o->out, -1);
+    return NO_EXIT;
+}
+
 static ExitStatus op_efpc(DisasContext *s, DisasOps *o)
 {
     tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, fpc));
     return NO_EXIT;
 }

+static ExitStatus op_epsw(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s->fields, r1);
+    int r2 = get_field(s->fields, r2);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    /* Note the "subsequently" in the PoO, which implies a defined result
+       if r1 == r2.  Thus we cannot defer these writes to an output hook.  */
+    tcg_gen_shri_i64(t, psw_mask, 32);
+    store_reg32_i64(r1, t);
+    if (r2 != 0) {
+        store_reg32_i64(r2, psw_mask);
+    }
+
+    tcg_temp_free_i64(t);
+    return NO_EXIT;
+}
+
 static ExitStatus op_ex(DisasContext *s, DisasOps *o)
 {
     /* ??? Perhaps a better way to implement EXECUTE is to set a bit in
@@ -2460,6 +2487,24 @@ static ExitStatus op_lm64(DisasContext *s, DisasOps *o)
     return NO_EXIT;
 }

+#ifndef CONFIG_USER_ONLY
+static ExitStatus op_lura(DisasContext *s, DisasOps *o)
+{
+    check_privileged(s);
+    potential_page_fault(s);
+    gen_helper_lura(o->out, cpu_env, o->in2);
+    return NO_EXIT;
+}
+
+static ExitStatus op_lurag(DisasContext *s, DisasOps *o)
+{
+    check_privileged(s);
+    potential_page_fault(s);
+    gen_helper_lurag(o->out, cpu_env, o->in2);
+    return NO_EXIT;
+}
+#endif
+
 static ExitStatus op_mov2(DisasContext *s, DisasOps *o)
 {
     o->out = o->in2;
@@ -2925,19 +2970,42 @@ static ExitStatus op_sacf(DisasContext *s, DisasOps *o)
     /* Addressing mode has changed, so end the block.  */
     return EXIT_PC_STALE;
 }
+#endif

 static ExitStatus op_sam(DisasContext *s, DisasOps *o)
 {
     int sam = s->insn->data;
-    TCGv_i64 tsam = tcg_const_i64(sam);
+    TCGv_i64 tsam;
+    uint64_t mask;

-    /* Overwrite PSW_MASK_64 and PSW_MASK_32 */
-    tcg_gen_deposit_i64(psw_mask, psw_mask, tsam, 31, 2);
+    switch (sam) {
+    case 0:
+        mask = 0xffffff;
+        break;
+    case 1:
+        mask = 0x7fffffff;
+        break;
+    default:
+        mask = -1;
+        break;
+    }
+
+    /* Bizarre but true, we check the address of the current insn for the
+       specification exception, not the next to be executed.  Thus the PoO
+       documents that Bad Things Happen two bytes before the end.  */
+    if (s->pc & ~mask) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return EXIT_NORETURN;
+    }
+    s->next_pc &= mask;
+
+    tsam = tcg_const_i64(sam);
+    tcg_gen_deposit_i64(psw_mask, psw_mask, tsam, 31, 2);
     tcg_temp_free_i64(tsam);
+
+    /* Always exit the TB, since we (may have) changed execution mode.
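[Editor's note: the specification check in op_sam above is easy to misread: it tests s->pc, the address of the SAM instruction itself, against the new mode's address mask, while s->next_pc is merely truncated. Two concrete cases under that logic, with illustrative addresses and straight-line code assumed:

    /* SAM24 at 0x01000002: pc & ~0xffffff != 0 -> PGM_SPECIFICATION    */
    /* SAM24 at 0x00fffffe: accepted; next_pc 0x01000000 truncates to 0 */
]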
*/ return EXIT_PC_STALE; } -#endif static ExitStatus op_sar(DisasContext *s, DisasOps *o) { @@ -3221,8 +3289,14 @@ static ExitStatus op_stctl(DisasContext *s, DisasOps *o) static ExitStatus op_stidp(DisasContext *s, DisasOps *o) { + TCGv_i64 t1 = tcg_temp_new_i64(); + check_privileged(s); tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, cpu_num)); + tcg_gen_ld32u_i64(t1, cpu_env, offsetof(CPUS390XState, machine_type)); + tcg_gen_deposit_i64(o->out, o->out, t1, 32, 32); + tcg_temp_free_i64(t1); + return NO_EXIT; } @@ -3317,6 +3391,14 @@ static ExitStatus op_stura(DisasContext *s, DisasOps *o) gen_helper_stura(cpu_env, o->in2, o->in1); return NO_EXIT; } + +static ExitStatus op_sturg(DisasContext *s, DisasOps *o) +{ + check_privileged(s); + potential_page_fault(s); + gen_helper_sturg(cpu_env, o->in2, o->in1); + return NO_EXIT; +} #endif static ExitStatus op_st8(DisasContext *s, DisasOps *o) diff --git a/tests/Makefile b/tests/Makefile index c2e2e52f22..db5b3c3df1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -60,6 +60,8 @@ gcov-files-test-mul64-y = util/host-utils.c check-unit-y += tests/test-int128$(EXESUF) # all code tested by test-int128 is inside int128.h gcov-files-test-int128-y = +check-unit-y += tests/rcutorture$(EXESUF) +gcov-files-rcutorture-y = util/rcu.c check-unit-y += tests/test-bitops$(EXESUF) check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF) check-unit-y += tests/check-qom-interface$(EXESUF) @@ -223,7 +225,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o \ tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \ - tests/test-opts-visitor.o tests/test-qmp-event.o + tests/test-opts-visitor.o tests/test-qmp-event.o \ + tests/rcutorture.o test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \ tests/test-qapi-event.o @@ -252,6 +255,8 @@ tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o libqemuutil.a tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o tests/test-int128$(EXESUF): tests/test-int128.o +tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a + tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ hw/core/irq.o \ diff --git a/tests/rcutorture.c b/tests/rcutorture.c new file mode 100644 index 0000000000..60a2ccfe2e --- /dev/null +++ b/tests/rcutorture.c @@ -0,0 +1,451 @@ +/* + * rcutorture.c: simple user-level performance/stress test of RCU. + * + * Usage: + * ./rcu <nreaders> rperf [ <seconds> ] + * Run a read-side performance test with the specified + * number of readers for <seconds> seconds. + * ./rcu <nupdaters> uperf [ <seconds> ] + * Run an update-side performance test with the specified + * number of updaters and specified duration. + * ./rcu <nreaders> perf [ <seconds> ] + * Run a combined read/update performance test with the specified + * number of readers and one updater and specified duration. 
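[Editor's note: after the op_stidp change above, STORE CPU ID no longer returns just the CPU number: machine_type is deposited into the upper half of the result. In plain C the stored value corresponds to the following; illustration only, not patch code:

    uint64_t cpu_id = ((uint64_t)env->machine_type << 32) | env->cpu_num;
]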
+ *
+ * The above tests produce output as follows:
+ *
+ * n_reads: 46008000 n_updates: 146026 nreaders: 2 nupdaters: 1 duration: 1
+ * ns/read: 43.4707 ns/update: 6848.1
+ *
+ * The first line lists the total number of RCU reads and updates executed
+ * during the test, the number of reader threads, the number of updater
+ * threads, and the duration of the test in seconds.  The second line
+ * lists the average duration of each type of operation in nanoseconds,
+ * or "nan" if the corresponding type of operation was not performed.
+ *
+ * ./rcu <nreaders> stress [ <seconds> ]
+ *     Run a stress test with the specified number of readers and
+ *     one updater.
+ *
+ * This test produces output as follows:
+ *
+ * n_reads: 114633217 n_updates: 3903415 n_mberror: 0
+ * rcu_stress_count: 114618391 14826 0 0 0 0 0 0 0 0 0
+ *
+ * The first line lists the number of RCU read and update operations
+ * executed, followed by the number of memory-ordering violations
+ * (which will be zero in a correct RCU implementation).  The second
+ * line lists the number of readers observing progressively more stale
+ * data.  A correct RCU implementation will have all but the first two
+ * numbers zero.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2008 Paul E. McKenney, IBM Corporation.
+ */
+
+/*
+ * Test variables.
+ */
+
+#include <glib.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "qemu/atomic.h"
+#include "qemu/rcu.h"
+#include "qemu/compiler.h"
+#include "qemu/thread.h"
+
+long long n_reads = 0LL;
+long n_updates = 0L;
+int nthreadsrunning;
+
+#define GOFLAG_INIT 0
+#define GOFLAG_RUN 1
+#define GOFLAG_STOP 2
+
+static volatile int goflag = GOFLAG_INIT;
+
+#define RCU_READ_RUN 1000
+
+#define NR_THREADS 100
+static QemuThread threads[NR_THREADS];
+static struct rcu_reader_data *data[NR_THREADS];
+static int n_threads;
+
+static void create_thread(void *(*func)(void *))
+{
+    if (n_threads >= NR_THREADS) {
+        fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
+        exit(-1);
+    }
+    qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads],
+                       QEMU_THREAD_JOINABLE);
+    n_threads++;
+}
+
+static void wait_all_threads(void)
+{
+    int i;
+
+    for (i = 0; i < n_threads; i++) {
+        qemu_thread_join(&threads[i]);
+    }
+    n_threads = 0;
+}
+
+/*
+ * Performance test.
+ */ + +static void *rcu_read_perf_test(void *arg) +{ + int i; + long long n_reads_local = 0; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + for (i = 0; i < RCU_READ_RUN; i++) { + rcu_read_lock(); + rcu_read_unlock(); + } + n_reads_local += RCU_READ_RUN; + } + atomic_add(&n_reads, n_reads_local); + + rcu_unregister_thread(); + return NULL; +} + +static void *rcu_update_perf_test(void *arg) +{ + long long n_updates_local = 0; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + synchronize_rcu(); + n_updates_local++; + } + atomic_add(&n_updates, n_updates_local); + + rcu_unregister_thread(); + return NULL; +} + +static void perftestinit(void) +{ + nthreadsrunning = 0; +} + +static void perftestrun(int nthreads, int duration, int nreaders, int nupdaters) +{ + while (atomic_read(&nthreadsrunning) < nthreads) { + g_usleep(1000); + } + goflag = GOFLAG_RUN; + g_usleep(duration * G_USEC_PER_SEC); + goflag = GOFLAG_STOP; + wait_all_threads(); + printf("n_reads: %lld n_updates: %ld nreaders: %d nupdaters: %d duration: %d\n", + n_reads, n_updates, nreaders, nupdaters, duration); + printf("ns/read: %g ns/update: %g\n", + ((duration * 1000*1000*1000.*(double)nreaders) / + (double)n_reads), + ((duration * 1000*1000*1000.*(double)nupdaters) / + (double)n_updates)); + exit(0); +} + +static void perftest(int nreaders, int duration) +{ + int i; + + perftestinit(); + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_perf_test); + } + create_thread(rcu_update_perf_test); + perftestrun(i + 1, duration, nreaders, 1); +} + +static void rperftest(int nreaders, int duration) +{ + int i; + + perftestinit(); + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_perf_test); + } + perftestrun(i, duration, nreaders, 0); +} + +static void uperftest(int nupdaters, int duration) +{ + int i; + + perftestinit(); + for (i = 0; i < nupdaters; i++) { + create_thread(rcu_update_perf_test); + } + perftestrun(i, duration, 0, nupdaters); +} + +/* + * Stress test. 
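[Editor's note: perftestrun() above normalizes per thread: the available read-side time is duration * 10^9 ns multiplied by nreaders. Plugging in the sample figures quoted in the file header (46,008,000 reads by 2 readers over 1 second):

    ns/read = (1 * 1e9 * 2) / 46008000 = 43.4707

which matches the "ns/read: 43.4707" sample output.]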
+ */ + +#define RCU_STRESS_PIPE_LEN 10 + +struct rcu_stress { + int pipe_count; + int mbtest; +}; + +struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } }; +struct rcu_stress *rcu_stress_current; +int rcu_stress_idx; + +int n_mberror; +long long rcu_stress_count[RCU_STRESS_PIPE_LEN + 1]; + + +static void *rcu_read_stress_test(void *arg) +{ + int i; + int itercnt = 0; + struct rcu_stress *p; + int pc; + long long n_reads_local = 0; + volatile int garbage = 0; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + rcu_read_lock(); + p = atomic_rcu_read(&rcu_stress_current); + if (p->mbtest == 0) { + n_mberror++; + } + rcu_read_lock(); + for (i = 0; i < 100; i++) { + garbage++; + } + rcu_read_unlock(); + pc = p->pipe_count; + rcu_read_unlock(); + if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0)) { + pc = RCU_STRESS_PIPE_LEN; + } + atomic_inc(&rcu_stress_count[pc]); + n_reads_local++; + if ((++itercnt % 0x1000) == 0) { + synchronize_rcu(); + } + } + atomic_add(&n_reads, n_reads_local); + + rcu_unregister_thread(); + return NULL; +} + +static void *rcu_update_stress_test(void *arg) +{ + int i; + struct rcu_stress *p; + + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + i = rcu_stress_idx + 1; + if (i >= RCU_STRESS_PIPE_LEN) { + i = 0; + } + p = &rcu_stress_array[i]; + p->mbtest = 0; + smp_mb(); + p->pipe_count = 0; + p->mbtest = 1; + atomic_rcu_set(&rcu_stress_current, p); + rcu_stress_idx = i; + for (i = 0; i < RCU_STRESS_PIPE_LEN; i++) { + if (i != rcu_stress_idx) { + rcu_stress_array[i].pipe_count++; + } + } + synchronize_rcu(); + n_updates++; + } + + rcu_unregister_thread(); + return NULL; +} + +static void *rcu_fake_update_stress_test(void *arg) +{ + rcu_register_thread(); + + *(struct rcu_reader_data **)arg = &rcu_reader; + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + while (goflag == GOFLAG_RUN) { + synchronize_rcu(); + g_usleep(1000); + } + + rcu_unregister_thread(); + return NULL; +} + +static void stresstest(int nreaders, int duration) +{ + int i; + + rcu_stress_current = &rcu_stress_array[0]; + rcu_stress_current->pipe_count = 0; + rcu_stress_current->mbtest = 1; + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_stress_test); + } + create_thread(rcu_update_stress_test); + for (i = 0; i < 5; i++) { + create_thread(rcu_fake_update_stress_test); + } + goflag = GOFLAG_RUN; + g_usleep(duration * G_USEC_PER_SEC); + goflag = GOFLAG_STOP; + wait_all_threads(); + printf("n_reads: %lld n_updates: %ld n_mberror: %d\n", + n_reads, n_updates, n_mberror); + printf("rcu_stress_count:"); + for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) { + printf(" %lld", rcu_stress_count[i]); + } + printf("\n"); + exit(0); +} + +/* GTest interface */ + +static void gtest_stress(int nreaders, int duration) +{ + int i; + + rcu_stress_current = &rcu_stress_array[0]; + rcu_stress_current->pipe_count = 0; + rcu_stress_current->mbtest = 1; + for (i = 0; i < nreaders; i++) { + create_thread(rcu_read_stress_test); + } + create_thread(rcu_update_stress_test); + for (i = 0; i < 5; i++) { + create_thread(rcu_fake_update_stress_test); + } + goflag = GOFLAG_RUN; + g_usleep(duration * G_USEC_PER_SEC); + goflag = GOFLAG_STOP; + wait_all_threads(); + g_assert_cmpint(n_mberror, ==, 0); + for (i = 2; i <= RCU_STRESS_PIPE_LEN; i++) { + g_assert_cmpint(rcu_stress_count[i], ==, 0); + } 
+}
+
+static void gtest_stress_1_1(void)
+{
+    gtest_stress(1, 1);
+}
+
+static void gtest_stress_10_1(void)
+{
+    gtest_stress(10, 1);
+}
+
+static void gtest_stress_1_5(void)
+{
+    gtest_stress(1, 5);
+}
+
+static void gtest_stress_10_5(void)
+{
+    gtest_stress(10, 5);
+}
+
+/*
+ * Main program.
+ */
+
+static void usage(int argc, char *argv[])
+{
+    fprintf(stderr, "Usage: %s [nreaders [ perf | stress ] ]\n", argv[0]);
+    exit(-1);
+}
+
+int main(int argc, char *argv[])
+{
+    int nreaders = 1;
+    int duration = 1;
+
+    if (argc >= 2 && argv[1][0] == '-') {
+        g_test_init(&argc, &argv, NULL);
+        if (g_test_quick()) {
+            g_test_add_func("/rcu/torture/1reader", gtest_stress_1_1);
+            g_test_add_func("/rcu/torture/10readers", gtest_stress_10_1);
+        } else {
+            g_test_add_func("/rcu/torture/1reader", gtest_stress_1_5);
+            g_test_add_func("/rcu/torture/10readers", gtest_stress_10_5);
+        }
+        return g_test_run();
+    }
+
+    if (argc >= 2) {
+        nreaders = strtoul(argv[1], NULL, 0);
+    }
+    if (argc > 3) {
+        duration = strtoul(argv[3], NULL, 0);
+    }
+    if (argc < 3 || strcmp(argv[2], "stress") == 0) {
+        stresstest(nreaders, duration);
+    } else if (strcmp(argv[2], "rperf") == 0) {
+        rperftest(nreaders, duration);
+    } else if (strcmp(argv[2], "uperf") == 0) {
+        uperftest(nreaders, duration);
+    } else if (strcmp(argv[2], "perf") == 0) {
+        perftest(nreaders, duration);
+    }
+    usage(argc, argv);
+    return 0;
+}
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 93007e2f56..ceaba30939 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -17,3 +17,4 @@ util-obj-y += throttle.o
 util-obj-y += getauxval.o
 util-obj-y += readline.o
 util-obj-y += rfifolock.o
+util-obj-y += rcu.o
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 41cb23df0c..50a29d8f7a 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -307,11 +307,13 @@ static inline void futex_wait(QemuEvent *ev, unsigned val)
 #else
 static inline void futex_wake(QemuEvent *ev, int n)
 {
+    pthread_mutex_lock(&ev->lock);
     if (n == 1) {
         pthread_cond_signal(&ev->cond);
     } else {
         pthread_cond_broadcast(&ev->cond);
     }
+    pthread_mutex_unlock(&ev->lock);
 }

 static inline void futex_wait(QemuEvent *ev, unsigned val)
diff --git a/util/rcu.c b/util/rcu.c
new file mode 100644
index 0000000000..c9c3e6e4ab
--- /dev/null
+++ b/util/rcu.c
@@ -0,0 +1,291 @@
+/*
+ * urcu-mb.c
+ *
+ * Userspace RCU library with explicit memory barriers
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Ported to QEMU by Paolo Bonzini  <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include "qemu-common.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include "qemu/rcu.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+
+/*
+ * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
+ * Bits 1 and above are defined in synchronize_rcu.
+ */
+#define RCU_GP_LOCKED (1UL << 0)
+#define RCU_GP_CTR    (1UL << 1)
+
+unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
+
+QemuEvent rcu_gp_event;
+static QemuMutex rcu_gp_lock;
+
+/*
+ * Check whether a quiescent state was crossed between the beginning of
+ * wait_for_readers and now.
+ */
+static inline int rcu_gp_ongoing(unsigned long *ctr)
+{
+    unsigned long v;
+
+    v = atomic_read(ctr);
+    return v && (v != rcu_gp_ctr);
+}
+
+/* Written to only by each individual reader.  Read by both the reader and the
+ * writers.
+ */
+__thread struct rcu_reader_data rcu_reader;
+
+/* Protected by rcu_gp_lock. */
+typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
+static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
+
+/* Wait for previous parity/grace period to be empty of readers. */
+static void wait_for_readers(void)
+{
+    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
+    struct rcu_reader_data *index, *tmp;
+
+    for (;;) {
+        /* We want to be notified of changes made to rcu_gp_ongoing
+         * while we walk the list.
+         */
+        qemu_event_reset(&rcu_gp_event);
+
+        /* Instead of using atomic_mb_set for index->waiting, and
+         * atomic_mb_read for index->ctr, memory barriers are placed
+         * manually since writes to different threads are independent.
+         * atomic_mb_set has a smp_wmb before...
+         */
+        smp_wmb();
+        QLIST_FOREACH(index, &registry, node) {
+            atomic_set(&index->waiting, true);
+        }
+
+        /* ... and a smp_mb after. */
+        smp_mb();
+
+        QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
+            if (!rcu_gp_ongoing(&index->ctr)) {
+                QLIST_REMOVE(index, node);
+                QLIST_INSERT_HEAD(&qsreaders, index, node);
+
+                /* No need for mb_set here, worst of all we
+                 * get some extra futex wakeups.
+                 */
+                atomic_set(&index->waiting, false);
+            }
+        }
+
+        /* atomic_mb_read has smp_rmb after. */
+        smp_rmb();
+
+        if (QLIST_EMPTY(&registry)) {
+            break;
+        }
+
+        /* Wait for one thread to report a quiescent state and
+         * try again.
+         */
+        qemu_event_wait(&rcu_gp_event);
+    }
+
+    /* put back the reader list in the registry */
+    QLIST_SWAP(&registry, &qsreaders, node);
+}
+
+void synchronize_rcu(void)
+{
+    qemu_mutex_lock(&rcu_gp_lock);
+
+    if (!QLIST_EMPTY(&registry)) {
+        /* In either case, the atomic_mb_set below blocks stores that free
+         * old RCU-protected pointers.
+         */
+        if (sizeof(rcu_gp_ctr) < 8) {
+            /* For architectures with 32-bit longs, a two-subphases algorithm
+             * ensures we do not encounter overflow bugs.
+             *
+             * Switch parity: 0 -> 1, 1 -> 0.
+             */
+            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            wait_for_readers();
+            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+        } else {
+            /* Increment current grace period. */
+            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+        }
+
+        wait_for_readers();
+    }
+
+    qemu_mutex_unlock(&rcu_gp_lock);
+}
+
+
+#define RCU_CALL_MIN_SIZE        30
+
+/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
+ * from liburcu.  Note that head is only used by the consumer.
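[Editor's note: rcu_gp_ongoing() above compresses the whole grace-period test into two comparisons. Spelled out as an illustrative summary, not patch code:

    reader ctr        meaning                                  must wait?
    0                 not inside a critical section            no
    rcu_gp_ctr        section began after the counter moved    no
    anything else     section predates this grace period       yes
]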
+ */
+static struct rcu_head dummy;
+static struct rcu_head *head = &dummy, **tail = &dummy.next;
+static int rcu_call_count;
+static QemuEvent rcu_call_ready_event;
+
+static void enqueue(struct rcu_head *node)
+{
+    struct rcu_head **old_tail;
+
+    node->next = NULL;
+    old_tail = atomic_xchg(&tail, &node->next);
+    atomic_mb_set(old_tail, node);
+}
+
+static struct rcu_head *try_dequeue(void)
+{
+    struct rcu_head *node, *next;
+
+retry:
+    /* Test for an empty list, which we do not expect.  Note that for
+     * the consumer head and tail are always consistent.  The head
+     * is consistent because only the consumer reads/writes it.
+     * The tail is consistent because updating it is the first step
+     * in enqueuing.  It is only the next pointers that might be
+     * inconsistent.
+     */
+    if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) {
+        abort();
+    }
+
+    /* If the head node has NULL in its next pointer, the value is
+     * wrong and we need to wait until its enqueuer finishes the update.
+     */
+    node = head;
+    next = atomic_mb_read(&head->next);
+    if (!next) {
+        return NULL;
+    }
+
+    /* Since we are the sole consumer, and we excluded the empty case
+     * above, the queue will always have at least two nodes: the
+     * dummy node, and the one being removed.  So we do not need to update
+     * the tail pointer.
+     */
+    head = next;
+
+    /* If we dequeued the dummy node, add it back at the end and retry. */
+    if (node == &dummy) {
+        enqueue(node);
+        goto retry;
+    }
+
+    return node;
+}
+
+static void *call_rcu_thread(void *opaque)
+{
+    struct rcu_head *node;
+
+    for (;;) {
+        int tries = 0;
+        int n = atomic_read(&rcu_call_count);
+
+        /* Heuristically wait for a decent number of callbacks to pile up.
+         * Fetch rcu_call_count now; we must only process elements that were
+         * added before synchronize_rcu() starts.
+         */
+        while (n < RCU_CALL_MIN_SIZE && ++tries <= 5) {
+            g_usleep(100000);
+            qemu_event_reset(&rcu_call_ready_event);
+            n = atomic_read(&rcu_call_count);
+            if (n < RCU_CALL_MIN_SIZE) {
+                qemu_event_wait(&rcu_call_ready_event);
+                n = atomic_read(&rcu_call_count);
+            }
+        }
+
+        atomic_sub(&rcu_call_count, n);
+        synchronize_rcu();
+        while (n > 0) {
+            node = try_dequeue();
+            while (!node) {
+                qemu_event_reset(&rcu_call_ready_event);
+                node = try_dequeue();
+                if (!node) {
+                    qemu_event_wait(&rcu_call_ready_event);
+                    node = try_dequeue();
+                }
+            }
+
+            n--;
+            node->func(node);
+        }
+    }
+    abort();
+}
+
+void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
+{
+    node->func = func;
+    enqueue(node);
+    atomic_inc(&rcu_call_count);
+    qemu_event_set(&rcu_call_ready_event);
+}
+
+void rcu_register_thread(void)
+{
+    assert(rcu_reader.ctr == 0);
+    qemu_mutex_lock(&rcu_gp_lock);
+    QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+    qemu_mutex_unlock(&rcu_gp_lock);
+}
+
+void rcu_unregister_thread(void)
+{
+    qemu_mutex_lock(&rcu_gp_lock);
+    QLIST_REMOVE(&rcu_reader, node);
+    qemu_mutex_unlock(&rcu_gp_lock);
+}
+
+static void __attribute__((__constructor__)) rcu_init(void)
+{
+    QemuThread thread;
+
+    qemu_mutex_init(&rcu_gp_lock);
+    qemu_event_init(&rcu_gp_event, true);
+
+    qemu_event_init(&rcu_call_ready_event, false);
+    qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
+                       NULL, QEMU_THREAD_DETACHED);
+
+    rcu_register_thread();
+}
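[Editor's note: one usage note on call_rcu1(): unlike the call_rcu() macro, it places no constraint on where the rcu_head lives, at the cost of recovering the enclosing object by hand. A sketch, with struct and function names hypothetical:

    #include <stddef.h>    /* for offsetof */

    struct Bar {
        int x;
        struct rcu_head rcu;    /* not the first field: use call_rcu1() */
    };

    static void bar_reclaim(struct rcu_head *head)
    {
        /* container_of-style recovery of the enclosing object */
        struct Bar *bar =
            (struct Bar *)((char *)head - offsetof(struct Bar, rcu));
        g_free(bar);
    }

    /* after unpublishing bar: call_rcu1(&bar->rcu, bar_reclaim); */
]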