aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cpu-defs.h1
-rw-r--r--cpu-exec.c25
-rw-r--r--exec-all.h212
-rw-r--r--exec.c13
-rw-r--r--linux-user/elfload.c4
-rw-r--r--linux-user/main.c146
-rw-r--r--linux-user/mmap.c16
-rw-r--r--linux-user/qemu.h14
-rw-r--r--linux-user/signal.c4
-rw-r--r--qemu-lock.h249
10 files changed, 447 insertions, 237 deletions
diff --git a/cpu-defs.h b/cpu-defs.h
index f7f5f1730d..5804521a7e 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -166,6 +166,7 @@ typedef struct CPUTLBEntry {
\
void *next_cpu; /* next CPU sharing TB cache */ \
int cpu_index; /* CPU index (informative) */ \
+ int running; /* Nonzero if cpu is currently running(usermode). */ \
/* user data */ \
void *opaque; \
\
diff --git a/cpu-exec.c b/cpu-exec.c
index a8e67e8add..3a1ff4e077 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -44,7 +44,6 @@
#endif
int tb_invalidated_flag;
-static unsigned long next_tb;
//#define DEBUG_EXEC
//#define DEBUG_SIGNAL
@@ -93,8 +92,6 @@ static TranslationBlock *tb_find_slow(target_ulong pc,
target_ulong phys_pc, phys_page1, phys_page2, virt_page2;
uint8_t *tc_ptr;
- spin_lock(&tb_lock);
-
tb_invalidated_flag = 0;
regs_to_env(); /* XXX: do it just before cpu_gen_code() */
@@ -155,7 +152,6 @@ static TranslationBlock *tb_find_slow(target_ulong pc,
found:
/* we add the TB in the virtual pc hash table */
env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
- spin_unlock(&tb_lock);
return tb;
}
@@ -228,14 +224,6 @@ static inline TranslationBlock *tb_find_fast(void)
if (__builtin_expect(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags, 0)) {
tb = tb_find_slow(pc, cs_base, flags);
- /* Note: we do it here to avoid a gcc bug on Mac OS X when
- doing it in tb_find_slow */
- if (tb_invalidated_flag) {
- /* as some TB could have been invalidated because
- of memory exceptions while generating the code, we
- must recompute the hash index here */
- next_tb = 0;
- }
}
return tb;
}
@@ -249,6 +237,7 @@ int cpu_exec(CPUState *env1)
int ret, interrupt_request;
TranslationBlock *tb;
uint8_t *tc_ptr;
+ unsigned long next_tb;
if (cpu_halted(env1) == EXCP_HALTED)
return EXCP_HALTED;
@@ -577,7 +566,16 @@ int cpu_exec(CPUState *env1)
#endif
}
#endif
+ spin_lock(&tb_lock);
tb = tb_find_fast();
+ /* Note: we do it here to avoid a gcc bug on Mac OS X when
+ doing it in tb_find_slow */
+ if (tb_invalidated_flag) {
+ /* as some TB could have been invalidated because
+ of memory exceptions while generating the code, we
+ must recompute the hash index here */
+ next_tb = 0;
+ }
#ifdef DEBUG_EXEC
if ((loglevel & CPU_LOG_EXEC)) {
fprintf(logfile, "Trace 0x%08lx [" TARGET_FMT_lx "] %s\n",
@@ -594,11 +592,10 @@ int cpu_exec(CPUState *env1)
(env->kqemu_enabled != 2) &&
#endif
tb->page_addr[1] == -1) {
- spin_lock(&tb_lock);
tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
- spin_unlock(&tb_lock);
}
}
+ spin_unlock(&tb_lock);
tc_ptr = tb->tc_ptr;
env->current_tb = tb;
/* execute the generated code */
diff --git a/exec-all.h b/exec-all.h
index ba6f6da5a0..62a9394873 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -302,217 +302,7 @@ extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
-#if defined(__hppa__)
-
-typedef int spinlock_t[4];
-
-#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 }
-
-static inline void resetlock (spinlock_t *p)
-{
- (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1;
-}
-
-#else
-
-typedef int spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED 0
-
-static inline void resetlock (spinlock_t *p)
-{
- *p = SPIN_LOCK_UNLOCKED;
-}
-
-#endif
-
-#if defined(__powerpc__)
-static inline int testandset (int *p)
-{
- int ret;
- __asm__ __volatile__ (
- "0: lwarx %0,0,%1\n"
- " xor. %0,%3,%0\n"
- " bne 1f\n"
- " stwcx. %2,0,%1\n"
- " bne- 0b\n"
- "1: "
- : "=&r" (ret)
- : "r" (p), "r" (1), "r" (0)
- : "cr0", "memory");
- return ret;
-}
-#elif defined(__i386__)
-static inline int testandset (int *p)
-{
- long int readval = 0;
-
- __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
- : "+m" (*p), "+a" (readval)
- : "r" (1)
- : "cc");
- return readval;
-}
-#elif defined(__x86_64__)
-static inline int testandset (int *p)
-{
- long int readval = 0;
-
- __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
- : "+m" (*p), "+a" (readval)
- : "r" (1)
- : "cc");
- return readval;
-}
-#elif defined(__s390__)
-static inline int testandset (int *p)
-{
- int ret;
-
- __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
- " jl 0b"
- : "=&d" (ret)
- : "r" (1), "a" (p), "0" (*p)
- : "cc", "memory" );
- return ret;
-}
-#elif defined(__alpha__)
-static inline int testandset (int *p)
-{
- int ret;
- unsigned long one;
-
- __asm__ __volatile__ ("0: mov 1,%2\n"
- " ldl_l %0,%1\n"
- " stl_c %2,%1\n"
- " beq %2,1f\n"
- ".subsection 2\n"
- "1: br 0b\n"
- ".previous"
- : "=r" (ret), "=m" (*p), "=r" (one)
- : "m" (*p));
- return ret;
-}
-#elif defined(__sparc__)
-static inline int testandset (int *p)
-{
- int ret;
-
- __asm__ __volatile__("ldstub [%1], %0"
- : "=r" (ret)
- : "r" (p)
- : "memory");
-
- return (ret ? 1 : 0);
-}
-#elif defined(__arm__)
-static inline int testandset (int *spinlock)
-{
- register unsigned int ret;
- __asm__ __volatile__("swp %0, %1, [%2]"
- : "=r"(ret)
- : "0"(1), "r"(spinlock));
-
- return ret;
-}
-#elif defined(__mc68000)
-static inline int testandset (int *p)
-{
- char ret;
- __asm__ __volatile__("tas %1; sne %0"
- : "=r" (ret)
- : "m" (p)
- : "cc","memory");
- return ret;
-}
-#elif defined(__hppa__)
-
-/* Because malloc only guarantees 8-byte alignment for malloc'd data,
- and GCC only guarantees 8-byte alignment for stack locals, we can't
- be assured of 16-byte alignment for atomic lock data even if we
- specify "__attribute ((aligned(16)))" in the type declaration. So,
- we use a struct containing an array of four ints for the atomic lock
- type and dynamically select the 16-byte aligned int from the array
- for the semaphore. */
-#define __PA_LDCW_ALIGNMENT 16
-static inline void *ldcw_align (void *p) {
- unsigned long a = (unsigned long)p;
- a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1);
- return (void *)a;
-}
-
-static inline int testandset (spinlock_t *p)
-{
- unsigned int ret;
- p = ldcw_align(p);
- __asm__ __volatile__("ldcw 0(%1),%0"
- : "=r" (ret)
- : "r" (p)
- : "memory" );
- return !ret;
-}
-
-#elif defined(__ia64)
-
-#include <ia64intrin.h>
-
-static inline int testandset (int *p)
-{
- return __sync_lock_test_and_set (p, 1);
-}
-#elif defined(__mips__)
-static inline int testandset (int *p)
-{
- int ret;
-
- __asm__ __volatile__ (
- " .set push \n"
- " .set noat \n"
- " .set mips2 \n"
- "1: li $1, 1 \n"
- " ll %0, %1 \n"
- " sc $1, %1 \n"
- " beqz $1, 1b \n"
- " .set pop "
- : "=r" (ret), "+R" (*p)
- :
- : "memory");
-
- return ret;
-}
-#else
-#error unimplemented CPU support
-#endif
-
-#if defined(CONFIG_USER_ONLY)
-static inline void spin_lock(spinlock_t *lock)
-{
- while (testandset(lock));
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- resetlock(lock);
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return !testandset(lock);
-}
-#else
-static inline void spin_lock(spinlock_t *lock)
-{
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return 1;
-}
-#endif
+#include "qemu-lock.h"
extern spinlock_t tb_lock;
diff --git a/exec.c b/exec.c
index 4480dfa177..ffe4cc9da1 100644
--- a/exec.c
+++ b/exec.c
@@ -1341,10 +1341,20 @@ void cpu_set_log_filename(const char *filename)
/* mask must never be zero, except for A20 change call */
void cpu_interrupt(CPUState *env, int mask)
{
+#if !defined(USE_NPTL)
TranslationBlock *tb;
static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
+#endif
+ /* FIXME: This is probably not threadsafe. A different thread could
+ be in the middle of a read-modify-write operation. */
env->interrupt_request |= mask;
+#if defined(USE_NPTL)
+ /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
+ problem and hope the cpu will stop of its own accord. For userspace
+ emulation this often isn't actually as bad as it sounds. Often
+ signals are used primarily to interrupt blocking syscalls. */
+#else
/* if the cpu is currently executing code, we must unlink it and
all the potentially executing TB */
tb = env->current_tb;
@@ -1353,6 +1363,7 @@ void cpu_interrupt(CPUState *env, int mask)
tb_reset_jump_recursive(tb);
resetlock(&interrupt_lock);
}
+#endif
}
void cpu_reset_interrupt(CPUState *env, int mask)
@@ -2015,7 +2026,6 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
end = TARGET_PAGE_ALIGN(end);
if (flags & PAGE_WRITE)
flags |= PAGE_WRITE_ORG;
- spin_lock(&tb_lock);
for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
p = page_find_alloc(addr >> TARGET_PAGE_BITS);
/* if the write protection is set, then we invalidate the code
@@ -2027,7 +2037,6 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
}
p->flags = flags;
}
- spin_unlock(&tb_lock);
}
int page_check_range(target_ulong start, target_ulong len, int flags)
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index ca0023e62e..67b7535d4c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -89,7 +89,7 @@ enum {
static const char *get_elf_platform(void)
{
static char elf_platform[] = "i386";
- int family = (global_env->cpuid_version >> 8) & 0xff;
+ int family = (thread_env->cpuid_version >> 8) & 0xff;
if (family > 6)
family = 6;
if (family >= 3)
@@ -101,7 +101,7 @@ static const char *get_elf_platform(void)
static uint32_t get_elf_hwcap(void)
{
- return global_env->cpuid_features;
+ return thread_env->cpuid_features;
}
#ifdef TARGET_X86_64
diff --git a/linux-user/main.c b/linux-user/main.c
index 4bdec7e9bc..1f68766f0f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -26,6 +26,8 @@
#include "qemu.h"
#include "qemu-common.h"
+/* For tb_lock */
+#include "exec-all.h"
#define DEBUG_LOGFILE "/tmp/qemu.log"
@@ -123,6 +125,135 @@ int64_t cpu_get_real_ticks(void)
#endif
+#if defined(USE_NPTL)
+/***********************************************************/
+/* Helper routines for implementing atomic operations. */
+
+/* To implement exclusive operations we force all cpus to synchronise.
+ We don't require a full sync, only that no cpus are executing guest code.
+ The alternative is to map target atomic ops onto host equivalents,
+ which requires quite a lot of per host/target work. */
+static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
+static int pending_cpus;
+
+/* Make sure everything is in a consistent state for calling fork(). */
+void fork_start(void)
+{
+ mmap_fork_start();
+ pthread_mutex_lock(&tb_lock);
+ pthread_mutex_lock(&exclusive_lock);
+}
+
+void fork_end(int child)
+{
+ if (child) {
+ /* Child processes created by fork() only have a single thread.
+ Discard information about the parent threads. */
+ first_cpu = thread_env;
+ thread_env->next_cpu = NULL;
+ pending_cpus = 0;
+ pthread_mutex_init(&exclusive_lock, NULL);
+ pthread_cond_init(&exclusive_cond, NULL);
+ pthread_cond_init(&exclusive_resume, NULL);
+ pthread_mutex_init(&tb_lock, NULL);
+ } else {
+ pthread_mutex_unlock(&exclusive_lock);
+ pthread_mutex_unlock(&tb_lock);
+ }
+ mmap_fork_end(child);
+}
+
+/* Wait for pending exclusive operations to complete. The exclusive lock
+ must be held. */
+static inline void exclusive_idle(void)
+{
+ while (pending_cpus) {
+ pthread_cond_wait(&exclusive_resume, &exclusive_lock);
+ }
+}
+
+/* Start an exclusive operation: returns with exclusive_lock held once no other
+   cpu is marked running.  Must only be called from outside cpu_arm_exec. */
+static inline void start_exclusive(void)
+{
+    CPUState *other;
+    pthread_mutex_lock(&exclusive_lock);
+    exclusive_idle();
+
+    pending_cpus = 1;
+    /* Make all other cpus stop executing. */
+    for (other = first_cpu; other; other = other->next_cpu) {
+        if (other->running) {
+            pending_cpus++;
+            cpu_interrupt(other, CPU_INTERRUPT_EXIT);
+        }
+    }
+    while (pending_cpus > 1) { /* loop: condvar wakeups may be spurious */
+        pthread_cond_wait(&exclusive_cond, &exclusive_lock);
+    }
+}
+
+/* Finish an exclusive operation. */
+static inline void end_exclusive(void)
+{
+ pending_cpus = 0;
+ pthread_cond_broadcast(&exclusive_resume);
+ pthread_mutex_unlock(&exclusive_lock);
+}
+
+/* Wait for exclusive ops to finish, and begin cpu execution. */
+static inline void cpu_exec_start(CPUState *env)
+{
+ pthread_mutex_lock(&exclusive_lock);
+ exclusive_idle();
+ env->running = 1;
+ pthread_mutex_unlock(&exclusive_lock);
+}
+
+/* Mark cpu as not executing, and release pending exclusive ops. */
+static inline void cpu_exec_end(CPUState *env)
+{
+ pthread_mutex_lock(&exclusive_lock);
+ env->running = 0;
+ if (pending_cpus > 1) {
+ pending_cpus--;
+ if (pending_cpus == 1) {
+ pthread_cond_signal(&exclusive_cond);
+ }
+ }
+ exclusive_idle();
+ pthread_mutex_unlock(&exclusive_lock);
+}
+#else /* if !USE_NPTL */
+/* These are no-ops because we are not threadsafe. */
+static inline void cpu_exec_start(CPUState *env)
+{
+}
+
+static inline void cpu_exec_end(CPUState *env)
+{
+}
+
+static inline void start_exclusive(void)
+{
+}
+
+static inline void end_exclusive(void)
+{
+}
+
+void fork_start(void)
+{
+}
+
+void fork_end(int child)
+{
+}
+#endif
+
+
#ifdef TARGET_I386
/***********************************************************/
/* CPUX86 core interface */
@@ -378,8 +509,11 @@ do_kernel_trap(CPUARMState *env)
/* ??? No-op. Will need to do better for SMP. */
break;
case 0xffff0fc0: /* __kernel_cmpxchg */
- /* ??? This is not really atomic. However we don't support
- threads anyway, so it doesn't realy matter. */
+ /* XXX: This only works between threads, not between processes.
+ It's probably possible to implement this with native host
+ operations. However things like ldrex/strex are much harder so
+ there's not much point trying. */
+ start_exclusive();
cpsr = cpsr_read(env);
addr = env->regs[2];
/* FIXME: This should SEGV if the access fails. */
@@ -396,6 +530,7 @@ do_kernel_trap(CPUARMState *env)
cpsr &= ~CPSR_C;
}
cpsr_write(env, cpsr, CPSR_C);
+ end_exclusive();
break;
case 0xffff0fe0: /* __kernel_get_tls */
env->regs[0] = env->cp15.c13_tls2;
@@ -422,7 +557,9 @@ void cpu_loop(CPUARMState *env)
uint32_t addr;
for(;;) {
+ cpu_exec_start(env);
trapnr = cpu_arm_exec(env);
+ cpu_exec_end(env);
switch(trapnr) {
case EXCP_UDEF:
{
@@ -2044,8 +2181,7 @@ void usage(void)
_exit(1);
}
-/* XXX: currently only used for async signals (see signal.c) */
-CPUState *global_env;
+THREAD CPUState *thread_env;
void init_task_state(TaskState *ts)
{
@@ -2203,7 +2339,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
- global_env = env;
+ thread_env = env;
if (getenv("QEMU_STRACE")) {
do_strace = 1;
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index c0821386d9..b4ca1074b3 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -46,6 +46,22 @@ void mmap_unlock(void)
pthread_mutex_unlock(&mmap_mutex);
}
}
+
+/* Grab lock to make sure things are in a consistent state after fork(). */
+void mmap_fork_start(void)
+{
+ if (mmap_lock_count)
+ abort();
+ pthread_mutex_lock(&mmap_mutex);
+}
+
+void mmap_fork_end(int child)
+{
+ if (child)
+ pthread_mutex_init(&mmap_mutex, NULL);
+ else
+ pthread_mutex_unlock(&mmap_mutex);
+}
#else
/* We aren't threadsafe to start with, so no need to worry about locking. */
void mmap_lock(void)
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 81f7fb290e..d3a3c3c20f 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -37,6 +37,12 @@ typedef target_long abi_long;
#include "target_signal.h"
#include "gdbstub.h"
+#if defined(USE_NPTL)
+#define THREAD __thread
+#else
+#define THREAD
+#endif
+
/* This struct is used to hold certain information about the image.
* Basically, it replicates in user space what would be certain
* task_struct fields in the kernel
@@ -184,12 +190,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
abi_long arg2, abi_long arg3, abi_long arg4,
abi_long arg5, abi_long arg6);
void gemu_log(const char *fmt, ...) __attribute__((format(printf,1,2)));
-extern CPUState *global_env;
+extern THREAD CPUState *thread_env;
void cpu_loop(CPUState *env);
void init_paths(const char *prefix);
const char *path(const char *pathname);
char *target_strerror(int err);
int get_osversion(void);
+void fork_start(void);
+void fork_end(int child);
extern int loglevel;
extern FILE *logfile;
@@ -235,6 +243,10 @@ int target_msync(abi_ulong start, abi_ulong len, int flags);
extern unsigned long last_brk;
void mmap_lock(void);
void mmap_unlock(void);
+#if defined(USE_NPTL)
+void mmap_fork_start(void);
+void mmap_fork_end(int child);
+#endif
/* user access */
diff --git a/linux-user/signal.c b/linux-user/signal.c
index e6e1a08267..623a5e31c4 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -424,9 +424,9 @@ static void host_signal_handler(int host_signum, siginfo_t *info,
fprintf(stderr, "qemu: got signal %d\n", sig);
#endif
host_to_target_siginfo_noswap(&tinfo, info);
- if (queue_signal(global_env, sig, &tinfo) == 1) {
+ if (queue_signal(thread_env, sig, &tinfo) == 1) {
/* interrupt the virtual CPU as soon as possible */
- cpu_interrupt(global_env, CPU_INTERRUPT_EXIT);
+ cpu_interrupt(thread_env, CPU_INTERRUPT_EXIT);
}
}
diff --git a/qemu-lock.h b/qemu-lock.h
new file mode 100644
index 0000000000..fdd8da9435
--- /dev/null
+++ b/qemu-lock.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Locking primitives. Most of this code should be redundant -
+ system emulation doesn't need/use locking, NPTL userspace uses
+ pthread mutexes, and non-NPTL userspace isn't threadsafe anyway.
+ In either case a spinlock is probably the wrong kind of lock.
+ Spinlocks are only good if you know another CPU has the lock and is
+ likely to release it soon. In environments where you have more threads
+ than physical CPUs (the extreme case being a single CPU host) a spinlock
+ simply wastes CPU until the OS decides to preempt it. */
+#if defined(USE_NPTL)
+
+#include <pthread.h>
+#define spin_lock pthread_mutex_lock
+#define spin_unlock pthread_mutex_unlock
+#define spinlock_t pthread_mutex_t
+#define SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+#else
+
+#if defined(__hppa__)
+
+typedef int spinlock_t[4];
+
+#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 }
+
+static inline void resetlock (spinlock_t *p)
+{
+ (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1;
+}
+
+#else
+
+typedef int spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+
+static inline void resetlock (spinlock_t *p)
+{
+ *p = SPIN_LOCK_UNLOCKED;
+}
+
+#endif
+
+#if defined(__powerpc__)
+static inline int testandset (int *p)
+{
+ int ret;
+ __asm__ __volatile__ (
+ "0: lwarx %0,0,%1\n"
+ " xor. %0,%3,%0\n"
+ " bne 1f\n"
+ " stwcx. %2,0,%1\n"
+ " bne- 0b\n"
+ "1: "
+ : "=&r" (ret)
+ : "r" (p), "r" (1), "r" (0)
+ : "cr0", "memory");
+ return ret;
+}
+#elif defined(__i386__)
+static inline int testandset (int *p)
+{
+ long int readval = 0;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+ : "+m" (*p), "+a" (readval)
+ : "r" (1)
+ : "cc");
+ return readval;
+}
+#elif defined(__x86_64__)
+static inline int testandset (int *p)
+{
+ long int readval = 0;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+ : "+m" (*p), "+a" (readval)
+ : "r" (1)
+ : "cc");
+ return readval;
+}
+#elif defined(__s390__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
+ " jl 0b"
+ : "=&d" (ret)
+ : "r" (1), "a" (p), "0" (*p)
+ : "cc", "memory" );
+ return ret;
+}
+#elif defined(__alpha__)
+static inline int testandset (int *p)
+{
+ int ret;
+ unsigned long one;
+
+ __asm__ __volatile__ ("0: mov 1,%2\n"
+ " ldl_l %0,%1\n"
+ " stl_c %2,%1\n"
+ " beq %2,1f\n"
+ ".subsection 2\n"
+ "1: br 0b\n"
+ ".previous"
+ : "=r" (ret), "=m" (*p), "=r" (one)
+ : "m" (*p));
+ return ret;
+}
+#elif defined(__sparc__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__("ldstub [%1], %0"
+ : "=r" (ret)
+ : "r" (p)
+ : "memory");
+
+ return (ret ? 1 : 0);
+}
+#elif defined(__arm__)
+static inline int testandset (int *spinlock)
+{
+ register unsigned int ret;
+ __asm__ __volatile__("swp %0, %1, [%2]"
+ : "=r"(ret)
+ : "0"(1), "r"(spinlock));
+
+ return ret;
+}
+#elif defined(__mc68000)
+static inline int testandset (int *p)
+{
+    char ret; /* zero if the lock byte was clear, nonzero if already set */
+    __asm__ __volatile__("tas %1; sne %0"
+                         : "=r" (ret)
+                         : "m" (*p) /* the lock word, not the pointer variable */
+                         : "cc","memory");
+    return ret;
+}
+#elif defined(__hppa__)
+
+/* Because malloc only guarantees 8-byte alignment for malloc'd data,
+ and GCC only guarantees 8-byte alignment for stack locals, we can't
+ be assured of 16-byte alignment for atomic lock data even if we
+ specify "__attribute ((aligned(16)))" in the type declaration. So,
+ we use a struct containing an array of four ints for the atomic lock
+ type and dynamically select the 16-byte aligned int from the array
+ for the semaphore. */
+#define __PA_LDCW_ALIGNMENT 16
+static inline void *ldcw_align (void *p) {
+ unsigned long a = (unsigned long)p;
+ a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1);
+ return (void *)a;
+}
+
+static inline int testandset (spinlock_t *p)
+{
+ unsigned int ret;
+ p = ldcw_align(p);
+ __asm__ __volatile__("ldcw 0(%1),%0"
+ : "=r" (ret)
+ : "r" (p)
+ : "memory" );
+ return !ret;
+}
+
+#elif defined(__ia64)
+
+#include <ia64intrin.h>
+
+static inline int testandset (int *p)
+{
+ return __sync_lock_test_and_set (p, 1);
+}
+#elif defined(__mips__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ " .set push \n"
+ " .set noat \n"
+ " .set mips2 \n"
+ "1: li $1, 1 \n"
+ " ll %0, %1 \n"
+ " sc $1, %1 \n"
+ " beqz $1, 1b \n"
+ " .set pop "
+ : "=r" (ret), "+R" (*p)
+ :
+ : "memory");
+
+ return ret;
+}
+#else
+#error unimplemented CPU support
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+static inline void spin_lock(spinlock_t *lock)
+{
+ while (testandset(lock));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ resetlock(lock);
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return !testandset(lock);
+}
+#else
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return 1;
+}
+#endif
+
+#endif