-rw-r--r--   cpu-defs.h            |   1
-rw-r--r--   cpu-exec.c            |  25
-rw-r--r--   exec-all.h            | 212
-rw-r--r--   exec.c                |  13
-rw-r--r--   linux-user/elfload.c  |   4
-rw-r--r--   linux-user/main.c     | 146
-rw-r--r--   linux-user/mmap.c     |  16
-rw-r--r--   linux-user/qemu.h     |  14
-rw-r--r--   linux-user/signal.c   |   4
-rw-r--r--   qemu-lock.h           | 249
10 files changed, 447 insertions(+), 237 deletions(-)
diff --git a/cpu-defs.h b/cpu-defs.h index f7f5f1730d..5804521a7e 100644 --- a/cpu-defs.h +++ b/cpu-defs.h @@ -166,6 +166,7 @@ typedef struct CPUTLBEntry { \ void *next_cpu; /* next CPU sharing TB cache */ \ int cpu_index; /* CPU index (informative) */ \ + int running; /* Nonzero if cpu is currently running(usermode). */ \ /* user data */ \ void *opaque; \ \ diff --git a/cpu-exec.c b/cpu-exec.c index a8e67e8add..3a1ff4e077 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -44,7 +44,6 @@ #endif int tb_invalidated_flag; -static unsigned long next_tb; //#define DEBUG_EXEC //#define DEBUG_SIGNAL @@ -93,8 +92,6 @@ static TranslationBlock *tb_find_slow(target_ulong pc, target_ulong phys_pc, phys_page1, phys_page2, virt_page2; uint8_t *tc_ptr; - spin_lock(&tb_lock); - tb_invalidated_flag = 0; regs_to_env(); /* XXX: do it just before cpu_gen_code() */ @@ -155,7 +152,6 @@ static TranslationBlock *tb_find_slow(target_ulong pc, found: /* we add the TB in the virtual pc hash table */ env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; - spin_unlock(&tb_lock); return tb; } @@ -228,14 +224,6 @@ static inline TranslationBlock *tb_find_fast(void) if (__builtin_expect(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags, 0)) { tb = tb_find_slow(pc, cs_base, flags); - /* Note: we do it here to avoid a gcc bug on Mac OS X when - doing it in tb_find_slow */ - if (tb_invalidated_flag) { - /* as some TB could have been invalidated because - of memory exceptions while generating the code, we - must recompute the hash index here */ - next_tb = 0; - } } return tb; } @@ -249,6 +237,7 @@ int cpu_exec(CPUState *env1) int ret, interrupt_request; TranslationBlock *tb; uint8_t *tc_ptr; + unsigned long next_tb; if (cpu_halted(env1) == EXCP_HALTED) return EXCP_HALTED; @@ -577,7 +566,16 @@ int cpu_exec(CPUState *env1) #endif } #endif + spin_lock(&tb_lock); tb = tb_find_fast(); + /* Note: we do it here to avoid a gcc bug on Mac OS X when + doing it in tb_find_slow */ + if (tb_invalidated_flag) { + /* as some TB could have been invalidated because + of memory exceptions while generating the code, we + must recompute the hash index here */ + next_tb = 0; + } #ifdef DEBUG_EXEC if ((loglevel & CPU_LOG_EXEC)) { fprintf(logfile, "Trace 0x%08lx [" TARGET_FMT_lx "] %s\n", @@ -594,11 +592,10 @@ int cpu_exec(CPUState *env1) (env->kqemu_enabled != 2) && #endif tb->page_addr[1] == -1) { - spin_lock(&tb_lock); tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb); - spin_unlock(&tb_lock); } } + spin_unlock(&tb_lock); tc_ptr = tb->tc_ptr; env->current_tb = tb; /* execute the generated code */ diff --git a/exec-all.h b/exec-all.h index ba6f6da5a0..62a9394873 100644 --- a/exec-all.h +++ b/exec-all.h @@ -302,217 +302,7 @@ extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4]; extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4]; extern void *io_mem_opaque[IO_MEM_NB_ENTRIES]; -#if defined(__hppa__) - -typedef int spinlock_t[4]; - -#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 } - -static inline void resetlock (spinlock_t *p) -{ - (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1; -} - -#else - -typedef int spinlock_t; - -#define SPIN_LOCK_UNLOCKED 0 - -static inline void resetlock (spinlock_t *p) -{ - *p = SPIN_LOCK_UNLOCKED; -} - -#endif - -#if defined(__powerpc__) -static inline int testandset (int *p) -{ - int ret; - __asm__ __volatile__ ( - "0: lwarx %0,0,%1\n" - " xor. %0,%3,%0\n" - " bne 1f\n" - " stwcx. 
%2,0,%1\n" - " bne- 0b\n" - "1: " - : "=&r" (ret) - : "r" (p), "r" (1), "r" (0) - : "cr0", "memory"); - return ret; -} -#elif defined(__i386__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__x86_64__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__s390__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" - " jl 0b" - : "=&d" (ret) - : "r" (1), "a" (p), "0" (*p) - : "cc", "memory" ); - return ret; -} -#elif defined(__alpha__) -static inline int testandset (int *p) -{ - int ret; - unsigned long one; - - __asm__ __volatile__ ("0: mov 1,%2\n" - " ldl_l %0,%1\n" - " stl_c %2,%1\n" - " beq %2,1f\n" - ".subsection 2\n" - "1: br 0b\n" - ".previous" - : "=r" (ret), "=m" (*p), "=r" (one) - : "m" (*p)); - return ret; -} -#elif defined(__sparc__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__("ldstub [%1], %0" - : "=r" (ret) - : "r" (p) - : "memory"); - - return (ret ? 1 : 0); -} -#elif defined(__arm__) -static inline int testandset (int *spinlock) -{ - register unsigned int ret; - __asm__ __volatile__("swp %0, %1, [%2]" - : "=r"(ret) - : "0"(1), "r"(spinlock)); - - return ret; -} -#elif defined(__mc68000) -static inline int testandset (int *p) -{ - char ret; - __asm__ __volatile__("tas %1; sne %0" - : "=r" (ret) - : "m" (p) - : "cc","memory"); - return ret; -} -#elif defined(__hppa__) - -/* Because malloc only guarantees 8-byte alignment for malloc'd data, - and GCC only guarantees 8-byte alignment for stack locals, we can't - be assured of 16-byte alignment for atomic lock data even if we - specify "__attribute ((aligned(16)))" in the type declaration. So, - we use a struct containing an array of four ints for the atomic lock - type and dynamically select the 16-byte aligned int from the array - for the semaphore. 
*/ -#define __PA_LDCW_ALIGNMENT 16 -static inline void *ldcw_align (void *p) { - unsigned long a = (unsigned long)p; - a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1); - return (void *)a; -} - -static inline int testandset (spinlock_t *p) -{ - unsigned int ret; - p = ldcw_align(p); - __asm__ __volatile__("ldcw 0(%1),%0" - : "=r" (ret) - : "r" (p) - : "memory" ); - return !ret; -} - -#elif defined(__ia64) - -#include <ia64intrin.h> - -static inline int testandset (int *p) -{ - return __sync_lock_test_and_set (p, 1); -} -#elif defined(__mips__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ( - " .set push \n" - " .set noat \n" - " .set mips2 \n" - "1: li $1, 1 \n" - " ll %0, %1 \n" - " sc $1, %1 \n" - " beqz $1, 1b \n" - " .set pop " - : "=r" (ret), "+R" (*p) - : - : "memory"); - - return ret; -} -#else -#error unimplemented CPU support -#endif - -#if defined(CONFIG_USER_ONLY) -static inline void spin_lock(spinlock_t *lock) -{ - while (testandset(lock)); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - resetlock(lock); -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return !testandset(lock); -} -#else -static inline void spin_lock(spinlock_t *lock) -{ -} - -static inline void spin_unlock(spinlock_t *lock) -{ -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return 1; -} -#endif +#include "qemu-lock.h" extern spinlock_t tb_lock; @@ -1341,10 +1341,20 @@ void cpu_set_log_filename(const char *filename) /* mask must never be zero, except for A20 change call */ void cpu_interrupt(CPUState *env, int mask) { +#if !defined(USE_NPTL) TranslationBlock *tb; static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED; +#endif + /* FIXME: This is probably not threadsafe. A different thread could + be in the mittle of a read-modify-write operation. */ env->interrupt_request |= mask; +#if defined(USE_NPTL) + /* FIXME: TB unchaining isn't SMP safe. For now just ignore the + problem and hope the cpu will stop of its own accord. For userspace + emulation this often isn't actually as bad as it sounds. Often + signals are used primarily to interrupt blocking syscalls. 
*/ +#else /* if the cpu is currently executing code, we must unlink it and all the potentially executing TB */ tb = env->current_tb; @@ -1353,6 +1363,7 @@ void cpu_interrupt(CPUState *env, int mask) tb_reset_jump_recursive(tb); resetlock(&interrupt_lock); } +#endif } void cpu_reset_interrupt(CPUState *env, int mask) @@ -2015,7 +2026,6 @@ void page_set_flags(target_ulong start, target_ulong end, int flags) end = TARGET_PAGE_ALIGN(end); if (flags & PAGE_WRITE) flags |= PAGE_WRITE_ORG; - spin_lock(&tb_lock); for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) { p = page_find_alloc(addr >> TARGET_PAGE_BITS); /* if the write protection is set, then we invalidate the code @@ -2027,7 +2037,6 @@ void page_set_flags(target_ulong start, target_ulong end, int flags) } p->flags = flags; } - spin_unlock(&tb_lock); } int page_check_range(target_ulong start, target_ulong len, int flags) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index ca0023e62e..67b7535d4c 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -89,7 +89,7 @@ enum { static const char *get_elf_platform(void) { static char elf_platform[] = "i386"; - int family = (global_env->cpuid_version >> 8) & 0xff; + int family = (thread_env->cpuid_version >> 8) & 0xff; if (family > 6) family = 6; if (family >= 3) @@ -101,7 +101,7 @@ static const char *get_elf_platform(void) static uint32_t get_elf_hwcap(void) { - return global_env->cpuid_features; + return thread_env->cpuid_features; } #ifdef TARGET_X86_64 diff --git a/linux-user/main.c b/linux-user/main.c index 4bdec7e9bc..1f68766f0f 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -26,6 +26,8 @@ #include "qemu.h" #include "qemu-common.h" +/* For tb_lock */ +#include "exec-all.h" #define DEBUG_LOGFILE "/tmp/qemu.log" @@ -123,6 +125,135 @@ int64_t cpu_get_real_ticks(void) #endif +#if defined(USE_NPTL) +/***********************************************************/ +/* Helper routines for implementing atomic operations. */ + +/* To implement exclusive operations we force all cpus to syncronise. + We don't require a full sync, only that no cpus are executing guest code. + The alternative is to map target atomic ops onto host equivalents, + which requires quite a lot of per host/target work. */ +static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER; +static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; +static int pending_cpus; + +/* Make sure everything is in a consistent state for calling fork(). */ +void fork_start(void) +{ + mmap_fork_start(); + pthread_mutex_lock(&tb_lock); + pthread_mutex_lock(&exclusive_lock); +} + +void fork_end(int child) +{ + if (child) { + /* Child processes created by fork() only have a single thread. + Discard information about the parent threads. */ + first_cpu = thread_env; + thread_env->next_cpu = NULL; + pending_cpus = 0; + pthread_mutex_init(&exclusive_lock, NULL); + pthread_cond_init(&exclusive_cond, NULL); + pthread_cond_init(&exclusive_resume, NULL); + pthread_mutex_init(&tb_lock, NULL); + } else { + pthread_mutex_unlock(&exclusive_lock); + pthread_mutex_unlock(&tb_lock); + } + mmap_fork_end(child); +} + +/* Wait for pending exclusive operations to complete. The exclusive lock + must be held. */ +static inline void exclusive_idle(void) +{ + while (pending_cpus) { + pthread_cond_wait(&exclusive_resume, &exclusive_lock); + } +} + +/* Start an exclusive operation. + Must only be called from outside cpu_arm_exec. 
*/ +static inline void start_exclusive(void) +{ + CPUState *other; + pthread_mutex_lock(&exclusive_lock); + exclusive_idle(); + + pending_cpus = 1; + /* Make all other cpus stop executing. */ + for (other = first_cpu; other; other = other->next_cpu) { + if (other->running) { + pending_cpus++; + cpu_interrupt(other, CPU_INTERRUPT_EXIT); + } + } + if (pending_cpus > 1) { + pthread_cond_wait(&exclusive_cond, &exclusive_lock); + } +} + +/* Finish an exclusive operation. */ +static inline void end_exclusive(void) +{ + pending_cpus = 0; + pthread_cond_broadcast(&exclusive_resume); + pthread_mutex_unlock(&exclusive_lock); +} + +/* Wait for exclusive ops to finish, and begin cpu execution. */ +static inline void cpu_exec_start(CPUState *env) +{ + pthread_mutex_lock(&exclusive_lock); + exclusive_idle(); + env->running = 1; + pthread_mutex_unlock(&exclusive_lock); +} + +/* Mark cpu as not executing, and release pending exclusive ops. */ +static inline void cpu_exec_end(CPUState *env) +{ + pthread_mutex_lock(&exclusive_lock); + env->running = 0; + if (pending_cpus > 1) { + pending_cpus--; + if (pending_cpus == 1) { + pthread_cond_signal(&exclusive_cond); + } + } + exclusive_idle(); + pthread_mutex_unlock(&exclusive_lock); +} +#else /* if !USE_NPTL */ +/* These are no-ops because we are not threadsafe. */ +static inline void cpu_exec_start(CPUState *env) +{ +} + +static inline void cpu_exec_end(CPUState *env) +{ +} + +static inline void start_exclusive(void) +{ +} + +static inline void end_exclusive(void) +{ +} + +void fork_start(void) +{ +} + +void fork_end(int child) +{ +} +#endif + + #ifdef TARGET_I386 /***********************************************************/ /* CPUX86 core interface */ @@ -378,8 +509,11 @@ do_kernel_trap(CPUARMState *env) /* ??? No-op. Will need to do better for SMP. */ break; case 0xffff0fc0: /* __kernel_cmpxchg */ - /* ??? This is not really atomic. However we don't support - threads anyway, so it doesn't realy matter. */ + /* XXX: This only works between threads, not between processes. + It's probably possible to implement this with native host + operations. However things like ldrex/strex are much harder so + there's not much point trying. */ + start_exclusive(); cpsr = cpsr_read(env); addr = env->regs[2]; /* FIXME: This should SEGV if the access fails. */ @@ -396,6 +530,7 @@ do_kernel_trap(CPUARMState *env) cpsr &= ~CPSR_C; } cpsr_write(env, cpsr, CPSR_C); + end_exclusive(); break; case 0xffff0fe0: /* __kernel_get_tls */ env->regs[0] = env->cp15.c13_tls2; @@ -422,7 +557,9 @@ void cpu_loop(CPUARMState *env) uint32_t addr; for(;;) { + cpu_exec_start(env); trapnr = cpu_arm_exec(env); + cpu_exec_end(env); switch(trapnr) { case EXCP_UDEF: { @@ -2044,8 +2181,7 @@ void usage(void) _exit(1); } -/* XXX: currently only used for async signals (see signal.c) */ -CPUState *global_env; +THREAD CPUState *thread_env; void init_task_state(TaskState *ts) { @@ -2203,7 +2339,7 @@ int main(int argc, char **argv) fprintf(stderr, "Unable to find CPU definition\n"); exit(1); } - global_env = env; + thread_env = env; if (getenv("QEMU_STRACE")) { do_strace = 1; diff --git a/linux-user/mmap.c b/linux-user/mmap.c index c0821386d9..b4ca1074b3 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -46,6 +46,22 @@ void mmap_unlock(void) pthread_mutex_unlock(&mmap_mutex); } } + +/* Grab lock to make sure things are in a consistent state after fork(). 
*/ +void mmap_fork_start(void) +{ + if (mmap_lock_count) + abort(); + pthread_mutex_lock(&mmap_mutex); +} + +void mmap_fork_end(int child) +{ + if (child) + pthread_mutex_init(&mmap_mutex, NULL); + else + pthread_mutex_unlock(&mmap_mutex); +} #else /* We aren't threadsafe to start with, so no need to worry about locking. */ void mmap_lock(void) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 81f7fb290e..d3a3c3c20f 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -37,6 +37,12 @@ typedef target_long abi_long; #include "target_signal.h" #include "gdbstub.h" +#if defined(USE_NPTL) +#define THREAD __thread +#else +#define THREAD +#endif + /* This struct is used to hold certain information about the image. * Basically, it replicates in user space what would be certain * task_struct fields in the kernel @@ -184,12 +190,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6); void gemu_log(const char *fmt, ...) __attribute__((format(printf,1,2))); -extern CPUState *global_env; +extern THREAD CPUState *thread_env; void cpu_loop(CPUState *env); void init_paths(const char *prefix); const char *path(const char *pathname); char *target_strerror(int err); int get_osversion(void); +void fork_start(void); +void fork_end(int child); extern int loglevel; extern FILE *logfile; @@ -235,6 +243,10 @@ int target_msync(abi_ulong start, abi_ulong len, int flags); extern unsigned long last_brk; void mmap_lock(void); void mmap_unlock(void); +#if defined(USE_NPTL) +void mmap_fork_start(void); +void mmap_fork_end(int child); +#endif /* user access */ diff --git a/linux-user/signal.c b/linux-user/signal.c index e6e1a08267..623a5e31c4 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -424,9 +424,9 @@ static void host_signal_handler(int host_signum, siginfo_t *info, fprintf(stderr, "qemu: got signal %d\n", sig); #endif host_to_target_siginfo_noswap(&tinfo, info); - if (queue_signal(global_env, sig, &tinfo) == 1) { + if (queue_signal(thread_env, sig, &tinfo) == 1) { /* interrupt the virtual CPU as soon as possible */ - cpu_interrupt(global_env, CPU_INTERRUPT_EXIT); + cpu_interrupt(thread_env, CPU_INTERRUPT_EXIT); } } diff --git a/qemu-lock.h b/qemu-lock.h new file mode 100644 index 0000000000..fdd8da9435 --- /dev/null +++ b/qemu-lock.h @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Locking primitives. Most of this code should be redundant - + system emulation doesn't need/use locking, NPTL userspace uses + pthread mutexes, and non-NPTL userspace isn't threadsafe anyway. + In either case a spinlock is probably the wrong kind of lock. + Spinlocks are only good if you know annother CPU has the lock and is + likely to release it soon. 
In environments where you have more threads + than physical CPUs (the extreme case being a single CPU host) a spinlock + simply wastes CPU until the OS decides to preempt it. */ +#if defined(USE_NPTL) + +#include <pthread.h> +#define spin_lock pthread_mutex_lock +#define spin_unlock pthread_mutex_unlock +#define spinlock_t pthread_mutex_t +#define SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER + +#else + +#if defined(__hppa__) + +typedef int spinlock_t[4]; + +#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 } + +static inline void resetlock (spinlock_t *p) +{ + (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1; +} + +#else + +typedef int spinlock_t; + +#define SPIN_LOCK_UNLOCKED 0 + +static inline void resetlock (spinlock_t *p) +{ + *p = SPIN_LOCK_UNLOCKED; +} + +#endif + +#if defined(__powerpc__) +static inline int testandset (int *p) +{ + int ret; + __asm__ __volatile__ ( + "0: lwarx %0,0,%1\n" + " xor. %0,%3,%0\n" + " bne 1f\n" + " stwcx. %2,0,%1\n" + " bne- 0b\n" + "1: " + : "=&r" (ret) + : "r" (p), "r" (1), "r" (0) + : "cr0", "memory"); + return ret; +} +#elif defined(__i386__) +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#elif defined(__x86_64__) +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#elif defined(__s390__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" + " jl 0b" + : "=&d" (ret) + : "r" (1), "a" (p), "0" (*p) + : "cc", "memory" ); + return ret; +} +#elif defined(__alpha__) +static inline int testandset (int *p) +{ + int ret; + unsigned long one; + + __asm__ __volatile__ ("0: mov 1,%2\n" + " ldl_l %0,%1\n" + " stl_c %2,%1\n" + " beq %2,1f\n" + ".subsection 2\n" + "1: br 0b\n" + ".previous" + : "=r" (ret), "=m" (*p), "=r" (one) + : "m" (*p)); + return ret; +} +#elif defined(__sparc__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__("ldstub [%1], %0" + : "=r" (ret) + : "r" (p) + : "memory"); + + return (ret ? 1 : 0); +} +#elif defined(__arm__) +static inline int testandset (int *spinlock) +{ + register unsigned int ret; + __asm__ __volatile__("swp %0, %1, [%2]" + : "=r"(ret) + : "0"(1), "r"(spinlock)); + + return ret; +} +#elif defined(__mc68000) +static inline int testandset (int *p) +{ + char ret; + __asm__ __volatile__("tas %1; sne %0" + : "=r" (ret) + : "m" (p) + : "cc","memory"); + return ret; +} +#elif defined(__hppa__) + +/* Because malloc only guarantees 8-byte alignment for malloc'd data, + and GCC only guarantees 8-byte alignment for stack locals, we can't + be assured of 16-byte alignment for atomic lock data even if we + specify "__attribute ((aligned(16)))" in the type declaration. So, + we use a struct containing an array of four ints for the atomic lock + type and dynamically select the 16-byte aligned int from the array + for the semaphore. 
*/ +#define __PA_LDCW_ALIGNMENT 16 +static inline void *ldcw_align (void *p) { + unsigned long a = (unsigned long)p; + a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1); + return (void *)a; +} + +static inline int testandset (spinlock_t *p) +{ + unsigned int ret; + p = ldcw_align(p); + __asm__ __volatile__("ldcw 0(%1),%0" + : "=r" (ret) + : "r" (p) + : "memory" ); + return !ret; +} + +#elif defined(__ia64) + +#include <ia64intrin.h> + +static inline int testandset (int *p) +{ + return __sync_lock_test_and_set (p, 1); +} +#elif defined(__mips__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ( + " .set push \n" + " .set noat \n" + " .set mips2 \n" + "1: li $1, 1 \n" + " ll %0, %1 \n" + " sc $1, %1 \n" + " beqz $1, 1b \n" + " .set pop " + : "=r" (ret), "+R" (*p) + : + : "memory"); + + return ret; +} +#else +#error unimplemented CPU support +#endif + +#if defined(CONFIG_USER_ONLY) +static inline void spin_lock(spinlock_t *lock) +{ + while (testandset(lock)); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + resetlock(lock); +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return !testandset(lock); +} +#else +static inline void spin_lock(spinlock_t *lock) +{ +} + +static inline void spin_unlock(spinlock_t *lock) +{ +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return 1; +} +#endif + +#endif |
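
Editor's note: the core of the USE_NPTL changes is the exclusive-operation handshake added to linux-user/main.c. A thread that needs to emulate a guest atomic operation (such as the ARM __kernel_cmpxchg trap) raises pending_cpus, asks every other emulated CPU to stop, waits until they have all left their execution loop, performs the operation, and then lets everyone resume. The code below is a minimal standalone sketch of that protocol, not part of the patch: the NCPUS constant, vcpu_thread() and the driver loop in main() are invented for illustration, and the cpu_interrupt() nudge that forces a running CPU out of cpu_exec() early is omitted, so the stand-in workers simply finish their current slice on their own.

/*
 * Standalone sketch of the exclusive-operation handshake from
 * linux-user/main.c (USE_NPTL).  Worker threads stand in for emulated
 * CPUs; names such as NCPUS and vcpu_thread are invented for this
 * example.  Build with: cc -O2 -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

#define NCPUS 4

static pthread_mutex_t exclusive_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  exclusive_cond   = PTHREAD_COND_INITIALIZER; /* "all runners stopped" */
static pthread_cond_t  exclusive_resume = PTHREAD_COND_INITIALIZER; /* "exclusive op finished" */
static int pending_cpus;   /* 0: no exclusive op; else 1 + number of CPUs still running */
static int running[NCPUS];

/* Wait for a pending exclusive operation to finish (exclusive_lock held). */
static void exclusive_idle(void)
{
    while (pending_cpus)
        pthread_cond_wait(&exclusive_resume, &exclusive_lock);
}

/* Mark this CPU as executing guest code. */
static void cpu_exec_start(int cpu)
{
    pthread_mutex_lock(&exclusive_lock);
    exclusive_idle();              /* never enter guest code during an exclusive op */
    running[cpu] = 1;
    pthread_mutex_unlock(&exclusive_lock);
}

/* Mark this CPU as stopped; release a waiting exclusive operation. */
static void cpu_exec_end(int cpu)
{
    pthread_mutex_lock(&exclusive_lock);
    running[cpu] = 0;
    if (pending_cpus > 1) {
        pending_cpus--;
        if (pending_cpus == 1)
            pthread_cond_signal(&exclusive_cond);  /* we were the last runner */
    }
    exclusive_idle();
    pthread_mutex_unlock(&exclusive_lock);
}

/* Begin an exclusive section: wait until no other CPU runs guest code. */
static void start_exclusive(void)
{
    pthread_mutex_lock(&exclusive_lock);
    exclusive_idle();
    pending_cpus = 1;
    for (int i = 0; i < NCPUS; i++)
        if (running[i])
            pending_cpus++;        /* the real code also calls cpu_interrupt() here */
    while (pending_cpus > 1)
        pthread_cond_wait(&exclusive_cond, &exclusive_lock);
    /* caller now has the machine to itself, still holding exclusive_lock */
}

/* End the exclusive section and let everyone resume. */
static void end_exclusive(void)
{
    pending_cpus = 0;
    pthread_cond_broadcast(&exclusive_resume);
    pthread_mutex_unlock(&exclusive_lock);
}

static void *vcpu_thread(void *arg)
{
    int cpu = (int)(long)arg;
    for (int n = 0; n < 100000; n++) {
        cpu_exec_start(cpu);
        /* stand-in for executing one translation block */
        cpu_exec_end(cpu);
    }
    return NULL;
}

int main(void)
{
    pthread_t tid[NCPUS];
    for (long i = 0; i < NCPUS; i++)
        pthread_create(&tid[i], NULL, vcpu_thread, (void *)i);
    for (int n = 0; n < 1000; n++) {
        start_exclusive();         /* e.g. emulating __kernel_cmpxchg */
        end_exclusive();
    }
    for (int i = 0; i < NCPUS; i++)
        pthread_join(tid[i], NULL);
    printf("exclusive handshake completed cleanly\n");
    return 0;
}

One deliberate deviation from the patch: start_exclusive() above re-checks pending_cpus in a loop after waking, which guards against spurious wakeups, whereas the patch waits on exclusive_cond exactly once.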