diff --git a/gc-7.2/libatomic_ops/src/atomic_ops/sysdeps/gcc/powerpc.h b/gc-7.2/libatomic_ops/src/atomic_ops/sysdeps/gcc/powerpc.h
new file mode 100644
index 0000000..83d7d39
--- /dev/null
+++ b/gc-7.2/libatomic_ops/src/atomic_ops/sysdeps/gcc/powerpc.h
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+
+/* Memory model documented at http://www-106.ibm.com/developerworks/    */
+/* eserver/articles/archguide.html and (clearer)                        */
+/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
+/* There appears to be no implicit ordering between any kind of         */
+/* independent memory references.                                       */
+/* The architecture enforces some ordering based on control dependence; */
+/* I don't know whether that could help.                                */
+/* Data-dependent loads are always ordered.                             */
+/* Based on the above references, eieio is intended for use on          */
+/* uncached memory, which we don't support. It does not order loads     */
+/* from cached memory.                                                  */
+/* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to   */
+/* track some of this down and correcting my misunderstandings. -HB     */
+/* Earl Chew subsequently contributed further fixes and additions.      */
+
+#include "../all_aligned_atomic_load_store.h"
+
+#include "../test_and_set_t_is_ao_t.h"
+        /* There seems to be no byte equivalent of lwarx, so this       */
+        /* may really be what we want, at least in the 32-bit case.     */
+
+AO_INLINE void
+AO_nop_full(void)
+{
+  __asm__ __volatile__("sync" : : : "memory");
+}
+#define AO_HAVE_nop_full
+
+/* lwsync apparently works for everything but a StoreLoad barrier. */
+AO_INLINE void
+AO_lwsync(void)
+{
+#ifdef __NO_LWSYNC__
+  __asm__ __volatile__("sync" : : : "memory");
+#else
+  __asm__ __volatile__("lwsync" : : : "memory");
+#endif
+}
+
+#define AO_nop_write() AO_lwsync()
+#define AO_HAVE_nop_write
+
+#define AO_nop_read() AO_lwsync()
+#define AO_HAVE_nop_read
+
+/* We explicitly specify load_acquire, since it is important and can    */
+/* be implemented relatively cheaply. It could be implemented with an   */
+/* ordinary load followed by a lwsync, but the general wisdom seems to  */
+/* be that a data-dependent branch followed by an isync is cheaper, and */
+/* the documentation is fairly explicit that this also has acquire      */
+/* semantics.                                                           */
+/* ppc64 uses ld, not lwz.                                              */
+AO_INLINE AO_t
+AO_load_acquire(const volatile AO_t *addr)
+{
+  AO_t result;
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+  __asm__ __volatile__ (
+    "ld%U1%X1 %0,%1\n"
+    "cmpw %0,%0\n"
+    "bne- 1f\n"
+    "1: isync\n"
+    : "=r" (result)
+    : "m"(*addr) : "memory", "cr0");
+#else
+  /* FIXME: We should get gcc to allocate one of the condition          */
+  /* registers. I always got "impossible constraint" when I             */
+  /* tried the "y" constraint.                                          */
+  __asm__ __volatile__ (
+    "lwz%U1%X1 %0,%1\n"
+    "cmpw %0,%0\n"
+    "bne- 1f\n"
+    "1: isync\n"
+    : "=r" (result)
+    : "m"(*addr) : "memory", "cc");
+#endif
+  return result;
+}
+#define AO_HAVE_load_acquire
+
+/* We explicitly specify store_release, since it relies                 */
+/* on the fact that lwsync is also a LoadStore barrier.                 */
+AO_INLINE void
+AO_store_release(volatile AO_t *addr, AO_t value)
+{
+  AO_lwsync();
+  *addr = value;
+}
+#define AO_HAVE_store_release
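For a sense of how these two primitives pair up, here is a minimal message-passing sketch. It assumes the usual libatomic_ops umbrella header "atomic_ops.h"; the variable and function names (data, ready, producer, consumer) are illustrative only and not part of this header.

#include "atomic_ops.h"

static AO_t data;           /* payload, written before the flag        */
static volatile AO_t ready; /* publication flag, initially 0           */

void producer(void)
{
  data = 42;                   /* plain store to the payload           */
  AO_store_release(&ready, 1); /* lwsync keeps the payload store ahead */
                               /* of the flag store                    */
}

AO_t consumer(void)
{
  /* The data-dependent-branch-plus-isync trick above keeps the        */
  /* payload load from being hoisted ahead of the flag load.           */
  while (AO_load_acquire(&ready) == 0) {}
  return data;
}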
+
+/* This is similar to the code in the garbage collector. Deleting      */
+/* this and having it synthesized from compare_and_swap would probably */
+/* only cost us a load immediate instruction.                          */
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set(volatile AO_TS_t *addr) {
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+/* Completely untested. And we should be using smaller objects anyway. */
+  unsigned long oldval;
+  unsigned long temp = 1; /* locked value */
+
+  __asm__ __volatile__(
+               "1:ldarx %0,0,%1\n"  /* load and reserve                */
+               "cmpdi %0, 0\n"      /* if load is                      */
+               "bne 2f\n"           /*   non-zero, return already set  */
+               "stdcx. %2,0,%1\n"   /* else store conditional          */
+               "bne- 1b\n"          /* retry if lost reservation       */
+               "2:\n"               /* oldval is zero if we set        */
+              : "=&r"(oldval)
+              : "r"(addr), "r"(temp)
+              : "memory", "cr0");
+#else
+  int oldval;
+  int temp = 1; /* locked value */
+
+  __asm__ __volatile__(
+               "1:lwarx %0,0,%1\n"  /* load and reserve                */
+               "cmpwi %0, 0\n"      /* if load is                      */
+               "bne 2f\n"           /*   non-zero, return already set  */
+               "stwcx. %2,0,%1\n"   /* else store conditional          */
+               "bne- 1b\n"          /* retry if lost reservation       */
+               "2:\n"               /* oldval is zero if we set        */
+              : "=&r"(oldval)
+              : "r"(addr), "r"(temp)
+              : "memory", "cr0");
+#endif
+  return (AO_TS_VAL_t)oldval;
+}
+#define AO_HAVE_test_and_set
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_acquire(volatile AO_TS_t *addr) {
+  AO_TS_VAL_t result = AO_test_and_set(addr);
+  AO_lwsync();
+  return result;
+}
+#define AO_HAVE_test_and_set_acquire
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_release(volatile AO_TS_t *addr) {
+  AO_lwsync();
+  return AO_test_and_set(addr);
+}
+#define AO_HAVE_test_and_set_release
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_full(volatile AO_TS_t *addr) {
+  AO_TS_VAL_t result;
+  AO_lwsync();
+  result = AO_test_and_set(addr);
+  AO_lwsync();
+  return result;
+}
+#define AO_HAVE_test_and_set_full
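The test-and-set family is exactly what a spinlock needs: AO_test_and_set_acquire returns AO_TS_CLEAR only to the thread that actually flipped the flag. A minimal sketch follows; spin_lock and spin_unlock are hypothetical names, and AO_CLEAR and AO_TS_INITIALIZER are the generic helpers supplied by atomic_ops.h rather than by this file.

#include "atomic_ops.h"

static AO_TS_t lock = AO_TS_INITIALIZER;

void spin_lock(void)
{
  /* Spin until we are the thread that takes the flag from clear to    */
  /* set; the acquire variant keeps the critical section from          */
  /* floating above the lock acquisition.                              */
  while (AO_test_and_set_acquire(&lock) != AO_TS_CLEAR) {}
}

void spin_unlock(void)
{
  /* Release-style clear, so critical-section stores are visible       */
  /* before the lock is observed free.                                 */
  AO_CLEAR(&lock);
}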
+
+AO_INLINE int
+AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
+  AO_t oldval;
+  int result = 0;
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+/* FIXME: Completely untested. */
+  __asm__ __volatile__(
+               "1:ldarx %0,0,%2\n"  /* load and reserve                */
+               "cmpd %0, %4\n"      /* if load is not equal to         */
+               "bne 2f\n"           /*   old, fail                     */
+               "stdcx. %3,0,%2\n"   /* else store conditional          */
+               "bne- 1b\n"          /* retry if lost reservation       */
+               "li %1,1\n"          /* result = 1;                     */
+               "2:\n"
+              : "=&r"(oldval), "=&r"(result)
+              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
+              : "memory", "cr0");
+#else
+  __asm__ __volatile__(
+               "1:lwarx %0,0,%2\n"  /* load and reserve                */
+               "cmpw %0, %4\n"      /* if load is not equal to         */
+               "bne 2f\n"           /*   old, fail                     */
+               "stwcx. %3,0,%2\n"   /* else store conditional          */
+               "bne- 1b\n"          /* retry if lost reservation       */
+               "li %1,1\n"          /* result = 1;                     */
+               "2:\n"
+              : "=&r"(oldval), "=&r"(result)
+              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
+              : "memory", "cr0");
+#endif
+  return result;
+}
+#define AO_HAVE_compare_and_swap
+
+AO_INLINE int
+AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) {
+  int result = AO_compare_and_swap(addr, old, new_val);
+  AO_lwsync();
+  return result;
+}
+#define AO_HAVE_compare_and_swap_acquire
+
+AO_INLINE int
+AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) {
+  AO_lwsync();
+  return AO_compare_and_swap(addr, old, new_val);
+}
+#define AO_HAVE_compare_and_swap_release
+
+AO_INLINE int
+AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) {
+  int result;
+  AO_lwsync();
+  result = AO_compare_and_swap(addr, old, new_val);
+  AO_lwsync();
+  return result;
+}
+#define AO_HAVE_compare_and_swap_full
+
+AO_INLINE AO_t
+AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
+  AO_t oldval;
+  AO_t newval;
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+/* FIXME: Completely untested. */
+  __asm__ __volatile__(
+               "1:ldarx %0,0,%2\n"  /* load and reserve                */
+               "add %1,%0,%3\n"     /* increment                       */
+               "stdcx. %1,0,%2\n"   /* store conditional               */
+               "bne- 1b\n"          /* retry if lost reservation       */
+              : "=&r"(oldval), "=&r"(newval)
+              : "r"(addr), "r"(incr)
+              : "memory", "cr0");
+#else
+  __asm__ __volatile__(
+               "1:lwarx %0,0,%2\n"  /* load and reserve                */
+               "add %1,%0,%3\n"     /* increment                       */
+               "stwcx. %1,0,%2\n"   /* store conditional               */
+               "bne- 1b\n"          /* retry if lost reservation       */
+              : "=&r"(oldval), "=&r"(newval)
+              : "r"(addr), "r"(incr)
+              : "memory", "cr0");
+#endif
+  return oldval;
+}
+#define AO_HAVE_fetch_and_add
+
+AO_INLINE AO_t
+AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
+  AO_t result = AO_fetch_and_add(addr, incr);
+  AO_lwsync();
+  return result;
+}
+#define AO_HAVE_fetch_and_add_acquire
+
+AO_INLINE AO_t
+AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
+  AO_lwsync();
+  return AO_fetch_and_add(addr, incr);
+}
+#define AO_HAVE_fetch_and_add_release
+
+AO_INLINE AO_t
+AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
+  AO_t result;
+  AO_lwsync();
+  result = AO_fetch_and_add(addr, incr);
+  AO_lwsync();
+  return result;
+}
+#define AO_HAVE_fetch_and_add_full
+
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+#else
+# include "../ao_t_is_int.h"
+#endif
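The comment above AO_test_and_set notes that these primitives could be synthesized from compare_and_swap. As a sketch of what that means, here is a CAS-based equivalent of AO_fetch_and_add; the function name is hypothetical and not part of this header.

#include "atomic_ops.h"

AO_t fetch_and_add_via_cas(volatile AO_t *addr, AO_t incr)
{
  AO_t old;
  do {
    old = *addr;              /* reread the current value each attempt */
  } while (!AO_compare_and_swap(addr, old, old + incr));
  return old;                 /* old value, matching AO_fetch_and_add  */
}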