/*
 * atomic.h:  Atomic operations
 *
 * Author:
 *      Dick Porter (dick@ximian.com)
 *
 * (C) 2002 Ximian, Inc.
 * Copyright 2012 Xamarin Inc
 */

#ifndef _WAPI_ATOMIC_H_
#define _WAPI_ATOMIC_H_

#if defined(__NetBSD__)
#include <sys/param.h>

#if __NetBSD_Version__ > 499004000
#include <sys/atomic.h>
#define HAVE_ATOMIC_OPS
#endif

#endif

#include "config.h"
#include <glib.h>

#if defined(__WIN32__) || defined(_WIN32)

#include <windows.h>

#elif defined(__NetBSD__) && defined(HAVE_ATOMIC_OPS)

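/*
 * NetBSD's atomic_cas_*(ptr, expected, new) primitives return the value
 * previously in *ptr, so the Win32-style (dest, exch, comp) arguments
 * are reordered in the calls below.
 */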
static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
       gint32 exch, gint32 comp)
{
       return atomic_cas_32((uint32_t*)dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
       return atomic_cas_ptr(dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
       return atomic_inc_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
       return atomic_dec_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
       return atomic_swap_32((uint32_t*)val, new_val);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
               gpointer new_val)
{
       return atomic_swap_ptr(val, new_val);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
       return atomic_add_32_nv((uint32_t*)val, add) - add;
}

#elif defined(__i386__) || defined(__x86_64__)

/*
 * NB: On 32-bit x86 the *Pointer() functions assume that
 * sizeof(pointer)==sizeof(gint32); the x86-64 build uses the
 * cmpxchgq variants instead.
 *
 * NB2: These asm functions assume 486+ (some of the opcodes don't
 * exist on 386).  If this becomes an issue, we can get configure to
 * fall back to the non-atomic C versions of these calls.
 */
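
/*
 * Contract shared by every port in this file (the snippet is an
 * illustrative sketch, not code compiled here):
 * InterlockedCompareExchange() returns the previous value of *dest,
 * and the store happened iff that value equals the comparand:
 *
 *      if (InterlockedCompareExchange (&flag, 1, 0) == 0) {
 *              // we won the race: flag went from 0 to 1
 *      }
 */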

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old;

        __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
                              : "=m" (*dest), "=a" (old)
                              : "r" (exch), "m" (*dest), "a" (comp));
        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        gpointer old;

        __asm__ __volatile__ ("lock; "
#if defined(__x86_64__)  && !defined(__native_client__)
                              "cmpxchgq"
#else
                              "cmpxchgl"
#endif
                              " %2, %0"
                              : "=m" (*dest), "=a" (old)
                              : "r" (exch), "m" (*dest), "a" (comp));

        return(old);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (tmp), "=m" (*val)
                              : "0" (1), "m" (*val));

        return(tmp+1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (tmp), "=m" (*val)
                              : "0" (-1), "m" (*val));

        return(tmp-1);
}

/*
 * See
 * http://msdn.microsoft.com/msdnmag/issues/0700/Win32/
 * for the reasons for using cmpxchg and a loop here.
 */
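/*
 * Illustratively (a sketch under the same semantics, not code that is
 * compiled here), the cmpxchg retry loop below behaves like:
 *
 *      gint32 ret;
 *      do {
 *              ret = *val;
 *      } while (InterlockedCompareExchange (val, new_val, ret) != ret);
 *      return ret;
 */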
static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 ret;

        __asm__ __volatile__ ("1:; lock; cmpxchgl %2, %0; jne 1b"
                              : "=m" (*val), "=a" (ret)
                              : "r" (new_val), "m" (*val), "a" (*val));
        return(ret);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        gpointer ret;

        __asm__ __volatile__ ("1:; lock; "
#if defined(__x86_64__)  && !defined(__native_client__)
                              "cmpxchgq"
#else
                              "cmpxchgl"
#endif
                              " %2, %0; jne 1b"
                              : "=m" (*val), "=a" (ret)
                              : "r" (new_val), "m" (*val), "a" (*val));

        return(ret);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        gint32 ret;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (ret), "=m" (*val)
                              : "0" (add), "m" (*val));

        return(ret);
}

#elif (defined(sparc) || defined (__sparc__)) && defined(__GNUC__)

G_GNUC_UNUSED
static inline gint32 InterlockedCompareExchange(volatile gint32 *_dest, gint32 _exch, gint32 _comp)
{
       register volatile gint32 *dest asm("g1") = _dest;
       register gint32 comp asm("o4") = _comp;
       register gint32 exch asm("o5") = _exch;

       __asm__ __volatile__(
               /* cas [%%g1], %%o4, %%o5 */
               ".word 0xdbe0500c"
               : "=r" (exch)
               : "0" (exch), "r" (dest), "r" (comp)
               : "memory");

       return exch;
}

G_GNUC_UNUSED
static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *_dest, gpointer _exch, gpointer _comp)
{
       register volatile gpointer *dest asm("g1") = _dest;
       register gpointer comp asm("o4") = _comp;
       register gpointer exch asm("o5") = _exch;

       __asm__ __volatile__(
#ifdef SPARCV9
               /* casx [%%g1], %%o4, %%o5 */
               ".word 0xdbf0500c"
#else
               /* cas [%%g1], %%o4, %%o5 */
               ".word 0xdbe0500c"
#endif
               : "=r" (exch)
               : "0" (exch), "r" (dest), "r" (comp)
               : "memory");

       return exch;
}

G_GNUC_UNUSED
static inline gint32 InterlockedIncrement(volatile gint32 *_dest)
{
       register volatile gint32 *dest asm("g1") = _dest;
       register gint32 tmp asm("o4");
       register gint32 ret asm("o5");

       __asm__ __volatile__(
               "1:     ld      [%%g1], %%o4\n\t"
               "       add     %%o4, 1, %%o5\n\t"
               /*      cas     [%%g1], %%o4, %%o5 */
               "       .word   0xdbe0500c\n\t"
               "       cmp     %%o4, %%o5\n\t"
               "       bne     1b\n\t"
               "        add    %%o5, 1, %%o5"
               : "=&r" (tmp), "=&r" (ret)
               : "r" (dest)
               : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedDecrement(volatile gint32 *_dest)
{
       register volatile gint32 *dest asm("g1") = _dest;
       register gint32 tmp asm("o4");
       register gint32 ret asm("o5");

       __asm__ __volatile__(
               "1:     ld      [%%g1], %%o4\n\t"
               "       sub     %%o4, 1, %%o5\n\t"
               /*      cas     [%%g1], %%o4, %%o5 */
               "       .word   0xdbe0500c\n\t"
               "       cmp     %%o4, %%o5\n\t"
               "       bne     1b\n\t"
               "        sub    %%o5, 1, %%o5"
               : "=&r" (tmp), "=&r" (ret)
               : "r" (dest)
               : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchange(volatile gint32 *_dest, gint32 exch)
{
       register volatile gint32 *dest asm("g1") = _dest;
       register gint32 tmp asm("o4");
       register gint32 ret asm("o5");

       __asm__ __volatile__(
               "1:     ld      [%%g1], %%o4\n\t"
               "       mov     %3, %%o5\n\t"
               /*      cas     [%%g1], %%o4, %%o5 */
               "       .word   0xdbe0500c\n\t"
               "       cmp     %%o4, %%o5\n\t"
               "       bne     1b\n\t"
               "        nop"
               : "=&r" (tmp), "=&r" (ret)
               : "r" (dest), "r" (exch)
               : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gpointer InterlockedExchangePointer(volatile gpointer *_dest, gpointer exch)
{
       register volatile gpointer *dest asm("g1") = _dest;
       register gpointer tmp asm("o4");
       register gpointer ret asm("o5");

       __asm__ __volatile__(
#ifdef SPARCV9
               "1:     ldx     [%%g1], %%o4\n\t"
#else
               "1:     ld      [%%g1], %%o4\n\t"
#endif
               "       mov     %3, %%o5\n\t"
#ifdef SPARCV9
               /*      casx    [%%g1], %%o4, %%o5 */
               "       .word   0xdbf0500c\n\t"
#else
               /*      cas     [%%g1], %%o4, %%o5 */
               "       .word   0xdbe0500c\n\t"
#endif
               "       cmp     %%o4, %%o5\n\t"
               "       bne     1b\n\t"
               "        nop"
               : "=&r" (tmp), "=&r" (ret)
               : "r" (dest), "r" (exch)
               : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchangeAdd(volatile gint32 *_dest, gint32 add)
{
       register volatile gint32 *dest asm("g1") = _dest;
       register gint32 tmp asm("o4");
       register gint32 ret asm("o5");

       __asm__ __volatile__(
               "1:     ld      [%%g1], %%o4\n\t"
               "       add     %%o4, %3, %%o5\n\t"
               /*      cas     [%%g1], %%o4, %%o5 */
               "       .word   0xdbe0500c\n\t"
               "       cmp     %%o4, %%o5\n\t"
               "       bne     1b\n\t"
               "        add    %%o5, %3, %%o5"
               : "=&r" (tmp), "=&r" (ret)
               : "r" (dest), "r" (add)
               : "memory", "cc");

        return ret;
}

#elif __s390x__

static inline gint32
InterlockedCompareExchange(volatile gint32 *dest,
                           gint32 exch, gint32 comp)
{
        gint32 old;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "\tLR\t%1,%3\n"
                              "\tCS\t%1,%2,0(1)\n"
                              : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");
        return(old);
}

static inline gpointer
InterlockedCompareExchangePointer(volatile gpointer *dest,
                                  gpointer exch,
                                  gpointer comp)
{
        gpointer old;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "\tLGR\t%1,%3\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");

        return(old);
}

static inline gint32
InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGHI\t1,1\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b\n"
                              "\tLGFR\t%0,1"
                              : "=r" (tmp), "+m" (*val)
                              : : "1", "2", "cc");

        return(tmp);
}

static inline gint32
InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGHI\t1,-1\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b\n"
                              "\tLGFR\t%0,1"
                              : "=r" (tmp), "+m" (*val)
                              : : "1", "2", "cc");

        return(tmp);
}

static inline gint32
InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 ret;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "0:\tL\t%1,%0\n"
                              "\tCS\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
                              : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");

        return(ret);
}

static inline gpointer
InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
{
        gpointer ret;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "0:\tLG\t%1,%0\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
                              : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");

        return(ret);
}

static inline gint32
InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        gint32 ret;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGR\t1,%2\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b"
                              : "=&r" (ret), "+m" (*val)
                              : "r" (add)
                              : "1", "2", "cc");

        return(ret);
}

#elif defined(__mono_ppc__)

#ifdef G_COMPILER_CODEWARRIOR
static inline gint32 InterlockedIncrement(volatile register gint32 *val)
{
        register gint32 result = 0;
        register gint32 tmp;

        asm
        {
                @1:
                        lwarx   tmp, 0, val
                        addi    result, tmp, 1
                        stwcx.  result, 0, val
                        bne-    @1
        }

        return result;
}

static inline gint32 InterlockedDecrement(register volatile gint32 *val)
{
        register gint32 result = 0;
        register gint32 tmp;

        asm
        {
                @1:
                        lwarx   tmp, 0, val
                        addi    result, tmp, -1
                        stwcx.  result, 0, val
                        bne-    @1
        }

        return result;
}
#define InterlockedCompareExchangePointer(dest,exch,comp) (void*)InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))

static inline gint32 InterlockedCompareExchange(volatile register gint32 *dest, register gint32 exch, register gint32 comp)
{
        register gint32 tmp = 0;

        asm
        {
                @1:
                        lwarx   tmp, 0, dest
                        cmpw    tmp, comp
                        bne-    @2
                        stwcx.  exch, 0, dest
                        bne-    @1
                @2:
        }

        return tmp;
}
static inline gint32 InterlockedExchange(register volatile gint32 *dest, register gint32 exch)
{
        register gint32 tmp = 0;

        asm
        {
                @1:
                        lwarx   tmp, 0, dest
                        stwcx.  exch, 0, dest
                        bne-    @1
        }

        return tmp;
}
#define InterlockedExchangePointer(dest,exch) (void*)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))
#else

#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
#define LDREGX "ldarx"
#define STREGCXD "stdcx."
#define CMPREG "cmpd"
#else
#define LDREGX "lwarx"
#define STREGCXD "stwcx."
#define CMPREG "cmpw"
#endif

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 result = 0, tmp;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "addi   %1, %0, 1\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne-   1b"
                              : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
        return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 result = 0, tmp;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "addi   %1, %0, -1\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne-   1b"
                              : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
        return result - 1;
}

static inline gpointer InterlockedCompareExchangePointer (volatile gpointer *dest,
                                                gpointer exch, gpointer comp)
{
        gpointer tmp = NULL;

        __asm__ __volatile__ ("\n1:\n\t"
                             LDREGX " %0, 0, %1\n\t"
                             CMPREG " %0, %2\n\t"
                             "bne-    2f\n\t"
                             STREGCXD " %3, 0, %1\n\t"
                             "bne-    1b\n"
                             "2:"
                             : "=&r" (tmp)
                             : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp) {
        gint32 tmp = 0;

        __asm__ __volatile__ ("\n1:\n\t"
                             "lwarx   %0, 0, %1\n\t"
                             "cmpw    %0, %2\n\t"
                             "bne-    2f\n\t"
                             "stwcx.  %3, 0, %1\n\t"
                             "bne-    1b\n"
                             "2:"
                             : "=&r" (tmp)
                             : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
        gint32 tmp = 0;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "stwcx. %3, 0, %2\n\t"
                              "bne    1b"
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gpointer InterlockedExchangePointer (volatile gpointer *dest, gpointer exch)
{
        gpointer tmp = NULL;

        __asm__ __volatile__ ("\n1:\n\t"
                              LDREGX " %0, 0, %2\n\t"
                              STREGCXD " %3, 0, %2\n\t"
                              "bne    1b"
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
        gint32 result, tmp;
        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "add    %1, %0, %3\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne    1b"
                              : "=&r" (result), "=&r" (tmp)
                              : "r" (dest), "r" (add) : "cc", "memory");
        return(result);
}

#undef LDREGX
#undef STREGCXD
#undef CMPREG

#endif /* !G_COMPILER_CODEWARRIOR */

#elif defined(__arm__)

#ifdef __native_client__
#define MASK_REGISTER(reg, cond) "bic" cond " " reg ", " reg ", #0xc0000000\n"
#define NACL_ALIGN() ".align 4\n"
#else
#define MASK_REGISTER(reg, cond)
#define NACL_ALIGN()
#endif
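
/*
 * Under Native Client the sandbox requires data addresses to be masked
 * into the sandboxed address space (the "bic ... #0xc0000000") and code
 * to be bundle-aligned; these macros emit the needed directives and
 * expand to nothing on ordinary ARM targets.
 */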

/*
 * Atomic operations on ARM don't include memory barriers, and the runtime
 * code depends on them, so we add the barriers explicitly.
 */
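
/*
 * As a rough mental model (a sketch, assuming GCC's __sync_synchronize()
 * full barrier as a stand-in for "dmb"), each ARMv6+ operation below is:
 *
 *      __sync_synchronize ();          // dmb
 *      <ldrex/strex update loop>
 *      __sync_synchronize ();          // dmb
 *
 * i.e. a full barrier on both sides of the atomic update.
 */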

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
{
#if defined(HAVE_ARMV6)
        gint32 ret, tmp;
        __asm__ __volatile__ (  "1:\n"
                                NACL_ALIGN()
                                "dmb\n"
                                "mov    %0, #0\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %1, [%2]\n"
                                "teq    %1, %3\n"
                                "it eq\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "strexeq %0, %4, [%2]\n"
                                "teq %0, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (tmp), "=&r" (ret)
                                : "r" (dest), "r" (comp), "r" (exch)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b;

        __asm__ __volatile__ (    "0:\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "al")
                                  "ldr %1, [%2]\n\t"
                                  "cmp %1, %4\n\t"
                                  "mov %0, %1\n\t"
                                  "bne 1f\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "al")
                                  "swp %0, %3, [%2]\n\t"
                                  "cmp %0, %1\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "ne")
                                  "swpne %3, %0, [%2]\n\t"
                                  "bne 0b\n\t"
                                  "1:"
                                  : "=&r" (a), "=&r" (b)
                                  : "r" (dest), "r" (exch), "r" (comp)
                                  : "cc", "memory");

        return a;
#endif
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
#if defined(HAVE_ARMV6)
        gpointer ret, tmp;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                "mov    %0, #0\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %1, [%2]\n"
                                "teq    %1, %3\n"
                                "it eq\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "strexeq %0, %4, [%2]\n"
                                "teq %0, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (tmp), "=&r" (ret)
                                : "r" (dest), "r" (comp), "r" (exch)
                                : "memory", "cc");

        return ret;
#else
        gpointer a, b;

        __asm__ __volatile__ (    "0:\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "al")
                                  "ldr %1, [%2]\n\t"
                                  "cmp %1, %4\n\t"
                                  "mov %0, %1\n\t"
                                  "bne 1f\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "eq")
                                  "swpeq %0, %3, [%2]\n\t"
                                  "cmp %0, %1\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "ne")
                                  "swpne %3, %0, [%2]\n\t"
                                  "bne 0b\n\t"
                                  "1:"
                                  : "=&r" (a), "=&r" (b)
                                  : "r" (dest), "r" (exch), "r" (comp)
                                  : "cc", "memory");

        return a;
#endif
}

static inline gint32 InterlockedIncrement(volatile gint32 *dest)
{
#if defined(HAVE_ARMV6)
        gint32 ret, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %0, [%2]\n"
                                "add %0, %0, %3\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "strex %1, %0, [%2]\n"
                                "teq %1, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (flag)
                                : "r" (dest), "r" (1)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (1)
                                : "cc", "memory");

        return b;
#endif
}

static inline gint32 InterlockedDecrement(volatile gint32 *dest)
{
#if defined(HAVE_ARMV6)
        gint32 ret, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %0, [%2]\n"
                                "sub %0, %0, %3\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "strex %1, %0, [%2]\n"
                                "teq %1, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (flag)
                                : "r" (dest), "r" (1)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (-1)
                                : "cc", "memory");

        return b;
#endif
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
#if defined(HAVE_ARMV6)
        gint32 ret, flag;
        __asm__ __volatile__ (
                              "dmb\n"
                              "1:\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "ldrex %0, [%3]\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "strex %1, %2, [%3]\n"
                              "teq %1, #0\n"
                              "bne 1b\n"
                              "dmb\n"
                              : "=&r" (ret), "=&r" (flag)
                              : "r" (exch), "r" (dest)
                              : "memory", "cc");
        return ret;
#else
        gint32 a;

        __asm__ __volatile__ (  NACL_ALIGN()
                                MASK_REGISTER("%1", "al")
                                "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));

        return a;
#endif
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
#if defined(HAVE_ARMV6)
        gpointer ret, flag;
        __asm__ __volatile__ (
                              "dmb\n"
                              "1:\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "ldrex %0, [%3]\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "strex %1, %2, [%3]\n"
                              "teq %1, #0\n"
                              "bne 1b\n"
                              "dmb\n"
                              : "=&r" (ret), "=&r" (flag)
                              : "r" (exch), "r" (dest)
                              : "memory", "cc");
        return ret;
#else
        gpointer a;

        __asm__ __volatile__ (  NACL_ALIGN()
                                MASK_REGISTER("%1", "al")
                                "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));

        return a;
#endif
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
#if defined(HAVE_ARMV6)
        gint32 ret, tmp, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldrex %0, [%3]\n"
                                "add %1, %0, %4\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "strex %2, %1, [%3]\n"
                                "teq %2, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (tmp), "=&r" (flag)
                                : "r" (dest), "r" (add)
                                : "memory", "cc");

        return ret;
#else
        int a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (add)
                                : "cc", "memory");

        return a;
#endif
}

#undef NACL_ALIGN
#undef MASK_REGISTER

#elif defined(__ia64__)

#ifdef __INTEL_COMPILER
#include <ia64intrin.h>
#endif

static inline gint32 InterlockedCompareExchange(gint32 volatile *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old;
        guint64 real_comp;

#ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchange (dest, exch, comp);
#else
        /* cmpxchg4 zero extends the value read from memory */
        real_comp = (guint64)(guint32)comp;
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                                  "cmpxchg4.acq %0 = [%1], %3, ar.ccv\n\t"
                                  : "=r" (old) : "r" (dest), "r" (real_comp), "r" (exch));
#endif

        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(gpointer volatile *dest,
                                                gpointer exch, gpointer comp)
{
        gpointer old;

#ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchangePointer (dest, exch, comp);
#else
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                                  "cmpxchg8.acq %0 = [%1], %3, ar.ccv\n\t"
                                  : "=r" (old) : "r" (dest), "r" (comp), "r" (exch));
#endif

        return(old);
}

static inline gint32 InterlockedIncrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedIncrement (val);
#else
        gint32 old;

        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old + 1, old) != old);

        return old + 1;
#endif
}

static inline gint32 InterlockedDecrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedDecrement (val);
#else
        gint32 old;

        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old - 1, old) != old);

        return old - 1;
#endif
}

static inline gint32 InterlockedExchange(gint32 volatile *dest, gint32 new_val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedExchange (dest, new_val);
#else
        gint32 res;

        do {
                res = *dest;
        } while (InterlockedCompareExchange (dest, new_val, res) != res);

        return res;
#endif
}

static inline gpointer InterlockedExchangePointer(gpointer volatile *dest, gpointer new_val)
{
#ifdef __INTEL_COMPILER
        return (gpointer)_InterlockedExchange64 ((gint64*)dest, (gint64)new_val);
#else
        gpointer res;

        do {
                res = *dest;
        } while (InterlockedCompareExchangePointer (dest, new_val, res) != res);

        return res;
#endif
}

static inline gint32 InterlockedExchangeAdd(gint32 volatile *val, gint32 add)
{
        gint32 old;

#ifdef __INTEL_COMPILER
        old = _InterlockedExchangeAdd (val, add);
#else
        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old + add, old) != old);
#endif

        return old;
}

#elif defined(__mips__)

#if SIZEOF_REGISTER == 8
#error "Not implemented."
#endif

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp, result = 0;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    addu    %1, %0, 1\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*val)
                              : "m" (*val));
        return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp, result = 0;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    subu    %1, %0, 1\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*val)
                              : "m" (*val));
        return result - 1;
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp) {
        gint32 old, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    bne     %0, %5, 2f\n"
                              "    move    %1, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "2:  .set    mips0\n"
                              : "=&r" (old), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (exch), "r" (comp));
        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return (gpointer)(InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp)));
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
        gint32 result, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    move    %1, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (exch));
        return(result);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
        return (gpointer)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch));
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
        gint32 result, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    addu    %1, %0, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (add));
        return result;
}

#else

#define WAPI_NO_ATOMIC_ASM

extern gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp);
extern gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp);
extern gint32 InterlockedIncrement(volatile gint32 *dest);
extern gint32 InterlockedDecrement(volatile gint32 *dest);
extern gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch);
extern gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch);
extern gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add);

#endif

/* Not yet used: an implementation based on GCC's __sync atomic builtins. */
#ifdef USE_GCC_ATOMIC_OPS

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        return __sync_add_and_fetch (val, 1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        return __sync_add_and_fetch (val, -1);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 old_val;
        do {
                old_val = *val;
        } while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
        return old_val;
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        gpointer old_val;
        do {
                old_val = *val;
        } while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
        return old_val;
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        return __sync_fetch_and_add (val, add);
}
#endif
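
/*
 * Usage sketch (illustrative only; my_addref/my_release are hypothetical
 * helpers, not defined anywhere in Mono): a thread-safe reference count
 * built on these primitives.
 *
 *      static volatile gint32 refcount = 1;
 *
 *      static void
 *      my_addref (void)
 *      {
 *              InterlockedIncrement (&refcount);
 *      }
 *
 *      static gboolean
 *      my_release (void)
 *      {
 *              return InterlockedDecrement (&refcount) == 0;  // TRUE on last ref
 *      }
 */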

#endif /* _WAPI_ATOMIC_H_ */