/*
 * atomic.h:  Atomic operations
 *
 * Author:
 *      Dick Porter (dick@ximian.com)
 *
 * (C) 2002 Ximian, Inc.
 * Copyright 2012 Xamarin Inc
 */

#ifndef _WAPI_ATOMIC_H_
#define _WAPI_ATOMIC_H_

#if defined(__NetBSD__)
#include <sys/param.h>

#if __NetBSD_Version__ > 499004000
#include <sys/atomic.h>
#define HAVE_ATOMIC_OPS
#endif

#endif

#include "config.h"
#include <glib.h>

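/*
 * The functions defined below mirror the Win32 Interlocked* API.  As an
 * illustrative sketch only (the identifiers `refcount', `initialized' and
 * `do_init' are hypothetical, not part of this header), typical callers
 * look like:
 *
 *      static volatile gint32 refcount = 1;
 *
 *      InterlockedIncrement (&refcount);
 *      if (InterlockedDecrement (&refcount) == 0)
 *              free_object ();
 *
 *      static volatile gint32 initialized = 0;
 *
 *      if (InterlockedCompareExchange (&initialized, 1, 0) == 0)
 *              do_init ();     (first caller wins; others see initialized != 0)
 */
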
/* On Windows, we always use the functions provided by the Windows API. */
#if defined(__WIN32__) || defined(_WIN32)

#include <windows.h>

/* Prefer GCC atomic ops if the target supports them (see configure.in). */
#elif defined(USE_GCC_ATOMIC_OPS)

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        return __sync_add_and_fetch (val, 1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        return __sync_add_and_fetch (val, -1);
}

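/*
 * Note: GCC's __sync family has no plain atomic-exchange primitive
 * (__sync_lock_test_and_set is only an acquire barrier and is not
 * guaranteed to store arbitrary values on every target), so the two
 * exchange operations below are built from a compare-and-swap loop.
 */
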
static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 old_val;
        do {
                old_val = *val;
        } while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
        return old_val;
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        gpointer old_val;
        do {
                old_val = *val;
        } while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
        return old_val;
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        return __sync_fetch_and_add (val, add);
}

#elif defined(__NetBSD__) && defined(HAVE_ATOMIC_OPS)

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        return atomic_cas_32((uint32_t*)dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return atomic_cas_ptr(dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        return atomic_inc_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        return atomic_dec_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        return atomic_swap_32((uint32_t*)val, new_val);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        return atomic_swap_ptr(val, new_val);
}

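/*
 * atomic_add_32_nv() returns the *new* value, while Win32
 * InterlockedExchangeAdd() must return the old one, hence the
 * subtraction below.
 */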
static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        return atomic_add_32_nv((uint32_t*)val, add) - add;
}

#elif defined(__i386__) || defined(__x86_64__)

/*
 * NB: The *Pointer() functions here assume that
 * sizeof(gpointer) == sizeof(gint32) on 32-bit targets; on x86_64 the
 * 64-bit cmpxchgq form is used instead.
 *
 * NB2: These asm functions assume 486+ (some of the opcodes don't
 * exist on 386).  If this becomes an issue, we can get configure to
 * fall back to the non-atomic C versions of these calls.
 */

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old;

        __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
                              : "=m" (*dest), "=a" (old)
                              : "r" (exch), "m" (*dest), "a" (comp));
        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        gpointer old;

        __asm__ __volatile__ ("lock; "
#if defined(__x86_64__) && !defined(__native_client__)
                              "cmpxchgq"
#else
                              "cmpxchgl"
#endif
                              " %2, %0"
                              : "=m" (*dest), "=a" (old)
                              : "r" (exch), "m" (*dest), "a" (comp));

        return(old);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (tmp), "=m" (*val)
                              : "0" (1), "m" (*val));

        return(tmp+1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (tmp), "=m" (*val)
                              : "0" (-1), "m" (*val));

        return(tmp-1);
}

/*
 * See
 * http://msdn.microsoft.com/msdnmag/issues/0700/Win32/
 * for the reasons for using cmpxchg and a loop here.
 */
static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 ret;

        __asm__ __volatile__ ("1:; lock; cmpxchgl %2, %0; jne 1b"
                              : "=m" (*val), "=a" (ret)
                              : "r" (new_val), "m" (*val), "a" (*val));
        return(ret);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        gpointer ret;

        __asm__ __volatile__ ("1:; lock; "
#if defined(__x86_64__) && !defined(__native_client__)
                              "cmpxchgq"
#else
                              "cmpxchgl"
#endif
                              " %2, %0; jne 1b"
                              : "=m" (*val), "=a" (ret)
                              : "r" (new_val), "m" (*val), "a" (*val));

        return(ret);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        gint32 ret;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (ret), "=m" (*val)
                              : "0" (add), "m" (*val));

        return(ret);
}

#elif (defined(sparc) || defined (__sparc__)) && defined(__GNUC__)

G_GNUC_UNUSED
static inline gint32 InterlockedCompareExchange(volatile gint32 *_dest, gint32 _exch, gint32 _comp)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 comp asm("o4") = _comp;
        register gint32 exch asm("o5") = _exch;

        __asm__ __volatile__(
                /* cas [%%g1], %%o4, %%o5 */
                ".word 0xdbe0500c"
                : "=r" (exch)
                : "0" (exch), "r" (dest), "r" (comp)
                : "memory");

        return exch;
}

G_GNUC_UNUSED
static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *_dest, gpointer _exch, gpointer _comp)
{
        register volatile gpointer *dest asm("g1") = _dest;
        register gpointer comp asm("o4") = _comp;
        register gpointer exch asm("o5") = _exch;

        __asm__ __volatile__(
#ifdef SPARCV9
                /* casx [%%g1], %%o4, %%o5 */
                ".word 0xdbf0500c"
#else
                /* cas [%%g1], %%o4, %%o5 */
                ".word 0xdbe0500c"
#endif
                : "=r" (exch)
                : "0" (exch), "r" (dest), "r" (comp)
                : "memory");

        return exch;
}

G_GNUC_UNUSED
static inline gint32 InterlockedIncrement(volatile gint32 *_dest)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       add     %%o4, 1, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        add    %%o5, 1, %%o5"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedDecrement(volatile gint32 *_dest)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       sub     %%o4, 1, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        sub    %%o5, 1, %%o5"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchange(volatile gint32 *_dest, gint32 exch)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       mov     %3, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        nop"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest), "r" (exch)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gpointer InterlockedExchangePointer(volatile gpointer *_dest, gpointer exch)
{
        register volatile gpointer *dest asm("g1") = _dest;
        register gpointer tmp asm("o4");
        register gpointer ret asm("o5");

        __asm__ __volatile__(
#ifdef SPARCV9
                "1:     ldx     [%%g1], %%o4\n\t"
#else
                "1:     ld      [%%g1], %%o4\n\t"
#endif
                "       mov     %3, %%o5\n\t"
#ifdef SPARCV9
                /*      casx    [%%g1], %%o4, %%o5 */
                "       .word   0xdbf0500c\n\t"
#else
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
#endif
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        nop"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest), "r" (exch)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchangeAdd(volatile gint32 *_dest, gint32 add)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       add     %%o4, %3, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        add    %%o5, %3, %%o5"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest), "r" (add)
                : "memory", "cc");

        return ret;
}

#elif __s390x__

static inline gint32
InterlockedCompareExchange(volatile gint32 *dest,
                           gint32 exch, gint32 comp)
{
        gint32 old;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "\tLR\t%1,%3\n"
                              "\tCS\t%1,%2,0(1)\n"
                              : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");
        return(old);
}

static inline gpointer
InterlockedCompareExchangePointer(volatile gpointer *dest,
                                  gpointer exch,
                                  gpointer comp)
{
        gpointer old;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "\tLGR\t%1,%3\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");

        return(old);
}

static inline gint32
InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGHI\t1,1\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b\n"
                              "\tLGFR\t%0,1"
                              : "=r" (tmp), "+m" (*val)
                              : : "1", "2", "cc");

        return(tmp);
}

static inline gint32
InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGHI\t1,-1\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b\n"
                              "\tLGFR\t%0,1"
                              : "=r" (tmp), "+m" (*val)
                              : : "1", "2", "cc");

        return(tmp);
}

static inline gint32
InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 ret;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "0:\tL\t%1,%0\n"
                              "\tCS\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
                              : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");

        return(ret);
}

static inline gpointer
InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
{
        gpointer ret;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "0:\tLG\t%1,%0\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
                              : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");

        return(ret);
}

static inline gint32
InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        gint32 ret;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGR\t1,%2\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b"
                              : "=&r" (ret), "+m" (*val)
                              : "r" (add)
                              : "1", "2", "cc");

        return(ret);
}

#elif defined(__mono_ppc__)

#ifdef G_COMPILER_CODEWARRIOR
static inline gint32 InterlockedIncrement(volatile register gint32 *val)
{
        register gint32 result = 0;
        register gint32 tmp;

        asm
        {
                @1:
                        lwarx   tmp, 0, val
                        addi    result, tmp, 1
                        stwcx.  result, 0, val
                        bne-    @1
        }

        return result;
}

static inline gint32 InterlockedDecrement(register volatile gint32 *val)
{
        register gint32 result = 0;
        register gint32 tmp;

        asm
        {
                @1:
                        lwarx   tmp, 0, val
                        addi    result, tmp, -1
                        stwcx.  result, 0, val
                        bne-    @1
        }

        return result;
}
#define InterlockedCompareExchangePointer(dest,exch,comp) (void*)InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))

static inline gint32 InterlockedCompareExchange(volatile register gint32 *dest, register gint32 exch, register gint32 comp)
{
        register gint32 tmp = 0;

        asm
        {
                @1:
                        lwarx   tmp, 0, dest
                        cmpw    tmp, comp
                        bne-    @2
                        stwcx.  exch, 0, dest
                        bne-    @1
                @2:
        }

        return tmp;
}

static inline gint32 InterlockedExchange(register volatile gint32 *dest, register gint32 exch)
{
        register gint32 tmp = 0;

        asm
        {
                @1:
                        lwarx   tmp, 0, dest
                        stwcx.  exch, 0, dest
                        bne-    @1
        }

        return tmp;
}
#define InterlockedExchangePointer(dest,exch) (void*)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))
#else

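/* Select the doubleword (64-bit) or word (32-bit) variants of the
 * load-reserve, store-conditional and compare instructions. */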
#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
#define LDREGX "ldarx"
#define STREGCXD "stdcx."
#define CMPREG "cmpd"
#else
#define LDREGX "lwarx"
#define STREGCXD "stwcx."
#define CMPREG "cmpw"
#endif

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 result = 0, tmp;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "addi   %1, %0, 1\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne-   1b"
                              : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
        return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 result = 0, tmp;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "addi   %1, %0, -1\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne-   1b"
                              : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
        return result - 1;
}

static inline gpointer InterlockedCompareExchangePointer (volatile gpointer *dest,
                                                gpointer exch, gpointer comp)
{
        gpointer tmp = NULL;

        __asm__ __volatile__ ("\n1:\n\t"
                             LDREGX " %0, 0, %1\n\t"
                             CMPREG " %0, %2\n\t"
                             "bne-    2f\n\t"
                             STREGCXD " %3, 0, %1\n\t"
                             "bne-    1b\n"
                             "2:"
                             : "=&r" (tmp)
                             : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 tmp = 0;

        __asm__ __volatile__ ("\n1:\n\t"
                             "lwarx   %0, 0, %1\n\t"
                             "cmpw    %0, %2\n\t"
                             "bne-    2f\n\t"
                             "stwcx.  %3, 0, %1\n\t"
                             "bne-    1b\n"
                             "2:"
                             : "=&r" (tmp)
                             : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
        gint32 tmp = 0;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "stwcx. %3, 0, %2\n\t"
                              "bne    1b"
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gpointer InterlockedExchangePointer (volatile gpointer *dest, gpointer exch)
{
        gpointer tmp = NULL;

        __asm__ __volatile__ ("\n1:\n\t"
                              LDREGX " %0, 0, %2\n\t"
                              STREGCXD " %3, 0, %2\n\t"
                              "bne    1b"
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
        gint32 result, tmp;
        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "add    %1, %0, %3\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne    1b"
                              : "=&r" (result), "=&r" (tmp)
                              : "r" (dest), "r" (add) : "cc", "memory");
        return(result);
}

#undef LDREGX
#undef STREGCXD
#undef CMPREG

#endif /* !G_COMPILER_CODEWARRIOR */

#elif defined(__arm__)

#ifdef __native_client__
#define MASK_REGISTER(reg, cond) "bic" cond " " reg ", " reg ", #0xc0000000\n"
#define NACL_ALIGN() ".align 4\n"
#else
#define MASK_REGISTER(reg, cond)
#define NACL_ALIGN()
#endif

/*
 * The ARM atomic instructions used below do not by themselves act as
 * memory barriers, but the runtime code depends on barrier semantics,
 * so we add the barriers explicitly.  On ARMv7+ this means ldrex/strex
 * bracketed by dmb; older ARM falls back to the (deprecated) swp
 * instruction, which provides no barrier.
 */

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
{
#if defined(HAVE_ARMV7)
        gint32 ret, tmp;
        __asm__ __volatile__ (  "1:\n"
                                NACL_ALIGN()
                                "dmb\n"
                                "mov    %0, #0\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %1, [%2]\n"
                                "teq    %1, %3\n"
                                "it eq\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "strexeq %0, %4, [%2]\n"
                                "teq %0, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (tmp), "=&r" (ret)
                                : "r" (dest), "r" (comp), "r" (exch)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b;

        __asm__ __volatile__ (    "0:\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "al")
                                  "ldr %1, [%2]\n\t"
                                  "cmp %1, %4\n\t"
                                  "mov %0, %1\n\t"
                                  "bne 1f\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "al")
                                  "swp %0, %3, [%2]\n\t"
                                  "cmp %0, %1\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "ne")
                                  "swpne %3, %0, [%2]\n\t"
                                  "bne 0b\n\t"
                                  "1:"
                                  : "=&r" (a), "=&r" (b)
                                  : "r" (dest), "r" (exch), "r" (comp)
                                  : "cc", "memory");

        return a;
#endif
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
#if defined(HAVE_ARMV7)
        gpointer ret, tmp;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                "mov    %0, #0\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %1, [%2]\n"
                                "teq    %1, %3\n"
                                "it eq\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "strexeq %0, %4, [%2]\n"
                                "teq %0, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (tmp), "=&r" (ret)
                                : "r" (dest), "r" (comp), "r" (exch)
                                : "memory", "cc");

        return ret;
#else
        gpointer a, b;

        __asm__ __volatile__ (    "0:\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "al")
                                  "ldr %1, [%2]\n\t"
                                  "cmp %1, %4\n\t"
                                  "mov %0, %1\n\t"
                                  "bne 1f\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "eq")
                                  "swpeq %0, %3, [%2]\n\t"
                                  "cmp %0, %1\n\t"
                                  NACL_ALIGN()
                                  MASK_REGISTER("%2", "ne")
                                  "swpne %3, %0, [%2]\n\t"
                                  "bne 0b\n\t"
                                  "1:"
                                  : "=&r" (a), "=&r" (b)
                                  : "r" (dest), "r" (exch), "r" (comp)
                                  : "cc", "memory");

        return a;
#endif
}

static inline gint32 InterlockedIncrement(volatile gint32 *dest)
{
#if defined(HAVE_ARMV7)
        gint32 ret, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %0, [%2]\n"
                                "add %0, %0, %3\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "strex %1, %0, [%2]\n"
                                "teq %1, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (flag)
                                : "r" (dest), "r" (1)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (1)
                                : "cc", "memory");

        return b;
#endif
}

static inline gint32 InterlockedDecrement(volatile gint32 *dest)
{
#if defined(HAVE_ARMV7)
        gint32 ret, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %0, [%2]\n"
                                "sub %0, %0, %3\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "strex %1, %0, [%2]\n"
                                "teq %1, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (flag)
                                : "r" (dest), "r" (1)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (-1)
                                : "cc", "memory");

        return b;
#endif
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
#if defined(HAVE_ARMV7)
        gint32 ret, flag;
        __asm__ __volatile__ (
                              "dmb\n"
                              "1:\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "ldrex %0, [%3]\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "strex %1, %2, [%3]\n"
                              "teq %1, #0\n"
                              "bne 1b\n"
                              "dmb\n"
                              : "=&r" (ret), "=&r" (flag)
                              : "r" (exch), "r" (dest)
                              : "memory", "cc");
        return ret;
#else
        gint32 a;

        __asm__ __volatile__ (  NACL_ALIGN()
                                MASK_REGISTER("%1", "al")
                                "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));

        return a;
#endif
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
#if defined(HAVE_ARMV7)
        gpointer ret, flag;
        __asm__ __volatile__ (
                              "dmb\n"
                              "1:\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "ldrex %0, [%3]\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "strex %1, %2, [%3]\n"
                              "teq %1, #0\n"
                              "bne 1b\n"
                              "dmb\n"
                              : "=&r" (ret), "=&r" (flag)
                              : "r" (exch), "r" (dest)
                              : "memory", "cc");
        return ret;
#else
        gpointer a;

        __asm__ __volatile__ (  NACL_ALIGN()
                                MASK_REGISTER("%1", "al")
                                "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));

        return a;
#endif
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
#if defined(HAVE_ARMV7)
        gint32 ret, tmp, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldrex %0, [%3]\n"
                                "add %1, %0, %4\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "strex %2, %1, [%3]\n"
                                "teq %2, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (tmp), "=&r" (flag)
                                : "r" (dest), "r" (add)
                                : "memory", "cc");

        return ret;
#else
        int a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (add)
                                : "cc", "memory");

        return a;
#endif
}

#undef NACL_ALIGN
#undef MASK_REGISTER

#elif defined(__ia64__)

#ifdef __INTEL_COMPILER
#include <ia64intrin.h>
#endif

static inline gint32 InterlockedCompareExchange(gint32 volatile *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old;
        guint64 real_comp;

#ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchange (dest, exch, comp);
#else
        /* cmpxchg4 zero extends the value read from memory */
        real_comp = (guint64)(guint32)comp;
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                      "cmpxchg4.acq %0 = [%1], %3, ar.ccv\n\t"
                      : "=r" (old) : "r" (dest), "r" (real_comp), "r" (exch));
#endif

        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(gpointer volatile *dest,
                                                gpointer exch, gpointer comp)
{
        gpointer old;

#ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchangePointer (dest, exch, comp);
#else
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                      "cmpxchg8.acq %0 = [%1], %3, ar.ccv\n\t"
                      : "=r" (old) : "r" (dest), "r" (comp), "r" (exch));
#endif

        return(old);
}

static inline gint32 InterlockedIncrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedIncrement (val);
#else
        gint32 old;

        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old + 1, old) != old);

        return old + 1;
#endif
}

static inline gint32 InterlockedDecrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedDecrement (val);
#else
        gint32 old;

        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old - 1, old) != old);

        return old - 1;
#endif
}

static inline gint32 InterlockedExchange(gint32 volatile *dest, gint32 new_val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedExchange (dest, new_val);
#else
        gint32 res;

        do {
                res = *dest;
        } while (InterlockedCompareExchange (dest, new_val, res) != res);

        return res;
#endif
}

static inline gpointer InterlockedExchangePointer(gpointer volatile *dest, gpointer new_val)
{
#ifdef __INTEL_COMPILER
        return (gpointer)_InterlockedExchange64 ((gint64*)dest, (gint64)new_val);
#else
        gpointer res;

        do {
                res = *dest;
        } while (InterlockedCompareExchangePointer (dest, new_val, res) != res);

        return res;
#endif
}

static inline gint32 InterlockedExchangeAdd(gint32 volatile *val, gint32 add)
{
        gint32 old;

#ifdef __INTEL_COMPILER
        old = _InterlockedExchangeAdd (val, add);
#else
        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old + add, old) != old);
#endif

        return old;
}

#elif defined(__mips__)

#if SIZEOF_REGISTER == 8
#error "Not implemented."
#endif

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp, result = 0;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    addu    %1, %0, 1\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*val)
                              : "m" (*val));
        return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp, result = 0;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    subu    %1, %0, 1\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*val)
                              : "m" (*val));
        return result - 1;
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    bne     %0, %5, 2f\n"
                              "    move    %1, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "2:  .set    mips0\n"
                              : "=&r" (old), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (exch), "r" (comp));
        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return (gpointer)(InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp)));
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
        gint32 result, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    move    %1, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (exch));
        return(result);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
        return (gpointer)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch));
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
        gint32 result, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    addu    %1, %0, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (add));
        return result;
}

#else

#define WAPI_NO_ATOMIC_ASM

extern gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp);
extern gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp);
extern gint32 InterlockedIncrement(volatile gint32 *dest);
extern gint32 InterlockedDecrement(volatile gint32 *dest);
extern gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch);
extern gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch);
extern gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add);
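
/*
 * The declarations above are expected to be backed by a portable,
 * lock-based implementation elsewhere in the tree.  As a rough sketch
 * only (assuming pthreads are available; this is an illustration, not
 * the actual implementation), such a fallback could look like:
 */
#if 0
#include <pthread.h>

static pthread_mutex_t interlocked_mutex = PTHREAD_MUTEX_INITIALIZER;

gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
{
        gint32 old;

        pthread_mutex_lock (&interlocked_mutex);
        old = *dest;
        if (old == comp)
                *dest = exch;   /* swap only when the comparand matches */
        pthread_mutex_unlock (&interlocked_mutex);

        return old;
}
#endif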

#endif

#endif /* _WAPI_ATOMIC_H_ */