/*
 * atomic.h:  Atomic operations
 *
 * Author:
 *      Dick Porter (dick@ximian.com)
 *
 * (C) 2002 Ximian, Inc.
 * Copyright 2012 Xamarin Inc
 */

#ifndef _WAPI_ATOMIC_H_
#define _WAPI_ATOMIC_H_

#if defined(__NetBSD__)
#include <sys/param.h>

#if __NetBSD_Version__ > 499004000
#include <sys/atomic.h>
#define HAVE_ATOMIC_OPS
#endif

#endif

#include <glib.h>

#if defined(__WIN32__) || defined(_WIN32)

#include <windows.h>

#elif defined(__NetBSD__) && defined(HAVE_ATOMIC_OPS)

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
        gint32 exch, gint32 comp)
{
        return atomic_cas_32((uint32_t*)dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return atomic_cas_ptr(dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        return atomic_inc_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        return atomic_dec_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        return atomic_swap_32((uint32_t*)val, new_val);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
        gpointer new_val)
{
        return atomic_swap_ptr(val, new_val);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        /* atomic_add_32_nv returns the *new* value; subtracting add back
         * recovers the old value that ExchangeAdd must return. */
        return atomic_add_32_nv((uint32_t*)val, add) - add;
}

#elif defined(__i386__) || defined(__x86_64__)

/*
 * NB: On 32-bit x86 the *Pointer() functions assume that
 * sizeof(gpointer) == sizeof(gint32); on x86-64 the 64-bit cmpxchgq
 * opcode is used instead.
 *
 * NB2: These asm functions assume a 486 or newer (some of the opcodes
 * don't exist on the 386).  If this becomes an issue, configure could
 * fall back to the non-atomic C versions of these calls.
 */
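
/*
 * Illustrative addition (not in the original header): the 32-bit pointer
 * assumption above can be checked at compile time.  This assumes the GLib
 * in use provides G_STATIC_ASSERT (GLib >= 2.20); treat it as a sketch,
 * not part of the established header.
 */
#if defined(__i386__) && defined(G_STATIC_ASSERT)
G_STATIC_ASSERT (sizeof (gpointer) == sizeof (gint32));
#endif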

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old;

        __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
                              : "=m" (*dest), "=a" (old)
                              : "r" (exch), "m" (*dest), "a" (comp));
        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        gpointer old;

        __asm__ __volatile__ ("lock; "
#if defined(__x86_64__) && !defined(__native_client__)
                              "cmpxchgq"
#else
                              "cmpxchgl"
#endif
                              " %2, %0"
                              : "=m" (*dest), "=a" (old)
                              : "r" (exch), "m" (*dest), "a" (comp));

        return(old);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (tmp), "=m" (*val)
                              : "0" (1), "m" (*val));

        return(tmp+1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (tmp), "=m" (*val)
                              : "0" (-1), "m" (*val));

        return(tmp-1);
}

/*
 * See
 * http://msdn.microsoft.com/msdnmag/issues/0700/Win32/
 * for the reasons for using cmpxchg and a loop here.
 */
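/*
 * Illustrative addition: the cmpxchg loop below is the assembly
 * equivalent of the portable compare-and-swap retry loop, roughly
 *
 *      gint32 old;
 *      do {
 *              old = *val;
 *      } while (InterlockedCompareExchange (val, new_val, old) != old);
 *      return old;
 *
 * which is the same shape the ia64 fallbacks further down use.
 */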
static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 ret;

        __asm__ __volatile__ ("1:; lock; cmpxchgl %2, %0; jne 1b"
                              : "=m" (*val), "=a" (ret)
                              : "r" (new_val), "m" (*val), "a" (*val));
        return(ret);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        gpointer ret;

        __asm__ __volatile__ ("1:; lock; "
#if defined(__x86_64__) && !defined(__native_client__)
                              "cmpxchgq"
#else
                              "cmpxchgl"
#endif
                              " %2, %0; jne 1b"
                              : "=m" (*val), "=a" (ret)
                              : "r" (new_val), "m" (*val), "a" (*val));

        return(ret);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        gint32 ret;

        __asm__ __volatile__ ("lock; xaddl %0, %1"
                              : "=r" (ret), "=m" (*val)
                              : "0" (add), "m" (*val));

        return(ret);
}

#elif (defined(sparc) || defined (__sparc__)) && defined(__GNUC__)

G_GNUC_UNUSED
static inline gint32 InterlockedCompareExchange(volatile gint32 *_dest, gint32 _exch, gint32 _comp)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 comp asm("o4") = _comp;
        register gint32 exch asm("o5") = _exch;

        __asm__ __volatile__(
                /* cas [%%g1], %%o4, %%o5 */
                ".word 0xdbe0500c"
                : "=r" (exch)
                : "0" (exch), "r" (dest), "r" (comp)
                : "memory");

        return exch;
}

G_GNUC_UNUSED
static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *_dest, gpointer _exch, gpointer _comp)
{
        register volatile gpointer *dest asm("g1") = _dest;
        register gpointer comp asm("o4") = _comp;
        register gpointer exch asm("o5") = _exch;

        __asm__ __volatile__(
#ifdef SPARCV9
                /* casx [%%g1], %%o4, %%o5 */
                ".word 0xdbf0500c"
#else
                /* cas [%%g1], %%o4, %%o5 */
                ".word 0xdbe0500c"
#endif
                : "=r" (exch)
                : "0" (exch), "r" (dest), "r" (comp)
                : "memory");

        return exch;
}

G_GNUC_UNUSED
static inline gint32 InterlockedIncrement(volatile gint32 *_dest)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       add     %%o4, 1, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        add    %%o5, 1, %%o5"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedDecrement(volatile gint32 *_dest)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       sub     %%o4, 1, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        sub    %%o5, 1, %%o5"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchange(volatile gint32 *_dest, gint32 exch)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       mov     %3, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        nop"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest), "r" (exch)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gpointer InterlockedExchangePointer(volatile gpointer *_dest, gpointer exch)
{
        register volatile gpointer *dest asm("g1") = _dest;
        register gpointer tmp asm("o4");
        register gpointer ret asm("o5");

        __asm__ __volatile__(
#ifdef SPARCV9
                "1:     ldx     [%%g1], %%o4\n\t"
#else
                "1:     ld      [%%g1], %%o4\n\t"
#endif
                "       mov     %3, %%o5\n\t"
#ifdef SPARCV9
                /*      casx    [%%g1], %%o4, %%o5 */
                "       .word   0xdbf0500c\n\t"
#else
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
#endif
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        nop"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest), "r" (exch)
                : "memory", "cc");

        return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchangeAdd(volatile gint32 *_dest, gint32 add)
{
        register volatile gint32 *dest asm("g1") = _dest;
        register gint32 tmp asm("o4");
        register gint32 ret asm("o5");

        __asm__ __volatile__(
                "1:     ld      [%%g1], %%o4\n\t"
                "       add     %%o4, %3, %%o5\n\t"
                /*      cas     [%%g1], %%o4, %%o5 */
                "       .word   0xdbe0500c\n\t"
                "       cmp     %%o4, %%o5\n\t"
                "       bne     1b\n\t"
                "        add    %%o5, %3, %%o5"
                : "=&r" (tmp), "=&r" (ret)
                : "r" (dest), "r" (add)
                : "memory", "cc");

        return ret;
}

#elif __s390x__

static inline gint32
InterlockedCompareExchange(volatile gint32 *dest,
                           gint32 exch, gint32 comp)
{
        gint32 old;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "\tLR\t%1,%3\n"
                              "\tCS\t%1,%2,0(1)\n"
                              : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");
        return(old);
}

static inline gpointer
InterlockedCompareExchangePointer(volatile gpointer *dest,
                                  gpointer exch,
                                  gpointer comp)
{
        gpointer old;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "\tLGR\t%1,%3\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");

        return(old);
}

static inline gint32
InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGHI\t1,1\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b\n"
                              "\tLGFR\t%0,1"
                              : "=r" (tmp), "+m" (*val)
                              : : "1", "2", "cc");

        return(tmp);
}

static inline gint32
InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGHI\t1,-1\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b\n"
                              "\tLGFR\t%0,1"
                              : "=r" (tmp), "+m" (*val)
                              : : "1", "2", "cc");

        return(tmp);
}

static inline gint32
InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 ret;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "0:\tL\t%1,%0\n"
                              "\tCS\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
                              : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");

        return(ret);
}

static inline gpointer
InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
{
        gpointer ret;

        __asm__ __volatile__ ("\tLA\t1,%0\n"
                              "0:\tLG\t%1,%0\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
                              : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");

        return(ret);
}

static inline gint32
InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        gint32 ret;

        __asm__ __volatile__ ("\tLA\t2,%1\n"
                              "0:\tLGF\t%0,%1\n"
                              "\tLGFR\t1,%0\n"
                              "\tAGR\t1,%2\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b"
                              : "=&r" (ret), "+m" (*val)
                              : "r" (add)
                              : "1", "2", "cc");

        return(ret);
}

#elif defined(__mono_ppc__)

#ifdef G_COMPILER_CODEWARRIOR
static inline gint32 InterlockedIncrement(volatile register gint32 *val)
{
        register gint32 result = 0;
        register gint32 tmp;

        asm
        {
                @1:
                        lwarx   tmp, 0, val
                        addi    result, tmp, 1
                        stwcx.  result, 0, val
                        bne-    @1
        }

        return result;
}

static inline gint32 InterlockedDecrement(register volatile gint32 *val)
{
        register gint32 result = 0;
        register gint32 tmp;

        asm
        {
                @1:
                        lwarx   tmp, 0, val
                        addi    result, tmp, -1
                        stwcx.  result, 0, val
                        bne-    @1
        }

        return result;
}
#define InterlockedCompareExchangePointer(dest,exch,comp) (void*)InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))

static inline gint32 InterlockedCompareExchange(volatile register gint32 *dest, register gint32 exch, register gint32 comp)
{
        register gint32 tmp = 0;

        asm
        {
                @1:
                        lwarx   tmp, 0, dest
                        cmpw    tmp, comp
                        bne-    @2
                        stwcx.  exch, 0, dest
                        bne-    @1
                @2:
        }

        return tmp;
}

static inline gint32 InterlockedExchange(register volatile gint32 *dest, register gint32 exch)
{
        register gint32 tmp = 0;

        asm
        {
                @1:
                        lwarx   tmp, 0, dest
                        stwcx.  exch, 0, dest
                        bne-    @1
        }

        return tmp;
}
#define InterlockedExchangePointer(dest,exch) (void*)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))
#else

#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
#define LDREGX "ldarx"
#define STREGCXD "stdcx."
#define CMPREG "cmpd"
#else
#define LDREGX "lwarx"
#define STREGCXD "stwcx."
#define CMPREG "cmpw"
#endif

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 result = 0, tmp;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "addi   %1, %0, 1\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne-   1b"
                              : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
        return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 result = 0, tmp;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "addi   %1, %0, -1\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne-   1b"
                              : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
        return result - 1;
}

static inline gpointer InterlockedCompareExchangePointer (volatile gpointer *dest,
                                                gpointer exch, gpointer comp)
{
        gpointer tmp = NULL;

        __asm__ __volatile__ ("\n1:\n\t"
                             LDREGX " %0, 0, %1\n\t"
                             CMPREG " %0, %2\n\t"
                             "bne-    2f\n\t"
                             STREGCXD " %3, 0, %1\n\t"
                             "bne-    1b\n"
                             "2:"
                             : "=&r" (tmp)
                             : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 tmp = 0;

        __asm__ __volatile__ ("\n1:\n\t"
                             "lwarx   %0, 0, %1\n\t"
                             "cmpw    %0, %2\n\t"
                             "bne-    2f\n\t"
                             "stwcx.  %3, 0, %1\n\t"
                             "bne-    1b\n"
                             "2:"
                             : "=&r" (tmp)
                             : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
        gint32 tmp = 0;

        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "stwcx. %3, 0, %2\n\t"
                              "bne    1b"
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gpointer InterlockedExchangePointer (volatile gpointer *dest, gpointer exch)
{
        gpointer tmp = NULL;

        __asm__ __volatile__ ("\n1:\n\t"
                              LDREGX " %0, 0, %2\n\t"
                              STREGCXD " %3, 0, %2\n\t"
                              "bne    1b"
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
        gint32 result, tmp;
        __asm__ __volatile__ ("\n1:\n\t"
                              "lwarx  %0, 0, %2\n\t"
                              "add    %1, %0, %3\n\t"
                              "stwcx. %1, 0, %2\n\t"
                              "bne    1b"
                              : "=&r" (result), "=&r" (tmp)
                              : "r" (dest), "r" (add) : "cc", "memory");
        return(result);
}

#undef LDREGX
#undef STREGCXD
#undef CMPREG

#endif /* !G_COMPILER_CODEWARRIOR */

#elif defined(__arm__)

#ifdef __native_client__
#define MASK_REGISTER(reg, cond) "bic" cond " " reg ", " reg ", #0xc0000000\n"
#define NACL_ALIGN() ".align 4\n"
#else
#define MASK_REGISTER(reg, cond)
#define NACL_ALIGN()
#endif

/*
 * The atomic operations on ARM do not themselves act as memory barriers,
 * and the runtime code depends on barrier semantics, so we add the
 * barriers explicitly.
 */
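
/*
 * Illustrative addition: with the explicit "dmb" before and after each
 * ldrex/strex loop, the ARMv6/v7 paths below behave roughly like the
 * C11 sequentially consistent operations, e.g.
 *
 *      #include <stdatomic.h>
 *      atomic_exchange_explicit (val, new_val, memory_order_seq_cst);
 *
 * assuming a toolchain that provides <stdatomic.h>.  This is only an
 * analogy for readers, not what the header actually compiles to.
 */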

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gint32 ret, tmp;
        __asm__ __volatile__ (  "1:\n"
                                NACL_ALIGN()
                                "dmb\n"
                                "mov    %0, #0\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %1, [%2]\n"
                                "teq    %1, %3\n"
                                "it eq\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "strexeq %0, %4, [%2]\n"
                                "teq %0, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (tmp), "=&r" (ret)
                                : "r" (dest), "r" (comp), "r" (exch)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldr %1, [%2]\n\t"
                                "cmp %1, %4\n\t"
                                "mov %0, %1\n\t"
                                "bne 1f\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "swp %0, %3, [%2]\n\t"
                                "cmp %0, %1\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "ne")
                                "swpne %3, %0, [%2]\n\t"
                                "bne 0b\n\t"
                                "1:"
                                : "=&r" (a), "=&r" (b)
                                : "r" (dest), "r" (exch), "r" (comp)
                                : "cc", "memory");

        return a;
#endif
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gpointer ret, tmp;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                "mov    %0, #0\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %1, [%2]\n"
                                "teq    %1, %3\n"
                                "it eq\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "strexeq %0, %4, [%2]\n"
                                "teq %0, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (tmp), "=&r" (ret)
                                : "r" (dest), "r" (comp), "r" (exch)
                                : "memory", "cc");

        return ret;
#else
        gpointer a, b;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldr %1, [%2]\n\t"
                                "cmp %1, %4\n\t"
                                "mov %0, %1\n\t"
                                "bne 1f\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "eq")
                                "swpeq %0, %3, [%2]\n\t"
                                "cmp %0, %1\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "ne")
                                "swpne %3, %0, [%2]\n\t"
                                "bne 0b\n\t"
                                "1:"
                                : "=&r" (a), "=&r" (b)
                                : "r" (dest), "r" (exch), "r" (comp)
                                : "cc", "memory");

        return a;
#endif
}

static inline gint32 InterlockedIncrement(volatile gint32 *dest)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gint32 ret, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %0, [%2]\n"
                                "add %0, %0, %3\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "strex %1, %0, [%2]\n"
                                "teq %1, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (flag)
                                : "r" (dest), "r" (1)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (1)
                                : "cc", "memory");

        return b;
#endif
}

static inline gint32 InterlockedDecrement(volatile gint32 *dest)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gint32 ret, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "ldrex %0, [%2]\n"
                                "sub %0, %0, %3\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%2", "al")
                                "strex %1, %0, [%2]\n"
                                "teq %1, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (flag)
                                : "r" (dest), "r" (1)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (-1)
                                : "cc", "memory");

        return b;
#endif
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gint32 ret, flag;
        __asm__ __volatile__ (
                              "dmb\n"
                              "1:\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "ldrex %0, [%3]\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "strex %1, %2, [%3]\n"
                              "teq %1, #0\n"
                              "bne 1b\n"
                              "dmb\n"
                              : "=&r" (ret), "=&r" (flag)
                              : "r" (exch), "r" (dest)
                              : "memory", "cc");
        return ret;
#else
        gint32 a;

        __asm__ __volatile__ (  NACL_ALIGN()
                                MASK_REGISTER("%1", "al")
                                "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));

        return a;
#endif
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gpointer ret, flag;
        __asm__ __volatile__ (
                              "dmb\n"
                              "1:\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "ldrex %0, [%3]\n"
                              NACL_ALIGN()
                              MASK_REGISTER("%3", "al")
                              "strex %1, %2, [%3]\n"
                              "teq %1, #0\n"
                              "bne 1b\n"
                              "dmb\n"
                              : "=&r" (ret), "=&r" (flag)
                              : "r" (exch), "r" (dest)
                              : "memory", "cc");
        return ret;
#else
        gpointer a;

        __asm__ __volatile__ (  NACL_ALIGN()
                                MASK_REGISTER("%1", "al")
                                "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));

        return a;
#endif
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
        gint32 ret, tmp, flag;
        __asm__ __volatile__ (
                                "dmb\n"
                                "1:\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldrex %0, [%3]\n"
                                "add %1, %0, %4\n"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "strex %2, %1, [%3]\n"
                                "teq %2, #0\n"
                                "bne 1b\n"
                                "dmb\n"
                                : "=&r" (ret), "=&r" (tmp), "=&r" (flag)
                                : "r" (dest), "r" (add)
                                : "memory", "cc");

        return ret;
#else
        gint32 a, b, c;

        __asm__ __volatile__ (  "0:\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "ldr %0, [%3]\n\t"
                                "add %1, %0, %4\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "al")
                                "swp %2, %1, [%3]\n\t"
                                "cmp %0, %2\n\t"
                                NACL_ALIGN()
                                MASK_REGISTER("%3", "ne")
                                "swpne %1, %2, [%3]\n\t"
                                "bne 0b"
                                : "=&r" (a), "=&r" (b), "=&r" (c)
                                : "r" (dest), "r" (add)
                                : "cc", "memory");

        return a;
#endif
}

#elif defined(__ia64__)

#ifdef __INTEL_COMPILER
#include <ia64intrin.h>
#endif

static inline gint32 InterlockedCompareExchange(gint32 volatile *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old;

#ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchange (dest, exch, comp);
#else
        guint64 real_comp;

        /* cmpxchg4 zero extends the value read from memory */
        real_comp = (guint64)(guint32)comp;
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                      "cmpxchg4.acq %0 = [%1], %3, ar.ccv\n\t"
                      : "=r" (old) : "r" (dest), "r" (real_comp), "r" (exch));
#endif

        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(gpointer volatile *dest,
                                                gpointer exch, gpointer comp)
{
        gpointer old;

#ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchangePointer (dest, exch, comp);
#else
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                      "cmpxchg8.acq %0 = [%1], %3, ar.ccv\n\t"
                      : "=r" (old) : "r" (dest), "r" (comp), "r" (exch));
#endif

        return(old);
}

static inline gint32 InterlockedIncrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedIncrement (val);
#else
        gint32 old;

        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old + 1, old) != old);

        return old + 1;
#endif
}

static inline gint32 InterlockedDecrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedDecrement (val);
#else
        gint32 old;

        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old - 1, old) != old);

        return old - 1;
#endif
}

static inline gint32 InterlockedExchange(gint32 volatile *dest, gint32 new_val)
{
#ifdef __INTEL_COMPILER
        return _InterlockedExchange (dest, new_val);
#else
        gint32 res;

        do {
                res = *dest;
        } while (InterlockedCompareExchange (dest, new_val, res) != res);

        return res;
#endif
}

static inline gpointer InterlockedExchangePointer(gpointer volatile *dest, gpointer new_val)
{
#ifdef __INTEL_COMPILER
        return (gpointer)_InterlockedExchange64 ((gint64*)dest, (gint64)new_val);
#else
        gpointer res;

        do {
                res = *dest;
        } while (InterlockedCompareExchangePointer (dest, new_val, res) != res);

        return res;
#endif
}

static inline gint32 InterlockedExchangeAdd(gint32 volatile *val, gint32 add)
{
        gint32 old;

#ifdef __INTEL_COMPILER
        old = _InterlockedExchangeAdd (val, add);
#else
        do {
                old = *val;
        } while (InterlockedCompareExchange (val, old + add, old) != old);
#endif

        return old;
}

#elif defined(__mips__)

#if SIZEOF_REGISTER == 8
#error "Not implemented."
#endif

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        gint32 tmp, result = 0;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    addu    %1, %0, 1\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*val)
                              : "m" (*val));
        return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        gint32 tmp, result = 0;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    subu    %1, %0, 1\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*val)
                              : "m" (*val));
        return result - 1;
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        gint32 old, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    bne     %0, %5, 2f\n"
                              "    move    %1, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "2:  .set    mips0\n"
                              : "=&r" (old), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (exch), "r" (comp));
        return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return (gpointer)(InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp)));
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
        gint32 result, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    move    %1, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (exch));
        return(result);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
        return (gpointer)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch));
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
        gint32 result, tmp;

        __asm__ __volatile__ ("    .set    mips32\n"
                              "1:  ll      %0, %2\n"
                              "    addu    %1, %0, %4\n"
                              "    sc      %1, %2\n"
                              "    beqz    %1, 1b\n"
                              "    .set    mips0\n"
                              : "=&r" (result), "=&r" (tmp), "=m" (*dest)
                              : "m" (*dest), "r" (add));
        return result;
}

#else

#define WAPI_NO_ATOMIC_ASM

extern gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp);
extern gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp);
extern gint32 InterlockedIncrement(volatile gint32 *dest);
extern gint32 InterlockedDecrement(volatile gint32 *dest);
extern gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch);
extern gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch);
extern gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add);
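
/*
 * Illustrative addition: the portable definitions behind these externs
 * live in a companion C file, presumably serializing on a process-wide
 * mutex.  A minimal sketch, assuming pthreads (names hypothetical):
 *
 *      static pthread_mutex_t spin = PTHREAD_MUTEX_INITIALIZER;
 *
 *      gint32 InterlockedCompareExchange (volatile gint32 *dest,
 *                                         gint32 exch, gint32 comp)
 *      {
 *              gint32 old;
 *
 *              pthread_mutex_lock (&spin);
 *              old = *dest;
 *              if (old == comp)
 *                      *dest = exch;
 *              pthread_mutex_unlock (&spin);
 *              return old;
 *      }
 */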

#endif

/* Not yet used */
#ifdef USE_GCC_ATOMIC_OPS

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp)
{
        return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
        return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
        return __sync_add_and_fetch (val, 1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
        return __sync_add_and_fetch (val, -1);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
        gint32 old_val;
        do {
                old_val = *val;
        } while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
        return old_val;
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
                                                  gpointer new_val)
{
        gpointer old_val;
        do {
                old_val = *val;
        } while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
        return old_val;
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
        return __sync_fetch_and_add (val, add);
}
#endif
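
/*
 * Illustrative addition (not part of the original header): a common usage
 * pattern, one-time initialization via InterlockedCompareExchangePointer.
 * The function name is hypothetical; it works with whichever branch above
 * provided the Interlocked* operations.
 */
static inline gpointer
example_lazy_init (volatile gpointer *slot, gpointer (*create) (void))
{
        gpointer value = *slot;

        if (!value) {
                gpointer fresh = create ();
                /* Publish fresh only if the slot is still NULL; the call
                 * returns the previous contents, so a non-NULL result means
                 * another thread won the race (a real caller would then
                 * free fresh rather than leak it here). */
                value = InterlockedCompareExchangePointer (slot, fresh, NULL);
                if (!value)
                        value = fresh;
        }
        return value;
}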

#endif /* _WAPI_ATOMIC_H_ */