/*
 * atomic.h:  Atomic operations
 *
 * Author:
 *	Dick Porter (dick@ximian.com)
 *
 * (C) 2002 Ximian, Inc.
 * Copyright 2012 Xamarin Inc
 */
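
/*
 * Overview (added note): this header provides Win32-style Interlocked*
 * primitives, implemented per architecture with inline assembly or with
 * OS/compiler intrinsics where available, plus a generic out-of-line
 * fallback at the end.  Return values follow the Win32 contract; an
 * illustrative sketch only:
 *
 *	static volatile gint32 counter;
 *	gint32 n = InterlockedIncrement (&counter);       // returns the new value
 *	gint32 o = InterlockedExchangeAdd (&counter, 4);  // returns the old value
 */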

#ifndef _WAPI_ATOMIC_H_
#define _WAPI_ATOMIC_H_

#if defined(__NetBSD__)
#include <sys/param.h>

#if __NetBSD_Version__ > 499004000
#include <sys/atomic.h>
#define HAVE_ATOMIC_OPS
#endif

#endif

#include <glib.h>

#if defined(__WIN32__) || defined(_WIN32)

#include <windows.h>

#elif defined(__NetBSD__) && defined(HAVE_ATOMIC_OPS)

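/*
 * Note (added for clarity): NetBSD's atomic_cas_{32,ptr}(ptr, expected, new)
 * take the comparand before the new value, so the argument order below is
 * deliberately swapped relative to the Win32-style prototypes.
 */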
static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
	gint32 exch, gint32 comp)
{
	return atomic_cas_32((uint32_t*)dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
	return atomic_cas_ptr(dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
	return atomic_inc_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
	return atomic_dec_32_nv((uint32_t*)val);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
	return atomic_swap_32((uint32_t*)val, new_val);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
		gpointer new_val)
{
	return atomic_swap_ptr(val, new_val);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
	return atomic_add_32_nv((uint32_t*)val, add) - add;
}

#elif defined(__i386__) || defined(__x86_64__)

/*
 * NB: On 32-bit x86 the *Pointer() functions assume that
 * sizeof(gpointer) == sizeof(gint32); the x86_64 paths below use the
 * 64-bit cmpxchgq instead.
 *
 * NB2: These asm functions assume 486+ (some of the opcodes don't
 * exist on 386).  If this becomes an issue, we can get configure to
 * fall back to the non-atomic C versions of these calls.
 */

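/*
 * Illustrative note (not in the original header): InterlockedCompareExchange
 * returns the value *dest held before the call, so callers test for success
 * by comparing the return value against comp, typically in a loop:
 *
 *	gint32 old;
 *	do {
 *		old = *val;
 *	} while (InterlockedCompareExchange (val, old + 1, old) != old);
 */
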
static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
						gint32 exch, gint32 comp)
{
	gint32 old;

	__asm__ __volatile__ ("lock; cmpxchgl %2, %0"
			      : "=m" (*dest), "=a" (old)
			      : "r" (exch), "m" (*dest), "a" (comp));
	return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
	gpointer old;

	__asm__ __volatile__ ("lock; "
#if defined(__x86_64__) && !defined(__native_client__)
			      "cmpxchgq"
#else
			      "cmpxchgl"
#endif
			      " %2, %0"
			      : "=m" (*dest), "=a" (old)
			      : "r" (exch), "m" (*dest), "a" (comp));

	return(old);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
	gint32 tmp;

	__asm__ __volatile__ ("lock; xaddl %0, %1"
			      : "=r" (tmp), "=m" (*val)
			      : "0" (1), "m" (*val));

	return(tmp+1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
	gint32 tmp;

	__asm__ __volatile__ ("lock; xaddl %0, %1"
			      : "=r" (tmp), "=m" (*val)
			      : "0" (-1), "m" (*val));

	return(tmp-1);
}

/*
 * See
 * http://msdn.microsoft.com/msdnmag/issues/0700/Win32/
 * for the reasons for using cmpxchg and a loop here.
 */
static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
	gint32 ret;

	__asm__ __volatile__ ("1:; lock; cmpxchgl %2, %0; jne 1b"
			      : "=m" (*val), "=a" (ret)
			      : "r" (new_val), "m" (*val), "a" (*val));
	return(ret);
}
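
/*
 * Illustrative C equivalent of the loop above (a sketch, using the
 * InterlockedCompareExchange defined in this file):
 *
 *	do {
 *		ret = *val;
 *	} while (InterlockedCompareExchange (val, new_val, ret) != ret);
 */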

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
						  gpointer new_val)
{
	gpointer ret;

	__asm__ __volatile__ ("1:; lock; "
#if defined(__x86_64__) && !defined(__native_client__)
			      "cmpxchgq"
#else
			      "cmpxchgl"
#endif
			      " %2, %0; jne 1b"
			      : "=m" (*val), "=a" (ret)
			      : "r" (new_val), "m" (*val), "a" (*val));

	return(ret);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
	gint32 ret;

	__asm__ __volatile__ ("lock; xaddl %0, %1"
			      : "=r" (ret), "=m" (*val)
			      : "0" (add), "m" (*val));

	return(ret);
}

#elif (defined(sparc) || defined (__sparc__)) && defined(__GNUC__)

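/*
 * Note (an assumption, added for clarity): the cas/casx opcodes are emitted
 * as raw .word values, presumably so this file assembles even with toolchains
 * whose assembler does not accept the v9 mnemonics.
 */
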
G_GNUC_UNUSED
static inline gint32 InterlockedCompareExchange(volatile gint32 *_dest, gint32 _exch, gint32 _comp)
{
	register volatile gint32 *dest asm("g1") = _dest;
	register gint32 comp asm("o4") = _comp;
	register gint32 exch asm("o5") = _exch;

	__asm__ __volatile__(
		/* cas [%%g1], %%o4, %%o5 */
		".word 0xdbe0500c"
		: "=r" (exch)
		: "0" (exch), "r" (dest), "r" (comp)
		: "memory");

	return exch;
}

G_GNUC_UNUSED
static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *_dest, gpointer _exch, gpointer _comp)
{
	register volatile gpointer *dest asm("g1") = _dest;
	register gpointer comp asm("o4") = _comp;
	register gpointer exch asm("o5") = _exch;

	__asm__ __volatile__(
#ifdef SPARCV9
		/* casx [%%g1], %%o4, %%o5 */
		".word 0xdbf0500c"
#else
		/* cas [%%g1], %%o4, %%o5 */
		".word 0xdbe0500c"
#endif
		: "=r" (exch)
		: "0" (exch), "r" (dest), "r" (comp)
		: "memory");

	return exch;
}

G_GNUC_UNUSED
static inline gint32 InterlockedIncrement(volatile gint32 *_dest)
{
	register volatile gint32 *dest asm("g1") = _dest;
	register gint32 tmp asm("o4");
	register gint32 ret asm("o5");

	__asm__ __volatile__(
		"1:	ld	[%%g1], %%o4\n\t"
		"	add	%%o4, 1, %%o5\n\t"
		/*	cas	[%%g1], %%o4, %%o5 */
		"	.word	0xdbe0500c\n\t"
		"	cmp	%%o4, %%o5\n\t"
		"	bne	1b\n\t"
		"	 add	%%o5, 1, %%o5"
		: "=&r" (tmp), "=&r" (ret)
		: "r" (dest)
		: "memory", "cc");

	return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedDecrement(volatile gint32 *_dest)
{
	register volatile gint32 *dest asm("g1") = _dest;
	register gint32 tmp asm("o4");
	register gint32 ret asm("o5");

	__asm__ __volatile__(
		"1:	ld	[%%g1], %%o4\n\t"
		"	sub	%%o4, 1, %%o5\n\t"
		/*	cas	[%%g1], %%o4, %%o5 */
		"	.word	0xdbe0500c\n\t"
		"	cmp	%%o4, %%o5\n\t"
		"	bne	1b\n\t"
		"	 sub	%%o5, 1, %%o5"
		: "=&r" (tmp), "=&r" (ret)
		: "r" (dest)
		: "memory", "cc");

	return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchange(volatile gint32 *_dest, gint32 exch)
{
	register volatile gint32 *dest asm("g1") = _dest;
	register gint32 tmp asm("o4");
	register gint32 ret asm("o5");

	__asm__ __volatile__(
		"1:	ld	[%%g1], %%o4\n\t"
		"	mov	%3, %%o5\n\t"
		/*	cas	[%%g1], %%o4, %%o5 */
		"	.word	0xdbe0500c\n\t"
		"	cmp	%%o4, %%o5\n\t"
		"	bne	1b\n\t"
		"	 nop"
		: "=&r" (tmp), "=&r" (ret)
		: "r" (dest), "r" (exch)
		: "memory", "cc");

	return ret;
}

G_GNUC_UNUSED
static inline gpointer InterlockedExchangePointer(volatile gpointer *_dest, gpointer exch)
{
	register volatile gpointer *dest asm("g1") = _dest;
	register gpointer tmp asm("o4");
	register gpointer ret asm("o5");

	__asm__ __volatile__(
#ifdef SPARCV9
		"1:	ldx	[%%g1], %%o4\n\t"
#else
		"1:	ld	[%%g1], %%o4\n\t"
#endif
		"	mov	%3, %%o5\n\t"
#ifdef SPARCV9
		/*	casx	[%%g1], %%o4, %%o5 */
		"	.word	0xdbf0500c\n\t"
#else
		/*	cas	[%%g1], %%o4, %%o5 */
		"	.word	0xdbe0500c\n\t"
#endif
		"	cmp	%%o4, %%o5\n\t"
		"	bne	1b\n\t"
		"	 nop"
		: "=&r" (tmp), "=&r" (ret)
		: "r" (dest), "r" (exch)
		: "memory", "cc");

	return ret;
}

G_GNUC_UNUSED
static inline gint32 InterlockedExchangeAdd(volatile gint32 *_dest, gint32 add)
{
	register volatile gint32 *dest asm("g1") = _dest;
	register gint32 tmp asm("o4");
	register gint32 ret asm("o5");

	__asm__ __volatile__(
		"1:	ld	[%%g1], %%o4\n\t"
		"	add	%%o4, %3, %%o5\n\t"
		/*	cas	[%%g1], %%o4, %%o5 */
		"	.word	0xdbe0500c\n\t"
		"	cmp	%%o4, %%o5\n\t"
		"	bne	1b\n\t"
		"	 add	%%o5, %3, %%o5"
		: "=&r" (tmp), "=&r" (ret)
		: "r" (dest), "r" (add)
		: "memory", "cc");

	return ret;
}

#elif __s390x__

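/*
 * Note (added for clarity): CS/CSG are the z/Architecture 32-/64-bit
 * compare-and-swap instructions; they are serializing, so no separate
 * memory barriers are needed here.
 */
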
static inline gint32
InterlockedCompareExchange(volatile gint32 *dest,
			   gint32 exch, gint32 comp)
{
	gint32 old;

	__asm__ __volatile__ ("\tLA\t1,%0\n"
			      "\tLR\t%1,%3\n"
			      "\tCS\t%1,%2,0(1)\n"
			      : "+m" (*dest), "=&r" (old)
			      : "r" (exch), "r" (comp)
			      : "1", "cc");
	return(old);
}

static inline gpointer
InterlockedCompareExchangePointer(volatile gpointer *dest,
				  gpointer exch,
				  gpointer comp)
{
	gpointer old;

	__asm__ __volatile__ ("\tLA\t1,%0\n"
			      "\tLGR\t%1,%3\n"
			      "\tCSG\t%1,%2,0(1)\n"
			      : "+m" (*dest), "=&r" (old)
			      : "r" (exch), "r" (comp)
			      : "1", "cc");

	return(old);
}

static inline gint32
InterlockedIncrement(volatile gint32 *val)
{
	gint32 tmp;

	__asm__ __volatile__ ("\tLA\t2,%1\n"
			      "0:\tLGF\t%0,%1\n"
			      "\tLGFR\t1,%0\n"
			      "\tAGHI\t1,1\n"
			      "\tCS\t%0,1,0(2)\n"
			      "\tJNZ\t0b\n"
			      "\tLGFR\t%0,1"
			      : "=r" (tmp), "+m" (*val)
			      : : "1", "2", "cc");

	return(tmp);
}

static inline gint32
InterlockedDecrement(volatile gint32 *val)
{
	gint32 tmp;

	__asm__ __volatile__ ("\tLA\t2,%1\n"
			      "0:\tLGF\t%0,%1\n"
			      "\tLGFR\t1,%0\n"
			      "\tAGHI\t1,-1\n"
			      "\tCS\t%0,1,0(2)\n"
			      "\tJNZ\t0b\n"
			      "\tLGFR\t%0,1"
			      : "=r" (tmp), "+m" (*val)
			      : : "1", "2", "cc");

	return(tmp);
}

static inline gint32
InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
	gint32 ret;

	__asm__ __volatile__ ("\tLA\t1,%0\n"
			      "0:\tL\t%1,%0\n"
			      "\tCS\t%1,%2,0(1)\n"
			      "\tJNZ\t0b"
			      : "+m" (*val), "=&r" (ret)
			      : "r" (new_val)
			      : "1", "cc");

	return(ret);
}

static inline gpointer
InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
{
	gpointer ret;

	__asm__ __volatile__ ("\tLA\t1,%0\n"
			      "0:\tLG\t%1,%0\n"
			      "\tCSG\t%1,%2,0(1)\n"
			      "\tJNZ\t0b"
			      : "+m" (*val), "=&r" (ret)
			      : "r" (new_val)
			      : "1", "cc");

	return(ret);
}

static inline gint32
InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
	gint32 ret;

	__asm__ __volatile__ ("\tLA\t2,%1\n"
			      "0:\tLGF\t%0,%1\n"
			      "\tLGFR\t1,%0\n"
			      "\tAGR\t1,%2\n"
			      "\tCS\t%0,1,0(2)\n"
			      "\tJNZ\t0b"
			      : "=&r" (ret), "+m" (*val)
			      : "r" (add)
			      : "1", "2", "cc");

	return(ret);
}

#elif defined(__mono_ppc__)

#ifdef G_COMPILER_CODEWARRIOR
static inline gint32 InterlockedIncrement(volatile register gint32 *val)
{
	register gint32 result = 0;
	register gint32 tmp;

	asm
	{
		@1:
			lwarx	tmp, 0, val
			addi	result, tmp, 1
			stwcx.	result, 0, val
			bne-	@1
	}

	return result;
}

static inline gint32 InterlockedDecrement(register volatile gint32 *val)
{
	register gint32 result = 0;
	register gint32 tmp;

	asm
	{
		@1:
			lwarx	tmp, 0, val
			addi	result, tmp, -1
			stwcx.	result, 0, val
			bne-	@1
	}

	return result;
}
#define InterlockedCompareExchangePointer(dest,exch,comp) (void*)InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))

static inline gint32 InterlockedCompareExchange(volatile register gint32 *dest, register gint32 exch, register gint32 comp)
{
	register gint32 tmp = 0;

	asm
	{
		@1:
			lwarx	tmp, 0, dest
			cmpw	tmp, comp
			bne-	@2
			stwcx.	exch, 0, dest
			bne-	@1
		@2:
	}

	return tmp;
}
static inline gint32 InterlockedExchange(register volatile gint32 *dest, register gint32 exch)
{
	register gint32 tmp = 0;

	asm
	{
		@1:
			lwarx	tmp, 0, dest
			stwcx.	exch, 0, dest
			bne-	@1
	}

	return tmp;
}
#define InterlockedExchangePointer(dest,exch) (void*)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))
#else

#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
#define LDREGX "ldarx"
#define STREGCXD "stdcx."
#define CMPREG "cmpd"
#else
#define LDREGX "lwarx"
#define STREGCXD "stwcx."
#define CMPREG "cmpw"
#endif

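/*
 * These macros pick the pointer-width load-reserved/store-conditional/
 * compare opcodes: the 64-bit forms on ppc64 (outside ILP32), the 32-bit
 * forms otherwise.  The gint32 operations below hard-code the 32-bit
 * opcodes directly.
 */
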
static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
	gint32 result = 0, tmp;

	__asm__ __volatile__ ("\n1:\n\t"
			      "lwarx  %0, 0, %2\n\t"
			      "addi   %1, %0, 1\n\t"
			      "stwcx. %1, 0, %2\n\t"
			      "bne-   1b"
			      : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
	return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
	gint32 result = 0, tmp;

	__asm__ __volatile__ ("\n1:\n\t"
			      "lwarx  %0, 0, %2\n\t"
			      "addi   %1, %0, -1\n\t"
			      "stwcx. %1, 0, %2\n\t"
			      "bne-   1b"
			      : "=&b" (result), "=&b" (tmp): "r" (val): "cc", "memory");
	return result - 1;
}

static inline gpointer InterlockedCompareExchangePointer (volatile gpointer *dest,
						gpointer exch, gpointer comp)
{
	gpointer tmp = NULL;

	__asm__ __volatile__ ("\n1:\n\t"
			     LDREGX " %0, 0, %1\n\t"
			     CMPREG " %0, %2\n\t"
			     "bne-    2f\n\t"
			     STREGCXD " %3, 0, %1\n\t"
			     "bne-    1b\n"
			     "2:"
			     : "=&r" (tmp)
			     : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
	return(tmp);
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
						gint32 exch, gint32 comp) {
	gint32 tmp = 0;

	__asm__ __volatile__ ("\n1:\n\t"
			     "lwarx   %0, 0, %1\n\t"
			     "cmpw    %0, %2\n\t"
			     "bne-    2f\n\t"
			     "stwcx.  %3, 0, %1\n\t"
			     "bne-    1b\n"
			     "2:"
			     : "=&r" (tmp)
			     : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
	return(tmp);
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
	gint32 tmp = 0;

	__asm__ __volatile__ ("\n1:\n\t"
			      "lwarx  %0, 0, %2\n\t"
			      "stwcx. %3, 0, %2\n\t"
			      "bne    1b"
			      : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
	return(tmp);
}

static inline gpointer InterlockedExchangePointer (volatile gpointer *dest, gpointer exch)
{
	gpointer tmp = NULL;

	__asm__ __volatile__ ("\n1:\n\t"
			      LDREGX " %0, 0, %2\n\t"
			      STREGCXD " %3, 0, %2\n\t"
			      "bne    1b"
			      : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
	return(tmp);
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
	gint32 result, tmp;
	__asm__ __volatile__ ("\n1:\n\t"
			      "lwarx  %0, 0, %2\n\t"
			      "add    %1, %0, %3\n\t"
			      "stwcx. %1, 0, %2\n\t"
			      "bne    1b"
			      : "=&r" (result), "=&r" (tmp)
			      : "r" (dest), "r" (add) : "cc", "memory");
	return(result);
}

#undef LDREGX
#undef STREGCXD
#undef CMPREG

#endif /* !G_COMPILER_CODEWARRIOR */

#elif defined(__arm__)

#ifdef __native_client__
#define MASK_REGISTER(reg, cond) "bic" cond " " reg ", " reg ", #0xc0000000\n"
#define NACL_ALIGN() ".align 4\n"
#else
#define MASK_REGISTER(reg, cond)
#define NACL_ALIGN()
#endif

/*
 * Atomic operations on ARM don't include memory barriers, and the runtime
 * code depends on barrier semantics, so we add them explicitly.
 */

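/*
 * Sketch of the ARMv6/v7 pattern used below (illustrative only): a dmb
 * before and after a ldrex/strex retry loop gives the full-barrier
 * semantics the Win32-style Interlocked contract implies:
 *
 *	dmb
 *	1: ldrex ... ; strex ... ; bne 1b
 *	dmb
 */
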
static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gint32 ret, tmp;
	__asm__ __volatile__ (	"1:\n"
				NACL_ALIGN()
				"dmb\n"
				"mov	%0, #0\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"ldrex %1, [%2]\n"
				"teq	%1, %3\n"
				"it eq\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "eq")
				"strexeq %0, %4, [%2]\n"
				"teq %0, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (tmp), "=&r" (ret)
				: "r" (dest), "r" (comp), "r" (exch)
				: "memory", "cc");

	return ret;
#else
	gint32 a, b;

	__asm__ __volatile__ (	"0:\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"ldr %1, [%2]\n\t"
				"cmp %1, %4\n\t"
				"mov %0, %1\n\t"
				"bne 1f\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"swp %0, %3, [%2]\n\t"
				"cmp %0, %1\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%2", "ne")
				"swpne %3, %0, [%2]\n\t"
				"bne 0b\n\t"
				"1:"
				: "=&r" (a), "=&r" (b)
				: "r" (dest), "r" (exch), "r" (comp)
				: "cc", "memory");

	return a;
#endif
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gpointer ret, tmp;
	__asm__ __volatile__ (	"dmb\n"
				"1:\n"
				NACL_ALIGN()
				"mov	%0, #0\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"ldrex %1, [%2]\n"
				"teq	%1, %3\n"
				"it eq\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "eq")
				"strexeq %0, %4, [%2]\n"
				"teq %0, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (tmp), "=&r" (ret)
				: "r" (dest), "r" (comp), "r" (exch)
				: "memory", "cc");

	return ret;
#else
	gpointer a, b;

	__asm__ __volatile__ (	"0:\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"ldr %1, [%2]\n\t"
				"cmp %1, %4\n\t"
				"mov %0, %1\n\t"
				"bne 1f\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%2", "eq")
				"swpeq %0, %3, [%2]\n\t"
				"cmp %0, %1\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%2", "ne")
				"swpne %3, %0, [%2]\n\t"
				"bne 0b\n\t"
				"1:"
				: "=&r" (a), "=&r" (b)
				: "r" (dest), "r" (exch), "r" (comp)
				: "cc", "memory");

	return a;
#endif
}

static inline gint32 InterlockedIncrement(volatile gint32 *dest)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gint32 ret, flag;
	__asm__ __volatile__ (	"dmb\n"
				"1:\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"ldrex %0, [%2]\n"
				"add %0, %0, %3\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"strex %1, %0, [%2]\n"
				"teq %1, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (ret), "=&r" (flag)
				: "r" (dest), "r" (1)
				: "memory", "cc");

	return ret;
#else
	gint32 a, b, c;

	__asm__ __volatile__ (	"0:\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"ldr %0, [%3]\n\t"
				"add %1, %0, %4\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"swp %2, %1, [%3]\n\t"
				"cmp %0, %2\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "ne")
				"swpne %1, %2, [%3]\n\t"
				"bne 0b"
				: "=&r" (a), "=&r" (b), "=&r" (c)
				: "r" (dest), "r" (1)
				: "cc", "memory");

	return b;
#endif
}

static inline gint32 InterlockedDecrement(volatile gint32 *dest)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gint32 ret, flag;
	__asm__ __volatile__ (	"dmb\n"
				"1:\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"ldrex %0, [%2]\n"
				"sub %0, %0, %3\n"
				NACL_ALIGN()
				MASK_REGISTER("%2", "al")
				"strex %1, %0, [%2]\n"
				"teq %1, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (ret), "=&r" (flag)
				: "r" (dest), "r" (1)
				: "memory", "cc");

	return ret;
#else
	gint32 a, b, c;

	__asm__ __volatile__ (	"0:\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"ldr %0, [%3]\n\t"
				"add %1, %0, %4\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"swp %2, %1, [%3]\n\t"
				"cmp %0, %2\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "ne")
				"swpne %1, %2, [%3]\n\t"
				"bne 0b"
				: "=&r" (a), "=&r" (b), "=&r" (c)
				: "r" (dest), "r" (-1)
				: "cc", "memory");

	return b;
#endif
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gint32 ret, flag;
	__asm__ __volatile__ (	"dmb\n"
				"1:\n"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"ldrex %0, [%3]\n"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"strex %1, %2, [%3]\n"
				"teq %1, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (ret), "=&r" (flag)
				: "r" (exch), "r" (dest)
				: "memory", "cc");
	return ret;
#else
	gint32 a;

	__asm__ __volatile__ (	NACL_ALIGN()
				MASK_REGISTER("%1", "al")
				"swp %0, %2, [%1]"
				: "=&r" (a)
				: "r" (dest), "r" (exch));

	return a;
#endif
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gpointer ret, flag;
	__asm__ __volatile__ (	"dmb\n"
				"1:\n"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"ldrex %0, [%3]\n"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"strex %1, %2, [%3]\n"
				"teq %1, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (ret), "=&r" (flag)
				: "r" (exch), "r" (dest)
				: "memory", "cc");
	return ret;
#else
	gpointer a;

	__asm__ __volatile__ (	NACL_ALIGN()
				MASK_REGISTER("%1", "al")
				"swp %0, %2, [%1]"
				: "=&r" (a)
				: "r" (dest), "r" (exch));

	return a;
#endif
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7S__)
	gint32 ret, tmp, flag;
	__asm__ __volatile__ (	"dmb\n"
				"1:\n"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"ldrex %0, [%3]\n"
				"add %1, %0, %4\n"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"strex %2, %1, [%3]\n"
				"teq %2, #0\n"
				"bne 1b\n"
				"dmb\n"
				: "=&r" (ret), "=&r" (tmp), "=&r" (flag)
				: "r" (dest), "r" (add)
				: "memory", "cc");

	return ret;
#else
	int a, b, c;

	__asm__ __volatile__ (	"0:\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"ldr %0, [%3]\n\t"
				"add %1, %0, %4\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "al")
				"swp %2, %1, [%3]\n\t"
				"cmp %0, %2\n\t"
				NACL_ALIGN()
				MASK_REGISTER("%3", "ne")
				"swpne %1, %2, [%3]\n\t"
				"bne 0b"
				: "=&r" (a), "=&r" (b), "=&r" (c)
				: "r" (dest), "r" (add)
				: "cc", "memory");

	return a;
#endif
}

#undef NACL_ALIGN
#undef MASK_REGISTER

#elif defined(__ia64__)

#ifdef __INTEL_COMPILER
#include <ia64intrin.h>
#endif

static inline gint32 InterlockedCompareExchange(gint32 volatile *dest,
						gint32 exch, gint32 comp)
{
	gint32 old;
	guint64 real_comp;

#ifdef __INTEL_COMPILER
	old = _InterlockedCompareExchange (dest, exch, comp);
#else
	/* cmpxchg4 zero extends the value read from memory */
	real_comp = (guint64)(guint32)comp;
	asm volatile ("mov ar.ccv = %2 ;;\n\t"
		      "cmpxchg4.acq %0 = [%1], %3, ar.ccv\n\t"
		      : "=r" (old) : "r" (dest), "r" (real_comp), "r" (exch));
#endif

	return(old);
}

static inline gpointer InterlockedCompareExchangePointer(gpointer volatile *dest,
						gpointer exch, gpointer comp)
{
	gpointer old;

#ifdef __INTEL_COMPILER
	old = _InterlockedCompareExchangePointer (dest, exch, comp);
#else
	asm volatile ("mov ar.ccv = %2 ;;\n\t"
		      "cmpxchg8.acq %0 = [%1], %3, ar.ccv\n\t"
		      : "=r" (old) : "r" (dest), "r" (comp), "r" (exch));
#endif

	return(old);
}

static inline gint32 InterlockedIncrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
	return _InterlockedIncrement (val);
#else
	gint32 old;

	do {
		old = *val;
	} while (InterlockedCompareExchange (val, old + 1, old) != old);

	return old + 1;
#endif
}

static inline gint32 InterlockedDecrement(gint32 volatile *val)
{
#ifdef __INTEL_COMPILER
	return _InterlockedDecrement (val);
#else
	gint32 old;

	do {
		old = *val;
	} while (InterlockedCompareExchange (val, old - 1, old) != old);

	return old - 1;
#endif
}

static inline gint32 InterlockedExchange(gint32 volatile *dest, gint32 new_val)
{
#ifdef __INTEL_COMPILER
	return _InterlockedExchange (dest, new_val);
#else
	gint32 res;

	do {
		res = *dest;
	} while (InterlockedCompareExchange (dest, new_val, res) != res);

	return res;
#endif
}

static inline gpointer InterlockedExchangePointer(gpointer volatile *dest, gpointer new_val)
{
#ifdef __INTEL_COMPILER
	return (gpointer)_InterlockedExchange64 ((gint64*)dest, (gint64)new_val);
#else
	gpointer res;

	do {
		res = *dest;
	} while (InterlockedCompareExchangePointer (dest, new_val, res) != res);

	return res;
#endif
}

static inline gint32 InterlockedExchangeAdd(gint32 volatile *val, gint32 add)
{
	gint32 old;

#ifdef __INTEL_COMPILER
	old = _InterlockedExchangeAdd (val, add);
#else
	do {
		old = *val;
	} while (InterlockedCompareExchange (val, old + add, old) != old);
#endif

	return old;
}

#elif defined(__mips__)

#if SIZEOF_REGISTER == 8
#error "Not implemented."
#endif

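/*
 * ll/sc note (added for clarity): sc writes its register back to memory
 * only if the reservation taken by ll is still held, and sets that
 * register to 0 on failure, hence the beqz retry loops below.
 */
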
static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
	gint32 tmp, result = 0;

	__asm__ __volatile__ ("    .set    mips32\n"
			      "1:  ll      %0, %2\n"
			      "    addu    %1, %0, 1\n"
			      "    sc      %1, %2\n"
			      "    beqz    %1, 1b\n"
			      "    .set    mips0\n"
			      : "=&r" (result), "=&r" (tmp), "=m" (*val)
			      : "m" (*val));
	return result + 1;
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
	gint32 tmp, result = 0;

	__asm__ __volatile__ ("    .set    mips32\n"
			      "1:  ll      %0, %2\n"
			      "    subu    %1, %0, 1\n"
			      "    sc      %1, %2\n"
			      "    beqz    %1, 1b\n"
			      "    .set    mips0\n"
			      : "=&r" (result), "=&r" (tmp), "=m" (*val)
			      : "m" (*val));
	return result - 1;
}

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
						gint32 exch, gint32 comp) {
	gint32 old, tmp;

	__asm__ __volatile__ ("    .set    mips32\n"
			      "1:  ll      %0, %2\n"
			      "    bne     %0, %5, 2f\n"
			      "    move    %1, %4\n"
			      "    sc      %1, %2\n"
			      "    beqz    %1, 1b\n"
			      "2:  .set    mips0\n"
			      : "=&r" (old), "=&r" (tmp), "=m" (*dest)
			      : "m" (*dest), "r" (exch), "r" (comp));
	return(old);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
	return (gpointer)(InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp)));
}

static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
{
	gint32 result, tmp;

	__asm__ __volatile__ ("    .set    mips32\n"
			      "1:  ll      %0, %2\n"
			      "    move    %1, %4\n"
			      "    sc      %1, %2\n"
			      "    beqz    %1, 1b\n"
			      "    .set    mips0\n"
			      : "=&r" (result), "=&r" (tmp), "=m" (*dest)
			      : "m" (*dest), "r" (exch));
	return(result);
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
{
	return (gpointer)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch));
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
{
	gint32 result, tmp;

	__asm__ __volatile__ ("    .set    mips32\n"
			      "1:  ll      %0, %2\n"
			      "    addu    %1, %0, %4\n"
			      "    sc      %1, %2\n"
			      "    beqz    %1, 1b\n"
			      "    .set    mips0\n"
			      : "=&r" (result), "=&r" (tmp), "=m" (*dest)
			      : "m" (*dest), "r" (add));
	return result;
}

#else

#define WAPI_NO_ATOMIC_ASM

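/*
 * Note (an assumption): with WAPI_NO_ATOMIC_ASM defined, these functions
 * are expected to be provided out of line by the runtime, typically with
 * a lock-based implementation.
 */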
extern gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp);
extern gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp);
extern gint32 InterlockedIncrement(volatile gint32 *dest);
extern gint32 InterlockedDecrement(volatile gint32 *dest);
extern gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch);
extern gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch);
extern gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add);

#endif

/*
 * Not yet used: nothing defines USE_GCC_ATOMIC_OPS yet, and enabling it
 * alongside one of the per-architecture sections above would redefine
 * these functions.
 */
#ifdef USE_GCC_ATOMIC_OPS

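/*
 * Note (added for clarity): the __sync_* builtins used here act as full
 * memory barriers, matching the barrier semantics of the assembly
 * implementations above.
 */
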
static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
						gint32 exch, gint32 comp)
{
	return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
	return __sync_val_compare_and_swap (dest, comp, exch);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
	return __sync_add_and_fetch (val, 1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
	return __sync_add_and_fetch (val, -1);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
	gint32 old_val;
	do {
		old_val = *val;
	} while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
	return old_val;
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
						  gpointer new_val)
{
	gpointer old_val;
	do {
		old_val = *val;
	} while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
	return old_val;
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
	return __sync_fetch_and_add (val, add);
}
#endif

#endif /* _WAPI_ATOMIC_H_ */