/*
 * Copyright (c) 2003 Hewlett-Packard Development Company, L.P.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #include "../all_aligned_atomic_load_store.h"
25 /* Real X86 implementations appear */
26 /* to enforce ordering between memory operations, EXCEPT that a later */
27 /* read can pass earlier writes, presumably due to the visible */
28 /* presence of store buffers. */
29 /* We ignore the fact that the official specs */
30 /* seem to be much weaker (and arguably too weak to be usable). */
32 #include "../ordered_except_wr.h"
34 #ifdef AO_ASM_X64_AVAILABLE
35 # include "../test_and_set_t_is_char.h"
37 # include "../test_and_set_t_is_ao_t.h"
40 #include "../standard_ao_double_t.h"
43 /* Seems like over-kill, but that's what MSDN recommends. */
44 /* And apparently winbase.h is not always self-contained. */
46 /* Assume _MSC_VER >= 1400 */
49 #pragma intrinsic (_ReadWriteBarrier)
51 #pragma intrinsic (_InterlockedIncrement64)
52 #pragma intrinsic (_InterlockedDecrement64)
53 #pragma intrinsic (_InterlockedExchange64)
54 #pragma intrinsic (_InterlockedExchangeAdd64)
55 #pragma intrinsic (_InterlockedCompareExchange64)
58 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
60 return _InterlockedExchangeAdd64((LONGLONG volatile *)p, (LONGLONG)incr);
63 #define AO_HAVE_fetch_and_add_full
66 AO_fetch_and_add1_full (volatile AO_t *p)
68 return _InterlockedIncrement64((LONGLONG volatile *)p) - 1;
71 #define AO_HAVE_fetch_and_add1_full
74 AO_fetch_and_sub1_full (volatile AO_t *p)
76 return _InterlockedDecrement64((LONGLONG volatile *)p) + 1;
79 #define AO_HAVE_fetch_and_sub1_full
82 AO_compare_and_swap_full(volatile AO_t *addr,
83 AO_t old, AO_t new_val)
85 return _InterlockedCompareExchange64((LONGLONG volatile *)addr,
86 (LONGLONG)new_val, (LONGLONG)old)
90 #define AO_HAVE_compare_and_swap_full
/* As far as we can tell, the lfence and sfence instructions are not    */
/* currently needed or useful for cached memory accesses.               */

/* Unfortunately mfence doesn't exist everywhere.                       */
/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is                 */
/* probably a conservative test for it?                                 */

#if defined(AO_USE_PENTIUM4_INSTRS)

/* Full memory barrier: drain the store buffer so that all prior        */
/* memory operations become globally visible before any later ones.    */
AO_INLINE void
AO_nop_full(void)
{
  __asm { mfence }
}
#define AO_HAVE_nop_full

#else

/* We could use the cpuid instruction.  But that seems to be slower     */
/* than the default implementation based on test_and_set_full.  Thus    */
/* we omit that bit of misinformation here.                             */

#endif /* !AO_USE_PENTIUM4_INSTRS */
#ifdef AO_ASM_X64_AVAILABLE

/* Atomically set *addr to AO_TS_SET; returns the previous contents.    */
/* xchg with a memory operand carries an implicit lock prefix, so no    */
/* explicit barrier is needed.  The old byte is left in al, which MSVC  */
/* __asm treats as the function result.                                 */
AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr)
{
    __asm
    {
        mov     rax,AO_TS_SET           ;
        mov     rbx,addr                ;
        xchg    byte ptr [rbx],al       ;
    }
}
#define AO_HAVE_test_and_set_full

#endif /* AO_ASM_X64_AVAILABLE */
#ifdef AO_CMPXCHG16B_AVAILABLE

/* AO_compare_double_and_swap_double_full needs implementation for Win64.
 * Also see ../gcc/x86_64.h for partial old Opteron workaround.
 */

# if _MSC_VER >= 1500

#pragma intrinsic (_InterlockedCompareExchange128)

/* Atomically replace the 16-byte value at addr with                    */
/* {new_val1 (low), new_val2 (high)} iff it currently equals            */
/* {old_val1, old_val2}; returns nonzero iff the swap took place.       */
/* _InterlockedCompareExchange128 returns that success flag directly    */
/* and updates the comparand array on failure (result discarded here).  */
AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                       AO_t old_val1, AO_t old_val2,
                                       AO_t new_val1, AO_t new_val2)
{
   __int64 comparandResult[2];
   comparandResult[0] = old_val1; /* low */
   comparandResult[1] = old_val2; /* high */
   return _InterlockedCompareExchange128((volatile __int64 *)addr,
                    new_val2 /* high */, new_val1 /* low */, comparandResult);
}
#  define AO_HAVE_compare_double_and_swap_double_full

# elif defined(AO_ASM_X64_AVAILABLE)

/* If there is no intrinsic _InterlockedCompareExchange128 then we
 * need basically what's given below.
 */
AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                       AO_t old_val1, AO_t old_val2,
                                       AO_t new_val1, AO_t new_val2)
{
    __asm
    {
        mov     rdx,QWORD PTR [old_val2]        ;
        mov     rax,QWORD PTR [old_val1]        ;
        mov     rcx,QWORD PTR [new_val2]        ;
        mov     rbx,QWORD PTR [new_val1]        ;
        lock cmpxchg16b [addr]                  ;
        setz    al                              ; /* ZF set iff the swap succeeded */
    }
}
# define AO_HAVE_compare_double_and_swap_double_full

# endif /* _MSC_VER >= 1500 || AO_ASM_X64_AVAILABLE */

#endif /* AO_CMPXCHG16B_AVAILABLE */