src/mm/boehm-gc/libatomic_ops-1.2/src/atomic_ops/sysdeps/gcc/x86_64.h

   1 /*
   2  * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
   3  * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
   4  * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
   5  *
   6  *
   7  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   8  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   9  *
  10  * Permission is hereby granted to use or copy this program
  11  * for any purpose,  provided the above notices are retained on all copies.
  12  * Permission to modify the code and to distribute modified code is granted,
  13  * provided the above notices are retained, and a notice that the code was
  14  * modified is included with the above copyright notice.
  15  *
  16  * Some of the machine specific code was borrowed from our GC distribution.
  17  */
  18
  19 /* The following really assume we have a 486 or better.  Unfortunately  */
  20 /* gcc doesn't define a suitable feature test macro based on command    */
  21 /* line options.                                                        */
  22 /* We should perhaps test dynamically.                                  */
  23
  24 #include "../all_aligned_atomic_load_store.h"
  25
  26 /* Real X86 implementations, except for some old WinChips, appear       */
  27 /* to enforce ordering between memory operations, EXCEPT that a later   */
  28 /* read can pass earlier writes, presumably due to the visible          */
  29 /* presence of store buffers.                                           */
  30 /* We ignore both the WinChips, and the fact that the official specs    */
  31 /* seem to be much weaker (and arguably too weak to be usable).         */
  32
  33 #include "../ordered_except_wr.h"
  34
  35 #include "../test_and_set_t_is_char.h"
  36
  37 #if defined(AO_USE_PENTIUM4_INSTRS)
  38 AO_INLINE void
  39 AO_nop_full()
  40 {
  41   __asm__ __volatile__("mfence" : : : "memory");
  42 }
  43
  44 #define AO_HAVE_nop_full
  45
  46 #else
  47
  48 /* We could use the cpuid instruction.  But that seems to be slower     */
  49 /* than the default implementation based on test_and_set_full.  Thus    */
  50 /* we omit that bit of misinformation here.                             */
  51
  52 #endif
  53
  54 /* As far as we can tell, the lfence and sfence instructions are not    */
  55 /* currently needed or useful for cached memory accesses.               */
  56
  57 /* Really only works for 486 and later */
  58 AO_INLINE AO_t
  59 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
  60 {
  61   AO_t result;
  62
  63   __asm__ __volatile__ ("lock; xaddq %0, %1" :
  64                         "=r" (result), "=m" (*p) : "0" (incr), "m" (*p)
  65                         : "memory");
  66   return result;
  67 }
  68
  69 #define AO_HAVE_fetch_and_add_full
  70
  71 AO_INLINE unsigned char
  72 AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr)
  73 {
  74   unsigned char result;
  75
  76   __asm__ __volatile__ ("lock; xaddb %0, %1" :
  77                         "=q" (result), "=m" (*p) : "0" (incr), "m" (*p)
  78                         : "memory");
  79   return result;
  80 }
  81
  82 #define AO_HAVE_char_fetch_and_add_full
  83
  84 AO_INLINE unsigned short
  85 AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr)
  86 {
  87   unsigned short result;
  88
  89   __asm__ __volatile__ ("lock; xaddw %0, %1" :
  90                         "=r" (result), "=m" (*p) : "0" (incr), "m" (*p)
  91                         : "memory");
  92   return result;
  93 }
  94
  95 #define AO_HAVE_short_fetch_and_add_full
  96
  97 AO_INLINE unsigned short
  98 AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr)
  99 {
 100   unsigned int result;
 101
 102   __asm__ __volatile__ ("lock; xaddl %0, %1" :
 103                         "=r" (result), "=m" (*p) : "0" (incr), "m" (*p)
 104                         : "memory");
 105   return result;
 106 }
 107
 108 #define AO_HAVE_int_fetch_and_add_full
 109
 110 /* Really only works for 486 and later */
 111 AO_INLINE void
 112 AO_or_full (volatile AO_t *p, AO_t incr)
 113 {
 114   __asm__ __volatile__ ("lock; orq %1, %0" :
 115                         "=m" (*p) : "r" (incr), "m" (*p) : "memory");
 116 }
 117
 118 #define AO_HAVE_or_full
 119
 120 AO_INLINE AO_TS_VAL_t
 121 AO_test_and_set_full(volatile AO_TS_t *addr)
 122 {
 123   unsigned char oldval;
 124   /* Note: the "xchg" instruction does not need a "lock" prefix */
 125   __asm__ __volatile__("xchgb %0, %1"
 126                 : "=q"(oldval), "=m"(*addr)
 127                 : "0"(0xff), "m"(*addr) : "memory");
 128   return (AO_TS_VAL_t)oldval;
 129 }
 130
 131 #define AO_HAVE_test_and_set_full
 132
 133 /* Returns nonzero if the comparison succeeded. */
 134 AO_INLINE int
 135 AO_compare_and_swap_full(volatile AO_t *addr,
 136                              AO_t old, AO_t new_val)
 137 {
 138   char result;
 139   __asm__ __volatile__("lock; cmpxchgq %3, %0; setz %1"
 140                        : "=m"(*addr), "=q"(result)
 141                        : "m"(*addr), "r" (new_val), "a"(old) : "memory");
 142   return (int) result;
 143 }
 144
 145 #define AO_HAVE_compare_and_swap_full
 146
/* FIXME: The Intel version has a 16-byte CAS instruction.      */