[mini] Implement atomic exchange with xchg on x86/amd64.
author	Alex Rønne Petersen <alexrp@xamarin.com>
Wed, 4 Feb 2015 12:14:15 +0000 (13:14 +0100)
committer	Alex Rønne Petersen <alexrp@xamarin.com>
Wed, 4 Feb 2015 12:18:13 +0000 (13:18 +0100)
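
xchg with a memory operand implies the LOCK prefix, so a single xchg plus a register-to-register move replaces the previous cmpxchg retry loop. This removes the register allocation hack that forced the destination into eax/rax and the push/pop fixups for the cases where the base register or second source already occupied it; the machine descriptions shrink accordingly (dest:a becomes dest:i, the instruction length drops to 8 bytes, and the clobber spec accounts for xchg overwriting its register operand). Since xchg leaves the old value in that register, the emitted code copies sreg2 into dreg afterwards.

For reference, a minimal standalone sketch of the emitted sequence, assuming GCC-style inline assembly (this helper is illustrative only and is not part of the Mono sources):

	#include <stdint.h>

	/* Atomically swap *dest with val and return the previous value.
	 * xchg is implicitly locked when one operand is in memory, so no
	 * explicit LOCK prefix and no cmpxchg loop are needed; the old
	 * value is left in the register operand. */
	static inline int32_t
	atomic_exchange_i4_sketch (volatile int32_t *dest, int32_t val)
	{
		__asm__ __volatile__ ("xchgl %0, %1"
		                      : "+r" (val), "+m" (*dest)
		                      :
		                      : "memory");
		return val;
	}

The trailing mov emitted by the JIT serves the same purpose as the return statement above: the swapped-out value ends up in sreg2's register rather than the destination vreg, so it has to be moved there explicitly.
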
mono/mini/cpu-amd64.md
mono/mini/cpu-x86.md
mono/mini/mini-amd64.c
mono/mini/mini-x86.c

index 877bc298a78b659e37bff3ebf96973d18b5951c5..5d20beea6cc71c523da5d0370b80813519f5e658 100755 (executable)
@@ -304,8 +304,8 @@ tls_set: src1:i len:16
 tls_set_reg: src1:i src2:i len:32
 atomic_add_i4: src1:b src2:i dest:i len:32
 atomic_add_i8: src1:b src2:i dest:i len:32
-atomic_exchange_i4: src1:b src2:i dest:a len:32
-atomic_exchange_i8: src1:b src2:i dest:a len:32
+atomic_exchange_i4: src1:b src2:i dest:i clob:x len:8
+atomic_exchange_i8: src1:b src2:i dest:i clob:x len:8
 atomic_cas_i4: src1:b src2:i src3:a dest:a len:24
 atomic_cas_i8: src1:b src2:i src3:a dest:a len:24
 memory_barrier: len:3
index 487663f1bb1e9e650a0ed26b6165bcf51647a29a..3ec6a0027256c5e112c39a4ce0a27d11cd13e87d 100644 (file)
@@ -307,7 +307,7 @@ tls_get_reg: dest:i src1:i len:20
 tls_set: src1:i len:20
 tls_set_reg: src1:i src2:i len:20
 atomic_add_i4: src1:b src2:i dest:i len:16
-atomic_exchange_i4: src1:b src2:i dest:a len:24
+atomic_exchange_i4: src1:b src2:i dest:i clob:x len:8
 atomic_cas_i4: src1:b src2:i src3:a dest:a len:24
 memory_barrier: len:16
 atomic_load_i1: dest:y src1:b len:7
index c7d5590f495ad221e2c88de9fee43f342eb963c1..e1b58c1649f300df8de7a0e2b99a130bddf92908 100755 (executable)
@@ -5565,62 +5565,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                case OP_ATOMIC_EXCHANGE_I4:
                case OP_ATOMIC_EXCHANGE_I8: {
-                       guchar *br[2];
-                       int sreg2 = ins->sreg2;
-                       int breg = ins->inst_basereg;
-                       guint32 size;
-                       gboolean need_push = FALSE, rdx_pushed = FALSE;
-
-                       if (ins->opcode == OP_ATOMIC_EXCHANGE_I8)
-                               size = 8;
-                       else
-                               size = 4;
-
-                       /* 
-                        * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
-                        * an explanation of how this works.
-                        */
-
-                       /* cmpxchg uses eax as comperand, need to make sure we can use it
-                        * hack to overcome limits in x86 reg allocator 
-                        * (req: dreg == eax and sreg2 != eax and breg != eax) 
-                        */
-                       g_assert (ins->dreg == AMD64_RAX);
-
-                       if (breg == AMD64_RAX && ins->sreg2 == AMD64_RAX)
-                               /* Highly unlikely, but possible */
-                               need_push = TRUE;
-
-                       /* The pushes invalidate rsp */
-                       if ((breg == AMD64_RAX) || need_push) {
-                               amd64_mov_reg_reg (code, AMD64_R11, breg, 8);
-                               breg = AMD64_R11;
-                       }
-
-                       /* We need the EAX reg for the comparand */
-                       if (ins->sreg2 == AMD64_RAX) {
-                               if (breg != AMD64_R11) {
-                                       amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
-                                       sreg2 = AMD64_R11;
-                               } else {
-                                       g_assert (need_push);
-                                       amd64_push_reg (code, AMD64_RDX);
-                                       amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
-                                       sreg2 = AMD64_RDX;
-                                       rdx_pushed = TRUE;
-                               }
-                       }
-
-                       amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
-
-                       br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
-                       amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
-                       br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
-                       amd64_patch (br [1], br [0]);
-
-                       if (rdx_pushed)
-                               amd64_pop_reg (code, AMD64_RDX);
+                       guint32 size = ins->opcode == OP_ATOMIC_EXCHANGE_I4 ? 4 : 8;
 
+                       /* LOCK prefix is implied. */
+                       amd64_xchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);
+                       amd64_mov_reg_reg (code, ins->dreg, ins->sreg2, size);
                        break;
                }
                case OP_ATOMIC_CAS_I4:
index 6ed4af588474cfe11100b8c129cb894d923c51cb..9a3e056bbe74d59961cb1a7cec60b045f18fbf76 100644 (file)
@@ -4276,44 +4276,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_ATOMIC_EXCHANGE_I4: {
-                       guchar *br[2];
-                       int sreg2 = ins->sreg2;
-                       int breg = ins->inst_basereg;
-
-                       g_assert (cfg->has_atomic_exchange_i4);
-
-                       /* cmpxchg uses eax as comperand, need to make sure we can use it
-                        * hack to overcome limits in x86 reg allocator 
-                        * (req: dreg == eax and sreg2 != eax and breg != eax) 
-                        */
-                       g_assert (ins->dreg == X86_EAX);
-                       
-                       /* We need the EAX reg for the cmpxchg */
-                       if (ins->sreg2 == X86_EAX) {
-                               sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
-                               x86_push_reg (code, sreg2);
-                               x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
-                       }
-
-                       if (breg == X86_EAX) {
-                               breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
-                               x86_push_reg (code, breg);
-                               x86_mov_reg_reg (code, breg, X86_EAX, 4);
-                       }
-
-                       x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
-
-                       br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
-                       x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
-                       br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
-                       x86_patch (br [1], br [0]);
-
-                       if (breg != ins->inst_basereg)
-                               x86_pop_reg (code, breg);
-
-                       if (ins->sreg2 != sreg2)
-                               x86_pop_reg (code, sreg2);
-
+                       /* LOCK prefix is implied. */
+                       x86_xchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2, 4);
+                       x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
                        break;
                }
                case OP_ATOMIC_CAS_I4: {