An attempt to fix the register allocation problems for CAS on x86.
author Zoltan Varga <vargaz@gmail.com>
Fri, 20 Aug 2010 16:22:01 +0000 (18:22 +0200)
committer Zoltan Varga <vargaz@gmail.com>
Fri, 20 Aug 2010 16:22:01 +0000 (18:22 +0200)
mono/mini/cpu-x86.md
mono/mini/mini-codegen.c
mono/mini/mini-x86.c

index f68950561a908025a05a6b233e37c42c5a17240e..42989f2bb78fa31ef55d706a4cfaf61ce3715dbb 100644 (file)
@@ -300,7 +300,7 @@ tls_get: dest:i len:20
 atomic_add_i4: src1:b src2:i dest:i len:16
 atomic_add_new_i4: src1:b src2:i dest:i len:16
 atomic_exchange_i4: src1:b src2:i dest:a len:24
-atomic_cas_i4: src1:b src2:i src3:a dest:i len:24
+atomic_cas_i4: src1:b src2:i src3:a dest:a len:24
 memory_barrier: len:16
 
 relaxed_nop: len:2
index 7a1476fbb5ded54194a8e9a24aac6dde608a6348..8b516b4af0fc8d51ccc99644ec92a572f4e13157 100644 (file)
@@ -1265,6 +1265,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                 * TRACK FIXED SREG2, 3, ...
                 */
                for (j = 1; j < num_sregs; ++j) {
+                       gboolean assign_fixed = TRUE;
                        int sreg = sregs [j];
                        int dest_sreg = dest_sregs [j];
                        if (dest_sreg != -1) {
@@ -1279,7 +1280,35 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                                                sreg_masks [k] &= ~ (regmask (dest_sreg));
                                                }
                                        }
-                                       else {
+                                       else if (j == 2) {
+                                               int k;
+
+                                               /*
+                                                * CAS.
+                                                * We need to special case this, since on x86, there are only 3
+                                                * free registers, and the code below assigns one of them to
+                                                * sreg, so we can run out of registers when trying to assign
+                                                * dreg. Instead, we just set up the register masks, and let the
+                                                * normal sreg2 assignment code handle this. It would be nice to
+                                                * do this for all the fixed reg cases too, but there is too much
+                                                * risk of breakage.
+                                                */
+                                               assign_fixed = FALSE;
+
+                                               val = rs->vassign [sreg];
+                                               if (val <= -1) {
+                                                       /* Nothing to do */
+                                               } else {
+                                                       /* Argument already in hard reg, need to copy */
+                                                       MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, val, NULL, ip, 0);
+                                                       insert_before_ins (bb, ins, copy);
+                                               }
+
+                                               for (k = 0; k < num_sregs; ++k) {
+                                                       if (k != j)
+                                                               sreg_masks [k] &= ~ (regmask (dest_sreg));
+                                               }                                               
+                                       } else {
                                                val = rs->vassign [sreg];
                                                if (val == -1) {
                                                        DEBUG (printf ("\tshortcut assignment of R%d to %s\n", sreg, mono_arch_regname (dest_sreg)));
@@ -1384,7 +1413,8 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                                assign_reg (cfg, rs, sregs [j], dest_sreg, 0);
                                        }
                                }
-                               sregs [j] = dest_sreg;
+                               if (assign_fixed)
+                                       sregs [j] = dest_sreg;
                        }
                }
                mono_inst_set_src_registers (ins, sregs);
@@ -1969,6 +1999,16 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                mono_inst_set_src_registers (ins, sregs);
 
+               /* Sanity check */
+               /* Do this for CAS only for now */
+               for (j = 1; j < num_sregs; ++j) {
+                       int sreg = sregs [j];
+                       int dest_sreg = dest_sregs [j];
+
+                       if (j == 2 && dest_sreg != -1)
+                               g_assert (sreg == dest_sreg);
+               }
+
                /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
                        DEBUG (printf ("freeable %s\n", mono_arch_regname (ins->sreg1)));
                        mono_regstate_free_int (rs, ins->sreg1);
index d0025845f5c1a64716d01e0763895cb7666e4907..7617bd7c5084862e1243c19cb6552907e7cf989d 100644 (file)
@@ -3993,15 +3993,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_ATOMIC_CAS_I4: {
+                       g_assert (ins->dreg == X86_EAX);
                        g_assert (ins->sreg3 == X86_EAX);
                        g_assert (ins->sreg1 != X86_EAX);
                        g_assert (ins->sreg1 != ins->sreg2);
 
                        x86_prefix (code, X86_LOCK_PREFIX);
                        x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
-
-                       if (ins->dreg != X86_EAX)
-                               x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
                        break;
                }
 #ifdef MONO_ARCH_SIMD_INTRINSICS