[jit] Fix some issues with constrained gsharedvt calls to be able to handle some...
[mono.git] / mono / mini / mini-codegen.c
index 079990b5e71e3143fac4491410ea58701b3662e6..ab064a1c6ff01117cc00f4b9b9af754a86c197d7 100644 (file)
@@ -14,6 +14,7 @@
 #include <mono/metadata/debug-helpers.h>
 #include <mono/metadata/threads.h>
 #include <mono/metadata/profiler-private.h>
+#include <mono/metadata/mempool-internals.h>
 #include <mono/utils/mono-math.h>
 
 #include "mini.h"
 #define MONO_ARCH_CALLEE_XREGS 0
 
 #endif
+
+#define MONO_ARCH_BANK_MIRRORED -2 /* marker stored in rs->symbolic [bank] [hreg] when the hreg was assigned through the paired bank */
+
+#ifdef MONO_ARCH_USE_SHARED_FP_SIMD_BANK
+
+#ifndef MONO_ARCH_NEED_SIMD_BANK
+#error "MONO_ARCH_USE_SHARED_FP_SIMD_BANK needs MONO_ARCH_NEED_SIMD_BANK to work"
+#endif
+
+#define get_mirrored_bank(bank) (((bank) == MONO_REG_SIMD ) ? MONO_REG_DOUBLE : (((bank) == MONO_REG_DOUBLE ) ? MONO_REG_SIMD : -1)) /* SIMD <-> DOUBLE, -1 for every other bank */
+
+#define is_hreg_mirrored(rs, bank, hreg) ((rs)->symbolic [(bank)] [(hreg)] == MONO_ARCH_BANK_MIRRORED) /* non-zero when the slot of HREG in BANK holds the mirrored marker */
+
+
+#else
+
+
+#define get_mirrored_bank(bank) (-1) /* fp and simd banks are not shared: no bank has a mirror */
+
+#define is_hreg_mirrored(rs, bank, hreg) (0) /* fp and simd banks are not shared: never mirrored */
+
+#endif /* MONO_ARCH_USE_SHARED_FP_SIMD_BANK */
+
+
+/*
+ * translate_bank:
+ *
+ *   Return the logical bank HREG is really allocated to. When the slot of HREG
+ * in BANK carries the mirrored marker, that is the bank paired with BANK by
+ * get_mirrored_bank (); otherwise it is BANK itself.
+ */
+static inline int
+translate_bank (MonoRegState *rs, int bank, int hreg)
+{
+       if (!is_hreg_mirrored (rs, bank, hreg))
+               return bank;
+
+       return get_mirrored_bank (bank);
+}
+
 /*
  * Every hardware register belongs to a register type or register bank. bank 0 
  * contains the int registers, bank 1 contains the fp registers.
 static const int regbank_size [] = {
        MONO_MAX_IREGS,
        MONO_MAX_FREGS,
+       MONO_MAX_IREGS, /* presumably the MONO_REG_INT_REF bank, sized like the int bank -- confirm against the bank enum */
+       MONO_MAX_IREGS, /* presumably the MONO_REG_INT_MP bank, sized like the int bank -- confirm against the bank enum */
        MONO_MAX_XREGS
 };
 
-/* FIXME: */
-#ifdef __mono_ilp32__
-#define OP_LOADR_MEMBASE OP_LOADI8_MEMBASE
-#define OP_STORER_MEMBASE_REG OP_STOREI8_MEMBASE_REG
-#else
-#define OP_LOADR_MEMBASE OP_LOAD_MEMBASE
-#define OP_STORER_MEMBASE_REG OP_STORE_MEMBASE_REG
-#endif
-
 static const int regbank_load_ops [] = { 
        OP_LOADR_MEMBASE,
        OP_LOADR8_MEMBASE,
+       OP_LOADR_MEMBASE, /* presumably MONO_REG_INT_REF: same load as the int bank -- confirm against the bank enum */
+       OP_LOADR_MEMBASE, /* presumably MONO_REG_INT_MP: same load as the int bank -- confirm against the bank enum */
        OP_LOADX_MEMBASE
 };
 
 static const int regbank_store_ops [] = { 
        OP_STORER_MEMBASE_REG,
        OP_STORER8_MEMBASE_REG,
+       OP_STORER_MEMBASE_REG, /* presumably MONO_REG_INT_REF: same store as the int bank -- confirm against the bank enum */
+       OP_STORER_MEMBASE_REG, /* presumably MONO_REG_INT_MP: same store as the int bank -- confirm against the bank enum */
        OP_STOREX_MEMBASE
 };
 
 static const int regbank_move_ops [] = { 
        OP_MOVE,
        OP_FMOVE,
+       OP_MOVE, /* presumably MONO_REG_INT_REF: same move as the int bank -- confirm against the bank enum */
+       OP_MOVE, /* presumably MONO_REG_INT_MP: same move as the int bank -- confirm against the bank enum */
        OP_XMOVE
 };
 
 #define regmask(reg) (((regmask_t)1) << (reg))
 
+#ifdef MONO_ARCH_USE_SHARED_FP_SIMD_BANK /* this table is read by the consistency asserts in mono_regstate_assign () */
 static const regmask_t regbank_callee_saved_regs [] = {
        MONO_ARCH_CALLEE_SAVED_REGS,
        MONO_ARCH_CALLEE_SAVED_FREGS,
+       MONO_ARCH_CALLEE_SAVED_REGS, /* presumably MONO_REG_INT_REF: same mask as the int bank -- confirm against the bank enum */
+       MONO_ARCH_CALLEE_SAVED_REGS, /* presumably MONO_REG_INT_MP: same mask as the int bank -- confirm against the bank enum */
        MONO_ARCH_CALLEE_SAVED_XREGS,
 };
+#endif /* MONO_ARCH_USE_SHARED_FP_SIMD_BANK */
 
 static const regmask_t regbank_callee_regs [] = {
        MONO_ARCH_CALLEE_REGS,
        MONO_ARCH_CALLEE_FREGS,
+       MONO_ARCH_CALLEE_REGS, /* presumably MONO_REG_INT_REF: same mask as the int bank -- confirm against the bank enum */
+       MONO_ARCH_CALLEE_REGS, /* presumably MONO_REG_INT_MP: same mask as the int bank -- confirm against the bank enum */
        MONO_ARCH_CALLEE_XREGS,
 };
 
 static const int regbank_spill_var_size[] = {
        sizeof (mgreg_t),
        sizeof (double),
+       sizeof (mgreg_t), /* presumably MONO_REG_INT_REF: spill slot sized like an int register -- confirm against the bank enum */
+       sizeof (mgreg_t), /* presumably MONO_REG_INT_MP: spill slot sized like an int register -- confirm against the bank enum */
        16 /*FIXME make this a constant. Maybe MONO_ARCH_SIMD_VECTOR_SIZE? */
 };
 
 #define DEBUG(a) MINI_DEBUG(cfg->verbose_level, 3, a;)
 
-static inline GSList*
-g_slist_append_mempool (MonoMemPool *mp, GSList *list, gpointer data)
-{
-       GSList *new_list;
-       GSList *last;
-       
-       new_list = mono_mempool_alloc (mp, sizeof (GSList));
-       new_list->data = data;
-       new_list->next = NULL;
-       
-       if (list) {
-               last = list;
-               while (last->next)
-                       last = last->next;
-               last->next = new_list;
-               
-               return list;
-       } else
-               return new_list;
-}
-
 static inline void
 mono_regstate_assign (MonoRegState *rs)
 {
+#ifdef MONO_ARCH_USE_SHARED_FP_SIMD_BANK
+       /* The regalloc may fail if fp and simd logical regbanks share the same physical reg bank and
+        * if the values here are not the same.
+        */
+       g_assert(regbank_callee_regs [MONO_REG_SIMD] == regbank_callee_regs [MONO_REG_DOUBLE]);
+       g_assert(regbank_callee_saved_regs [MONO_REG_SIMD] == regbank_callee_saved_regs [MONO_REG_DOUBLE]);
+       g_assert(regbank_size [MONO_REG_SIMD] == regbank_size [MONO_REG_DOUBLE]);
+#endif
+
        if (rs->next_vreg > rs->vassign_size) {
                g_free (rs->vassign);
                rs->vassign_size = MAX (rs->next_vreg, 256);
@@ -122,12 +150,12 @@ mono_regstate_assign (MonoRegState *rs)
        memset (rs->isymbolic, 0, MONO_MAX_IREGS * sizeof (rs->isymbolic [0]));
        memset (rs->fsymbolic, 0, MONO_MAX_FREGS * sizeof (rs->fsymbolic [0]));
 
-       rs->symbolic [0] = rs->isymbolic;
-       rs->symbolic [1] = rs->fsymbolic;
+       rs->symbolic [MONO_REG_INT] = rs->isymbolic;
+       rs->symbolic [MONO_REG_DOUBLE] = rs->fsymbolic;
 
 #ifdef MONO_ARCH_NEED_SIMD_BANK
        memset (rs->xsymbolic, 0, MONO_MAX_XREGS * sizeof (rs->xsymbolic [0]));
-       rs->symbolic [2] = rs->xsymbolic;
+       rs->symbolic [MONO_REG_SIMD] = rs->xsymbolic;
 #endif
 }
 
@@ -175,10 +203,17 @@ static inline int
 mono_regstate_alloc_general (MonoRegState *rs, regmask_t allow, int bank)
 {
        int i;
+       int mirrored_bank;
        regmask_t mask = allow & rs->free_mask [bank];
        for (i = 0; i < regbank_size [bank]; ++i) {
                if (mask & ((regmask_t)1 << i)) {
                        rs->free_mask [bank] &= ~ ((regmask_t)1 << i);
+
+                       mirrored_bank = get_mirrored_bank (bank);
+                       if (mirrored_bank == -1)
+                               return i;
+
+                       rs->free_mask [mirrored_bank] = rs->free_mask [bank];
                        return i;
                }
        }
@@ -188,9 +223,17 @@ mono_regstate_alloc_general (MonoRegState *rs, regmask_t allow, int bank)
 static inline void
 mono_regstate_free_general (MonoRegState *rs, int reg, int bank)
 {
+       int mirrored_bank; /* bank paired with BANK by get_mirrored_bank (), or -1 when there is none */
+
        if (reg >= 0) {
                rs->free_mask [bank] |= (regmask_t)1 << reg;
                rs->symbolic [bank][reg] = 0;
+
+               mirrored_bank = get_mirrored_bank (bank);
+               if (mirrored_bank == -1) /* BANK has no mirror: nothing else to update */
+                       return;
+               rs->free_mask [mirrored_bank] = rs->free_mask [bank]; /* keep the free masks of both banks identical */
+               rs->symbolic [mirrored_bank][reg] = 0; /* clear the slot of REG in the mirror as well */
        }
 }
 
@@ -199,10 +242,12 @@ mono_regname_full (int reg, int bank)
 {
        if (G_UNLIKELY (bank)) {
 #if MONO_ARCH_NEED_SIMD_BANK
-               if (bank == 2)
+               if (bank == MONO_REG_SIMD)
                        return mono_arch_xregname (reg);
 #endif
-               g_assert (bank == 1);
+               if (bank == MONO_REG_INT_REF || bank == MONO_REG_INT_MP)
+                       return mono_arch_regname (reg);
+               g_assert (bank == MONO_REG_DOUBLE);
                return mono_arch_fregname (reg);
        } else {
                return mono_arch_regname (reg);
@@ -228,6 +273,17 @@ mono_call_inst_add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, int vreg, i
        }
 }
 
+/*
+ * mono_call_inst_add_outarg_vt:
+ *
+ *   Record OUTARG_VT as belonging to CALL by adding it to CALL->outarg_vts.
+ * The list is extended through g_slist_append_mempool () using the mempool
+ * of CFG.
+ */
+void
+mono_call_inst_add_outarg_vt (MonoCompile *cfg, MonoCallInst *call, MonoInst *outarg_vt)
+{
+       GSList *vts = call->outarg_vts;
+
+       vts = g_slist_append_mempool (cfg->mempool, vts, outarg_vt);
+       call->outarg_vts = vts;
+}
+
 static void
 resize_spill_info (MonoCompile *cfg, int bank)
 {
@@ -239,7 +295,7 @@ resize_spill_info (MonoCompile *cfg, int bank)
 
        g_assert (bank < MONO_NUM_REGBANKS);
 
-       new_info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo) * new_len);
+       new_info = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoSpillInfo) * new_len);
        if (orig_info)
                memcpy (new_info, orig_info, sizeof (MonoSpillInfo) * orig_len);
        for (i = orig_len; i < new_len; ++i)
@@ -355,7 +411,38 @@ typedef struct {
        regmask_t preferred_mask; /* the hreg where the register should be allocated, or 0 */
 } RegTrack;
 
-#ifndef DISABLE_LOGGING
+#if !defined(DISABLE_LOGGING) && !defined(DISABLE_JIT)
+
+static const char* const patch_info_str[] = { /* indexed by ji->type in mono_print_ji (); assumes patch-info.h is a list of PATCH_INFO (...) entries -- confirm */
+#define PATCH_INFO(a,b) "" #a, /* each entry contributes its first argument as a string */
+#include "patch-info.h"
+#undef PATCH_INFO
+};
+
+/*
+ * mono_print_ji:
+ *
+ *   Print a bracketed description of the patch info JI with printf ().
+ * RGCTX_FETCH entries print the patch they wrap recursively, METHODCONST
+ * entries print the full method name, and every other type prints its name
+ * from patch_info_str.
+ */
+void
+mono_print_ji (const MonoJumpInfo *ji)
+{
+       if (ji->type == MONO_PATCH_INFO_RGCTX_FETCH) {
+               MonoJumpInfoRgctxEntry *rgctx = ji->data.rgctx_entry;
+               const char *info_name;
+
+               printf ("[RGCTX_FETCH ");
+               mono_print_ji (rgctx->data);
+               info_name = mono_rgctx_info_type_to_str (rgctx->info_type);
+               printf (" - %s]", info_name);
+               return;
+       }
+
+       if (ji->type == MONO_PATCH_INFO_METHODCONST) {
+               /* The name string is owned by us and released after printing */
+               char *method_name = mono_method_full_name (ji->data.method, TRUE);
+
+               printf ("[METHODCONST - %s]", method_name);
+               g_free (method_name);
+               return;
+       }
+
+       printf ("[%s]", patch_info_str [ji->type]);
+}
+
 void
 mono_print_ins_index (int i, MonoInst *ins)
 {
@@ -368,9 +455,22 @@ mono_print_ins_index (int i, MonoInst *ins)
        else
                printf (" %s", mono_inst_name (ins->opcode));
        if (spec == MONO_ARCH_CPU_SPEC) {
+               gboolean dest_base = FALSE;
+               switch (ins->opcode) {
+               case OP_STOREV_MEMBASE:
+                       dest_base = TRUE;
+                       break;
+               default:
+                       break;
+               }
+
                /* This is a lowered opcode */
-               if (ins->dreg != -1)
-                       printf (" R%d <-", ins->dreg);
+               if (ins->dreg != -1) {
+                       if (dest_base)
+                               printf (" [R%d + 0x%lx] <-", ins->dreg, (long)ins->inst_offset);
+                       else
+                               printf (" R%d <-", ins->dreg);
+               }
                if (ins->sreg1 != -1)
                        printf (" R%d", ins->sreg1);
                if (ins->sreg2 != -1)
@@ -413,6 +513,7 @@ mono_print_ins_index (int i, MonoInst *ins)
                        printf (" R%d", ((MonoInst*)ins->inst_p0)->dreg);
                        break;
                case OP_REGOFFSET:
+               case OP_GSHAREDVT_ARG_REGOFFSET:
                        printf (" + 0x%lx", (long)ins->inst_offset);
                        break;
                default:
@@ -478,6 +579,7 @@ mono_print_ins_index (int i, MonoInst *ins)
        case OP_IAND_IMM:
        case OP_IOR_IMM:
        case OP_IXOR_IMM:
+       case OP_SUB_IMM:
                printf (" [%d]", (int)ins->inst_imm);
                break;
        case OP_ADD_IMM:
@@ -493,17 +595,12 @@ mono_print_ins_index (int i, MonoInst *ins)
        case OP_R4CONST:
                printf (" [%f]", *(float*)ins->inst_p0);
                break;
-       case CEE_CALL:
-       case CEE_CALLVIRT:
        case OP_CALL:
        case OP_CALL_MEMBASE:
        case OP_CALL_REG:
        case OP_FCALL:
-       case OP_FCALLVIRT:
        case OP_LCALL:
-       case OP_LCALLVIRT:
        case OP_VCALL:
-       case OP_VCALLVIRT:
        case OP_VCALL_REG:
        case OP_VCALL_MEMBASE:
        case OP_VCALL2:
@@ -511,7 +608,7 @@ mono_print_ins_index (int i, MonoInst *ins)
        case OP_VCALL2_MEMBASE:
        case OP_VOIDCALL:
        case OP_VOIDCALL_MEMBASE:
-       case OP_VOIDCALLVIRT: {
+       case OP_TAILCALL: {
                MonoCallInst *call = (MonoCallInst*)ins;
                GSList *list;
 
@@ -528,6 +625,11 @@ mono_print_ins_index (int i, MonoInst *ins)
                        char *full_name = mono_method_full_name (call->method, TRUE);
                        printf (" [%s]", full_name);
                        g_free (full_name);
+               } else if (call->fptr_is_patch) {
+                       MonoJumpInfo *ji = (MonoJumpInfo*)call->fptr;
+
+                       printf (" ");
+                       mono_print_ji (ji);
                } else if (call->fptr) {
                        MonoJitICallInfo *info = mono_find_jit_icall_by_addr (call->fptr);
                        if (info)
@@ -553,16 +655,6 @@ mono_print_ins_index (int i, MonoInst *ins)
        case OP_CALL_HANDLER:
                printf (" [B%d]", ins->inst_target_bb->block_num);
                break;
-       case CEE_BNE_UN:
-       case CEE_BEQ:
-       case CEE_BLT:
-       case CEE_BLT_UN:
-       case CEE_BGT:
-       case CEE_BGT_UN:
-       case CEE_BGE:
-       case CEE_BGE_UN:
-       case CEE_BLE:
-       case CEE_BLE_UN:
        case OP_IBNE_UN:
        case OP_IBEQ:
        case OP_IBLT:
@@ -590,8 +682,14 @@ mono_print_ins_index (int i, MonoInst *ins)
                break;
        case OP_LIVERANGE_START:
        case OP_LIVERANGE_END:
+       case OP_GC_LIVENESS_DEF:
+       case OP_GC_LIVENESS_USE:
                printf (" R%d", (int)ins->inst_c1);
                break;
+       case OP_IL_SEQ_POINT:
+       case OP_SEQ_POINT:
+               printf (" il: %x", (int)ins->inst_imm);
+               break;
        default:
                break;
        }
@@ -620,11 +718,17 @@ print_regtrack (RegTrack *t, int num)
        }
 }
 #else
+
+void
+mono_print_ji (const MonoJumpInfo *ji)
+{ /* empty stub for builds where DISABLE_LOGGING or DISABLE_JIT is defined */
+}
+
 void
 mono_print_ins_index (int i, MonoInst *ins)
 {
 }
-#endif /* DISABLE_LOGGING */
+#endif /* !defined(DISABLE_LOGGING) && !defined(DISABLE_JIT) */
 
 void
 mono_print_ins (MonoInst *ins)
@@ -654,11 +758,23 @@ insert_after_ins (MonoBasicBlock *bb, MonoInst *ins, MonoInst **last, MonoInst*
        *last = to_insert;
 }
 
+/*
+ * get_vreg_bank:
+ *
+ *   Return the bank to use for the vreg REG: MONO_REG_INT_REF when
+ * vreg_is_ref () holds for it, MONO_REG_INT_MP when vreg_is_mp () holds for
+ * it, and BANK unchanged otherwise.
+ */
+static inline int
+get_vreg_bank (MonoCompile *cfg, int reg, int bank)
+{
+       int vreg_bank = bank;
+
+       /* The ref check takes precedence over the mp check */
+       if (vreg_is_ref (cfg, reg))
+               vreg_bank = MONO_REG_INT_REF;
+       else if (vreg_is_mp (cfg, reg))
+               vreg_bank = MONO_REG_INT_MP;
+
+       return vreg_bank;
+}
+
 /*
- * Force the spilling of the variable in the symbolic register 'reg'.
+ * Force the spilling of the variable in the symbolic register 'reg', and free 
+ * the hreg it was assigned to.
  */
-static int
-get_register_force_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, MonoInst *ins, int reg, int bank)
+static void
+spill_vreg (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, MonoInst *ins, int reg, int bank)
 {
        MonoInst *load;
        int i, sel, spill;
@@ -668,6 +784,9 @@ get_register_force_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **la
        symbolic = rs->symbolic [bank];
        sel = rs->vassign [reg];
 
+       /* the vreg we need to spill lives in another logical reg bank */
+       bank = translate_bank (cfg->rs, bank, sel);
+
        /*i = rs->isymbolic [sel];
        g_assert (i == reg);*/
        i = reg;
@@ -681,7 +800,7 @@ get_register_force_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **la
        MONO_INST_NEW (cfg, load, regbank_load_ops [bank]);
        load->dreg = sel;
        load->inst_basereg = cfg->frame_reg;
-       load->inst_offset = mono_spillvar_offset (cfg, spill, bank);
+       load->inst_offset = mono_spillvar_offset (cfg, spill, get_vreg_bank (cfg, reg, bank));
        insert_after_ins (bb, ins, last, load);
        DEBUG (printf ("SPILLED LOAD (%d at 0x%08lx(%%ebp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_regname_full (sel, bank)));
        if (G_UNLIKELY (bank))
@@ -690,7 +809,10 @@ get_register_force_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **la
                i = mono_regstate_alloc_int (rs, regmask (sel));
        g_assert (i == sel);
 
-       return sel;
+       if (G_UNLIKELY (bank))
+               mono_regstate_free_general (rs, sel, bank);
+       else
+               mono_regstate_free_int (rs, sel);
 }
 
 /* This isn't defined on older glib versions and on some platforms */
@@ -711,7 +833,7 @@ get_register_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, Mo
 
        g_assert (bank < MONO_NUM_REGBANKS);
 
-       DEBUG (printf ("\tstart regmask to assign R%d: 0x%08" G_GUINT64_FORMAT " (R%d <- R%d R%d R%d)\n", reg, (guint64)regmask, ins->dreg, ins->sreg1, ins->sreg2, ins->sreg3));
+       DEBUG (printf ("\tstart regmask to assign R%d: 0x%08llu (R%d <- R%d R%d R%d)\n", reg, (unsigned long long)regmask, ins->dreg, ins->sreg1, ins->sreg2, ins->sreg3));
        /* exclude the registers in the current instruction */
        num_sregs = mono_inst_get_src_registers (ins, sregs);
        for (i = 0; i < num_sregs; ++i) {
@@ -728,7 +850,7 @@ get_register_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, Mo
                DEBUG (printf ("\t\texcluding dreg %s\n", mono_regname_full (ins->dreg, bank)));
        }
 
-       DEBUG (printf ("\t\tavailable regmask: 0x%08" G_GUINT64_FORMAT "\n", (guint64)regmask));
+       DEBUG (printf ("\t\tavailable regmask: 0x%08llu\n", (unsigned long long)regmask));
        g_assert (regmask); /* need at least a register we can free */
        sel = 0;
        /* we should track prev_use and spill the register that's farther */
@@ -736,6 +858,10 @@ get_register_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, Mo
                for (i = 0; i < regbank_size [bank]; ++i) {
                        if (regmask & (regmask (i))) {
                                sel = i;
+
+                               /* the vreg we need to load lives in another logical bank */
+                               bank = translate_bank (cfg->rs, bank, sel);
+
                                DEBUG (printf ("\t\tselected register %s has assignment %d\n", mono_regname_full (sel, bank), rs->symbolic [bank] [sel]));
                                break;
                        }
@@ -765,7 +891,7 @@ get_register_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, Mo
        MONO_INST_NEW (cfg, load, regbank_load_ops [bank]);
        load->dreg = sel;
        load->inst_basereg = cfg->frame_reg;
-       load->inst_offset = mono_spillvar_offset (cfg, spill, bank);
+       load->inst_offset = mono_spillvar_offset (cfg, spill, get_vreg_bank (cfg, i, bank));
        insert_after_ins (bb, ins, last, load);
        DEBUG (printf ("\tSPILLED LOAD (%d at 0x%08lx(%%ebp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_regname_full (sel, bank)));
        if (G_UNLIKELY (bank))
@@ -777,21 +903,25 @@ get_register_spilling (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, Mo
        return sel;
 }
 
+/*
+ * free_up_hreg:
+ *
+ *   Free up the hreg HREG by spilling the vreg allocated to it.
+ * Nothing is done when HREG is already marked free: in the free mask of BANK
+ * for non-zero banks, in the int free mask when BANK is 0. For non-zero banks,
+ * BANK is first passed through translate_bank () so that the vreg is looked up
+ * in the logical bank HREG is really allocated to.
+ */
 static void
-free_up_reg (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, MonoInst *ins, int hreg, int bank)
+free_up_hreg (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, MonoInst *ins, int hreg, int bank)
 {
        if (G_UNLIKELY (bank)) {
-               if (!(cfg->rs->free_mask [1] & (regmask (hreg)))) {
+               if (!(cfg->rs->free_mask [bank] & (regmask (hreg)))) {
+                       bank = translate_bank (cfg->rs, bank, hreg); /* HREG may be held through the mirrored bank */
                        DEBUG (printf ("\tforced spill of R%d\n", cfg->rs->symbolic [bank] [hreg]));
-                       get_register_force_spilling (cfg, bb, last, ins, cfg->rs->symbolic [bank] [hreg], bank);
-                       mono_regstate_free_general (cfg->rs, hreg, bank);
+                       spill_vreg (cfg, bb, last, ins, cfg->rs->symbolic [bank] [hreg], bank); /* spill_vreg () also frees the hreg */
                }
        }
        else {
                if (!(cfg->rs->ifree_mask & (regmask (hreg)))) {
                        DEBUG (printf ("\tforced spill of R%d\n", cfg->rs->isymbolic [hreg]));
-                       get_register_force_spilling (cfg, bb, last, ins, cfg->rs->isymbolic [hreg], bank);
-                       mono_regstate_free_int (cfg->rs, hreg);
+                       spill_vreg (cfg, bb, last, ins, cfg->rs->isymbolic [hreg], bank); /* spill_vreg () also frees the hreg */
                }
        }
 }
@@ -814,10 +944,24 @@ create_copy_ins (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst **last, int dest
        return copy;
 }
 
-static MonoInst*
-create_spilled_store (MonoCompile *cfg, MonoBasicBlock *bb, int spill, int reg, int prev_reg, MonoInst **last, MonoInst *ins, int bank)
+/*
+ * regbank_to_string:
+ *
+ *   Return the prefix used in debug output for BANK: "REF " for
+ * MONO_REG_INT_REF, "MP " for MONO_REG_INT_MP, and the empty string for any
+ * other bank.
+ */
+static inline const char*
+regbank_to_string (int bank)
+{
+       const char *prefix = "";
+
+       if (bank == MONO_REG_INT_REF)
+               prefix = "REF ";
+       else if (bank == MONO_REG_INT_MP)
+               prefix = "MP ";
+
+       return prefix;
+}
+
+static void
+create_spilled_store (MonoCompile *cfg, MonoBasicBlock *bb, int spill, int reg, int prev_reg, MonoInst **last, MonoInst *ins, MonoInst *insert_before, int bank)
 {
-       MonoInst *store;
+       MonoInst *store, *def;
+       
+       bank = get_vreg_bank (cfg, prev_reg, bank);
+
        MONO_INST_NEW (cfg, store, regbank_store_ops [bank]);
        store->sreg1 = reg;
        store->inst_destbasereg = cfg->frame_reg;
@@ -825,9 +969,20 @@ create_spilled_store (MonoCompile *cfg, MonoBasicBlock *bb, int spill, int reg,
        if (ins) {
                mono_bblock_insert_after_ins (bb, ins, store);
                *last = store;
+       } else if (insert_before) {
+               insert_before_ins (bb, insert_before, store);
+       } else {
+               g_assert_not_reached ();
+       }
+       DEBUG (printf ("\t%sSPILLED STORE (%d at 0x%08lx(%%ebp)) R%d (from %s)\n", regbank_to_string (bank), spill, (long)store->inst_offset, prev_reg, mono_regname_full (reg, bank)));
+
+       if (((bank == MONO_REG_INT_REF) || (bank == MONO_REG_INT_MP)) && cfg->compute_gc_maps) {
+               g_assert (prev_reg != -1);
+               MONO_INST_NEW (cfg, def, OP_GC_SPILL_SLOT_LIVENESS_DEF);
+               def->inst_c0 = spill;
+               def->inst_c1 = bank;
+               mono_bblock_insert_after_ins (bb, store, def);
        }
-       DEBUG (printf ("\tSPILLED STORE (%d at 0x%08lx(%%ebp)) R%d (from %s)\n", spill, (long)store->inst_offset, prev_reg, mono_regname_full (reg, bank)));
-       return store;
 }
 
 /* flags used in reginfo->flags */
@@ -883,6 +1038,8 @@ static inline void
 assign_reg (MonoCompile *cfg, MonoRegState *rs, int reg, int hreg, int bank)
 {
        if (G_UNLIKELY (bank)) {
+               int mirrored_bank;
+
                g_assert (reg >= regbank_size [bank]);
                g_assert (hreg < regbank_size [bank]);
                g_assert (! is_global_freg (hreg));
@@ -890,12 +1047,28 @@ assign_reg (MonoCompile *cfg, MonoRegState *rs, int reg, int hreg, int bank)
                rs->vassign [reg] = hreg;
                rs->symbolic [bank] [hreg] = reg;
                rs->free_mask [bank] &= ~ (regmask (hreg));
+
+               mirrored_bank = get_mirrored_bank (bank);
+               if (mirrored_bank == -1)
+                       return;
+
+               /* Make sure the other logical reg bank, which shares a single
+                * hard reg bank with this one, knows that this hard reg is not free.
+                */
+               rs->free_mask [mirrored_bank] = rs->free_mask [bank];
+
+               /* Mark this hard reg as mirrored in the other logical bank,
+                * which shares a single hard reg bank with this one.
+                */
+               rs->symbolic [mirrored_bank] [hreg] = MONO_ARCH_BANK_MIRRORED;
+
        }
        else {
                g_assert (reg >= MONO_MAX_IREGS);
                g_assert (hreg < MONO_MAX_IREGS);
-#ifndef TARGET_ARM
+#if !defined(TARGET_ARM) && !defined(TARGET_ARM64)
                /* this seems to trigger a gcc compilation bug sometime (hreg is 0) */
+               /* On arm64, rgctx_reg is a global hreg, and it is used to pass an argument */
                g_assert (! is_global_ireg (hreg));
 #endif
 
@@ -916,6 +1089,8 @@ get_callee_mask (const char spec)
 static gint8 desc_to_fixed_reg [256];
 static gboolean desc_to_fixed_reg_inited = FALSE;
 
+#ifndef DISABLE_JIT
+
 /*
  * Local register allocation.
  * We first scan the list of instructions and we save the liveness info of
@@ -939,7 +1114,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
        int fpstack [8];
        int sp = 0;
 #endif
-       int num_sregs;
+       int num_sregs = 0;
        int sregs [MONO_MAX_SRC_REGS];
 
        if (!bb->code)
@@ -949,6 +1124,23 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                for (i = 0; i < 256; ++i)
                        desc_to_fixed_reg [i] = MONO_ARCH_INST_FIXED_REG (i);
                desc_to_fixed_reg_inited = TRUE;
+
+               /* Validate the cpu description against the info in mini-ops.h */
+#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_ARM64)
+               for (i = OP_LOAD; i < OP_LAST; ++i) {
+                       const char *ispec;
+
+                       spec = ins_get_spec (i);
+                       ispec = INS_INFO (i);
+
+                       if ((spec [MONO_INST_DEST] && (ispec [MONO_INST_DEST] == ' ')))
+                               printf ("Instruction metadata for %s inconsistent.\n", mono_inst_name (i));
+                       if ((spec [MONO_INST_SRC1] && (ispec [MONO_INST_SRC1] == ' ')))
+                               printf ("Instruction metadata for %s inconsistent.\n", mono_inst_name (i));
+                       if ((spec [MONO_INST_SRC2] && (ispec [MONO_INST_SRC2] == ' ')))
+                               printf ("Instruction metadata for %s inconsistent.\n", mono_inst_name (i));
+               }
+#endif
        }
 
        rs->next_vreg = bb->max_vreg;
@@ -982,6 +1174,8 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
         * bblock.
         */
        for (ins = bb->code; ins; ins = ins->next) {
+               gboolean modify = FALSE;
+
                spec = ins_get_spec (ins->opcode);
 
                if ((ins->dreg != -1) && (ins->dreg < max)) {
@@ -1007,19 +1201,21 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
 #if SIZEOF_REGISTER == 4
                                if (MONO_ARCH_INST_IS_REGPAIR (spec [MONO_INST_SRC1 + j])) {
                                        sregs [j]++;
+                                       modify = TRUE;
                                        memset (&reginfo [sregs [j] + 1], 0, sizeof (RegTrack));
                                }
 #endif
                        }
                }
-               mono_inst_set_src_registers (ins, sregs);
+               if (modify)
+                       mono_inst_set_src_registers (ins, sregs);
        }
 
        /*if (cfg->opt & MONO_OPT_COPYPROP)
                local_copy_prop (cfg, ins);*/
 
        i = 1;
-       DEBUG (printf ("\nLOCAL REGALLOC: BASIC BLOCK %d:\n", bb->block_num));
+       DEBUG (printf ("\nLOCAL REGALLOC BLOCK %d:\n", bb->block_num));
        /* forward pass on the instructions to collect register liveness info */
        MONO_BB_FOR_EACH_INS (bb, ins) {
                spec = ins_get_spec (ins->opcode);
@@ -1105,7 +1301,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        ins->dreg = -1;
                }
 
-               if (spec [MONO_INST_CLOB] == 'c') {
+               if (spec [MONO_INST_CLOB] == 'c' && MONO_IS_CALL (ins)) {
                        /* A call instruction implicitly uses all registers in call->out_ireg_args */
 
                        MonoCallInst *call = (MonoCallInst*)ins;
@@ -1208,125 +1404,166 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                for (j = 1; j < num_sregs; ++j) {
                        int sreg = sregs [j];
                        int dest_sreg = dest_sregs [j];
-                       if (dest_sreg != -1) {
-                               if (rs->ifree_mask & (regmask (dest_sreg))) {
-                                       if (is_global_ireg (sreg)) {
-                                               int k;
+
+                       if (dest_sreg == -1)
+                               continue;
+
+                       if (j == 2) {
+                               int k;
+
+                               /*
+                                * CAS.
+                                * We need to special case this, since on x86, there are only 3
+                                * free registers, and the code below assigns one of them to
+                                * sreg, so we can run out of registers when trying to assign
+                                * dreg. Instead, we just set up the register masks, and let the
+                                * normal sreg2 assignment code handle this. It would be nice to
+                                * do this for all the fixed reg cases too, but there is too much
+                                * risk of breakage.
+                                */
+
+                               /* Make sure sreg will be assigned to dest_sreg, and the other sregs won't */
+                               sreg_masks [j] = regmask (dest_sreg);
+                               for (k = 0; k < num_sregs; ++k) {
+                                       if (k != j)
+                                               sreg_masks [k] &= ~ (regmask (dest_sreg));
+                               }                                               
+
+                               /*
+                                * Spill sreg1/2 if they are assigned to dest_sreg.
+                                */
+                               for (k = 0; k < num_sregs; ++k) {
+                                       if (k != j && is_soft_reg (sregs [k], 0) && rs->vassign [sregs [k]] == dest_sreg)
+                                               free_up_hreg (cfg, bb, tmp, ins, dest_sreg, 0);
+                               }
+
+                               /*
+                                * We can also run out of registers while processing sreg2 if sreg3 is
+                                * assigned to another hreg, so spill sreg3 now.
+                                */
+                               if (is_soft_reg (sreg, 0) && rs->vassign [sreg] >= 0 && rs->vassign [sreg] != dest_sreg) {
+                                       spill_vreg (cfg, bb, tmp, ins, sreg, 0);
+                               }
+                               continue;
+                       }
+
+                       if (rs->ifree_mask & (regmask (dest_sreg))) {
+                               if (is_global_ireg (sreg)) {
+                                       int k;
+                                       /* Argument already in hard reg, need to copy */
+                                       MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, sreg, NULL, ip, 0);
+                                       insert_before_ins (bb, ins, copy);
+                                       for (k = 0; k < num_sregs; ++k) {
+                                               if (k != j)
+                                                       sreg_masks [k] &= ~ (regmask (dest_sreg));
+                                       }
+                                       /* See below */
+                                       dreg_mask &= ~ (regmask (dest_sreg));
+                               } else {
+                                       val = rs->vassign [sreg];
+                                       if (val == -1) {
+                                               DEBUG (printf ("\tshortcut assignment of R%d to %s\n", sreg, mono_arch_regname (dest_sreg)));
+                                               assign_reg (cfg, rs, sreg, dest_sreg, 0);
+                                       } else if (val < -1) {
+                                               /* FIXME: */
+                                               g_assert_not_reached ();
+                                       } else {
                                                /* Argument already in hard reg, need to copy */
-                                               MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, sreg, NULL, ip, 0);
+                                               MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, val, NULL, ip, 0);
+                                               int k;
+
                                                insert_before_ins (bb, ins, copy);
                                                for (k = 0; k < num_sregs; ++k) {
                                                        if (k != j)
                                                                sreg_masks [k] &= ~ (regmask (dest_sreg));
                                                }
+                                               /* 
+                                                * Prevent the dreg from being allocated to dest_sreg
+                                                * too, since it could force sreg1 to be allocated to 
+                                                * the same reg on x86.
+                                                */
+                                               dreg_mask &= ~ (regmask (dest_sreg));
                                        }
-                                       else {
-                                               val = rs->vassign [sreg];
-                                               if (val == -1) {
-                                                       DEBUG (printf ("\tshortcut assignment of R%d to %s\n", sreg, mono_arch_regname (dest_sreg)));
-                                                       assign_reg (cfg, rs, sreg, dest_sreg, 0);
-                                               } else if (val < -1) {
-                                                       /* FIXME: */
-                                                       g_assert_not_reached ();
-                                               } else {
-                                                       /* Argument already in hard reg, need to copy */
-                                                       MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, val, NULL, ip, 0);
-                                                       int k;
-
-                                                       insert_before_ins (bb, ins, copy);
-                                                       for (k = 0; k < num_sregs; ++k) {
-                                                               if (k != j)
-                                                                       sreg_masks [k] &= ~ (regmask (dest_sreg));
-                                                       }
-                                                       /* 
-                                                        * Prevent the dreg from being allocate to dest_sreg 
-                                                        * too, since it could force sreg1 to be allocated to 
-                                                        * the same reg on x86.
-                                                        */
-                                                       dreg_mask &= ~ (regmask (dest_sreg));
-                                               }
-                                       }
-                               } else {
-                                       gboolean need_spill = TRUE;
-                                       gboolean need_assign = TRUE;
-                                       int k;
-
-                                       dreg_mask &= ~ (regmask (dest_sreg));
-                                       for (k = 0; k < num_sregs; ++k) {
-                                               if (k != j)
-                                                       sreg_masks [k] &= ~ (regmask (dest_sreg));
-                                       }
+                               }
+                       } else {
+                               gboolean need_spill = TRUE;
+                               gboolean need_assign = TRUE;
+                               int k;
+
+                               dreg_mask &= ~ (regmask (dest_sreg));
+                               for (k = 0; k < num_sregs; ++k) {
+                                       if (k != j)
+                                               sreg_masks [k] &= ~ (regmask (dest_sreg));
+                               }
 
+                               /* 
+                                * First check if dreg is assigned to dest_sreg2, since we
+                                * can't spill a dreg.
+                                */
+                               if (spec [MONO_INST_DEST])
+                                       val = rs->vassign [ins->dreg];
+                               else
+                                       val = -1;
+                               if (val == dest_sreg && ins->dreg != sreg) {
                                        /* 
-                                        * First check if dreg is assigned to dest_sreg2, since we
-                                        * can't spill a dreg.
+                                        * the destination register is already assigned to 
+                                        * dest_sreg2: we need to allocate another register for it 
+                                        * and then copy from this to dest_sreg2.
                                         */
-                                       val = rs->vassign [ins->dreg];
-                                       if (val == dest_sreg && ins->dreg != sreg) {
-                                               /* 
-                                                * the destination register is already assigned to 
-                                                * dest_sreg2: we need to allocate another register for it 
-                                                * and then copy from this to dest_sreg2.
-                                                */
-                                               int new_dest;
-                                               new_dest = alloc_int_reg (cfg, bb, tmp, ins, dreg_mask, ins->dreg, &reginfo [ins->dreg]);
-                                               g_assert (new_dest >= 0);
-                                               DEBUG (printf ("\tchanging dreg R%d to %s from %s\n", ins->dreg, mono_arch_regname (new_dest), mono_arch_regname (dest_sreg)));
-
-                                               prev_dreg = ins->dreg;
-                                               assign_reg (cfg, rs, ins->dreg, new_dest, 0);
-                                               clob_dreg = ins->dreg;
-                                               create_copy_ins (cfg, bb, tmp, dest_sreg, new_dest, ins, ip, 0);
-                                               mono_regstate_free_int (rs, dest_sreg);
-                                               need_spill = FALSE;
-                                       }
+                                       int new_dest;
+                                       new_dest = alloc_int_reg (cfg, bb, tmp, ins, dreg_mask, ins->dreg, &reginfo [ins->dreg]);
+                                       g_assert (new_dest >= 0);
+                                       DEBUG (printf ("\tchanging dreg R%d to %s from %s\n", ins->dreg, mono_arch_regname (new_dest), mono_arch_regname (dest_sreg)));
+
+                                       prev_dreg = ins->dreg;
+                                       assign_reg (cfg, rs, ins->dreg, new_dest, 0);
+                                       clob_dreg = ins->dreg;
+                                       create_copy_ins (cfg, bb, tmp, dest_sreg, new_dest, ins, ip, 0);
+                                       mono_regstate_free_int (rs, dest_sreg);
+                                       need_spill = FALSE;
+                               }
 
-                                       if (is_global_ireg (sreg)) {
-                                               MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, sreg, NULL, ip, 0);
-                                               insert_before_ins (bb, ins, copy);
-                                               need_assign = FALSE;
-                                       }
-                                       else {
-                                               val = rs->vassign [sreg];
-                                               if (val == dest_sreg) {
-                                                       /* sreg2 is already assigned to the correct register */
-                                                       need_spill = FALSE;
-                                               } else if (val < -1) {
-                                                       /* sreg2 is spilled, it can be assigned to dest_sreg2 */
-                                               } else if (val >= 0) {
-                                                       /* sreg2 already assigned to another register */
-                                                       /*
-                                                        * We couldn't emit a copy from val to dest_sreg2, because
-                                                        * val might be spilled later while processing this 
-                                                        * instruction. So we spill sreg2 so it can be allocated to
-                                                        * dest_sreg2.
-                                                        */
-                                                       DEBUG (printf ("\tforced spill of R%d\n", sreg));
-                                                       free_up_reg (cfg, bb, tmp, ins, val, 0);
-                                               }
+                               if (is_global_ireg (sreg)) {
+                                       MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sreg, sreg, NULL, ip, 0);
+                                       insert_before_ins (bb, ins, copy);
+                                       need_assign = FALSE;
+                               }
+                               else {
+                                       val = rs->vassign [sreg];
+                                       if (val == dest_sreg) {
+                                               /* sreg2 is already assigned to the correct register */
+                                               need_spill = FALSE;
+                                       } else if (val < -1) {
+                                               /* sreg2 is spilled, it can be assigned to dest_sreg2 */
+                                       } else if (val >= 0) {
+                                               /* sreg2 already assigned to another register */
+                                               /*
+                                                * We couldn't emit a copy from val to dest_sreg2, because
+                                                * val might be spilled later while processing this 
+                                                * instruction. So we spill sreg2 so it can be allocated to
+                                                * dest_sreg2.
+                                                */
+                                               free_up_hreg (cfg, bb, tmp, ins, val, 0);
                                        }
+                               }
 
-                                       if (need_spill) {
-                                               DEBUG (printf ("\tforced spill of R%d\n", rs->isymbolic [dest_sreg]));
-                                               free_up_reg (cfg, bb, tmp, ins, dest_sreg, 0);
-                                       }
+                               if (need_spill) {
+                                       free_up_hreg (cfg, bb, tmp, ins, dest_sreg, 0);
+                               }
 
-                                       if (need_assign) {
-                                               if (rs->vassign [sreg] < -1) {
-                                                       MonoInst *store;
-                                                       int spill;
+                               if (need_assign) {
+                                       if (rs->vassign [sreg] < -1) {
+                                               int spill;
 
-                                                       /* Need to emit a spill store */
-                                                       spill = - rs->vassign [sreg] - 1;
-                                                       store = create_spilled_store (cfg, bb, spill, dest_sreg, sreg, tmp, NULL, bank);
-                                                       insert_before_ins (bb, ins, store);
-                                               }
-                                               /* force-set sreg2 */
-                                               assign_reg (cfg, rs, sregs [j], dest_sreg, 0);
+                                               /* Need to emit a spill store */
+                                               spill = - rs->vassign [sreg] - 1;
+                                               create_spilled_store (cfg, bb, spill, dest_sreg, sreg, tmp, NULL, ins, bank);
                                        }
+                                       /* force-set sreg2 */
+                                       assign_reg (cfg, rs, sregs [j], dest_sreg, 0);
                                }
-                               sregs [j] = dest_sreg;
                        }
+                       sregs [j] = dest_sreg;
                }
                mono_inst_set_src_registers (ins, sregs);
 
@@ -1353,8 +1590,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        val = rs->vassign [ins->dreg];
                        if (is_soft_reg (ins->dreg, bank) && (val >= 0) && (!(regmask (val) & dreg_mask))) {
                                /* DREG is already allocated to a register needed for sreg1 */
-                               get_register_force_spilling (cfg, bb, tmp, ins, ins->dreg, 0);
-                               mono_regstate_free_int (rs, val);
+                           spill_vreg (cfg, bb, tmp, ins, ins->dreg, 0);
                        }
                }
 
@@ -1369,13 +1605,13 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        if (dest_dreg != -1) {
                                if (rs->vassign [ins->dreg] != dest_dreg)
-                                       free_up_reg (cfg, bb, tmp, ins, dest_dreg, 0);
+                                       free_up_hreg (cfg, bb, tmp, ins, dest_dreg, 0);
 
                                dreg2 = ins->dreg + 1;
                                dest_dreg2 = MONO_ARCH_INST_REGPAIR_REG2 (spec_dest, dest_dreg);
                                if (dest_dreg2 != -1) {
                                        if (rs->vassign [dreg2] != dest_dreg2)
-                                               free_up_reg (cfg, bb, tmp, ins, dest_dreg2, 0);
+                                               free_up_hreg (cfg, bb, tmp, ins, dest_dreg2, 0);
                                }
                        }
                }
@@ -1411,7 +1647,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                val = alloc_reg (cfg, bb, tmp, ins, dreg_mask, ins->dreg, &reginfo [ins->dreg], bank);
                                assign_reg (cfg, rs, ins->dreg, val, bank);
                                if (spill)
-                                       create_spilled_store (cfg, bb, spill, val, prev_dreg, tmp, ins, bank);
+                                       create_spilled_store (cfg, bb, spill, val, prev_dreg, tmp, ins, NULL, bank);
                        }
 
                        DEBUG (printf ("\tassigned dreg %s to dest R%d\n", mono_regname_full (val, bank), ins->dreg));
@@ -1441,7 +1677,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                if (val < 0)
                                        val = get_register_spilling (cfg, bb, tmp, ins, mask, reg2, bank);
                                if (spill)
-                                       create_spilled_store (cfg, bb, spill, val, reg2, tmp, ins, bank);
+                                       create_spilled_store (cfg, bb, spill, val, reg2, tmp, ins, NULL, bank);
                        }
                        else {
                                if (! (mask & (regmask (val)))) {
@@ -1490,12 +1726,16 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        ins->dreg = dest_dreg;
 
                        if (G_UNLIKELY (bank)) {
-                               if (rs->symbolic [bank] [dest_dreg] >= regbank_size [bank])
-                                       free_up_reg (cfg, bb, tmp, ins, dest_dreg, bank);
+                               /* the register we need to free up may be used in another logical regbank
+                                * so do a translate just in case.
+                                */
+                               int translated_bank = translate_bank (cfg->rs, bank, dest_dreg);
+                               if (rs->symbolic [translated_bank] [dest_dreg] >= regbank_size [translated_bank])
+                                       free_up_hreg (cfg, bb, tmp, ins, dest_dreg, translated_bank);
                        }
                        else {
                                if (rs->isymbolic [dest_dreg] >= MONO_MAX_IREGS)
-                                       free_up_reg (cfg, bb, tmp, ins, dest_dreg, bank);
+                                       free_up_hreg (cfg, bb, tmp, ins, dest_dreg, bank);
                        }
                }
 
@@ -1514,8 +1754,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                 */
                if ((clob_reg != -1) && (!(rs->ifree_mask & (regmask (clob_reg))))) {
                        DEBUG (printf ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
-                       get_register_force_spilling (cfg, bb, tmp, ins, rs->isymbolic [clob_reg], 0);
-                       mono_regstate_free_int (rs, clob_reg);
+                       free_up_hreg (cfg, bb, tmp, ins, clob_reg, 0);
                }
 
                if (spec [MONO_INST_CLOB] == 'c') {
@@ -1543,7 +1782,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        s = regmask (j);
                                        if ((clob_mask & s) && !(rs->ifree_mask & s) && (j != ins->sreg1)) {
                                                if ((j != dreg) && (j != dreg2))
-                                                       get_register_force_spilling (cfg, bb, tmp, ins, rs->isymbolic [j], 0);
+                                                       free_up_hreg (cfg, bb, tmp, ins, j, 0);
                                                else if (rs->isymbolic [j])
                                                        /* The hreg is assigned to the dreg of this instruction */
                                                        rs->vassign [rs->isymbolic [j]] = -1;
@@ -1561,10 +1800,18 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                                dreg = -1;
 
                                        for (j = 0; j < regbank_size [cur_bank]; ++j) {
+
+                                               /* we are looping through the banks in the outer loop
+                                                * so, we don't need to deal with mirrored hregs
+                                                * because we will get them in one of the other bank passes.
+                                                */
+                                               if (is_hreg_mirrored (rs, cur_bank, j))
+                                                       continue;
+
                                                s = regmask (j);
-                                               if ((clob_mask & s) && !(rs->free_mask [cur_bank] & s) && (j != ins->sreg1)) {
+                                               if ((clob_mask & s) && !(rs->free_mask [cur_bank] & s)) {
                                                        if (j != dreg)
-                                                               get_register_force_spilling (cfg, bb, tmp, ins, rs->symbolic [cur_bank] [j], cur_bank);
+                                                               free_up_hreg (cfg, bb, tmp, ins, j, cur_bank);
                                                        else if (rs->symbolic [cur_bank] [j])
                                                                /* The hreg is assigned to the dreg of this instruction */
                                                                rs->vassign [rs->symbolic [cur_bank] [j]] = -1;
@@ -1578,7 +1825,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                /*
                 * TRACK ARGUMENT REGS
                 */
-               if (spec [MONO_INST_CLOB] == 'c') {
+               if (spec [MONO_INST_CLOB] == 'c' && MONO_IS_CALL (ins)) {
                        MonoCallInst *call = (MonoCallInst*)ins;
                        GSList *list;
 
@@ -1694,9 +1941,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        sreg_masks [0] = regmask (dest_sregs [0]);
 
                        if ((rs->vassign [sregs [0]] != dest_sregs [0]) && !(rs->ifree_mask & (regmask (dest_sregs [0])))) {
-                               DEBUG (printf ("\tforced spill of R%d\n", rs->isymbolic [dest_sregs [0]]));
-                               get_register_force_spilling (cfg, bb, tmp, ins, rs->isymbolic [dest_sregs [0]], 0);
-                               mono_regstate_free_int (rs, dest_sregs [0]);
+                               free_up_hreg (cfg, bb, tmp, ins, dest_sregs [0], 0);
                        }
                        if (is_global_ireg (sregs [0])) {
                                /* The argument is already in a hard reg, need to copy */
@@ -1733,12 +1978,11 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                DEBUG (printf ("\tassigned sreg1 %s to R%d\n", mono_regname_full (val, bank), sregs [0]));
 
                                if (spill) {
-                                       MonoInst *store = create_spilled_store (cfg, bb, spill, val, prev_sregs [0], tmp, NULL, bank);
                                        /*
                                         * Need to insert before the instruction since it can
                                         * overwrite sreg1.
                                         */
-                                       insert_before_ins (bb, ins, store);
+                                       create_spilled_store (cfg, bb, spill, val, prev_sregs [0], tmp, NULL, ins, bank);
                                }
                        }
                        else if ((dest_sregs [0] != -1) && (dest_sregs [0] != val)) {
@@ -1867,9 +2111,29 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        bank = sreg_bank (j, spec);
                        if (MONO_ARCH_INST_IS_REGPAIR (spec [MONO_INST_SRC1 + j]))
                                g_assert_not_reached ();
-                       if (is_soft_reg (sregs [j], bank)) {
+
+                       if (dest_sregs [j] != -1 && is_global_ireg (sregs [j])) {
+                               /*
+                                * Argument already in a global hard reg, copy it to the fixed reg, without
+                                * allocating it to the fixed reg.
+                                */
+                               MonoInst *copy = create_copy_ins (cfg, bb, tmp, dest_sregs [j], sregs [j], NULL, ip, 0);
+                               insert_before_ins (bb, ins, copy);
+                               sregs [j] = dest_sregs [j];
+                       } else if (is_soft_reg (sregs [j], bank)) {
                                val = rs->vassign [sregs [j]];
 
+                               if (dest_sregs [j] != -1 && val >= 0 && dest_sregs [j] != val) {
+                                       /*
+                                        * The sreg is already allocated to a hreg, but not to the fixed
+                                        * reg required by the instruction. Spill the sreg, so it can be
+                                        * allocated to the fixed reg by the code below.
+                                        */
+                                       /* Currently, this code should only be hit for CAS */
+                                       spill_vreg (cfg, bb, tmp, ins, sregs [j], 0);
+                                       val = rs->vassign [sregs [j]];
+                               }
+
                                if (val < 0) {
                                        int spill = 0;
                                        if (val < -1) {
@@ -1880,12 +2144,11 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        assign_reg (cfg, rs, sregs [j], val, bank);
                                        DEBUG (printf ("\tassigned sreg%d %s to R%d\n", j + 1, mono_regname_full (val, bank), sregs [j]));
                                        if (spill) {
-                                               MonoInst *store = create_spilled_store (cfg, bb, spill, val, prev_sregs [j], tmp, NULL, bank);
                                                /*
                                                 * Need to insert before the instruction since it can
                                                 * overwrite sreg2.
                                                 */
-                                               insert_before_ins (bb, ins, store);
+                                               create_spilled_store (cfg, bb, spill, val, sregs [j], tmp, NULL, ins, bank);
                                        }
                                }
                                sregs [j] = val;
@@ -1898,6 +2161,24 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                mono_inst_set_src_registers (ins, sregs);
 
+               /* Sanity check */
+               /* Do this only for CAS for now */
+               for (j = 1; j < num_sregs; ++j) {
+                       int sreg = sregs [j];
+                       int dest_sreg = dest_sregs [j];
+
+                       if (j == 2 && dest_sreg != -1) {
+                               int k;
+
+                               g_assert (sreg == dest_sreg);
+
+                               for (k = 0; k < num_sregs; ++k) {
+                                       if (k != j)
+                                               g_assert (sregs [k] != dest_sreg);
+                               }
+                       }
+               }
+
                /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
                        DEBUG (printf ("freeable %s\n", mono_arch_regname (ins->sreg1)));
                        mono_regstate_free_int (rs, ins->sreg1);
@@ -1956,8 +2237,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        MONO_INST_NEW (cfg, fxch, OP_X86_FXCH);
                                        fxch->inst_imm = sp - 1 - i;
 
-                                       prev->next = fxch;
-                                       fxch->next = ins;
+                                       mono_bblock_insert_after_ins (bb, prev, fxch);
                                        prev = fxch;
 
                                        tmp = fpstack [sp - 1];
@@ -1971,8 +2251,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                MONO_INST_NEW (cfg, fxch, OP_X86_FXCH);
                                fxch->inst_imm = 1;
 
-                               prev->next = fxch;
-                               fxch->next = ins;
+                               mono_bblock_insert_after_ins (bb, prev, fxch);
                                prev = fxch;
 
                                tmp = fpstack [sp - 1];
@@ -1996,8 +2275,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        MONO_INST_NEW (cfg, fxch, OP_X86_FXCH);
                                        fxch->inst_imm = sp - 1 - i;
 
-                                       prev->next = fxch;
-                                       fxch->next = ins;
+                                       mono_bblock_insert_after_ins (bb, prev, fxch);
                                        prev = fxch;
 
                                        tmp = fpstack [sp - 1];
@@ -2024,8 +2302,7 @@ mono_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        MONO_INST_NEW (cfg, fxch, OP_X86_FXCH);
                                        fxch->inst_imm = sp - 1 - i;
 
-                                       prev->next = fxch;
-                                       fxch->next = ins;
+                                       mono_bblock_insert_after_ins (bb, prev, fxch);
                                        prev = fxch;
 
                                        tmp = fpstack [sp - 1];
@@ -2071,7 +2348,6 @@ CompRelation
 mono_opcode_to_cond (int opcode)
 {
        switch (opcode) {
-       case CEE_BEQ:
        case OP_CEQ:
        case OP_IBEQ:
        case OP_ICEQ:
@@ -2079,12 +2355,15 @@ mono_opcode_to_cond (int opcode)
        case OP_LCEQ:
        case OP_FBEQ:
        case OP_FCEQ:
+       case OP_RBEQ:
+       case OP_RCEQ:
        case OP_COND_EXC_EQ:
        case OP_COND_EXC_IEQ:
        case OP_CMOV_IEQ:
        case OP_CMOV_LEQ:
                return CMP_EQ;
-       case CEE_BNE_UN:
+       case OP_FCNEQ:
+       case OP_ICNEQ:
        case OP_IBNE_UN:
        case OP_LBNE_UN:
        case OP_FBNE_UN:
@@ -2093,21 +2372,22 @@ mono_opcode_to_cond (int opcode)
        case OP_CMOV_INE_UN:
        case OP_CMOV_LNE_UN:
                return CMP_NE;
-       case CEE_BLE:
+       case OP_FCLE:
+       case OP_ICLE:
        case OP_IBLE:
        case OP_LBLE:
        case OP_FBLE:
        case OP_CMOV_ILE:
        case OP_CMOV_LLE:
                return CMP_LE;
-       case CEE_BGE:
+       case OP_FCGE:
+       case OP_ICGE:
        case OP_IBGE:
        case OP_LBGE:
        case OP_FBGE:
        case OP_CMOV_IGE:
        case OP_CMOV_LGE:
                return CMP_GE;
-       case CEE_BLT:
        case OP_CLT:
        case OP_IBLT:
        case OP_ICLT:
@@ -2115,12 +2395,13 @@ mono_opcode_to_cond (int opcode)
        case OP_LCLT:
        case OP_FBLT:
        case OP_FCLT:
+       case OP_RBLT:
+       case OP_RCLT:
        case OP_COND_EXC_LT:
        case OP_COND_EXC_ILT:
        case OP_CMOV_ILT:
        case OP_CMOV_LLT:
                return CMP_LT;
-       case CEE_BGT:
        case OP_CGT:
        case OP_IBGT:
        case OP_ICGT:
@@ -2128,13 +2409,15 @@ mono_opcode_to_cond (int opcode)
        case OP_LCGT:
        case OP_FBGT:
        case OP_FCGT:
+       case OP_RBGT:
+       case OP_RCGT:
        case OP_COND_EXC_GT:
        case OP_COND_EXC_IGT:
        case OP_CMOV_IGT:
        case OP_CMOV_LGT:
                return CMP_GT;
 
-       case CEE_BLE_UN:
+       case OP_ICLE_UN:
        case OP_IBLE_UN:
        case OP_LBLE_UN:
        case OP_FBLE_UN:
@@ -2143,14 +2426,14 @@ mono_opcode_to_cond (int opcode)
        case OP_CMOV_ILE_UN:
        case OP_CMOV_LLE_UN:
                return CMP_LE_UN;
-       case CEE_BGE_UN:
+
+       case OP_ICGE_UN:
        case OP_IBGE_UN:
        case OP_LBGE_UN:
        case OP_FBGE_UN:
        case OP_CMOV_IGE_UN:
        case OP_CMOV_LGE_UN:
                return CMP_GE_UN;
-       case CEE_BLT_UN:
        case OP_CLT_UN:
        case OP_IBLT_UN:
        case OP_ICLT_UN:
@@ -2158,12 +2441,13 @@ mono_opcode_to_cond (int opcode)
        case OP_LCLT_UN:
        case OP_FBLT_UN:
        case OP_FCLT_UN:
+       case OP_RBLT_UN:
+       case OP_RCLT_UN:
        case OP_COND_EXC_LT_UN:
        case OP_COND_EXC_ILT_UN:
        case OP_CMOV_ILT_UN:
        case OP_CMOV_LLT_UN:
                return CMP_LT_UN;
-       case CEE_BGT_UN:
        case OP_CGT_UN:
        case OP_IBGT_UN:
        case OP_ICGT_UN:
@@ -2171,6 +2455,8 @@ mono_opcode_to_cond (int opcode)
        case OP_LCGT_UN:
        case OP_FCGT_UN:
        case OP_FBGT_UN:
+       case OP_RCGT_UN:
+       case OP_RBGT_UN:
        case OP_COND_EXC_GT_UN:
        case OP_COND_EXC_IGT_UN:
        case OP_CMOV_IGT_UN:
@@ -2215,9 +2501,7 @@ mono_negate_cond (CompRelation cond)
 CompType
 mono_opcode_to_type (int opcode, int cmp_opcode)
 {
-       if ((opcode >= CEE_BEQ) && (opcode <= CEE_BLT_UN))
-               return CMP_TYPE_L;
-       else if ((opcode >= OP_CEQ) && (opcode <= OP_CLT_UN))
+       if ((opcode >= OP_CEQ) && (opcode <= OP_CLT_UN))
                return CMP_TYPE_L;
        else if ((opcode >= OP_IBEQ) && (opcode <= OP_IBLT_UN))
                return CMP_TYPE_I;
@@ -2237,7 +2521,6 @@ mono_opcode_to_type (int opcode, int cmp_opcode)
                switch (cmp_opcode) {
                case OP_ICOMPARE:
                case OP_ICOMPARE_IMM:
-               case OP_LCOMPARE_IMM:
                        return CMP_TYPE_I;
                default:
                        return CMP_TYPE_L;
@@ -2248,6 +2531,8 @@ mono_opcode_to_type (int opcode, int cmp_opcode)
        }
 }
 
+#endif /* DISABLE_JIT */
+
 gboolean
 mono_is_regsize_var (MonoType *t)
 {
@@ -2284,10 +2569,13 @@ mono_is_regsize_var (MonoType *t)
                return FALSE;
        case MONO_TYPE_VALUETYPE:
                return FALSE;
+       default:
+               return FALSE;
        }
-       return FALSE;
 }
 
+#ifndef DISABLE_JIT
+
 /*
  * mono_peephole_ins:
  *
@@ -2296,7 +2584,8 @@ mono_is_regsize_var (MonoType *t)
 void
 mono_peephole_ins (MonoBasicBlock *bb, MonoInst *ins)
 {
-       MonoInst *last_ins = ins->prev;
+       int filter = FILTER_IL_SEQ_POINT;
+       MonoInst *last_ins = mono_inst_prev (ins, filter);
 
        switch (ins->opcode) {
        case OP_MUL_IMM: 
@@ -2319,6 +2608,8 @@ mono_peephole_ins (MonoBasicBlock *bb, MonoInst *ins)
                 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                 * OP_MOVE reg1, reg2
                 */
+               if (last_ins && last_ins->opcode == OP_GC_LIVENESS_DEF)
+                       last_ins = mono_inst_prev (ins, filter);
                if (last_ins &&
                        (((ins->opcode == OP_LOADI4_MEMBASE) && (last_ins->opcode == OP_STOREI4_MEMBASE_REG)) ||
                         ((ins->opcode == OP_LOAD_MEMBASE) && (last_ins->opcode == OP_STORE_MEMBASE_REG))) &&
@@ -2433,7 +2724,7 @@ mono_peephole_ins (MonoBasicBlock *bb, MonoInst *ins)
                 * OP_MOVE sreg, dreg 
                 * OP_MOVE dreg, sreg
                 */
-               if (last_ins && last_ins->opcode == OP_MOVE &&
+               if (last_ins && last_ins->opcode == ins->opcode &&
                        ins->sreg1 == last_ins->dreg &&
                        ins->dreg == last_ins->sreg1) {
                        MONO_DELETE_INS (bb, ins);
@@ -2445,3 +2736,27 @@ mono_peephole_ins (MonoBasicBlock *bb, MonoInst *ins)
        }
 }
 
+/* Translate the short class name of an intrinsic exception into its MONO_EXC_* id. */
+int
+mini_exception_id_by_name (const char *name)
+{
+       static const struct { const char *exc_name; int exc_id; } known_excs [] = {
+               { "IndexOutOfRangeException", MONO_EXC_INDEX_OUT_OF_RANGE },
+               { "OverflowException", MONO_EXC_OVERFLOW },
+               { "ArithmeticException", MONO_EXC_ARITHMETIC },
+               { "DivideByZeroException", MONO_EXC_DIVIDE_BY_ZERO },
+               { "InvalidCastException", MONO_EXC_INVALID_CAST },
+               { "NullReferenceException", MONO_EXC_NULL_REF },
+               { "ArrayTypeMismatchException", MONO_EXC_ARRAY_TYPE_MISMATCH },
+               { "ArgumentException", MONO_EXC_ARGUMENT }
+       };
+       unsigned int idx;
+       for (idx = 0; idx < sizeof (known_excs) / sizeof (known_excs [0]); ++idx)
+               if (!strcmp (name, known_excs [idx].exc_name))
+                       return known_excs [idx].exc_id;
+       /* No table entry matched: report the unrecognized name through g_error (). */
+       g_error ("Unknown intrinsic exception %s\n", name);
+       return -1;
+}
+
+#endif /* DISABLE_JIT */