2008-07-25 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
index 822881789e30507df16b70dccc1ffffc82d63965..4932fc5c1b83e9041ccdffc4a0c62fe99b4a6572 100644 (file)
 #include <mono/metadata/debug-helpers.h>
 #include <mono/metadata/threads.h>
 #include <mono/metadata/profiler-private.h>
+#include <mono/metadata/mono-debug.h>
 #include <mono/utils/mono-math.h>
 
 #include "trace.h"
 #include "mini-x86.h"
-#include "debug-mini.h"
 #include "inssel.h"
 #include "cpu-x86.h"
 
@@ -61,16 +61,19 @@ static CRITICAL_SECTION mini_arch_mutex;
 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
 #endif
 
-#define NOT_IMPLEMENTED g_assert_not_reached ()
+MonoBreakpointInfo
+mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
 
 const char*
-mono_arch_regname (int reg) {
+mono_arch_regname (int reg)
+{
        switch (reg) {
        case X86_EAX: return "%eax";
        case X86_EBX: return "%ebx";
        case X86_ECX: return "%ecx";
        case X86_EDX: return "%edx";
-       case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
+       case X86_ESP: return "%esp";    
+       case X86_EBP: return "%ebp";
        case X86_EDI: return "%edi";
        case X86_ESI: return "%esi";
        }
@@ -78,8 +81,28 @@ mono_arch_regname (int reg) {
 }
 
+/*
+ * mono_arch_fregname:
+ *
+ *   Return a printable name for register number REG: "%fr0" ... "%fr7" for
+ * the values 0 ... 7, and "unknown" for any other value.
+ * NOTE(review): judging by the function name these are presumably the
+ * floating point registers - confirm against the callers.
+ */
 const char*
-mono_arch_fregname (int reg) {
-       return "unknown";
+mono_arch_fregname (int reg)
+{
+       switch (reg) {
+       case 0:
+               return "%fr0";
+       case 1:
+               return "%fr1";
+       case 2:
+               return "%fr2";
+       case 3:
+               return "%fr3";
+       case 4:
+               return "%fr4";
+       case 5:
+               return "%fr5";
+       case 6:
+               return "%fr6";
+       case 7:
+               return "%fr7";
+       default:
+               return "unknown";
+       }
 }
 
 typedef enum {
@@ -240,14 +263,13 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn
  * For x86 win32, see ???.
  */
 static CallInfo*
-get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
+get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
 {
        guint32 i, gr, fr;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;
-       MonoGenericSharingContext *gsctx = cfg ? cfg->generic_sharing_context : NULL;
 
        if (mp)
                cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
@@ -471,9 +493,9 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
 
        for (k = 0; k < param_count; k++) {
                
-               if (csig->pinvoke)
+               if (csig->pinvoke) {
                        size = mono_type_native_stack_size (csig->params [k], &align);
-               else {
+               } else {
                        int ialign;
                        size = mini_type_stack_size (NULL, csig->params [k], &ialign);
                        align = ialign;
@@ -781,7 +803,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        header = mono_method_get_header (cfg->method);
        sig = mono_method_signature (cfg->method);
 
-       cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
 
        cfg->frame_reg = MONO_ARCH_BASEREG;
        offset = 0;
@@ -840,15 +862,30 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        switch (cinfo->ret.storage) {
        case ArgOnStack:
-               cfg->ret->opcode = OP_REGOFFSET;
-               cfg->ret->inst_basereg = X86_EBP;
-               cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
+               if (MONO_TYPE_ISSTRUCT (sig->ret)) {
+                       /* 
+                        * In the new IR, the cfg->vret_addr variable represents the
+                        * vtype return value.
+                        */
+                       cfg->vret_addr->opcode = OP_REGOFFSET;
+                       cfg->vret_addr->inst_basereg = cfg->frame_reg;
+                       cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
+                       if (G_UNLIKELY (cfg->verbose_level > 1)) {
+                               printf ("vret_addr =");
+                               mono_print_ins (cfg->vret_addr);
+                       }
+               } else {
+                       cfg->ret->opcode = OP_REGOFFSET;
+                       cfg->ret->inst_basereg = X86_EBP;
+                       cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
+               }
                break;
        case ArgValuetypeInReg:
                break;
        case ArgInIReg:
                cfg->ret->opcode = OP_REGVAR;
                cfg->ret->inst_c0 = cinfo->ret.reg;
+               cfg->ret->dreg = cinfo->ret.reg;
                break;
        case ArgNone:
        case ArgOnFloatFpStack:
@@ -887,16 +924,15 @@ mono_arch_create_vars (MonoCompile *cfg)
 
        sig = mono_method_signature (cfg->method);
 
-       cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
 
        if (cinfo->ret.storage == ArgValuetypeInReg)
                cfg->ret_var_is_local = TRUE;
+       if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
+               cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
+       }
 }
 
-/* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
- * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
- */
-
 static void
 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
 {
@@ -974,7 +1010,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
        sig = call->signature;
        n = sig->param_count + sig->hasthis;
 
-       cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
 
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
@@ -1009,7 +1045,9 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                        call->out_args = arg;
 
                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
-                               guint32 size, align;
+                               gint align;
+                               guint32 ialign;
+                               guint32 size;
 
                                if (t->type == MONO_TYPE_TYPEDBYREF) {
                                        size = sizeof (MonoTypedRef);
@@ -1017,7 +1055,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                                }
                                else
                                        if (sig->pinvoke)
-                                               size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
+                                               size = mono_type_native_stack_size (&in->klass->byval_arg, &ialign);
                                        else {
                                                int ialign;
                                                size = mini_type_stack_size (cfg->generic_sharing_context, &in->klass->byval_arg, &ialign);
@@ -1095,7 +1133,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
                                cinfo->stack_usage -= 4;
        }
-       
+
        call->stack_usage = cinfo->stack_usage;
 
 #if defined(__APPLE__)
@@ -1110,6 +1148,242 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
        return call;
 }
 
+/*
+ * emit_sig_cookie2:
+ *
+ *   Emit an OP_X86_PUSH_IMM instruction whose immediate is the signature
+ * cookie of the vararg call CALL. The cookie is a duplicate of
+ * call->signature which only describes the parameters starting at the
+ * sentinel position. CINFO is not used by this function.
+ * Side effect: sets cfg->disable_aot.
+ */
+static void
+emit_sig_cookie2 (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
+{
+       MonoMethodSignature *tmp_sig;
+
+       /* FIXME: Add support for signature tokens to AOT */
+       cfg->disable_aot = TRUE;
+
+       /*
+        * mono_ArgIterator_Setup assumes the signature cookie is
+        * passed first and all the arguments which were before it are
+        * passed on the stack after the signature. So compensate by
+        * passing a different signature: drop the first 'sentinelpos'
+        * parameters from the copy and move the remaining ones to the front.
+        */
+       tmp_sig = mono_metadata_signature_dup (call->signature);
+       tmp_sig->param_count -= call->signature->sentinelpos;
+       tmp_sig->sentinelpos = 0;
+       memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
+
+       /* The modified signature copy itself is the immediate operand */
+       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
+}
+
+/*
+ * mono_arch_emit_call:
+ *
+ *   Emit into cfg->cbb the instructions which pass the arguments of CALL.
+ * The arguments in call->args are processed in reverse order:
+ * - valuetype arguments with a non-zero size become OP_OUTARG_VT,
+ * - all other arguments must have ArgOnStack storage and are pushed
+ *   (R4/R8 are stored into freshly reserved stack space, I8/U8 are pushed
+ *   as two words),
+ * - for managed vararg calls the signature cookie is emitted using
+ *   emit_sig_cookie2 ().
+ * If the call returns a valuetype, either call->vret_in_reg is set or the
+ * address held in call->vret_var->dreg is pushed.
+ * On return call->stack_usage is set from the CallInfo of the signature.
+ */
+void
+mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
+{
+       MonoInst *arg, *in;
+       MonoMethodSignature *sig;
+       int i, n;
+       CallInfo *cinfo;
+       int sentinelpos = 0;
+
+       sig = call->signature;
+       n = sig->param_count + sig->hasthis;
+
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
+
+       /* sentinelpos is compared against indexes into call->args below, so account for 'this' */
+       if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
+               sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);
+
+       if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
+               MonoInst *vtarg;
+
+               if (cinfo->ret.storage == ArgValuetypeInReg) {
+                       if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
+                               /*
+                                * Tell the JIT to use a more efficient calling convention: call using
+                                * OP_CALL, compute the result location after the call, and save the
+                                * result there.
+                                */
+                               call->vret_in_reg = TRUE;
+                       } else {
+                               /*
+                                * The valuetype is in EAX:EDX after the call and needs to be copied to
+                                * the stack. Save the address here, so the call instruction can
+                                * access it.
+                                */
+                               MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
+                               vtarg->sreg1 = call->vret_var->dreg;
+                               MONO_ADD_INS (cfg->cbb, vtarg);
+                       }
+               }
+       }
+
+#if defined(__APPLE__)
+       /* Subtract the alignment amount computed in the CallInfo from ESP before the arguments are pushed */
+       if (cinfo->need_stack_align) {
+               MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
+               arg->dreg = X86_ESP;
+               arg->sreg1 = X86_ESP;
+               arg->inst_imm = cinfo->stack_align_amount;
+               MONO_ADD_INS (cfg->cbb, arg);
+       }
+#endif 
+
+       /* Handle the case where there are no implicit arguments */
+       if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
+               emit_sig_cookie2 (cfg, call, cinfo);
+       }
+
+       /* Arguments are pushed in the reverse order */
+       for (i = n - 1; i >= 0; i --) {
+               ArgInfo *ainfo = cinfo->args + i;
+               MonoType *t;
+
+               /* The 'this' argument has no entry in sig->params, it is treated as a native int */
+               if (i >= sig->hasthis)
+                       t = sig->params [i - sig->hasthis];
+               else
+                       t = &mono_defaults.int_class->byval_arg;
+               t = mono_type_get_underlying_type (t);
+
+               /* ARG starts out as a plain push of the argument's vreg, the branches below refine it */
+               MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
+
+               in = call->args [i];
+               arg->cil_code = in->cil_code;
+               arg->sreg1 = in->dreg;
+               arg->type = in->type;
+
+               g_assert (in->dreg != -1);
+
+               if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
+                       /* NOTE(review): align is computed below but never read in this function */
+                       gint align;
+                       guint32 ialign;
+                       guint32 size;
+
+                       g_assert (in->klass);
+
+                       if (t->type == MONO_TYPE_TYPEDBYREF) {
+                               size = sizeof (MonoTypedRef);
+                               align = sizeof (gpointer);
+                       }
+                       else
+                               if (sig->pinvoke) {
+                                       size = mono_type_native_stack_size (&in->klass->byval_arg, &ialign);
+                                       align = ialign;
+                               } else {
+                                       size = mini_type_stack_size (cfg->generic_sharing_context, &in->klass->byval_arg, &align);
+                               }
+
+                       /* ARG is only added to the basic block when the valuetype has a non-zero size */
+                       if (size > 0) {
+                               arg->opcode = OP_OUTARG_VT;
+                               arg->sreg1 = in->dreg;
+                               arg->klass = in->klass;
+                               arg->backend.size = size;
+
+                               MONO_ADD_INS (cfg->cbb, arg);
+                       }
+               }
+               else {
+                       switch (ainfo->storage) {
+                       case ArgOnStack:
+                               arg->opcode = OP_X86_PUSH;
+                               if (!t->byref) {
+                                       if (t->type == MONO_TYPE_R4) {
+                                               /* Reserve 4 bytes by decrementing ESP, then store the value at offset 0 from ESP */
+                                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
+                                               arg->opcode = OP_STORER4_MEMBASE_REG;
+                                               arg->inst_destbasereg = X86_ESP;
+                                               arg->inst_offset = 0;
+                                       } else if (t->type == MONO_TYPE_R8) {
+                                               /* Same as R4, but 8 bytes are reserved */
+                                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
+                                               arg->opcode = OP_STORER8_MEMBASE_REG;
+                                               arg->inst_destbasereg = X86_ESP;
+                                               arg->inst_offset = 0;
+                                       } else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
+                                               /*
+                                                * A 64 bit value is pushed as two words: in->dreg + 2 is
+                                                * pushed here, in->dreg + 1 is pushed by ARG below.
+                                                * NOTE(review): presumably dreg + 1/dreg + 2 are the
+                                                * low/high word vregs of the long value - confirm.
+                                                */
+                                               arg->sreg1 ++;
+                                               MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
+                                       }
+                               }
+                               break;
+                       default:
+                               g_assert_not_reached ();
+                       }
+                       
+                       MONO_ADD_INS (cfg->cbb, arg);
+               }
+
+               if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
+                       /* Emit the signature cookie just before the implicit arguments */
+                       emit_sig_cookie2 (cfg, call, cinfo);
+               }
+       }
+
+       if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
+               MonoInst *vtarg;
+
+               if (cinfo->ret.storage == ArgValuetypeInReg) {
+                       /* Already done */
+               }
+               else if (cinfo->ret.storage == ArgInIReg) {
+                       /*
+                        * NOTE(review): the code following NOT_IMPLEMENTED is presumably
+                        * unreachable while that macro asserts - confirm against its definition.
+                        */
+                       NOT_IMPLEMENTED;
+                       /*
+                        * The return address is passed in a register
+                        * (NOTE(review): presumably the address of the valuetype return
+                        * value is meant - confirm)
+                        */
+                       MONO_INST_NEW (cfg, vtarg, OP_MOVE);
+                       vtarg->sreg1 = call->inst.dreg;
+                       vtarg->dreg = mono_regstate_next_int (cfg->rs);
+                       MONO_ADD_INS (cfg->cbb, vtarg);
+                               
+                       mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
+               } else {
+                       /* Push the address held in call->vret_var->dreg after all the other arguments */
+                       MonoInst *vtarg;
+                       MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
+                       vtarg->type = STACK_MP;
+                       vtarg->sreg1 = call->vret_var->dreg;
+                       MONO_ADD_INS (cfg->cbb, vtarg);
+               }
+
+               /* if the function returns a struct, the called method already does a ret $0x4 */
+               cinfo->stack_usage -= 4;
+       }
+
+       call->stack_usage = cinfo->stack_usage;
+}
+
+/*
+ * mono_arch_emit_outarg_vt:
+ *
+ *   Emit the instructions which pass a valuetype argument on the stack.
+ * The size of the valuetype is taken from ins->backend.size and src->dreg
+ * is used as the base register of the memory to pass. Depending on the
+ * size one of three sequences is emitted into cfg->cbb:
+ * - size <= 4: a single OP_X86_PUSH_MEMBASE,
+ * - size <= 20: ESP is decremented by ALIGN_TO (size, 4) and
+ *   mini_emit_memcpy2 () is called with ESP and src->dreg,
+ * - otherwise: an OP_X86_PUSH_OBJ with the size as its immediate.
+ */
+void
+mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
+{
+       MonoInst *arg;
+       int size = ins->backend.size;
+
+       if (size <= 4) {
+               /* Small valuetype: one push with src->dreg as the source register */
+               MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
+               arg->sreg1 = src->dreg;
+
+               MONO_ADD_INS (cfg->cbb, arg);
+       } else if (size <= 20) {        
+               /*
+                * Medium sized valuetype: reserve the size rounded up to a multiple
+                * of 4 on the stack.
+                * NOTE(review): presumably mini_emit_memcpy2 () takes (dest base,
+                * dest offset, src base, src offset, size, align) and copies the
+                * valuetype into the reserved area - confirm against its declaration.
+                */
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
+               mini_emit_memcpy2 (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
+       } else {
+               /* Large valuetype: a single opcode which gets the base register, offset 0 and the size */
+               MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
+               arg->inst_basereg = src->dreg;
+               arg->inst_offset = 0;
+               arg->inst_imm = size;
+                                       
+               MONO_ADD_INS (cfg->cbb, arg);
+       }
+}
+
+/*
+ * mono_arch_emit_setret:
+ *
+ *   Emit the moves which put VAL, the value returned by METHOD, into the
+ * place used for the return value. The underlying type of the return type
+ * of METHOD decides what is emitted:
+ * - non-byref R4/R8: nothing,
+ * - non-byref I8/U8: val->dreg + 1 is moved to EAX and val->dreg + 2 to EDX,
+ * - everything else: val->dreg is moved to cfg->ret->dreg.
+ */
+void
+mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
+{
+       MonoType *ret = mono_type_get_underlying_type (mono_method_signature (method)->ret);
+
+       if (!ret->byref) {
+               if (ret->type == MONO_TYPE_R4) {
+                       /* Nothing to do */
+                       return;
+               } else if (ret->type == MONO_TYPE_R8) {
+                       /* Nothing to do */
+                       return;
+               } else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
+                       /*
+                        * NOTE(review): presumably dreg + 1/dreg + 2 are the low/high
+                        * word vregs of the 64 bit value - confirm.
+                        */
+                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
+                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
+                       return;
+               }
+       }
+                       
+       /* Byref values and all the remaining types are returned through cfg->ret->dreg */
+       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+}
+
 /*
  * Allow tracing to work with this interface (with an optional argument)
  */
@@ -1317,17 +1591,18 @@ emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer dat
 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
 
 /*
- * peephole_pass_1:
+ * mono_arch_peephole_pass_1:
  *
  *   Perform peephole opts which should/can be performed before local regalloc
  */
-static void
-peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
+void
+mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *last_ins = NULL;
-       ins = bb->code;
+       MonoInst *ins, *n;
+
+       MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
+               MonoInst *last_ins = ins->prev;
 
-       while (ins) {
                switch (ins->opcode) {
                case OP_IADD_IMM:
                case OP_ADD_IMM:
@@ -1380,141 +1655,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                                                ins->opcode = OP_X86_TEST_NULL;
                                }
 
-                       break;
-               case OP_LOAD_MEMBASE:
-               case OP_LOADI4_MEMBASE:
-                       /* 
-                        * Note: if reg1 = reg2 the load op is removed
-                        *
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
-                        * OP_MOVE reg1, reg2
-                        */
-                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
-                                        || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
-                           ins->inst_basereg == last_ins->inst_destbasereg &&
-                           ins->inst_offset == last_ins->inst_offset) {
-                               if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
-                                       continue;
-                               } else {
-                                       //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
-                                       ins->opcode = OP_MOVE;
-                                       ins->sreg1 = last_ins->sreg1;
-                               }
-
-                       /* 
-                        * Note: reg1 must be different from the basereg in the second load
-                        * Note: if reg1 = reg2 is equal then second load is removed
-                        *
-                        * OP_LOAD_MEMBASE offset(basereg), reg1
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_LOAD_MEMBASE offset(basereg), reg1
-                        * OP_MOVE reg1, reg2
-                        */
-                       } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
-                                          || last_ins->opcode == OP_LOAD_MEMBASE) &&
-                             ins->inst_basereg != last_ins->dreg &&
-                             ins->inst_basereg == last_ins->inst_basereg &&
-                             ins->inst_offset == last_ins->inst_offset) {
-
-                               if (ins->dreg == last_ins->dreg) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
-                                       continue;
-                               } else {
-                                       ins->opcode = OP_MOVE;
-                                       ins->sreg1 = last_ins->dreg;
-                               }
-
-                               //g_assert_not_reached ();
-
-#if 0
-                       /* 
-                        * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg
-                        * -->
-                        * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
-                        * OP_ICONST reg, imm
-                        */
-                       } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
-                                               || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
-                                  ins->inst_basereg == last_ins->inst_destbasereg &&
-                                  ins->inst_offset == last_ins->inst_offset) {
-                               //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
-                               ins->opcode = OP_ICONST;
-                               ins->inst_c0 = last_ins->inst_imm;
-                               g_assert_not_reached (); // check this rule
-#endif
-                       }
-                       break;
-               case OP_LOADU1_MEMBASE:
-               case OP_LOADI1_MEMBASE:
-                       /* 
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
-                        * CONV_I2/U2 reg1, reg2
-                        */
-                       if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
-                               (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
-                                       ins->inst_basereg == last_ins->inst_destbasereg &&
-                                       ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
-                               ins->sreg1 = last_ins->sreg1;
-                       }
-                       break;
-               case OP_LOADU2_MEMBASE:
-               case OP_LOADI2_MEMBASE:
-                       /* 
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
-                        * CONV_I2/U2 reg1, reg2
-                        */
-                       if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
-                                       ins->inst_basereg == last_ins->inst_destbasereg &&
-                                       ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
-                               ins->sreg1 = last_ins->sreg1;
-                       }
-                       break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
-               case OP_ICONV_TO_I4:
-               case OP_MOVE:
-                       /*
-                        * Removes:
-                        *
-                        * OP_MOVE reg, reg 
-                        */
-                       if (ins->dreg == ins->sreg1) {
-                               if (last_ins)
-                                       last_ins->next = ins->next;                             
-                               ins = ins->next;
-                               continue;
-                       }
-                       /* 
-                        * Removes:
-                        *
-                        * OP_MOVE sreg, dreg 
-                        * OP_MOVE dreg, sreg
-                        */
-                       if (last_ins && last_ins->opcode == OP_MOVE &&
-                           ins->sreg1 == last_ins->dreg &&
-                           ins->dreg == last_ins->sreg1) {
-                               last_ins->next = ins->next;                             
-                               ins = ins->next;                                
-                               continue;
-                       }
-                       break;
-                       
+                       break;                  
                case OP_X86_PUSH_MEMBASE:
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
                                         last_ins->opcode == OP_STORE_MEMBASE_REG) &&
@@ -1525,20 +1666,17 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                }
-               last_ins = ins;
-               ins = ins->next;
+
+               mono_peephole_ins (bb, ins);
        }
-       bb->last_ins = last_ins;
 }
 
-static void
-peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+void
+mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *last_ins = NULL;
-       ins = bb->code;
-
-       while (ins) {
+       MonoInst *ins, *n;
 
+       MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
                switch (ins->opcode) {
                case OP_ICONST:
                        /* reg = 0 -> XOR (reg, reg) */
@@ -1581,175 +1719,51 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
                                ins->opcode = OP_X86_DEC_REG;
                        break;
-               case OP_X86_COMPARE_MEMBASE_IMM:
-                       /* 
-                        * OP_STORE_MEMBASE_REG reg, offset(basereg)
-                        * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg, offset(basereg)
-                        * OP_COMPARE_IMM reg, imm
-                        *
-                        * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
-                        */
-                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
-                           ins->inst_basereg == last_ins->inst_destbasereg &&
-                           ins->inst_offset == last_ins->inst_offset) {
-                                       ins->opcode = OP_COMPARE_IMM;
-                                       ins->sreg1 = last_ins->sreg1;
-
-                                       /* check if we can remove cmp reg,0 with test null */
-                                       if (!ins->inst_imm)
-                                               ins->opcode = OP_X86_TEST_NULL;
-                               }
+               }
 
-                       break;
-               case OP_LOAD_MEMBASE:
-               case OP_LOADI4_MEMBASE:
-                       /* 
-                        * Note: if reg1 = reg2 the load op is removed
-                        *
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
-                        * OP_MOVE reg1, reg2
-                        */
-                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
-                                        || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
-                           ins->inst_basereg == last_ins->inst_destbasereg &&
-                           ins->inst_offset == last_ins->inst_offset) {
-                               if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
-                                       continue;
-                               } else {
-                                       //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
-                                       ins->opcode = OP_MOVE;
-                                       ins->sreg1 = last_ins->sreg1;
-                               }
+               mono_peephole_ins (bb, ins);
+       }
+}
 
-                       /* 
-                        * Note: reg1 must be different from the basereg in the second load
-                        * Note: if reg1 = reg2 is equal then second load is removed
-                        *
-                        * OP_LOAD_MEMBASE offset(basereg), reg1
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_LOAD_MEMBASE offset(basereg), reg1
-                        * OP_MOVE reg1, reg2
-                        */
-                       } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
-                                          || last_ins->opcode == OP_LOAD_MEMBASE) &&
-                             ins->inst_basereg != last_ins->dreg &&
-                             ins->inst_basereg == last_ins->inst_basereg &&
-                             ins->inst_offset == last_ins->inst_offset) {
-
-                               if (ins->dreg == last_ins->dreg) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
-                                       continue;
-                               } else {
-                                       ins->opcode = OP_MOVE;
-                                       ins->sreg1 = last_ins->dreg;
-                               }
+/*
+ * mono_arch_lowering_pass:
+ *
+ *  Converts complex opcodes into simpler ones so that each IR instruction
+ * corresponds to one machine instruction.
+ */
+void
+mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+       MonoInst *ins, *next;
 
-                               //g_assert_not_reached ();
+       if (bb->max_vreg > cfg->rs->next_vreg)
+               cfg->rs->next_vreg = bb->max_vreg;
 
-#if 0
-                       /* 
-                        * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg
-                        * -->
-                        * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
-                        * OP_ICONST reg, imm
-                        */
-                       } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
-                                               || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
-                                  ins->inst_basereg == last_ins->inst_destbasereg &&
-                                  ins->inst_offset == last_ins->inst_offset) {
-                               //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
-                               ins->opcode = OP_ICONST;
-                               ins->inst_c0 = last_ins->inst_imm;
-                               g_assert_not_reached (); // check this rule
-#endif
-                       }
-                       break;
-               case OP_LOADU1_MEMBASE:
-               case OP_LOADI1_MEMBASE:
-                       /* 
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
-                        * CONV_I2/U2 reg1, reg2
-                        */
-                       if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
-                               (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
-                                       ins->inst_basereg == last_ins->inst_destbasereg &&
-                                       ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
-                               ins->sreg1 = last_ins->sreg1;
-                       }
-                       break;
-               case OP_LOADU2_MEMBASE:
-               case OP_LOADI2_MEMBASE:
-                       /* 
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg2
-                        * -->
-                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
-                        * CONV_I2/U2 reg1, reg2
-                        */
-                       if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
-                                       ins->inst_basereg == last_ins->inst_destbasereg &&
-                                       ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
-                               ins->sreg1 = last_ins->sreg1;
-                       }
-                       break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
-               case OP_ICONV_TO_I4:
-               case OP_MOVE:
-                       /*
-                        * Removes:
-                        *
-                        * OP_MOVE reg, reg 
-                        */
-                       if (ins->dreg == ins->sreg1) {
-                               if (last_ins)
-                                       last_ins->next = ins->next;                             
-                               ins = ins->next;
-                               continue;
-                       }
+       /*
+        * FIXME: Need to add more instructions, but the current machine 
+        * description can't model some parts of the composite instructions like
+        * cdq.
+        */
+       MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
+               switch (ins->opcode) {
+               case OP_IREM_IMM:
+               case OP_IDIV_IMM:
+               case OP_IDIV_UN_IMM:
+               case OP_IREM_UN_IMM:
                        /* 
-                        * Removes:
-                        *
-                        * OP_MOVE sreg, dreg 
-                        * OP_MOVE dreg, sreg
+                        * Keep the cases where we could generate optimized code, otherwise convert
+                        * to the non-imm variant.
                         */
-                       if (last_ins && last_ins->opcode == OP_MOVE &&
-                           ins->sreg1 == last_ins->dreg &&
-                           ins->dreg == last_ins->sreg1) {
-                               last_ins->next = ins->next;                             
-                               ins = ins->next;                                
-                               continue;
-                       }
+                       if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
+                               break;
+                       mono_decompose_op_imm (cfg, bb, ins);
                        break;
-               case OP_X86_PUSH_MEMBASE:
-                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
-                                        last_ins->opcode == OP_STORE_MEMBASE_REG) &&
-                           ins->inst_basereg == last_ins->inst_destbasereg &&
-                           ins->inst_offset == last_ins->inst_offset) {
-                                   ins->opcode = OP_X86_PUSH;
-                                   ins->sreg1 = last_ins->sreg1;
-                       }
+               default:
                        break;
                }
-               last_ins = ins;
-               ins = ins->next;
        }
-       bb->last_ins = last_ins;
+
+       bb->max_vreg = cfg->rs->next_vreg;
 }
 
 static const int 
@@ -1772,15 +1786,6 @@ cc_signed_table [] = {
        FALSE, FALSE, FALSE, FALSE
 };
 
-void
-mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
-{
-       if (cfg->opt & MONO_OPT_PEEPHOLE)
-               peephole_pass_1 (cfg, bb);
-
-       mono_local_regalloc (cfg, bb);
-}
-
 static unsigned char*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
 {
@@ -1943,7 +1948,7 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
 
        /* Move return value to the target register */
        switch (ins->opcode) {
-       case CEE_CALL:
+       case OP_CALL:
        case OP_CALL_REG:
        case OP_CALL_MEMBASE:
                if (ins->dreg != X86_EAX)
@@ -1952,7 +1957,10 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
        case OP_VCALL:
        case OP_VCALL_REG:
        case OP_VCALL_MEMBASE:
-               cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
+       case OP_VCALL2:
+       case OP_VCALL2_REG:
+       case OP_VCALL2_MEMBASE:
+               cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        /* Pop the destination address from the stack */
                        x86_pop_reg (code, X86_ECX);
@@ -2034,7 +2042,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
 
        sig = mono_method_signature (method);
 
-       cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
        
        /* This is the opposite of the code in emit_prolog */
 
@@ -2088,13 +2096,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        MonoCallInst *call;
        guint offset;
        guint8 *code = cfg->native_code + cfg->code_len;
-       MonoInst *last_ins = NULL;
-       guint last_offset = 0;
        int max_len, cpos;
 
-       if (cfg->opt & MONO_OPT_PEEPHOLE)
-               peephole_pass (cfg, bb);
-
        if (cfg->opt & MONO_OPT_LOOP) {
                int pad, align = LOOP_ALIGNMENT;
                /* set alignment depending on cpu */
@@ -2126,20 +2129,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
        mono_debug_open_block (cfg, bb, offset);
 
-       ins = bb->code;
-       while (ins) {
+       MONO_BB_FOR_EACH_INS (bb, ins) {
                offset = code - cfg->native_code;
 
                max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
 
-               if (offset > (cfg->code_size - max_len - 16)) {
+               if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
                        cfg->code_size *= 2;
                        cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
                        code = cfg->native_code + offset;
                        mono_jit_stats.code_reallocs++;
                }
 
-               mono_debug_record_line_number (cfg, ins, offset);
+               if (cfg->debug_info)
+                       mono_debug_record_line_number (cfg, ins, offset);
 
                switch (ins->opcode) {
                case OP_BIGMUL:
@@ -2173,14 +2176,25 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_STOREI4_MEMBASE_REG:
                        x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
                        break;
-               case CEE_LDIND_I:
-               case CEE_LDIND_I4:
-               case CEE_LDIND_U4:
-                       x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
+               case OP_STORE_MEM_IMM:
+                       x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
                        break;
                case OP_LOADU4_MEM:
-                       x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
-                       x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
+                       if (cfg->new_ir)
+                               x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
+                       else
+                               x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
+                       break;
+               case OP_LOAD_MEM:
+               case OP_LOADI4_MEM:
+                       /* These are created by the cprop pass so they use inst_imm as the source */
+                       x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
+                       break;
+               case OP_LOADU1_MEM:
+                       x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
+                       break;
+               case OP_LOADU2_MEM:
+                       x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
@@ -2199,22 +2213,26 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LOADI2_MEMBASE:
                        x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
                        break;
-               case CEE_CONV_I1:
+               case OP_ICONV_TO_I1:
+               case OP_SEXT_I1:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
                        break;
-               case CEE_CONV_I2:
+               case OP_ICONV_TO_I2:
+               case OP_SEXT_I2:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
                        break;
-               case CEE_CONV_U1:
+               case OP_ICONV_TO_U1:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
                        break;
-               case CEE_CONV_U2:
+               case OP_ICONV_TO_U2:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
                        break;
                case OP_COMPARE:
+               case OP_ICOMPARE:
                        x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
                        break;
                case OP_COMPARE_IMM:
+               case OP_ICOMPARE_IMM:
                        x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
                        break;
                case OP_X86_COMPARE_MEMBASE_REG:
@@ -2238,13 +2256,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_X86_ADD_MEMBASE_IMM:
                        x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
                        break;
-               case OP_X86_ADD_MEMBASE:
+               case OP_X86_ADD_REG_MEMBASE:
                        x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
                        break;
                case OP_X86_SUB_MEMBASE_IMM:
                        x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
                        break;
-               case OP_X86_SUB_MEMBASE:
+               case OP_X86_SUB_REG_MEMBASE:
                        x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
                        break;
                case OP_X86_AND_MEMBASE_IMM:
@@ -2256,6 +2274,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_X86_XOR_MEMBASE_IMM:
                        x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
                        break;
+               case OP_X86_ADD_MEMBASE_REG:
+                       x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
+                       break;
+               case OP_X86_SUB_MEMBASE_REG:
+                       x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
+                       break;
+               case OP_X86_AND_MEMBASE_REG:
+                       x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
+                       break;
+               case OP_X86_OR_MEMBASE_REG:
+                       x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
+                       break;
+               case OP_X86_XOR_MEMBASE_REG:
+                       x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
+                       break;
                case OP_X86_INC_MEMBASE:
                        x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
                        break;
@@ -2268,104 +2301,171 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_X86_DEC_REG:
                        x86_dec_reg (code, ins->dreg);
                        break;
-               case OP_X86_MUL_MEMBASE:
+               case OP_X86_MUL_REG_MEMBASE:
                        x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
                        break;
+               case OP_X86_AND_REG_MEMBASE:
+                       x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
+                       break;
+               case OP_X86_OR_REG_MEMBASE:
+                       x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
+                       break;
+               case OP_X86_XOR_REG_MEMBASE:
+                       x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
+                       break;
                case OP_BREAK:
                        x86_breakpoint (code);
                        break;
+               case OP_NOP:
+               case OP_DUMMY_USE:
+               case OP_DUMMY_STORE:
+               case OP_NOT_REACHED:
+               case OP_NOT_NULL:
+                       break;
                case OP_ADDCC:
-               case CEE_ADD:
+               case OP_IADDCC:
+               case OP_IADD:
                        x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADC:
+               case OP_IADC:
                        x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADDCC_IMM:
                case OP_ADD_IMM:
+               case OP_IADD_IMM:
                        x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
                        break;
                case OP_ADC_IMM:
+               case OP_IADC_IMM:
                        x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
                        break;
                case OP_SUBCC:
-               case CEE_SUB:
+               case OP_ISUBCC:
+               case OP_ISUB:
                        x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
                        break;
                case OP_SBB:
+               case OP_ISBB:
                        x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
                        break;
                case OP_SUBCC_IMM:
                case OP_SUB_IMM:
+               case OP_ISUB_IMM:
                        x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
                        break;
                case OP_SBB_IMM:
+               case OP_ISBB_IMM:
                        x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
                        break;
-               case CEE_AND:
+               case OP_IAND:
                        x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
                        break;
                case OP_AND_IMM:
+               case OP_IAND_IMM:
                        x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
                        break;
-               case CEE_DIV:
-                       x86_cdq (code);
-                       x86_div_reg (code, ins->sreg2, TRUE);
+               case OP_IDIV:
+               case OP_IREM:
+                       /* 
+                        * The code is the same for div/rem, the allocator will allocate dreg
+                        * to EAX/EDX as appropriate.
+                        */
+                       if (ins->sreg2 == X86_EDX) {
+                               /* cdq clobbers this */
+                               x86_push_reg (code, ins->sreg2);
+                               x86_cdq (code);
+                               x86_div_membase (code, X86_ESP, 0, TRUE);
+                               x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
+                       } else {
+                               x86_cdq (code);
+                               x86_div_reg (code, ins->sreg2, TRUE);
+                       }
                        break;
-               case CEE_DIV_UN:
-                       x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
-                       x86_div_reg (code, ins->sreg2, FALSE);
+               case OP_IDIV_UN:
+               case OP_IREM_UN:
+                       if (ins->sreg2 == X86_EDX) {
+                               x86_push_reg (code, ins->sreg2);
+                               x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
+                               x86_div_membase (code, X86_ESP, 0, FALSE);
+                               x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
+                       } else {
+                               x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
+                               x86_div_reg (code, ins->sreg2, FALSE);
+                       }
                        break;
                case OP_DIV_IMM:
                        x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
                        x86_cdq (code);
                        x86_div_reg (code, ins->sreg2, TRUE);
                        break;
-               case CEE_REM:
-                       x86_cdq (code);
-                       x86_div_reg (code, ins->sreg2, TRUE);
-                       break;
-               case CEE_REM_UN:
-                       x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
-                       x86_div_reg (code, ins->sreg2, FALSE);
-                       break;
-               case OP_REM_IMM:
-                       x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
-                       x86_cdq (code);
-                       x86_div_reg (code, ins->sreg2, TRUE);
+               case OP_IREM_IMM: {
+                       int power = mono_is_power_of_two (ins->inst_imm);
+
+                       g_assert (ins->sreg1 == X86_EAX);
+                       g_assert (ins->dreg == X86_EAX);
+                       g_assert (power >= 0);
+
+                       if (power == 1) {
+                               /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
+                               x86_cdq (code);
+                               x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
+                               /* 
+                                * If the dividend is >= 0, this does nothing. If it is negative, it
+                                * transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
+                                */
+                               x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
+                               x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
+                       } else {
+                               /* Based on gcc code */
+
+                               /* Add compensation for negative dividends */
+                               x86_cdq (code);
+                               x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
+                               x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
+                               /* Compute remainder */
+                               x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
+                               /* Remove compensation */
+                               x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
+                       }
                        break;
-               case CEE_OR:
+               }
+               case OP_IOR:
                        x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
                        break;
                case OP_OR_IMM:
+               case OP_IOR_IMM:
                        x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
                        break;
-               case CEE_XOR:
                case OP_IXOR:
                        x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
                        break;
                case OP_XOR_IMM:
+               case OP_IXOR_IMM:
                        x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
                        break;
-               case CEE_SHL:
+               case OP_ISHL:
                        g_assert (ins->sreg2 == X86_ECX);
                        x86_shift_reg (code, X86_SHL, ins->dreg);
                        break;
-               case CEE_SHR:
+               case OP_ISHR:
                        g_assert (ins->sreg2 == X86_ECX);
                        x86_shift_reg (code, X86_SAR, ins->dreg);
                        break;
                case OP_SHR_IMM:
+               case OP_ISHR_IMM:
                        x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
                        break;
                case OP_SHR_UN_IMM:
+               case OP_ISHR_UN_IMM:
                        x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
                        break;
-               case CEE_SHR_UN:
+               case OP_ISHR_UN:
                        g_assert (ins->sreg2 == X86_ECX);
                        x86_shift_reg (code, X86_SHR, ins->dreg);
                        break;
                case OP_SHL_IMM:
+               case OP_ISHL_IMM:
                        x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
                        break;
                case OP_LSHL: {
@@ -2449,22 +2549,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
                        }
                        break;
-               case CEE_NOT:
+               case OP_INOT:
                        x86_not_reg (code, ins->sreg1);
                        break;
-               case CEE_NEG:
+               case OP_INEG:
                        x86_neg_reg (code, ins->sreg1);
                        break;
-               case OP_SEXT_I1:
-                       x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
-                       break;
-               case OP_SEXT_I2:
-                       x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
-                       break;
-               case CEE_MUL:
+
+               case OP_IMUL:
                        x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
                        break;
                case OP_MUL_IMM:
+               case OP_IMUL_IMM:
                        switch (ins->inst_imm) {
                        case 2:
                                /* MOV r1, r2 */
@@ -2522,11 +2618,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                break;
                        }
                        break;
-               case CEE_MUL_OVF:
+               case OP_IMUL_OVF:
                        x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
                        EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
                        break;
-               case CEE_MUL_OVF_UN: {
+               case OP_IMUL_OVF_UN: {
                        /* the mul operation and the exception check should most likely be split */
                        int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
                        /*g_assert (ins->sreg2 == X86_EAX);
@@ -2572,6 +2668,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
                        x86_mov_reg_imm (code, ins->dreg, 0);
                        break;
+               case OP_JUMP_TABLE:
+                       mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+                       x86_mov_reg_imm (code, ins->dreg, 0);
+                       break;
                case OP_LOAD_GOTADDR:
                        x86_call_imm (code, 0);
                        /* 
@@ -2590,12 +2690,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
                        x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
                        break;
-               case CEE_CONV_I4:
                case OP_MOVE:
                        x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
                        break;
-               case CEE_CONV_U4:
-                       g_assert_not_reached ();
                case OP_JMP: {
                        /*
                         * Note: this 'frame destruction' logic is useful for tail calls, too.
@@ -2654,8 +2751,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCALL:
                case OP_LCALL:
                case OP_VCALL:
+               case OP_VCALL2:
                case OP_VOIDCALL:
-               case CEE_CALL:
+               case OP_CALL:
                        call = (MonoCallInst*)ins;
                        if (ins->flags & MONO_INST_HAS_METHOD)
                                code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
@@ -2688,6 +2786,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCALL_REG:
                case OP_LCALL_REG:
                case OP_VCALL_REG:
+               case OP_VCALL2_REG:
                case OP_VOIDCALL_REG:
                case OP_CALL_REG:
                        call = (MonoCallInst*)ins;
@@ -2703,6 +2802,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCALL_MEMBASE:
                case OP_LCALL_MEMBASE:
                case OP_VCALL_MEMBASE:
+               case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
                        call = (MonoCallInst*)ins;
@@ -2759,9 +2859,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        code = mono_emit_stack_alloc (code, ins);
                        x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
                        break;
-               case CEE_RET:
-                       x86_ret (code);
+               case OP_LOCALLOC_IMM: {
+                       guint32 size = ins->inst_imm;
+                       size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
+
+                       if (ins->flags & MONO_INST_INIT) {
+                               /* FIXME: Optimize this */
+                               x86_mov_reg_imm (code, ins->dreg, size);
+                               ins->sreg1 = ins->dreg;
+
+                               code = mono_emit_stack_alloc (code, ins);
+                               x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
+                       } else {
+                               x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
+                               x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
+                       }
                        break;
+               }
                case OP_THROW: {
                        x86_push_reg (code, ins->sreg1);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
@@ -2774,10 +2888,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                                          (gpointer)"mono_arch_rethrow_exception");
                        break;
                }
-               case OP_CALL_HANDLER: 
-                       /* Align stack */
-#ifdef __APPLE__
-                       x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
+               case OP_CALL_HANDLER:
+#if __APPLE__
+       x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
 #endif
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                        x86_call_imm (code, 0);
@@ -2785,13 +2898,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
 #endif
                        break;
+               case OP_START_HANDLER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
+                       break;
+               }
+               case OP_ENDFINALLY: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
+                       x86_ret (code);
+                       break;
+               }
+               case OP_ENDFILTER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
+                       /* The local allocator will put the result into EAX */
+                       x86_ret (code);
+                       break;
+               }
+
                case OP_LABEL:
                        ins->inst_c0 = code - cfg->native_code;
                        break;
                case OP_BR:
-                       //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
-                       //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
-                       //break;
                        if (ins->flags & MONO_INST_BRLABEL) {
                                if (ins->inst_i0->inst_c0) {
                                        x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
@@ -2825,6 +2954,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_CGT:
                case OP_CGT_UN:
                case OP_CNE:
+               case OP_ICEQ:
+               case OP_ICLT:
+               case OP_ICLT_UN:
+               case OP_ICGT:
+               case OP_ICGT_UN:
                        x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
@@ -2838,6 +2972,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_GE_UN:
                case OP_COND_EXC_LE:
                case OP_COND_EXC_LE_UN:
+               case OP_COND_EXC_IEQ:
+               case OP_COND_EXC_INE_UN:
+               case OP_COND_EXC_ILT:
+               case OP_COND_EXC_ILT_UN:
+               case OP_COND_EXC_IGT:
+               case OP_COND_EXC_IGT_UN:
+               case OP_COND_EXC_IGE:
+               case OP_COND_EXC_IGE_UN:
+               case OP_COND_EXC_ILE:
+               case OP_COND_EXC_ILE_UN:
                        EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
                        break;
                case OP_COND_EXC_OV:
@@ -2846,19 +2990,39 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_NC:
                        EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
                        break;
-               case CEE_BEQ:
-               case CEE_BNE_UN:
-               case CEE_BLT:
-               case CEE_BLT_UN:
-               case CEE_BGT:
-               case CEE_BGT_UN:
-               case CEE_BGE:
-               case CEE_BGE_UN:
-               case CEE_BLE:
-               case CEE_BLE_UN:
+               case OP_COND_EXC_IOV:
+               case OP_COND_EXC_INO:
+               case OP_COND_EXC_IC:
+               case OP_COND_EXC_INC:
+                       EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
+                       break;
+               case OP_IBEQ:
+               case OP_IBNE_UN:
+               case OP_IBLT:
+               case OP_IBLT_UN:
+               case OP_IBGT:
+               case OP_IBGT_UN:
+               case OP_IBGE:
+               case OP_IBGE_UN:
+               case OP_IBLE:
+               case OP_IBLE_UN:
                        EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
                        break;
 
+               case OP_CMOV_IEQ:
+               case OP_CMOV_IGE:
+               case OP_CMOV_IGT:
+               case OP_CMOV_ILE:
+               case OP_CMOV_ILT:
+               case OP_CMOV_INE_UN:
+               case OP_CMOV_IGE_UN:
+               case OP_CMOV_IGT_UN:
+               case OP_CMOV_ILE_UN:
+               case OP_CMOV_ILT_UN:
+                       g_assert (ins->dreg == ins->sreg1);
+                       x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
+                       break;
+
                /* floating point opcodes */
                case OP_R8CONST: {
                        double d = *(double *)ins->inst_p0;
@@ -2919,18 +3083,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LOADR4_MEMBASE:
                        x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
                        break;
-               case CEE_CONV_R4: /* FIXME: change precision */
-               case CEE_CONV_R8:
+               case OP_ICONV_TO_R4: /* FIXME: change precision */
+               case OP_ICONV_TO_R8:
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, FALSE);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
                        break;
+               case OP_ICONV_TO_R_UN:
+                       x86_push_imm (code, 0);
+                       x86_push_reg (code, ins->sreg1);
+                       x86_fild_membase (code, X86_ESP, 0, TRUE);
+                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
+                       break;
                case OP_X86_FP_LOAD_I8:
                        x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
                        break;
                case OP_X86_FP_LOAD_I4:
                        x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
                        break;
+               case OP_FCONV_TO_R4:
+                       /* FIXME: nothing to do ?? */
+                       break;
                case OP_FCONV_TO_I1:
                        code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
                        break;
@@ -2961,7 +3134,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_fldcw_membase (code, X86_ESP, 0);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
                        break;
-               case OP_LCONV_TO_R_UN: { 
+               case OP_LCONV_TO_R8_2:
+                       x86_push_reg (code, ins->sreg2);
+                       x86_push_reg (code, ins->sreg1);
+                       x86_fild_membase (code, X86_ESP, 0, TRUE);
+                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
+                       break;
+               case OP_LCONV_TO_R4_2:
+                       x86_push_reg (code, ins->sreg2);
+                       x86_push_reg (code, ins->sreg1);
+                       x86_fild_membase (code, X86_ESP, 0, TRUE);
+                       /* Change precision */
+                       x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
+                       x86_fld_membase (code, X86_ESP, 0, FALSE);
+                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
+                       break;
+               case OP_LCONV_TO_R_UN:
+               case OP_LCONV_TO_R_UN_2: { 
                        static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
                        guint8 *br;
 
@@ -2990,7 +3179,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        break;
                }
-               case OP_LCONV_TO_OVF_I: {
+               case OP_LCONV_TO_OVF_I:
+               case OP_LCONV_TO_OVF_I4_2: {
                        guint8 *br [3], *label [1];
                        MonoInst *tins;
 
@@ -3033,6 +3223,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
                        break;
                }
+               case OP_FMOVE:
+                       /* Not needed on the fp stack */
+                       break;
                case OP_FADD:
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;
@@ -3104,10 +3297,37 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;          
                case OP_SQRT:
                        x86_fsqrt (code);
-                       break;          
+                       break;
+               case OP_IMIN:
+                       g_assert (cfg->opt & MONO_OPT_CMOV);
+                       g_assert (ins->dreg == ins->sreg1);
+                       x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
+                       x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
+                       break;
+               case OP_IMIN_UN:
+                       g_assert (cfg->opt & MONO_OPT_CMOV);
+                       g_assert (ins->dreg == ins->sreg1);
+                       x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
+                       x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
+                       break;
+               case OP_IMAX:
+                       g_assert (cfg->opt & MONO_OPT_CMOV);
+                       g_assert (ins->dreg == ins->sreg1);
+                       x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
+                       x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
+                       break;
+               case OP_IMAX_UN:
+                       g_assert (cfg->opt & MONO_OPT_CMOV);
+                       g_assert (ins->dreg == ins->sreg1);
+                       x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
+                       x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
+                       break;
                case OP_X86_FPOP:
                        x86_fstp (code, 0);
-                       break;          
+                       break;
+               case OP_X86_FXCH:
+                       x86_fxch (code, ins->inst_imm);
+                       break;
                case OP_FREM: {
                        guint8 *l1, *l2;
 
@@ -3392,13 +3612,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
                        break;
                case OP_CKFINITE: {
+                       guchar *br1;
                        x86_push_reg (code, X86_EAX);
                        x86_fxam (code);
                        x86_fnstsw (code);
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
                        x86_pop_reg (code, X86_EAX);
+
+                       /* Have to clean up the fp stack before throwing the exception */
+                       br1 = code;
+                       x86_branch8 (code, X86_CC_NE, 0, FALSE);
+
+                       x86_fstp (code, 0);                     
                        EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
+
+                       x86_patch (br1, code);
                        break;
                }
                case OP_TLS_GET: {
@@ -3473,7 +3702,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        break;
                }
-               case OP_ATOMIC_EXCHANGE_I4: {
+               case OP_ATOMIC_EXCHANGE_I4:
+               case OP_ATOMIC_CAS_IMM_I4: {
                        guchar *br[2];
                        int sreg2 = ins->sreg2;
                        int breg = ins->inst_basereg;
@@ -3482,8 +3712,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         * hack to overcome limits in x86 reg allocator 
                         * (req: dreg == eax and sreg2 != eax and breg != eax) 
                         */
-                       if (ins->dreg != X86_EAX)
-                               x86_push_reg (code, X86_EAX);
+                       g_assert (ins->dreg == X86_EAX);
                        
                        /* We need the EAX reg for the cmpxchg */
                        if (ins->sreg2 == X86_EAX) {
@@ -3498,43 +3727,40 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                breg = X86_ESI;
                        }
 
-                       x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
+                       if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
+                               x86_mov_reg_imm (code, X86_EAX, ins->backend.data);
+
+                               x86_prefix (code, X86_LOCK_PREFIX);
+                               x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
+                       } else {
+                               x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
 
-                       br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
-                       x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
-                       br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
-                       x86_patch (br [1], br [0]);
+                               br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
+                               x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
+                               br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
+                               x86_patch (br [1], br [0]);
+                       }
 
                        if (breg != ins->inst_basereg)
                                x86_pop_reg (code, X86_ESI);
 
-                       if (ins->dreg != X86_EAX) {
-                               x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
-                               x86_pop_reg (code, X86_EAX);
-                       }
-
                        if (ins->sreg2 != sreg2)
                                x86_pop_reg (code, X86_EDX);
 
                        break;
                }
                default:
-                       g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
+                       g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
                        g_assert_not_reached ();
                }
 
-               if ((code - cfg->native_code - offset) > max_len) {
+               if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
                        g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
                                   mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
                        g_assert_not_reached ();
                }
               
                cpos += max_len;
-
-               last_ins = ins;
-               last_offset = offset;
-               
-               ins = ins->next;
        }
 
        cfg->code_len = code - cfg->native_code;
@@ -3608,7 +3834,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        int alloc_size, pos, max_offset, i;
        guint8 *code;
 
-       cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 1024);
+       cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 10240);
 
        if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
                cfg->code_size += 512;
@@ -3622,14 +3848,19 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        pos = 0;
 
        if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
-               /* Might need to attach the thread to the JIT */
-               if (lmf_tls_offset != -1) {
-                       guint8 *buf;
+               /* Might need to attach the thread to the JIT  or change the domain for the callback */
+               if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
+                       guint8 *buf, *no_domain_branch;
 
+                       code = emit_tls_get (code, X86_EAX, appdomain_tls_offset);
+                       x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
+                       no_domain_branch = code;
+                       x86_branch8 (code, X86_CC_NE, 0, 0);
                        code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
                        x86_test_reg_reg (code, X86_EAX, X86_EAX);
                        buf = code;
                        x86_branch8 (code, X86_CC_NE, 0, 0);
+                       x86_patch (no_domain_branch, code);
                        x86_push_imm (code, cfg->domain);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
@@ -3639,7 +3870,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        /* FIXME: Add a separate key for LMF to avoid this */
                        x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
 #endif
-               } else {
+               }
+               else {
                        g_assert (!cfg->compile_aot);
                        x86_push_imm (code, cfg->domain);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
@@ -3767,7 +3999,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        max_offset = 0;
        if (cfg->opt & MONO_OPT_BRANCH) {
                for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
-                       MonoInst *ins = bb->code;
+                       MonoInst *ins;
                        bb->max_offset = max_offset;
 
                        if (cfg->prof_options & MONO_PROFILE_COVERAGE)
@@ -3776,12 +4008,11 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
                                max_offset += LOOP_ALIGNMENT;
 
-                       while (ins) {
+                       MONO_BB_FOR_EACH_INS (bb, ins) {
                                if (ins->opcode == OP_LABEL)
                                        ins->inst_c1 = max_offset;
                                
                                max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
-                               ins = ins->next;
                        }
                }
        }
@@ -3789,6 +4020,13 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
 
+       /* store runtime generic context */
+       if (cfg->rgctx_var) {
+               g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
+
+               x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
+       }
+
        /* load arguments allocated to register from the stack */
        sig = mono_method_signature (method);
        pos = 0;
@@ -3805,6 +4043,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
        cfg->code_len = code - cfg->native_code;
 
+       g_assert (cfg->code_len < cfg->code_size);
+
        return code;
 }
 
@@ -3853,7 +4093,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
                } else {
                        /* Find a spare register */
-                       switch (sig->ret->type) {
+                       switch (mono_type_get_underlying_type (sig->ret)->type) {
                        case MONO_TYPE_I8:
                        case MONO_TYPE_U8:
                                prev_lmf_reg = X86_EDI;
@@ -3913,7 +4153,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        }
 
        /* Load returned vtypes into registers if needed */
-       cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
        if (cinfo->ret.storage == ArgValuetypeInReg) {
                for (quad = 0; quad < 2; quad ++) {
                        switch (cinfo->ret.pair_storage [quad]) {
@@ -4072,6 +4312,12 @@ mono_arch_flush_register_windows (void)
 {
 }
 
+gboolean 
+mono_arch_is_inst_imm (gint64 imm)
+{
+       return TRUE; /* unconditional: imm is never inspected, every value is reported as acceptable */
+}
+
 /*
  * Support for fast access to the thread-local lmf structure using the GS
  * segment register on NPTL + kernel 2.6.x.
@@ -4123,7 +4369,7 @@ void
 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
 {
        MonoCallInst *call = (MonoCallInst*)inst;
-       CallInfo *cinfo = get_call_info (cfg, cfg->mempool, inst->signature, FALSE);
+       CallInfo *cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, inst->signature, FALSE);
 
        /* add the this argument */
        if (this_reg != -1) {
@@ -4288,10 +4534,10 @@ mono_arch_find_imt_method (gpointer *regs, guint8 *code)
 }
 
 MonoObject*
-mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
+mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
 {
        MonoMethodSignature *sig = mono_method_signature (method);
-       CallInfo *cinfo = get_call_info (NULL, NULL, sig, FALSE);
+       CallInfo *cinfo = get_call_info (gsctx, NULL, sig, FALSE);
        int this_argument_offset;
        MonoObject *this_argument;
 
@@ -4310,6 +4556,12 @@ mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
 }
 #endif
 
+MonoVTable*
+mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
+{
+       return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG]; /* the regs slot indexed by MONO_ARCH_RGCTX_REG, cast to a vtable pointer; code is unused */
+}
+
 MonoInst*
 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
@@ -4335,6 +4587,29 @@ mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethod
                        MONO_INST_NEW (cfg, ins, OP_ABS);
                        ins->inst_i0 = args [0];
                }
+
+               if (cfg->opt & MONO_OPT_CMOV) {
+                       int opcode = 0;
+
+                       if (strcmp (cmethod->name, "Min") == 0) {
+                               if (fsig->params [0]->type == MONO_TYPE_I4)
+                                       opcode = OP_IMIN;
+                               else if (fsig->params [0]->type == MONO_TYPE_U4)
+                                       opcode = OP_IMIN_UN;
+                       } else if (strcmp (cmethod->name, "Max") == 0) {
+                               if (fsig->params [0]->type == MONO_TYPE_I4)
+                                       opcode = OP_IMAX;
+                               else if (fsig->params [0]->type == MONO_TYPE_U4)
+                                       opcode = OP_IMAX_UN;
+                       }               
+
+                       if (opcode) {
+                               MONO_INST_NEW (cfg, ins, opcode);
+                               ins->inst_i0 = args [0];
+                               ins->inst_i1 = args [1];
+                       }
+               }
+
 #if 0
                /* OP_FREM is not IEEE compatible */
                else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
@@ -4343,48 +4618,74 @@ mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethod
                        ins->inst_i1 = args [1];
                }
 #endif
-       } else if (cmethod->klass == mono_defaults.thread_class &&
-                          strcmp (cmethod->name, "MemoryBarrier") == 0) {
-               MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
-       } else if(cmethod->klass->image == mono_defaults.corlib &&
-                          (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
-                          (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
-
-               if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MonoInst *ins_iconst;
-
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
-                       MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
-                       ins_iconst->inst_c0 = 1;
+       }
 
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = ins_iconst;
-               } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MonoInst *ins_iconst;
+       return ins;
+}
 
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
-                       MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
-                       ins_iconst->inst_c0 = -1;
+MonoInst*
+mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+{
+       MonoInst *ins = NULL;
+       int opcode = 0;
 
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = ins_iconst;
-               } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
+       if (cmethod->klass == mono_defaults.math_class) {
+               if (strcmp (cmethod->name, "Sin") == 0) {
+                       opcode = OP_SIN;
+               } else if (strcmp (cmethod->name, "Cos") == 0) {
+                       opcode = OP_COS;
+               } else if (strcmp (cmethod->name, "Tan") == 0) {
+                       opcode = OP_TAN;
+               } else if (strcmp (cmethod->name, "Atan") == 0) {
+                       opcode = OP_ATAN;
+               } else if (strcmp (cmethod->name, "Sqrt") == 0) {
+                       opcode = OP_SQRT;
+               } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
+                       opcode = OP_ABS;
+               }
+               
+               if (opcode) {
+                       MONO_INST_NEW (cfg, ins, opcode);
+                       ins->type = STACK_R8;
+                       ins->dreg = mono_alloc_freg (cfg);
+                       ins->sreg1 = args [0]->dreg;
+                       MONO_ADD_INS (cfg->cbb, ins);
+               }
 
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = args [1];
-               } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
+               if (cfg->opt & MONO_OPT_CMOV) {
+                       int opcode = 0;
+
+                       if (strcmp (cmethod->name, "Min") == 0) {
+                               if (fsig->params [0]->type == MONO_TYPE_I4)
+                                       opcode = OP_IMIN;
+                       } else if (strcmp (cmethod->name, "Max") == 0) {
+                               if (fsig->params [0]->type == MONO_TYPE_I4)
+                                       opcode = OP_IMAX;
+                       }               
+
+                       if (opcode) {
+                               MONO_INST_NEW (cfg, ins, opcode);
+                               ins->type = STACK_I4;
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = args [0]->dreg;
+                               ins->sreg2 = args [1]->dreg;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                       }
+               }
 
+#if 0
+               /* OP_FREM is not IEEE compatible */
+               else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
+                       MONO_INST_NEW (cfg, ins, OP_FREM);
                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                }
+#endif
        }
 
        return ins;
 }
 
-
 gboolean
 mono_arch_print_tree (MonoInst *tree, int arity)
 {
@@ -4394,7 +4695,9 @@ mono_arch_print_tree (MonoInst *tree, int arity)
 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
 {
        MonoInst* ins;
-       
+
+       return NULL;
+
        if (appdomain_tls_offset == -1)
                return NULL;
 
@@ -4440,71 +4743,66 @@ mono_arch_get_patch_offset (guint8 *code)
        else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
                /* pop <REG>; add <OFFSET>, <REG> */
                return 3;
+       else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
+               /* mov <REG>, imm */
+               return 1;
        else {
                g_assert_not_reached ();
                return -1;
        }
 }
 
-struct _MonoDebuggerBreakpointInfo {
-       const guint8 *address;
-       guint8 saved_byte;
-};
-
-static MonoDebuggerBreakpointInfo breakpoint_info_area [MONO_DEBUGGER_BREAKPOINT_TABLE_SIZE];
-static volatile const MonoDebuggerBreakpointInfo *breakpoint_table [MONO_DEBUGGER_BREAKPOINT_TABLE_SIZE];
-
-volatile const MonoDebuggerBreakpointInfo *_mono_debugger_breakpoint_info_area = breakpoint_info_area;
-volatile const MonoDebuggerBreakpointInfo **mono_debugger_breakpoint_table = breakpoint_table;
-
-/*
- * Removes breakpoints from target memory.
- *
- * @orig_address:
- * The original memory address.
+/**
+ * mono_breakpoint_clean_code:
  *
- * @code:
- * A copy of @size bytes from that memory area, which we can modify.
+ * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
+ * breakpoints in the original code, they are removed in the copy.
+ *
+ * @method_start: if non-NULL, lower bound for the read: the part of the range that would lie
+ * before @method_start is zero-filled in @buf instead of being read.
+ * @code: address inside the code; the copied range begins @offset bytes before it.
+ * @offset: distance, in bytes, from the start of the copied range to @code.
+ * @buf: destination buffer; @size bytes of it are written.
+ * @size: number of bytes to copy.
  *
- * Returns:
- * TRUE if there were any breakpoints in that area, FALSE if not.
+ * Returns TRUE if no sw breakpoint was present.
+ * Returns FALSE if at least one saved byte was restored into @buf.
  */
 gboolean
-mono_debugger_remove_breakpoints_from_code (const guint8 *orig_address, guint8 *code, int size)
+mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
 {
-       gboolean found_breakpoint = FALSE;
        int i;
-
-       for (i = 0; i < MONO_DEBUGGER_BREAKPOINT_TABLE_SIZE; i++) {
-               volatile const MonoDebuggerBreakpointInfo *info = mono_debugger_breakpoint_table [i];
-               int offset;
-
-               if (!info)
-                       continue;
-
-               if ((info->address < orig_address) || (info->address > orig_address + size))
+       gboolean can_write = TRUE;
+       /*
+        * If method_start is non-NULL we need to perform bound checks, since we access memory
+        * at code - offset we could go before the start of the method and end up in a different
+        * page of memory that is not mapped or read incorrect data anyway. We zero-fill the bytes
+        * instead.
+        */
+       if (!method_start || code - offset >= method_start) {
+               memcpy (buf, code - offset, size);
+       } else {
+               /*
+                * Only diff bytes exist between method_start and code, so the first
+                * offset - diff bytes of buf stay zero and the copy starts at method_start,
+                * which lands at buf [offset - diff].
+                */
+               int diff = code - method_start;
+               memset (buf, 0, size);
+               memcpy (buf + offset - diff, method_start, diff + size - offset);
+       }
+       /* Rebase code to the start of the copied range, so that ptr - code below indexes buf. */
+       code -= offset;
+       for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
+               /* Slot in mono_breakpoint_info for entry i. */
+               int idx = mono_breakpoint_info_index [i];
+               guint8 *ptr;
+               /* NOTE(review): values below 1 are skipped -- presumably unused entries; confirm. */
+               if (idx < 1)
                        continue;
-
-               if (!found_breakpoint) {
-                       memcpy (code, orig_address, size);
-                       found_breakpoint = TRUE;
+               ptr = mono_breakpoint_info [idx].address;
+               /* Only breakpoints whose address falls inside the copied range affect buf. */
+               if (ptr >= code && ptr < code + size) {
+                       guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
+                       can_write = FALSE;
+                       /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
+                       buf [ptr - code] = saved_byte;
                }
-
-               offset = info->address - orig_address;
-               code [offset] = info->saved_byte;
        }
-
-       return found_breakpoint;
+       return can_write;
 }
 
 gpointer
 mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
 {
-       guint8 buf [16];
+       guint8 buf [8];
        guint8 reg = 0;
        gint32 disp = 0;
 
-       mono_debugger_remove_breakpoints_from_code (code - 8, buf, sizeof (buf));
+       mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
        code = buf + 8;
 
        *displacement = 0;
@@ -4578,13 +4876,16 @@ mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
 }
 
 gpointer
-mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
+mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
+               gssize *regs, guint8 *code)
 {
        guint32 esp = regs [X86_ESP];
        CallInfo *cinfo;
        gpointer res;
 
-       cinfo = get_call_info (NULL, NULL, sig, FALSE);
+       if (!gsctx && code)
+               gsctx = mono_get_generic_context_from_code (code);
+       cinfo = get_call_info (gsctx, NULL, sig, FALSE);
 
        /*
         * The stack looks like:
@@ -4621,11 +4922,8 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
        if (has_target) {
                static guint8* cached = NULL;
-               mono_mini_arch_lock ();
-               if (cached) {
-                       mono_mini_arch_unlock ();
+               if (cached)
                        return cached;
-               }
                
                start = code = mono_global_codeman_reserve (64);
 
@@ -4637,9 +4935,11 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
                g_assert ((code - start) < 64);
 
-               cached = start;
+               mono_debug_add_delegate_trampoline (start, code - start);
+
+               mono_memory_barrier ();
 
-               mono_mini_arch_unlock ();
+               cached = start;
        } else {
                static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
                int i = 0;
@@ -4650,12 +4950,9 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
                        if (!mono_is_regsize_var (sig->params [i]))
                                return NULL;
 
-               mono_mini_arch_lock ();
                code = cache [sig->param_count];
-               if (code) {
-                       mono_mini_arch_unlock ();
+               if (code)
                        return code;
-               }
 
                /*
                 * The stack contains:
@@ -4688,10 +4985,24 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
                g_assert ((code - start) < code_reserve);
 
-               cache [sig->param_count] = start;
+               mono_debug_add_delegate_trampoline (start, code - start);
+
+               mono_memory_barrier ();
 
-               mono_mini_arch_unlock ();
+               cache [sig->param_count] = start;
        }
 
        return start;
 }
+
+/**
+ * mono_arch_context_get_int_reg:
+ * @ctx: saved register context to read from.
+ * @reg: x86 integer register number (X86_EAX, X86_ECX, ... X86_EDI).
+ *
+ * Returns the value @ctx holds for the integer register @reg, cast to a pointer.
+ *
+ * Every register is read through its own named field. Indexing from &ctx->eax by
+ * register number is only correct when the field order of MonoContext matches the
+ * hardware register numbering, and it walks past the bounds of a single member.
+ * A @reg that is not one of the eight general purpose registers is a caller bug
+ * and triggers an assertion.
+ */
+gpointer
+mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
+{
+       switch (reg) {
+       case X86_EAX: return (gpointer)ctx->eax;
+       case X86_EBX: return (gpointer)ctx->ebx;
+       case X86_ECX: return (gpointer)ctx->ecx;
+       case X86_EDX: return (gpointer)ctx->edx;
+       case X86_ESP: return (gpointer)ctx->esp;
+       case X86_EBP: return (gpointer)ctx->ebp;
+       case X86_ESI: return (gpointer)ctx->esi;
+       case X86_EDI: return (gpointer)ctx->edi;
+       default:
+               g_assert_not_reached ();
+               return NULL;
+       }
+}