2008-02-08 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
index 23176ccbe9ae6b582537f40c7f0781ab857c485b..af13147e58d9226663db4ec49f564643fef9e02b 100644 (file)
@@ -19,6 +19,7 @@
 #include <mono/metadata/debug-helpers.h>
 #include <mono/metadata/threads.h>
 #include <mono/metadata/profiler-private.h>
+#include <mono/metadata/mono-debug.h>
 #include <mono/utils/mono-math.h>
 
 #include "trace.h"
@@ -60,16 +61,19 @@ static CRITICAL_SECTION mini_arch_mutex;
 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
 #endif
 
-#define NOT_IMPLEMENTED g_assert_not_reached ()
+MonoBreakpointInfo
+mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
 
 const char*
-mono_arch_regname (int reg) {
+mono_arch_regname (int reg)
+{
        switch (reg) {
        case X86_EAX: return "%eax";
        case X86_EBX: return "%ebx";
        case X86_ECX: return "%ecx";
        case X86_EDX: return "%edx";
-       case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
+       case X86_ESP: return "%esp";    
+       case X86_EBP: return "%ebp";
        case X86_EDI: return "%edi";
        case X86_ESI: return "%esi";
        }
@@ -77,8 +81,28 @@ mono_arch_regname (int reg) {
 }
 
 const char*
-mono_arch_fregname (int reg) {
-       return "unknown";
+mono_arch_fregname (int reg)
+{
+       switch (reg) { /* register numbers 0-7 map to the names "%fr0" through "%fr7" */
+       case 0:
+               return "%fr0";
+       case 1:
+               return "%fr1";
+       case 2:
+               return "%fr2";
+       case 3:
+               return "%fr3";
+       case 4:
+               return "%fr4";
+       case 5:
+               return "%fr5";
+       case 6:
+               return "%fr6";
+       case 7:
+               return "%fr7";
+       default: /* any other register number has no name here */
+               return "unknown";
+       }
 }
 
 typedef enum {
@@ -923,9 +947,7 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
 
        arg->inst_left = sig_arg;
        arg->type = STACK_PTR;
-       /* prepend, so they get reversed */
-       arg->next = call->out_args;
-       call->out_args = arg;
+       MONO_INST_LIST_ADD (&arg->node, &call->out_args);
 }
 
 /*
@@ -1003,9 +1025,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
                        arg->type = in->type;
-                       /* prepend, so they get reversed */
-                       arg->next = call->out_args;
-                       call->out_args = arg;
+                       MONO_INST_LIST_ADD (&arg->node, &call->out_args);
 
                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
                                guint32 size, align;
@@ -1085,14 +1105,12 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                        zero_inst->inst_p0 = 0;
                        arg->inst_left = zero_inst;
                        arg->type = STACK_PTR;
-                       /* prepend, so they get reversed */
-                       arg->next = call->out_args;
-                       call->out_args = arg;
-               }
-               else
+                       MONO_INST_LIST_ADD (&arg->node, &call->out_args);
+               } else {
                        /* if the function returns a struct, the called method already does a ret $0x4 */
                        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
                                cinfo->stack_usage -= 4;
+               }
        }
        
        call->stack_usage = cinfo->stack_usage;
@@ -1101,8 +1119,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
        if (cinfo->need_stack_align) {
                MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
                arg->inst_c0 = cinfo->stack_align_amount;
-               arg->next = call->out_args;
-               call->out_args = arg;
+               MONO_INST_LIST_ADD (&arg->node, &call->out_args);
         }
 #endif 
 
@@ -1316,17 +1333,17 @@ emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer dat
 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
 
 /*
- * peephole_pass_1:
+ * mono_arch_peephole_pass_1:
  *
  *   Perform peephole opts which should/can be performed before local regalloc
  */
-static void
-peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
+void
+mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *last_ins = NULL;
-       ins = bb->code;
+       MonoInst *ins, *n;
 
-       while (ins) {
+       MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) {
+               MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list);
                switch (ins->opcode) {
                case OP_IADD_IMM:
                case OP_ADD_IMM:
@@ -1396,8 +1413,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DEL_INS (ins);
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
@@ -1422,8 +1438,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                              ins->inst_offset == last_ins->inst_offset) {
 
                                if (ins->dreg == last_ins->dreg) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DEL_INS (ins);
                                        continue;
                                } else {
                                        ins->opcode = OP_MOVE;
@@ -1464,7 +1479,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                                (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
+                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? OP_ICONV_TO_I1 : OP_ICONV_TO_U1;
                                ins->sreg1 = last_ins->sreg1;
                        }
                        break;
@@ -1480,12 +1495,10 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
+                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? OP_ICONV_TO_I2 : OP_ICONV_TO_U2;
                                ins->sreg1 = last_ins->sreg1;
                        }
                        break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
                case OP_ICONV_TO_I4:
                case OP_MOVE:
                        /*
@@ -1494,9 +1507,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
-                               if (last_ins)
-                                       last_ins->next = ins->next;                             
-                               ins = ins->next;
+                               MONO_DEL_INS (ins);
                                continue;
                        }
                        /* 
@@ -1508,8 +1519,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && last_ins->opcode == OP_MOVE &&
                            ins->sreg1 == last_ins->dreg &&
                            ins->dreg == last_ins->sreg1) {
-                               last_ins->next = ins->next;                             
-                               ins = ins->next;                                
+                               MONO_DEL_INS (ins);
                                continue;
                        }
                        break;
@@ -1524,25 +1534,26 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                }
-               last_ins = ins;
-               ins = ins->next;
        }
-       bb->last_ins = last_ins;
 }
 
-static void
-peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+void
+mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *last_ins = NULL;
-       ins = bb->code;
+       MonoInst *ins, *n;
 
-       while (ins) {
+       MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) {
+               MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list);
 
                switch (ins->opcode) {
-               case OP_ICONST:
+               case OP_ICONST: {
+                       MonoInst *next;
+
                        /* reg = 0 -> XOR (reg, reg) */
                        /* XOR sets cflags on x86, so we cant do it always */
-                       if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
+                       next = mono_inst_list_next (&ins->node, &bb->ins_list);
+                       if (ins->inst_c0 == 0 && (!next ||
+                                       (next && INST_IGNORES_CFLAGS (next->opcode)))) {
                                MonoInst *ins2;
 
                                ins->opcode = OP_IXOR;
@@ -1553,23 +1564,22 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
                                 * since it takes 3 bytes instead of 7.
                                 */
-                               for (ins2 = ins->next; ins2; ins2 = ins2->next) {
+                               for (ins2 = mono_inst_list_next (&ins->node, &bb->ins_list); ins2;
+                                               ins2 = mono_inst_list_next (&ins2->node, &bb->ins_list)) {
                                        if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
                                                ins2->opcode = OP_STORE_MEMBASE_REG;
                                                ins2->sreg1 = ins->dreg;
-                                       }
-                                       else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
+                                       } else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
                                                ins2->opcode = OP_STOREI4_MEMBASE_REG;
                                                ins2->sreg1 = ins->dreg;
-                                       }
-                                       else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
+                                       } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
                                                /* Continue iteration */
-                                       }
-                                       else
+                                       } else
                                                break;
                                }
                        }
                        break;
+               }
                case OP_IADD_IMM:
                case OP_ADD_IMM:
                        if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
@@ -1618,8 +1628,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DEL_INS (ins);
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
@@ -1644,8 +1653,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                              ins->inst_offset == last_ins->inst_offset) {
 
                                if (ins->dreg == last_ins->dreg) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DEL_INS (ins);
                                        continue;
                                } else {
                                        ins->opcode = OP_MOVE;
@@ -1686,7 +1694,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
+                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? OP_ICONV_TO_I1 : OP_ICONV_TO_U1;
                                ins->sreg1 = last_ins->sreg1;
                        }
                        break;
@@ -1702,12 +1710,10 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
+                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? OP_ICONV_TO_I2 : OP_ICONV_TO_U2;
                                ins->sreg1 = last_ins->sreg1;
                        }
                        break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
                case OP_ICONV_TO_I4:
                case OP_MOVE:
                        /*
@@ -1716,9 +1722,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
-                               if (last_ins)
-                                       last_ins->next = ins->next;                             
-                               ins = ins->next;
+                               MONO_DEL_INS (ins);
                                continue;
                        }
                        /* 
@@ -1730,8 +1734,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && last_ins->opcode == OP_MOVE &&
                            ins->sreg1 == last_ins->dreg &&
                            ins->dreg == last_ins->sreg1) {
-                               last_ins->next = ins->next;                             
-                               ins = ins->next;                                
+                               MONO_DEL_INS (ins);
                                continue;
                        }
                        break;
@@ -1745,10 +1748,12 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                }
-               last_ins = ins;
-               ins = ins->next;
        }
-       bb->last_ins = last_ins;
+}
+
+void
+mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+{ /* empty body: nothing is done with cfg or bb here */
 }
 
 static const int 
@@ -1771,15 +1776,6 @@ cc_signed_table [] = {
        FALSE, FALSE, FALSE, FALSE
 };
 
-void
-mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
-{
-       if (cfg->opt & MONO_OPT_PEEPHOLE)
-               peephole_pass_1 (cfg, bb);
-
-       mono_local_regalloc (cfg, bb);
-}
-
 static unsigned char*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
 {
@@ -1942,7 +1938,7 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
 
        /* Move return value to the target register */
        switch (ins->opcode) {
-       case CEE_CALL:
+       case OP_CALL:
        case OP_CALL_REG:
        case OP_CALL_MEMBASE:
                if (ins->dreg != X86_EAX)
@@ -2087,13 +2083,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        MonoCallInst *call;
        guint offset;
        guint8 *code = cfg->native_code + cfg->code_len;
-       MonoInst *last_ins = NULL;
-       guint last_offset = 0;
        int max_len, cpos;
 
-       if (cfg->opt & MONO_OPT_PEEPHOLE)
-               peephole_pass (cfg, bb);
-
        if (cfg->opt & MONO_OPT_LOOP) {
                int pad, align = LOOP_ALIGNMENT;
                /* set alignment depending on cpu */
@@ -2125,20 +2116,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
        mono_debug_open_block (cfg, bb, offset);
 
-       ins = bb->code;
-       while (ins) {
+       MONO_BB_FOR_EACH_INS (bb, ins) {
                offset = code - cfg->native_code;
 
                max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
 
-               if (offset > (cfg->code_size - max_len - 16)) {
+               if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
                        cfg->code_size *= 2;
                        cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
                        code = cfg->native_code + offset;
                        mono_jit_stats.code_reallocs++;
                }
 
-               mono_debug_record_line_number (cfg, ins, offset);
+               if (cfg->debug_info)
+                       mono_debug_record_line_number (cfg, ins, offset);
 
                switch (ins->opcode) {
                case OP_BIGMUL:
@@ -2172,11 +2163,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_STOREI4_MEMBASE_REG:
                        x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
                        break;
-               case CEE_LDIND_I:
-               case CEE_LDIND_I4:
-               case CEE_LDIND_U4:
-                       x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
-                       break;
                case OP_LOADU4_MEM:
                        x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
                        x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
@@ -2198,16 +2184,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LOADI2_MEMBASE:
                        x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
                        break;
-               case CEE_CONV_I1:
+               case OP_ICONV_TO_I1:
+               case OP_SEXT_I1:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
                        break;
-               case CEE_CONV_I2:
+               case OP_ICONV_TO_I2:
+               case OP_SEXT_I2:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
                        break;
-               case CEE_CONV_U1:
+               case OP_ICONV_TO_U1:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
                        break;
-               case CEE_CONV_U2:
+               case OP_ICONV_TO_U2:
                        x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
                        break;
                case OP_COMPARE:
@@ -2274,7 +2262,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_breakpoint (code);
                        break;
                case OP_ADDCC:
-               case CEE_ADD:
+               case OP_IADD:
                        x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADC:
@@ -2288,7 +2276,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
                        break;
                case OP_SUBCC:
-               case CEE_SUB:
+               case OP_ISUB:
                        x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
                        break;
                case OP_SBB:
@@ -2301,56 +2289,68 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_SBB_IMM:
                        x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
                        break;
-               case CEE_AND:
+               case OP_IAND:
                        x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
                        break;
                case OP_AND_IMM:
                        x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
                        break;
-               case CEE_DIV:
-                       x86_cdq (code);
-                       x86_div_reg (code, ins->sreg2, TRUE);
+               case OP_IDIV:
+               case OP_IREM:
+                       /* 
+                        * The code is the same for div/rem; the allocator will allocate dreg
+                        * to EAX/EDX as appropriate.
+                        */
+                       if (ins->sreg2 == X86_EDX) {
+                               /* cdq clobbers this */
+                               x86_push_reg (code, ins->sreg2);
+                               x86_cdq (code);
+                               x86_div_membase (code, X86_ESP, 0, TRUE);
+                               x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
+                       } else {
+                               x86_cdq (code);
+                               x86_div_reg (code, ins->sreg2, TRUE);
+                       }
                        break;
-               case CEE_DIV_UN:
-                       x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
-                       x86_div_reg (code, ins->sreg2, FALSE);
+               case OP_IDIV_UN:
+               case OP_IREM_UN:
+                       if (ins->sreg2 == X86_EDX) {
+                               x86_push_reg (code, ins->sreg2);
+                               x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
+                               x86_div_membase (code, X86_ESP, 0, FALSE);
+                               x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
+                       } else {
+                               x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
+                               x86_div_reg (code, ins->sreg2, FALSE);
+                       }
                        break;
                case OP_DIV_IMM:
                        x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
                        x86_cdq (code);
                        x86_div_reg (code, ins->sreg2, TRUE);
                        break;
-               case CEE_REM:
-                       x86_cdq (code);
-                       x86_div_reg (code, ins->sreg2, TRUE);
-                       break;
-               case CEE_REM_UN:
-                       x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
-                       x86_div_reg (code, ins->sreg2, FALSE);
-                       break;
                case OP_REM_IMM:
                        x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
                        x86_cdq (code);
                        x86_div_reg (code, ins->sreg2, TRUE);
                        break;
-               case CEE_OR:
+               case OP_IOR:
                        x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
                        break;
                case OP_OR_IMM:
                        x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
                        break;
-               case CEE_XOR:
                case OP_IXOR:
                        x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
                        break;
                case OP_XOR_IMM:
                        x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
                        break;
-               case CEE_SHL:
+               case OP_ISHL:
                        g_assert (ins->sreg2 == X86_ECX);
                        x86_shift_reg (code, X86_SHL, ins->dreg);
                        break;
-               case CEE_SHR:
+               case OP_ISHR:
                        g_assert (ins->sreg2 == X86_ECX);
                        x86_shift_reg (code, X86_SAR, ins->dreg);
                        break;
@@ -2360,7 +2360,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_SHR_UN_IMM:
                        x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
                        break;
-               case CEE_SHR_UN:
+               case OP_ISHR_UN:
                        g_assert (ins->sreg2 == X86_ECX);
                        x86_shift_reg (code, X86_SHR, ins->dreg);
                        break;
@@ -2448,19 +2448,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
                        }
                        break;
-               case CEE_NOT:
+               case OP_INOT:
                        x86_not_reg (code, ins->sreg1);
                        break;
-               case CEE_NEG:
+               case OP_INEG:
                        x86_neg_reg (code, ins->sreg1);
                        break;
-               case OP_SEXT_I1:
-                       x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
-                       break;
-               case OP_SEXT_I2:
-                       x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
-                       break;
-               case CEE_MUL:
+
+               case OP_IMUL:
                        x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
                        break;
                case OP_MUL_IMM:
@@ -2521,11 +2516,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                break;
                        }
                        break;
-               case CEE_MUL_OVF:
+               case OP_IMUL_OVF:
                        x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
                        EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
                        break;
-               case CEE_MUL_OVF_UN: {
+               case OP_IMUL_OVF_UN: {
                        /* the mul operation and the exception check should most likely be split */
                        int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
                        /*g_assert (ins->sreg2 == X86_EAX);
@@ -2589,12 +2584,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
                        x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
                        break;
-               case CEE_CONV_I4:
                case OP_MOVE:
                        x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
                        break;
-               case CEE_CONV_U4:
-                       g_assert_not_reached ();
                case OP_JMP: {
                        /*
                         * Note: this 'frame destruction' logic is useful for tail calls, too.
@@ -2654,7 +2646,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LCALL:
                case OP_VCALL:
                case OP_VOIDCALL:
-               case CEE_CALL:
+               case OP_CALL:
                        call = (MonoCallInst*)ins;
                        if (ins->flags & MONO_INST_HAS_METHOD)
                                code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
@@ -2758,9 +2750,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        code = mono_emit_stack_alloc (code, ins);
                        x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
                        break;
-               case CEE_RET:
-                       x86_ret (code);
-                       break;
                case OP_THROW: {
                        x86_push_reg (code, ins->sreg1);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
@@ -2784,13 +2773,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
 #endif
                        break;
+               case OP_START_HANDLER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
+                       break;
+               }
+               case OP_ENDFINALLY: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
+                       x86_ret (code);
+                       break;
+               }
+               case OP_ENDFILTER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
+                       /* The local allocator will put the result into EAX */
+                       x86_ret (code);
+                       break;
+               }
+
                case OP_LABEL:
                        ins->inst_c0 = code - cfg->native_code;
                        break;
                case OP_BR:
-                       //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
-                       //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
-                       //break;
                        if (ins->flags & MONO_INST_BRLABEL) {
                                if (ins->inst_i0->inst_c0) {
                                        x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
@@ -2845,16 +2850,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_NC:
                        EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
                        break;
-               case CEE_BEQ:
-               case CEE_BNE_UN:
-               case CEE_BLT:
-               case CEE_BLT_UN:
-               case CEE_BGT:
-               case CEE_BGT_UN:
-               case CEE_BGE:
-               case CEE_BGE_UN:
-               case CEE_BLE:
-               case CEE_BLE_UN:
+               case OP_IBEQ:
+               case OP_IBNE_UN:
+               case OP_IBLT:
+               case OP_IBLT_UN:
+               case OP_IBGT:
+               case OP_IBGT_UN:
+               case OP_IBGE:
+               case OP_IBGE_UN:
+               case OP_IBLE:
+               case OP_IBLE_UN:
                        EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
                        break;
 
@@ -2918,8 +2923,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LOADR4_MEMBASE:
                        x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
                        break;
-               case CEE_CONV_R4: /* FIXME: change precision */
-               case CEE_CONV_R8:
+               case OP_ICONV_TO_R4: /* FIXME: change precision */
+               case OP_ICONV_TO_R8:
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, FALSE);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
@@ -3391,13 +3396,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
                        break;
                case OP_CKFINITE: {
+                       guchar *br1;
                        x86_push_reg (code, X86_EAX);
                        x86_fxam (code);
                        x86_fnstsw (code);
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
                        x86_pop_reg (code, X86_EAX);
+
+                       /* Have to clean up the fp stack before throwing the exception */
+                       br1 = code;
+                       x86_branch8 (code, X86_CC_NE, 0, FALSE);
+
+                       x86_fstp (code, 0);                     
                        EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
+
+                       x86_patch (br1, code);
                        break;
                }
                case OP_TLS_GET: {
@@ -3518,22 +3532,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                default:
-                       g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
+                       g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
                        g_assert_not_reached ();
                }
 
-               if ((code - cfg->native_code - offset) > max_len) {
+               if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
                        g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
                                   mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
                        g_assert_not_reached ();
                }
               
                cpos += max_len;
-
-               last_ins = ins;
-               last_offset = offset;
-               
-               ins = ins->next;
        }
 
        cfg->code_len = code - cfg->native_code;
@@ -3607,7 +3616,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        int alloc_size, pos, max_offset, i;
        guint8 *code;
 
-       cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
+       cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 1024);
 
        if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
                cfg->code_size += 512;
@@ -3621,14 +3630,19 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        pos = 0;
 
        if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
-               /* Might need to attach the thread to the JIT */
-               if (lmf_tls_offset != -1) {
-                       guint8 *buf;
+               /* Might need to attach the thread to the JIT  or change the domain for the callback */
+               if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
+                       guint8 *buf, *no_domain_branch;
 
+                       code = emit_tls_get (code, X86_EAX, appdomain_tls_offset);
+                       x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
+                       no_domain_branch = code;
+                       x86_branch8 (code, X86_CC_NE, 0, 0);
                        code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
                        x86_test_reg_reg (code, X86_EAX, X86_EAX);
                        buf = code;
                        x86_branch8 (code, X86_CC_NE, 0, 0);
+                       x86_patch (no_domain_branch, code);
                        x86_push_imm (code, cfg->domain);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
@@ -3766,7 +3780,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        max_offset = 0;
        if (cfg->opt & MONO_OPT_BRANCH) {
                for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
-                       MonoInst *ins = bb->code;
+                       MonoInst *ins;
                        bb->max_offset = max_offset;
 
                        if (cfg->prof_options & MONO_PROFILE_COVERAGE)
@@ -3775,12 +3789,11 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
                                max_offset += LOOP_ALIGNMENT;
 
-                       while (ins) {
+                       MONO_BB_FOR_EACH_INS (bb, ins) {
                                if (ins->opcode == OP_LABEL)
                                        ins->inst_c1 = max_offset;
                                
                                max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
-                               ins = ins->next;
                        }
                }
        }
@@ -3852,7 +3865,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
                } else {
                        /* Find a spare register */
-                       switch (sig->ret->type) {
+                       switch (mono_type_get_underlying_type (sig->ret)->type) {
                        case MONO_TYPE_I8:
                        case MONO_TYPE_U8:
                                prev_lmf_reg = X86_EDI;
@@ -4342,42 +4355,6 @@ mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethod
                        ins->inst_i1 = args [1];
                }
 #endif
-       } else if (cmethod->klass == mono_defaults.thread_class &&
-                          strcmp (cmethod->name, "MemoryBarrier") == 0) {
-               MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
-       } else if(cmethod->klass->image == mono_defaults.corlib &&
-                          (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
-                          (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
-
-               if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MonoInst *ins_iconst;
-
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
-                       MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
-                       ins_iconst->inst_c0 = 1;
-
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = ins_iconst;
-               } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MonoInst *ins_iconst;
-
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
-                       MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
-                       ins_iconst->inst_c0 = -1;
-
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = ins_iconst;
-               } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
-
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = args [1];
-               } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
-                       MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
-
-                       ins->inst_i0 = args [0];
-                       ins->inst_i1 = args [1];
-               }
        }
 
        return ins;
@@ -4445,12 +4422,38 @@ mono_arch_get_patch_offset (guint8 *code)
        }
 }
 
+/*
+ * mono_breakpoint_clean_code:
+ *
+ *   Copy SIZE bytes starting at CODE into BUF, then walk the
+ * mono_breakpoint_info_index table and, for every recorded entry whose
+ * address lies inside [CODE, CODE + SIZE), overwrite the corresponding byte
+ * of BUF with that entry's saved_byte. Only BUF is modified; the memory at
+ * CODE is read but never written here.
+ *
+ * NOTE(review): saved_byte is presumably the original instruction byte that
+ * a debugger breakpoint replaced, so BUF would end up holding the code as it
+ * looked before breakpoints were inserted -- confirm against the code that
+ * fills mono_breakpoint_info.
+ *
+ * BUF must have room for at least SIZE bytes.
+ *
+ * Returns: TRUE if no recorded entry fell inside the range, FALSE if at
+ * least one byte of BUF was replaced.
+ */
+gboolean
+mono_breakpoint_clean_code (guint8 *code, guint8 *buf, int size)
+{
+       int i;
+       gboolean can_write = TRUE;
+       /* Start from a verbatim copy; individual bytes are fixed up below. */
+       memcpy (buf, code, size);
+       for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
+               int idx = mono_breakpoint_info_index [i];
+               guint8 *ptr;
+               /* Indexes below 1 are skipped (presumably unused slots -- TODO confirm). */
+               if (idx < 1)
+                       continue;
+               ptr = mono_breakpoint_info [idx].address;
+               if (ptr >= code && ptr < code + size) {
+                       guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
+                       /* An entry overlaps the requested range: report it to the caller. */
+                       can_write = FALSE;
+                       /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
+                       /* ptr - code is the offset of the entry's address within the copied window. */
+                       buf [ptr - code] = saved_byte;
+               }
+       }
+       return can_write;
+}
+
 gpointer
 mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
 {
+       guint8 buf [8];
        guint8 reg = 0;
        gint32 disp = 0;
 
+       mono_breakpoint_clean_code (code - 8, buf, sizeof (buf));
+       code = buf + 8;
+
        *displacement = 0;
 
        /* go to the start of the call instruction
@@ -4582,7 +4585,7 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
                g_assert ((code - start) < 64);
 
                cached = start;
-
+               mono_debug_add_delegate_trampoline (start, code - start);
                mono_mini_arch_unlock ();
        } else {
                static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
@@ -4634,6 +4637,7 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
                cache [sig->param_count] = start;
 
+               mono_debug_add_delegate_trampoline (start, code - start);
                mono_mini_arch_unlock ();
        }