2007-06-10 Sanghyeon Seo <sanxiyn@gmail.com>
[mono.git] / mono / mini / mini-x86.c
index 5562d0bd8f2ebb6b87759e93835c3b25d22cf2e7..6da3071a44ad6c3c36387fd680bf120f9101a832 100644 (file)
@@ -11,7 +11,9 @@
 #include "mini.h"
 #include <string.h>
 #include <math.h>
+#ifdef HAVE_UNISTD_H
 #include <unistd.h>
+#endif
 
 #include <mono/metadata/appdomain.h>
 #include <mono/metadata/debug-helpers.h>
@@ -113,7 +115,8 @@ typedef struct {
 
 static X86_Reg_No param_regs [] = { 0 };
 
-#if defined(PLATFORM_WIN32) || defined(__APPLE__)
+#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
+#define SMALL_STRUCTS_IN_REGS
 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
 #endif
 
@@ -179,7 +182,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
        else 
                size = mono_type_stack_size (&klass->byval_arg, NULL);
 
-#if defined(PLATFORM_WIN32) || defined (__APPLE__)
+#ifdef SMALL_STRUCTS_IN_REGS
        if (sig->pinvoke && is_return) {
                MonoMarshalType *info;
 
@@ -231,7 +234,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
  * For x86 win32, see ???.
  */
 static CallInfo*
-get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
+get_call_info (MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
 {
        guint32 i, gr, fr;
        MonoType *ret_type;
@@ -239,7 +242,10 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
        guint32 stack_size = 0;
        CallInfo *cinfo;
 
-       cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
+       if (mp)
+               cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
+       else
+               cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
 
        gr = 0;
        fr = 0;
@@ -438,7 +444,7 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        int offset = 8;
        CallInfo *cinfo;
 
-       cinfo = get_call_info (csig, FALSE);
+       cinfo = get_call_info (NULL, csig, FALSE);
 
        if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
                frame_size += sizeof (gpointer);
@@ -661,34 +667,6 @@ mono_arch_is_int_overflow (void *sigctx, void *info)
        return FALSE;
 }
 
-static gboolean
-is_regsize_var (MonoType *t) {
-       if (t->byref)
-               return TRUE;
-       switch (mono_type_get_underlying_type (t)->type) {
-       case MONO_TYPE_I4:
-       case MONO_TYPE_U4:
-       case MONO_TYPE_I:
-       case MONO_TYPE_U:
-       case MONO_TYPE_PTR:
-       case MONO_TYPE_FNPTR:
-               return TRUE;
-       case MONO_TYPE_OBJECT:
-       case MONO_TYPE_STRING:
-       case MONO_TYPE_CLASS:
-       case MONO_TYPE_SZARRAY:
-       case MONO_TYPE_ARRAY:
-               return TRUE;
-       case MONO_TYPE_GENERICINST:
-               if (!mono_type_generic_inst_is_valuetype (t))
-                       return TRUE;
-               return FALSE;
-       case MONO_TYPE_VALUETYPE:
-               return FALSE;
-       }
-       return FALSE;
-}
-
 GList *
 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
 {
@@ -709,9 +687,7 @@ mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
 
                /* we dont allocate I1 to registers because there is no simply way to sign extend 
                 * 8bit quantities in caller saved registers on x86 */
-               if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
-                   (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
-                   (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
+               if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
@@ -774,7 +750,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        header = mono_method_get_header (cfg->method);
        sig = mono_method_signature (cfg->method);
 
-       cinfo = get_call_info (sig, FALSE);
+       cinfo = get_call_info (cfg->mempool, sig, FALSE);
 
        cfg->frame_reg = MONO_ARCH_BASEREG;
        offset = 0;
@@ -824,7 +800,6 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                        //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
                }
        }
-       g_free (offsets);
        offset += locals_stack_size;
 
 
@@ -859,7 +834,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];
-               inst = cfg->varinfo [i];
+               inst = cfg->args [i];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = X86_EBP;
@@ -871,8 +846,6 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
 
        cfg->stack_offset = offset;
-
-       g_free (cinfo);
 }
 
 void
@@ -883,12 +856,10 @@ mono_arch_create_vars (MonoCompile *cfg)
 
        sig = mono_method_signature (cfg->method);
 
-       cinfo = get_call_info (sig, FALSE);
+       cinfo = get_call_info (cfg->mempool, sig, FALSE);
 
        if (cinfo->ret.storage == ArgValuetypeInReg)
                cfg->ret_var_is_local = TRUE;
-
-       g_free (cinfo);
 }
 
 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
@@ -944,7 +915,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
        sig = call->signature;
        n = sig->param_count + sig->hasthis;
 
-       cinfo = get_call_info (sig, FALSE);
+       cinfo = get_call_info (cfg->mempool, sig, FALSE);
 
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
@@ -1058,8 +1029,6 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
         }
 #endif 
 
-       g_free (cinfo);
-
        return call;
 }
 
@@ -1267,40 +1236,44 @@ emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer dat
        return code;
 }
 
-/* FIXME: Add more instructions */
-#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
+#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM))) /* true for every opcode except the ADC/SBB variants listed here */
 
+/*
+ * peephole_pass_1:
+ *
+ *   Perform peephole opts which should/can be performed before local regalloc
+ */
 static void
-peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
        MonoInst *ins, *last_ins = NULL;
        ins = bb->code;
 
        while (ins) {
-
                switch (ins->opcode) {
-               case OP_ICONST:
-                       /* reg = 0 -> XOR (reg, reg) */
-                       /* XOR sets cflags on x86, so we cant do it always */
-                       if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
-                               ins->opcode = CEE_XOR;
-                               ins->sreg1 = ins->dreg;
-                               ins->sreg2 = ins->dreg;
-                       }
+               case OP_IADD_IMM:
+               case OP_ADD_IMM:
+                       if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
+                               /* 
+                                * X86_LEA is like ADD, but doesn't have the
+                                * sreg1==dreg restriction.
+                                */
+                               ins->opcode = OP_X86_LEA_MEMBASE;
+                               ins->inst_basereg = ins->sreg1;
+                       } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
+                               ins->opcode = OP_X86_INC_REG;
                        break;
-               case OP_MUL_IMM: 
-                       /* remove unnecessary multiplication with 1 */
-                       if (ins->inst_imm == 1) {
-                               if (ins->dreg != ins->sreg1) {
-                                       ins->opcode = OP_MOVE;
-                               } else {
-                                       last_ins->next = ins->next;
-                                       ins = ins->next;
-                                       continue;
-                               }
-                       }
+               case OP_SUB_IMM:
+               case OP_ISUB_IMM:
+                       if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
+                               ins->opcode = OP_X86_LEA_MEMBASE;
+                               ins->inst_basereg = ins->sreg1;
+                               ins->inst_imm = -ins->inst_imm;
+                       } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
+                               ins->opcode = OP_X86_DEC_REG;
                        break;
                case OP_COMPARE_IMM:
+               case OP_ICOMPARE_IMM:
                        /* OP_COMPARE_IMM (reg, 0) 
                         * --> 
                         * OP_X86_TEST_NULL (reg) 
@@ -1436,6 +1409,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case CEE_CONV_I4:
                case CEE_CONV_U4:
+               case OP_ICONV_TO_I4:
                case OP_MOVE:
                        /*
                         * Removes:
@@ -1479,6 +1453,227 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
        bb->last_ins = last_ins;
 }
 
+static void
+peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+       MonoInst *ins, *last_ins = NULL;
+       ins = bb->code;
+
+       while (ins) {
+
+               switch (ins->opcode) {
+               case OP_ICONST:
+                       /* reg = 0 -> XOR (reg, reg) */
+                       /* XOR sets cflags on x86, so we cant do it always */
+                       if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
+                               MonoInst *ins2;
+
+                               ins->opcode = OP_IXOR;
+                               ins->sreg1 = ins->dreg;
+                               ins->sreg2 = ins->dreg;
+
+                               /* 
+                                * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
+                                * since it takes 3 bytes instead of 7.
+                                */
+                               for (ins2 = ins->next; ins2; ins2 = ins2->next) {
+                                       if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
+                                               ins2->opcode = OP_STORE_MEMBASE_REG;
+                                               ins2->sreg1 = ins->dreg;
+                                       }
+                                       else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
+                                               ins2->opcode = OP_STOREI4_MEMBASE_REG;
+                                               ins2->sreg1 = ins->dreg;
+                                       }
+                                       else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
+                                               /* Continue iteration */
+                                       }
+                                       else
+                                               break;
+                               }
+                       }
+                       break;
+               case OP_IADD_IMM:
+               case OP_ADD_IMM:
+                       if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
+                               ins->opcode = OP_X86_INC_REG;
+                       break;
+               case OP_ISUB_IMM:
+               case OP_SUB_IMM:
+                       if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
+                               ins->opcode = OP_X86_DEC_REG;
+                       break;
+               case OP_X86_COMPARE_MEMBASE_IMM:
+                       /* 
+                        * OP_STORE_MEMBASE_REG reg, offset(basereg)
+                        * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
+                        * -->
+                        * OP_STORE_MEMBASE_REG reg, offset(basereg)
+                        * OP_COMPARE_IMM reg, imm
+                        *
+                        * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
+                        */
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
+                           ins->inst_basereg == last_ins->inst_destbasereg &&
+                           ins->inst_offset == last_ins->inst_offset) {
+                                       ins->opcode = OP_COMPARE_IMM;
+                                       ins->sreg1 = last_ins->sreg1;
+
+                                       /* check if we can remove cmp reg,0 with test null */
+                                       if (!ins->inst_imm)
+                                               ins->opcode = OP_X86_TEST_NULL;
+                               }
+
+                       break;
+               case OP_LOAD_MEMBASE:
+               case OP_LOADI4_MEMBASE:
+                       /* 
+                        * Note: if reg1 = reg2 the load op is removed
+                        *
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
+                        * OP_LOAD_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+                        * OP_MOVE reg1, reg2
+                        */
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
+                                        || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
+                           ins->inst_basereg == last_ins->inst_destbasereg &&
+                           ins->inst_offset == last_ins->inst_offset) {
+                               if (ins->dreg == last_ins->sreg1) {
+                                       last_ins->next = ins->next;                             
+                                       ins = ins->next;                                
+                                       continue;
+                               } else {
+                                       //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
+                                       ins->opcode = OP_MOVE;
+                                       ins->sreg1 = last_ins->sreg1;
+                               }
+
+                       /* 
+                        * Note: reg1 must be different from the basereg in the second load
+                        * Note: if reg1 = reg2 is equal then second load is removed
+                        *
+                        * OP_LOAD_MEMBASE offset(basereg), reg1
+                        * OP_LOAD_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_LOAD_MEMBASE offset(basereg), reg1
+                        * OP_MOVE reg1, reg2
+                        */
+                       } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
+                                          || last_ins->opcode == OP_LOAD_MEMBASE) &&
+                             ins->inst_basereg != last_ins->dreg &&
+                             ins->inst_basereg == last_ins->inst_basereg &&
+                             ins->inst_offset == last_ins->inst_offset) {
+
+                               if (ins->dreg == last_ins->dreg) {
+                                       last_ins->next = ins->next;                             
+                                       ins = ins->next;                                
+                                       continue;
+                               } else {
+                                       ins->opcode = OP_MOVE;
+                                       ins->sreg1 = last_ins->dreg;
+                               }
+
+                               //g_assert_not_reached ();
+
+#if 0
+                       /* 
+                        * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
+                        * OP_LOAD_MEMBASE offset(basereg), reg
+                        * -->
+                        * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
+                        * OP_ICONST reg, imm
+                        */
+                       } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
+                                               || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
+                                  ins->inst_basereg == last_ins->inst_destbasereg &&
+                                  ins->inst_offset == last_ins->inst_offset) {
+                               //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
+                               ins->opcode = OP_ICONST;
+                               ins->inst_c0 = last_ins->inst_imm;
+                               g_assert_not_reached (); // check this rule
+#endif
+                       }
+                       break;
+               case OP_LOADU1_MEMBASE:
+               case OP_LOADI1_MEMBASE:
+                       /* 
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
+                        * OP_LOAD_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+                        * CONV_I2/U2 reg1, reg2
+                        */
+                       if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
+                               (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
+                                       ins->inst_basereg == last_ins->inst_destbasereg &&
+                                       ins->inst_offset == last_ins->inst_offset) {
+                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
+                               ins->sreg1 = last_ins->sreg1;
+                       }
+                       break;
+               case OP_LOADU2_MEMBASE:
+               case OP_LOADI2_MEMBASE:
+                       /* 
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
+                        * OP_LOAD_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+                        * CONV_I2/U2 reg1, reg2
+                        */
+                       if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
+                                       ins->inst_basereg == last_ins->inst_destbasereg &&
+                                       ins->inst_offset == last_ins->inst_offset) {
+                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
+                               ins->sreg1 = last_ins->sreg1;
+                       }
+                       break;
+               case CEE_CONV_I4:
+               case CEE_CONV_U4:
+               case OP_ICONV_TO_I4:
+               case OP_MOVE:
+                       /*
+                        * Removes:
+                        *
+                        * OP_MOVE reg, reg 
+                        */
+                       if (ins->dreg == ins->sreg1) {
+                               if (last_ins)
+                                       last_ins->next = ins->next;                             
+                               ins = ins->next;
+                               continue;
+                       }
+                       /* 
+                        * Removes:
+                        *
+                        * OP_MOVE sreg, dreg 
+                        * OP_MOVE dreg, sreg
+                        */
+                       if (last_ins && last_ins->opcode == OP_MOVE &&
+                           ins->sreg1 == last_ins->dreg &&
+                           ins->dreg == last_ins->sreg1) {
+                               last_ins->next = ins->next;                             
+                               ins = ins->next;                                
+                               continue;
+                       }
+                       break;
+               case OP_X86_PUSH_MEMBASE:
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
+                                        last_ins->opcode == OP_STORE_MEMBASE_REG) &&
+                           ins->inst_basereg == last_ins->inst_destbasereg &&
+                           ins->inst_offset == last_ins->inst_offset) {
+                                   ins->opcode = OP_X86_PUSH;
+                                   ins->sreg1 = last_ins->sreg1;
+                       }
+                       break;
+               }
+               last_ins = ins;
+               ins = ins->next;
+       }
+       bb->last_ins = last_ins;
+}
+
 static const int 
 branch_cc_table [] = {
        X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
@@ -1486,12 +1681,25 @@ branch_cc_table [] = {
        X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
 };
 
-static const char*const * ins_spec = x86_desc;
+/* Maps CMP_... constants (the values returned by mono_opcode_to_cond ()) to X86_CC_... constants */
+static const int
+cc_table [] = {
+       X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
+       X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
+};
+/* Parallel to cc_table, same index: TRUE where the condition is emitted as a signed compare */
+static const int
+cc_signed_table [] = {
+       TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
+       FALSE, FALSE, FALSE, FALSE
+};
 
-/*#include "cprop.c"*/
 void
 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
 {
+       if (cfg->opt & MONO_OPT_PEEPHOLE)
+               peephole_pass_1 (cfg, bb);
+
        mono_local_regalloc (cfg, bb);
 }
 
@@ -1651,7 +1859,7 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
        case OP_VCALL:
        case OP_VCALL_REG:
        case OP_VCALL_MEMBASE:
-               cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
+               cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        /* Pop the destination address from the stack */
                        x86_pop_reg (code, X86_ECX);
@@ -1669,7 +1877,6 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
                                }
                        }
                }
-               g_free (cinfo);
        default:
                break;
        }
@@ -1734,14 +1941,14 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
 
        sig = mono_method_signature (method);
 
-       cinfo = get_call_info (sig, FALSE);
+       cinfo = get_call_info (cfg->mempool, sig, FALSE);
        
        /* This is the opposite of the code in emit_prolog */
 
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = cinfo->args + i;
                MonoType *arg_type;
-               inst = cfg->varinfo [i];
+               inst = cfg->args [i];
 
                if (sig->hasthis && (i == 0))
                        arg_type = &mono_defaults.object_class->byval_arg;
@@ -1759,8 +1966,6 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                }
        }
 
-       g_free (cinfo);
-
        return code;
 }
 
@@ -1832,7 +2037,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        while (ins) {
                offset = code - cfg->native_code;
 
-               max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
+               max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
 
                if (offset > (cfg->code_size - max_len - 16)) {
                        cfg->code_size *= 2;
@@ -1973,7 +2178,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_X86_MUL_MEMBASE:
                        x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
                        break;
-               case CEE_BREAK:
+               case OP_BREAK:
                        x86_breakpoint (code);
                        break;
                case OP_ADDCC:
@@ -2043,6 +2248,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
                        break;
                case CEE_XOR:
+               case OP_IXOR:
                        x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
                        break;
                case OP_XOR_IMM:
@@ -2297,7 +2503,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case CEE_CONV_U4:
                        g_assert_not_reached ();
-               case CEE_JMP: {
+               case OP_JMP: {
                        /*
                         * Note: this 'frame destruction' logic is useful for tail calls, too.
                         * Keep in sync with the code in emit_epilog.
@@ -2455,15 +2661,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_LOCALLOC:
                        /* keep alignment */
-                       x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
-                       x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
+                       x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
+                       x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
                        code = mono_emit_stack_alloc (code, ins);
                        x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
                        break;
                case CEE_RET:
                        x86_ret (code);
                        break;
-               case CEE_THROW: {
+               case OP_THROW: {
                        x86_push_reg (code, ins->sreg1);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                                          (gpointer)"mono_arch_throw_exception");
@@ -2489,7 +2695,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LABEL:
                        ins->inst_c0 = code - cfg->native_code;
                        break;
-               case CEE_BR:
+               case OP_BR:
                        //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
                        //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
                        //break;
@@ -2521,27 +2727,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_jump_reg (code, ins->sreg1);
                        break;
                case OP_CEQ:
-                       x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
-                       x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
-                       break;
                case OP_CLT:
-                       x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
-                       x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
-                       break;
                case OP_CLT_UN:
-                       x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
-                       x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
-                       break;
                case OP_CGT:
-                       x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
-                       x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
-                       break;
                case OP_CGT_UN:
-                       x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
-                       x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
-                       break;
                case OP_CNE:
-                       x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
+                       x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_COND_EXC_EQ:
@@ -2554,6 +2745,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_GE_UN:
                case OP_COND_EXC_LE:
                case OP_COND_EXC_LE_UN:
+                       EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
+                       break;
                case OP_COND_EXC_OV:
                case OP_COND_EXC_NO:
                case OP_COND_EXC_C:
@@ -2570,7 +2763,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_BGE_UN:
                case CEE_BLE:
                case CEE_BLE_UN:
-                       EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
+                       EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
                        break;
 
                /* floating point opcodes */
@@ -3024,7 +3217,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FBGT:
                case OP_FBGT_UN:
                        if (cfg->opt & MONO_OPT_FCMOV) {
-                               EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
+                               if (ins->opcode == OP_FBGT) {
+                                       guchar *br1;
+
+                                       /* skip branch if the parity flag is set (X86_CC_P), i.e. the compare was unordered */
+                                       br1 = code;
+                                       x86_branch8 (code, X86_CC_P, 0, FALSE);
+                                       /* branch if (C0 | C3) = 1 */
+                                       EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
+                                       x86_patch (br1, code);
+                               } else {
+                                       EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
+                               }
                                break;
                        }
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
@@ -3094,7 +3298,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
                        EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
                        break;
-               case CEE_CKFINITE: {
+               case OP_CKFINITE: {
                        x86_push_reg (code, X86_EAX);
                        x86_fxam (code);
                        x86_fnstsw (code);
@@ -3479,7 +3683,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                if (ins->opcode == OP_LABEL)
                                        ins->inst_c1 = max_offset;
                                
-                               max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
+                               max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
                                ins = ins->next;
                        }
                }
@@ -3493,7 +3697,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        pos = 0;
 
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
-               inst = cfg->varinfo [pos];
+               inst = cfg->args [pos];
                if (inst->opcode == OP_REGVAR) {
                        x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
                        if (cfg->verbose_level > 2)
@@ -3535,7 +3739,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
 
-       /* the code restoring the registers must be kept in sync with CEE_JMP */
+       /* the code restoring the registers must be kept in sync with OP_JMP */
        pos = 0;
        
        if (method->save_lmf) {
@@ -3615,7 +3819,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        }
 
        /* Load returned vtypes into registers if needed */
-       cinfo = get_call_info (sig, FALSE);
+       cinfo = get_call_info (cfg->mempool, sig, FALSE);
        if (cinfo->ret.storage == ArgValuetypeInReg) {
                for (quad = 0; quad < 2; quad ++) {
                        switch (cinfo->ret.pair_storage [quad]) {
@@ -3652,8 +3856,6 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        else
                x86_ret (code);
 
-       g_free (cinfo);
-
        cfg->code_len = code - cfg->native_code;
 
        g_assert (cfg->code_len < cfg->code_size);
@@ -3738,7 +3940,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                                        exc_throw_start [nthrows] = code;
                                }
 
-                               x86_push_imm (code, exc_class->type_token);
+                               x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
                                patch_info->data.name = "mono_arch_throw_corlib_exception";
                                patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
                                patch_info->ip.i = code - cfg->native_code;
@@ -3827,7 +4029,7 @@ void
 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
 {
        MonoCallInst *call = (MonoCallInst*)inst;
-       CallInfo *cinfo = get_call_info (inst->signature, FALSE);
+       CallInfo *cinfo = get_call_info (cfg->mempool, inst->signature, FALSE);
 
        /* add the this argument */
        if (this_reg != -1) {
@@ -3881,8 +4083,6 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
                        mono_bblock_add_inst (cfg->cbb, vtarg);
                }
        }
-
-       g_free (cinfo);
 }
 
 MonoInst*
@@ -4074,22 +4274,94 @@ mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
        return (gpointer*)(((gint32)(regs [reg])) + disp);
 }
 
-gpointer
-mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
+gpointer /* returns the 'this' object read from the stack image at regs [X86_ESP] */
+mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code) /* 'code' is not used by this implementation */
 {
-       guint8 reg = 0;
-       gint32 disp = 0;
+       guint32 esp = regs [X86_ESP]; /* ESP entry of the register array passed in by the caller */
+       CallInfo *cinfo;
+       gpointer res;
 
-       code -= 7;
-       if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
-               reg = x86_modrm_rm (code [1]);
-               disp = code [4];
+       cinfo = get_call_info (NULL, sig, FALSE); /* no mempool given, so cinfo is heap allocated and must be g_free'd below */
 
-               if (reg == X86_EAX)
-                       return NULL;
-               else
-                       return (gpointer*)(((gint32)(regs [reg])) + disp);
+       /*
+        * The stack looks like (highest address first; esp points at the last entry):
+        * <other args>
+        * <this=delegate>
+        * <possible vtype return address>
+        * <return addr>
+        * <4 pointers pushed by mono_arch_create_trampoline_code ()>
+        */
+       res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]); /* 5 = 4 trampoline pointers + return addr; the byte offset of args [0] is converted to 4-byte slots */
+       g_free (cinfo);
+       return res;
+}
+
+gpointer /* returns the start of the emitted delegate-invoke code, or NULL when none is generated for this signature */
+mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
+{
+       guint8 *code, *start;
+       MonoDomain *domain = mono_domain_get ();
+
+       /* FIXME: Support more cases; struct return types currently get no specialized code */
+       if (MONO_TYPE_ISSTRUCT (sig->ret))
+               return NULL;
+
+       /*
+        * The stack seen by the generated code contains:
+        * <delegate>
+        * <return addr>
+        */
+
+       if (has_target) {
+               mono_domain_lock (domain);
+               start = code = mono_code_manager_reserve (domain->code_mp, 64);
+               mono_domain_unlock (domain);
+
+               /* Replace the this argument with the target */
+               x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4); /* EAX = delegate, loaded from [ESP + 4] */
+               x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4); /* ECX = delegate->target */
+               x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4); /* [ESP + 4] = target */
+               x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr)); /* jump (not call) through delegate->method_ptr */
+
+               g_assert ((code - start) < 64); /* emitted code must fit in the 64 bytes reserved above */
+       } else {
+               if (sig->param_count == 0) { /* no target and no parameters: only a load and a jump are emitted */
+                       mono_domain_lock (domain);
+                       start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8)); /* param_count is 0 here, so this reserves 32 bytes */
+                       mono_domain_unlock (domain);
+               
+                       x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4); /* EAX = delegate, loaded from [ESP + 4] */
+                       x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr)); /* jump (not call) through delegate->method_ptr */
+               } else {
+                       /* 
+                        * The code below does not work in the presence of exceptions, since it 
+                        * creates a new frame.
+                        */
+                       start = NULL; /* nothing is generated for this case, so NULL is returned */
+#if 0 /* disabled for the reason given above; note that 'i' is not declared in this function */
+                       for (i = 0; i < sig->param_count; ++i)
+                               if (!mono_is_regsize_var (sig->params [i]))
+                                       return NULL;
+
+                       mono_domain_lock (domain);
+                       start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
+                       mono_domain_unlock (domain);
+
+                       /* Load this == delegate */
+                       x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
+
+                       /* Push arguments in opposite order, taking changes in ESP into account */
+                       for (i = 0; i < sig->param_count; ++i)
+                               x86_push_membase (code, X86_ESP, 4 + (sig->param_count * 4));
+
+                       /* Call the delegate */
+                       x86_call_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+                       if (sig->param_count > 0)
+                               x86_alu_reg_imm (code, X86_ADD, X86_ESP, sig->param_count * 4);
+                       x86_ret (code);
+#endif
+               }
        }
 
-       return NULL;
+       return start; /* start of the emitted code, or NULL if the no-target case with parameters was hit */
 }