Moved ProviderCollectionTest.cs from System assembly to System.Configuration.
[mono.git] / mono / mini / mini-arm.c
index 68a3e6c9aa708023cfbd4b2f811e86b98d729678..96fe2249c56fe4ac3f2656b9921adc689ab18171 100644 (file)
 #include "mono/arch/arm/arm-vfp-codegen.h"
 #endif
 
+/* This mutex protects architecture specific caches */
+#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
+#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
+static CRITICAL_SECTION mini_arch_mutex;
+
 static int v5_supported = 0;
 static int thumb_supported = 0;
 
-static int mono_arm_is_rotated_imm8 (guint32 val, gint *rot_amount);
-
 /*
  * TODO:
  * floating point support: on ARM it is a mess, there are at least 3
@@ -38,7 +41,7 @@ static int mono_arm_is_rotated_imm8 (guint32 val, gint *rot_amount);
  * 2) softfloat: the compiler emulates all the fp ops. Usually uses the
  *    ugly swapped double format (I guess a softfloat-vfp exists, too, though).
  * 3) VFP: the new and actually sensible and useful FP support. Implemented
- *    in HW or kernel-emulated, requires new tools. I think this ios what symbian uses.
+ *    in HW or kernel-emulated, requires new tools. I think this is what symbian uses.
  *
  * The plan is to write the FPA support first. softfloat can be tested in a chroot.
  */
@@ -48,8 +51,19 @@ int mono_exc_esp_offset = 0;
 #define arm_is_imm8(v) ((v) > -256 && (v) < 256)
 #define arm_is_fpimm8(v) ((v) >= -1020 && (v) <= 1020)
 
+/*
+ * Instruction patterns used by mono_arch_get_vcall_slot () to recognize call sequences.
+ *
+ * LDR_MASK selects the condition field, bits 27-26, bit 22, bit 20 and bits 15-12
+ * of an instruction word. IS_LDR_PC () is true when those fields equal LDR_PC_VAL,
+ * which in the ARM load/store encoding is an always-executed word load (bit 22
+ * clear, bit 20 set) whose destination register (bits 15-12) is pc.
+ */
+#define LDR_MASK ((0xf << ARMCOND_SHIFT) | (3 << 26) | (1 << 22) | (1 << 20) | (15 << 12))
+#define LDR_PC_VAL ((ARMCOND_AL << ARMCOND_SHIFT) | (1 << 26) | (0 << 22) | (1 << 20) | (15 << 12))
+#define IS_LDR_PC(val) (((val) & LDR_MASK) == LDR_PC_VAL)
+
+/*
+ * Full instruction words compared against the word preceding the "ldr pc":
+ * ADD_LR_PC_4 is "add lr, pc, #4" (interface calls) and MOV_LR_PC is
+ * "mov lr, pc" (virtual calls).
+ */
+#define ADD_LR_PC_4 ((ARMCOND_AL << ARMCOND_SHIFT) | (1 << 25) | (1 << 23) | (ARMREG_PC << 16) | (ARMREG_LR << 12) | 4)
+#define MOV_LR_PC ((ARMCOND_AL << ARMCOND_SHIFT) | (1 << 24) | (0xa << 20) |  (ARMREG_LR << 12) | ARMREG_PC)
+#define DEBUG_IMT 0
+
+void mini_emit_memcpy2 (MonoCompile *cfg, int destreg, int doffset, int srcreg, int soffset, int size, int align);
+
 const char*
-mono_arch_regname (int reg) {
+mono_arch_regname (int reg)
+{
        static const char * rnames[] = {
                "arm_r0", "arm_r1", "arm_r2", "arm_r3", "arm_v1",
                "arm_v2", "arm_v3", "arm_v4", "arm_v5", "arm_v6",
@@ -62,7 +76,8 @@ mono_arch_regname (int reg) {
 }
 
 const char*
-mono_arch_fregname (int reg) {
+mono_arch_fregname (int reg)
+{
        static const char * rnames[] = {
                "arm_f0", "arm_f1", "arm_f2", "arm_f3", "arm_f4",
                "arm_f5", "arm_f6", "arm_f7", "arm_f8", "arm_f9",
@@ -149,6 +164,38 @@ emit_call_reg (guint8 *code, int reg)
        return code;
 }
 
+/*
+ * emit_call_seq:
+ *
+ *   Emit a call sequence at CODE and return the address just past it.
+ * For dynamic methods the sequence loads IP from an inline literal word
+ * (written as NULL here), branches over that word and then calls through IP.
+ * For every other method a single ARM_BL with a 0 offset is emitted.
+ */
+static guint8*
+emit_call_seq (MonoCompile *cfg, guint8 *code)
+{
+       if (!cfg->method->dynamic) {
+               ARM_BL (code, 0);
+               return code;
+       }
+
+       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
+       ARM_B (code, 0);
+       /* inline literal slot holding the call target */
+       *(gpointer*)code = NULL;
+       code += 4;
+
+       return emit_call_reg (code, ARMREG_IP);
+}
+
+/*
+ * emit_move_return_value:
+ *
+ *   Emit the code needed to move the result of the call INS into ins->dreg
+ * and return the updated code pointer. Only the floating point call opcodes
+ * are handled, and only when ARM_FPU_FPA is defined: the value is copied
+ * from ARM_FPA_F0 unless it is already the destination register.
+ */
+static guint8*
+emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
+{
+#ifdef ARM_FPU_FPA
+       gboolean is_fp_call = ins->opcode == OP_FCALL || ins->opcode == OP_FCALL_REG || ins->opcode == OP_FCALL_MEMBASE;
+
+       if (is_fp_call && ins->dreg != ARM_FPA_F0) {
+               ARM_MVFD (code, ins->dreg, ARM_FPA_F0);
+       }
+#endif
+
+       return code;
+}
+
 /*
  * mono_arch_get_argument_info:
  * @csig:  a method signature
@@ -164,7 +211,7 @@ int
 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
 {
        int k, frame_size = 0;
-       int size, align, pad;
+       guint32 size, align, pad;
        int offset = 8;
 
        if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
@@ -182,11 +229,7 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        arg_info [0].size = frame_size;
 
        for (k = 0; k < param_count; k++) {
-               
-               if (csig->pinvoke)
-                       size = mono_type_native_stack_size (csig->params [k], &align);
-               else
-                       size = mono_type_stack_size (csig->params [k], &align);
+               size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);
 
                /* ignore alignment for now */
                align = 1;
@@ -208,6 +251,175 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        return frame_size;
 }
 
+/*
+ * decode_vcall_slot_from_ldr:
+ * @ldr: instruction word of the "ldr pc, [rX, #offset]" performing the call
+ * @regs: array of register values, indexed by register number
+ * @displacement: set to the signed offset encoded in @ldr
+ *
+ * Returns: the value of the base register (bits 19-16 of @ldr) taken from @regs.
+ */
+static gpointer
+decode_vcall_slot_from_ldr (guint32 ldr, gpointer *regs, int *displacement)
+{
+       int base_reg = (ldr >> 16) & 0xf;
+       int imm12 = ldr & 0xfff;
+       gpointer base = regs [base_reg];
+
+       /* U bit (bit 23): set means the offset is added, clear means it is subtracted */
+       *displacement = ((ldr >> 23) & 1) ? imm12 : -imm12;
+
+       return base;
+}
+
+/*
+ * mono_arch_get_vcall_slot:
+ * @code_ptr: address of the word following the call sequence
+ * @regs: array of register values, indexed by register number
+ * @displacement: set to the offset used by the call, when one is found
+ *
+ * Recognizes the two call sequences which go through a vtable/IMT slot.
+ * Seen as 32 bit words relative to @code_ptr they look like this:
+ *
+ * interface call:
+ *     [-3]  add lr, pc, #4
+ *     [-2]  ldr pc, [rX, #+/-offset]
+ *     [-1]  .word IMT value
+ *
+ * virtual call:
+ *     [-2]  mov lr, pc
+ *     [-1]  ldr pc, [rX, #+/-offset]
+ *
+ * Any other sequence is ignored, for example a direct "bl #offset", a
+ * "mov pc, rX", or the ldr ip/b/literal sequence used for dynamic methods.
+ *
+ * Returns: the value of the base register rX, or NULL if neither of the two
+ * sequences above is found.
+ */
+gpointer
+mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
+{
+       /* Word immediately before code_ptr: the IMT value, or the "ldr pc" itself */
+       guint32 *last = (guint32*)code_ptr - 1;
+
+       /* interface call: add lr / ldr pc / IMT word */
+       if (IS_LDR_PC (last [-1]) && last [-2] == ADD_LR_PC_4)
+               return decode_vcall_slot_from_ldr (last [-1], regs, displacement);
+
+       /* virtual call: mov lr / ldr pc */
+       if (IS_LDR_PC (last [0]) && last [-1] == MOV_LR_PC)
+               return decode_vcall_slot_from_ldr (last [0], regs, displacement);
+
+       return NULL;
+}
+
+/*
+ * mono_arch_get_vcall_slot_addr:
+ *
+ *   Return the address of the slot used by the call preceding CODE: the base
+ * value found by mono_arch_get_vcall_slot () plus the displacement it
+ * reports. Returns NULL when no base value is found.
+ */
+gpointer*
+mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
+{
+       int offset;
+       char *base = mono_arch_get_vcall_slot (code, regs, &offset);
+
+       return base ? (gpointer*)(base + offset) : NULL;
+}
+
+#define MAX_ARCH_DELEGATE_PARAMS 3
+
+/*
+ * mono_arch_get_delegate_invoke_impl:
+ * @sig: signature used to pick the stub (return type and parameters are inspected)
+ * @has_target: whether the stub should replace R0 with the delegate target
+ *
+ * Returns a cached code stub which jumps to the delegate's method_ptr, or
+ * NULL when the case is not supported: struct return values and, when
+ * @has_target is FALSE, more than MAX_ARCH_DELEGATE_PARAMS parameters or a
+ * parameter rejected by mono_is_regsize_var ().
+ * Each stub is generated at most once; the caches are accessed with
+ * mono_mini_arch_lock () held.
+ */
+gpointer
+mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
+{
+       guint8 *code, *start;
+
+       /* FIXME: Support more cases */
+       if (MONO_TYPE_ISSTRUCT (sig->ret))
+               return NULL;
+
+       if (has_target) {
+               static guint8* cached = NULL;
+               mono_mini_arch_lock ();
+               if (cached) {
+                       mono_mini_arch_unlock ();
+                       return cached;
+               }
+
+               /* three 4 byte instructions */
+               start = code = mono_global_codeman_reserve (12);
+
+               /* Replace the this argument with the target */
+               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_R0, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+               ARM_LDR_IMM (code, ARMREG_R0, ARMREG_R0, G_STRUCT_OFFSET (MonoDelegate, target));
+               ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
+
+               g_assert ((code - start) <= 12);
+
+               /* Flush the stub itself: code now points past the emitted instructions */
+               mono_arch_flush_icache (start, 12);
+               cached = start;
+               mono_mini_arch_unlock ();
+               return cached;
+       } else {
+               static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
+               int size, i;
+
+               if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
+                       return NULL;
+               for (i = 0; i < sig->param_count; ++i)
+                       if (!mono_is_regsize_var (sig->params [i]))
+                               return NULL;
+
+               mono_mini_arch_lock ();
+               code = cache [sig->param_count];
+               if (code) {
+                       mono_mini_arch_unlock ();
+                       return code;
+               }
+
+               /* the ldr and the final mov pc, plus one mov per parameter */
+               size = 8 + sig->param_count * 4;
+               start = code = mono_global_codeman_reserve (size);
+
+               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_R0, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+               /* slide down the arguments */
+               for (i = 0; i < sig->param_count; ++i) {
+                       ARM_MOV_REG_REG (code, (ARMREG_R0 + i), (ARMREG_R0 + i + 1));
+               }
+               ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
+
+               g_assert ((code - start) <= size);
+
+               /* Flush the stub itself: code now points past the emitted instructions */
+               mono_arch_flush_icache (start, size);
+               cache [sig->param_count] = start;
+               mono_mini_arch_unlock ();
+               return start;
+       }
+
+       return NULL;
+}
+
+/*
+ * mono_arch_get_this_arg_from_call:
+ *
+ *   Return the 'this' argument of a call from the register values in REGS.
+ * It is read from R0, or from R1 when the signature returns a struct
+ * (presumably because R0 then carries the return buffer address; confirm).
+ */
+gpointer
+mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
+{
+       /* FIXME: handle returning a struct */
+       int this_reg = MONO_TYPE_ISSTRUCT (sig->ret) ? ARMREG_R1 : ARMREG_R0;
+
+       return (gpointer)regs [this_reg];
+}
+
 /*
  * Initialize the cpu to execute managed code.
  */
@@ -216,6 +428,23 @@ mono_arch_cpu_init (void)
 {
 }
 
+/*
+ * Initialize architecture specific code.
+ *
+ * Sets up mini_arch_mutex, the critical section taken and released by
+ * mono_mini_arch_lock () and mono_mini_arch_unlock ().
+ */
+void
+mono_arch_init (void)
+{
+       InitializeCriticalSection (&mini_arch_mutex);   
+}
+
+/*
+ * Cleanup architecture specific code.
+ *
+ * Currently does nothing.
+ * NOTE(review): mini_arch_mutex, which mono_arch_init () initializes, is not
+ * deleted here; confirm that this is intentional.
+ */
+void
+mono_arch_cleanup (void)
+{
+}
+
 /*
  * This function returns the optimizations supported on this cpu.
  */
@@ -223,6 +452,10 @@ guint32
 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
 {
        guint32 opts = 0;
+#if __APPLE__
+       thumb_supported = TRUE;
+       v5_supported = TRUE;
+#else
        char buf [512];
        char *line;
        FILE *file = fopen ("/proc/cpuinfo", "r");
@@ -248,6 +481,7 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
                fclose (file);
                /*printf ("features: v5: %d, thumb: %d\n", v5_supported, thumb_supported);*/
        }
+#endif
 
        /* no arm-specific optimizations yet */
        *exclude_mask = 0;
@@ -258,7 +492,7 @@ static gboolean
 is_regsize_var (MonoType *t) {
        if (t->byref)
                return TRUE;
-       t = mono_type_get_underlying_type (t);
+       t = mini_type_get_underlying_type (NULL, t);
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
@@ -321,7 +555,9 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V2));
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V3));
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V4));
-       regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V5));
+       if (!(cfg->compile_aot || cfg->uses_rgctx_reg))
+               /* V5 is reserved for passing the vtable/rgctx/IMT method */
+               regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V5));
        /*regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V6));*/
        /*regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V7));*/
 
@@ -345,6 +581,9 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
+#if __APPLE__
+       sys_icache_invalidate (code, size);
+#else
        __asm __volatile ("mov r0, %0\n"
                        "mov r1, %1\n"
                        "mov r2, %2\n"
@@ -352,12 +591,9 @@ mono_arch_flush_icache (guint8 *code, gint size)
                        : /* no outputs */
                        : "r" (code), "r" (code + size), "r" (0)
                        : "r0", "r1", "r3" );
-
+#endif
 }
 
-#define NOT_IMPLEMENTED(x) \
-                g_error ("FIXME: %s is not yet implemented. (trampoline)", x);
-
 enum {
        RegTypeGeneral,
        RegTypeBase,
@@ -409,7 +645,7 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
                        ainfo->reg = ARMREG_SP; /* in the caller */
                        ainfo->regtype = RegTypeBaseGen;
                        *stack_size += 4;
-               } else if (*gr > ARMREG_R3) {
+               } else if (*gr >= ARMREG_R3) {
 #ifdef __ARM_EABI__
                        *stack_size += 7;
                        *stack_size &= ~7;
@@ -435,7 +671,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
        guint i, gr;
        int n = sig->hasthis + sig->param_count;
-       guint32 simpletype;
+       MonoType *simpletype;
        guint32 stack_size = 0;
        CallInfo *cinfo = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n);
 
@@ -468,8 +704,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        continue;
                }
-               simpletype = mono_type_get_underlying_type (sig->params [i])->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->params [i]);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -518,7 +754,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        int align_size;
                        int nwords;
 
-                       if (simpletype == MONO_TYPE_TYPEDBYREF) {
+                       if (simpletype->type == MONO_TYPE_TYPEDBYREF) {
                                size = sizeof (MonoTypedRef);
                        } else {
                                MonoClass *klass = mono_class_from_mono_type (sig->params [i]);
@@ -566,8 +802,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
 
        {
-               simpletype = mono_type_get_underlying_type (sig->ret)->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->ret);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -627,7 +863,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
  * The locals var stuff should most likely be split in another method.
  */
 void
-mono_arch_allocate_vars (MonoCompile *m)
+mono_arch_allocate_vars (MonoCompile *cfg)
 {
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
@@ -636,13 +872,13 @@ mono_arch_allocate_vars (MonoCompile *m)
        int frame_reg = ARMREG_FP;
 
        /* FIXME: this will change when we use FP as gcc does */
-       m->flags |= MONO_CFG_HAS_SPILLUP;
+       cfg->flags |= MONO_CFG_HAS_SPILLUP;
 
        /* allow room for the vararg method args: void* and long/double */
-       if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
-               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+       if (mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method))
+               cfg->param_area = MAX (cfg->param_area, sizeof (gpointer)*8);
 
-       header = mono_method_get_header (m->method);
+       header = mono_method_get_header (cfg->method);
 
        /* 
         * We use the frame register also for any method that has
@@ -653,28 +889,29 @@ mono_arch_allocate_vars (MonoCompile *m)
         * filters get called before stack unwinding happens) when the filter
         * code would call any method (this also applies to finally etc.).
         */ 
-       if ((m->flags & MONO_CFG_HAS_ALLOCA) || header->num_clauses)
+       if ((cfg->flags & MONO_CFG_HAS_ALLOCA) || header->num_clauses)
                frame_reg = ARMREG_FP;
-       m->frame_reg = frame_reg;
+       cfg->frame_reg = frame_reg;
        if (frame_reg != ARMREG_SP) {
-               m->used_int_regs |= 1 << frame_reg;
+               cfg->used_int_regs |= 1 << frame_reg;
        }
 
-       sig = mono_method_signature (m->method);
+       if (!cfg->compile_aot || cfg->uses_rgctx_reg)
+               /* V5 is reserved for passing the vtable/rgctx/IMT method */
+               cfg->used_int_regs |= (1 << ARMREG_V5);
+
+       sig = mono_method_signature (cfg->method);
        
        offset = 0;
        curinst = 0;
-       if (MONO_TYPE_ISSTRUCT (sig->ret)) {
-               m->ret->opcode = OP_REGVAR;
-               m->ret->inst_c0 = ARMREG_R0;
-       } else {
+       if (!MONO_TYPE_ISSTRUCT (sig->ret)) {
                /* FIXME: handle long and FP values */
-               switch (mono_type_get_underlying_type (sig->ret)->type) {
+               switch (mini_type_get_underlying_type (NULL, sig->ret)->type) {
                case MONO_TYPE_VOID:
                        break;
                default:
-                       m->ret->opcode = OP_REGVAR;
-                       m->ret->inst_c0 = ARMREG_R0;
+                       cfg->ret->opcode = OP_REGVAR;
+                       cfg->ret->inst_c0 = ARMREG_R0;
                        break;
                }
        }
@@ -689,44 +926,51 @@ mono_arch_allocate_vars (MonoCompile *m)
        //offset &= ~(8 - 1);
 
        /* add parameter area size for called functions */
-       offset += m->param_area;
+       offset += cfg->param_area;
        offset += 8 - 1;
        offset &= ~(8 - 1);
-       if (m->flags & MONO_CFG_HAS_FPOUT)
+       if (cfg->flags & MONO_CFG_HAS_FPOUT)
                offset += 8;
 
        /* allow room to save the return value */
-       if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
+       if (mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method))
                offset += 8;
 
        /* the MonoLMF structure is stored just below the stack pointer */
 
        if (sig->call_convention == MONO_CALL_VARARG) {
-                m->sig_cookie = 0;
+                cfg->sig_cookie = 0;
         }
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
-               inst = m->ret;
+               inst = cfg->vret_addr;
                offset += sizeof(gpointer) - 1;
                offset &= ~(sizeof(gpointer) - 1);
                inst->inst_offset = offset;
                inst->opcode = OP_REGOFFSET;
                inst->inst_basereg = frame_reg;
+               if (G_UNLIKELY (cfg->verbose_level > 1)) {
+                       printf ("vret_addr =");
+                       mono_print_ins (cfg->vret_addr);
+               }
                offset += sizeof(gpointer);
                if (sig->call_convention == MONO_CALL_VARARG)
-                       m->sig_cookie += sizeof (gpointer);
+                       cfg->sig_cookie += sizeof (gpointer);
        }
 
-       curinst = m->locals_start;
-       for (i = curinst; i < m->num_varinfo; ++i) {
-               inst = m->varinfo [i];
+       curinst = cfg->locals_start;
+       for (i = curinst; i < cfg->num_varinfo; ++i) {
+               inst = cfg->varinfo [i];
                if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
                        continue;
 
                /* inst->backend.is_pinvoke indicates native sized value types, this is used by the
                * pinvoke wrappers when they call functions returning structure */
-               if (inst->backend.is_pinvoke && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
-                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), &align);
+               if (inst->backend.is_pinvoke && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF) {
+                       guint32 ualign;
+                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), &ualign);
+                       align = ualign;
+               }
                else
                        size = mono_type_size (inst->inst_vtype, &align);
 
@@ -746,7 +990,7 @@ mono_arch_allocate_vars (MonoCompile *m)
 
        curinst = 0;
        if (sig->hasthis) {
-               inst = m->args [curinst];
+               inst = cfg->args [curinst];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
@@ -755,13 +999,13 @@ mono_arch_allocate_vars (MonoCompile *m)
                        inst->inst_offset = offset;
                        offset += sizeof (gpointer);
                        if (sig->call_convention == MONO_CALL_VARARG)
-                               m->sig_cookie += sizeof (gpointer);
+                               cfg->sig_cookie += sizeof (gpointer);
                }
                curinst++;
        }
 
        for (i = 0; i < sig->param_count; ++i) {
-               inst = m->args [curinst];
+               inst = cfg->args [curinst];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
@@ -776,7 +1020,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                        inst->inst_offset = offset;
                        offset += size;
                        if ((sig->call_convention == MONO_CALL_VARARG) && (i < sig->sentinelpos)) 
-                               m->sig_cookie += size;
+                               cfg->sig_cookie += size;
                }
                curinst++;
        }
@@ -786,13 +1030,24 @@ mono_arch_allocate_vars (MonoCompile *m)
        offset &= ~(8 - 1);
 
        /* change sign? */
-       m->stack_offset = offset;
-
+       cfg->stack_offset = offset;
 }
 
-/* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
- * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
- */
+/*
+ * mono_arch_create_vars:
+ *
+ *   Create the arch specific variables of the method being compiled.
+ * When the method returns a struct, an OP_ARG variable of the int class type
+ * is created and stored in cfg->vret_addr; it is printed when the verbose
+ * level is above 1. Nothing is done for other return types.
+ */
+void
+mono_arch_create_vars (MonoCompile *cfg)
+{
+       MonoMethodSignature *sig = mono_method_signature (cfg->method);
+
+       if (!MONO_TYPE_ISSTRUCT (sig->ret))
+               return;
+
+       cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
+
+       if (G_UNLIKELY (cfg->verbose_level > 1)) {
+               printf ("vret_addr = ");
+               mono_print_ins (cfg->vret_addr);
+       }
+}
 
 /* 
  * take the arguments and generate the arch-specific
@@ -927,6 +1182,268 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
        return call;
 }
 
+void
+mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
+{
+       MonoInst *in, *ins;
+       MonoMethodSignature *sig;
+       int i, n;
+       CallInfo *cinfo;
+
+       sig = call->signature;
+       n = sig->param_count + sig->hasthis;
+       
+       cinfo = calculate_sizes (sig, sig->pinvoke);
+
+       for (i = 0; i < n; ++i) {
+               ArgInfo *ainfo = cinfo->args + i;
+               MonoType *t;
+
+               if (i >= sig->hasthis)
+                       t = sig->params [i - sig->hasthis];
+               else
+                       t = &mono_defaults.int_class->byval_arg;
+               t = mini_type_get_underlying_type (NULL, t);
+
+               if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
+                       /* FIXME: */
+                       NOT_IMPLEMENTED;
+               }
+
+               in = call->args [i];
+
+               switch (ainfo->regtype) {
+               case RegTypeGeneral:
+                       if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg + 1;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg + 2;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg + 1, FALSE);
+                       } else if (!t->byref && ((t->type == MONO_TYPE_R8) || (t->type == MONO_TYPE_R4))) {
+#ifndef MONO_ARCH_SOFT_FLOAT
+                               int creg;
+#endif
+
+                               if (ainfo->size == 4) {
+#ifdef MONO_ARCH_SOFT_FLOAT
+                                       /* mono_emit_call_args () have already done the r8->r4 conversion */
+                                       /* The converted value is in an int vreg */
+                                       MONO_INST_NEW (cfg, ins, OP_MOVE);
+                                       ins->dreg = mono_alloc_ireg (cfg);
+                                       ins->sreg1 = in->dreg;
+                                       MONO_ADD_INS (cfg->cbb, ins);
+                                       mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+#else
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
+                                       creg = mono_alloc_ireg (cfg);
+                                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
+                                       mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg, FALSE);
+#endif
+                               } else {
+#ifdef MONO_ARCH_SOFT_FLOAT
+                                       MONO_INST_NEW (cfg, ins, OP_FGETLOW32);
+                                       ins->dreg = mono_alloc_ireg (cfg);
+                                       ins->sreg1 = in->dreg;
+                                       MONO_ADD_INS (cfg->cbb, ins);
+                                       mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+
+                                       MONO_INST_NEW (cfg, ins, OP_FGETHIGH32);
+                                       ins->dreg = mono_alloc_ireg (cfg);
+                                       ins->sreg1 = in->dreg;
+                                       MONO_ADD_INS (cfg->cbb, ins);
+                                       mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg + 1, FALSE);
+#else
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
+                                       creg = mono_alloc_ireg (cfg);
+                                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
+                                       mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg, FALSE);
+                                       creg = mono_alloc_ireg (cfg);
+                                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8 + 4));
+                                       mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg + 1, FALSE);
+#endif
+                               }
+                               cfg->flags |= MONO_CFG_HAS_FPOUT;
+                       } else {
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg;
+                               MONO_ADD_INS (cfg->cbb, ins);
+
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+                       }
+                       break;
+               case RegTypeStructByAddr:
+                       NOT_IMPLEMENTED;
+#if 0
+                       /* FIXME: where is the data allocated? */
+                       arg->backend.reg3 = ainfo->reg;
+                       call->used_iregs |= 1 << ainfo->reg;
+                       g_assert_not_reached ();
+#endif
+                       break;
+               case RegTypeStructByVal:
+                       MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
+                       ins->opcode = OP_OUTARG_VT;
+                       ins->sreg1 = in->dreg;
+                       ins->klass = in->klass;
+                       ins->inst_p0 = call;
+                       ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
+                       memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
+                       MONO_ADD_INS (cfg->cbb, ins);
+                       break;
+               case RegTypeBase:
+                       if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
+                       } else if (!t->byref && ((t->type == MONO_TYPE_R4) || (t->type == MONO_TYPE_R8))) {
+                               if (t->type == MONO_TYPE_R8) {
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
+                               } else {
+#ifdef MONO_ARCH_SOFT_FLOAT
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
+#else
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
+#endif
+                               }
+                       } else {
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
+                       }
+                       break;
+               case RegTypeBaseGen:
+                       if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, (G_BYTE_ORDER == G_BIG_ENDIAN) ? in->dreg + 1 : in->dreg + 2);
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = G_BYTE_ORDER == G_BIG_ENDIAN ? in->dreg + 2 : in->dreg + 1;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ARMREG_R3, FALSE);
+                       } else if (!t->byref && (t->type == MONO_TYPE_R8)) {
+                               int creg;
+
+#ifdef MONO_ARCH_SOFT_FLOAT
+                               g_assert_not_reached ();
+#endif
+
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
+                               creg = mono_alloc_ireg (cfg);
+                               mono_call_inst_add_outarg_reg (cfg, call, creg, ARMREG_R3, FALSE);
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
+                               creg = mono_alloc_ireg (cfg);
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 4));
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, creg);
+                               cfg->flags |= MONO_CFG_HAS_FPOUT;
+                       } else {
+                               g_assert_not_reached ();
+                       }
+                       break;
+               case RegTypeFP: {
+                       /* FIXME: */
+                       NOT_IMPLEMENTED;
+#if 0
+                       arg->backend.reg3 = ainfo->reg;
+                       /* FP args are passed in int regs */
+                       call->used_iregs |= 1 << ainfo->reg;
+                       if (ainfo->size == 8) {
+                               arg->opcode = OP_OUTARG_R8;
+                               call->used_iregs |= 1 << (ainfo->reg + 1);
+                       } else {
+                               arg->opcode = OP_OUTARG_R4;
+                       }
+#endif
+                       cfg->flags |= MONO_CFG_HAS_FPOUT;
+                       break;
+               }
+               default:
+                       g_assert_not_reached ();
+               }
+       }
+
+       if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
+               MonoInst *vtarg;
+
+               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
+               vtarg->sreg1 = call->vret_var->dreg;
+               vtarg->dreg = mono_alloc_preg (cfg);
+               MONO_ADD_INS (cfg->cbb, vtarg);
+
+               mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
+       }
+
+       call->stack_usage = cinfo->stack_usage;
+
+       g_free (cinfo);
+}
+/*
+ * mono_arch_emit_outarg_vt:
+ *
+ *   Emit the IR which passes a value type as an outgoing call argument.
+ * INS->inst_p0 is the call instruction and INS->inst_p1 the ArgInfo describing
+ * the argument; SRC->dreg is used as the base address of the value type.
+ * The first ainfo->size pointer-sized words are loaded into freshly allocated
+ * vregs and registered as outgoing register arguments for ainfo->reg,
+ * ainfo->reg + 1, ... The following ainfo->vtsize words, if any, are copied
+ * to the stack at ARMREG_SP + ainfo->offset with mini_emit_memcpy2 ().
+ */
+void
+mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
+{
+       MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
+       ArgInfo *ainfo = ins->inst_p1;
+       int ovf_size = ainfo->vtsize; /* number of words which go to the stack */
+       int doffset = ainfo->offset; /* SP relative destination of those words */
+       int i, soffset, dreg;
+
+       soffset = 0;
+       for (i = 0; i < ainfo->size; ++i) { /* one iteration per register-passed word */
+               dreg = mono_alloc_ireg (cfg);
+               MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
+               mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
+               soffset += sizeof (gpointer); /* next word of the source */
+       }
+       //g_print ("vt size: %d at R%d + %d\n", doffset, vt->inst_basereg, vt->inst_offset);
+       if (ovf_size != 0) /* soffset now points just past the register-passed part */
+               mini_emit_memcpy2 (cfg, ARMREG_SP, doffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+}
+/*
+ * mono_arch_emit_setret:
+ *
+ *   Emit the IR which places VAL into the return location of METHOD. The
+ * instruction is chosen from the underlying return type of the signature:
+ * non-byref I8/U8 values use OP_SETLRET; with MONO_ARCH_SOFT_FLOAT, R8 uses
+ * OP_SETFRET and R4 a plain OP_MOVE; everything else is an OP_MOVE into
+ * cfg->ret->dreg.
+ */
+void
+mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
+{
+       MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);
+
+       if (!ret->byref) {
+               if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
+                       MonoInst *ins;
+
+                       MONO_INST_NEW (cfg, ins, OP_SETLRET);
+                       ins->sreg1 = val->dreg + 1; /* presumably dreg + 1 and dreg + 2 are the vregs holding
+                                                    * the two 32 bit halves of the long - TODO confirm */
+                       ins->sreg2 = val->dreg + 2;
+                       MONO_ADD_INS (cfg->cbb, ins);
+                       return;
+               }
+#ifdef MONO_ARCH_SOFT_FLOAT
+               if (ret->type == MONO_TYPE_R8) {
+                       MonoInst *ins;
+
+                       MONO_INST_NEW (cfg, ins, OP_SETFRET);
+                       ins->dreg = cfg->ret->dreg;
+                       ins->sreg1 = val->dreg;
+                       MONO_ADD_INS (cfg->cbb, ins);
+                       return;
+               }
+               if (ret->type == MONO_TYPE_R4) {
+                       /* Already converted to an int in method_to_ir () */
+                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+                       return;
+               }                       
+#endif
+       }
+
+       /* FIXME:
+        * Fallback for every type not handled above, byref returns included:
+        * a plain register move into cfg->ret->dreg. */
+       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+}
+/*
+ * mono_arch_is_inst_imm:
+ *
+ *   Always returns TRUE; IMM is not inspected.
+ * NOTE(review): presumably queried by arch-independent code to decide whether
+ * a constant may stay an immediate operand - confirm against the callers.
+ * Immediates which fail mono_arm_is_rotated_imm8 () are turned into an
+ * OP_ICONST plus a reg-reg opcode by mono_arch_lowering_pass () in this file.
+ */
+gboolean 
+mono_arch_is_inst_imm (gint64 imm)
+{
+       return TRUE;
+}
+
 /*
  * Allow tracing to work with this interface (with an optional argument)
  */
@@ -958,7 +1475,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
        int save_mode = SAVE_NONE;
        int offset;
        MonoMethod *method = cfg->method;
-       int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
+       int rtype = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret)->type;
        int save_offset = cfg->param_area;
        save_offset += 7;
        save_offset &= ~7;
@@ -1085,23 +1602,30 @@ if (ins->flags & MONO_INST_BRLABEL) { \
 
 #define EMIT_COND_SYSTEM_EXCEPTION(cond,exc_name) EMIT_COND_SYSTEM_EXCEPTION_FLAGS(branch_cc_table [(cond)], (exc_name))
 
-static void
-peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+void
+mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *last_ins = NULL;
-       ins = bb->code;
+} /* mono_arch_peephole_pass_1: empty body, no instruction of BB is changed by this pass */
 
-       while (ins) {
+void
+mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+       MonoInst *ins, *n, *last_ins = NULL;
 
+       MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
                switch (ins->opcode) {
                case OP_MUL_IMM: 
+               case OP_IMUL_IMM: 
+                       /* Already done by an arch-independent pass */
+                       if (cfg->new_ir)
+                               break;
+
                        /* remove unnecessary multiplication with 1 */
                        if (ins->inst_imm == 1) {
                                if (ins->dreg != ins->sreg1) {
                                        ins->opcode = OP_MOVE;
                                } else {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DELETE_INS (bb, ins);
                                        continue;
                                }
                        } else {
@@ -1123,8 +1647,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DELETE_INS (bb, ins);
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
@@ -1147,8 +1670,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                              ins->inst_offset == last_ins->inst_offset) {
 
                                if (ins->dreg == last_ins->dreg) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DELETE_INS (bb, ins);
                                        continue;
                                } else {
                                        ins->opcode = OP_MOVE;
@@ -1181,7 +1703,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
+                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? OP_ICONV_TO_I1 : OP_ICONV_TO_U1;
                                ins->sreg1 = last_ins->sreg1;                           
                        }
                        break;
@@ -1190,22 +1712,17 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
+                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? OP_ICONV_TO_I2 : OP_ICONV_TO_U2;
                                ins->sreg1 = last_ins->sreg1;                           
                        }
                        break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
                case OP_MOVE:
-               case OP_SETREG:
                        ins->opcode = OP_MOVE;
                        /* 
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
-                               if (last_ins)
-                                       last_ins->next = ins->next;                             
-                               ins = ins->next;
+                               MONO_DELETE_INS (bb, ins);
                                continue;
                        }
                        /* 
@@ -1215,8 +1732,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && last_ins->opcode == OP_MOVE &&
                            ins->sreg1 == last_ins->dreg &&
                            ins->dreg == last_ins->sreg1) {
-                               last_ins->next = ins->next;                             
-                               ins = ins->next;                                
+                               MONO_DELETE_INS (bb, ins);
                                continue;
                        }
                        break;
@@ -1256,24 +1772,9 @@ branch_cc_table [] = {
        ARMCOND_LO
 };
 
-
-static void
-insert_after_ins (MonoBasicBlock *bb, MonoInst *ins, MonoInst *to_insert)
-{
-       if (ins == NULL) {
-               ins = bb->code;
-               bb->code = to_insert;
-               to_insert->next = ins;
-       } else {
-               to_insert->next = ins->next;
-               ins->next = to_insert;
-       }
-}
-
 #define NEW_INS(cfg,dest,op) do {       \
-               (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
-               (dest)->opcode = (op);  \
-               insert_after_ins (bb, last_ins, (dest)); \
+               MONO_INST_NEW ((cfg), (dest), (op)); \
+        mono_bblock_insert_before_ins (bb, ins, (dest)); \
        } while (0)
 
 static int
@@ -1281,13 +1782,15 @@ map_to_reg_reg_op (int op)
 {
        switch (op) {
        case OP_ADD_IMM:
-               return CEE_ADD;
+               return OP_IADD;
        case OP_SUB_IMM:
-               return CEE_SUB;
+               return OP_ISUB;
        case OP_AND_IMM:
-               return CEE_AND;
+               return OP_IAND;
        case OP_COMPARE_IMM:
                return OP_COMPARE;
+       case OP_ICOMPARE_IMM:
+               return OP_ICOMPARE;
        case OP_ADDCC_IMM:
                return OP_ADDCC;
        case OP_ADC_IMM:
@@ -1297,9 +1800,9 @@ map_to_reg_reg_op (int op)
        case OP_SBB_IMM:
                return OP_SBB;
        case OP_OR_IMM:
-               return CEE_OR;
+               return OP_IOR;
        case OP_XOR_IMM:
-               return CEE_XOR;
+               return OP_IXOR;
        case OP_LOAD_MEMBASE:
                return OP_LOAD_MEMINDEX;
        case OP_LOADI4_MEMBASE:
@@ -1343,7 +1846,7 @@ map_to_reg_reg_op (int op)
  * represented with very simple instructions with no register
  * requirements.
  */
-static void
+void
 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 {
        MonoInst *ins, *temp, *last_ins = NULL;
@@ -1353,29 +1856,40 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
        if (bb->max_vreg > cfg->rs->next_vreg)
                cfg->rs->next_vreg = bb->max_vreg;
 
-       ins = bb->code;
-       while (ins) {
+       MONO_BB_FOR_EACH_INS (bb, ins) {
 loop_start:
                switch (ins->opcode) {
                case OP_ADD_IMM:
                case OP_SUB_IMM:
                case OP_AND_IMM:
                case OP_COMPARE_IMM:
+               case OP_ICOMPARE_IMM:
                case OP_ADDCC_IMM:
                case OP_ADC_IMM:
                case OP_SUBCC_IMM:
                case OP_SBB_IMM:
                case OP_OR_IMM:
                case OP_XOR_IMM:
+               case OP_IADD_IMM:
+               case OP_ISUB_IMM:
+               case OP_IAND_IMM:
+               case OP_IADC_IMM:
+               case OP_ISBB_IMM:
+               case OP_IOR_IMM:
+               case OP_IXOR_IMM:
                        if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount)) < 0) {
                                NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
                                temp->dreg = mono_regstate_next_int (cfg->rs);
                                ins->sreg2 = temp->dreg;
-                               ins->opcode = map_to_reg_reg_op (ins->opcode);
+                               if (cfg->new_ir)
+                                       ins->opcode = mono_op_imm_to_op (ins->opcode);
+                               else
+                                       ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
                        break;
                case OP_MUL_IMM:
+               case OP_IMUL_IMM:
                        if (ins->inst_imm == 1) {
                                ins->opcode = OP_MOVE;
                                break;
@@ -1395,7 +1909,14 @@ loop_start:
                        temp->inst_c0 = ins->inst_imm;
                        temp->dreg = mono_regstate_next_int (cfg->rs);
                        ins->sreg2 = temp->dreg;
-                       ins->opcode = CEE_MUL;
+                       ins->opcode = OP_IMUL;
+                       break;
+               case OP_LOCALLOC_IMM:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_imm;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg1 = temp->dreg;
+                       ins->opcode = OP_LOCALLOC;
                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
@@ -1491,24 +2012,48 @@ loop_start:
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        last_ins = temp;
                        goto loop_start; /* make it handle the possibly big ins->inst_offset */
+               case OP_FCOMPARE: {
+                       gboolean swap = FALSE;
+                       int reg;
+
+                       /* Some fp compares require swapped operands */
+                       g_assert (ins->next);
+                       switch (ins->next->opcode) {
+                       case OP_FBGT:
+                               ins->next->opcode = OP_FBLT;
+                               swap = TRUE;
+                               break;
+                       case OP_FBGT_UN:
+                               ins->next->opcode = OP_FBLT_UN;
+                               swap = TRUE;
+                               break;
+                       case OP_FBLE:
+                               ins->next->opcode = OP_FBGE;
+                               swap = TRUE;
+                               break;
+                       case OP_FBLE_UN:
+                               ins->next->opcode = OP_FBGE_UN;
+                               swap = TRUE;
+                               break;
+                       default:
+                               break;
+                       }
+                       if (swap) {
+                               reg = ins->sreg1;
+                               ins->sreg1 = ins->sreg2;
+                               ins->sreg2 = reg;
+                       }
+                       break;
+               }
                }
+
                last_ins = ins;
-               ins = ins->next;
        }
        bb->last_ins = last_ins;
        bb->max_vreg = cfg->rs->next_vreg;
 
 }
 
-void
-mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
-{
-       if (!bb->code)
-               return;
-       mono_arch_lowering_pass (cfg, bb);
-       mono_local_regalloc (cfg, bb);
-}
-
 static guchar*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
 {
@@ -1583,6 +2128,9 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                                return 1;
                        } else if ((thunks [0] == 0) && (thunks [1] == 0) && (thunks [2] == 0)) {
                                /* found a free slot instead: emit thunk */
+                               /* ARMREG_IP is fine to use since this can't be an IMT call
+                                * which is indirect
+                                */
                                code = (guchar*)thunks;
                                ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
                                if (thumb_supported)
@@ -1705,8 +2253,56 @@ arm_patch (guchar *code, const guchar *target)
         *      address constant // execution never reaches here
         */
        if ((ins & 0x0ffffff0) == 0x12fff10) {
-               /* branch and exchange: the address is constructed in a reg */
+               /* Branch and exchange: the address is constructed in a reg 
+                * We can patch BX when the code sequence is the following:
+                *  ldr     ip, [pc, #0]    ; 0x8
+                *  b       0xc
+                *  .word code_ptr
+                *  mov     lr, pc
+                *  bx      ip
+                * */
+               guint32 ccode [4];
+               guint8 *emit = (guint8*)ccode;
+               ARM_LDR_IMM (emit, ARMREG_IP, ARMREG_PC, 0);
+               ARM_B (emit, 0);
+               ARM_MOV_REG_REG (emit, ARMREG_LR, ARMREG_PC);
+               ARM_BX (emit, ARMREG_IP);
+
+               /*patching from magic trampoline*/
+               if (ins == ccode [3]) {
+                       g_assert (code32 [-4] == ccode [0]);
+                       g_assert (code32 [-3] == ccode [1]);
+                       g_assert (code32 [-1] == ccode [2]);
+                       code32 [-2] = (guint32)target;
+                       return;
+               }
+               /*patching from JIT*/
+               if (ins == ccode [0]) {
+                       g_assert (code32 [1] == ccode [1]);
+                       g_assert (code32 [3] == ccode [2]);
+                       g_assert (code32 [4] == ccode [3]);
+                       code32 [2] = (guint32)target;
+                       return;
+               }
                g_assert_not_reached ();
+       } else if ((ins & 0x0ffffff0) == 0x12fff30) {
+               /*
+                * ldr ip, [pc, #0]
+                * b 0xc
+                * .word code_ptr
+                * blx ip
+                */
+               guint32 ccode [4];
+               guint8 *emit = (guint8*)ccode;
+               ARM_LDR_IMM (emit, ARMREG_IP, ARMREG_PC, 0);
+               ARM_B (emit, 0);
+               ARM_BLX_REG (emit, ARMREG_IP);
+
+               g_assert (code32 [-3] == ccode [0]);
+               g_assert (code32 [-2] == ccode [1]);
+               g_assert (code32 [0] == ccode [2]);
+
+               code32 [-1] = (guint32)target;
        } else {
                guint32 ccode [4];
                guint32 *tmp = ccode;
@@ -1736,7 +2332,7 @@ arm_patch (guchar *code, const guchar *target)
  * to be used with the emit macros.
  * Return -1 otherwise.
  */
-static int
+int
 mono_arm_is_rotated_imm8 (guint32 val, gint *rot_amount)
 {
        guint32 res, i;
@@ -1801,21 +2397,140 @@ mono_arm_emit_load_imm (guint8 *code, int dreg, guint32 val)
        return code;
 }
 
-void
-mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
+/*
+ * emit_load_volatile_arguments:
+ *
+ *  Load volatile arguments from the stack to the original input registers.
+ * Required before a tail call.
+ *
+ *  CODE is the native code emission pointer; the advanced pointer is
+ * returned. The argument layout is recomputed with calculate_sizes () for the
+ * signature of cfg->method and freed again before returning.
+ *
+ *  Coverage is partial. For arguments which are not OP_REGVAR: 1 and 2 byte
+ * RegTypeGeneral arguments, RegTypeBaseGen, RegTypeBase and RegTypeStructByVal
+ * arguments with ainfo->vtsize != 0 end in NOT_IMPLEMENTED, RegTypeFP asserts
+ * and nothing is emitted for RegTypeStructByAddr. For OP_REGVAR arguments only
+ * RegTypeGeneral emits code.
+ */
+static guint8*
+emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
 {
-       MonoInst *ins;
-       MonoCallInst *call;
-       guint offset;
-       guint8 *code = cfg->native_code + cfg->code_len;
-       MonoInst *last_ins = NULL;
+       MonoMethod *method = cfg->method;
+       MonoMethodSignature *sig;
+       MonoInst *inst;
+       CallInfo *cinfo;
+       guint32 i, pos;
+
+       /* FIXME: Generate intermediate code instead */
+
+       sig = mono_method_signature (method);
+
+       /* This is the opposite of the code in emit_prolog */
+
+       pos = 0; /* index into cfg->args, incremented once per loop iteration like i */
+
+       cinfo = calculate_sizes (sig, sig->pinvoke);
+
+       if (MONO_TYPE_ISSTRUCT (sig->ret)) { /* reload cinfo->ret.reg from the cfg->vret_addr variable */
+               ArgInfo *ainfo = &cinfo->ret;
+               inst = cfg->vret_addr;
+               g_assert (arm_is_imm12 (inst->inst_offset));
+               ARM_LDR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+       }
+       for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
+               ArgInfo *ainfo = cinfo->args + i;
+               inst = cfg->args [pos];
+               
+               if (cfg->verbose_level > 2)
+                       g_print ("Loading argument %d (type: %d)\n", i, ainfo->regtype);
+               if (inst->opcode == OP_REGVAR) {
+                       if (ainfo->regtype == RegTypeGeneral)
+                               ARM_MOV_REG_REG (code, inst->dreg, ainfo->reg); /* NOTE(review): this writes inst->dreg from
+                                * ainfo->reg, while every load in the non-REGVAR branch below
+                                * targets ainfo->reg. Restoring the input register would be
+                                * (code, ainfo->reg, inst->dreg) - confirm the operand order. */
+                       else if (ainfo->regtype == RegTypeFP) {
+                               g_assert_not_reached ();
+                       } else if (ainfo->regtype == RegTypeBase) {
+                               // FIXME:
+                               NOT_IMPLEMENTED;
+                               /*
+                               if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
+                                       ARM_LDR_IMM (code, inst->dreg, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                       ARM_LDR_REG_REG (code, inst->dreg, ARMREG_SP, ARMREG_IP);
+                               }
+                               */
+                       } else
+                               g_assert_not_reached ();
+               } else {
+                       if (ainfo->regtype == RegTypeGeneral) {
+                               switch (ainfo->size) {
+                               case 1:
+                               case 2:
+                                       // FIXME:
+                                       NOT_IMPLEMENTED;
+                                       break;
+                               case 8: /* two words: ainfo->reg and ainfo->reg + 1 */
+                                       g_assert (arm_is_imm12 (inst->inst_offset));
+                                       ARM_LDR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       g_assert (arm_is_imm12 (inst->inst_offset + 4));
+                                       ARM_LDR_IMM (code, ainfo->reg + 1, inst->inst_basereg, inst->inst_offset + 4);
+                                       break;
+                               default:
+                                       if (arm_is_imm12 (inst->inst_offset)) {
+                                               ARM_LDR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       } else { /* offset does not fit the immediate form: go through ARMREG_IP */
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_LDR_REG_REG (code, ainfo->reg, inst->inst_basereg, ARMREG_IP);
+                                       }
+                                       break;
+                               }
+                       } else if (ainfo->regtype == RegTypeBaseGen) {
+                               // FIXME:
+                               NOT_IMPLEMENTED;
+                       } else if (ainfo->regtype == RegTypeBase) {
+                               // FIXME:
+                               NOT_IMPLEMENTED;
+                       } else if (ainfo->regtype == RegTypeFP) {
+                               g_assert_not_reached ();
+                       } else if (ainfo->regtype == RegTypeStructByVal) {
+                               int doffset = inst->inst_offset;
+                               int soffset = 0; /* NOTE: only incremented below, never read */
+                               int cur_reg;
+                               int size = 0; /* NOTE: assigned below, never read */
+                               if (mono_class_from_mono_type (inst->inst_vtype))
+                                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
+                               for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) { /* one word per register */
+                                       if (arm_is_imm12 (doffset)) {
+                                               ARM_LDR_IMM (code, ainfo->reg + cur_reg, inst->inst_basereg, doffset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, doffset);
+                                               ARM_LDR_REG_REG (code, ainfo->reg + cur_reg, inst->inst_basereg, ARMREG_IP);
+                                       }
+                                       soffset += sizeof (gpointer);
+                                       doffset += sizeof (gpointer);
+                               }
+                               if (ainfo->vtsize)
+                                       // FIXME:
+                                       NOT_IMPLEMENTED;
+                       } else if (ainfo->regtype == RegTypeStructByAddr) { /* no code is emitted for this case */
+                       } else {
+                               // FIXME:
+                               NOT_IMPLEMENTED;
+                       }
+               }
+               pos ++;
+       }
+
+       g_free (cinfo);
+
+       return code;
+}
+
+#ifndef DISABLE_JIT
+
+void
+mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+       MonoInst *ins;
+       MonoCallInst *call;
+       guint offset;
+       guint8 *code = cfg->native_code + cfg->code_len;
+       MonoInst *last_ins = NULL;
        guint last_offset = 0;
        int max_len, cpos;
        int imm8, rot_amount;
 
-       if (cfg->opt & MONO_OPT_PEEPHOLE)
-               peephole_pass (cfg, bb);
-
        /* we don't align basic blocks of loops on arm */
 
        if (cfg->verbose_level > 2)
@@ -1834,8 +2549,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                //x86_inc_mem (code, &cov->data [bb->dfn].count); 
        }
 
-       ins = bb->code;
-       while (ins) {
+    if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num) {
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                        (gpointer)"mono_break");
+               code = emit_call_seq (cfg, code);
+       }
+
+       MONO_BB_FOR_EACH_INS (bb, ins) {
                offset = code - cfg->native_code;
 
                max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
@@ -1901,18 +2621,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_STRB_REG_REG (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STOREI2_MEMINDEX:
-                       /* note: the args are reversed in the macro */
-                       ARM_STRH_REG_REG (code, ins->inst_destbasereg, ins->sreg1, ins->sreg2);
+                       ARM_STRH_REG_REG (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STORE_MEMINDEX:
                case OP_STOREI4_MEMINDEX:
                        ARM_STR_REG_REG (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
-               case CEE_LDIND_I:
-               case CEE_LDIND_I4:
-               case CEE_LDIND_U4:
-                       g_assert_not_reached ();
-                       break;
                case OP_LOADU4_MEM:
                        g_assert_not_reached ();
                        break;
@@ -1922,19 +2636,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_LDR_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI1_MEMINDEX:
-                       /* note: the args are reversed in the macro */
-                       ARM_LDRSB_REG_REG (code, ins->inst_basereg, ins->dreg, ins->sreg2);
+                       ARM_LDRSB_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADU1_MEMINDEX:
                        ARM_LDRB_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI2_MEMINDEX:
-                       /* note: the args are reversed in the macro */
-                       ARM_LDRSH_REG_REG (code, ins->inst_basereg, ins->dreg, ins->sreg2);
+                       ARM_LDRSH_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADU2_MEMINDEX:
-                       /* note: the args are reversed in the macro */
-                       ARM_LDRH_REG_REG (code, ins->inst_basereg, ins->dreg, ins->sreg2);
+                       ARM_LDRH_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
@@ -1963,42 +2674,59 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert (arm_is_imm8 (ins->inst_offset));
                        ARM_LDRSH_IMM (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        break;
-               case CEE_CONV_I1:
+               case OP_ICONV_TO_I1:
                        ARM_SHL_IMM (code, ins->dreg, ins->sreg1, 24);
                        ARM_SAR_IMM (code, ins->dreg, ins->dreg, 24);
                        break;
-               case CEE_CONV_I2:
+               case OP_ICONV_TO_I2:
                        ARM_SHL_IMM (code, ins->dreg, ins->sreg1, 16);
                        ARM_SAR_IMM (code, ins->dreg, ins->dreg, 16);
                        break;
-               case CEE_CONV_U1:
+               case OP_ICONV_TO_U1:
                        ARM_AND_REG_IMM8 (code, ins->dreg, ins->sreg1, 0xff);
                        break;
-               case CEE_CONV_U2:
+               case OP_ICONV_TO_U2:
                        ARM_SHL_IMM (code, ins->dreg, ins->sreg1, 16);
                        ARM_SHR_IMM (code, ins->dreg, ins->dreg, 16);
                        break;
                case OP_COMPARE:
+               case OP_ICOMPARE:
                        ARM_CMP_REG_REG (code, ins->sreg1, ins->sreg2);
                        break;
                case OP_COMPARE_IMM:
+               case OP_ICOMPARE_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_CMP_REG_IMM (code, ins->sreg1, imm8, rot_amount);
                        break;
                case OP_BREAK:
-                       *(int*)code = 0xe7f001f0;
-                       *(int*)code = 0xef9f0001;
-                       code += 4;
+                       /*
+                        * gdb does not like encountering the hw breakpoint instruction in the debugged code.
+                        * So instead of emitting a trap, we emit a call to a C function and place a
+                        * breakpoint there.
+                        */
+                       //*(int*)code = 0xef9f0001;
+                       //code += 4;
                        //ARM_DBRK (code);
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                                (gpointer)"mono_break");
+                       code = emit_call_seq (cfg, code);
+                       break;
+               case OP_NOP:
+               case OP_DUMMY_USE:
+               case OP_DUMMY_STORE:
+               case OP_NOT_REACHED:
+               case OP_NOT_NULL:
                        break;
                case OP_ADDCC:
+               case OP_IADDCC:
                        ARM_ADDS_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
-               case CEE_ADD:
+               case OP_IADD:
                        ARM_ADD_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADC:
+               case OP_IADC:
                        ARM_ADCS_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADDCC_IMM:
@@ -2007,28 +2735,30 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_ADDS_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
                case OP_ADD_IMM:
+               case OP_IADD_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_ADD_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
                case OP_ADC_IMM:
+               case OP_IADC_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_ADCS_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
-               case CEE_ADD_OVF:
+               case OP_IADD_OVF:
                        ARM_ADD_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_ADD_OVF_UN:
+               case OP_IADD_OVF_UN:
                        ARM_ADD_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_SUB_OVF:
+               case OP_ISUB_OVF:
                        ARM_SUB_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_SUB_OVF_UN:
+               case OP_ISUB_OVF_UN:
                        ARM_SUB_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "OverflowException");
                        break;
@@ -2049,6 +2779,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_SUBCC:
+               case OP_ISUBCC:
                        ARM_SUBS_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SUBCC_IMM:
@@ -2056,18 +2787,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert (imm8 >= 0);
                        ARM_SUBS_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
-               case CEE_SUB:
+               case OP_ISUB:
                        ARM_SUB_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SBB:
+               case OP_ISBB:
                        ARM_SBCS_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SUB_IMM:
+               case OP_ISUB_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_SUB_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
                case OP_SBB_IMM:
+               case OP_ISBB_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_SBCS_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
@@ -2082,66 +2816,78 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert (imm8 >= 0);
                        ARM_RSC_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
-               case CEE_AND:
+               case OP_IAND:
                        ARM_AND_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_AND_IMM:
+               case OP_IAND_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_AND_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
-               case CEE_DIV:
-               case CEE_DIV_UN:
+               case OP_IDIV:
+               case OP_IDIV_UN:
                case OP_DIV_IMM:
-               case CEE_REM:
-               case CEE_REM_UN:
+               case OP_IREM:
+               case OP_IREM_UN:
                case OP_REM_IMM:
                        /* crappy ARM arch doesn't have a DIV instruction */
                        g_assert_not_reached ();
-               case CEE_OR:
+               case OP_IOR:
                        ARM_ORR_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_OR_IMM:
+               case OP_IOR_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_ORR_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
-               case CEE_XOR:
+               case OP_IXOR:
                        ARM_EOR_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_XOR_IMM:
+               case OP_IXOR_IMM:
                        imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount);
                        g_assert (imm8 >= 0);
                        ARM_EOR_REG_IMM (code, ins->dreg, ins->sreg1, imm8, rot_amount);
                        break;
-               case CEE_SHL:
+               case OP_ISHL:
                        ARM_SHL_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHL_IMM:
+               case OP_ISHL_IMM:
                        if (ins->inst_imm)
                                ARM_SHL_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       else if (ins->dreg != ins->sreg1)
+                               ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_SHR:
+               case OP_ISHR:
                        ARM_SAR_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHR_IMM:
+               case OP_ISHR_IMM:
                        if (ins->inst_imm)
                                ARM_SAR_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       else if (ins->dreg != ins->sreg1)
+                               ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
                case OP_SHR_UN_IMM:
+               case OP_ISHR_UN_IMM:
                        if (ins->inst_imm)
                                ARM_SHR_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       else if (ins->dreg != ins->sreg1)
+                               ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_SHR_UN:
+               case OP_ISHR_UN:
                        ARM_SHR_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
-               case CEE_NOT:
+               case OP_INOT:
                        ARM_MVN_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_NEG:
+               case OP_INEG:
                        ARM_RSB_REG_IMM8 (code, ins->dreg, ins->sreg1, 0);
                        break;
-               case CEE_MUL:
+               case OP_IMUL:
                        if (ins->dreg == ins->sreg2)
                                ARM_MUL_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        else
@@ -2150,26 +2896,30 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_MUL_IMM:
                        g_assert_not_reached ();
                        break;
-               case CEE_MUL_OVF:
+               case OP_IMUL_OVF:
                        /* FIXME: handle ovf/ sreg2 != dreg */
                        ARM_MUL_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
-               case CEE_MUL_OVF_UN:
+               case OP_IMUL_OVF_UN:
                        /* FIXME: handle ovf/ sreg2 != dreg */
                        ARM_MUL_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ICONST:
-               case OP_SETREGIMM:
                        code = mono_arm_emit_load_imm (code, ins->dreg, ins->inst_c0);
                        break;
                case OP_AOTCONST:
-                       g_assert_not_reached ();
+                       /* Load the GOT offset */
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+                       ARM_LDR_IMM (code, ins->dreg, ARMREG_PC, 0);
+                       ARM_B (code, 0);
+                       *(gpointer*)code = NULL;
+                       code += 4;
+                       /* Load the value from the GOT */
+                       ARM_LDR_REG_REG (code, ins->dreg, ARMREG_PC, ins->dreg);
                        break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
+               case OP_ICONV_TO_I4:
+               case OP_ICONV_TO_U4:
                case OP_MOVE:
-               case OP_SETREG:
                        if (ins->dreg != ins->sreg1)
                                ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
@@ -2185,7 +2935,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ARM_MOV_REG_REG (code, ARM_MSW_REG, saved);
                        break;
                }
-               case OP_SETFREG:
                case OP_FMOVE:
 #ifdef ARM_FPU_FPA
                        ARM_MVFD (code, ins->dreg, ins->sreg1);
@@ -2206,6 +2955,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         * Keep in sync with mono_arch_emit_epilog
                         */
                        g_assert (!cfg->method->save_lmf);
+
+                       code = emit_load_volatile_arguments (cfg, code);
+
                        code = emit_big_add (code, ARMREG_SP, cfg->frame_reg, cfg->stack_usage);
                        ARM_POP_NWB (code, cfg->used_int_regs | ((1 << ARMREG_SP)) | ((1 << ARMREG_LR)));
                        mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
@@ -2230,39 +2982,55 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCALL:
                case OP_LCALL:
                case OP_VCALL:
+               case OP_VCALL2:
                case OP_VOIDCALL:
-               case CEE_CALL:
+               case OP_CALL:
                        call = (MonoCallInst*)ins;
                        if (ins->flags & MONO_INST_HAS_METHOD)
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
                        else
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
-                       if (cfg->method->dynamic) {
-                               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-                               ARM_B (code, 0);
-                               *(gpointer*)code = NULL;
-                               code += 4;
-                               code = emit_call_reg (code, ARMREG_IP);
-                       } else {
-                               ARM_BL (code, 0);
-                       }
+                       code = emit_call_seq (cfg, code);
+                       code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_FCALL_REG:
                case OP_LCALL_REG:
                case OP_VCALL_REG:
+               case OP_VCALL2_REG:
                case OP_VOIDCALL_REG:
                case OP_CALL_REG:
                        code = emit_call_reg (code, ins->sreg1);
+                       code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_FCALL_MEMBASE:
                case OP_LCALL_MEMBASE:
                case OP_VCALL_MEMBASE:
+               case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
                        g_assert (arm_is_imm12 (ins->inst_offset));
                        g_assert (ins->sreg1 != ARMREG_LR);
-                       ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
-                       ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
+                       call = (MonoCallInst*)ins;
+                       if (call->method->klass->flags & TYPE_ATTRIBUTE_INTERFACE) {
+                               ARM_ADD_REG_IMM8 (code, ARMREG_LR, ARMREG_PC, 4);
+                               ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
+                               /* 
+                                * We can't embed the method in the code stream in PIC code, or
+                                * in gshared code.
+                                * Instead, we put it in V5 in code emitted by 
+                                * mono_arch_emit_imt_argument (), and embed NULL here to 
+                                * signal the IMT thunk that the value is in V5.
+                                */
+                               if (call->dynamic_imt_arg)
+                                       *((gpointer*)code) = NULL;
+                               else
+                                       *((gpointer*)code) = (gpointer)call->method;
+                               code += 4;
+                       } else {
+                               ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
+                               ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
+                       }
+                       code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_OUTARG:
                        g_assert_not_reached ();
@@ -2274,8 +3042,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        alloca_waste &= ~7;
                        /* round the size to 8 bytes */
                        ARM_ADD_REG_IMM8 (code, ins->dreg, ins->sreg1, 7);
-                       ARM_BIC_REG_IMM8 (code, ins->dreg, ins->sreg1, 7);
-                       ARM_ADD_REG_IMM8 (code, ins->dreg, ins->dreg, alloca_waste);
+                       ARM_BIC_REG_IMM8 (code, ins->dreg, ins->dreg, 7);
+                       if (alloca_waste)
+                               ARM_ADD_REG_IMM8 (code, ins->dreg, ins->dreg, alloca_waste);
                        ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ins->dreg);
                        /* memzero the area: dreg holds the size, sp is the pointer */
                        if (ins->flags & MONO_INST_INIT) {
@@ -2288,30 +3057,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                arm_patch (branch_to_cond, code);
                                /* decrement by 4 and set flags */
                                ARM_SUBS_REG_IMM8 (code, ins->dreg, ins->dreg, 4);
-                               ARM_B_COND (code, ARMCOND_LT, 0);
+                               ARM_B_COND (code, ARMCOND_GE, 0);
                                arm_patch (code - 4, start_loop);
                        }
                        ARM_ADD_REG_IMM8 (code, ins->dreg, ARMREG_SP, alloca_waste);
                        break;
                }
-               case CEE_RET:
-                       g_assert_not_reached ();
-                       ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_LR);
-                       break;
                case OP_THROW: {
                        if (ins->sreg1 != ARMREG_R0)
                                ARM_MOV_REG_REG (code, ARMREG_R0, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_throw_exception");
-                       if (cfg->method->dynamic) {
-                               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-                               ARM_B (code, 0);
-                               *(gpointer*)code = NULL;
-                               code += 4;
-                               code = emit_call_reg (code, ARMREG_IP);
-                       } else {
-                               ARM_BL (code, 0);
-                       }
+                       code = emit_call_seq (cfg, code);
                        break;
                }
                case OP_RETHROW: {
@@ -2319,47 +3076,48 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ARM_MOV_REG_REG (code, ARMREG_R0, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_rethrow_exception");
-                       if (cfg->method->dynamic) {
-                               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-                               ARM_B (code, 0);
-                               *(gpointer*)code = NULL;
-                               code += 4;
-                               code = emit_call_reg (code, ARMREG_IP);
-                       } else {
-                               ARM_BL (code, 0);
-                       }
+                       code = emit_call_seq (cfg, code);
                        break;
                }
-               case OP_START_HANDLER:
-                       if (arm_is_imm12 (ins->inst_left->inst_offset)) {
-                               ARM_STR_IMM (code, ARMREG_LR, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+               case OP_START_HANDLER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+
+                       if (arm_is_imm12 (spvar->inst_offset)) {
+                               ARM_STR_IMM (code, ARMREG_LR, spvar->inst_basereg, spvar->inst_offset);
                        } else {
-                               code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_left->inst_offset);
-                               ARM_STR_REG_REG (code, ARMREG_LR, ins->inst_left->inst_basereg, ARMREG_IP);
+                               code = mono_arm_emit_load_imm (code, ARMREG_IP, spvar->inst_offset);
+                               ARM_STR_REG_REG (code, ARMREG_LR, spvar->inst_basereg, ARMREG_IP);
                        }
                        break;
-               case OP_ENDFILTER:
+               }
+               case OP_ENDFILTER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+
                        if (ins->sreg1 != ARMREG_R0)
                                ARM_MOV_REG_REG (code, ARMREG_R0, ins->sreg1);
-                       if (arm_is_imm12 (ins->inst_left->inst_offset)) {
-                               ARM_LDR_IMM (code, ARMREG_IP, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       if (arm_is_imm12 (spvar->inst_offset)) {
+                               ARM_LDR_IMM (code, ARMREG_IP, spvar->inst_basereg, spvar->inst_offset);
                        } else {
-                               g_assert (ARMREG_IP != ins->inst_left->inst_basereg);
-                               code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_left->inst_offset);
-                               ARM_LDR_REG_REG (code, ARMREG_IP, ins->inst_left->inst_basereg, ARMREG_IP);
+                               g_assert (ARMREG_IP != spvar->inst_basereg);
+                               code = mono_arm_emit_load_imm (code, ARMREG_IP, spvar->inst_offset);
+                               ARM_LDR_REG_REG (code, ARMREG_IP, spvar->inst_basereg, ARMREG_IP);
                        }
                        ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
                        break;
-               case OP_ENDFINALLY:
-                       if (arm_is_imm12 (ins->inst_left->inst_offset)) {
-                               ARM_LDR_IMM (code, ARMREG_IP, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+               }
+               case OP_ENDFINALLY: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+
+                       if (arm_is_imm12 (spvar->inst_offset)) {
+                               ARM_LDR_IMM (code, ARMREG_IP, spvar->inst_basereg, spvar->inst_offset);
                        } else {
-                               g_assert (ARMREG_IP != ins->inst_left->inst_basereg);
-                               code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_left->inst_offset);
-                               ARM_LDR_REG_REG (code, ARMREG_IP, ins->inst_left->inst_basereg, ARMREG_IP);
+                               g_assert (ARMREG_IP != spvar->inst_basereg);
+                               code = mono_arm_emit_load_imm (code, ARMREG_IP, spvar->inst_offset);
+                               ARM_LDR_REG_REG (code, ARMREG_IP, spvar->inst_basereg, ARMREG_IP);
                        }
                        ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
                        break;
+               }
                case OP_CALL_HANDLER: 
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                        ARM_BL (code, 0);
@@ -2389,7 +3147,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_BR_REG:
                        ARM_MOV_REG_REG (code, ARMREG_PC, ins->sreg1);
                        break;
-               case CEE_SWITCH:
+               case OP_SWITCH:
                        /* 
                         * In the normal case we have:
                         *      ldr pc, [pc, ins->sreg1 << 2]
@@ -2400,6 +3158,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         * After follows the data.
                         * FIXME: add aot support.
                         */
+                       if (cfg->new_ir)
+                               mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_SWITCH, ins->inst_p0);
                        max_len += 4 * GPOINTER_TO_INT (ins->klass);
                        if (offset > (cfg->code_size - max_len - 16)) {
                                cfg->code_size += max_len;
@@ -2412,22 +3172,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        code += 4 * GPOINTER_TO_INT (ins->klass);
                        break;
                case OP_CEQ:
+               case OP_ICEQ:
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 0, ARMCOND_NE);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_EQ);
                        break;
                case OP_CLT:
+               case OP_ICLT:
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_LT);
                        break;
                case OP_CLT_UN:
+               case OP_ICLT_UN:
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_LO);
                        break;
                case OP_CGT:
+               case OP_ICGT:
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_GT);
                        break;
                case OP_CGT_UN:
+               case OP_ICGT_UN:
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_HI);
                        break;
@@ -2443,45 +3208,87 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_LE_UN:
                        EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_EQ, ins->inst_p1);
                        break;
+               case OP_COND_EXC_IEQ:
+               case OP_COND_EXC_INE_UN:
+               case OP_COND_EXC_ILT:
+               case OP_COND_EXC_ILT_UN:
+               case OP_COND_EXC_IGT:
+               case OP_COND_EXC_IGT_UN:
+               case OP_COND_EXC_IGE:
+               case OP_COND_EXC_IGE_UN:
+               case OP_COND_EXC_ILE:
+               case OP_COND_EXC_ILE_UN:
+                       EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_IEQ, ins->inst_p1);
+                       break;
                case OP_COND_EXC_C:
                case OP_COND_EXC_OV:
                case OP_COND_EXC_NC:
                case OP_COND_EXC_NO:
-                       g_assert_not_reached ();
-                       break;
-               case CEE_BEQ:
-               case CEE_BNE_UN:
-               case CEE_BLT:
-               case CEE_BLT_UN:
-               case CEE_BGT:
-               case CEE_BGT_UN:
-               case CEE_BGE:
-               case CEE_BGE_UN:
-               case CEE_BLE:
-               case CEE_BLE_UN:
-                       EMIT_COND_BRANCH (ins, ins->opcode - CEE_BEQ);
+               case OP_COND_EXC_IC:
+               case OP_COND_EXC_IOV:
+               case OP_COND_EXC_INC:
+               case OP_COND_EXC_INO:
+                       /* FIXME: no code is emitted for the carry/overflow conditional exceptions yet */
+                       break;
+               case OP_IBEQ:
+               case OP_IBNE_UN:
+               case OP_IBLT:
+               case OP_IBLT_UN:
+               case OP_IBGT:
+               case OP_IBGT_UN:
+               case OP_IBGE:
+               case OP_IBGE_UN:
+               case OP_IBLE:
+               case OP_IBLE_UN:
+                       EMIT_COND_BRANCH (ins, ins->opcode - OP_IBEQ);
                        break;
 
                /* floating point opcodes */
 #ifdef ARM_FPU_FPA
                case OP_R8CONST:
-                       /* FIXME: we can optimize the imm load by dealing with part of 
-                        * the displacement in LDFD (aligning to 512).
-                        */
-                       code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
-                       ARM_LDFD (code, ins->dreg, ARMREG_LR, 0);
+                       if (cfg->compile_aot) {
+                               ARM_LDFD (code, ins->dreg, ARMREG_PC, 0);
+                               ARM_B (code, 1);
+                               *(guint32*)code = ((guint32*)(ins->inst_p0))[0];
+                               code += 4;
+                               *(guint32*)code = ((guint32*)(ins->inst_p0))[1];
+                               code += 4;
+                       } else {
+                               /* FIXME: we can optimize the imm load by dealing with part of 
+                                * the displacement in LDFD (aligning to 512).
+                                */
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
+                               ARM_LDFD (code, ins->dreg, ARMREG_LR, 0);
+                       }
                        break;
                case OP_R4CONST:
-                       code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
-                       ARM_LDFS (code, ins->dreg, ARMREG_LR, 0);
+                       if (cfg->compile_aot) {
+                               ARM_LDFS (code, ins->dreg, ARMREG_PC, 0);
+                               ARM_B (code, 0);
+                               *(guint32*)code = ((guint32*)(ins->inst_p0))[0];
+                               code += 4;
+                       } else {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
+                               ARM_LDFS (code, ins->dreg, ARMREG_LR, 0);
+                       }
                        break;
                case OP_STORER8_MEMBASE_REG:
-                       g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_STFD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       /* This is generated by the local regalloc pass which runs after the lowering pass */
+                       if (!arm_is_fpimm8 (ins->inst_offset)) {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_STFD (code, ins->sreg1, ARMREG_LR, 0);
+                       } else {
+                               ARM_STFD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       }
                        break;
                case OP_LOADR8_MEMBASE:
-                       g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_LDFD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       /* This is generated by the local regalloc pass which runs after the lowering pass */
+                       if (!arm_is_fpimm8 (ins->inst_offset)) {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_LDFD (code, ins->dreg, ARMREG_LR, 0);
+                       } else {
+                               ARM_LDFD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       }
                        break;
                case OP_STORER4_MEMBASE_REG:
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
@@ -2491,7 +3298,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
                        ARM_LDFS (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        break;
-               case CEE_CONV_R_UN: {
+               case OP_ICONV_TO_R_UN: {
                        int tmpreg;
                        tmpreg = ins->dreg == 0? 1: 0;
                        ARM_CMP_REG_IMM8 (code, ins->sreg1, 0);
@@ -2517,24 +3324,41 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         */
                        break;
                }
-               case CEE_CONV_R4:
+               case OP_ICONV_TO_R4:
                        ARM_FLTS (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_CONV_R8:
+               case OP_ICONV_TO_R8:
                        ARM_FLTD (code, ins->dreg, ins->sreg1);
                        break;
 #elif defined(ARM_FPU_VFP)
                case OP_R8CONST:
-                       /* FIXME: we can optimize the imm load by dealing with part of 
-                        * the displacement in LDFD (aligning to 512).
-                        */
-                       code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
-                       ARM_FLDD (code, ins->dreg, ARMREG_LR, 0);
+                       if (cfg->compile_aot) {
+                               ARM_FLDD (code, ins->dreg, ARMREG_PC, 0);
+                               ARM_B (code, 1);
+                               *(guint32*)code = ((guint32*)(ins->inst_p0))[0];
+                               code += 4;
+                               *(guint32*)code = ((guint32*)(ins->inst_p0))[1];
+                               code += 4;
+                       } else {
+                               /* FIXME: we can optimize the imm load by dealing with part of 
+                                * the displacement in LDFD (aligning to 512).
+                                */
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
+                               ARM_FLDD (code, ins->dreg, ARMREG_LR, 0);
+                       }
                        break;
                case OP_R4CONST:
-                       code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
-                       ARM_FLDS (code, ins->dreg, ARMREG_LR, 0);
-                       ARM_CVTS (code, ins->dreg, ins->dreg);
+                       if (cfg->compile_aot) {
+                               ARM_FLDS (code, ins->dreg, ARMREG_PC, 0);
+                               ARM_B (code, 0);
+                               *(guint32*)code = ((guint32*)(ins->inst_p0))[0];
+                               code += 4;
+                               ARM_CVTS (code, ins->dreg, ins->dreg);
+                       } else {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
+                               ARM_FLDS (code, ins->dreg, ARMREG_LR, 0);
+                               ARM_CVTS (code, ins->dreg, ins->dreg);
+                       }
                        break;
                case OP_STORER8_MEMBASE_REG:
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
@@ -2552,15 +3376,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
                        ARM_FLDS (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        break;
-               case CEE_CONV_R_UN: {
+               case OP_ICONV_TO_R_UN: {
                        g_assert_not_reached ();
                        break;
                }
-               case CEE_CONV_R4:
+               case OP_ICONV_TO_R4:
                        g_assert_not_reached ();
                        //ARM_FLTS (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_CONV_R8:
+               case OP_ICONV_TO_R8:
                        g_assert_not_reached ();
                        //ARM_FLTD (code, ins->dreg, ins->sreg1);
                        break;
@@ -2594,31 +3418,35 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        /* Implemented as helper calls */
                        break;
-               case OP_LCONV_TO_OVF_I: {
-#if ARM_PORT
-                       guint32 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target;
-                       // Check if its negative
-                       ppc_cmpi (code, 0, 0, ins->sreg1, 0);
-                       negative_branch = code;
-                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_LT, 0);
-                       // Its positive msword == 0
-                       ppc_cmpi (code, 0, 0, ins->sreg2, 0);
-                       msword_positive_branch = code;
-                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_EQ, 0);
-
-                       ovf_ex_target = code;
-                       //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_ALWAYS, 0, "OverflowException");
-                       // Negative
-                       ppc_patch (negative_branch, code);
-                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
-                       msword_negative_branch = code;
-                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
-                       ppc_patch (msword_negative_branch, ovf_ex_target);
+               case OP_LCONV_TO_OVF_I:
+               case OP_LCONV_TO_OVF_I4_2: {
+                       guint32 *high_bit_not_set, *valid_negative, *invalid_negative, *valid_positive;
+                       /* 
+                        * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
+                        */
+
+                       ARM_CMP_REG_IMM8 (code, ins->sreg1, 0);
+                       high_bit_not_set = code;
+                       ARM_B_COND (code, ARMCOND_GE, 0); /*branch if bit 31 of the lower part is not set*/
+
+                       ARM_CMN_REG_IMM8 (code, ins->sreg2, 1); /*This has the same effect as CMP reg, 0xFFFFFFFF */
+                       valid_negative = code;
+                       ARM_B_COND (code, ARMCOND_EQ, 0); /*branch if upper part == 0xFFFFFFFF (lower part has bit 31 set) */
+                       invalid_negative = code;
+                       ARM_B_COND (code, ARMCOND_AL, 0);
                        
-                       ppc_patch (msword_positive_branch, code);
-                       if (ins->dreg != ins->sreg1)
-                               ppc_mr (code, ins->dreg, ins->sreg1);
-#endif
+                       arm_patch (high_bit_not_set, code);
+
+                       ARM_CMP_REG_IMM8 (code, ins->sreg2, 0);
+                       valid_positive = code;
+                       ARM_B_COND (code, ARMCOND_EQ, 0); /*branch if upper part == 0 (lower part has bit 31 clear)*/
+
+                       arm_patch (invalid_negative, code);
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_AL, "OverflowException");
+
+                       arm_patch (valid_negative, code);
+                       arm_patch (valid_positive, code);
+
                        if (ins->dreg != ins->sreg1)
                                ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
@@ -2661,9 +3489,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_FCOMPARE:
-                       /* each fp compare op needs to do its own */
-                       g_assert_not_reached ();
-                       //ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
+#ifdef ARM_FPU_FPA
+                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
+#elif defined(ARM_FPU_VFP)
+                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
+#endif
                        break;
                case OP_FCEQ:
 #ifdef ARM_FPU_FPA
@@ -2721,97 +3551,39 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                 * V        Unordered               ARMCOND_VS
                 */
                case OP_FBEQ:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-#endif
-                       EMIT_COND_BRANCH (ins, CEE_BEQ - CEE_BEQ);
+                       EMIT_COND_BRANCH (ins, OP_IBEQ - OP_IBEQ);
                        break;
                case OP_FBNE_UN:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-#endif
-                       EMIT_COND_BRANCH (ins, CEE_BNE_UN - CEE_BEQ);
+                       EMIT_COND_BRANCH (ins, OP_IBNE_UN - OP_IBEQ);
                        break;
                case OP_FBLT:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-#endif
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_MI); /* N set */
                        break;
                case OP_FBLT_UN:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-#endif
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_MI); /* N set */
                        break;
                case OP_FBGT:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg2, ins->sreg1);
-#endif
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_MI); /* N set, swapped args */
-                       break;
                case OP_FBGT_UN:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg2, ins->sreg1);
-#endif
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_MI); /* N set, swapped args */
+               case OP_FBLE:
+               case OP_FBLE_UN:
+                       g_assert_not_reached ();
                        break;
                case OP_FBGE:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-#endif
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS);
                        break;
                case OP_FBGE_UN:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-#endif
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_GE);
                        break;
-               case OP_FBLE:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg2, ins->sreg1);
-#endif
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS); /* swapped */
-                       break;
-               case OP_FBLE_UN:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg2, ins->sreg1);
-#endif
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_GE); /* swapped */
-                       break;
+
                case OP_CKFINITE: {
-                       /*ppc_stfd (code, ins->sreg1, -8, ppc_sp);
-                       ppc_lwz (code, ppc_r11, -8, ppc_sp);
-                       ppc_rlwinm (code, ppc_r11, ppc_r11, 0, 1, 31);
-                       ppc_addis (code, ppc_r11, ppc_r11, -32752);
-                       ppc_rlwinmd (code, ppc_r11, ppc_r11, 1, 31, 31);
-                       EMIT_COND_SYSTEM_EXCEPTION (CEE_BEQ - CEE_BEQ, "ArithmeticException");*/
+#ifdef ARM_FPU_FPA
+                       if (ins->dreg != ins->sreg1)
+                               ARM_MVFD (code, ins->dreg, ins->sreg1);
+#else
                        g_assert_not_reached ();
+#endif
                        break;
                }
                default:
@@ -2829,16 +3601,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                last_ins = ins;
                last_offset = offset;
-               
-               ins = ins->next;
        }
 
        cfg->code_len = code - cfg->native_code;
 }
 
+#endif /* DISABLE_JIT */
+
 void
 mono_arch_register_lowlevel_calls (void)
 {
+       /* Register mono_arm_throw_exception as a JIT icall; the signature passed here doesn't matter */
+       mono_register_jit_icall (mono_arm_throw_exception, "mono_arm_throw_exception", mono_create_icall_signature ("void"), TRUE);
 }
 
 #define patch_lis_ori(ip,val) do {\
@@ -2851,25 +3625,36 @@ void
 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
 {
        MonoJumpInfo *patch_info;
+       gboolean compile_aot = !run_cctors;
 
        for (patch_info = ji; patch_info; patch_info = patch_info->next) {
                unsigned char *ip = patch_info->ip.i + code;
                const unsigned char *target;
 
-               if (patch_info->type == MONO_PATCH_INFO_SWITCH) {
+               if (patch_info->type == MONO_PATCH_INFO_SWITCH && !compile_aot) {
                        gpointer *jt = (gpointer*)(ip + 8);
                        int i;
                        /* jt is the inlined jump table, 2 instructions after ip
                         * In the normal case we store the absolute addresses,
                         * otherwise the displacements.
                         */
-                       for (i = 0; i < patch_info->data.table->table_size; i++) { 
+                       for (i = 0; i < patch_info->data.table->table_size; i++)
                                jt [i] = code + (int)patch_info->data.table->table [i];
-                       }
                        continue;
                }
                target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
 
+               if (compile_aot) {
+                       switch (patch_info->type) {
+                       case MONO_PATCH_INFO_BB:
+                       case MONO_PATCH_INFO_LABEL:
+                               break;
+                       default:
+                               /* No need to patch these */
+                               continue;
+                       }
+               }
+
                switch (patch_info->type) {
                case MONO_PATCH_INFO_IP:
                        g_assert_not_reached ();
@@ -3004,9 +3789,21 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                if (cfg->prof_options & MONO_PROFILE_COVERAGE)
                        max_offset += 6; 
 
-               while (ins) {
+               MONO_BB_FOR_EACH_INS (bb, ins)
                        max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
-                       ins = ins->next;
+       }
+
+       /* store runtime generic context */
+       if (cfg->rgctx_var) {
+               MonoInst *ins = cfg->rgctx_var;
+
+               g_assert (ins->opcode == OP_REGOFFSET);
+
+               if (arm_is_imm12 (ins->inst_offset)) {
+                       ARM_STR_IMM (code, MONO_ARCH_RGCTX_REG, ins->inst_basereg, ins->inst_offset);
+               } else {
+                       code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                       ARM_STR_REG_REG (code, MONO_ARCH_RGCTX_REG, ins->inst_basereg, ARMREG_LR);
                }
        }
 
@@ -3017,7 +3814,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                ArgInfo *ainfo = &cinfo->ret;
-               inst = cfg->ret;
+               inst = cfg->vret_addr;
                g_assert (arm_is_imm12 (inst->inst_offset));
                ARM_STR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
        }
@@ -3033,8 +3830,12 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        else if (ainfo->regtype == RegTypeFP) {
                                g_assert_not_reached ();
                        } else if (ainfo->regtype == RegTypeBase) {
-                               g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset));
-                               ARM_LDR_IMM (code, inst->dreg, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+                               if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
+                                       ARM_LDR_IMM (code, inst->dreg, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                       ARM_LDR_REG_REG (code, inst->dreg, ARMREG_SP, ARMREG_IP);
+                               }
                        } else
                                g_assert_not_reached ();
 
@@ -3057,8 +3858,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                                ARM_STRH_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
                                        } else {
                                                code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
-                                               ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, inst->inst_basereg);
-                                               ARM_STRH_IMM (code, ainfo->reg, ARMREG_IP, 0);
+                                               ARM_STRH_REG_REG (code, ainfo->reg, inst->inst_basereg, ARMREG_IP);
                                        }
                                        break;
                                case 8:
@@ -3083,36 +3883,57 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset + 4);
                                ARM_STR_IMM (code, ARMREG_R3, inst->inst_basereg, inst->inst_offset);
                        } else if (ainfo->regtype == RegTypeBase) {
-                               g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset));
+                               if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
+                                       ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, prev_sp_offset + ainfo->offset);
+                                       ARM_LDR_REG_REG (code, ARMREG_LR, ARMREG_SP, ARMREG_IP);
+                               }
+
                                switch (ainfo->size) {
                                case 1:
-                                       ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_STRB_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
+                                       if (arm_is_imm8 (inst->inst_offset)) {
+                                               ARM_STRB_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_STRB_REG_REG (code, ARMREG_LR, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        break;
                                case 2:
-                                       ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                                        if (arm_is_imm8 (inst->inst_offset)) {
                                                ARM_STRH_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
                                        } else {
                                                code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
-                                               ARM_ADD_REG_REG (code, ARMREG_IP, ARMREG_IP, inst->inst_basereg);
-                                               ARM_STRH_IMM (code, ARMREG_LR, ARMREG_IP, 0);
+                                               ARM_STRH_REG_REG (code, ARMREG_LR, inst->inst_basereg, ARMREG_IP);
                                        }
                                        break;
                                case 8:
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
-                                       ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
-                                       g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset + 4));
-                                       g_assert (arm_is_imm12 (inst->inst_offset + 4));
-                                       ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset + 4));
-                                       ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset + 4);
+                                       if (arm_is_imm12 (inst->inst_offset)) {
+                                               ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_STR_REG_REG (code, ARMREG_LR, inst->inst_basereg, ARMREG_IP);
+                                       }
+                                       if (arm_is_imm12 (prev_sp_offset + ainfo->offset + 4)) {
+                                               ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset + 4));
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, prev_sp_offset + ainfo->offset + 4);
+                                               ARM_LDR_REG_REG (code, ARMREG_LR, ARMREG_SP, ARMREG_IP);
+                                       }
+                                       if (arm_is_imm12 (inst->inst_offset + 4)) {
+                                               ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset + 4);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset + 4);
+                                               ARM_STR_REG_REG (code, ARMREG_LR, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        break;
                                default:
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
-                                       ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
+                                       if (arm_is_imm12 (inst->inst_offset)) {
+                                               ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_STR_REG_REG (code, ARMREG_LR, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        break;
                                }
                        } else if (ainfo->regtype == RegTypeFP) {
@@ -3122,11 +3943,14 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                int soffset = 0;
                                int cur_reg;
                                int size = 0;
-                               if (mono_class_from_mono_type (inst->inst_vtype))
-                                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
+                               size = mini_type_stack_size_full (cfg->generic_sharing_context, inst->inst_vtype, NULL, sig->pinvoke);
                                for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
-                                       g_assert (arm_is_imm12 (doffset));
-                                       ARM_STR_IMM (code, ainfo->reg + cur_reg, inst->inst_basereg, doffset);
+                                       if (arm_is_imm12 (doffset)) {
+                                               ARM_STR_IMM (code, ainfo->reg + cur_reg, inst->inst_basereg, doffset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, doffset);
+                                               ARM_STR_REG_REG (code, ainfo->reg + cur_reg, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        soffset += sizeof (gpointer);
                                        doffset += sizeof (gpointer);
                                }
@@ -3149,15 +3973,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                             (gpointer)"mono_get_lmf_addr");
-               if (cfg->method->dynamic) {
-                       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-                       ARM_B (code, 0);
-                       *(gpointer*)code = NULL;
-                       code += 4;
-                       code = emit_call_reg (code, ARMREG_IP);
-               } else {
-                       ARM_BL (code, 0);
-               }
+               code = emit_call_seq (cfg, code);
                /* we build the MonoLMF structure on the stack - see mini-arm.h */
                /* lmf_offset is the offset from the previous stack pointer,
                 * alloc_size is the total stack space allocated, so the offset
@@ -3174,9 +3990,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                ARM_STR_IMM (code, ARMREG_R2, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
                /* *(lmf_addr) = r1 */
                ARM_STR_IMM (code, ARMREG_R1, ARMREG_R0, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
-               /* save method info */
-               code = mono_arm_emit_load_imm (code, ARMREG_R2, GPOINTER_TO_INT (method));
-               ARM_STR_IMM (code, ARMREG_R2, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, method));
+               /* Skip method (only needed for trampoline LMF frames) */
                ARM_STR_IMM (code, ARMREG_SP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, ebp));
                /* save the current IP */
                ARM_MOV_REG_REG (code, ARMREG_R2, ARMREG_PC);
@@ -3299,13 +4113,12 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
      
        /* 
         * make sure we have enough space for exceptions
-        * 12 is the simulated call to throw_exception_by_name
         */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                if (patch_info->type == MONO_PATCH_INFO_EXC) {
                        i = exception_id_by_name (patch_info->data.target);
                        if (!exc_throw_found [i]) {
-                               max_epilog_size += 12;
+                               max_epilog_size += 32;
                                exc_throw_found [i] = TRUE;
                        }
                }
@@ -3323,8 +4136,9 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
                case MONO_PATCH_INFO_EXC: {
+                       MonoClass *exc_class;
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
-                       const char *ex_name = patch_info->data.target;
+
                        i = exception_id_by_name (patch_info->data.target);
                        if (exc_throw_pos [i]) {
                                arm_patch (ip, exc_throw_pos [i]);
@@ -3334,16 +4148,17 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                                exc_throw_pos [i] = code;
                        }
                        arm_patch (ip, code);
-                       //*(int*)code = 0xef9f0001;
-                       code += 4;
-                       /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/
+
+                       exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
+                       g_assert (exc_class);
+
+                       ARM_MOV_REG_REG (code, ARMREG_R1, ARMREG_LR);
                        ARM_LDR_IMM (code, ARMREG_R0, ARMREG_PC, 0);
-                       /* we got here from a conditional call, so the calling ip is set in lr already */
                        patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
-                       patch_info->data.name = "mono_arch_throw_exception_by_name";
+                       patch_info->data.name = "mono_arch_throw_corlib_exception";
                        patch_info->ip.i = code - cfg->native_code;
-                       ARM_B (code, 0);
-                       *(gconstpointer*)code = ex_name;
+                       ARM_BL (code, 0);
+                       *(guint32*)(gpointer)code = exc_class->type_token;
                        code += 4;
                        break;
                }
@@ -3381,7 +4196,7 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
        /* add the this argument */
        if (this_reg != -1) {
                MonoInst *this;
-               MONO_INST_NEW (cfg, this, OP_SETREG);
+               MONO_INST_NEW (cfg, this, OP_MOVE);
                this->type = this_type;
                this->sreg1 = this_reg;
                this->dreg = mono_regstate_next_int (cfg->rs);
@@ -3391,7 +4206,7 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
 
        if (vt_reg != -1) {
                MonoInst *vtarg;
-               MONO_INST_NEW (cfg, vtarg, OP_SETREG);
+               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
                vtarg->type = STACK_MP;
                vtarg->sreg1 = vt_reg;
                vtarg->dreg = mono_regstate_next_int (cfg->rs);
@@ -3403,12 +4218,14 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
 MonoInst*
 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
-       MonoInst *ins = NULL;
-       if (cmethod->klass == mono_defaults.thread_class &&
-                       strcmp (cmethod->name, "MemoryBarrier") == 0) {
-               MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
-       }
-       return ins;
+       return NULL; /* no instruction is built here any more: the Thread "MemoryBarrier" special case is removed */
+}
+/*
+ * mono_arch_emit_inst_for_method:
+ *
+ *   Presumably the hook through which the ARM backend can replace a call to
+ * CMETHOD with an inline instruction sequence (TODO confirm against the caller).
+ * It is currently a stub: none of the arguments is looked at and NULL is
+ * always returned.
+ */
+MonoInst*
+mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+{
+       /* FIXME: nothing is implemented for ARM here yet */
+       return NULL;
 }
 
 gboolean
@@ -3428,6 +4245,13 @@ mono_arch_get_thread_intrinsic (MonoCompile* cfg)
        return NULL;
 }
 
+/*
+ * mono_arch_get_patch_offset:
+ *
+ *   Return the patch offset for the instruction sequence at CODE.
+ * CODE itself is not examined: the same fixed value, the one noted for
+ * OP_AOTCONST, is returned for every input.
+ */
+guint32
+mono_arch_get_patch_offset (guint8 *code)
+{
+       /* OP_AOTCONST */
+       const guint32 aotconst_patch_offset = 8;
+
+       return aotconst_patch_offset;
+}
+
 void
 mono_arch_flush_register_windows (void)
 {
@@ -3441,3 +4265,236 @@ mono_arch_fixup_jinfo (MonoCompile *cfg)
        cfg->jit_info->used_regs |= cfg->stack_usage << 14;
 }
 
+#ifdef MONO_ARCH_HAVE_IMT
+
+/*
+ * mono_arch_emit_imt_argument:
+ *
+ *   Arrange for the IMT method of CALL to be passed in ARMREG_V5.
+ * This is only done when compiling AOT code or when CFG has a generic
+ * context; in every other case nothing is emitted.
+ *  - AOT: the method is loaded with an OP_AOTCONST (METHODCONST patch).
+ *  - generic context: IMT_ARG's dreg is used when IMT_ARG is given,
+ *    otherwise the method is loaded with an OP_PCONST.
+ */
+void
+mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
+{
+       MonoInst *load;
+       int imt_reg;
+
+       if (!cfg->compile_aot && !cfg->generic_context)
+               return;
+
+       /* Always pass in a register for simplicity */
+       call->dynamic_imt_arg = TRUE;
+
+       if (cfg->compile_aot) {
+               imt_reg = mono_regstate_next_int (cfg->rs);
+
+               MONO_INST_NEW (cfg, load, OP_AOTCONST);
+               load->dreg = imt_reg;
+               load->inst_p0 = call->method;
+               load->inst_c1 = MONO_PATCH_INFO_METHODCONST;
+       } else {
+               cfg->uses_rgctx_reg = TRUE;
+
+               if (imt_arg) {
+                       /* The caller already computed the value, just route it to V5 */
+                       mono_call_inst_add_outarg_reg (cfg, call, imt_arg->dreg, ARMREG_V5, FALSE);
+                       return;
+               }
+
+               imt_reg = mono_alloc_preg (cfg);
+
+               MONO_INST_NEW (cfg, load, OP_PCONST);
+               load->inst_p0 = call->method;
+               load->dreg = imt_reg;
+       }
+
+       /* Common tail: emit the load and bind its result to V5 for the call */
+       MONO_ADD_INS (cfg->cbb, load);
+       mono_call_inst_add_outarg_reg (cfg, call, imt_reg, ARMREG_V5, FALSE);
+}
+
+/*
+ * mono_arch_find_imt_method:
+ *
+ *   Recover the IMT method of an interface call from the code stream.
+ * The 32 bit word immediately before CODE holds the method and the word
+ * before that must be a PC-relative LDR (checked with IS_LDR_PC; a mismatch
+ * is logged and then asserted). A zero method word marks AOTed code, in
+ * which case the method is read from the V5 slot of REGS instead.
+ */
+MonoMethod*
+mono_arch_find_imt_method (gpointer *regs, guint8 *code)
+{
+       guint32 *code_ptr = (guint32*)code;
+       /* code_ptr [0] is the LDR, code_ptr [1] the IMT value, code_ptr [2] the word at CODE */
+       code_ptr -= 2;
+       /* The IMT value is stored in the code stream right after the LDR instruction. */
+       if (!IS_LDR_PC (code_ptr [0])) {
+               g_warning ("invalid code stream, instruction before IMT value is not a LDR in %s() (code %p value 0: 0x%x -1: 0x%x -2: 0x%x)", __FUNCTION__, code, code_ptr [2], code_ptr [1], code_ptr [0]);
+               g_assert (IS_LDR_PC (code_ptr [0]));
+       }
+       if (code_ptr [1] == 0)
+               /* This is AOTed code, the IMT method is in V5 */
+               return (MonoMethod*)regs [ARMREG_V5];
+       else
+               return (MonoMethod*) code_ptr [1];
+}
+
+/*
+ * mono_arch_find_this_argument:
+ *
+ *   Return the 'this' argument of a call to METHOD, given the saved
+ * registers REGS. Thin wrapper around mono_arch_get_this_arg_from_call (),
+ * which is handed METHOD's signature and no code pointer.
+ */
+MonoObject*
+mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
+{
+       MonoMethodSignature *sig = mono_method_signature (method);
+
+       return mono_arch_get_this_arg_from_call (gsctx, sig, (gssize*)regs, NULL);
+}
+
+/*
+ * mono_arch_find_static_call_vtable:
+ *
+ *   Return the vtable of a static call, read from the MONO_ARCH_RGCTX_REG
+ * slot of the saved registers REGS. CODE is not used.
+ */
+MonoVTable*
+mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
+{
+       gpointer rgctx_value = regs [MONO_ARCH_RGCTX_REG];
+
+       return (MonoVTable*) rgctx_value;
+}
+/*
+ * Sizes used by mono_arch_build_imt_thunk () to compute how much code memory
+ * a thunk needs. Every value is a multiple of 4; presumably one unit is one
+ * 32 bit ARM word, either an instruction or an inline literal (TODO confirm).
+ */
+#define ENABLE_WRONG_METHOD_CHECK 0 /* set to 1 to also emit a compare + breakpoint in entries that need no compare */
+#define BASE_SIZE (6 * 4) /* initial value of the thunk size, before any entry is added */
+#define BSEARCH_ENTRY_SIZE (4 * 4) /* added for each entry that is not 'is_equals' */
+#define CMP_SIZE (3 * 4) /* added when an 'is_equals' entry has to do its own compare */
+#define BRANCH_SIZE (1 * 4) /* added for an 'is_equals' entry that has a check target */
+#define CALL_SIZE (2 * 4) /* added for every 'is_equals' entry */
+#define WMC_SIZE (5 * 4) /* only counted when ENABLE_WRONG_METHOD_CHECK is set */
+#define DISTANCE(A, B) (((gint32)(B)) - ((gint32)(A))) /* B - A, both first converted to gint32 */
+
+/*
+ * arm_emit_value_and_patch_ldr:
+ *
+ *   Store VALUE as a literal word at CODE and patch the load instruction at
+ * TARGET so that its immediate offset field addresses that word.
+ * TARGET is expected to have been emitted with a zero immediate, since the
+ * offset is simply or-ed into the instruction.
+ *
+ * The offset is the byte distance from TARGET to CODE minus 8 (on ARM the PC
+ * reads 8 bytes ahead of the executing instruction) and must fit in the
+ * 12 bit immediate, i.e. be in the range [0, 0xFFF].
+ *
+ * Returns the address following the stored literal.
+ */
+static arminstr_t *
+arm_emit_value_and_patch_ldr (arminstr_t *code, arminstr_t *target, guint32 value)
+{
+       /*
+        * Keep the offset signed: with an unsigned type the lower bound test
+        * below is always true and a literal placed too close to (or before)
+        * TARGET is only caught through wrap-around.
+        */
+       gint32 delta = DISTANCE (target, code);
+       delta -= 8;
+       g_assert (delta >= 0 && delta <= 0xFFF);
+       *target = *target | (arminstr_t)delta;
+       *code = value;
+       return code + 1;
+}
+/*
+ * mono_arch_build_imt_thunk:
+ *
+ *   Emit the IMT thunk of VTABLE for the COUNT items of IMT_ENTRIES into
+ * memory reserved from DOMAIN's code manager and return its start address.
+ *
+ * Shape of the generated code, as emitted below:
+ *  - a prologue which pushes R0/R1, loads the word at LR - 4 into R0, loads
+ *    the vtable address into IP from an inline literal and, when the loaded
+ *    word is zero (AOT caller), takes the method from V5 instead;
+ *  - one chunk per item. 'is_equals' items optionally compare R0 with the
+ *    item's method, then pop R0/R1 and jump through the item's vtable slot.
+ *    The other items are compare-and-branch nodes whose branch is later
+ *    patched to go to the item at 'check_target_idx';
+ *  - the literal words read by those loads are stored after the jump of the
+ *    'is_equals' items.
+ *
+ * Side effects: chunk_size, code_target and jmp_code of the items are filled
+ * in, compare_done is set on branch targets, the instruction cache is flushed
+ * for the thunk and mono_stats.imt_thunks_size is updated.
+ */
+gpointer
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
+{
+       int size, i, extra_space = 0;
+       arminstr_t *code, *start, *vtable_target = NULL;
+       size = BASE_SIZE;
+
+       for (i = 0; i < count; ++i) { /* pass 1: size every chunk and sum up the total */
+               MonoIMTCheckItem *item = imt_entries [i];
+               if (item->is_equals) {
+                       g_assert (arm_is_imm12 (DISTANCE (vtable, &vtable->vtable[item->vtable_slot]))); /* the slot offset is used as a load immediate below */
+
+                       if (item->check_target_idx) {
+                               if (!item->compare_done)
+                                       item->chunk_size += CMP_SIZE;
+                               item->chunk_size += BRANCH_SIZE;
+                       } else {
+#if ENABLE_WRONG_METHOD_CHECK
+                               item->chunk_size += WMC_SIZE;
+#endif
+                       }
+                       item->chunk_size += CALL_SIZE;
+               } else {
+                       item->chunk_size += BSEARCH_ENTRY_SIZE;
+                       imt_entries [item->check_target_idx]->compare_done = TRUE; /* the branch target can reuse this node's compare */
+               }
+               size += item->chunk_size;
+       }
+
+       start = code = mono_code_manager_reserve (domain->code_mp, size);
+
+#if DEBUG_IMT
+       printf ("building IMT thunk for class %s %s entries %d code size %d code at %p end %p vtable %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable);
+       for (i = 0; i < count; ++i) {
+               MonoIMTCheckItem *item = imt_entries [i];
+               printf ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->method, item->method->name, &vtable->vtable [item->vtable_slot], item->is_equals, item->chunk_size);
+       }
+#endif
+
+       ARM_PUSH2 (code, ARMREG_R0, ARMREG_R1); /* R0/R1 are used as scratch and popped again before each jump */
+       ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, -4); /* R0 = word at LR - 4 */
+       vtable_target = code; /* remember this load: its offset is patched once the vtable literal is placed */
+       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
+
+       /* R0 == 0 means we are called from AOT code. In this case, V5 contains the IMT method */
+       ARM_CMP_REG_IMM8 (code, ARMREG_R0, 0);
+       ARM_MOV_REG_REG_COND (code, ARMREG_R0, ARMREG_V5, ARMCOND_EQ);
+
+       for (i = 0; i < count; ++i) { /* pass 2: emit the chunks in order */
+               MonoIMTCheckItem *item = imt_entries [i];
+               arminstr_t *imt_method = NULL;
+               item->code_target = (guint8*)code;
+
+               if (item->is_equals) {
+                       if (item->check_target_idx) {
+                               if (!item->compare_done) {
+                                       imt_method = code; /* load to be patched with the address of the method literal */
+                                       ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0);
+                                       ARM_CMP_REG_REG (code, ARMREG_R0, ARMREG_R1);
+                               }
+                               item->jmp_code = (guint8*)code;
+                               ARM_B_COND (code, ARMCOND_NE, 0); /* destination is filled in by arm_patch () in the last loop */
+
+                               ARM_POP2 (code, ARMREG_R0, ARMREG_R1);
+                               ARM_LDR_IMM (code, ARMREG_PC, ARMREG_IP, DISTANCE (vtable, &vtable->vtable[item->vtable_slot])); /* jump: PC = word at IP + slot offset */
+                       } else {
+                               /* Set ENABLE_WRONG_METHOD_CHECK to 1 to compare the method here too and hit ARM_DBRK on a mismatch */
+#if ENABLE_WRONG_METHOD_CHECK
+                               imt_method = code;
+                               ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0);
+                               ARM_CMP_REG_REG (code, ARMREG_R0, ARMREG_R1);
+                               ARM_B_COND (code, ARMCOND_NE, 1);
+#endif
+                               ARM_POP2 (code, ARMREG_R0, ARMREG_R1);
+                               ARM_LDR_IMM (code, ARMREG_PC, ARMREG_IP, DISTANCE (vtable, &vtable->vtable[item->vtable_slot]));
+
+#if ENABLE_WRONG_METHOD_CHECK
+                               ARM_DBRK (code);
+#endif
+                       }
+
+                       if (imt_method)
+                               code = arm_emit_value_and_patch_ldr (code, imt_method, (guint32)item->method);
+
+                       /*must emit after unconditional branch*/
+                       if (vtable_target) {
+                               code = arm_emit_value_and_patch_ldr (code, vtable_target, (guint32)vtable);
+                               item->chunk_size += 4; /* the vtable literal is part of this chunk; chunk_size locates the literal space below */
+                               vtable_target = NULL;
+                       }
+
+                       /*We reserve the space for bsearch IMT values after the first entry with an absolute jump*/
+                       if (extra_space) {
+                               code += extra_space; /* code is an arminstr_t*, so this skips extra_space words */
+                               extra_space = 0;
+                       }
+               } else {
+                       ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0); /* offset patched in the last loop, once the literal has a place */
+                       ARM_CMP_REG_REG (code, ARMREG_R0, ARMREG_R1);
+
+                       item->jmp_code = (guint8*)code;
+                       ARM_B_COND (code, ARMCOND_GE, 0);
+                       ++extra_space; /* one more literal word to reserve after the next 'is_equals' chunk */
+               }
+       }
+
+       for (i = 0; i < count; ++i) { /* pass 3: resolve branches and fill in the literals of the bsearch nodes */
+               MonoIMTCheckItem *item = imt_entries [i];
+               if (item->jmp_code) {
+                       if (item->check_target_idx)
+                               arm_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
+               }
+               if (i > 0 && item->is_equals) {
+                       int j;
+                       arminstr_t *space_start = (arminstr_t*)(item->code_target + item->chunk_size); /* first reserved word after this chunk */
+                       for (j = i - 1; j >= 0 && !imt_entries [j]->is_equals; --j) { /* walk back over the nodes directly preceding this chunk */
+                               space_start = arm_emit_value_and_patch_ldr (space_start, (arminstr_t*)imt_entries [j]->code_target, (guint32)imt_entries [j]->method);
+                       }
+               }
+       }
+
+#if DEBUG_IMT
+       {
+               char *buff = g_strdup_printf ("thunk_for_class_%s_%s_entries_%d", vtable->klass->name_space, vtable->klass->name, count);
+               mono_disassemble_code (NULL, (guint8*)start, size, buff);
+               g_free (buff);
+       }
+#endif
+
+       mono_arch_flush_icache ((guint8*)start, size);
+       mono_stats.imt_thunks_size += code - start; /* NOTE(review): code and start are arminstr_t*, so this adds a
+                                                    * number of words while size appears to be in bytes - confirm
+                                                    * which unit the statistic expects */
+
+       g_assert (DISTANCE (start, code) <= size); /* checked only after emission: the code written must fit the reservation */
+       return start;
+}
+
+#endif
+
+/*
+ * mono_arch_context_get_int_reg:
+ *
+ *   Return the value stored for integer register REG in CTX.
+ * REG is used directly as an index into ctx->regs, without a range check.
+ */
+gpointer
+mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
+{
+       gpointer reg_value = ctx->regs [reg];
+
+       return reg_value;
+}