* HttpRuntime.cs: Avoid ANE on Windows when HttpRuntime is used
[mono.git] / mono / mini / mini-arm.c
index 1993cbd68ffc6c4237ac79d7ec7f3bbe165ac7f6..581182d597ee5d7cea9c8b5378d8c309b912488a 100644 (file)
 #include <mono/metadata/debug-helpers.h>
 
 #include "mini-arm.h"
-#include "inssel.h"
 #include "cpu-arm.h"
 #include "trace.h"
+#include "ir-emit.h"
 #ifdef ARM_FPU_FPA
 #include "mono/arch/arm/arm-fpa-codegen.h"
 #elif defined(ARM_FPU_VFP)
 #include "mono/arch/arm/arm-vfp-codegen.h"
 #endif
 
+#if defined(__ARM_EABI__) && defined(__linux__) && !defined(PLATFORM_ANDROID)
+#define HAVE_AEABI_READ_TP 1
+#endif
+
+static gint lmf_tls_offset = -1;
+static gint lmf_addr_tls_offset = -1;
+
 /* This mutex protects architecture specific caches */
 #define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
 #define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
@@ -31,8 +38,6 @@ static CRITICAL_SECTION mini_arch_mutex;
 static int v5_supported = 0;
 static int thumb_supported = 0;
 
-static int mono_arm_is_rotated_imm8 (guint32 val, gint *rot_amount);
-
 /*
  * TODO:
  * floating point support: on ARM it is a mess, there are at least 3
@@ -61,8 +66,6 @@ int mono_exc_esp_offset = 0;
 #define MOV_LR_PC ((ARMCOND_AL << ARMCOND_SHIFT) | (1 << 24) | (0xa << 20) |  (ARMREG_LR << 12) | ARMREG_PC)
 #define DEBUG_IMT 0
 
-void mini_emit_memcpy2 (MonoCompile *cfg, int destreg, int doffset, int srcreg, int soffset, int size, int align);
-
 const char*
 mono_arch_regname (int reg)
 {
@@ -191,6 +194,13 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
 #ifdef ARM_FPU_FPA
                if (ins->dreg != ARM_FPA_F0)
                        ARM_MVFD (code, ins->dreg, ARM_FPA_F0);
+#elif defined(ARM_FPU_VFP)
+               if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
+                       ARM_FMSR (code, ins->dreg, ARMREG_R0);
+                       ARM_CVTS (code, ins->dreg, ins->dreg);
+               } else {
+                       ARM_FMDRR (code, ARMREG_R0, ARMREG_R1, ins->dreg);
+               }
 #endif
                break;
        }
@@ -213,7 +223,7 @@ int
 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
 {
        int k, frame_size = 0;
-       int size, align, pad;
+       guint32 size, align, pad;
        int offset = 8;
 
        if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
@@ -231,11 +241,7 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        arg_info [0].size = frame_size;
 
        for (k = 0; k < param_count; k++) {
-               
-               if (csig->pinvoke)
-                       size = mono_type_native_stack_size (csig->params [k], &align);
-               else
-                       size = mini_type_stack_size (NULL, csig->params [k], &align);
+               size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);
 
                /* ignore alignment for now */
                align = 1;
@@ -375,7 +381,7 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
                g_assert ((code - start) <= 12);
 
-               mono_arch_flush_icache (code, 12);
+               mono_arch_flush_icache (start, 12);
                cached = start;
                mono_mini_arch_unlock ();
                return cached;
@@ -408,7 +414,7 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
                g_assert ((code - start) <= size);
 
-               mono_arch_flush_icache (code, size);
+               mono_arch_flush_icache (start, size);
                cache [sig->param_count] = start;
                mono_mini_arch_unlock ();
                return start;
@@ -498,7 +504,7 @@ static gboolean
 is_regsize_var (MonoType *t) {
        if (t->byref)
                return TRUE;
-       t = mono_type_get_underlying_type (t);
+       t = mini_type_get_underlying_type (NULL, t);
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
@@ -561,10 +567,8 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V2));
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V3));
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V4));
-       if (cfg->compile_aot)
-               /* V5 is reserved for holding the IMT method */
-               cfg->used_int_regs |= (1 << ARMREG_V5);
-       else
+       if (!(cfg->compile_aot || cfg->uses_rgctx_reg))
+               /* V5 is reserved for passing the vtable/rgctx/IMT method in those cases, so it is only allocatable otherwise */
                regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V5));
        /*regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V6));*/
        /*regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V7));*/
@@ -586,11 +590,29 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
        return 2;
 }
 
+#ifndef __GNUC_PREREQ
+#define __GNUC_PREREQ(maj, min) (0)
+#endif
+
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
 #if __APPLE__
        sys_icache_invalidate (code, size);
+#elif __GNUC_PREREQ(4, 1)
+       __clear_cache (code, code + size);
+#elif defined(PLATFORM_ANDROID)
+       const int syscall = 0xf0002;
+       __asm __volatile (
+               "mov     r0, %0\n"                      
+               "mov     r1, %1\n"
+               "mov     r7, %2\n"
+               "mov     r2, #0x0\n"
+               "svc     0x00000000\n"
+               :
+               :       "r" (code), "r" (code + size), "r" (syscall)
+               :       "r0", "r1", "r2", "r7" /* r2 is written by the "mov r2, #0x0" above */
+               );
 #else
        __asm __volatile ("mov r0, %0\n"
                        "mov r1, %1\n"
@@ -679,7 +701,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
        guint i, gr;
        int n = sig->hasthis + sig->param_count;
-       guint32 simpletype;
+       MonoType *simpletype;
        guint32 stack_size = 0;
        CallInfo *cinfo = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n);
 
@@ -712,8 +734,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        continue;
                }
-               simpletype = mono_type_get_underlying_type (sig->params [i])->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->params [i]);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -762,7 +784,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        int align_size;
                        int nwords;
 
-                       if (simpletype == MONO_TYPE_TYPEDBYREF) {
+                       if (simpletype->type == MONO_TYPE_TYPEDBYREF) {
                                size = sizeof (MonoTypedRef);
                        } else {
                                MonoClass *klass = mono_class_from_mono_type (sig->params [i]);
@@ -810,8 +832,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
 
        {
-               simpletype = mono_type_get_underlying_type (sig->ret)->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->ret);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -871,7 +893,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
  * The locals var stuff should most likely be split in another method.
  */
 void
-mono_arch_allocate_vars (MonoCompile *m)
+mono_arch_allocate_vars (MonoCompile *cfg)
 {
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
@@ -880,13 +902,13 @@ mono_arch_allocate_vars (MonoCompile *m)
        int frame_reg = ARMREG_FP;
 
        /* FIXME: this will change when we use FP as gcc does */
-       m->flags |= MONO_CFG_HAS_SPILLUP;
+       cfg->flags |= MONO_CFG_HAS_SPILLUP;
 
        /* allow room for the vararg method args: void* and long/double */
-       if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
-               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+       if (mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method))
+               cfg->param_area = MAX (cfg->param_area, sizeof (gpointer)*8);
 
-       header = mono_method_get_header (m->method);
+       header = mono_method_get_header (cfg->method);
 
        /* 
         * We use the frame register also for any method that has
@@ -897,25 +919,29 @@ mono_arch_allocate_vars (MonoCompile *m)
         * filters get called before stack unwinding happens) when the filter
         * code would call any method (this also applies to finally etc.).
         */ 
-       if ((m->flags & MONO_CFG_HAS_ALLOCA) || header->num_clauses)
+       if ((cfg->flags & MONO_CFG_HAS_ALLOCA) || header->num_clauses)
                frame_reg = ARMREG_FP;
-       m->frame_reg = frame_reg;
+       cfg->frame_reg = frame_reg;
        if (frame_reg != ARMREG_SP) {
-               m->used_int_regs |= 1 << frame_reg;
+               cfg->used_int_regs |= 1 << frame_reg;
        }
 
-       sig = mono_method_signature (m->method);
+       if (cfg->compile_aot || cfg->uses_rgctx_reg)
+               /* V5 is reserved for passing the vtable/rgctx/IMT method (inverse of the test in mono_arch_get_global_int_regs) */
+               cfg->used_int_regs |= (1 << ARMREG_V5);
+
+       sig = mono_method_signature (cfg->method);
        
        offset = 0;
        curinst = 0;
        if (!MONO_TYPE_ISSTRUCT (sig->ret)) {
                /* FIXME: handle long and FP values */
-               switch (mono_type_get_underlying_type (sig->ret)->type) {
+               switch (mini_type_get_underlying_type (NULL, sig->ret)->type) {
                case MONO_TYPE_VOID:
                        break;
                default:
-                       m->ret->opcode = OP_REGVAR;
-                       m->ret->inst_c0 = ARMREG_R0;
+                       cfg->ret->opcode = OP_REGVAR;
+                       cfg->ret->inst_c0 = ARMREG_R0;
                        break;
                }
        }
@@ -930,48 +956,51 @@ mono_arch_allocate_vars (MonoCompile *m)
        //offset &= ~(8 - 1);
 
        /* add parameter area size for called functions */
-       offset += m->param_area;
+       offset += cfg->param_area;
        offset += 8 - 1;
        offset &= ~(8 - 1);
-       if (m->flags & MONO_CFG_HAS_FPOUT)
+       if (cfg->flags & MONO_CFG_HAS_FPOUT)
                offset += 8;
 
        /* allow room to save the return value */
-       if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
+       if (mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method))
                offset += 8;
 
        /* the MonoLMF structure is stored just below the stack pointer */
 
        if (sig->call_convention == MONO_CALL_VARARG) {
-                m->sig_cookie = 0;
+                cfg->sig_cookie = 0;
         }
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
-               inst = m->vret_addr;
+               inst = cfg->vret_addr;
                offset += sizeof(gpointer) - 1;
                offset &= ~(sizeof(gpointer) - 1);
                inst->inst_offset = offset;
                inst->opcode = OP_REGOFFSET;
                inst->inst_basereg = frame_reg;
-               if (G_UNLIKELY (m->verbose_level > 1)) {
+               if (G_UNLIKELY (cfg->verbose_level > 1)) {
                        printf ("vret_addr =");
-                       mono_print_ins (m->vret_addr);
+                       mono_print_ins (cfg->vret_addr);
                }
                offset += sizeof(gpointer);
                if (sig->call_convention == MONO_CALL_VARARG)
-                       m->sig_cookie += sizeof (gpointer);
+                       cfg->sig_cookie += sizeof (gpointer);
        }
 
-       curinst = m->locals_start;
-       for (i = curinst; i < m->num_varinfo; ++i) {
-               inst = m->varinfo [i];
+       curinst = cfg->locals_start;
+       for (i = curinst; i < cfg->num_varinfo; ++i) {
+               inst = cfg->varinfo [i];
                if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
                        continue;
 
                /* inst->backend.is_pinvoke indicates native sized value types, this is used by the
                * pinvoke wrappers when they call functions returning structure */
-               if (inst->backend.is_pinvoke && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
-                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), &align);
+               if (inst->backend.is_pinvoke && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF) {
+                       guint32 ualign;
+                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), &ualign);
+                       align = ualign;
+               }
                else
                        size = mono_type_size (inst->inst_vtype, &align);
 
@@ -991,7 +1020,7 @@ mono_arch_allocate_vars (MonoCompile *m)
 
        curinst = 0;
        if (sig->hasthis) {
-               inst = m->args [curinst];
+               inst = cfg->args [curinst];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
@@ -1000,13 +1029,13 @@ mono_arch_allocate_vars (MonoCompile *m)
                        inst->inst_offset = offset;
                        offset += sizeof (gpointer);
                        if (sig->call_convention == MONO_CALL_VARARG)
-                               m->sig_cookie += sizeof (gpointer);
+                               cfg->sig_cookie += sizeof (gpointer);
                }
                curinst++;
        }
 
        for (i = 0; i < sig->param_count; ++i) {
-               inst = m->args [curinst];
+               inst = cfg->args [curinst];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
@@ -1021,7 +1050,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                        inst->inst_offset = offset;
                        offset += size;
                        if ((sig->call_convention == MONO_CALL_VARARG) && (i < sig->sentinelpos)) 
-                               m->sig_cookie += size;
+                               cfg->sig_cookie += size;
                }
                curinst++;
        }
@@ -1031,8 +1060,7 @@ mono_arch_allocate_vars (MonoCompile *m)
        offset &= ~(8 - 1);
 
        /* change sign? */
-       m->stack_offset = offset;
-
+       cfg->stack_offset = offset;
 }
 
 void
@@ -1051,139 +1079,6 @@ mono_arch_create_vars (MonoCompile *cfg)
        }
 }
 
-/* 
- * take the arguments and generate the arch-specific
- * instructions to properly call the function in call.
- * This includes pushing, moving arguments to the right register
- * etc.
- * Issue: who does the spilling if needed, and when?
- */
-MonoCallInst*
-mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
-       MonoInst *arg, *in;
-       MonoMethodSignature *sig;
-       int i, n;
-       CallInfo *cinfo;
-       ArgInfo *ainfo;
-
-       sig = call->signature;
-       n = sig->param_count + sig->hasthis;
-       
-       cinfo = calculate_sizes (sig, sig->pinvoke);
-       if (cinfo->struct_ret)
-               call->used_iregs |= 1 << cinfo->struct_ret;
-
-       for (i = 0; i < n; ++i) {
-               ainfo = cinfo->args + i;
-               if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
-                       MonoInst *sig_arg;
-                       cfg->disable_aot = TRUE;
-                               
-                       MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
-                       sig_arg->inst_p0 = call->signature;
-                       
-                       MONO_INST_NEW (cfg, arg, OP_OUTARG);
-                       arg->inst_imm = cinfo->sig_cookie.offset;
-                       arg->inst_left = sig_arg;
-                       
-                       /* prepend, so they get reversed */
-                       arg->next = call->out_args;
-                       call->out_args = arg;
-               }
-               if (is_virtual && i == 0) {
-                       /* the argument will be attached to the call instrucion */
-                       in = call->args [i];
-                       call->used_iregs |= 1 << ainfo->reg;
-               } else {
-                       MONO_INST_NEW (cfg, arg, OP_OUTARG);
-                       in = call->args [i];
-                       arg->cil_code = in->cil_code;
-                       arg->inst_left = in;
-                       arg->inst_right = (MonoInst*)call;
-                       arg->type = in->type;
-                       /* prepend, we'll need to reverse them later */
-                       arg->next = call->out_args;
-                       call->out_args = arg;
-                       if (ainfo->regtype == RegTypeGeneral) {
-                               arg->backend.reg3 = ainfo->reg;
-                               call->used_iregs |= 1 << ainfo->reg;
-                               if (arg->type == STACK_I8)
-                                       call->used_iregs |= 1 << (ainfo->reg + 1);
-                               if (arg->type == STACK_R8) {
-                                       if (ainfo->size == 4) {
-#ifndef MONO_ARCH_SOFT_FLOAT
-                                               arg->opcode = OP_OUTARG_R4;
-#endif
-                                       } else {
-                                               call->used_iregs |= 1 << (ainfo->reg + 1);
-                                       }
-                                       cfg->flags |= MONO_CFG_HAS_FPOUT;
-                               }
-                       } else if (ainfo->regtype == RegTypeStructByAddr) {
-                               /* FIXME: where si the data allocated? */
-                               arg->backend.reg3 = ainfo->reg;
-                               call->used_iregs |= 1 << ainfo->reg;
-                               g_assert_not_reached ();
-                       } else if (ainfo->regtype == RegTypeStructByVal) {
-                               int cur_reg;
-                               /* mark the used regs */
-                               for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
-                                       call->used_iregs |= 1 << (ainfo->reg + cur_reg);
-                               }
-                               arg->opcode = OP_OUTARG_VT;
-                               /* vtsize and offset have just 12 bits of encoding in number of words */
-                               g_assert (((ainfo->vtsize | (ainfo->offset / 4)) & 0xfffff000) == 0);
-                               arg->backend.arg_info = ainfo->reg | (ainfo->size << 4) | (ainfo->vtsize << 8) | ((ainfo->offset / 4) << 20);
-                       } else if (ainfo->regtype == RegTypeBase) {
-                               arg->opcode = OP_OUTARG_MEMBASE;
-                               arg->backend.arg_info = (ainfo->offset << 8) | ainfo->size;
-                       } else if (ainfo->regtype == RegTypeBaseGen) {
-                               call->used_iregs |= 1 << ARMREG_R3;
-                               arg->opcode = OP_OUTARG_MEMBASE;
-                               arg->backend.arg_info = (ainfo->offset << 8) | 0xff;
-                               if (arg->type == STACK_R8)
-                                       cfg->flags |= MONO_CFG_HAS_FPOUT;
-                       } else if (ainfo->regtype == RegTypeFP) {
-                               arg->backend.reg3 = ainfo->reg;
-                               /* FP args are passed in int regs */
-                               call->used_iregs |= 1 << ainfo->reg;
-                               if (ainfo->size == 8) {
-                                       arg->opcode = OP_OUTARG_R8;
-                                       call->used_iregs |= 1 << (ainfo->reg + 1);
-                               } else {
-                                       arg->opcode = OP_OUTARG_R4;
-                               }
-                               cfg->flags |= MONO_CFG_HAS_FPOUT;
-                       } else {
-                               g_assert_not_reached ();
-                       }
-               }
-       }
-       /*
-        * Reverse the call->out_args list.
-        */
-       {
-               MonoInst *prev = NULL, *list = call->out_args, *next;
-               while (list) {
-                       next = list->next;
-                       list->next = prev;
-                       prev = list;
-                       list = next;
-               }
-               call->out_args = prev;
-       }
-       call->stack_usage = cinfo->stack_usage;
-       cfg->param_area = MAX (cfg->param_area, cinfo->stack_usage);
-       cfg->flags |= MONO_CFG_HAS_CALLS;
-       /* 
-        * should set more info in call, such as the stack space
-        * used by the args that needs to be added back to esp
-        */
-
-       g_free (cinfo);
-       return call;
-}
-
 void
 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 {
@@ -1205,7 +1100,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                        t = sig->params [i - sig->hasthis];
                else
                        t = &mono_defaults.int_class->byval_arg;
-               t = mono_type_get_underlying_type (t);
+               t = mini_type_get_underlying_type (NULL, t);
 
                if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* FIXME: */
@@ -1400,13 +1295,13 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
        }
        //g_print ("vt size: %d at R%d + %d\n", doffset, vt->inst_basereg, vt->inst_offset);
        if (ovf_size != 0)
-               mini_emit_memcpy2 (cfg, ARMREG_SP, doffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+               mini_emit_memcpy (cfg, ARMREG_SP, doffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
 }
 
 void
 mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
 {
-       MonoType *ret = mono_type_get_underlying_type (mono_method_signature (method)->ret);
+       MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);
 
        if (!ret->byref) {
                if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
@@ -1433,6 +1328,21 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
                        MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
                        return;
                }                       
+#elif defined(ARM_FPU_VFP)
+               if (ret->type == MONO_TYPE_R8 || ret->type == MONO_TYPE_R4) {
+                       MonoInst *ins;
+
+                       MONO_INST_NEW (cfg, ins, OP_SETFRET);
+                       ins->dreg = cfg->ret->dreg;
+                       ins->sreg1 = val->dreg;
+                       MONO_ADD_INS (cfg->cbb, ins);
+                       return;
+               }
+#else
+               if (ret->type == MONO_TYPE_R4 || ret->type == MONO_TYPE_R8) {
+                       MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
+                       return;
+               }
 #endif
        }
 
@@ -1477,7 +1387,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
        int save_mode = SAVE_NONE;
        int offset;
        MonoMethod *method = cfg->method;
-       int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
+       int rtype = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret)->type;
        int save_offset = cfg->param_area;
        save_offset += 7;
        save_offset &= ~7;
@@ -1619,24 +1529,6 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_MUL_IMM: 
                case OP_IMUL_IMM: 
                        /* Already done by an arch-independent pass */
-                       if (cfg->new_ir)
-                               break;
-
-                       /* remove unnecessary multiplication with 1 */
-                       if (ins->inst_imm == 1) {
-                               if (ins->dreg != ins->sreg1) {
-                                       ins->opcode = OP_MOVE;
-                               } else {
-                                       MONO_DELETE_INS (bb, ins);
-                                       continue;
-                               }
-                       } else {
-                               int power2 = mono_is_power_of_two (ins->inst_imm);
-                               if (power2 > 0) {
-                                       ins->opcode = OP_SHL_IMM;
-                                       ins->inst_imm = power2;
-                               }
-                       }
                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
@@ -1854,10 +1746,6 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
        MonoInst *ins, *temp, *last_ins = NULL;
        int rot_amount, imm8, low_imm;
 
-       /* setup the virtual reg allocator */
-       if (bb->max_vreg > cfg->rs->next_vreg)
-               cfg->rs->next_vreg = bb->max_vreg;
-
        MONO_BB_FOR_EACH_INS (bb, ins) {
 loop_start:
                switch (ins->opcode) {
@@ -1882,14 +1770,14 @@ loop_start:
                        if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount)) < 0) {
                                NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->sreg2 = temp->dreg;
-                               if (cfg->new_ir)
-                                       ins->opcode = mono_op_imm_to_op (ins->opcode);
-                               else
-                                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                               ins->opcode = mono_op_imm_to_op (ins->opcode);
                        }
-                       break;
+                       if (ins->opcode == OP_SBB || ins->opcode == OP_ISBB || ins->opcode == OP_SUBCC)
+                               goto loop_start;
+                       else
+                               break;
                case OP_MUL_IMM:
                case OP_IMUL_IMM:
                        if (ins->inst_imm == 1) {
@@ -1909,14 +1797,22 @@ loop_start:
                        }
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = OP_IMUL;
                        break;
+               case OP_SBB:
+               case OP_ISBB:
+               case OP_SUBCC:
+               case OP_ISUBCC:
+                       if (ins->next  && (ins->next->opcode == OP_COND_EXC_C || ins->next->opcode == OP_COND_EXC_IC))
+                               /* ARM sets the C flag to 1 if there was _no_ overflow */
+                               ins->next->opcode = OP_COND_EXC_NC;
+                       break;
                case OP_LOCALLOC_IMM:
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg1 = temp->dreg;
                        ins->opcode = OP_LOCALLOC;
                        break;
@@ -1933,7 +1829,7 @@ loop_start:
                                break;
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        break;
@@ -1944,7 +1840,7 @@ loop_start:
                                break;
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        break;
@@ -1957,7 +1853,7 @@ loop_start:
                                NEW_INS (cfg, temp, OP_ADD_IMM);
                                temp->inst_imm = ins->inst_offset & ~0x1ff;
                                temp->sreg1 = ins->inst_basereg;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->inst_basereg = temp->dreg;
                                ins->inst_offset = low_imm;
                                break;
@@ -1972,7 +1868,7 @@ loop_start:
                                break;
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        break;
@@ -1981,7 +1877,7 @@ loop_start:
                                break;
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        break;
@@ -1994,7 +1890,7 @@ loop_start:
                                NEW_INS (cfg, temp, OP_ADD_IMM);
                                temp->inst_imm = ins->inst_offset & ~0x1ff;
                                temp->sreg1 = ins->inst_destbasereg;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->inst_destbasereg = temp->dreg;
                                ins->inst_offset = low_imm;
                                break;
@@ -2009,7 +1905,7 @@ loop_start:
                case OP_STOREI4_MEMBASE_IMM:
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg1 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        last_ins = temp;
@@ -2052,8 +1948,7 @@ loop_start:
                last_ins = ins;
        }
        bb->last_ins = last_ins;
-       bb->max_vreg = cfg->rs->next_vreg;
-
+       bb->max_vreg = cfg->next_vreg;
 }
 
 static guchar*
@@ -2167,12 +2062,12 @@ handle_thunk (int absolute, guchar *code, const guchar *target) {
        pdata.found = 0;
 
        mono_domain_lock (domain);
-       mono_code_manager_foreach (domain->code_mp, search_thunk_slot, &pdata);
+       mono_domain_code_foreach (domain, search_thunk_slot, &pdata);
 
        if (!pdata.found) {
                /* this uses the first available slot */
                pdata.found = 2;
-               mono_code_manager_foreach (domain->code_mp, search_thunk_slot, &pdata);
+               mono_domain_code_foreach (domain, search_thunk_slot, &pdata);
        }
        mono_domain_unlock (domain);
 
@@ -2334,7 +2229,7 @@ arm_patch (guchar *code, const guchar *target)
  * to be used with the emit macros.
  * Return -1 otherwise.
  */
-static int
+int
 mono_arm_is_rotated_imm8 (guint32 val, gint *rot_amount)
 {
        guint32 res, i;
@@ -2481,8 +2376,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                                // FIXME:
                                NOT_IMPLEMENTED;
                        } else if (ainfo->regtype == RegTypeBase) {
-                               // FIXME:
-                               NOT_IMPLEMENTED;
+                               /* Nothing to do */
                        } else if (ainfo->regtype == RegTypeFP) {
                                g_assert_not_reached ();
                        } else if (ainfo->regtype == RegTypeStructByVal) {
@@ -2519,6 +2413,8 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
        return code;
 }
 
+#ifndef DISABLE_JIT
+
 void
 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 {
@@ -2573,7 +2469,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_MEMORY_BARRIER:
                        break;
                case OP_TLS_GET:
+#ifdef HAVE_AEABI_READ_TP
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                                (gpointer)"__aeabi_read_tp");
+                       code = emit_call_seq (cfg, code);
+
+                       ARM_LDR_IMM (code, ins->dreg, ARMREG_R0, ins->inst_offset);
+#else
                        g_assert_not_reached ();
+#endif
                        break;
                /*case OP_BIGMUL:
                        ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
@@ -2712,6 +2616,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                                                 (gpointer)"mono_break");
                        code = emit_call_seq (cfg, code);
                        break;
+               case OP_RELAXED_NOP:
+                       ARM_NOP (code);
+                       break;
                case OP_NOP:
                case OP_DUMMY_USE:
                case OP_DUMMY_STORE:
@@ -2899,10 +2806,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_IMUL_OVF:
                        /* FIXME: handle ovf/ sreg2 != dreg */
                        ARM_MUL_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       /* FIXME: MUL doesn't set the C/O flags on ARM */
                        break;
                case OP_IMUL_OVF_UN:
                        /* FIXME: handle ovf/ sreg2 != dreg */
                        ARM_MUL_REG_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       /* FIXME: MUL doesn't set the C/O flags on ARM */
                        break;
                case OP_ICONST:
                        code = mono_arm_emit_load_imm (code, ins->dreg, ins->inst_c0);
@@ -2961,7 +2870,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        code = emit_big_add (code, ARMREG_SP, cfg->frame_reg, cfg->stack_usage);
                        ARM_POP_NWB (code, cfg->used_int_regs | ((1 << ARMREG_SP)) | ((1 << ARMREG_LR)));
                        mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
-                       ARM_B (code, 0);
+                       if (cfg->compile_aot) {
+                               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
+                               ARM_B (code, 0);
+                               *(gpointer*)code = NULL;
+                               code += 4;
+                               ARM_LDR_REG_REG (code, ARMREG_PC, ARMREG_PC, ARMREG_IP);
+                       } else {
+                               ARM_B (code, 0);
+                       }
                        break;
                case OP_CHECK_THIS:
                        /* ensure ins->sreg1 is not NULL */
@@ -3014,17 +2931,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (call->method->klass->flags & TYPE_ATTRIBUTE_INTERFACE) {
                                ARM_ADD_REG_IMM8 (code, ARMREG_LR, ARMREG_PC, 4);
                                ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
-                               if (cfg->compile_aot) {
-                                       /* 
-                                        * We can't embed the method in the code stream in PIC code. Instead,
-                                        * we put it in V5 in code emitted by mono_arch_emit_imt_argument (),
-                                        * and embed NULL here to signal the IMT thunk that the call is made
-                                        * from AOT code.
-                                        */
+                               /* 
+                                * We can't embed the method in the code stream in PIC code, or
+                                * in gshared code.
+                                * Instead, we put it in V5 in code emitted by 
+                                * mono_arch_emit_imt_argument (), and embed NULL here to 
+                                * signal the IMT thunk that the value is in V5.
+                                */
+                               if (call->dynamic_imt_arg)
                                        *((gpointer*)code) = NULL;
-                               } else {
+                               else
                                        *((gpointer*)code) = (gpointer)call->method;
-                               }
                                code += 4;
                        } else {
                                ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
@@ -3032,9 +2949,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        code = emit_move_return_value (cfg, ins, code);
                        break;
-               case OP_OUTARG:
-                       g_assert_not_reached ();
-                       break;
                case OP_LOCALLOC: {
                        /* keep alignment */
                        int alloca_waste = cfg->param_area;
@@ -3158,8 +3072,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         * After follows the data.
                         * FIXME: add aot support.
                         */
-                       if (cfg->new_ir)
-                               mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_SWITCH, ins->inst_p0);
+                       mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_SWITCH, ins->inst_p0);
                        max_len += 4 * GPOINTER_TO_INT (ins->klass);
                        if (offset > (cfg->code_size - max_len - 16)) {
                                cfg->code_size += max_len;
@@ -3221,14 +3134,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_IEQ, ins->inst_p1);
                        break;
                case OP_COND_EXC_C:
-               case OP_COND_EXC_OV:
-               case OP_COND_EXC_NC:
-               case OP_COND_EXC_NO:
                case OP_COND_EXC_IC:
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_CS, ins->inst_p1);
+                       break;
+               case OP_COND_EXC_OV:
                case OP_COND_EXC_IOV:
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_VS, ins->inst_p1);
+                       break;
+               case OP_COND_EXC_NC:
                case OP_COND_EXC_INC:
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_CC, ins->inst_p1);
+                       break;
+               case OP_COND_EXC_NO:
                case OP_COND_EXC_INO:
-                       /* FIXME: */
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_VC, ins->inst_p1);
                        break;
                case OP_IBEQ:
                case OP_IBNE_UN:
@@ -3276,6 +3195,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* This is generated by the local regalloc pass which runs after the lowering pass */
                        if (!arm_is_fpimm8 (ins->inst_offset)) {
                                code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ins->inst_destbasereg);
                                ARM_STFD (code, ins->sreg1, ARMREG_LR, 0);
                        } else {
                                ARM_STFD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
@@ -3285,6 +3205,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* This is generated by the local regalloc pass which runs after the lowering pass */
                        if (!arm_is_fpimm8 (ins->inst_offset)) {
                                code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ins->inst_basereg);
                                ARM_LDFD (code, ins->dreg, ARMREG_LR, 0);
                        } else {
                                ARM_LDFD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
@@ -3330,7 +3251,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_ICONV_TO_R8:
                        ARM_FLTD (code, ins->dreg, ins->sreg1);
                        break;
+
 #elif defined(ARM_FPU_VFP)
+
                case OP_R8CONST:
                        if (cfg->compile_aot) {
                                ARM_FLDD (code, ins->dreg, ARMREG_PC, 0);
@@ -3361,34 +3284,60 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_STORER8_MEMBASE_REG:
-                       g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_FSTD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       /* This is generated by the local regalloc pass which runs after the lowering pass */
+                       if (!arm_is_fpimm8 (ins->inst_offset)) {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ins->inst_destbasereg);
+                               ARM_FSTD (code, ins->sreg1, ARMREG_LR, 0);
+                       } else {
+                               ARM_FSTD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       }
                        break;
                case OP_LOADR8_MEMBASE:
-                       g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_FLDD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       /* This is generated by the local regalloc pass which runs after the lowering pass */
+                       if (!arm_is_fpimm8 (ins->inst_offset)) {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ins->inst_basereg);
+                               ARM_FLDD (code, ins->dreg, ARMREG_LR, 0);
+                       } else {
+                               ARM_FLDD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       }
                        break;
                case OP_STORER4_MEMBASE_REG:
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_FSTS (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       ARM_CVTD (code, ARM_VFP_F0, ins->sreg1);
+                       ARM_FSTS (code, ARM_VFP_F0, ins->inst_destbasereg, ins->inst_offset);
                        break;
                case OP_LOADR4_MEMBASE:
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_FLDS (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       ARM_FLDS (code, ARM_VFP_F0, ins->inst_basereg, ins->inst_offset);
+                       ARM_CVTS (code, ins->dreg, ARM_VFP_F0);
                        break;
                case OP_ICONV_TO_R_UN: {
                        g_assert_not_reached ();
                        break;
                }
                case OP_ICONV_TO_R4:
-                       g_assert_not_reached ();
-                       //ARM_FLTS (code, ins->dreg, ins->sreg1);
+                       ARM_FMSR (code, ARM_VFP_F0, ins->sreg1);
+                       ARM_FUITOS (code, ARM_VFP_F0, ARM_VFP_F0);
+                       ARM_CVTS (code, ins->dreg, ARM_VFP_F0);
                        break;
                case OP_ICONV_TO_R8:
-                       g_assert_not_reached ();
-                       //ARM_FLTD (code, ins->dreg, ins->sreg1);
+                       ARM_FMSR (code, ARM_VFP_F0, ins->sreg1);
+                       ARM_FUITOD (code, ins->dreg, ARM_VFP_F0);
                        break;
+
+               case OP_SETFRET:
+                       if (mono_method_signature (cfg->method)->ret->type == MONO_TYPE_R4) {
+                               ARM_CVTD (code, ARM_VFP_F0, ins->sreg1);
+                               ARM_FMRS (code, ARMREG_R0, ARM_VFP_F0);
+                       } else {
+                               ARM_FMRRD (code, ARMREG_R0, ARMREG_R1, ins->sreg1);
+                       }
+                       break;
+
 #endif
+
                case OP_FCONV_TO_I1:
                        code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
                        break;
@@ -3420,30 +3369,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_LCONV_TO_OVF_I:
                case OP_LCONV_TO_OVF_I4_2: {
-#if ARM_PORT
-                       guint32 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target;
-                       // Check if its negative
-                       ppc_cmpi (code, 0, 0, ins->sreg1, 0);
-                       negative_branch = code;
-                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_LT, 0);
-                       // Its positive msword == 0
-                       ppc_cmpi (code, 0, 0, ins->sreg2, 0);
-                       msword_positive_branch = code;
-                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_EQ, 0);
-
-                       ovf_ex_target = code;
-                       //EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_ALWAYS, 0, "OverflowException");
-                       // Negative
-                       ppc_patch (negative_branch, code);
-                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
-                       msword_negative_branch = code;
-                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
-                       ppc_patch (msword_negative_branch, ovf_ex_target);
+                       guint32 *high_bit_not_set, *valid_negative, *invalid_negative, *valid_positive;
+                       /* 
+                        * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
+                        */
+
+                       ARM_CMP_REG_IMM8 (code, ins->sreg1, 0);
+                       high_bit_not_set = code;
+                       ARM_B_COND (code, ARMCOND_GE, 0); /*branch if bit 31 of the lower part is not set*/
+
+                       ARM_CMN_REG_IMM8 (code, ins->sreg2, 1); /*This have the same effect as CMP reg, 0xFFFFFFFF */
+                       valid_negative = code;
+                       ARM_B_COND (code, ARMCOND_EQ, 0); /*branch if upper part == 0xFFFFFFFF (lower part has bit 31 set) */
+                       invalid_negative = code;
+                       ARM_B_COND (code, ARMCOND_AL, 0);
                        
-                       ppc_patch (msword_positive_branch, code);
-                       if (ins->dreg != ins->sreg1)
-                               ppc_mr (code, ins->dreg, ins->sreg1);
-#endif
+                       arm_patch (high_bit_not_set, code);
+
+                       ARM_CMP_REG_IMM8 (code, ins->sreg2, 0);
+                       valid_positive = code;
+                       ARM_B_COND (code, ARMCOND_EQ, 0); /*branch if upper part == 0 (lower part has bit 31 clear)*/
+
+                       arm_patch (invalid_negative, code);
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_AL, "OverflowException");
+
+                       arm_patch (valid_negative, code);
+                       arm_patch (valid_positive, code);
+
                        if (ins->dreg != ins->sreg1)
                                ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
@@ -3490,6 +3442,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
 #elif defined(ARM_FPU_VFP)
                        ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                       ARM_FMSTAT (code);
 #endif
                        break;
                case OP_FCEQ:
@@ -3497,6 +3450,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
 #elif defined(ARM_FPU_VFP)
                        ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                       ARM_FMSTAT (code);
 #endif
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 0, ARMCOND_NE);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_EQ);
@@ -3506,6 +3460,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
 #elif defined(ARM_FPU_VFP)
                        ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                       ARM_FMSTAT (code);
 #endif
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
@@ -3515,6 +3470,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
 #elif defined(ARM_FPU_VFP)
                        ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                       ARM_FMSTAT (code);
 #endif
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
@@ -3526,6 +3482,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
 #elif defined(ARM_FPU_VFP)
                        ARM_CMPD (code, ins->sreg2, ins->sreg1);
+                       ARM_FMSTAT (code);
 #endif
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
@@ -3536,6 +3493,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
 #elif defined(ARM_FPU_VFP)
                        ARM_CMPD (code, ins->sreg2, ins->sreg1);
+                       ARM_FMSTAT (code);
 #endif
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
@@ -3567,6 +3525,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_FBGE:
+                       /* FIXME: does VFP require both conds?
+                        * FPA requires EQ even though the docs suggest that just CS is enough
+                        */
+                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_EQ);
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS);
                        break;
                case OP_FBGE_UN:
@@ -3578,8 +3540,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 #ifdef ARM_FPU_FPA
                        if (ins->dreg != ins->sreg1)
                                ARM_MVFD (code, ins->dreg, ins->sreg1);
-#else
-                       g_assert_not_reached ();
+#elif defined(ARM_FPU_VFP)
+                       ARM_CPYD (code, ins->dreg, ins->sreg1);
 #endif
                        break;
                }
@@ -3603,9 +3565,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        cfg->code_len = code - cfg->native_code;
 }
 
+#endif /* DISABLE_JIT */
+
+#ifdef HAVE_AEABI_READ_TP
+void __aeabi_read_tp (void);
+#endif
+
 void
 mono_arch_register_lowlevel_calls (void)
 {
+       /* The signature doesn't matter */
+       mono_register_jit_icall (mono_arm_throw_exception, "mono_arm_throw_exception", mono_create_icall_signature ("void"), TRUE);
+       mono_register_jit_icall (mono_arm_throw_exception_by_token, "mono_arm_throw_exception_by_token", mono_create_icall_signature ("void"), TRUE);
+
+#ifdef HAVE_AEABI_READ_TP
+       mono_register_jit_icall (__aeabi_read_tp, "__aeabi_read_tp", mono_create_icall_signature ("void"), TRUE);
+#endif
 }
 
 #define patch_lis_ori(ip,val) do {\
@@ -3719,7 +3694,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        CallInfo *cinfo;
        int tracing = 0;
        int lmf_offset = 0;
-       int prev_sp_offset;
+       int prev_sp_offset, reg_offset;
 
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                tracing = 1;
@@ -3728,21 +3703,40 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        cfg->code_size = 256 + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
+       mono_emit_unwind_op_def_cfa (cfg, code, ARMREG_SP, 0);
+
        ARM_MOV_REG_REG (code, ARMREG_IP, ARMREG_SP);
 
        alloc_size = cfg->stack_offset;
        pos = 0;
 
        if (!method->save_lmf) {
+               /* We save SP by storing it into IP and saving IP */
                ARM_PUSH (code, (cfg->used_int_regs | (1 << ARMREG_IP) | (1 << ARMREG_LR)));
                prev_sp_offset = 8; /* ip and lr */
                for (i = 0; i < 16; ++i) {
                        if (cfg->used_int_regs & (1 << i))
                                prev_sp_offset += 4;
                }
+               mono_emit_unwind_op_def_cfa_offset (cfg, code, prev_sp_offset);
+               reg_offset = 0;
+               for (i = 0; i < 16; ++i) {
+                       if ((cfg->used_int_regs & (1 << i)) || (i == ARMREG_IP) || (i == ARMREG_LR)) {
+                               mono_emit_unwind_op_offset (cfg, code, i, (- prev_sp_offset) + reg_offset);
+                               reg_offset += 4;
+                       }
+               }
        } else {
                ARM_PUSH (code, 0x5ff0);
                prev_sp_offset = 4 * 10; /* all but r0-r3, sp and pc */
+               mono_emit_unwind_op_def_cfa_offset (cfg, code, prev_sp_offset);
+               reg_offset = 0;
+               for (i = 0; i < 16; ++i) {
+                       if ((i > ARMREG_R3) && (i != ARMREG_SP) && (i != ARMREG_PC)) {
+                               mono_emit_unwind_op_offset (cfg, code, i, (- prev_sp_offset) + reg_offset);
+                               reg_offset += 4;
+                       }
+               }
                pos += sizeof (MonoLMF) - prev_sp_offset;
                lmf_offset = pos;
        }
@@ -3764,9 +3758,12 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        code = mono_arm_emit_load_imm (code, ARMREG_IP, alloc_size);
                        ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
                }
+               mono_emit_unwind_op_def_cfa_offset (cfg, code, prev_sp_offset + alloc_size);
        }
-       if (cfg->frame_reg != ARMREG_SP)
+       if (cfg->frame_reg != ARMREG_SP) {
                ARM_MOV_REG_REG (code, cfg->frame_reg, ARMREG_SP);
+               mono_emit_unwind_op_def_cfa_reg (cfg, code, cfg->frame_reg);
+       }
        //g_print ("prev_sp_offset: %d, alloc_size:%d\n", prev_sp_offset, alloc_size);
        prev_sp_offset += alloc_size;
 
@@ -3786,6 +3783,20 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
        }
 
+       /* store runtime generic context */
+       if (cfg->rgctx_var) {
+               MonoInst *ins = cfg->rgctx_var;
+
+               g_assert (ins->opcode == OP_REGOFFSET);
+
+               if (arm_is_imm12 (ins->inst_offset)) {
+                       ARM_STR_IMM (code, MONO_ARCH_RGCTX_REG, ins->inst_basereg, ins->inst_offset);
+               } else {
+                       code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                       ARM_STR_REG_REG (code, MONO_ARCH_RGCTX_REG, ins->inst_basereg, ARMREG_LR);
+               }
+       }
+
        /* load arguments allocated to register from the stack */
        pos = 0;
 
@@ -3922,8 +3933,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                int soffset = 0;
                                int cur_reg;
                                int size = 0;
-                               if (mono_class_from_mono_type (inst->inst_vtype))
-                                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
+                               size = mini_type_stack_size_full (cfg->generic_sharing_context, inst->inst_vtype, NULL, sig->pinvoke);
                                for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
                                        if (arm_is_imm12 (doffset)) {
                                                ARM_STR_IMM (code, ainfo->reg + cur_reg, inst->inst_basereg, doffset);
@@ -3949,11 +3959,35 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                pos++;
        }
 
-       if (method->save_lmf) {
-
+       if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
+               code = mono_arm_emit_load_imm (code, ARMREG_R0, (guint32)cfg->domain);
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
-                            (gpointer)"mono_get_lmf_addr");
+                            (gpointer)"mono_jit_thread_attach");
                code = emit_call_seq (cfg, code);
+       }
+
+       if (method->save_lmf) {
+               gboolean get_lmf_fast = FALSE;
+
+#ifdef HAVE_AEABI_READ_TP
+               gint32 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
+
+               if (lmf_addr_tls_offset != -1) {
+                       get_lmf_fast = TRUE;
+
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                                (gpointer)"__aeabi_read_tp");
+                       code = emit_call_seq (cfg, code);
+
+                       ARM_LDR_IMM (code, ARMREG_R0, ARMREG_R0, lmf_addr_tls_offset);
+                       get_lmf_fast = TRUE;
+               }
+#endif
+               if (!get_lmf_fast) {
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                                (gpointer)"mono_get_lmf_addr");
+                       code = emit_call_seq (cfg, code);
+               }
                /* we build the MonoLMF structure on the stack - see mini-arm.h */
                /* lmf_offset is the offset from the previous stack pointer,
                 * alloc_size is the total stack space allocated, so the offset
@@ -4154,51 +4188,22 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 
 }
 
-void
-mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
-{
-}
+static gboolean tls_offset_inited = FALSE;
 
 void
-mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
-{
-}
-
-void
-mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
+mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 {
-       
-       int this_dreg = ARMREG_R0;
-       
-       if (vt_reg != -1)
-               this_dreg = ARMREG_R1;
-
-       /* add the this argument */
-       if (this_reg != -1) {
-               MonoInst *this;
-               MONO_INST_NEW (cfg, this, OP_MOVE);
-               this->type = this_type;
-               this->sreg1 = this_reg;
-               this->dreg = mono_regstate_next_int (cfg->rs);
-               mono_bblock_add_inst (cfg->cbb, this);
-               mono_call_inst_add_outarg_reg (cfg, inst, this->dreg, this_dreg, FALSE);
-       }
+       if (!tls_offset_inited) {
+               tls_offset_inited = TRUE;
 
-       if (vt_reg != -1) {
-               MonoInst *vtarg;
-               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
-               vtarg->type = STACK_MP;
-               vtarg->sreg1 = vt_reg;
-               vtarg->dreg = mono_regstate_next_int (cfg->rs);
-               mono_bblock_add_inst (cfg->cbb, vtarg);
-               mono_call_inst_add_outarg_reg (cfg, inst, vtarg->dreg, ARMREG_R0, FALSE);
+               lmf_tls_offset = mono_get_lmf_tls_offset ();
+               lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
        }
 }
 
-MonoInst*
-mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+void
+mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
 {
-       return NULL;
 }
 
 MonoInst*
@@ -4214,15 +4219,16 @@ mono_arch_print_tree (MonoInst *tree, int arity)
        return 0;
 }
 
-MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
+MonoInst*
+mono_arch_get_domain_intrinsic (MonoCompile* cfg)
 {
-       return NULL;
+       return mono_get_domain_intrinsic (cfg);
 }
 
-MonoInst* 
+MonoInst*
 mono_arch_get_thread_intrinsic (MonoCompile* cfg)
 {
-       return NULL;
+       return mono_get_thread_intrinsic (cfg);
 }
 
 guint32
@@ -4240,20 +4246,19 @@ mono_arch_flush_register_windows (void)
 void
 mono_arch_fixup_jinfo (MonoCompile *cfg)
 {
-       /* max encoded stack usage is 64KB * 4 */
-       g_assert ((cfg->stack_usage & ~(0xffff << 2)) == 0);
-       cfg->jit_info->used_regs |= cfg->stack_usage << 14;
 }
 
 #ifdef MONO_ARCH_HAVE_IMT
 
 void
-mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call)
+mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
 {
        if (cfg->compile_aot) {
-               int method_reg = mono_regstate_next_int (cfg->rs);
+               int method_reg = mono_alloc_ireg (cfg);
                MonoInst *ins;
 
+               call->dynamic_imt_arg = TRUE;
+
                MONO_INST_NEW (cfg, ins, OP_AOTCONST);
                ins->dreg = method_reg;
                ins->inst_p0 = call->method;
@@ -4261,6 +4266,26 @@ mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call)
                MONO_ADD_INS (cfg->cbb, ins);
 
                mono_call_inst_add_outarg_reg (cfg, call, method_reg, ARMREG_V5, FALSE);
+       } else if (cfg->generic_context) {
+
+               /* Always pass in a register for simplicity */
+               call->dynamic_imt_arg = TRUE;
+
+               cfg->uses_rgctx_reg = TRUE;
+
+               if (imt_arg) {
+                       mono_call_inst_add_outarg_reg (cfg, call, imt_arg->dreg, ARMREG_V5, FALSE);
+               } else {
+                       MonoInst *ins;
+                       int method_reg = mono_alloc_preg (cfg);
+
+                       MONO_INST_NEW (cfg, ins, OP_PCONST);
+                       ins->inst_p0 = call->method;
+                       ins->dreg = method_reg;
+                       MONO_ADD_INS (cfg->cbb, ins);
+
+                       mono_call_inst_add_outarg_reg (cfg, call, method_reg, ARMREG_V5, FALSE);
+               }
        }
 }
 
@@ -4287,6 +4312,11 @@ mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSha
        return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
 }
 
+MonoVTable*
+mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
+{
+       return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
+}
 
 #define ENABLE_WRONG_METHOD_CHECK 0
 #define BASE_SIZE (6 * 4)
@@ -4309,16 +4339,30 @@ arm_emit_value_and_patch_ldr (arminstr_t *code, arminstr_t *target, guint32 valu
 }
 
 gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+       gpointer fail_tramp)
 {
        int size, i, extra_space = 0;
        arminstr_t *code, *start, *vtable_target = NULL;
+       gboolean large_offsets = FALSE;
+       guint32 **constant_pool_starts;
+
        size = BASE_SIZE;
+       constant_pool_starts = g_new0 (guint32*, count);
+
+       /* 
+        * We might be called with a fail_tramp from the IMT builder code even if
+        * MONO_ARCH_HAVE_GENERALIZED_IMT_THUNK is not defined.
+        */
+       //g_assert (!fail_tramp);
 
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                if (item->is_equals) {
-                       g_assert (arm_is_imm12 (DISTANCE (vtable, &vtable->vtable[item->vtable_slot])));
+                       if (!arm_is_imm12 (DISTANCE (vtable, &vtable->vtable[item->value.vtable_slot]))) {
+                               item->chunk_size += 32;
+                               large_offsets = TRUE;
+                       }
 
                        if (item->check_target_idx) {
                                if (!item->compare_done)
@@ -4337,17 +4381,23 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                size += item->chunk_size;
        }
 
-       start = code = mono_code_manager_reserve (domain->code_mp, size);
+       if (large_offsets)
+               size += 4 * count; /* The ARM_ADD_REG_IMM to pop the stack */
+
+       start = code = mono_domain_code_reserve (domain, size);
 
 #if DEBUG_IMT
        printf ("building IMT thunk for class %s %s entries %d code size %d code at %p end %p vtable %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable);
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
-               printf ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->method, item->method->name, &vtable->vtable [item->vtable_slot], item->is_equals, item->chunk_size);
+               printf ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->key, item->key->name, &vtable->vtable [item->value.vtable_slot], item->is_equals, item->chunk_size);
        }
 #endif
 
-       ARM_PUSH2 (code, ARMREG_R0, ARMREG_R1);
+       if (large_offsets)
+               ARM_PUSH4 (code, ARMREG_R0, ARMREG_R1, ARMREG_IP, ARMREG_PC);
+       else
+               ARM_PUSH2 (code, ARMREG_R0, ARMREG_R1);
        ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, -4);
        vtable_target = code;
        ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
@@ -4358,7 +4408,9 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
 
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
-               arminstr_t *imt_method = NULL;
+               arminstr_t *imt_method = NULL, *vtable_offset_ins = NULL;
+               gint32 vtable_offset;
+
                item->code_target = (guint8*)code;
 
                if (item->is_equals) {
@@ -4370,9 +4422,6 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                }
                                item->jmp_code = (guint8*)code;
                                ARM_B_COND (code, ARMCOND_NE, 0);
-
-                               ARM_POP2 (code, ARMREG_R0, ARMREG_R1);
-                               ARM_LDR_IMM (code, ARMREG_PC, ARMREG_IP, DISTANCE (vtable, &vtable->vtable[item->vtable_slot]));
                        } else {
                                /*Enable the commented code to assert on wrong method*/
 #if ENABLE_WRONG_METHOD_CHECK
@@ -4380,17 +4429,38 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0);
                                ARM_CMP_REG_REG (code, ARMREG_R0, ARMREG_R1);
                                ARM_B_COND (code, ARMCOND_NE, 1);
-#endif
-                               ARM_POP2 (code, ARMREG_R0, ARMREG_R1);
-                               ARM_LDR_IMM (code, ARMREG_PC, ARMREG_IP, DISTANCE (vtable, &vtable->vtable[item->vtable_slot]));
 
-#if ENABLE_WRONG_METHOD_CHECK
                                ARM_DBRK (code);
 #endif
                        }
 
+                       vtable_offset = DISTANCE (vtable, &vtable->vtable[item->value.vtable_slot]);
+                       if (!arm_is_imm12 (vtable_offset)) {
+                               /* 
+                                * We need to branch to a computed address but we don't have
+                                * a free register to store it, since IP must contain the 
+                                * vtable address. So we push the two values to the stack, and
+                                * load them both using LDM.
+                                */
+                               /* Compute target address */
+                               vtable_offset_ins = code;
+                               ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0);
+                               ARM_LDR_REG_REG (code, ARMREG_R1, ARMREG_IP, ARMREG_R1);
+                               /* Save it to the fourth slot */
+                               ARM_STR_IMM (code, ARMREG_R1, ARMREG_SP, 3 * sizeof (gpointer));
+                               /* Restore registers and branch */
+                               ARM_POP4 (code, ARMREG_R0, ARMREG_R1, ARMREG_IP, ARMREG_PC);
+                               
+                               code = arm_emit_value_and_patch_ldr (code, vtable_offset_ins, vtable_offset);
+                       } else {
+                               ARM_POP2 (code, ARMREG_R0, ARMREG_R1);
+                               if (large_offsets)
+                                       ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 2 * sizeof (gpointer));
+                               ARM_LDR_IMM (code, ARMREG_PC, ARMREG_IP, vtable_offset);
+                       }
+
                        if (imt_method)
-                               code = arm_emit_value_and_patch_ldr (code, imt_method, (guint32)item->method);
+                               code = arm_emit_value_and_patch_ldr (code, imt_method, (guint32)item->key);
 
                        /*must emit after unconditional branch*/
                        if (vtable_target) {
@@ -4400,6 +4470,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                        }
 
                        /*We reserve the space for bsearch IMT values after the first entry with an absolute jump*/
+                       constant_pool_starts [i] = code;
                        if (extra_space) {
                                code += extra_space;
                                extra_space = 0;
@@ -4422,9 +4493,9 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                }
                if (i > 0 && item->is_equals) {
                        int j;
-                       arminstr_t *space_start = (arminstr_t*)(item->code_target + item->chunk_size);
+                       arminstr_t *space_start = constant_pool_starts [i];
                        for (j = i - 1; j >= 0 && !imt_entries [j]->is_equals; --j) {
-                               space_start = arm_emit_value_and_patch_ldr (space_start, (arminstr_t*)imt_entries [j]->code_target, (guint32)imt_entries [j]->method);
+                               space_start = arm_emit_value_and_patch_ldr (space_start, (arminstr_t*)imt_entries [j]->code_target, (guint32)imt_entries [j]->key);
                        }
                }
        }
@@ -4437,6 +4508,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
        }
 #endif
 
+       g_free (constant_pool_starts);
+
        mono_arch_flush_icache ((guint8*)start, size);
        mono_stats.imt_thunks_size += code - start;
 
@@ -4449,6 +4522,14 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
 gpointer
 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
 {
-       /* FIXME: implement */
-       g_assert_not_reached ();
+       if (reg >= 4 && reg <= 11)
+               return (gpointer)ctx->regs [reg - 4];
+       else if (reg == ARMREG_IP)
+               return (gpointer)ctx->regs [8];
+       else if (reg == ARMREG_LR)
+               return (gpointer)ctx->regs [9];
+       else {
+               g_assert_not_reached ();
+               return NULL;
+       }
 }