Merge pull request #1857 from slluis/fix-assembly-resolver
[mono.git] / mono / mini / mini-arm.c
index 2d0ef57b9931d620a7ea17611bcac7d23fb74744..0bdec210868ae04a6913482a461efc4b5ddaa302 100644 (file)
@@ -60,6 +60,8 @@
 #define HAVE_AEABI_READ_TP 1
 #endif
 
+#define THUNK_SIZE (3 * 4) /* amount added to cfg->thunk_area by emit_call_seq (); presumably three 4-byte words - TODO confirm */
+
 #ifdef __native_client_codegen__
 const guint kNaClAlignment = kNaClAlignmentARM;
 const guint kNaClAlignmentMask = kNaClAlignmentMaskARM;
@@ -143,8 +145,7 @@ static int i8_align;
 
 static volatile int ss_trigger_var = 0;
 
-static gpointer single_step_func_wrapper;
-static gpointer breakpoint_func_wrapper;
+static gpointer single_step_tramp, breakpoint_tramp; /* assigned in mono_arch_init () from mini_get_single_step_trampoline ()/mini_get_breakpoint_trampoline () when the soft_breakpoints debug option is set */
 
 /*
  * The code generated for sequence points reads from this location, which is
@@ -331,6 +332,7 @@ emit_call_seq (MonoCompile *cfg, guint8 *code)
        } else {
                ARM_BL (code, 0);
        }
+       cfg->thunk_area += THUNK_SIZE;
 #endif
        return code;
 }
@@ -383,55 +385,6 @@ mono_arm_load_jumptable_entry (guint8 *code, gpointer* jte, ARMReg reg)
 }
 #endif
 
-static guint8*
-emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
-{
-       switch (ins->opcode) {
-       case OP_FCALL:
-       case OP_FCALL_REG:
-       case OP_FCALL_MEMBASE:
-               if (IS_VFP) {
-                       MonoType *sig_ret = mini_type_get_underlying_type (NULL, ((MonoCallInst*)ins)->signature->ret);
-                       if (sig_ret->type == MONO_TYPE_R4) {
-                               if (IS_HARD_FLOAT) {
-                                       ARM_CVTS (code, ins->dreg, ARM_VFP_F0);
-                               } else {
-                                       ARM_FMSR (code, ins->dreg, ARMREG_R0);
-                                       ARM_CVTS (code, ins->dreg, ins->dreg);
-                               }
-                       } else {
-                               if (IS_HARD_FLOAT) {
-                                       ARM_CPYD (code, ins->dreg, ARM_VFP_D0);
-                               } else {
-                                       ARM_FMDRR (code, ARMREG_R0, ARMREG_R1, ins->dreg);
-                               }
-                       }
-               }
-               break;
-       case OP_RCALL:
-       case OP_RCALL_REG:
-       case OP_RCALL_MEMBASE: {
-               MonoType *sig_ret;
-
-               g_assert (IS_VFP);
-
-               sig_ret = mini_type_get_underlying_type (NULL, ((MonoCallInst*)ins)->signature->ret);
-               g_assert (sig_ret->type == MONO_TYPE_R4);
-               if (IS_HARD_FLOAT) {
-                       ARM_CPYS (code, ins->dreg, ARM_VFP_F0);
-               } else {
-                       ARM_FMSR (code, ins->dreg, ARMREG_R0);
-                       ARM_CPYS (code, ins->dreg, ins->dreg);
-               }
-               break;
-       }
-       default:
-               break;
-       }
-
-       return code;
-}
-
 /*
  * emit_save_lmf:
  *
@@ -848,73 +801,6 @@ mono_arch_cpu_init (void)
 #endif
 }
 
-static gpointer
-create_function_wrapper (gpointer function)
-{
-       guint8 *start, *code;
-
-       start = code = mono_global_codeman_reserve (96);
-
-       /*
-        * Construct the MonoContext structure on the stack.
-        */
-
-       ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, sizeof (MonoContext));
-
-       /* save ip, lr and pc into their correspodings ctx.regs slots. */
-       ARM_STR_IMM (code, ARMREG_IP, ARMREG_SP, MONO_STRUCT_OFFSET (MonoContext, regs) + sizeof (mgreg_t) * ARMREG_IP);
-       ARM_STR_IMM (code, ARMREG_LR, ARMREG_SP, MONO_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_LR);
-       ARM_STR_IMM (code, ARMREG_LR, ARMREG_SP, MONO_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_PC);
-
-       /* save r0..r10 and fp */
-       ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_SP, MONO_STRUCT_OFFSET (MonoContext, regs));
-       ARM_STM (code, ARMREG_IP, 0x0fff);
-
-       /* now we can update fp. */
-       ARM_MOV_REG_REG (code, ARMREG_FP, ARMREG_SP);
-
-       /* make ctx.esp hold the actual value of sp at the beginning of this method. */
-       ARM_ADD_REG_IMM8 (code, ARMREG_R0, ARMREG_FP, sizeof (MonoContext));
-       ARM_STR_IMM (code, ARMREG_R0, ARMREG_IP, 4 * ARMREG_SP);
-       ARM_STR_IMM (code, ARMREG_R0, ARMREG_FP, MONO_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_SP);
-
-       /* make ctx.eip hold the address of the call. */
-       ARM_SUB_REG_IMM8 (code, ARMREG_LR, ARMREG_LR, 4);
-       ARM_STR_IMM (code, ARMREG_LR, ARMREG_SP, MONO_STRUCT_OFFSET (MonoContext, pc));
-
-       /* r0 now points to the MonoContext */
-       ARM_MOV_REG_REG (code, ARMREG_R0, ARMREG_FP);
-
-       /* call */
-#ifdef USE_JUMP_TABLES
-       {
-               gpointer *jte = mono_jumptable_add_entry ();
-               code = mono_arm_load_jumptable_entry (code, jte, ARMREG_IP);
-               jte [0] = function;
-       }
-#else
-       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-       ARM_B (code, 0);
-       *(gpointer*)code = function;
-       code += 4;
-#endif
-       ARM_BLX_REG (code, ARMREG_IP);
-
-       /* we're back; save ctx.eip and ctx.esp into the corresponding regs slots. */
-       ARM_LDR_IMM (code, ARMREG_R0, ARMREG_FP, MONO_STRUCT_OFFSET (MonoContext, pc));
-       ARM_STR_IMM (code, ARMREG_R0, ARMREG_FP, MONO_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_LR);
-       ARM_STR_IMM (code, ARMREG_R0, ARMREG_FP, MONO_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_PC);
-
-       /* make ip point to the regs array, then restore everything, including pc. */
-       ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_FP, MONO_STRUCT_OFFSET (MonoContext, regs));
-       ARM_LDM (code, ARMREG_IP, 0xffff);
-
-       mono_arch_flush_icache (start, code - start);
-       mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_HELPER, NULL);
-
-       return start;
-}
-
 /*
  * Initialize architecture specific code.
  */
@@ -924,14 +810,10 @@ mono_arch_init (void)
        const char *cpu_arch;
 
        mono_mutex_init_recursive (&mini_arch_mutex);
-#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
        if (mini_get_debug_options ()->soft_breakpoints) {
-               single_step_func_wrapper = create_function_wrapper (debugger_agent_single_step_from_context);
-               breakpoint_func_wrapper = create_function_wrapper (debugger_agent_breakpoint_from_context);
+               single_step_tramp = mini_get_single_step_trampoline ();
+               breakpoint_tramp = mini_get_breakpoint_trampoline ();
        } else {
-#else
-       {
-#endif
                ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
                bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
                mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
@@ -1237,11 +1119,16 @@ typedef enum {
        RegTypeGSharedVtInReg,
        /* gsharedvt argument passed by addr on stack */
        RegTypeGSharedVtOnStack,
+       RegTypeHFA
 } ArgStorage;
 
 typedef struct {
        gint32  offset;
        guint16 vtsize; /* in param area */
+       /* RegTypeHFA: size in bytes of each floating point element (4 or 8) */
+       int esize;
+       /* RegTypeHFA: number of floating point elements (1-4) */
+       int nregs;
        guint8  reg;
        ArgStorage  storage;
        gint32  struct_size;
@@ -1251,8 +1138,7 @@ typedef struct {
 typedef struct {
        int nargs;
        guint32 stack_usage;
-       gboolean vtype_retaddr;
-       /* The index of the vret arg in the argument list */
+       /* The index of the vret arg in the argument list for RegTypeStructByAddr */
        int vret_arg_index;
        ArgInfo ret;
        ArgInfo sig_cookie;
@@ -1401,17 +1287,65 @@ add_float (guint *fpr, guint *stack_size, ArgInfo *ainfo, gboolean is_double, gi
        }
 }
 
+static gboolean /* TRUE when the non-static fields of T, flattened through nested structs, are 1-4 by-value floats that are all R4 or all R8 */
+is_hfa (MonoType *t, int *out_nfields, int *out_esize) /* the out params are written only when TRUE is returned: flattened field count, and element size (4 for R4, 8 for R8) */
+{
+       MonoClass *klass;
+       gpointer iter;
+       MonoClassField *field;
+       MonoType *ftype, *prev_ftype = NULL; /* prev_ftype: element type of the fields accepted so far, NULL until the first one */
+       int nfields = 0; /* running count of accepted elements, including those of nested structs */
+
+       klass = mono_class_from_mono_type (t);
+       iter = NULL;
+       while ((field = mono_class_get_fields (klass, &iter))) {
+               if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+                       continue; /* static fields do not take part in the check */
+               ftype = mono_field_get_type (field);
+               ftype = mini_type_get_underlying_type (NULL, ftype);
+
+               if (MONO_TYPE_ISSTRUCT (ftype)) { /* nested struct: it must pass is_hfa itself */
+                       int nested_nfields, nested_esize;
+
+                       if (!is_hfa (ftype, &nested_nfields, &nested_esize))
+                               return FALSE;
+                       if (nested_esize == 4) /* stand in for the nested struct with its element type so it can be compared with prev_ftype */
+                               ftype = &mono_defaults.single_class->byval_arg;
+                       else
+                               ftype = &mono_defaults.double_class->byval_arg;
+                       if (prev_ftype && prev_ftype->type != ftype->type)
+                               return FALSE; /* nested element type differs from the fields seen before */
+                       prev_ftype = ftype;
+                       nfields += nested_nfields; /* a nested struct contributes all of its elements */
+               } else {
+                       if (!(!ftype->byref && (ftype->type == MONO_TYPE_R4 || ftype->type == MONO_TYPE_R8)))
+                               return FALSE; /* only by-value R4/R8 fields are accepted */
+                       if (prev_ftype && prev_ftype->type != ftype->type)
+                               return FALSE; /* mixing R4 and R8 disqualifies the type */
+                       prev_ftype = ftype;
+                       nfields ++;
+               }
+       }
+       if (nfields == 0 || nfields > 4) /* nfields == 0 also means prev_ftype is still NULL, so it must not be dereferenced below */
+               return FALSE;
+       *out_nfields = nfields;
+       *out_esize = prev_ftype->type == MONO_TYPE_R4 ? 4 : 8;
+       return TRUE;
+}
+
 static CallInfo*
 get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig)
 {
        guint i, gr, fpr, pstart;
        gint float_spare;
        int n = sig->hasthis + sig->param_count;
-       MonoType *simpletype;
+       int nfields, esize;
+       guint32 align;
+       MonoType *t;
        guint32 stack_size = 0;
        CallInfo *cinfo;
        gboolean is_pinvoke = sig->pinvoke;
-       MonoType *t;
+       gboolean vtype_retaddr = FALSE;
 
        if (mp)
                cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
@@ -1424,18 +1358,84 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
        float_spare = -1;
 
        t = mini_type_get_underlying_type (gsctx, sig->ret);
-       if (MONO_TYPE_ISSTRUCT (t)) {
-               guint32 align;
+       switch (t->type) {
+       case MONO_TYPE_I1:
+       case MONO_TYPE_U1:
+       case MONO_TYPE_I2:
+       case MONO_TYPE_U2:
+       case MONO_TYPE_I4:
+       case MONO_TYPE_U4:
+       case MONO_TYPE_I:
+       case MONO_TYPE_U:
+       case MONO_TYPE_PTR:
+       case MONO_TYPE_FNPTR:
+       case MONO_TYPE_CLASS:
+       case MONO_TYPE_OBJECT:
+       case MONO_TYPE_SZARRAY:
+       case MONO_TYPE_ARRAY:
+       case MONO_TYPE_STRING:
+               cinfo->ret.storage = RegTypeGeneral;
+               cinfo->ret.reg = ARMREG_R0;
+               break;
+       case MONO_TYPE_U8:
+       case MONO_TYPE_I8:
+               cinfo->ret.storage = RegTypeIRegPair;
+               cinfo->ret.reg = ARMREG_R0;
+               break;
+       case MONO_TYPE_R4:
+       case MONO_TYPE_R8:
+               cinfo->ret.storage = RegTypeFP;
+
+               if (t->type == MONO_TYPE_R4)
+                       cinfo->ret.size = 4;
+               else
+                       cinfo->ret.size = 8;
 
-               if (is_pinvoke && mono_class_native_size (mono_class_from_mono_type (t), &align) <= sizeof (gpointer)) {
-                       cinfo->ret.storage = RegTypeStructByVal;
+               if (IS_HARD_FLOAT) {
+                       cinfo->ret.reg = ARM_VFP_F0;
                } else {
-                       cinfo->vtype_retaddr = TRUE;
+                       cinfo->ret.reg = ARMREG_R0;
                }
-       } else if (!(t->type == MONO_TYPE_GENERICINST && !mono_type_generic_inst_is_valuetype (t)) && mini_is_gsharedvt_type_gsctx (gsctx, t)) {
-               cinfo->vtype_retaddr = TRUE;
+               break;
+       case MONO_TYPE_GENERICINST:
+               if (!mono_type_generic_inst_is_valuetype (t)) {
+                       cinfo->ret.storage = RegTypeGeneral;
+                       cinfo->ret.reg = ARMREG_R0;
+                       break;
+               }
+               // FIXME: Only for variable types
+               if (mini_is_gsharedvt_type_gsctx (gsctx, t)) {
+                       cinfo->ret.storage = RegTypeStructByAddr;
+                       break;
+               }
+               /* Fall through */
+       case MONO_TYPE_VALUETYPE:
+       case MONO_TYPE_TYPEDBYREF:
+               if (IS_HARD_FLOAT && sig->pinvoke && is_hfa (t, &nfields, &esize)) {
+                       cinfo->ret.storage = RegTypeHFA;
+                       cinfo->ret.reg = 0;
+                       cinfo->ret.nregs = nfields;
+                       cinfo->ret.esize = esize;
+               } else {
+                       if (is_pinvoke && mono_class_native_size (mono_class_from_mono_type (t), &align) <= sizeof (gpointer))
+                               cinfo->ret.storage = RegTypeStructByVal;
+                       else
+                               cinfo->ret.storage = RegTypeStructByAddr;
+               }
+               break;
+       case MONO_TYPE_VAR:
+       case MONO_TYPE_MVAR:
+               g_assert (mini_is_gsharedvt_type_gsctx (gsctx, t));
+               cinfo->ret.storage = RegTypeStructByAddr;
+               break;
+       case MONO_TYPE_VOID:
+               break;
+       default:
+               g_error ("Can't handle as return value 0x%x", sig->ret->type);
        }
 
+       vtype_retaddr = cinfo->ret.storage == RegTypeStructByAddr;
+
        pstart = 0;
        n = 0;
        /*
@@ -1445,7 +1445,7 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
         * are sometimes made using calli without sig->hasthis set, like in the delegate
         * invoke wrappers.
         */
-       if (cinfo->vtype_retaddr && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_type_get_underlying_type (gsctx, sig->params [0]))))) {
+       if (vtype_retaddr && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_type_get_underlying_type (gsctx, sig->params [0]))))) {
                if (sig->hasthis) {
                        add_general (&gr, &stack_size, cinfo->args + 0, TRUE);
                } else {
@@ -1453,7 +1453,8 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                        pstart = 1;
                }
                n ++;
-               add_general (&gr, &stack_size, &cinfo->ret, TRUE);
+               cinfo->ret.reg = gr;
+               gr ++;
                cinfo->vret_arg_index = 1;
        } else {
                /* this */
@@ -1461,9 +1462,10 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                        add_general (&gr, &stack_size, cinfo->args + 0, TRUE);
                        n ++;
                }
-
-               if (cinfo->vtype_retaddr)
-                       add_general (&gr, &stack_size, &cinfo->ret, TRUE);
+               if (vtype_retaddr) {
+                       cinfo->ret.reg = gr;
+                       gr ++;
+               }
        }
 
        DEBUG(printf("params: %d\n", sig->param_count));
@@ -1485,25 +1487,22 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                        n++;
                        continue;
                }
-               simpletype = mini_type_get_underlying_type (gsctx, sig->params [i]);
-               switch (simpletype->type) {
+               t = mini_type_get_underlying_type (gsctx, sig->params [i]);
+               switch (t->type) {
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        cinfo->args [n].size = 1;
                        add_general (&gr, &stack_size, ainfo, TRUE);
-                       n++;
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                        cinfo->args [n].size = 2;
                        add_general (&gr, &stack_size, ainfo, TRUE);
-                       n++;
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        cinfo->args [n].size = 4;
                        add_general (&gr, &stack_size, ainfo, TRUE);
-                       n++;
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
@@ -1516,18 +1515,16 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                case MONO_TYPE_ARRAY:
                        cinfo->args [n].size = sizeof (gpointer);
                        add_general (&gr, &stack_size, ainfo, TRUE);
-                       n++;
                        break;
                case MONO_TYPE_GENERICINST:
-                       if (!mono_type_generic_inst_is_valuetype (simpletype)) {
+                       if (!mono_type_generic_inst_is_valuetype (t)) {
                                cinfo->args [n].size = sizeof (gpointer);
                                add_general (&gr, &stack_size, ainfo, TRUE);
-                               n++;
                                break;
                        }
-                       if (mini_is_gsharedvt_type_gsctx (gsctx, simpletype)) {
+                       if (mini_is_gsharedvt_type_gsctx (gsctx, t)) {
                                /* gsharedvt arguments are passed by ref */
-                               g_assert (mini_is_gsharedvt_type_gsctx (gsctx, simpletype));
+                               g_assert (mini_is_gsharedvt_type_gsctx (gsctx, t));
                                add_general (&gr, &stack_size, ainfo, TRUE);
                                switch (ainfo->storage) {
                                case RegTypeGeneral:
@@ -1539,7 +1536,6 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                                default:
                                        g_assert_not_reached ();
                                }
-                               n++;
                                break;
                        }
                        /* Fall through */
@@ -1547,10 +1543,23 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                case MONO_TYPE_VALUETYPE: {
                        gint size;
                        int align_size;
-                       int nwords;
+                       int nwords, nfields, esize;
                        guint32 align;
 
-                       if (simpletype->type == MONO_TYPE_TYPEDBYREF) {
+                       if (IS_HARD_FLOAT && sig->pinvoke && is_hfa (t, &nfields, &esize)) {
+                               if (fpr + nfields < ARM_VFP_F16) {
+                                       ainfo->storage = RegTypeHFA;
+                                       ainfo->reg = fpr;
+                                       ainfo->nregs = nfields;
+                                       ainfo->esize = esize;
+                                       fpr += nfields;
+                                       break;
+                               } else {
+                                       fpr = ARM_VFP_F16;
+                               }
+                       }
+
+                       if (t->type == MONO_TYPE_TYPEDBYREF) {
                                size = sizeof (MonoTypedRef);
                                align = sizeof (gpointer);
                        } else {
@@ -1558,7 +1567,7 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                                if (is_pinvoke)
                                        size = mono_class_native_size (klass, &align);
                                else
-                                       size = mini_type_stack_size_full (gsctx, simpletype, &align, FALSE);
+                                       size = mini_type_stack_size_full (gsctx, t, &align, FALSE);
                        }
                        DEBUG(printf ("load %d bytes struct\n", size));
                        align_size = size;
@@ -1592,14 +1601,12 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                        ainfo->offset = stack_size;
                        /*g_print ("offset for arg %d at %d\n", n, stack_size);*/
                        stack_size += nwords * sizeof (gpointer);
-                       n++;
                        break;
                }
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        ainfo->size = 8;
                        add_general (&gr, &stack_size, ainfo, FALSE);
-                       n++;
                        break;
                case MONO_TYPE_R4:
                        ainfo->size = 4;
@@ -1608,8 +1615,6 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                                add_float (&fpr, &stack_size, ainfo, FALSE, &float_spare);
                        else
                                add_general (&gr, &stack_size, ainfo, TRUE);
-
-                       n++;
                        break;
                case MONO_TYPE_R8:
                        ainfo->size = 8;
@@ -1618,13 +1623,11 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                                add_float (&fpr, &stack_size, ainfo, TRUE, &float_spare);
                        else
                                add_general (&gr, &stack_size, ainfo, FALSE);
-
-                       n++;
                        break;
                case MONO_TYPE_VAR:
                case MONO_TYPE_MVAR:
                        /* gsharedvt arguments are passed by ref */
-                       g_assert (mini_is_gsharedvt_type_gsctx (gsctx, simpletype));
+                       g_assert (mini_is_gsharedvt_type_gsctx (gsctx, t));
                        add_general (&gr, &stack_size, ainfo, TRUE);
                        switch (ainfo->storage) {
                        case RegTypeGeneral:
@@ -1636,11 +1639,11 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                        default:
                                g_assert_not_reached ();
                        }
-                       n++;
                        break;
                default:
-                       g_error ("Can't trampoline 0x%x", sig->params [i]->type);
+                       g_error ("Can't handle 0x%x", sig->params [i]->type);
                }
+               n ++;
        }
 
        /* Handle the case where there are no implicit arguments */
@@ -1653,74 +1656,6 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                add_general (&gr, &stack_size, &cinfo->sig_cookie, TRUE);
        }
 
-       {
-               simpletype = mini_type_get_underlying_type (gsctx, sig->ret);
-               switch (simpletype->type) {
-               case MONO_TYPE_I1:
-               case MONO_TYPE_U1:
-               case MONO_TYPE_I2:
-               case MONO_TYPE_U2:
-               case MONO_TYPE_I4:
-               case MONO_TYPE_U4:
-               case MONO_TYPE_I:
-               case MONO_TYPE_U:
-               case MONO_TYPE_PTR:
-               case MONO_TYPE_FNPTR:
-               case MONO_TYPE_CLASS:
-               case MONO_TYPE_OBJECT:
-               case MONO_TYPE_SZARRAY:
-               case MONO_TYPE_ARRAY:
-               case MONO_TYPE_STRING:
-                       cinfo->ret.storage = RegTypeGeneral;
-                       cinfo->ret.reg = ARMREG_R0;
-                       break;
-               case MONO_TYPE_U8:
-               case MONO_TYPE_I8:
-                       cinfo->ret.storage = RegTypeIRegPair;
-                       cinfo->ret.reg = ARMREG_R0;
-                       break;
-               case MONO_TYPE_R4:
-               case MONO_TYPE_R8:
-                       cinfo->ret.storage = RegTypeFP;
-
-                       if (IS_HARD_FLOAT) {
-                               cinfo->ret.reg = ARM_VFP_F0;
-                       } else {
-                               cinfo->ret.reg = ARMREG_R0;
-                       }
-
-                       break;
-               case MONO_TYPE_GENERICINST:
-                       if (!mono_type_generic_inst_is_valuetype (simpletype)) {
-                               cinfo->ret.storage = RegTypeGeneral;
-                               cinfo->ret.reg = ARMREG_R0;
-                               break;
-                       }
-                       // FIXME: Only for variable types
-                       if (mini_is_gsharedvt_type_gsctx (gsctx, simpletype)) {
-                               cinfo->ret.storage = RegTypeStructByAddr;
-                               g_assert (cinfo->vtype_retaddr);
-                               break;
-                       }
-                       /* Fall through */
-               case MONO_TYPE_VALUETYPE:
-               case MONO_TYPE_TYPEDBYREF:
-                       if (cinfo->ret.storage != RegTypeStructByVal)
-                               cinfo->ret.storage = RegTypeStructByAddr;
-                       break;
-               case MONO_TYPE_VAR:
-               case MONO_TYPE_MVAR:
-                       g_assert (mini_is_gsharedvt_type_gsctx (gsctx, simpletype));
-                       cinfo->ret.storage = RegTypeStructByAddr;
-                       g_assert (cinfo->vtype_retaddr);
-                       break;
-               case MONO_TYPE_VOID:
-                       break;
-               default:
-                       g_error ("Can't handle as return value 0x%x", sig->ret->type);
-               }
-       }
-
        /* align stack size to 8 */
        DEBUG (printf ("      stack size: %d (%d)\n", (stack_size + 15) & ~15, stack_size));
        stack_size = (stack_size + 7) & ~7;
@@ -1737,10 +1672,6 @@ mono_arch_tail_call_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig
        CallInfo *c1, *c2;
        gboolean res;
 
-       if (cfg->compile_aot && !cfg->full_aot)
-               /* OP_TAILCALL doesn't work with AOT */
-               return FALSE;
-
        c1 = get_call_info (NULL, NULL, caller_sig);
        c2 = get_call_info (NULL, NULL, callee_sig);
 
@@ -1858,6 +1789,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        MonoType *sig_ret;
        int i, offset, size, align, curinst;
        CallInfo *cinfo;
+       ArgInfo *ainfo;
        guint32 ualign;
 
        sig = mono_method_signature (cfg->method);
@@ -1891,11 +1823,11 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        if (cfg->compile_aot || cfg->uses_rgctx_reg || COMPILE_LLVM (cfg))
                /* V5 is reserved for passing the vtable/rgctx/IMT method */
-               cfg->used_int_regs |= (1 << ARMREG_V5);
+               cfg->used_int_regs |= (1 << MONO_ARCH_IMT_REG);
 
        offset = 0;
        curinst = 0;
-       if (!MONO_TYPE_ISSTRUCT (sig_ret) && !cinfo->vtype_retaddr) {
+       if (!MONO_TYPE_ISSTRUCT (sig_ret) && cinfo->ret.storage != RegTypeStructByAddr) {
                if (sig_ret->type != MONO_TYPE_VOID) {
                        cfg->ret->opcode = OP_REGVAR;
                        cfg->ret->inst_c0 = ARMREG_R0;
@@ -1922,15 +1854,25 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method))
                offset += 8;
 
-       /* the MonoLMF structure is stored just below the stack pointer */
-       if (cinfo->ret.storage == RegTypeStructByVal) {
+       switch (cinfo->ret.storage) {
+       case RegTypeStructByVal:
                cfg->ret->opcode = OP_REGOFFSET;
                cfg->ret->inst_basereg = cfg->frame_reg;
                offset += sizeof (gpointer) - 1;
                offset &= ~(sizeof (gpointer) - 1);
                cfg->ret->inst_offset = - offset;
                offset += sizeof(gpointer);
-       } else if (cinfo->vtype_retaddr) {
+               break;
+       case RegTypeHFA:
+               /* Allocate a local to hold the result, the epilog will copy it to the correct place */
+               offset = ALIGN_TO (offset, 8);
+               cfg->ret->opcode = OP_REGOFFSET;
+               cfg->ret->inst_basereg = cfg->frame_reg;
+               cfg->ret->inst_offset = offset;
+               // FIXME:
+               offset += 32;
+               break;
+       case RegTypeStructByAddr:
                ins = cfg->vret_addr;
                offset += sizeof(gpointer) - 1;
                offset &= ~(sizeof(gpointer) - 1);
@@ -1942,6 +1884,9 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                        mono_print_ins (cfg->vret_addr);
                }
                offset += sizeof(gpointer);
+               break;
+       default:
+               break;
        }
 
        /* Allocate these first so they have a small offset, OP_SEQ_POINT depends on this */
@@ -2086,8 +2031,26 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        }                       
 
        for (i = 0; i < sig->param_count; ++i) {
+               ainfo = cinfo->args + i;
+
                ins = cfg->args [curinst];
 
+               switch (ainfo->storage) {
+               case RegTypeHFA:
+                       offset = ALIGN_TO (offset, 8);
+                       ins->opcode = OP_REGOFFSET;
+                       ins->inst_basereg = cfg->frame_reg;
+                       /* These arguments are saved to the stack in the prolog */
+                       ins->inst_offset = offset;
+                       if (cfg->verbose_level >= 2)
+                               printf ("arg %d allocated to %s+0x%0x.\n", i, mono_arch_regname (ins->inst_basereg), (int)ins->inst_offset);
+                       // FIXME:
+                       offset += 32;
+                       break;
+               default:
+                       break;
+               }
+
                if (ins->opcode != OP_REGVAR) {
                        ins->opcode = OP_REGOFFSET;
                        ins->inst_basereg = cfg->frame_reg;
@@ -2146,7 +2109,7 @@ mono_arch_create_vars (MonoCompile *cfg)
        if (cinfo->ret.storage == RegTypeStructByVal)
                cfg->ret_var_is_local = TRUE;
 
-       if (cinfo->vtype_retaddr) {
+       if (cinfo->ret.storage == RegTypeStructByAddr) {
                cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
                if (G_UNLIKELY (cfg->verbose_level > 1)) {
                        printf ("vret_addr = ");
@@ -2154,7 +2117,7 @@ mono_arch_create_vars (MonoCompile *cfg)
                }
        }
 
-       if (cfg->gen_seq_points_debug_data) {
+       if (cfg->gen_sdb_seq_points) {
                if (cfg->soft_breakpoints) {
                        MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                        ins->flags |= MONO_INST_VOLATILE;
@@ -2231,7 +2194,7 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
         * - we only pass/receive them in registers in some cases, and only 
         *   in 1 or 2 integer registers.
         */
-       if (cinfo->vtype_retaddr) {
+       if (cinfo->ret.storage == RegTypeStructByAddr) {
                /* Vtype returned using a hidden argument */
                linfo->ret.storage = LLVMArgVtypeRetAddr;
                linfo->vret_arg_index = cinfo->vret_arg_index;
@@ -2278,7 +2241,39 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
        sig = call->signature;
        n = sig->param_count + sig->hasthis;
        
-       cinfo = get_call_info (cfg->generic_sharing_context, NULL, sig);
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
+
+       switch (cinfo->ret.storage) {
+       case RegTypeStructByVal:
+               /* The JIT will transform this into a normal call */
+               call->vret_in_reg = TRUE;
+               break;
+       case RegTypeHFA:
+               /*
+                * The vtype is returned in registers. Save the return area address in a local, and store the vtype into
+                * the location it points to after the call, in emit_move_return_value ().
+                */
+               if (!cfg->arch.vret_addr_loc) {
+                       cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       /* Prevent it from being register allocated or optimized away */
+                       ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
+               }
+
+               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
+               break;
+       case RegTypeStructByAddr: {
+               MonoInst *vtarg;
+               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
+               vtarg->sreg1 = call->vret_var->dreg;
+               vtarg->dreg = mono_alloc_preg (cfg);
+               MONO_ADD_INS (cfg->cbb, vtarg);
+
+               mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
+               break;
+       }
+       default:
+               break;
+       }
 
        for (i = 0; i < n; ++i) {
                ArgInfo *ainfo = cinfo->args + i;
@@ -2377,6 +2372,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                case RegTypeStructByVal:
                case RegTypeGSharedVtInReg:
                case RegTypeGSharedVtOnStack:
+               case RegTypeHFA:
                        MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
                        ins->opcode = OP_OUTARG_VT;
                        ins->sreg1 = in->dreg;
@@ -2485,76 +2481,112 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
                emit_sig_cookie (cfg, call, cinfo);
 
-       if (cinfo->ret.storage == RegTypeStructByVal) {
-               /* The JIT will transform this into a normal call */
-               call->vret_in_reg = TRUE;
-       } else if (cinfo->vtype_retaddr) {
-               MonoInst *vtarg;
-               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
-               vtarg->sreg1 = call->vret_var->dreg;
-               vtarg->dreg = mono_alloc_preg (cfg);
-               MONO_ADD_INS (cfg->cbb, vtarg);
-
-               mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
-       }
-
+       call->call_info = cinfo;
        call->stack_usage = cinfo->stack_usage;
+}
 
-       g_free (cinfo);
+static void
+add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *arg)
+{
+       MonoInst *ins;
+
+       switch (storage) {
+       case RegTypeFP:
+               MONO_INST_NEW (cfg, ins, OP_FMOVE);
+               ins->dreg = mono_alloc_freg (cfg);
+               ins->sreg1 = arg->dreg;
+               MONO_ADD_INS (cfg->cbb, ins);
+               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
+               break;
+       default:
+               g_assert_not_reached ();
+               break;
+       }
 }
 
 void
 mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
 {
        MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
+       MonoInst *load;
        ArgInfo *ainfo = ins->inst_p1;
        int ovf_size = ainfo->vtsize;
        int doffset = ainfo->offset;
        int struct_size = ainfo->struct_size;
        int i, soffset, dreg, tmpreg;
 
-       if (ainfo->storage == RegTypeGSharedVtInReg) {
+       switch (ainfo->storage) {
+       case RegTypeGSharedVtInReg:
                /* Pass by addr */
                mono_call_inst_add_outarg_reg (cfg, call, src->dreg, ainfo->reg, FALSE);
-               return;
-       }
-       if (ainfo->storage == RegTypeGSharedVtOnStack) {
+               break;
+       case RegTypeGSharedVtOnStack:
                /* Pass by addr on stack */
                MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, src->dreg);
-               return;
-       }
+               break;
+       case RegTypeHFA:
+               for (i = 0; i < ainfo->nregs; ++i) {
+                       if (ainfo->esize == 4)
+                               MONO_INST_NEW (cfg, load, OP_LOADR4_MEMBASE);
+                       else
+                               MONO_INST_NEW (cfg, load, OP_LOADR8_MEMBASE);
+                       load->dreg = mono_alloc_freg (cfg);
+                       load->inst_basereg = src->dreg;
+                       load->inst_offset = i * ainfo->esize;
+                       MONO_ADD_INS (cfg->cbb, load);
 
-       soffset = 0;
-       for (i = 0; i < ainfo->size; ++i) {
-               dreg = mono_alloc_ireg (cfg);
-               switch (struct_size) {
-               case 1:
-                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, dreg, src->dreg, soffset);
-                       break;
-               case 2:
-                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU2_MEMBASE, dreg, src->dreg, soffset);
-                       break;
-               case 3:
-                       tmpreg = mono_alloc_ireg (cfg);
-                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, dreg, src->dreg, soffset);
-                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, tmpreg, src->dreg, soffset + 1);
-                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, tmpreg, tmpreg, 8);
-                       MONO_EMIT_NEW_BIALU (cfg, OP_IOR, dreg, dreg, tmpreg);
-                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, tmpreg, src->dreg, soffset + 2);
-                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, tmpreg, tmpreg, 16);
-                       MONO_EMIT_NEW_BIALU (cfg, OP_IOR, dreg, dreg, tmpreg);
-                       break;
-               default:
-                       MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
-                       break;
+                       if (ainfo->esize == 4) {
+                               FloatArgData *fad;
+
+                               /* See RegTypeFP in mono_arch_emit_call () */
+                               MonoInst *float_arg = mono_compile_create_var (cfg, &mono_defaults.single_class->byval_arg, OP_LOCAL);
+                               float_arg->flags |= MONO_INST_VOLATILE;
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, float_arg->dreg, load->dreg);
+
+                               fad = mono_mempool_alloc0 (cfg->mempool, sizeof (FloatArgData));
+                               fad->vreg = float_arg->dreg;
+                               fad->hreg = ainfo->reg + i;
+
+                               call->float_args = g_slist_append_mempool (cfg->mempool, call->float_args, fad);
+                       } else {
+                               add_outarg_reg (cfg, call, RegTypeFP, ainfo->reg + i, load);
+                       }
                }
-               mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
-               soffset += sizeof (gpointer);
-               struct_size -= sizeof (gpointer);
+               break;
+       default:
+               soffset = 0;
+               for (i = 0; i < ainfo->size; ++i) {
+                       dreg = mono_alloc_ireg (cfg);
+                       switch (struct_size) {
+                       case 1:
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, dreg, src->dreg, soffset);
+                               break;
+                       case 2:
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU2_MEMBASE, dreg, src->dreg, soffset);
+                               break;
+                       case 3:
+                               tmpreg = mono_alloc_ireg (cfg);
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, dreg, src->dreg, soffset);
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, tmpreg, src->dreg, soffset + 1);
+                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, tmpreg, tmpreg, 8);
+                               MONO_EMIT_NEW_BIALU (cfg, OP_IOR, dreg, dreg, tmpreg);
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, tmpreg, src->dreg, soffset + 2);
+                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, tmpreg, tmpreg, 16);
+                               MONO_EMIT_NEW_BIALU (cfg, OP_IOR, dreg, dreg, tmpreg);
+                               break;
+                       default:
+                               MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
+                               break;
+                       }
+                       mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
+                       soffset += sizeof (gpointer);
+                       struct_size -= sizeof (gpointer);
+               }
+               //g_print ("vt size: %d at R%d + %d\n", doffset, vt->inst_basereg, vt->inst_offset);
+               if (ovf_size != 0)
+                       mini_emit_memcpy (cfg, ARMREG_SP, doffset, src->dreg, soffset, MIN (ovf_size * sizeof (gpointer), struct_size), struct_size < 4 ? 1 : 4);
+               break;
        }
-       //g_print ("vt size: %d at R%d + %d\n", doffset, vt->inst_basereg, vt->inst_offset);
-       if (ovf_size != 0)
-               mini_emit_memcpy (cfg, ARMREG_SP, doffset, src->dreg, soffset, MIN (ovf_size * sizeof (gpointer), struct_size), struct_size < 4 ? 1 : 4);
 }
 
 void
@@ -2764,7 +2796,7 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g
                        pindex = 1;
        }
 
-       if (dinfo->cinfo->vtype_retaddr)
+       if (dinfo->cinfo->ret.storage == RegTypeStructByAddr)
                p->regs [greg ++] = (mgreg_t)ret;
 
        for (i = pindex; i < sig->param_count; i++) {
@@ -2831,7 +2863,23 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g
                                p->regs [slot] = (mgreg_t)*arg;
                                break;
                        } else {
-                               /* Fall though */
+                               if (t->type == MONO_TYPE_GENERICINST && mono_class_is_nullable (mono_class_from_mono_type (t))) {
+                                       MonoClass *klass = mono_class_from_mono_type (t);
+                                       guint8 *nullable_buf;
+                                       int size;
+
+                                       size = mono_class_value_size (klass, NULL);
+                                       nullable_buf = g_alloca (size);
+                                       g_assert (nullable_buf);
+
+                                       /* The argument pointed to by arg is either a boxed vtype or null */
+                                       mono_nullable_init (nullable_buf, (MonoObject*)arg, klass);
+
+                                       arg = (gpointer*)nullable_buf;
+                                       /* Fall through */
+                               } else {
+                                       /* Fall through */
+                               }
                        }
                case MONO_TYPE_VALUETYPE:
                        g_assert (ainfo->storage == RegTypeStructByVal);
@@ -2905,7 +2953,7 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
                        /* Fall though */
                }
        case MONO_TYPE_VALUETYPE:
-               g_assert (ainfo->cinfo->vtype_retaddr);
+               g_assert (ainfo->cinfo->ret.storage == RegTypeStructByAddr);
                /* Nothing to do */
                break;
        case MONO_TYPE_R4:
@@ -3111,9 +3159,11 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 void
 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins, *n, *last_ins = NULL;
+       MonoInst *ins, *n;
 
        MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
+               MonoInst *last_ins = mono_inst_prev (ins, FILTER_IL_SEQ_POINT);
+
                switch (ins->opcode) {
                case OP_MUL_IMM: 
                case OP_IMUL_IMM: 
@@ -3220,10 +3270,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                }
-               last_ins = ins;
-               ins = ins->next;
        }
-       bb->last_ins = last_ins;
 }
 
 /* 
@@ -3656,134 +3703,104 @@ emit_r4_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gb
 
 #endif /* #ifndef DISABLE_JIT */
 
-typedef struct {
-       guchar *code;
-       const guchar *target;
-       int absolute;
-       int found;
-} PatchData;
-
 #define is_call_imm(diff) ((gint)(diff) >= -33554432 && (gint)(diff) <= 33554431)
 
-static int
-search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
-       PatchData *pdata = (PatchData*)user_data;
-       guchar *code = data;
-       guint32 *thunks = data;
-       guint32 *endthunks = (guint32*)(code + bsize);
-       int count = 0;
-       int difflow, diffhigh;
-
-       /* always ensure a call from pdata->code can reach to the thunks without further thunks */
-       difflow = (char*)pdata->code - (char*)thunks;
-       diffhigh = (char*)pdata->code - (char*)endthunks;
-       if (!((is_call_imm (thunks) && is_call_imm (endthunks)) || (is_call_imm (difflow) && is_call_imm (diffhigh))))
-               return 0;
+static void
+emit_thunk (guint8 *code, gconstpointer target)
+{
+       guint8 *p = code;
 
-       /*
-        * The thunk is composed of 3 words:
-        * load constant from thunks [2] into ARM_IP
-        * bx to ARM_IP
-        * address constant
-        * Note that the LR register is already setup
-        */
-       //g_print ("thunk nentries: %d\n", ((char*)endthunks - (char*)thunks)/16);
-       if ((pdata->found == 2) || (pdata->code >= code && pdata->code <= code + csize)) {
-               while (thunks < endthunks) {
-                       //g_print ("looking for target: %p at %p (%08x-%08x)\n", pdata->target, thunks, thunks [0], thunks [1]);
-                       if (thunks [2] == (guint32)pdata->target) {
-                               arm_patch (pdata->code, (guchar*)thunks);
-                               mono_arch_flush_icache (pdata->code, 4);
-                               pdata->found = 1;
-                               return 1;
-                       } else if ((thunks [0] == 0) && (thunks [1] == 0) && (thunks [2] == 0)) {
-                               /* found a free slot instead: emit thunk */
-                               /* ARMREG_IP is fine to use since this can't be an IMT call
-                                * which is indirect
-                                */
-                               code = (guchar*)thunks;
-                               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-                               if (thumb_supported)
-                                       ARM_BX (code, ARMREG_IP);
-                               else
-                                       ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
-                               thunks [2] = (guint32)pdata->target;
-                               mono_arch_flush_icache ((guchar*)thunks, 12);
-
-                               arm_patch (pdata->code, (guchar*)thunks);
-                               mono_arch_flush_icache (pdata->code, 4);
-                               pdata->found = 1;
-                               return 1;
-                       }
-                       /* skip 12 bytes, the size of the thunk */
-                       thunks += 3;
-                       count++;
-               }
-               //g_print ("failed thunk lookup for %p from %p at %p (%d entries)\n", pdata->target, pdata->code, data, count);
-       }
-       return 0;
+       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
+       if (thumb_supported)
+               ARM_BX (code, ARMREG_IP);
+       else
+               ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
+       *(guint32*)code = (guint32)target;
+       code += 4;
+       mono_arch_flush_icache (p, code - p);
 }
 
 static void
-handle_thunk (MonoDomain *domain, int absolute, guchar *code, const guchar *target, MonoCodeManager *dyn_code_mp)
+handle_thunk (MonoCompile *cfg, MonoDomain *domain, guchar *code, const guchar *target)
 {
-       PatchData pdata;
+       MonoJitInfo *ji = NULL;
+       MonoThunkJitInfo *info;
+       guint8 *thunks, *p;
+       int thunks_size;
+       guint8 *orig_target;
+       guint8 *target_thunk;
 
        if (!domain)
                domain = mono_domain_get ();
 
-       pdata.code = code;
-       pdata.target = target;
-       pdata.absolute = absolute;
-       pdata.found = 0;
+       if (cfg) {
+               /*
+                * This can be called multiple times during JITting, so
+                * save the current position in cfg->arch to avoid
+                * doing an O(n^2) search.
+                */
+               if (!cfg->arch.thunks) {
+                       cfg->arch.thunks = cfg->thunks;
+                       cfg->arch.thunks_size = cfg->thunk_area;
+               }
+               thunks = cfg->arch.thunks;
+               thunks_size = cfg->arch.thunks_size;
+               if (!thunks_size) {
+                       g_print ("thunk failed %p->%p, thunk space=%d method %s", code, target, thunks_size, mono_method_full_name (cfg->method, TRUE));
+                       g_assert_not_reached ();
+               }
 
-       if (dyn_code_mp) {
-               mono_code_manager_foreach (dyn_code_mp, search_thunk_slot, &pdata);
-       }
+               g_assert (*(guint32*)thunks == 0);
+               emit_thunk (thunks, target);
+               arm_patch (code, thunks);
 
-       if (pdata.found != 1) {
-               mono_domain_lock (domain);
-               mono_domain_code_foreach (domain, search_thunk_slot, &pdata);
+               cfg->arch.thunks += THUNK_SIZE;
+               cfg->arch.thunks_size -= THUNK_SIZE;
+       } else {
+               ji = mini_jit_info_table_find (domain, (char*)code, NULL);
+               g_assert (ji);
+               info = mono_jit_info_get_thunk_info (ji);
+               g_assert (info);
 
-               if (!pdata.found) {
-                       /* this uses the first available slot */
-                       pdata.found = 2;
-                       mono_domain_code_foreach (domain, search_thunk_slot, &pdata);
-               }
-               mono_domain_unlock (domain);
-       }
+               thunks = (guint8*)ji->code_start + info->thunks_offset;
+               thunks_size = info->thunks_size;
 
-       if (pdata.found != 1) {
-               GHashTable *hash;
-               GHashTableIter iter;
-               MonoJitDynamicMethodInfo *ji;
+               orig_target = mono_arch_get_call_target (code + 4);
 
-               /*
-                * This might be a dynamic method, search its code manager. We can only
-                * use the dynamic method containing CODE, since the others might be freed later.
-                */
-               pdata.found = 0;
+               mono_mini_arch_lock ();
 
-               mono_domain_lock (domain);
-               hash = domain_jit_info (domain)->dynamic_code_hash;
-               if (hash) {
-                       /* FIXME: Speed this up */
-                       g_hash_table_iter_init (&iter, hash);
-                       while (g_hash_table_iter_next (&iter, NULL, (gpointer*)&ji)) {
-                               mono_code_manager_foreach (ji->code_mp, search_thunk_slot, &pdata);
-                               if (pdata.found == 1)
+               target_thunk = NULL;
+               if (orig_target >= thunks && orig_target < thunks + thunks_size) {
+                       /* The call already points to a thunk, because of trampolines etc. */
+                       target_thunk = orig_target;
+               } else {
+                       for (p = thunks; p < thunks + thunks_size; p += THUNK_SIZE) {
+                               if (((guint32*)p) [0] == 0) {
+                                       /* Free entry */
+                                       target_thunk = p;
                                        break;
+                               }
                        }
                }
-               mono_domain_unlock (domain);
+
+               //printf ("THUNK: %p %p %p\n", code, target, target_thunk);
+
+               if (!target_thunk) {
+                       mono_mini_arch_unlock ();
+                       g_print ("thunk failed %p->%p, thunk space=%d method %s", code, target, thunks_size, cfg ? mono_method_full_name (cfg->method, TRUE) : mono_method_full_name (jinfo_get_method (ji), TRUE));
+                       g_assert_not_reached ();
+               }
+
+               emit_thunk (target_thunk, target);
+               arm_patch (code, target_thunk);
+               mono_arch_flush_icache (code, 4);
+
+               mono_mini_arch_unlock ();
        }
-       if (pdata.found != 1)
-               g_print ("thunk failed for %p from %p\n", target, code);
-       g_assert (pdata.found == 1);
 }
 
 static void
-arm_patch_general (MonoDomain *domain, guchar *code, const guchar *target, MonoCodeManager *dyn_code_mp)
+arm_patch_general (MonoCompile *cfg, MonoDomain *domain, guchar *code, const guchar *target)
 {
        guint32 *code32 = (void*)code;
        guint32 ins = *code32;
@@ -3829,7 +3846,7 @@ arm_patch_general (MonoDomain *domain, guchar *code, const guchar *target, MonoC
                        }
                }
                
-               handle_thunk (domain, TRUE, code, target, dyn_code_mp);
+               handle_thunk (cfg, domain, code, target);
                return;
        }
 
@@ -3940,7 +3957,7 @@ arm_patch_general (MonoDomain *domain, guchar *code, const guchar *target, MonoC
 void
 arm_patch (guchar *code, const guchar *target)
 {
-       arm_patch_general (NULL, code, target, NULL);
+       arm_patch_general (NULL, NULL, code, target);
 }
 
 /* 
@@ -4028,121 +4045,83 @@ mono_arm_thumb_supported (void)
 
 #ifndef DISABLE_JIT
 
-/*
- * emit_load_volatile_arguments:
- *
- *  Load volatile arguments from the stack to the original input registers.
- * Required before a tail call.
- */
 static guint8*
-emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
+emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
 {
-       MonoMethod *method = cfg->method;
-       MonoMethodSignature *sig;
-       MonoInst *inst;
        CallInfo *cinfo;
-       guint32 i, pos;
-
-       /* FIXME: Generate intermediate code instead */
-
-       sig = mono_method_signature (method);
+       MonoCallInst *call;
 
-       /* This is the opposite of the code in emit_prolog */
+       call = (MonoCallInst*)ins;
+       cinfo = call->call_info;
 
-       pos = 0;
+       switch (cinfo->ret.storage) {
+       case RegTypeHFA: {
+               MonoInst *loc = cfg->arch.vret_addr_loc;
+               int i;
 
-       cinfo = get_call_info (cfg->generic_sharing_context, NULL, sig);
+               /* Load the destination address */
+               g_assert (loc && loc->opcode == OP_REGOFFSET);
 
-       if (cinfo->vtype_retaddr) {
-               ArgInfo *ainfo = &cinfo->ret;
-               inst = cfg->vret_addr;
-               g_assert (arm_is_imm12 (inst->inst_offset));
-               ARM_LDR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+               if (arm_is_imm12 (loc->inst_offset)) {
+                       ARM_LDR_IMM (code, ARMREG_LR, loc->inst_basereg, loc->inst_offset);
+               } else {
+                       code = mono_arm_emit_load_imm (code, ARMREG_LR, loc->inst_offset);
+                       ARM_LDR_REG_REG (code, ARMREG_LR, loc->inst_basereg, ARMREG_LR);
+               }
+               for (i = 0; i < cinfo->ret.nregs; ++i) {
+                       if (cinfo->ret.esize == 4)
+                               ARM_FSTS (code, cinfo->ret.reg + i, ARMREG_LR, i * 4);
+                       else
+                               ARM_FSTD (code, cinfo->ret.reg + (i * 2), ARMREG_LR, i * 8);
+               }
+               return code;
+       }
+       default:
+               break;
        }
-       for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
-               ArgInfo *ainfo = cinfo->args + i;
-               inst = cfg->args [pos];
-               
-               if (cfg->verbose_level > 2)
-                       g_print ("Loading argument %d (type: %d)\n", i, ainfo->storage);
-               if (inst->opcode == OP_REGVAR) {
-                       if (ainfo->storage == RegTypeGeneral)
-                               ARM_MOV_REG_REG (code, inst->dreg, ainfo->reg);
-                       else if (ainfo->storage == RegTypeFP) {
-                               g_assert_not_reached ();
-                       } else if (ainfo->storage == RegTypeBase) {
-                               // FIXME:
-                               NOT_IMPLEMENTED;
-                               /*
-                               if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
-                                       ARM_LDR_IMM (code, inst->dreg, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+
+       switch (ins->opcode) {
+       case OP_FCALL:
+       case OP_FCALL_REG:
+       case OP_FCALL_MEMBASE:
+               if (IS_VFP) {
+                       MonoType *sig_ret = mini_type_get_underlying_type (NULL, ((MonoCallInst*)ins)->signature->ret);
+                       if (sig_ret->type == MONO_TYPE_R4) {
+                               if (IS_HARD_FLOAT) {
+                                       ARM_CVTS (code, ins->dreg, ARM_VFP_F0);
                                } else {
-                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
-                                       ARM_LDR_REG_REG (code, inst->dreg, ARMREG_SP, ARMREG_IP);
-                               }
-                               */
-                       } else
-                               g_assert_not_reached ();
-               } else {
-                       if (ainfo->storage == RegTypeGeneral || ainfo->storage == RegTypeIRegPair) {
-                               switch (ainfo->size) {
-                               case 1:
-                               case 2:
-                                       // FIXME:
-                                       NOT_IMPLEMENTED;
-                                       break;
-                               case 8:
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_LDR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
-                                       g_assert (arm_is_imm12 (inst->inst_offset + 4));
-                                       ARM_LDR_IMM (code, ainfo->reg + 1, inst->inst_basereg, inst->inst_offset + 4);
-                                       break;
-                               default:
-                                       if (arm_is_imm12 (inst->inst_offset)) {
-                                               ARM_LDR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
-                                       } else {
-                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
-                                               ARM_LDR_REG_REG (code, ainfo->reg, inst->inst_basereg, ARMREG_IP);
-                                       }
-                                       break;
-                               }
-                       } else if (ainfo->storage == RegTypeBaseGen) {
-                               // FIXME:
-                               NOT_IMPLEMENTED;
-                       } else if (ainfo->storage == RegTypeBase) {
-                               /* Nothing to do */
-                       } else if (ainfo->storage == RegTypeFP) {
-                               g_assert_not_reached ();
-                       } else if (ainfo->storage == RegTypeStructByVal) {
-                               int doffset = inst->inst_offset;
-                               int soffset = 0;
-                               int cur_reg;
-                               int size = 0;
-                               if (mono_class_from_mono_type (inst->inst_vtype))
-                                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
-                               for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
-                                       if (arm_is_imm12 (doffset)) {
-                                               ARM_LDR_IMM (code, ainfo->reg + cur_reg, inst->inst_basereg, doffset);
-                                       } else {
-                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, doffset);
-                                               ARM_LDR_REG_REG (code, ainfo->reg + cur_reg, inst->inst_basereg, ARMREG_IP);
-                                       }
-                                       soffset += sizeof (gpointer);
-                                       doffset += sizeof (gpointer);
+                                       ARM_FMSR (code, ins->dreg, ARMREG_R0);
+                                       ARM_CVTS (code, ins->dreg, ins->dreg);
                                }
-                               if (ainfo->vtsize)
-                                       // FIXME:
-                                       NOT_IMPLEMENTED;
-                       } else if (ainfo->storage == RegTypeStructByAddr) {
                        } else {
-                               // FIXME:
-                               NOT_IMPLEMENTED;
+                               if (IS_HARD_FLOAT) {
+                                       ARM_CPYD (code, ins->dreg, ARM_VFP_D0);
+                               } else {
+                                       ARM_FMDRR (code, ARMREG_R0, ARMREG_R1, ins->dreg);
+                               }
                        }
                }
-               pos ++;
-       }
+               break;
+       case OP_RCALL:
+       case OP_RCALL_REG:
+       case OP_RCALL_MEMBASE: {
+               MonoType *sig_ret;
 
-       g_free (cinfo);
+               g_assert (IS_VFP);
+
+               sig_ret = mini_type_get_underlying_type (NULL, ((MonoCallInst*)ins)->signature->ret);
+               g_assert (sig_ret->type == MONO_TYPE_R4);
+               if (IS_HARD_FLOAT) {
+                       ARM_CPYS (code, ins->dreg, ARM_VFP_F0);
+               } else {
+                       ARM_FMSR (code, ins->dreg, ARMREG_R0);
+                       ARM_CPYS (code, ins->dreg, ins->dreg);
+               }
+               break;
+       }
+       default:
+               break;
+       }
 
        return code;
 }
@@ -4313,6 +4292,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                break;
                        case OP_ATOMIC_LOAD_R4:
                                code = mono_arm_emit_vfp_scratch_save (cfg, code, vfp_scratch1);
+                               ARM_ADD_REG_REG (code, ARMREG_LR, ins->inst_basereg, ARMREG_LR);
                                ARM_FLDS (code, vfp_scratch1, ARMREG_LR, 0);
                                ARM_CVTS (code, ins->dreg, vfp_scratch1);
                                code = mono_arm_emit_vfp_scratch_restore (cfg, code, vfp_scratch1);
@@ -4353,8 +4333,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                break;
                        case OP_ATOMIC_STORE_R4:
                                code = mono_arm_emit_vfp_scratch_save (cfg, code, vfp_scratch1);
+                               ARM_ADD_REG_REG (code, ARMREG_LR, ins->inst_destbasereg, ARMREG_LR);
                                ARM_CVTD (code, vfp_scratch1, ins->sreg1);
-                               ARM_FSTS (code, vfp_scratch1, ins->inst_destbasereg, ins->inst_offset);
+                               ARM_FSTS (code, vfp_scratch1, ARMREG_LR, 0);
                                code = mono_arm_emit_vfp_scratch_restore (cfg, code, vfp_scratch1);
                                break;
                        case OP_ATOMIC_STORE_R8:
@@ -4912,33 +4893,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                }
                        }
                        break;
-               case OP_JMP:
-                       /*
-                        * Keep in sync with mono_arch_emit_epilog
-                        */
-                       g_assert (!cfg->method->save_lmf);
-
-                       code = emit_load_volatile_arguments (cfg, code);
-
-                       code = emit_big_add (code, ARMREG_SP, cfg->frame_reg, cfg->stack_usage);
-                       if (iphone_abi) {
-                               if (cfg->used_int_regs)
-                                       ARM_POP (code, cfg->used_int_regs);
-                               ARM_POP (code, (1 << ARMREG_R7) | (1 << ARMREG_LR));
-                       } else {
-                               ARM_POP (code, cfg->used_int_regs | (1 << ARMREG_LR));
-                       }
-                       mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
-                       if (cfg->compile_aot) {
-                               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-                               ARM_B (code, 0);
-                               *(gpointer*)code = NULL;
-                               code += 4;
-                               ARM_LDR_REG_REG (code, ARMREG_PC, ARMREG_PC, ARMREG_IP);
-                       } else {
-                               code = mono_arm_patchable_b (code, ARMCOND_AL);
-                       }
-                       break;
                case OP_TAILCALL: {
                        MonoCallInst *call = (MonoCallInst*)ins;
 
@@ -5052,56 +5006,52 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE: {
-                       gboolean imt_arg = FALSE;
-
                        g_assert (ins->sreg1 != ARMREG_LR);
                        call = (MonoCallInst*)ins;
 
                        if (IS_HARD_FLOAT)
                                code = emit_float_args (cfg, call, code, &max_len, &offset);
-
-                       if (call->dynamic_imt_arg || call->method->klass->flags & TYPE_ATTRIBUTE_INTERFACE)
-                               imt_arg = TRUE;
                        if (!arm_is_imm12 (ins->inst_offset))
                                code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_offset);
-#ifdef USE_JUMP_TABLES
-#define LR_BIAS 0
-#else
-#define LR_BIAS 4
-#endif
-                       if (imt_arg)
-                               ARM_ADD_REG_IMM8 (code, ARMREG_LR, ARMREG_PC, LR_BIAS);
-                       else
-                               ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
-#undef LR_BIAS
+                       ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
                        if (!arm_is_imm12 (ins->inst_offset))
                                ARM_LDR_REG_REG (code, ARMREG_PC, ins->sreg1, ARMREG_IP);
                        else
                                ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
-                       if (imt_arg) {
-                               /* 
-                                * We can't embed the method in the code stream in PIC code, or
-                                * in gshared code.
-                                * Instead, we put it in V5 in code emitted by 
-                                * mono_arch_emit_imt_argument (), and embed NULL here to 
-                                * signal the IMT thunk that the value is in V5.
-                                */
-#ifdef USE_JUMP_TABLES
-                               /* In case of jumptables we always use value in V5. */
-#else
-
-                               if (call->dynamic_imt_arg)
-                                       *((gpointer*)code) = NULL;
-                               else
-                                       *((gpointer*)code) = (gpointer)call->method;
-                               code += 4;
-#endif
-                       }
                        ins->flags |= MONO_INST_GC_CALLSITE;
                        ins->backend.pc_offset = code - cfg->native_code;
                        code = emit_move_return_value (cfg, ins, code);
                        break;
                }
+               case OP_GENERIC_CLASS_INIT: {
+                       static int byte_offset = -1; /* cached; -1 means not computed yet */
+                       static guint8 bitmask;
+                       int imm8; /* signed: mono_arm_is_rotated_imm8 () reports failure as -1 */
+                       guint8 *jump;
+
+                       if (byte_offset < 0)
+                               mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
+                       /* Load the byte at sreg1 + byte_offset and mask it with the 'initialized' bit */
+                       g_assert (arm_is_imm8 (byte_offset));
+                       ARM_LDRSB_IMM (code, ARMREG_IP, ins->sreg1, byte_offset);
+                       imm8 = mono_arm_is_rotated_imm8 (bitmask, &rot_amount);
+                       g_assert (imm8 >= 0); /* bitmask must be encodable as a rotated immediate */
+                       ARM_AND_REG_IMM (code, ARMREG_IP, ARMREG_IP, imm8, rot_amount);
+                       ARM_CMP_REG_IMM (code, ARMREG_IP, 0, 0);
+                       jump = code; /* branch is patched below to skip the call when the bit is set */
+                       ARM_B_COND (code, ARMCOND_NE, 0);
+
+                       /* Uninitialized case: call the generic class init trampoline */
+                       g_assert (ins->sreg1 == ARMREG_R0);
+
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
+                                                                (gpointer)"specific_trampoline_generic_class_init");
+                       code = emit_call_seq (cfg, code);
+
+                       /* Initialized case: the conditional branch lands here */
+                       arm_patch (jump, code);
+                       break;
+               }
                case OP_LOCALLOC: {
                        /* round the size to 8 bytes */
                        ARM_ADD_REG_IMM8 (code, ins->dreg, ins->sreg1, 7);
@@ -5253,8 +5203,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_CALL_HANDLER: 
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                        code = mono_arm_patchable_bl (code, ARMCOND_AL);
+                       cfg->thunk_area += THUNK_SIZE;
                        mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
                        break;
+               case OP_GET_EX_OBJ: /* dreg = R0; presumably R0 carries the exception object here -- TODO confirm */
+                       if (ins->dreg != ARMREG_R0) /* no move needed when dreg is already R0 */
+                               ARM_MOV_REG_REG (code, ins->dreg, ARMREG_R0);
+                       break;
+
                case OP_LABEL:
                        ins->inst_c0 = code - cfg->native_code;
                        break;
@@ -5916,85 +5872,68 @@ mono_arch_register_lowlevel_calls (void)
        } while (0)
 
 void
-mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, MonoCodeManager *dyn_code_mp, gboolean run_cctors)
+mono_arch_patch_code_new (MonoCompile *cfg, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gpointer target)
 {
-       MonoJumpInfo *patch_info;
-       gboolean compile_aot = !run_cctors;
+       unsigned char *ip = ji->ip.i + code;
 
-       for (patch_info = ji; patch_info; patch_info = patch_info->next) {
-               unsigned char *ip = patch_info->ip.i + code;
-               const unsigned char *target;
+       /* ip (above) is the absolute patch address: ji->ip.i bytes past code. */
+       /* Each patch type is handled below; unlisted types go to arm_patch_general (). */
 
-               if (patch_info->type == MONO_PATCH_INFO_SWITCH && !compile_aot) {
+       switch (ji->type) {
+       case MONO_PATCH_INFO_SWITCH: {
 #ifdef USE_JUMP_TABLES
-                       gpointer *jt = mono_jumptable_get_entry (ip);
+               gpointer *jt = mono_jumptable_get_entry (ip);
 #else
-                       gpointer *jt = (gpointer*)(ip + 8);
+               gpointer *jt = (gpointer*)(ip + 8);
 #endif
-                       int i;
-                       /* jt is the inlined jump table, 2 instructions after ip
-                        * In the normal case we store the absolute addresses,
-                        * otherwise the displacements.
-                        */
-                       for (i = 0; i < patch_info->data.table->table_size; i++)
-                               jt [i] = code + (int)patch_info->data.table->table [i];
-                       continue;
-               }
-
-               if (compile_aot) {
-                       switch (patch_info->type) {
-                       case MONO_PATCH_INFO_BB:
-                       case MONO_PATCH_INFO_LABEL:
-                               break;
-                       default:
-                               /* No need to patch these */
-                               continue;
-                       }
-               }
-
-               target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
-
-               switch (patch_info->type) {
-               case MONO_PATCH_INFO_IP:
-                       g_assert_not_reached ();
-                       patch_lis_ori (ip, ip);
-                       continue;
-               case MONO_PATCH_INFO_METHOD_REL:
-                       g_assert_not_reached ();
-                       *((gpointer *)(ip)) = code + patch_info->data.offset;
-                       continue;
-               case MONO_PATCH_INFO_METHODCONST:
-               case MONO_PATCH_INFO_CLASS:
-               case MONO_PATCH_INFO_IMAGE:
-               case MONO_PATCH_INFO_FIELD:
-               case MONO_PATCH_INFO_VTABLE:
-               case MONO_PATCH_INFO_IID:
-               case MONO_PATCH_INFO_SFLDA:
-               case MONO_PATCH_INFO_LDSTR:
-               case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
-               case MONO_PATCH_INFO_LDTOKEN:
-                       g_assert_not_reached ();
-                       /* from OP_AOTCONST : lis + ori */
-                       patch_lis_ori (ip, target);
-                       continue;
-               case MONO_PATCH_INFO_R4:
-               case MONO_PATCH_INFO_R8:
-                       g_assert_not_reached ();
-                       *((gconstpointer *)(ip + 2)) = patch_info->data.target;
-                       continue;
-               case MONO_PATCH_INFO_EXC_NAME:
-                       g_assert_not_reached ();
-                       *((gconstpointer *)(ip + 1)) = patch_info->data.name;
-                       continue;
-               case MONO_PATCH_INFO_NONE:
-               case MONO_PATCH_INFO_BB_OVF:
-               case MONO_PATCH_INFO_EXC_OVF:
-                       /* everything is dealt with at epilog output time */
-                       continue;
-               default:
-                       break;
-               }
-               arm_patch_general (domain, ip, target, dyn_code_mp);
+               int i;
+               /* jt is the inlined jump table, 2 instructions after ip
+                * In the normal case we store the absolute addresses,
+                * otherwise the displacements.
+                */
+               for (i = 0; i < ji->data.table->table_size; i++)
+                       jt [i] = code + (int)ji->data.table->table [i];
+               break;
+       }
+       case MONO_PATCH_INFO_IP:
+               g_assert_not_reached ();
+               patch_lis_ori (ip, ip);
+               break;
+       case MONO_PATCH_INFO_METHOD_REL:
+               g_assert_not_reached ();
+               *((gpointer *)(ip)) = target;
+               break;
+       case MONO_PATCH_INFO_METHODCONST:
+       case MONO_PATCH_INFO_CLASS:
+       case MONO_PATCH_INFO_IMAGE:
+       case MONO_PATCH_INFO_FIELD:
+       case MONO_PATCH_INFO_VTABLE:
+       case MONO_PATCH_INFO_IID:
+       case MONO_PATCH_INFO_SFLDA:
+       case MONO_PATCH_INFO_LDSTR:
+       case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
+       case MONO_PATCH_INFO_LDTOKEN:
+               g_assert_not_reached ();
+               /* from OP_AOTCONST : lis + ori */
+               patch_lis_ori (ip, target);
+               break;
+       case MONO_PATCH_INFO_R4:
+       case MONO_PATCH_INFO_R8:
+               g_assert_not_reached ();
+               *((gconstpointer *)(ip + 2)) = target;
+               break;
+       case MONO_PATCH_INFO_EXC_NAME:
+               g_assert_not_reached ();
+               *((gconstpointer *)(ip + 1)) = target;
+               break;
+       case MONO_PATCH_INFO_NONE:
+       case MONO_PATCH_INFO_BB_OVF:
+       case MONO_PATCH_INFO_EXC_OVF:
+               /* everything is dealt with at epilog output time */
+               break;
+       default:
+               arm_patch_general (cfg, domain, ip, target);
+               break;
        }
 }
 
@@ -6022,7 +5961,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        MonoBasicBlock *bb;
        MonoMethodSignature *sig;
        MonoInst *inst;
-       int alloc_size, orig_alloc_size, pos, max_offset, i, rot_amount;
+       int alloc_size, orig_alloc_size, pos, max_offset, i, rot_amount, part;
        guint8 *code;
        CallInfo *cinfo;
        int tracing = 0;
@@ -6173,7 +6112,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
        cinfo = get_call_info (cfg->generic_sharing_context, NULL, sig);
 
-       if (cinfo->vtype_retaddr) {
+       if (cinfo->ret.storage == RegTypeStructByAddr) {
                ArgInfo *ainfo = &cinfo->ret;
                inst = cfg->vret_addr;
                g_assert (arm_is_imm12 (inst->inst_offset));
@@ -6198,6 +6137,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                
                if (cfg->verbose_level > 2)
                        g_print ("Saving argument %d (type: %d)\n", i, ainfo->storage);
+
                if (inst->opcode == OP_REGVAR) {
                        if (ainfo->storage == RegTypeGeneral)
                                ARM_MOV_REG_REG (code, inst->dreg, ainfo->reg);
@@ -6216,8 +6156,18 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        if (cfg->verbose_level > 2)
                                g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
                } else {
-                       /* the argument should be put on the stack: FIXME handle size != word  */
-                       if (ainfo->storage == RegTypeGeneral || ainfo->storage == RegTypeIRegPair || ainfo->storage == RegTypeGSharedVtInReg) {
+                       switch (ainfo->storage) {
+                       case RegTypeHFA: /* store the nregs FP argument registers to the argument's slot, esize bytes apart */
+                               for (part = 0; part < ainfo->nregs; part ++) {
+                                       if (ainfo->esize == 4) /* 4-byte elements: FSTS, register number advances by 1 */
+                                               ARM_FSTS (code, ainfo->reg + part, inst->inst_basereg, inst->inst_offset + (part * ainfo->esize));
+                                       else /* otherwise FSTD, register number advances by 2 per element */
+                                               ARM_FSTD (code, ainfo->reg + (part * 2), inst->inst_basereg, inst->inst_offset + (part * ainfo->esize));
+                               }
+                               break;
+                       case RegTypeGeneral:
+                       case RegTypeIRegPair:
+                       case RegTypeGSharedVtInReg:
                                switch (ainfo->size) {
                                case 1:
                                        if (arm_is_imm12 (inst->inst_offset))
@@ -6258,7 +6208,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        }
                                        break;
                                }
-                       } else if (ainfo->storage == RegTypeBaseGen) {
+                               break;
+                       case RegTypeBaseGen:
                                if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                                } else {
@@ -6274,7 +6225,9 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
                                        ARM_STR_REG_REG (code, ARMREG_R3, inst->inst_basereg, ARMREG_IP);
                                }
-                       } else if (ainfo->storage == RegTypeBase || ainfo->storage == RegTypeGSharedVtOnStack) {
+                               break;
+                       case RegTypeBase:
+                       case RegTypeGSharedVtOnStack:
                                if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                                } else {
@@ -6328,7 +6281,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        }
                                        break;
                                }
-                       } else if (ainfo->storage == RegTypeFP) {
+                               break;
+                       case RegTypeFP: {
                                int imm8, rot_amount;
 
                                if ((imm8 = mono_arm_is_rotated_imm8 (inst->inst_offset, &rot_amount)) == -1) {
@@ -6341,7 +6295,9 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        ARM_FSTD (code, ainfo->reg, ARMREG_IP, 0);
                                else
                                        ARM_FSTS (code, ainfo->reg, ARMREG_IP, 0);
-                       } else if (ainfo->storage == RegTypeStructByVal) {
+                               break;
+                       }
+                       case RegTypeStructByVal: {
                                int doffset = inst->inst_offset;
                                int soffset = 0;
                                int cur_reg;
@@ -6362,12 +6318,16 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        //g_print ("emit_memcpy (prev_sp_ofs: %d, ainfo->offset: %d, soffset: %d)\n", prev_sp_offset, ainfo->offset, soffset);
                                        code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, doffset, ARMREG_SP, prev_sp_offset + ainfo->offset);
                                }
-                       } else if (ainfo->storage == RegTypeStructByAddr) {
+                               break;
+                       }
+                       case RegTypeStructByAddr:
                                g_assert_not_reached ();
                                /* FIXME: handle overrun! with struct sizes not multiple of 4 */
                                code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, inst->inst_offset, ainfo->reg, 0);
-                       } else
+                       default:
                                g_assert_not_reached ();
+                               break;
+                       }
                }
                pos++;
        }
@@ -6442,17 +6402,17 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 #ifdef USE_JUMP_TABLES
                jte = mono_jumptable_add_entries (3);
                jte [0] = (gpointer)&ss_trigger_var;
-               jte [1] = single_step_func_wrapper;
-               jte [2] = breakpoint_func_wrapper;
+               jte [1] = single_step_tramp;
+               jte [2] = breakpoint_tramp;
                code = mono_arm_load_jumptable_entry_addr (code, jte, ARMREG_LR);
 #else
                ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
                ARM_B (code, 2);
                *(volatile int **)code = &ss_trigger_var;
                code += 4;
-               *(gpointer*)code = single_step_func_wrapper;
+               *(gpointer*)code = single_step_tramp;
                code += 4;
-               *(gpointer*)code = breakpoint_func_wrapper;
+               *(gpointer*)code = breakpoint_tramp;
                code += 4;
 #endif
 
@@ -6507,7 +6467,8 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 
        /* Load returned vtypes into registers if needed */
        cinfo = cfg->arch.cinfo;
-       if (cinfo->ret.storage == RegTypeStructByVal) {
+       switch (cinfo->ret.storage) {
+       case RegTypeStructByVal: {
                MonoInst *ins = cfg->ret;
 
                if (arm_is_imm12 (ins->inst_offset)) {
@@ -6516,6 +6477,21 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
                        ARM_LDR_REG_REG (code, ARMREG_R0, ins->inst_basereg, ARMREG_LR);
                }
+               break;
+       }
+       case RegTypeHFA: {
+               MonoInst *ins = cfg->ret;
+               /* Reload the nregs return registers from the cfg->ret slot, esize bytes apart */
+               for (i = 0; i < cinfo->ret.nregs; ++i) {
+                       if (cinfo->ret.esize == 4) /* 4-byte elements: FLDS, register number advances by 1 */
+                               ARM_FLDS (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + (i * cinfo->ret.esize));
+                       else /* otherwise FLDD, register number advances by 2 per element */
+                               ARM_FLDD (code, cinfo->ret.reg + (i * 2), ins->inst_basereg, ins->inst_offset + (i * cinfo->ret.esize));
+               }
+               break;
+       }
+       default:
+               break;
        }
 
        if (method->save_lmf) {
@@ -6651,6 +6627,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        patch_info->data.name = "mono_arch_throw_corlib_exception";
                        patch_info->ip.i = code - cfg->native_code;
                        ARM_BL (code, 0);
+                       cfg->thunk_area += THUNK_SIZE;
                        *(guint32*)(gpointer)code = exc_class->type_token;
                        code += 4;
 #endif
@@ -6709,85 +6686,10 @@ mono_arch_flush_register_windows (void)
 {
 }
 
-#ifndef DISABLE_JIT
-
-void
-mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
-{
-       int method_reg = mono_alloc_ireg (cfg);
-#ifdef USE_JUMP_TABLES
-       int use_jumptables = TRUE;
-#else
-       int use_jumptables = FALSE;
-#endif
-
-       if (cfg->compile_aot) {
-               MonoInst *ins;
-
-               call->dynamic_imt_arg = TRUE;
-
-               if (imt_arg) {
-                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, method_reg, imt_arg->dreg);
-               } else {
-                       MONO_INST_NEW (cfg, ins, OP_AOTCONST);
-                       ins->dreg = method_reg;
-                       ins->inst_p0 = call->method;
-                       ins->inst_c1 = MONO_PATCH_INFO_METHODCONST;
-                       MONO_ADD_INS (cfg->cbb, ins);
-               }
-               mono_call_inst_add_outarg_reg (cfg, call, method_reg, ARMREG_V5, FALSE);
-       } else if (cfg->generic_context || imt_arg || mono_use_llvm || use_jumptables) {
-               /* Always pass in a register for simplicity */
-               call->dynamic_imt_arg = TRUE;
-
-               cfg->uses_rgctx_reg = TRUE;
-
-               if (imt_arg) {
-                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, method_reg, imt_arg->dreg);
-               } else {
-                       MonoInst *ins;
-
-                       MONO_INST_NEW (cfg, ins, OP_PCONST);
-                       ins->inst_p0 = call->method;
-                       ins->dreg = method_reg;
-                       MONO_ADD_INS (cfg->cbb, ins);
-               }
-
-               mono_call_inst_add_outarg_reg (cfg, call, method_reg, ARMREG_V5, FALSE);
-       }
-}
-
-#endif /* DISABLE_JIT */
-
 MonoMethod*
 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
 {
-#ifdef USE_JUMP_TABLES
-       return (MonoMethod*)regs [ARMREG_V5];
-#else
-       gpointer method;
-       guint32 *code_ptr = (guint32*)code;
-       code_ptr -= 2;
-       method = GUINT_TO_POINTER (code_ptr [1]);
-
-       if (mono_use_llvm)
-               /* Passed in V5 */
-               return (MonoMethod*)regs [ARMREG_V5];
-
-       /* The IMT value is stored in the code stream right after the LDC instruction. */
-       /* This is no longer true for the gsharedvt_in trampoline */
-       /*
-       if (!IS_LDR_PC (code_ptr [0])) {
-               g_warning ("invalid code stream, instruction before IMT value is not a LDC in %s() (code %p value 0: 0x%x -1: 0x%x -2: 0x%x)", __FUNCTION__, code, code_ptr [2], code_ptr [1], code_ptr [0]);
-               g_assert (IS_LDR_PC (code_ptr [0]));
-       }
-       */
-       if (method == 0)
-               /* This is AOTed code, or the gsharedvt trampoline, the IMT method is in V5 */
-               return (MonoMethod*)regs [ARMREG_V5];
-       else
-               return (MonoMethod*) method;
-#endif
+       return (MonoMethod*)regs [MONO_ARCH_IMT_REG]; /* read unconditionally from the regs slot of the IMT register; 'code' is not consulted */
 }
 
 MonoVTable*
@@ -6928,8 +6830,6 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
 
 #ifdef USE_JUMP_TABLES
        ARM_PUSH3 (code, ARMREG_R0, ARMREG_R1, ARMREG_R2);
-       /* If jumptables we always pass the IMT method in R5 */
-       ARM_MOV_REG_REG (code, ARMREG_R0, ARMREG_V5);
 #define VTABLE_JTI 0
 #define IMT_METHOD_OFFSET 0
 #define TARGET_CODE_OFFSET 1
@@ -6951,16 +6851,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
        ARM_LDR_IMM (code, ARMREG_R0, ARMREG_LR, -4);
        vtable_target = code;
        ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
-
-       if (mono_use_llvm) {
-               /* LLVM always passes the IMT method in R5 */
-               ARM_MOV_REG_REG (code, ARMREG_R0, ARMREG_V5);
-       } else {
-               /* R0 == 0 means we are called from AOT code. In this case, V5 contains the IMT method */
-               ARM_CMP_REG_IMM8 (code, ARMREG_R0, 0);
-               ARM_MOV_REG_REG_COND (code, ARMREG_R0, ARMREG_V5, ARMCOND_EQ);
-       }
 #endif
+       ARM_MOV_REG_REG (code, ARMREG_R0, ARMREG_V5);
 
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];