Merge pull request #5714 from alexischr/update_bockbuild
[mono.git] / mono / mini / mini-arm.c
index 8e7e351b08070e417be7b1524e85581d2797af8e..44626141830fe9f5ff2c42f28ae54de1c668f0ee 100644 (file)
@@ -1,5 +1,6 @@
-/*
- * mini-arm.c: ARM backend for the Mono code generator
+/**
+ * \file
+ * ARM backend for the Mono code generator
  *
  * Authors:
  *   Paolo Molaro (lupus@ximian.com)
 #include <mono/metadata/profiler-private.h>
 #include <mono/metadata/debug-helpers.h>
 #include <mono/utils/mono-mmap.h>
-#include <mono/utils/mono-hwcap-arm.h>
+#include <mono/utils/mono-hwcap.h>
 #include <mono/utils/mono-memory-model.h>
+#include <mono/utils/mono-threads-coop.h>
+#include <mono/utils/unlocked.h>
 
 #include "mini-arm.h"
-#include "mini-arm-tls.h"
 #include "cpu-arm.h"
 #include "trace.h"
 #include "ir-emit.h"
 #include "mini-gc.h"
 #include "mono/arch/arm/arm-vfp-codegen.h"
 
-#if (defined(HAVE_KW_THREAD) && defined(__linux__) && defined(__ARM_EABI__)) \
-       || defined(TARGET_ANDROID) \
-       || (defined(TARGET_IOS) && !defined(TARGET_WATCHOS))
-#define HAVE_FAST_TLS
-#endif
-
 /* Sanity check: This makes no sense */
 #if defined(ARM_FPU_NONE) && (defined(ARM_FPU_VFP) || defined(ARM_FPU_VFP_HARD))
 #error "ARM_FPU_NONE is defined while one of ARM_FPU_VFP/ARM_FPU_VFP_HARD is defined"
@@ -166,6 +162,9 @@ int mono_exc_esp_offset = 0;
 static void mono_arch_compute_omit_fp (MonoCompile *cfg);
 #endif
 
+static guint8*
+emit_aotconst (MonoCompile *cfg, guint8 *code, int dreg, int patch_type, gpointer data);
+
 const char*
 mono_arch_regname (int reg)
 {
@@ -217,6 +216,19 @@ emit_big_add (guint8 *code, int dreg, int sreg, int imm)
        return code;
 }
 
+static guint8*
+emit_ldr_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+       if (!arm_is_imm12 (imm)) {
+               g_assert (dreg != sreg);
+               code = emit_big_add (code, dreg, sreg, imm);
+               ARM_LDR_IMM (code, dreg, dreg, 0);
+       } else {
+               ARM_LDR_IMM (code, dreg, sreg, imm);
+       }
+       return code;
+}
+
 /* If dreg == sreg, this clobbers IP */
 static guint8*
 emit_sub_imm (guint8 *code, int dreg, int sreg, int imm)
@@ -324,83 +336,58 @@ mono_arm_patchable_bl (guint8 *code, int cond)
        return code;
 }
 
-static guint8*
-mono_arm_emit_tls_get (MonoCompile *cfg, guint8* code, int dreg, int tls_offset)
-{
-#ifdef HAVE_FAST_TLS
-       code = mono_arm_emit_load_imm (code, ARMREG_R0, tls_offset);
-       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                       "mono_get_tls_key");
-       code = emit_call_seq (cfg, code);
-       if (dreg != ARMREG_R0)
-               ARM_MOV_REG_REG (code, dreg, ARMREG_R0);
-#else
-       g_assert_not_reached ();
+#if defined(__ARM_EABI__) && defined(__linux__) && !defined(HOST_ANDROID) && !defined(MONO_CROSS_COMPILE)
+#define HAVE_AEABI_READ_TP 1
 #endif
-       return code;
-}
 
-static guint8*
-mono_arm_emit_tls_get_reg (MonoCompile *cfg, guint8* code, int dreg, int tls_offset_reg)
+#ifdef HAVE_AEABI_READ_TP
+gpointer __aeabi_read_tp (void);
+#endif
+
+gboolean
+mono_arch_have_fast_tls (void)
 {
-#ifdef HAVE_FAST_TLS
-       if (tls_offset_reg != ARMREG_R0)
-               ARM_MOV_REG_REG (code, ARMREG_R0, tls_offset_reg);
-       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                       "mono_get_tls_key");
-       code = emit_call_seq (cfg, code);
-       if (dreg != ARMREG_R0)
-               ARM_MOV_REG_REG (code, dreg, ARMREG_R0);
+#ifdef HAVE_AEABI_READ_TP
+       static gboolean have_fast_tls = FALSE;
+        static gboolean inited = FALSE;
+
+       if (mini_get_debug_options ()->use_fallback_tls)
+               return FALSE;
+
+       if (inited)
+               return have_fast_tls;
+
+       if (v7_supported) {
+               gpointer tp1, tp2;
+
+               tp1 = __aeabi_read_tp ();
+               asm volatile("mrc p15, 0, %0, c13, c0, 3" : "=r" (tp2));
+
+               have_fast_tls = tp1 && tp1 == tp2;
+       }
+       inited = TRUE;
+       return have_fast_tls;
 #else
-       g_assert_not_reached ();
+       return FALSE;
 #endif
-       return code;
 }
 
 static guint8*
-mono_arm_emit_tls_set (MonoCompile *cfg, guint8* code, int sreg, int tls_offset)
+emit_tls_get (guint8 *code, int dreg, int tls_offset)
 {
-#ifdef HAVE_FAST_TLS
-       if (sreg != ARMREG_R1)
-               ARM_MOV_REG_REG (code, ARMREG_R1, sreg);
-       code = mono_arm_emit_load_imm (code, ARMREG_R0, tls_offset);
-       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                       "mono_set_tls_key");
-       code = emit_call_seq (cfg, code);
-#else
-       g_assert_not_reached ();
-#endif
+       g_assert (v7_supported);
+       ARM_MRC (code, 15, 0, dreg, 13, 0, 3);
+       ARM_LDR_IMM (code, dreg, dreg, tls_offset);
        return code;
 }
 
 static guint8*
-mono_arm_emit_tls_set_reg (MonoCompile *cfg, guint8* code, int sreg, int tls_offset_reg)
+emit_tls_set (guint8 *code, int sreg, int tls_offset)
 {
-#ifdef HAVE_FAST_TLS
-       /* Get sreg in R1 and tls_offset_reg in R0 */
-       if (tls_offset_reg == ARMREG_R1) {
-               if (sreg == ARMREG_R0) {
-                       /* swap sreg and tls_offset_reg */
-                       ARM_EOR_REG_REG (code, sreg, sreg, tls_offset_reg);
-                       ARM_EOR_REG_REG (code, tls_offset_reg, sreg, tls_offset_reg);
-                       ARM_EOR_REG_REG (code, sreg, sreg, tls_offset_reg);
-               } else {
-                       ARM_MOV_REG_REG (code, ARMREG_R0, tls_offset_reg);
-                       if (sreg != ARMREG_R1)
-                               ARM_MOV_REG_REG (code, ARMREG_R1, sreg);
-               }
-       } else {
-               if (sreg != ARMREG_R1)
-                       ARM_MOV_REG_REG (code, ARMREG_R1, sreg);
-               if (tls_offset_reg != ARMREG_R0)
-                       ARM_MOV_REG_REG (code, ARMREG_R0, tls_offset_reg);
-       }
-       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                       "mono_set_tls_key");
-       code = emit_call_seq (cfg, code);
-#else
-       g_assert_not_reached ();
-#endif
+       int tp_reg = (sreg != ARMREG_R0) ? ARMREG_R0 : ARMREG_R1;
+       g_assert (v7_supported);
+       ARM_MRC (code, 15, 0, tp_reg, 13, 0, 3);
+       ARM_STR_IMM (code, sreg, tp_reg, tls_offset);
        return code;
 }
 
@@ -413,31 +400,13 @@ mono_arm_emit_tls_set_reg (MonoCompile *cfg, guint8* code, int sreg, int tls_off
 static guint8*
 emit_save_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
 {
-       gboolean get_lmf_fast = FALSE;
        int i;
 
-       if (mono_arm_have_tls_get ()) {
-               get_lmf_fast = TRUE;
-               if (cfg->compile_aot) {
-                       /* OP_AOTCONST */
-                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_TLS_OFFSET, (gpointer)TLS_KEY_LMF_ADDR);
-                       ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0);
-                       ARM_B (code, 0);
-                       *(gpointer*)code = NULL;
-                       code += 4;
-                       /* Load the value from the GOT */
-                       ARM_LDR_REG_REG (code, ARMREG_R1, ARMREG_PC, ARMREG_R1);
-                       code = mono_arm_emit_tls_get_reg (cfg, code, ARMREG_R0, ARMREG_R1);
-               } else {
-                       gint32 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
-                       g_assert (lmf_addr_tls_offset != -1);
-                       code = mono_arm_emit_tls_get (cfg, code, ARMREG_R0, lmf_addr_tls_offset);
-               }
-       }
-
-       if (!get_lmf_fast) {
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
-                                                        (gpointer)"mono_get_lmf_addr");
+       if (mono_arch_have_fast_tls () && mono_tls_get_tls_offset (TLS_KEY_LMF_ADDR) != -1) {
+               code = emit_tls_get (code, ARMREG_R0, mono_tls_get_tls_offset (TLS_KEY_LMF_ADDR));
+       } else {
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
+                                                        (gpointer)"mono_tls_get_lmf_addr");
                code = emit_call_seq (cfg, code);
        }
        /* we build the MonoLMF structure on the stack - see mini-arm.h */
@@ -580,22 +549,6 @@ emit_restore_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
 
 #endif /* #ifndef DISABLE_JIT */
 
-/*
- * mono_arm_have_tls_get:
- *
- * Returns whether we have tls access implemented on the current
- * platform
- */
-gboolean
-mono_arm_have_tls_get (void)
-{
-#ifdef HAVE_FAST_TLS
-       return TRUE;
-#else
-       return FALSE;
-#endif
-}
-
 /*
  * mono_arch_get_argument_info:
  * @csig:  a method signature
@@ -698,7 +651,7 @@ get_delegate_invoke_impl (MonoTrampInfo **info, gboolean has_target, gboolean pa
                 g_free (name);
        }
 
-       mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
+       MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL));
 
        return start;
 }
@@ -824,14 +777,19 @@ mono_arch_cpu_init (void)
 void
 mono_arch_init (void)
 {
-       const char *cpu_arch;
+       char *cpu_arch;
+
+#ifdef TARGET_WATCHOS
+       mini_get_debug_options ()->soft_breakpoints = TRUE;
+#endif
 
        mono_os_mutex_init_recursive (&mini_arch_mutex);
        if (mini_get_debug_options ()->soft_breakpoints) {
-               breakpoint_tramp = mini_get_breakpoint_trampoline ();
+               if (!mono_aot_only)
+                       breakpoint_tramp = mini_get_breakpoint_trampoline ();
        } else {
-               ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
-               bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+               ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT, MONO_MEM_ACCOUNT_OTHER);
+               bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT, MONO_MEM_ACCOUNT_OTHER);
                mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
        }
 
@@ -842,7 +800,6 @@ mono_arch_init (void)
        mono_aot_register_jit_icall ("mono_arm_start_gsharedvt_call", mono_arm_start_gsharedvt_call);
 #endif
        mono_aot_register_jit_icall ("mono_arm_unaligned_stack", mono_arm_unaligned_stack);
-
 #if defined(__ARM_EABI__)
        eabi_supported = TRUE;
 #endif
@@ -852,7 +809,7 @@ mono_arch_init (void)
 #else
        arm_fpu = MONO_ARM_FPU_VFP;
 
-#if defined(ARM_FPU_NONE) && !defined(__APPLE__)
+#if defined(ARM_FPU_NONE) && !defined(TARGET_IOS)
        /*
         * If we're compiling with a soft float fallback and it
         * turns out that no VFP unit is available, we need to
@@ -868,10 +825,11 @@ mono_arch_init (void)
         * works. Most ARM devices have VFP units these days, so
         * normally soft float code would not be exercised much.
         */
-       const char *soft = g_getenv ("MONO_ARM_FORCE_SOFT_FLOAT");
+       char *soft = g_getenv ("MONO_ARM_FORCE_SOFT_FLOAT");
 
        if (soft && !strncmp (soft, "1", 1))
                arm_fpu = MONO_ARM_FPU_NONE;
+       g_free (soft);
 #endif
 #endif
 
@@ -879,7 +837,22 @@ mono_arch_init (void)
        v6_supported = mono_hwcap_arm_is_v6;
        v7_supported = mono_hwcap_arm_is_v7;
 
-#if defined(__APPLE__)
+       /*
+        * On weird devices, the hwcap code may fail to detect
+        * the ARM version. In that case, we can at least safely
+        * assume the version the runtime was compiled for.
+        */
+#ifdef HAVE_ARMV5
+       v5_supported = TRUE;
+#endif
+#ifdef HAVE_ARMV6
+       v6_supported = TRUE;
+#endif
+#ifdef HAVE_ARMV7
+       v7_supported = TRUE;
+#endif
+
+#if defined(TARGET_IOS)
        /* iOS is special-cased here because we don't yet
           have a way to properly detect CPU features on it. */
        thumb_supported = TRUE;
@@ -904,6 +877,7 @@ mono_arch_init (void)
 
                thumb_supported = strstr (cpu_arch, "thumb") != NULL;
                thumb2_supported = strstr (cpu_arch, "thumb2") != NULL;
+               g_free (cpu_arch);
        }
 }
 
@@ -987,10 +961,6 @@ is_regsize_var (MonoType *t)
        case MONO_TYPE_FNPTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
-       case MONO_TYPE_STRING:
-       case MONO_TYPE_CLASS:
-       case MONO_TYPE_SZARRAY:
-       case MONO_TYPE_ARRAY:
                return TRUE;
        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (t))
@@ -1315,11 +1285,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
-       case MONO_TYPE_CLASS:
        case MONO_TYPE_OBJECT:
-       case MONO_TYPE_SZARRAY:
-       case MONO_TYPE_ARRAY:
-       case MONO_TYPE_STRING:
                cinfo->ret.storage = RegTypeGeneral;
                cinfo->ret.reg = ARMREG_R0;
                break;
@@ -1362,10 +1328,25 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                        cinfo->ret.nregs = nfields;
                        cinfo->ret.esize = esize;
                } else {
-                       if (is_pinvoke && mono_class_native_size (mono_class_from_mono_type (t), &align) <= sizeof (gpointer))
-                               cinfo->ret.storage = RegTypeStructByVal;
-                       else
+                       if (is_pinvoke) {
+                               int native_size = mono_class_native_size (mono_class_from_mono_type (t), &align);
+                               int max_size;
+
+#ifdef TARGET_WATCHOS
+                               max_size = 16;
+#else
+                               max_size = 4;
+#endif
+                               if (native_size <= max_size) {
+                                       cinfo->ret.storage = RegTypeStructByVal;
+                                       cinfo->ret.struct_size = native_size;
+                                       cinfo->ret.nregs = ALIGN_TO (native_size, 4) / 4;
+                               } else {
+                                       cinfo->ret.storage = RegTypeStructByAddr;
+                               }
+                       } else {
                                cinfo->ret.storage = RegTypeStructByAddr;
+                       }
                }
                break;
        case MONO_TYPE_VAR:
@@ -1453,11 +1434,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
-               case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
-               case MONO_TYPE_STRING:
-               case MONO_TYPE_SZARRAY:
-               case MONO_TYPE_ARRAY:
                        cinfo->args [n].size = sizeof (gpointer);
                        add_general (&gr, &stack_size, ainfo, TRUE);
                        break;
@@ -1518,6 +1495,27 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                                        size = mini_type_stack_size_full (t, &align, FALSE);
                        }
                        DEBUG(g_print ("load %d bytes struct\n", size));
+
+#ifdef TARGET_WATCHOS
+                       /* Watchos pass large structures by ref */
+                       /* We only do this for pinvoke to make gsharedvt/dyncall simpler */
+                       if (sig->pinvoke && size > 16) {
+                               add_general (&gr, &stack_size, ainfo, TRUE);
+                               switch (ainfo->storage) {
+                               case RegTypeGeneral:
+                                       ainfo->storage = RegTypeStructByAddr;
+                                       break;
+                               case RegTypeBase:
+                                       ainfo->storage = RegTypeStructByAddrOnStack;
+                                       break;
+                               default:
+                                       g_assert_not_reached ();
+                                       break;
+                               }
+                               break;
+                       }
+#endif
+
                        align_size = size;
                        nwords = 0;
                        align_size += (sizeof (gpointer) - 1);
@@ -1525,6 +1523,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                        nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
                        ainfo->storage = RegTypeStructByVal;
                        ainfo->struct_size = size;
+                       ainfo->align = align;
                        /* FIXME: align stack_size if needed */
                        if (eabi_supported) {
                                if (align >= 8 && (gr & 1))
@@ -1656,8 +1655,7 @@ debug_omit_fp (void)
 
 /**
  * mono_arch_compute_omit_fp:
- *
- *   Determine whenever the frame pointer can be eliminated.
+ * Determine whether the frame pointer can be eliminated.
  */
 static void
 mono_arch_compute_omit_fp (MonoCompile *cfg)
@@ -1700,8 +1698,7 @@ mono_arch_compute_omit_fp (MonoCompile *cfg)
                cfg->arch.omit_fp = FALSE;
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                cfg->arch.omit_fp = FALSE;
-       if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
-               (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
+       if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)))
                cfg->arch.omit_fp = FALSE;
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];
@@ -1804,21 +1801,16 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        switch (cinfo->ret.storage) {
        case RegTypeStructByVal:
-               cfg->ret->opcode = OP_REGOFFSET;
-               cfg->ret->inst_basereg = cfg->frame_reg;
-               offset += sizeof (gpointer) - 1;
-               offset &= ~(sizeof (gpointer) - 1);
-               cfg->ret->inst_offset = - offset;
-               offset += sizeof(gpointer);
-               break;
        case RegTypeHFA:
                /* Allocate a local to hold the result, the epilog will copy it to the correct place */
                offset = ALIGN_TO (offset, 8);
                cfg->ret->opcode = OP_REGOFFSET;
                cfg->ret->inst_basereg = cfg->frame_reg;
                cfg->ret->inst_offset = offset;
-               // FIXME:
-               offset += 32;
+               if (cinfo->ret.storage == RegTypeStructByVal)
+                       offset += cinfo->ret.nregs * sizeof (gpointer);
+               else
+                       offset += 32;
                break;
        case RegTypeStructByAddr:
                ins = cfg->vret_addr;
@@ -1851,6 +1843,9 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                ins->inst_basereg = cfg->frame_reg;
                ins->inst_offset = offset;
                offset += size;
+       }
+       if (cfg->arch.ss_trigger_page_var) {
+               MonoInst *ins;
 
                ins = cfg->arch.ss_trigger_page_var;
                size = 4;
@@ -1875,6 +1870,9 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                ins->inst_basereg = cfg->frame_reg;
                ins->inst_offset = offset;
                offset += size;
+       }
+       if (cfg->arch.seq_point_bp_method_var) {
+               MonoInst *ins;
 
                ins = cfg->arch.seq_point_bp_method_var;
                size = 4;
@@ -2055,6 +2053,18 @@ mono_arch_create_vars (MonoCompile *cfg)
        }
 
        if (cfg->gen_sdb_seq_points) {
+               if (cfg->compile_aot) {
+                       MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       ins->flags |= MONO_INST_VOLATILE;
+                       cfg->arch.seq_point_info_var = ins;
+
+                       if (!cfg->soft_breakpoints) {
+                               /* Allocate a separate variable for this to save 1 load per seq point */
+                               ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                               ins->flags |= MONO_INST_VOLATILE;
+                               cfg->arch.ss_trigger_page_var = ins;
+                       }
+               }
                if (cfg->soft_breakpoints) {
                        MonoInst *ins;
 
@@ -2065,17 +2075,6 @@ mono_arch_create_vars (MonoCompile *cfg)
                        ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                        ins->flags |= MONO_INST_VOLATILE;
                        cfg->arch.seq_point_bp_method_var = ins;
-
-                       g_assert (!cfg->compile_aot);
-               } else if (cfg->compile_aot) {
-                       MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
-                       ins->flags |= MONO_INST_VOLATILE;
-                       cfg->arch.seq_point_info_var = ins;
-
-                       /* Allocate a separate variable for this to save 1 load per seq point */
-                       ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
-                       ins->flags |= MONO_INST_VOLATILE;
-                       cfg->arch.ss_trigger_page_var = ins;
                }
        }
 }
@@ -2140,6 +2139,18 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
                linfo->ret.storage = LLVMArgVtypeRetAddr;
                linfo->vret_arg_index = cinfo->vret_arg_index;
                break;
+#if TARGET_WATCHOS
+       case RegTypeStructByVal:
+               /* LLVM models this by returning an int array */
+               linfo->ret.storage = LLVMArgAsIArgs;
+               linfo->ret.nslots = cinfo->ret.nregs;
+               break;
+#endif
+       case RegTypeHFA:
+               linfo->ret.storage = LLVMArgFpStruct;
+               linfo->ret.nslots = cinfo->ret.nregs;
+               linfo->ret.esize = cinfo->ret.esize;
+               break;
        default:
                cfg->exception_message = g_strdup_printf ("unknown ret conv (%d)", cinfo->ret.storage);
                cfg->disable_llvm = TRUE;
@@ -2147,9 +2158,10 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
        }
 
        for (i = 0; i < n; ++i) {
+               LLVMArgInfo *lainfo = &linfo->args [i];
                ainfo = cinfo->args + i;
 
-               linfo->args [i].storage = LLVMArgNone;
+               lainfo->storage = LLVMArgNone;
 
                switch (ainfo->storage) {
                case RegTypeGeneral:
@@ -2157,12 +2169,33 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
                case RegTypeBase:
                case RegTypeBaseGen:
                case RegTypeFP:
-                       linfo->args [i].storage = LLVMArgNormal;
+                       lainfo->storage = LLVMArgNormal;
                        break;
                case RegTypeStructByVal:
-                       linfo->args [i].storage = LLVMArgAsIArgs;
-                       linfo->args [i].nslots = ainfo->struct_size / sizeof (gpointer);
+                       lainfo->storage = LLVMArgAsIArgs;
+                       if (eabi_supported && ainfo->align == 8) {
+                               /* LLVM models this by passing an int64 array */
+                               lainfo->nslots = ALIGN_TO (ainfo->struct_size, 8) / 8;
+                               lainfo->esize = 8;
+                       } else {
+                               lainfo->nslots = ainfo->struct_size / sizeof (gpointer);
+                               lainfo->esize = 4;
+                       }
+                       break;
+               case RegTypeStructByAddr:
+               case RegTypeStructByAddrOnStack:
+                       lainfo->storage = LLVMArgVtypeByRef;
+                       break;
+               case RegTypeHFA: {
+                       int j;
+
+                       lainfo->storage = LLVMArgAsFpArgs;
+                       lainfo->nslots = ainfo->nregs;
+                       lainfo->esize = ainfo->esize;
+                       for (j = 0; j < ainfo->nregs; ++j)
+                               lainfo->pair_storage [j] = LLVMArgInFPReg;
                        break;
+               }
                default:
                        cfg->exception_message = g_strdup_printf ("ainfo->storage (%d)", ainfo->storage);
                        cfg->disable_llvm = TRUE;
@@ -2189,10 +2222,14 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 
        switch (cinfo->ret.storage) {
        case RegTypeStructByVal:
-               /* The JIT will transform this into a normal call */
-               call->vret_in_reg = TRUE;
-               break;
        case RegTypeHFA:
+               if (cinfo->ret.storage == RegTypeStructByVal && cinfo->ret.nregs == 1) {
+                       /* The JIT will transform this into a normal call */
+                       call->vret_in_reg = TRUE;
+                       break;
+               }
+               if (call->inst.opcode == OP_TAILCALL)
+                       break;
                /*
                 * The vtype is returned in registers, save the return area address in a local, and save the vtype into
                 * the location pointed to by it after call in emit_move_return_value ().
@@ -2306,19 +2343,12 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                                mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
                        }
                        break;
-               case RegTypeStructByAddr:
-                       NOT_IMPLEMENTED;
-#if 0
-                       /* FIXME: where si the data allocated? */
-                       arg->backend.reg3 = ainfo->reg;
-                       call->used_iregs |= 1 << ainfo->reg;
-                       g_assert_not_reached ();
-#endif
-                       break;
                case RegTypeStructByVal:
                case RegTypeGSharedVtInReg:
                case RegTypeGSharedVtOnStack:
                case RegTypeHFA:
+               case RegTypeStructByAddr:
+               case RegTypeStructByAddrOnStack:
                        MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
                        ins->opcode = OP_OUTARG_VT;
                        ins->sreg1 = in->dreg;
@@ -2464,10 +2494,12 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
 
        switch (ainfo->storage) {
        case RegTypeGSharedVtInReg:
+       case RegTypeStructByAddr:
                /* Pass by addr */
                mono_call_inst_add_outarg_reg (cfg, call, src->dreg, ainfo->reg, FALSE);
                break;
        case RegTypeGSharedVtOnStack:
+       case RegTypeStructByAddrOnStack:
                /* Pass by addr on stack */
                MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, src->dreg);
                break;
@@ -2721,16 +2753,20 @@ mono_arch_dyn_call_free (MonoDynCallInfo *info)
        g_free (ainfo);
 }
 
+int
+mono_arch_dyn_call_get_buf_size (MonoDynCallInfo *info)
+{
+       return sizeof (DynCallArgs);
+}
+
 void
-mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len)
+mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf)
 {
        ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
        DynCallArgs *p = (DynCallArgs*)buf;
        int arg_index, greg, i, j, pindex;
        MonoMethodSignature *sig = dinfo->sig;
 
-       g_assert (buf_len >= sizeof (DynCallArgs));
-
        p->res = 0;
        p->ret = ret;
        p->has_fpregs = 0;
@@ -2772,10 +2808,6 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g
                }
 
                switch (t->type) {
-               case MONO_TYPE_STRING:
-               case MONO_TYPE_CLASS:  
-               case MONO_TYPE_ARRAY:
-               case MONO_TYPE_SZARRAY:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_PTR:
                case MONO_TYPE_I:
@@ -2877,10 +2909,6 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
        case MONO_TYPE_VOID:
                *(gpointer*)ret = NULL;
                break;
-       case MONO_TYPE_STRING:
-       case MONO_TYPE_CLASS:  
-       case MONO_TYPE_ARRAY:
-       case MONO_TYPE_SZARRAY:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
@@ -4025,6 +4053,18 @@ mono_arm_thumb_supported (void)
        return thumb_supported;
 }
 
+gboolean
+mono_arm_eabi_supported (void)
+{
+       return eabi_supported;
+}
+
+int
+mono_arm_i8_align (void)
+{
+       return i8_align;
+}
+
 #ifndef DISABLE_JIT
 
 static guint8*
@@ -4037,10 +4077,16 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
        cinfo = call->call_info;
 
        switch (cinfo->ret.storage) {
+       case RegTypeStructByVal:
        case RegTypeHFA: {
                MonoInst *loc = cfg->arch.vret_addr_loc;
                int i;
 
+               if (cinfo->ret.storage == RegTypeStructByVal && cinfo->ret.nregs == 1) {
+                       /* The JIT treats this as a normal call */
+                       break;
+               }
+
                /* Load the destination address */
                g_assert (loc && loc->opcode == OP_REGOFFSET);
 
@@ -4050,11 +4096,34 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
                        code = mono_arm_emit_load_imm (code, ARMREG_LR, loc->inst_offset);
                        ARM_LDR_REG_REG (code, ARMREG_LR, loc->inst_basereg, ARMREG_LR);
                }
-               for (i = 0; i < cinfo->ret.nregs; ++i) {
-                       if (cinfo->ret.esize == 4)
-                               ARM_FSTS (code, cinfo->ret.reg + i, ARMREG_LR, i * 4);
-                       else
-                               ARM_FSTD (code, cinfo->ret.reg + (i * 2), ARMREG_LR, i * 8);
+
+               if (cinfo->ret.storage == RegTypeStructByVal) {
+                       int rsize = cinfo->ret.struct_size;
+
+                       for (i = 0; i < cinfo->ret.nregs; ++i) {
+                               g_assert (rsize >= 0);
+                               switch (rsize) {
+                               case 0:
+                                       break;
+                               case 1:
+                                       ARM_STRB_IMM (code, i, ARMREG_LR, i * 4);
+                                       break;
+                               case 2:
+                                       ARM_STRH_IMM (code, i, ARMREG_LR, i * 4);
+                                       break;
+                               default:
+                                       ARM_STR_IMM (code, i, ARMREG_LR, i * 4);
+                                       break;
+                               }
+                               rsize -= 4;
+                       }
+               } else {
+                       for (i = 0; i < cinfo->ret.nregs; ++i) {
+                               if (cinfo->ret.esize == 4)
+                                       ARM_FSTS (code, cinfo->ret.reg + i, ARMREG_LR, i * 4);
+                               else
+                                       ARM_FSTD (code, cinfo->ret.reg + (i * 2), ARMREG_LR, i * 8);
+                       }
                }
                return code;
        }
@@ -4127,17 +4196,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
        cpos = bb->max_offset;
 
-       if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
-               //MonoCoverageInfo *cov = mono_get_coverage_info (cfg->method);
-               //g_assert (!mono_compile_aot);
-               //cpos += 6;
-               //if (bb->cil_code)
-               //      cov->data [bb->dfn].iloffset = bb->cil_code - cfg->cil_code;
-               /* this is not thread save, but good enough */
-               /* fixme: howto handle overflows? */
-               //x86_inc_mem (code, &cov->data [bb->dfn].count); 
-       }
-
     if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num) {
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                                         (gpointer)"mono_break");
@@ -4166,16 +4224,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_TLS_GET:
-                       code = mono_arm_emit_tls_get (cfg, code, ins->dreg, ins->inst_offset);
-                       break;
-               case OP_TLS_GET_REG:
-                       code = mono_arm_emit_tls_get_reg (cfg, code, ins->dreg, ins->sreg1);
+                       code = emit_tls_get (code, ins->dreg, ins->inst_offset);
                        break;
                case OP_TLS_SET:
-                       code = mono_arm_emit_tls_set (cfg, code, ins->sreg1, ins->inst_offset);
-                       break;
-               case OP_TLS_SET_REG:
-                       code = mono_arm_emit_tls_set_reg (cfg, code, ins->sreg1, ins->sreg2);
+                       code = emit_tls_set (code, ins->sreg1, ins->inst_offset);
                        break;
                case OP_ATOMIC_EXCHANGE_I4:
                case OP_ATOMIC_CAS_I4:
@@ -4291,7 +4343,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                break;
                        }
 
-                       ARM_DMB (code, ARM_DMB_SY);
+                       if (ins->backend.memory_barrier_kind != MONO_MEMORY_BARRIER_NONE)
+                               ARM_DMB (code, ARM_DMB_SY);
                        break;
                }
                case OP_ATOMIC_STORE_I1:
@@ -4302,7 +4355,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_ATOMIC_STORE_U4:
                case OP_ATOMIC_STORE_R4:
                case OP_ATOMIC_STORE_R8: {
-                       ARM_DMB (code, ARM_DMB_SY);
+                       if (ins->backend.memory_barrier_kind != MONO_MEMORY_BARRIER_NONE)
+                               ARM_DMB (code, ARM_DMB_SY);
 
                        code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
 
@@ -4341,14 +4395,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ARM_DMB (code, ARM_DMB_SY);
                        break;
                }
-               /*case OP_BIGMUL:
-                       ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
-                       ppc_mulhw (code, ppc_r3, ins->sreg1, ins->sreg2);
+               case OP_BIGMUL:
+                       ARM_SMULL_REG_REG (code, ins->backend.reg3, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_BIGMUL_UN:
-                       ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
-                       ppc_mulhwu (code, ppc_r3, ins->sreg1, ins->sreg2);
-                       break;*/
+                       ARM_UMULL_REG_REG (code, ins->backend.reg3, ins->dreg, ins->sreg1, ins->sreg2);
+                       break;
                case OP_STOREI1_MEMBASE_IMM:
                        code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_imm & 0xFF);
                        g_assert (arm_is_imm12 (ins->inst_offset));
@@ -4501,9 +4553,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        MonoInst *var;
                        int dreg = ARMREG_LR;
 
+#if 0
                        if (cfg->soft_breakpoints) {
                                g_assert (!cfg->compile_aot);
                        }
+#endif
 
                        /*
                         * For AOT, we use one got slot per method, which will point to a
@@ -4513,7 +4567,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (cfg->compile_aot) {
                                g_assert (info_var);
                                g_assert (info_var->opcode == OP_REGOFFSET);
-                               g_assert (arm_is_imm12 (info_var->inst_offset));
                        }
 
                        if (!cfg->soft_breakpoints && !cfg->compile_aot) {
@@ -4526,15 +4579,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                g_assert (((guint64)(gsize)ss_trigger_page >> 32) == 0);
                        }
 
+                       /* Single step check */
                        if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
                                if (cfg->soft_breakpoints) {
                                        /* Load the address of the sequence point method variable. */
                                        var = ss_method_var;
                                        g_assert (var);
                                        g_assert (var->opcode == OP_REGOFFSET);
-                                       g_assert (arm_is_imm12 (var->inst_offset));
-                                       ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
-
+                                       code = emit_ldr_imm (code, dreg, var->inst_basereg, var->inst_offset);
                                        /* Read the value and check whether it is non-zero. */
                                        ARM_LDR_IMM (code, dreg, dreg, 0);
                                        ARM_CMP_REG_IMM (code, dreg, 0, 0);
@@ -4546,8 +4598,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                                var = ss_trigger_page_var;
                                                g_assert (var);
                                                g_assert (var->opcode == OP_REGOFFSET);
-                                               g_assert (arm_is_imm12 (var->inst_offset));
-                                               ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+                                               code = emit_ldr_imm (code, dreg, var->inst_basereg, var->inst_offset);
                                        } else {
                                                ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
                                                ARM_B (code, 0);
@@ -4560,24 +4611,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
 
-                       if (cfg->soft_breakpoints) {
-                               /* Load the address of the breakpoint method into ip. */
-                               var = bp_method_var;
-                               g_assert (var);
-                               g_assert (var->opcode == OP_REGOFFSET);
-                               g_assert (arm_is_imm12 (var->inst_offset));
-                               ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
-
-                               /*
-                                * A placeholder for a possible breakpoint inserted by
-                                * mono_arch_set_breakpoint ().
-                                */
-                               ARM_NOP (code);
-                       } else if (cfg->compile_aot) {
+                       /* Breakpoint check */
+                       if (cfg->compile_aot) {
                                guint32 offset = code - cfg->native_code;
                                guint32 val;
 
-                               ARM_LDR_IMM (code, dreg, info_var->inst_basereg, info_var->inst_offset);
+                               var = info_var;
+                               code = emit_ldr_imm (code, dreg, var->inst_basereg, var->inst_offset);
                                /* Add the offset */
                                val = ((offset / 4) * sizeof (guint8*)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
                                /* Load the info->bp_addrs [offset], which is either 0 or the address of a trigger page */
@@ -4596,7 +4636,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                /* What is faster, a branch or a load ? */
                                ARM_CMP_REG_IMM (code, dreg, 0, 0);
                                /* The breakpoint instruction */
-                               ARM_LDR_IMM_COND (code, dreg, dreg, 0, ARMCOND_NE);
+                               if (cfg->soft_breakpoints)
+                                       ARM_BLX_REG_COND (code, ARMCOND_NE, dreg);
+                               else
+                                       ARM_LDR_IMM_COND (code, dreg, dreg, 0, ARMCOND_NE);
+                       } else if (cfg->soft_breakpoints) {
+                               /* Load the address of the breakpoint method into ip. */
+                               var = bp_method_var;
+                               g_assert (var);
+                               g_assert (var->opcode == OP_REGOFFSET);
+                               g_assert (arm_is_imm12 (var->inst_offset));
+                               ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+
+                               /*
+                                * A placeholder for a possible breakpoint inserted by
+                                * mono_arch_set_breakpoint ().
+                                */
+                               ARM_NOP (code);
                        } else {
                                /* 
                                 * A placeholder for a possible breakpoint inserted by
@@ -5026,19 +5082,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_GENERIC_CLASS_INIT: {
-                       static int byte_offset = -1;
-                       static guint8 bitmask;
-                       guint32 imm8;
+                       int byte_offset;
                        guint8 *jump;
 
-                       if (byte_offset < 0)
-                               mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
+                       byte_offset = MONO_STRUCT_OFFSET (MonoVTable, initialized);
 
                        g_assert (arm_is_imm8 (byte_offset));
                        ARM_LDRSB_IMM (code, ARMREG_IP, ins->sreg1, byte_offset);
-                       imm8 = mono_arm_is_rotated_imm8 (bitmask, &rot_amount);
-                       g_assert (imm8 >= 0);
-                       ARM_AND_REG_IMM (code, ARMREG_IP, ARMREG_IP, imm8, rot_amount);
                        ARM_CMP_REG_IMM (code, ARMREG_IP, 0, 0);
                        jump = code;
                        ARM_B_COND (code, ARMCOND_NE, 0);
@@ -5845,7 +5895,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        arm_patch (buf [0], code);
                        break;
                }
-
+               case OP_FILL_PROF_CALL_CTX:
+                       for (int i = 0; i < ARMREG_MAX; i++)
+                               if ((MONO_ARCH_CALLEE_SAVED_REGS & (1 << i)) || i == ARMREG_SP || i == ARMREG_FP)
+                                       ARM_STR_IMM (code, i, ins->sreg1, MONO_STRUCT_OFFSET (MonoContext, regs) + i * sizeof (mgreg_t));
+                       break;
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
                        g_assert_not_reached ();
@@ -5875,38 +5929,6 @@ mono_arch_register_lowlevel_calls (void)
        mono_register_jit_icall (mono_arm_throw_exception, "mono_arm_throw_exception", mono_create_icall_signature ("void"), TRUE);
        mono_register_jit_icall (mono_arm_throw_exception_by_token, "mono_arm_throw_exception_by_token", mono_create_icall_signature ("void"), TRUE);
        mono_register_jit_icall (mono_arm_unaligned_stack, "mono_arm_unaligned_stack", mono_create_icall_signature ("void"), TRUE);
-
-#ifndef MONO_CROSS_COMPILE
-       if (mono_arm_have_tls_get ()) {
-               MonoTlsImplementation tls_imp = mono_arm_get_tls_implementation ();
-
-               mono_register_jit_icall (tls_imp.get_tls_thunk, "mono_get_tls_key", mono_create_icall_signature ("ptr ptr"), TRUE);
-               mono_register_jit_icall (tls_imp.set_tls_thunk, "mono_set_tls_key", mono_create_icall_signature ("void ptr ptr"), TRUE);
-
-               if (tls_imp.get_tls_thunk_end) {
-                       mono_tramp_info_register (
-                               mono_tramp_info_create (
-                                       "mono_get_tls_key",
-                                       (guint8*)tls_imp.get_tls_thunk,
-                                       (guint8*)tls_imp.get_tls_thunk_end - (guint8*)tls_imp.get_tls_thunk,
-                                       NULL,
-                                       mono_arch_get_cie_program ()
-                                       ),
-                               NULL
-                               );
-                       mono_tramp_info_register (
-                               mono_tramp_info_create (
-                                       "mono_set_tls_key",
-                                       (guint8*)tls_imp.set_tls_thunk,
-                                       (guint8*)tls_imp.set_tls_thunk_end - (guint8*)tls_imp.set_tls_thunk,
-                                       NULL,
-                                       mono_arch_get_cie_program ()
-                                       ),
-                               NULL
-                               );
-               }
-       }
-#endif
 }
 
 #define patch_lis_ori(ip,val) do {\
@@ -6126,9 +6148,6 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                MonoInst *ins = bb->code;
                bb->max_offset = max_offset;
 
-               if (cfg->prof_options & MONO_PROFILE_COVERAGE)
-                       max_offset += 6; 
-
                MONO_BB_FOR_EACH_INS (bb, ins)
                        max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
        }
@@ -6228,6 +6247,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        case RegTypeGeneral:
                        case RegTypeIRegPair:
                        case RegTypeGSharedVtInReg:
+                       case RegTypeStructByAddr:
                                switch (ainfo->size) {
                                case 1:
                                        if (arm_is_imm12 (inst->inst_offset))
@@ -6288,6 +6308,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                break;
                        case RegTypeBase:
                        case RegTypeGSharedVtOnStack:
+                       case RegTypeStructByAddrOnStack:
                                if (arm_is_imm12 (prev_sp_offset + ainfo->offset)) {
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                                } else {
@@ -6380,10 +6401,6 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                }
                                break;
                        }
-                       case RegTypeStructByAddr:
-                               g_assert_not_reached ();
-                               /* FIXME: handle overrun! with struct sizes not multiple of 4 */
-                               code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, inst->inst_offset, ainfo->reg, 0);
                        default:
                                g_assert_not_reached ();
                                break;
@@ -6427,9 +6444,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
                if (info_var) {
                        g_assert (info_var->opcode == OP_REGOFFSET);
-                       g_assert (arm_is_imm12 (info_var->inst_offset));
 
-                       ARM_LDR_IMM (code, dreg, info_var->inst_basereg, info_var->inst_offset);
+                       code = emit_ldr_imm (code, dreg, info_var->inst_basereg, info_var->inst_offset);
                        /* Load the trigger page addr */
                        ARM_LDR_IMM (code, dreg, dreg, MONO_STRUCT_OFFSET (SeqPointInfo, ss_trigger_page));
                        ARM_STR_IMM (code, dreg, ss_trigger_page_var->inst_basereg, ss_trigger_page_var->inst_offset);
@@ -6439,22 +6455,36 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (cfg->arch.seq_point_ss_method_var) {
                MonoInst *ss_method_ins = cfg->arch.seq_point_ss_method_var;
                MonoInst *bp_method_ins = cfg->arch.seq_point_bp_method_var;
+
                g_assert (ss_method_ins->opcode == OP_REGOFFSET);
                g_assert (arm_is_imm12 (ss_method_ins->inst_offset));
-               g_assert (bp_method_ins->opcode == OP_REGOFFSET);
-               g_assert (arm_is_imm12 (bp_method_ins->inst_offset));
 
-               ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
-               ARM_B (code, 1);
-               *(gpointer*)code = &single_step_tramp;
-               code += 4;
-               *(gpointer*)code = breakpoint_tramp;
-               code += 4;
+               if (cfg->compile_aot) {
+                       MonoInst *info_var = cfg->arch.seq_point_info_var;
+                       int dreg = ARMREG_LR;
+
+                       g_assert (info_var->opcode == OP_REGOFFSET);
+                       g_assert (arm_is_imm12 (info_var->inst_offset));
+
+                       ARM_LDR_IMM (code, dreg, info_var->inst_basereg, info_var->inst_offset);
+                       ARM_LDR_IMM (code, dreg, dreg, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr));
+                       ARM_STR_IMM (code, dreg, ss_method_ins->inst_basereg, ss_method_ins->inst_offset);
+               } else {
+                       g_assert (bp_method_ins->opcode == OP_REGOFFSET);
+                       g_assert (arm_is_imm12 (bp_method_ins->inst_offset));
+
+                       ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
+                       ARM_B (code, 1);
+                       *(gpointer*)code = &single_step_tramp;
+                       code += 4;
+                       *(gpointer*)code = breakpoint_tramp;
+                       code += 4;
 
-               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 0);
-               ARM_STR_IMM (code, ARMREG_IP, ss_method_ins->inst_basereg, ss_method_ins->inst_offset);
-               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 4);
-               ARM_STR_IMM (code, ARMREG_IP, bp_method_ins->inst_basereg, bp_method_ins->inst_offset);
+                       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 0);
+                       ARM_STR_IMM (code, ARMREG_IP, ss_method_ins->inst_basereg, ss_method_ins->inst_offset);
+                       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 4);
+                       ARM_STR_IMM (code, ARMREG_IP, bp_method_ins->inst_basereg, bp_method_ins->inst_offset);
+               }
        }
 
        cfg->code_len = code - cfg->native_code;
@@ -6479,9 +6509,6 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL)
                max_epilog_size += 50;
 
-       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-               max_epilog_size += 50;
-
        while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
@@ -6507,11 +6534,23 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        case RegTypeStructByVal: {
                MonoInst *ins = cfg->ret;
 
-               if (arm_is_imm12 (ins->inst_offset)) {
-                       ARM_LDR_IMM (code, ARMREG_R0, ins->inst_basereg, ins->inst_offset);
+               if (cinfo->ret.nregs == 1) {
+                       if (arm_is_imm12 (ins->inst_offset)) {
+                               ARM_LDR_IMM (code, ARMREG_R0, ins->inst_basereg, ins->inst_offset);
+                       } else {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_LDR_REG_REG (code, ARMREG_R0, ins->inst_basereg, ARMREG_LR);
+                       }
                } else {
-                       code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
-                       ARM_LDR_REG_REG (code, ARMREG_R0, ins->inst_basereg, ARMREG_LR);
+                       for (i = 0; i < cinfo->ret.nregs; ++i) {
+                               int offset = ins->inst_offset + (i * 4);
+                               if (arm_is_imm12 (offset)) {
+                                       ARM_LDR_IMM (code, i, ins->inst_basereg, offset);
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_LR, offset);
+                                       ARM_LDR_REG_REG (code, i, ins->inst_basereg, ARMREG_LR);
+                               }
+                       }
                }
                break;
        }
@@ -6718,12 +6757,6 @@ mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMetho
        return NULL;
 }
 
-gboolean
-mono_arch_print_tree (MonoInst *tree, int arity)
-{
-       return 0;
-}
-
 #ifndef DISABLE_JIT
 
 #endif
@@ -6792,7 +6825,7 @@ mini_dump_bad_imt (int input_imt, int compared_imt, int pc)
 #endif
 
 gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
        gpointer fail_tramp)
 {
        int size, i;
@@ -6844,7 +6877,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                size += 4 * count; /* The ARM_ADD_REG_IMM to pop the stack */
 
        if (fail_tramp)
-               code = mono_method_alloc_generic_virtual_thunk (domain, size);
+               code = mono_method_alloc_generic_virtual_trampoline (domain, size);
        else
                code = mono_domain_code_reserve (domain, size);
        start = code;
@@ -6852,7 +6885,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
        unwind_ops = mono_arch_get_cie_program ();
 
 #ifdef DEBUG_IMT
-       g_print ("Building IMT thunk for class %s %s entries %d code size %d code at %p end %p vtable %p fail_tramp %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable, fail_tramp);
+       g_print ("Building IMT trampoline for class %s %s entries %d code size %d code at %p end %p vtable %p fail_tramp %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable, fail_tramp);
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                g_print ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->key, ((MonoMethod*)item->key)->name, &vtable->vtable [item->value.vtable_slot], item->is_equals, item->chunk_size);
@@ -7017,8 +7050,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
        g_free (constant_pool_starts);
 
        mono_arch_flush_icache ((guint8*)start, size);
-       mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL);
-       mono_stats.imt_thunks_size += code - start;
+       MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL));
+       UnlockedAdd (&mono_stats.imt_trampolines_size, code - start);
 
        g_assert (DISTANCE (start, code) <= size);
 
@@ -7051,26 +7084,6 @@ mono_arch_get_trampolines (gboolean aot)
        return mono_arm_get_exception_trampolines (aot);
 }
 
-gpointer
-mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
-{
-       gpointer *lr_loc;
-       char *old_value;
-       char *bp;
-
-       /*Load the spvar*/
-       bp = MONO_CONTEXT_GET_BP (ctx);
-       lr_loc = (gpointer*)(bp + clause->exvar_offset);
-
-       old_value = *lr_loc;
-       if ((char*)old_value < (char*)ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
-               return old_value;
-
-       *lr_loc = new_value;
-
-       return old_value;
-}
-
 #if defined(MONO_ARCH_SOFT_DEBUG_SUPPORTED)
 /*
  * mono_arch_set_breakpoint:
@@ -7085,17 +7098,19 @@ mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
        guint32 native_offset = ip - (guint8*)ji->code_start;
        MonoDebugOptions *opt = mini_get_debug_options ();
 
-       if (opt->soft_breakpoints) {
-               g_assert (!ji->from_aot);
-               code += 4;
-               ARM_BLX_REG (code, ARMREG_LR);
-               mono_arch_flush_icache (code - 4, 4);
-       } else if (ji->from_aot) {
+       if (ji->from_aot) {
                SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
 
+               if (!breakpoint_tramp)
+                       breakpoint_tramp = mini_get_breakpoint_trampoline ();
+
                g_assert (native_offset % 4 == 0);
                g_assert (info->bp_addrs [native_offset / 4] == 0);
-               info->bp_addrs [native_offset / 4] = bp_trigger_page;
+               info->bp_addrs [native_offset / 4] = opt->soft_breakpoints ? breakpoint_tramp : bp_trigger_page;
+       } else if (opt->soft_breakpoints) {
+               code += 4;
+               ARM_BLX_REG (code, ARMREG_LR);
+               mono_arch_flush_icache (code - 4, 4);
        } else {
                int dreg = ARMREG_LR;
 
@@ -7131,18 +7146,20 @@ mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
        guint8 *code = ip;
        int i;
 
-       if (opt->soft_breakpoints) {
-               g_assert (!ji->from_aot);
-               code += 4;
-               ARM_NOP (code);
-               mono_arch_flush_icache (code - 4, 4);
-       } else if (ji->from_aot) {
+       if (ji->from_aot) {
                guint32 native_offset = ip - (guint8*)ji->code_start;
                SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
 
+               if (!breakpoint_tramp)
+                       breakpoint_tramp = mini_get_breakpoint_trampoline ();
+
                g_assert (native_offset % 4 == 0);
-               g_assert (info->bp_addrs [native_offset / 4] == bp_trigger_page);
+               g_assert (info->bp_addrs [native_offset / 4] == (opt->soft_breakpoints ? breakpoint_tramp : bp_trigger_page));
                info->bp_addrs [native_offset / 4] = 0;
+       } else if (opt->soft_breakpoints) {
+               code += 4;
+               ARM_NOP (code);
+               mono_arch_flush_icache (code - 4, 4);
        } else {
                for (i = 0; i < 4; ++i)
                        ARM_NOP (code);
@@ -7280,6 +7297,7 @@ mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
 
                info->ss_trigger_page = ss_trigger_page;
                info->bp_trigger_page = bp_trigger_page;
+               info->ss_tramp_addr = &single_step_tramp;
 
                mono_domain_lock (domain);
                g_hash_table_insert (domain_jit_info (domain)->arch_seq_points,
@@ -7377,3 +7395,37 @@ mono_arch_get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
 {
        return get_call_info (mp, sig);
 }
+
+gpointer
+mono_arch_get_get_tls_tramp (void)
+{
+       return NULL;
+}
+
+static guint8*
+emit_aotconst (MonoCompile *cfg, guint8 *code, int dreg, int patch_type, gpointer data)
+{
+       /* OP_AOTCONST */
+       mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
+       ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
+       ARM_B (code, 0);
+       *(gpointer*)code = NULL;
+       code += 4;
+       /* Load the value from the GOT */
+       ARM_LDR_REG_REG (code, dreg, ARMREG_PC, dreg);
+       return code;
+}
+
+guint8*
+mono_arm_emit_aotconst (gpointer ji_list, guint8 *code, guint8 *buf, int dreg, int patch_type, gconstpointer data)
+{
+       MonoJumpInfo **ji = (MonoJumpInfo**)ji_list;
+
+       *ji = mono_patch_info_list_prepend (*ji, code - buf, patch_type, data);
+       ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
+       ARM_B (code, 0);
+       *(gpointer*)code = NULL;
+       code += 4;
+       ARM_LDR_REG_REG (code, dreg, ARMREG_PC, dreg);
+       return code;
+}