[profiler] Implement call context introspection for enter/leave events.
[mono.git] / mono / mini / mini-arm.c
index 13a3ffee37f8159a3b1346bf99c09b007ab5159b..a304dbef08062a83bfc9b907a6840ee00e42cdcb 100644 (file)
@@ -1,5 +1,6 @@
-/*
- * mini-arm.c: ARM backend for the Mono code generator
+/**
+ * \file
+ * ARM backend for the Mono code generator
  *
  * Authors:
  *   Paolo Molaro (lupus@ximian.com)
@@ -23,7 +24,6 @@
 #include <mono/utils/mono-threads-coop.h>
 
 #include "mini-arm.h"
-#include "mini-arm-tls.h"
 #include "cpu-arm.h"
 #include "trace.h"
 #include "ir-emit.h"
 #include "mini-gc.h"
 #include "mono/arch/arm/arm-vfp-codegen.h"
 
-#if (defined(HAVE_KW_THREAD) && defined(__linux__) && defined(__ARM_EABI__)) \
-       || defined(TARGET_ANDROID) \
-       || (defined(TARGET_IOS) && !defined(TARGET_WATCHOS))
-#define HAVE_FAST_TLS
-#endif
-
 /* Sanity check: This makes no sense */
 #if defined(ARM_FPU_NONE) && (defined(ARM_FPU_VFP) || defined(ARM_FPU_VFP_HARD))
 #error "ARM_FPU_NONE is defined while one of ARM_FPU_VFP/ARM_FPU_VFP_HARD is defined"
@@ -127,7 +121,6 @@ static int vfp_scratch2 = ARM_VFP_D1;
 static int i8_align;
 
 static gpointer single_step_tramp, breakpoint_tramp;
-static gpointer get_tls_tramp;
 
 /*
  * The code generated for sequence points reads from this location, which is
@@ -222,6 +215,19 @@ emit_big_add (guint8 *code, int dreg, int sreg, int imm)
        return code;
 }
 
+/*
+ * emit_ldr_imm:
+ *
+ *   Emit code which loads the word at [SREG + IMM] into DREG and return the
+ * updated code pointer. When arm_is_imm12 () rejects IMM, the address is first
+ * materialized in DREG through emit_big_add () and the load is done with a
+ * zero offset; on that path DREG is asserted to differ from SREG.
+ */
+static guint8*
+emit_ldr_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+       if (arm_is_imm12 (imm)) {
+               /* The offset can be encoded directly in the load */
+               ARM_LDR_IMM (code, dreg, sreg, imm);
+               return code;
+       }
+
+       /* Offset too large for a single load: compute the address, then load through it */
+       g_assert (dreg != sreg);
+       code = emit_big_add (code, dreg, sreg, imm);
+       ARM_LDR_IMM (code, dreg, dreg, 0);
+       return code;
+}
+
 /* If dreg == sreg, this clobbers IP */
 static guint8*
 emit_sub_imm (guint8 *code, int dreg, int sreg, int imm)
@@ -329,100 +335,58 @@ mono_arm_patchable_bl (guint8 *code, int cond)
        return code;
 }
 
-static guint8*
-mono_arm_emit_tls_get (MonoCompile *cfg, guint8* code, int dreg, int tls_offset)
-{
-#ifdef HAVE_FAST_TLS
-       code = mono_arm_emit_load_imm (code, ARMREG_R0, tls_offset);
-       if (cfg->compile_aot) {
-               /*
-                * This opcode is generated by CEE_MONO_JIT_ATTACH, so it can execute on
-                * threads which are not yet attached to the runtime. This means we can't
-                * call it directly, since the call would go through the trampoline code
-                * which assumes the thread is attached. So use a separate patch info type
-                * for it, and load it from a preinitialized GOT slot.
-                */
-               code = emit_aotconst (cfg, code, ARMREG_R1, MONO_PATCH_INFO_GET_TLS_TRAMP, NULL);
-               code = emit_call_reg (code, ARMREG_R1);
-       } else {
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                                                        "mono_get_tls_key");
-               code = emit_call_seq (cfg, code);
-       }
-       if (dreg != ARMREG_R0)
-               ARM_MOV_REG_REG (code, dreg, ARMREG_R0);
-#else
-       g_assert_not_reached ();
+/*
+ * HAVE_AEABI_READ_TP is only defined for native (non cross-compiling) EABI
+ * Linux builds other than Android. It gates the __aeabi_read_tp () prototype
+ * below and the runtime probe done by mono_arch_have_fast_tls ().
+ */
+#if defined(__ARM_EABI__) && defined(__linux__) && !defined(PLATFORM_ANDROID) && !defined(MONO_CROSS_COMPILE)
+#define HAVE_AEABI_READ_TP 1
 #endif
-       return code;
-}
 
-static guint8*
-mono_arm_emit_tls_get_reg (MonoCompile *cfg, guint8* code, int dreg, int tls_offset_reg)
+#ifdef HAVE_AEABI_READ_TP
+/* NOTE(review): presumably the ARM EABI run-time helper returning the thread pointer -- TODO confirm */
+gpointer __aeabi_read_tp (void);
+#endif
+
+/*
+ * mono_arch_have_fast_tls:
+ *
+ *   Return whether the inline TLS sequences emitted by emit_tls_get () and
+ * emit_tls_set () can be used. Always FALSE when HAVE_AEABI_READ_TP is not
+ * defined or when the use_fallback_tls debug option is set. Otherwise the
+ * answer is computed once and cached: it is TRUE only if v7_supported is set
+ * and __aeabi_read_tp () returns a non-NULL value equal to the one read with
+ * "mrc p15, 0, <reg>, c13, c0, 3".
+ * NOTE(review): the cache is a pair of plain statics with no synchronization;
+ * confirm that racing first calls are acceptable.
+ */
+gboolean
+mono_arch_have_fast_tls (void)
 {
-#ifdef HAVE_FAST_TLS
-       if (tls_offset_reg != ARMREG_R0)
-               ARM_MOV_REG_REG (code, ARMREG_R0, tls_offset_reg);
-       if (cfg->compile_aot) {
-               code = emit_aotconst (cfg, code, ARMREG_R1, MONO_PATCH_INFO_GET_TLS_TRAMP, NULL);
-               code = emit_call_reg (code, ARMREG_R1);
-       } else {
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                                                        "mono_get_tls_key");
-               code = emit_call_seq (cfg, code);
+#ifdef HAVE_AEABI_READ_TP
+       static gboolean have_fast_tls = FALSE;
+        static gboolean inited = FALSE;
+
+       /* Checked before the cache, so the debug option is honored on every call */
+       if (mini_get_debug_options ()->use_fallback_tls)
+               return FALSE;
+
+       if (inited)
+               return have_fast_tls;
+
+       if (v7_supported) {
+               gpointer tp1, tp2;
+
+               /* Read the thread pointer both ways; the inline sequence is only trusted if they agree */
+               tp1 = __aeabi_read_tp ();
+               asm volatile("mrc p15, 0, %0, c13, c0, 3" : "=r" (tp2));
+
+               have_fast_tls = tp1 && tp1 == tp2;
        }
-       if (dreg != ARMREG_R0)
-               ARM_MOV_REG_REG (code, dreg, ARMREG_R0);
+       inited = TRUE;
+       return have_fast_tls;
 #else
-       g_assert_not_reached ();
+       return FALSE;
 #endif
-       return code;
 }
 
 static guint8*
-mono_arm_emit_tls_set (MonoCompile *cfg, guint8* code, int sreg, int tls_offset)
+emit_tls_get (guint8 *code, int dreg, int tls_offset)
 {
-#ifdef HAVE_FAST_TLS
-       if (sreg != ARMREG_R1)
-               ARM_MOV_REG_REG (code, ARMREG_R1, sreg);
-       code = mono_arm_emit_load_imm (code, ARMREG_R0, tls_offset);
-       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                       "mono_set_tls_key");
-       code = emit_call_seq (cfg, code);
-#else
-       g_assert_not_reached ();
-#endif
+       g_assert (v7_supported);
+       ARM_MRC (code, 15, 0, dreg, 13, 0, 3);
+       ARM_LDR_IMM (code, dreg, dreg, tls_offset);
        return code;
 }
 
 static guint8*
-mono_arm_emit_tls_set_reg (MonoCompile *cfg, guint8* code, int sreg, int tls_offset_reg)
+emit_tls_set (guint8 *code, int sreg, int tls_offset)
 {
-#ifdef HAVE_FAST_TLS
-       /* Get sreg in R1 and tls_offset_reg in R0 */
-       if (tls_offset_reg == ARMREG_R1) {
-               if (sreg == ARMREG_R0) {
-                       /* swap sreg and tls_offset_reg */
-                       ARM_EOR_REG_REG (code, sreg, sreg, tls_offset_reg);
-                       ARM_EOR_REG_REG (code, tls_offset_reg, sreg, tls_offset_reg);
-                       ARM_EOR_REG_REG (code, sreg, sreg, tls_offset_reg);
-               } else {
-                       ARM_MOV_REG_REG (code, ARMREG_R0, tls_offset_reg);
-                       if (sreg != ARMREG_R1)
-                               ARM_MOV_REG_REG (code, ARMREG_R1, sreg);
-               }
-       } else {
-               if (sreg != ARMREG_R1)
-                       ARM_MOV_REG_REG (code, ARMREG_R1, sreg);
-               if (tls_offset_reg != ARMREG_R0)
-                       ARM_MOV_REG_REG (code, ARMREG_R0, tls_offset_reg);
-       }
-       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
-                       "mono_set_tls_key");
-       code = emit_call_seq (cfg, code);
-#else
-       g_assert_not_reached ();
-#endif
+       int tp_reg = (sreg != ARMREG_R0) ? ARMREG_R0 : ARMREG_R1;
+       g_assert (v7_supported);
+       ARM_MRC (code, 15, 0, tp_reg, 13, 0, 3);
+       ARM_STR_IMM (code, sreg, tp_reg, tls_offset);
        return code;
 }
 
@@ -435,31 +399,13 @@ mono_arm_emit_tls_set_reg (MonoCompile *cfg, guint8* code, int sreg, int tls_off
 static guint8*
 emit_save_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
 {
-       gboolean get_lmf_fast = FALSE;
        int i;
 
-       if (mono_arm_have_tls_get ()) {
-               get_lmf_fast = TRUE;
-               if (cfg->compile_aot) {
-                       /* OP_AOTCONST */
-                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_TLS_OFFSET, (gpointer)TLS_KEY_LMF_ADDR);
-                       ARM_LDR_IMM (code, ARMREG_R1, ARMREG_PC, 0);
-                       ARM_B (code, 0);
-                       *(gpointer*)code = NULL;
-                       code += 4;
-                       /* Load the value from the GOT */
-                       ARM_LDR_REG_REG (code, ARMREG_R1, ARMREG_PC, ARMREG_R1);
-                       code = mono_arm_emit_tls_get_reg (cfg, code, ARMREG_R0, ARMREG_R1);
-               } else {
-                       gint32 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
-                       g_assert (lmf_addr_tls_offset != -1);
-                       code = mono_arm_emit_tls_get (cfg, code, ARMREG_R0, lmf_addr_tls_offset);
-               }
-       }
-
-       if (!get_lmf_fast) {
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
-                                                        (gpointer)"mono_get_lmf_addr");
+       if (mono_arch_have_fast_tls () && mono_tls_get_tls_offset (TLS_KEY_LMF_ADDR) != -1) {
+               code = emit_tls_get (code, ARMREG_R0, mono_tls_get_tls_offset (TLS_KEY_LMF_ADDR));
+       } else {
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
+                                                        (gpointer)"mono_tls_get_lmf_addr");
                code = emit_call_seq (cfg, code);
        }
        /* we build the MonoLMF structure on the stack - see mini-arm.h */
@@ -602,22 +548,6 @@ emit_restore_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
 
 #endif /* #ifndef DISABLE_JIT */
 
-/*
- * mono_arm_have_tls_get:
- *
- * Returns whether we have tls access implemented on the current
- * platform
- */
-gboolean
-mono_arm_have_tls_get (void)
-{
-#ifdef HAVE_FAST_TLS
-       return TRUE;
-#else
-       return FALSE;
-#endif
-}
-
 /*
  * mono_arch_get_argument_info:
  * @csig:  a method signature
@@ -720,7 +650,7 @@ get_delegate_invoke_impl (MonoTrampInfo **info, gboolean has_target, gboolean pa
                 g_free (name);
        }
 
-       mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
+       MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL));
 
        return start;
 }
@@ -846,7 +776,7 @@ mono_arch_cpu_init (void)
 void
 mono_arch_init (void)
 {
-       const char *cpu_arch;
+       char *cpu_arch;
 
 #ifdef TARGET_WATCHOS
        mini_get_debug_options ()->soft_breakpoints = TRUE;
@@ -869,7 +799,6 @@ mono_arch_init (void)
        mono_aot_register_jit_icall ("mono_arm_start_gsharedvt_call", mono_arm_start_gsharedvt_call);
 #endif
        mono_aot_register_jit_icall ("mono_arm_unaligned_stack", mono_arm_unaligned_stack);
-
 #if defined(__ARM_EABI__)
        eabi_supported = TRUE;
 #endif
@@ -879,7 +808,7 @@ mono_arch_init (void)
 #else
        arm_fpu = MONO_ARM_FPU_VFP;
 
-#if defined(ARM_FPU_NONE) && !defined(__APPLE__)
+#if defined(ARM_FPU_NONE) && !defined(TARGET_IOS)
        /*
         * If we're compiling with a soft float fallback and it
         * turns out that no VFP unit is available, we need to
@@ -895,10 +824,11 @@ mono_arch_init (void)
         * works. Most ARM devices have VFP units these days, so
         * normally soft float code would not be exercised much.
         */
-       const char *soft = g_getenv ("MONO_ARM_FORCE_SOFT_FLOAT");
+       char *soft = g_getenv ("MONO_ARM_FORCE_SOFT_FLOAT");
 
        if (soft && !strncmp (soft, "1", 1))
                arm_fpu = MONO_ARM_FPU_NONE;
+       g_free (soft);
 #endif
 #endif
 
@@ -921,7 +851,7 @@ mono_arch_init (void)
        v7_supported = TRUE;
 #endif
 
-#if defined(__APPLE__)
+#if defined(TARGET_IOS)
        /* iOS is special-cased here because we don't yet
           have a way to properly detect CPU features on it. */
        thumb_supported = TRUE;
@@ -946,6 +876,7 @@ mono_arch_init (void)
 
                thumb_supported = strstr (cpu_arch, "thumb") != NULL;
                thumb2_supported = strstr (cpu_arch, "thumb2") != NULL;
+               g_free (cpu_arch);
        }
 }
 
@@ -1029,10 +960,6 @@ is_regsize_var (MonoType *t)
        case MONO_TYPE_FNPTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
-       case MONO_TYPE_STRING:
-       case MONO_TYPE_CLASS:
-       case MONO_TYPE_SZARRAY:
-       case MONO_TYPE_ARRAY:
                return TRUE;
        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (t))
@@ -1357,11 +1284,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
-       case MONO_TYPE_CLASS:
        case MONO_TYPE_OBJECT:
-       case MONO_TYPE_SZARRAY:
-       case MONO_TYPE_ARRAY:
-       case MONO_TYPE_STRING:
                cinfo->ret.storage = RegTypeGeneral;
                cinfo->ret.reg = ARMREG_R0;
                break;
@@ -1510,11 +1433,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
-               case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
-               case MONO_TYPE_STRING:
-               case MONO_TYPE_SZARRAY:
-               case MONO_TYPE_ARRAY:
                        cinfo->args [n].size = sizeof (gpointer);
                        add_general (&gr, &stack_size, ainfo, TRUE);
                        break;
@@ -1603,6 +1522,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                        nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
                        ainfo->storage = RegTypeStructByVal;
                        ainfo->struct_size = size;
+                       ainfo->align = align;
                        /* FIXME: align stack_size if needed */
                        if (eabi_supported) {
                                if (align >= 8 && (gr & 1))
@@ -1734,8 +1654,7 @@ debug_omit_fp (void)
 
 /**
  * mono_arch_compute_omit_fp:
- *
- *   Determine whenever the frame pointer can be eliminated.
+ * Determine whether the frame pointer can be eliminated.
  */
 static void
 mono_arch_compute_omit_fp (MonoCompile *cfg)
@@ -1778,8 +1697,7 @@ mono_arch_compute_omit_fp (MonoCompile *cfg)
                cfg->arch.omit_fp = FALSE;
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                cfg->arch.omit_fp = FALSE;
-       if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
-               (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
+       if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)))
                cfg->arch.omit_fp = FALSE;
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];
@@ -2227,6 +2145,11 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
                linfo->ret.nslots = cinfo->ret.nregs;
                break;
 #endif
+       case RegTypeHFA:
+               linfo->ret.storage = LLVMArgFpStruct;
+               linfo->ret.nslots = cinfo->ret.nregs;
+               linfo->ret.esize = cinfo->ret.esize;
+               break;
        default:
                cfg->exception_message = g_strdup_printf ("unknown ret conv (%d)", cinfo->ret.storage);
                cfg->disable_llvm = TRUE;
@@ -2249,12 +2172,29 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
                        break;
                case RegTypeStructByVal:
                        lainfo->storage = LLVMArgAsIArgs;
-                       lainfo->nslots = ainfo->struct_size / sizeof (gpointer);
+                       if (eabi_supported && ainfo->align == 8) {
+                               /* LLVM models this by passing an int64 array */
+                               lainfo->nslots = ALIGN_TO (ainfo->struct_size, 8) / 8;
+                               lainfo->esize = 8;
+                       } else {
+                               lainfo->nslots = ainfo->struct_size / sizeof (gpointer);
+                               lainfo->esize = 4;
+                       }
                        break;
                case RegTypeStructByAddr:
                case RegTypeStructByAddrOnStack:
                        lainfo->storage = LLVMArgVtypeByRef;
                        break;
+               case RegTypeHFA: {
+                       int j;
+
+                       lainfo->storage = LLVMArgAsFpArgs;
+                       lainfo->nslots = ainfo->nregs;
+                       lainfo->esize = ainfo->esize;
+                       for (j = 0; j < ainfo->nregs; ++j)
+                               lainfo->pair_storage [j] = LLVMArgInFPReg;
+                       break;
+               }
                default:
                        cfg->exception_message = g_strdup_printf ("ainfo->storage (%d)", ainfo->storage);
                        cfg->disable_llvm = TRUE;
@@ -2863,10 +2803,6 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g
                }
 
                switch (t->type) {
-               case MONO_TYPE_STRING:
-               case MONO_TYPE_CLASS:  
-               case MONO_TYPE_ARRAY:
-               case MONO_TYPE_SZARRAY:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_PTR:
                case MONO_TYPE_I:
@@ -2968,10 +2904,6 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
        case MONO_TYPE_VOID:
                *(gpointer*)ret = NULL;
                break;
-       case MONO_TYPE_STRING:
-       case MONO_TYPE_CLASS:  
-       case MONO_TYPE_ARRAY:
-       case MONO_TYPE_SZARRAY:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
@@ -4116,6 +4048,18 @@ mono_arm_thumb_supported (void)
        return thumb_supported;
 }
 
+/*
+ * mono_arm_eabi_supported:
+ *
+ *   Return the file-local eabi_supported flag, which mono_arch_init () sets to
+ * TRUE when the runtime is built with __ARM_EABI__ defined.
+ */
+gboolean
+mono_arm_eabi_supported (void)
+{
+       return eabi_supported;
+}
+
+/*
+ * mono_arm_i8_align:
+ *
+ *   Return the file-local i8_align value.
+ * NOTE(review): presumably the alignment used for 8 byte integers; where it is
+ * initialized is not visible here -- confirm.
+ */
+int
+mono_arm_i8_align (void)
+{
+       return i8_align;
+}
+
 #ifndef DISABLE_JIT
 
 static guint8*
@@ -4247,17 +4191,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
        cpos = bb->max_offset;
 
-       if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
-               //MonoCoverageInfo *cov = mono_get_coverage_info (cfg->method);
-               //g_assert (!mono_compile_aot);
-               //cpos += 6;
-               //if (bb->cil_code)
-               //      cov->data [bb->dfn].iloffset = bb->cil_code - cfg->cil_code;
-               /* this is not thread save, but good enough */
-               /* fixme: howto handle overflows? */
-               //x86_inc_mem (code, &cov->data [bb->dfn].count); 
-       }
-
     if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num) {
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                                         (gpointer)"mono_break");
@@ -4286,16 +4219,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_TLS_GET:
-                       code = mono_arm_emit_tls_get (cfg, code, ins->dreg, ins->inst_offset);
-                       break;
-               case OP_TLS_GET_REG:
-                       code = mono_arm_emit_tls_get_reg (cfg, code, ins->dreg, ins->sreg1);
+                       code = emit_tls_get (code, ins->dreg, ins->inst_offset);
                        break;
                case OP_TLS_SET:
-                       code = mono_arm_emit_tls_set (cfg, code, ins->sreg1, ins->inst_offset);
-                       break;
-               case OP_TLS_SET_REG:
-                       code = mono_arm_emit_tls_set_reg (cfg, code, ins->sreg1, ins->sreg2);
+                       code = emit_tls_set (code, ins->sreg1, ins->inst_offset);
                        break;
                case OP_ATOMIC_EXCHANGE_I4:
                case OP_ATOMIC_CAS_I4:
@@ -4635,7 +4562,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (cfg->compile_aot) {
                                g_assert (info_var);
                                g_assert (info_var->opcode == OP_REGOFFSET);
-                               g_assert (arm_is_imm12 (info_var->inst_offset));
                        }
 
                        if (!cfg->soft_breakpoints && !cfg->compile_aot) {
@@ -4655,9 +4581,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                        var = ss_method_var;
                                        g_assert (var);
                                        g_assert (var->opcode == OP_REGOFFSET);
-                                       g_assert (arm_is_imm12 (var->inst_offset));
-                                       ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
-
+                                       code = emit_ldr_imm (code, dreg, var->inst_basereg, var->inst_offset);
                                        /* Read the value and check whether it is non-zero. */
                                        ARM_LDR_IMM (code, dreg, dreg, 0);
                                        ARM_CMP_REG_IMM (code, dreg, 0, 0);
@@ -4669,8 +4593,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                                var = ss_trigger_page_var;
                                                g_assert (var);
                                                g_assert (var->opcode == OP_REGOFFSET);
-                                               g_assert (arm_is_imm12 (var->inst_offset));
-                                               ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+                                               code = emit_ldr_imm (code, dreg, var->inst_basereg, var->inst_offset);
                                        } else {
                                                ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
                                                ARM_B (code, 0);
@@ -4688,7 +4611,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                guint32 offset = code - cfg->native_code;
                                guint32 val;
 
-                               ARM_LDR_IMM (code, dreg, info_var->inst_basereg, info_var->inst_offset);
+                               var = info_var;
+                               code = emit_ldr_imm (code, dreg, var->inst_basereg, var->inst_offset);
                                /* Add the offset */
                                val = ((offset / 4) * sizeof (guint8*)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
                                /* Load the info->bp_addrs [offset], which is either 0 or the address of a trigger page */
@@ -5153,19 +5077,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_GENERIC_CLASS_INIT: {
-                       static int byte_offset = -1;
-                       static guint8 bitmask;
-                       guint32 imm8;
+                       int byte_offset;
                        guint8 *jump;
 
-                       if (byte_offset < 0)
-                               mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
+                       byte_offset = MONO_STRUCT_OFFSET (MonoVTable, initialized);
 
                        g_assert (arm_is_imm8 (byte_offset));
                        ARM_LDRSB_IMM (code, ARMREG_IP, ins->sreg1, byte_offset);
-                       imm8 = mono_arm_is_rotated_imm8 (bitmask, &rot_amount);
-                       g_assert (imm8 >= 0);
-                       ARM_AND_REG_IMM (code, ARMREG_IP, ARMREG_IP, imm8, rot_amount);
                        ARM_CMP_REG_IMM (code, ARMREG_IP, 0, 0);
                        jump = code;
                        ARM_B_COND (code, ARMCOND_NE, 0);
@@ -5972,7 +5890,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        arm_patch (buf [0], code);
                        break;
                }
-
+               case OP_FILL_PROF_CALL_CTX:
+                       for (int i = 0; i < ARMREG_MAX; i++)
+                               if ((MONO_ARCH_CALLEE_SAVED_REGS & (1 << i)) || i == ARMREG_SP || i == ARMREG_FP)
+                                       ARM_STR_IMM (code, i, ins->sreg1, MONO_STRUCT_OFFSET (MonoContext, regs) + i * sizeof (mgreg_t));
+                       break;
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
                        g_assert_not_reached ();
@@ -6002,40 +5924,6 @@ mono_arch_register_lowlevel_calls (void)
        mono_register_jit_icall (mono_arm_throw_exception, "mono_arm_throw_exception", mono_create_icall_signature ("void"), TRUE);
        mono_register_jit_icall (mono_arm_throw_exception_by_token, "mono_arm_throw_exception_by_token", mono_create_icall_signature ("void"), TRUE);
        mono_register_jit_icall (mono_arm_unaligned_stack, "mono_arm_unaligned_stack", mono_create_icall_signature ("void"), TRUE);
-
-#ifndef MONO_CROSS_COMPILE
-       if (mono_arm_have_tls_get ()) {
-               MonoTlsImplementation tls_imp = mono_arm_get_tls_implementation ();
-
-               mono_register_jit_icall (tls_imp.get_tls_thunk, "mono_get_tls_key", mono_create_icall_signature ("ptr ptr"), TRUE);
-               mono_register_jit_icall (tls_imp.set_tls_thunk, "mono_set_tls_key", mono_create_icall_signature ("void ptr ptr"), TRUE);
-
-               get_tls_tramp = tls_imp.get_tls_thunk;
-
-               if (tls_imp.get_tls_thunk_end) {
-                       mono_tramp_info_register (
-                               mono_tramp_info_create (
-                                       "mono_get_tls_key",
-                                       (guint8*)tls_imp.get_tls_thunk,
-                                       (guint8*)tls_imp.get_tls_thunk_end - (guint8*)tls_imp.get_tls_thunk,
-                                       NULL,
-                                       mono_arch_get_cie_program ()
-                                       ),
-                               NULL
-                               );
-                       mono_tramp_info_register (
-                               mono_tramp_info_create (
-                                       "mono_set_tls_key",
-                                       (guint8*)tls_imp.set_tls_thunk,
-                                       (guint8*)tls_imp.set_tls_thunk_end - (guint8*)tls_imp.set_tls_thunk,
-                                       NULL,
-                                       mono_arch_get_cie_program ()
-                                       ),
-                               NULL
-                               );
-               }
-       }
-#endif
 }
 
 #define patch_lis_ori(ip,val) do {\
@@ -6255,9 +6143,6 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                MonoInst *ins = bb->code;
                bb->max_offset = max_offset;
 
-               if (cfg->prof_options & MONO_PROFILE_COVERAGE)
-                       max_offset += 6; 
-
                MONO_BB_FOR_EACH_INS (bb, ins)
                        max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
        }
@@ -6554,9 +6439,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
                if (info_var) {
                        g_assert (info_var->opcode == OP_REGOFFSET);
-                       g_assert (arm_is_imm12 (info_var->inst_offset));
 
-                       ARM_LDR_IMM (code, dreg, info_var->inst_basereg, info_var->inst_offset);
+                       code = emit_ldr_imm (code, dreg, info_var->inst_basereg, info_var->inst_offset);
                        /* Load the trigger page addr */
                        ARM_LDR_IMM (code, dreg, dreg, MONO_STRUCT_OFFSET (SeqPointInfo, ss_trigger_page));
                        ARM_STR_IMM (code, dreg, ss_trigger_page_var->inst_basereg, ss_trigger_page_var->inst_offset);
@@ -6620,9 +6504,6 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL)
                max_epilog_size += 50;
 
-       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-               max_epilog_size += 50;
-
        while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
@@ -7170,7 +7051,7 @@ mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain, MonoIMTC
        g_free (constant_pool_starts);
 
        mono_arch_flush_icache ((guint8*)start, size);
-       mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL);
+       MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL));
        mono_stats.imt_trampolines_size += code - start;
 
        g_assert (DISTANCE (start, code) <= size);
@@ -7204,26 +7085,6 @@ mono_arch_get_trampolines (gboolean aot)
        return mono_arm_get_exception_trampolines (aot);
 }
 
-gpointer
-mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
-{
-       gpointer *lr_loc;
-       char *old_value;
-       char *bp;
-
-       /*Load the spvar*/
-       bp = MONO_CONTEXT_GET_BP (ctx);
-       lr_loc = (gpointer*)(bp + clause->exvar_offset);
-
-       old_value = *lr_loc;
-       if ((char*)old_value < (char*)ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
-               return old_value;
-
-       *lr_loc = new_value;
-
-       return old_value;
-}
-
 #if defined(MONO_ARCH_SOFT_DEBUG_SUPPORTED)
 /*
  * mono_arch_set_breakpoint:
@@ -7539,7 +7400,7 @@ mono_arch_get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
 gpointer
 mono_arch_get_get_tls_tramp (void)
 {
-       return get_tls_tramp;
+       /* This backend no longer keeps a get_tls_tramp thunk, so there is never a trampoline to return */
+       return NULL;
 }
 
 static guint8*
@@ -7555,3 +7416,17 @@ emit_aotconst (MonoCompile *cfg, guint8 *code, int dreg, int patch_type, gpointe
        ARM_LDR_REG_REG (code, dreg, ARMREG_PC, dreg);
        return code;
 }
+
+/*
+ * mono_arm_emit_aotconst:
+ *
+ *   Emit a patchable constant load into DREG without needing a MonoCompile.
+ * JI_LIST is the address of a MonoJumpInfo list head; a patch of type
+ * PATCH_TYPE with DATA is prepended to it at offset CODE - BUF. Returns the
+ * updated code pointer.
+ * NOTE(review): the sequence appears to mirror emit_aotconst () -- confirm.
+ */
+guint8*
+mono_arm_emit_aotconst (gpointer ji_list, guint8 *code, guint8 *buf, int dreg, int patch_type, gconstpointer data)
+{
+       MonoJumpInfo **patch_list = (MonoJumpInfo**)ji_list;
+
+       /* Record the patch at the position of the first emitted instruction */
+       *patch_list = mono_patch_info_list_prepend (*patch_list, code - buf, patch_type, data);
+
+       /* Load the literal word emitted below into DREG, then branch over it */
+       ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
+       ARM_B (code, 0);
+
+       /* Placeholder literal, initially NULL */
+       *(gpointer*)code = NULL;
+       code += 4;
+
+       /* Use the loaded value as an offset from PC for the final load */
+       ARM_LDR_REG_REG (code, dreg, ARMREG_PC, dreg);
+
+       return code;
+}