Fix an assert in the arm backend if an offset is too large.
[mono.git] / mono / mini / mini-arm.c
index b0fb8556e3dc6167236b8e9f722cdebb853d25cb..823761f9f33b7d7c30edb9ca10ddba2584830601 100644 (file)
@@ -6,6 +6,8 @@
  *   Dietmar Maurer (dietmar@ximian.com)
  *
  * (C) 2003 Ximian, Inc.
+ * Copyright 2003-2011 Novell, Inc (http://www.novell.com)
+ * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
  */
 #include "mini.h"
 #include <string.h>
 #include "cpu-arm.h"
 #include "trace.h"
 #include "ir-emit.h"
-#ifdef ARM_FPU_FPA
+#include "debugger-agent.h"
+#include "mini-gc.h"
 #include "mono/arch/arm/arm-fpa-codegen.h"
-#elif defined(ARM_FPU_VFP)
 #include "mono/arch/arm/arm-vfp-codegen.h"
-#endif
 
 #if defined(__ARM_EABI__) && defined(__linux__) && !defined(PLATFORM_ANDROID)
 #define HAVE_AEABI_READ_TP 1
 #endif
 
+#ifdef ARM_FPU_VFP_HARD /* a hard-float VFP build also turns on the generic ARM_FPU_VFP code */
+#define ARM_FPU_VFP 1
+#endif
+
+#ifdef ARM_FPU_FPA /* IS_FPA mirrors ARM_FPU_FPA as a 0/1 constant usable in ordinary C conditions */
+#define IS_FPA 1
+#else
+#define IS_FPA 0
+#endif
+
+#ifdef ARM_FPU_VFP /* IS_VFP mirrors ARM_FPU_VFP (defined directly or through ARM_FPU_VFP_HARD above) */
+#define IS_VFP 1
+#else
+#define IS_VFP 0
+#endif
+
+#ifdef MONO_ARCH_SOFT_FLOAT /* IS_SOFT_FLOAT mirrors MONO_ARCH_SOFT_FLOAT in the same 0/1 form */
+#define IS_SOFT_FLOAT 1
+#else
+#define IS_SOFT_FLOAT 0
+#endif
+
+/*
+ * Round VAL up to the next multiple of ALIGN; the result has type guint64.
+ * ALIGN is expected to be a power of two, since the rounding is done with a mask.
+ * VAL is parenthesized before the cast so that expression arguments such as
+ * `a << b' are converted as a whole, and the mask is built in 64 bits so that it
+ * does not depend on the width or signedness of ALIGN.
+ * ALIGN is expanded twice, so it must not have side effects.
+ */
+#define ALIGN_TO(val,align) ((((guint64)(val)) + ((align) - 1)) & ~((guint64)(align) - 1))
+
 static gint lmf_tls_offset = -1;
 static gint lmf_addr_tls_offset = -1;
 
@@ -57,8 +82,20 @@ static int darwin = 0;
  * only be turned on in debug builds.
  */
 static int iphone_abi = 0;
+
+/*
+ * The FPU we are generating code for. This is NOT runtime configurable right now,
+ * since some things like MONO_ARCH_CALLEE_FREGS still depend on defines.
+ */
+static MonoArmFPU arm_fpu;
+
 static int i8_align;
 
+static volatile int ss_trigger_var = 0;
+
+static gpointer single_step_func_wrapper;
+static gpointer breakpoint_func_wrapper;
+
 /*
  * The code generated for sequence points reads from this location, which is
  * made read-only when single stepping is enabled.
@@ -253,17 +290,17 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
        case OP_FCALL:
        case OP_FCALL_REG:
        case OP_FCALL_MEMBASE:
-#ifdef ARM_FPU_FPA
-               if (ins->dreg != ARM_FPA_F0)
-                       ARM_MVFD (code, ins->dreg, ARM_FPA_F0);
-#elif defined(ARM_FPU_VFP)
-               if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
-                       ARM_FMSR (code, ins->dreg, ARMREG_R0);
-                       ARM_CVTS (code, ins->dreg, ins->dreg);
-               } else {
-                       ARM_FMDRR (code, ARMREG_R0, ARMREG_R1, ins->dreg);
+               if (IS_FPA) {
+                       if (ins->dreg != ARM_FPA_F0)
+                               ARM_FPA_MVFD (code, ins->dreg, ARM_FPA_F0);
+               } else if (IS_VFP) {
+                       if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
+                               ARM_FMSR (code, ins->dreg, ARMREG_R0);
+                               ARM_CVTS (code, ins->dreg, ins->dreg);
+                       } else {
+                               ARM_FMDRR (code, ARMREG_R0, ARMREG_R1, ins->dreg);
+                       }
                }
-#endif
                break;
        }
 
@@ -280,6 +317,7 @@ static guint8*
 emit_save_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
 {
        gboolean get_lmf_fast = FALSE;
+       int i;
 
 #ifdef HAVE_AEABI_READ_TP
        gint32 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
@@ -317,10 +355,14 @@ emit_save_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
        /* *(lmf_addr) = r1 */
        ARM_STR_IMM (code, ARMREG_R1, ARMREG_R0, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
        /* Skip method (only needed for trampoline LMF frames) */
-       ARM_STR_IMM (code, ARMREG_SP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, esp));
+       ARM_STR_IMM (code, ARMREG_SP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, sp));
+       ARM_STR_IMM (code, ARMREG_FP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, fp));
        /* save the current IP */
        ARM_MOV_REG_REG (code, ARMREG_IP, ARMREG_PC);
-       ARM_STR_IMM (code, ARMREG_IP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, eip));
+       ARM_STR_IMM (code, ARMREG_IP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, ip));
+
+       for (i = 0; i < sizeof (MonoLMF); i += sizeof (mgreg_t))
+               mini_gc_set_slot_type_from_fp (cfg, lmf_offset + i, SLOT_NOREF);
 
        return code;
 }
@@ -556,6 +598,64 @@ mono_arch_cpu_init (void)
 #endif
 }
 
+/*
+ * create_function_wrapper:
+ *
+ *   Emit a native stub into the global code manager. The stub builds a
+ * MonoContext on the stack from the current register state, calls FUNCTION with
+ * r0 pointing at that context, and then reloads all sixteen registers (pc
+ * included) from ctx.regs, so execution continues at the address found in
+ * ctx.pc after the call.
+ *
+ * Returns the start address of the generated stub.
+ */
+static gpointer
+create_function_wrapper (gpointer function)
+{
+       guint8 *start, *code;
+       /* Fixed size reserved for the stub; verified once the code has been emitted. */
+       int buf_len = 96;
+
+       start = code = mono_global_codeman_reserve (buf_len);
+
+       /*
+        * Construct the MonoContext structure on the stack.
+        */
+
+       ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, sizeof (MonoContext));
+
+       /* save ip and lr into their ctx.regs slots; lr is stored into the pc slot as well. */
+       ARM_STR_IMM (code, ARMREG_IP, ARMREG_SP, G_STRUCT_OFFSET (MonoContext, regs) + sizeof (mgreg_t) * ARMREG_IP);
+       ARM_STR_IMM (code, ARMREG_LR, ARMREG_SP, G_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_LR);
+       ARM_STR_IMM (code, ARMREG_LR, ARMREG_SP, G_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_PC);
+
+       /* save r0..r10 and fp (register mask 0x0fff); ip was saved above, so it can serve as the base. */
+       ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_SP, G_STRUCT_OFFSET (MonoContext, regs));
+       ARM_STM (code, ARMREG_IP, 0x0fff);
+
+       /* the old fp has been saved, so fp can now be pointed at the MonoContext. */
+       ARM_MOV_REG_REG (code, ARMREG_FP, ARMREG_SP);
+
+       /*
+        * make the sp slot of ctx.regs hold the value sp had on entry to the stub.
+        * NOTE(review): both stores look like they hit the same slot (ip should equal
+        * fp + offsetof (regs) here, assuming ARM_STM does not write back) - confirm
+        * before dropping either one.
+        */
+       ARM_ADD_REG_IMM8 (code, ARMREG_R0, ARMREG_FP, sizeof (MonoContext));
+       ARM_STR_IMM (code, ARMREG_R0, ARMREG_IP, 4 * ARMREG_SP);
+       ARM_STR_IMM (code, ARMREG_R0, ARMREG_FP, G_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_SP);
+
+       /* make ctx.pc hold the address of the call (lr - 4). */
+       ARM_SUB_REG_IMM8 (code, ARMREG_LR, ARMREG_LR, 4);
+       ARM_STR_IMM (code, ARMREG_LR, ARMREG_SP, G_STRUCT_OFFSET (MonoContext, pc));
+
+       /* r0 now points to the MonoContext */
+       ARM_MOV_REG_REG (code, ARMREG_R0, ARMREG_FP);
+
+       /* call: load FUNCTION from the literal word emitted inline, branch over that word, then blx. */
+       ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0);
+       ARM_B (code, 0);
+       *(gpointer*)code = function;
+       code += 4;
+       ARM_BLX_REG (code, ARMREG_IP);
+
+       /* we're back; copy ctx.pc into the lr and pc slots of ctx.regs. */
+       ARM_LDR_IMM (code, ARMREG_R0, ARMREG_FP, G_STRUCT_OFFSET (MonoContext, pc));
+       ARM_STR_IMM (code, ARMREG_R0, ARMREG_FP, G_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_LR);
+       ARM_STR_IMM (code, ARMREG_R0, ARMREG_FP, G_STRUCT_OFFSET (MonoContext, regs) + 4 * ARMREG_PC);
+
+       /* make ip point to the regs array, then restore everything, including pc. */
+       ARM_ADD_REG_IMM8 (code, ARMREG_IP, ARMREG_FP, G_STRUCT_OFFSET (MonoContext, regs));
+       ARM_LDM (code, ARMREG_IP, 0xffff);
+
+       /* The reservation is a fixed size; fail loudly if the stub ever outgrows it. */
+       g_assert ((code - start) <= buf_len);
+
+       mono_arch_flush_icache (start, code - start);
+
+       return start;
+}
+
 /*
  * Initialize architecture specific code.
  */
@@ -564,13 +664,28 @@ mono_arch_init (void)
 {
        InitializeCriticalSection (&mini_arch_mutex);
 
-       ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
-       bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
-       mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
+       if (mini_get_debug_options ()->soft_breakpoints) {
+               single_step_func_wrapper = create_function_wrapper (debugger_agent_single_step_from_context);
+               breakpoint_func_wrapper = create_function_wrapper (debugger_agent_breakpoint_from_context);
+       } else {
+               ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+               bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+               mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
+       }
 
        mono_aot_register_jit_icall ("mono_arm_throw_exception", mono_arm_throw_exception);
        mono_aot_register_jit_icall ("mono_arm_throw_exception_by_token", mono_arm_throw_exception_by_token);
        mono_aot_register_jit_icall ("mono_arm_resume_unwind", mono_arm_resume_unwind);
+
+#ifdef ARM_FPU_FPA
+       arm_fpu = MONO_ARM_FPU_FPA;
+#elif defined(ARM_FPU_VFP_HARD)
+       arm_fpu = MONO_ARM_FPU_VFP_HARD;
+#elif defined(ARM_FPU_VFP)
+       arm_fpu = MONO_ARM_FPU_VFP;
+#else
+       arm_fpu = MONO_ARM_FPU_NONE;
+#endif
 }
 
 /*
@@ -585,7 +700,7 @@ mono_arch_cleanup (void)
  * This function returns the optimizations supported on this cpu.
  */
 guint32
-mono_arch_cpu_optimizazions (guint32 *exclude_mask)
+mono_arch_cpu_optimizations (guint32 *exclude_mask)
 {
        guint32 opts = 0;
        const char *cpu_arch = getenv ("MONO_CPU_ARCH");
@@ -639,6 +754,20 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
        return opts;
 }
 
+/*
+ * mono_arch_cpu_enumerate_simd_versions:
+ *
+ *   Tests which SIMD versions are supported.
+ *
+ * Returns a bitmask corresponding to all supported versions. This
+ * implementation always returns 0, i.e. no SIMD version is reported.
+ */
+guint32
+mono_arch_cpu_enumerate_simd_versions (void)
+{
+       /* SIMD is currently unimplemented, so the bitmask is empty. */
+       return 0;
+}
+
+
 #ifndef DISABLE_JIT
 
 static gboolean
@@ -1363,6 +1492,43 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                offset += size;
        }
 
+       if (cfg->arch.seq_point_read_var) {
+               MonoInst *ins;
+
+               ins = cfg->arch.seq_point_read_var;
+
+               size = 4;
+               align = 4;
+               offset += align - 1;
+               offset &= ~(align - 1);
+               ins->opcode = OP_REGOFFSET;
+               ins->inst_basereg = cfg->frame_reg;
+               ins->inst_offset = offset;
+               offset += size;
+
+               ins = cfg->arch.seq_point_ss_method_var;
+               size = 4;
+               align = 4;
+               offset += align - 1;
+               offset &= ~(align - 1);
+               ins->opcode = OP_REGOFFSET;
+               ins->inst_basereg = cfg->frame_reg;
+               ins->inst_offset = offset;
+               offset += size;
+
+               ins = cfg->arch.seq_point_bp_method_var;
+               size = 4;
+               align = 4;
+               offset += align - 1;
+               offset &= ~(align - 1);
+               ins->opcode = OP_REGOFFSET;
+               ins->inst_basereg = cfg->frame_reg;
+               ins->inst_offset = offset;
+               offset += size;
+       }
+
+       cfg->locals_min_stack_offset = offset;
+
        curinst = cfg->locals_start;
        for (i = curinst; i < cfg->num_varinfo; ++i) {
                ins = cfg->varinfo [i];
@@ -1383,6 +1549,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                 */
                if (align < 4 && size >= 4)
                        align = 4;
+               if (ALIGN_TO (offset, align) > ALIGN_TO (offset, 4))
+                       mini_gc_set_slot_type_from_fp (cfg, ALIGN_TO (offset, 4), SLOT_NOREF);
                offset += align - 1;
                offset &= ~(align - 1);
                ins->opcode = OP_REGOFFSET;
@@ -1392,6 +1560,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                //g_print ("allocating local %d to %d\n", i, inst->inst_offset);
        }
 
+       cfg->locals_max_stack_offset = offset;
+
        curinst = 0;
        if (sig->hasthis) {
                ins = cfg->args [curinst];
@@ -1433,6 +1603,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                        /* The code in the prolog () stores words when storing vtypes received in a register */
                        if (MONO_TYPE_ISSTRUCT (sig->params [i]))
                                align = 4;
+                       if (ALIGN_TO (offset, align) > ALIGN_TO (offset, 4))
+                               mini_gc_set_slot_type_from_fp (cfg, ALIGN_TO (offset, 4), SLOT_NOREF);
                        offset += align - 1;
                        offset &= ~(align - 1);
                        ins->inst_offset = offset;
@@ -1442,6 +1614,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        }
 
        /* align the offset to 8 bytes */
+       if (ALIGN_TO (offset, 8) > ALIGN_TO (offset, 4))
+               mini_gc_set_slot_type_from_fp (cfg, ALIGN_TO (offset, 4), SLOT_NOREF);
        offset += 8 - 1;
        offset &= ~(8 - 1);
 
@@ -1472,15 +1646,31 @@ mono_arch_create_vars (MonoCompile *cfg)
                }
        }
 
-       if (cfg->gen_seq_points && cfg->compile_aot) {
-           MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
-               ins->flags |= MONO_INST_VOLATILE;
-               cfg->arch.seq_point_info_var = ins;
-
-               /* Allocate a separate variable for this to save 1 load per seq point */
-           ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
-               ins->flags |= MONO_INST_VOLATILE;
-               cfg->arch.ss_trigger_page_var = ins;
+       if (cfg->gen_seq_points) {
+               if (cfg->soft_breakpoints) {
+                       MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       ins->flags |= MONO_INST_VOLATILE;
+                       cfg->arch.seq_point_read_var = ins;
+
+                       ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       ins->flags |= MONO_INST_VOLATILE;
+                       cfg->arch.seq_point_ss_method_var = ins;
+
+                       ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       ins->flags |= MONO_INST_VOLATILE;
+                       cfg->arch.seq_point_bp_method_var = ins;
+
+                       g_assert (!cfg->compile_aot);
+               } else if (cfg->compile_aot) {
+                       MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       ins->flags |= MONO_INST_VOLATILE;
+                       cfg->arch.seq_point_info_var = ins;
+
+                       /* Allocate a separate variable for this to save 1 load per seq point */
+                       ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       ins->flags |= MONO_INST_VOLATILE;
+                       cfg->arch.ss_trigger_page_var = ins;
+               }
        }
 }
 
@@ -1488,14 +1678,11 @@ static void
 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
 {
        MonoMethodSignature *tmp_sig;
-       MonoInst *sig_arg;
+       int sig_reg;
 
        if (call->tail_call)
                NOT_IMPLEMENTED;
 
-       /* FIXME: Add support for signature tokens to AOT */
-       cfg->disable_aot = TRUE;
-
        g_assert (cinfo->sig_cookie.storage == RegTypeBase);
                        
        /*
@@ -1509,12 +1696,10 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
        tmp_sig->sentinelpos = 0;
        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
 
-       MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
-       sig_arg->dreg = mono_alloc_ireg (cfg);
-       sig_arg->inst_p0 = tmp_sig;
-       MONO_ADD_INS (cfg->cbb, sig_arg);
+       sig_reg = mono_alloc_ireg (cfg);
+       MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig);
 
-       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, cinfo->sig_cookie.offset, sig_arg->dreg);
+       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, cinfo->sig_cookie.offset, sig_reg);
 }
 
 #ifdef ENABLE_LLVM
@@ -1630,47 +1815,47 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                                MONO_ADD_INS (cfg->cbb, ins);
                                mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg + 1, FALSE);
                        } else if (!t->byref && ((t->type == MONO_TYPE_R8) || (t->type == MONO_TYPE_R4))) {
-#ifndef MONO_ARCH_SOFT_FLOAT
-                               int creg;
-#endif
-
                                if (ainfo->size == 4) {
-#ifdef MONO_ARCH_SOFT_FLOAT
-                                       /* mono_emit_call_args () have already done the r8->r4 conversion */
-                                       /* The converted value is in an int vreg */
-                                       MONO_INST_NEW (cfg, ins, OP_MOVE);
-                                       ins->dreg = mono_alloc_ireg (cfg);
-                                       ins->sreg1 = in->dreg;
-                                       MONO_ADD_INS (cfg->cbb, ins);
-                                       mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
-#else
-                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
-                                       creg = mono_alloc_ireg (cfg);
-                                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
-                                       mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg, FALSE);
-#endif
+                                       if (IS_SOFT_FLOAT) {
+                                               /* mono_emit_call_args () have already done the r8->r4 conversion */
+                                               /* The converted value is in an int vreg */
+                                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                                               ins->dreg = mono_alloc_ireg (cfg);
+                                               ins->sreg1 = in->dreg;
+                                               MONO_ADD_INS (cfg->cbb, ins);
+                                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+                                       } else {
+                                               int creg;
+
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
+                                               creg = mono_alloc_ireg (cfg);
+                                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
+                                               mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg, FALSE);
+                                       }
                                } else {
-#ifdef MONO_ARCH_SOFT_FLOAT
-                                       MONO_INST_NEW (cfg, ins, OP_FGETLOW32);
-                                       ins->dreg = mono_alloc_ireg (cfg);
-                                       ins->sreg1 = in->dreg;
-                                       MONO_ADD_INS (cfg->cbb, ins);
-                                       mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
-
-                                       MONO_INST_NEW (cfg, ins, OP_FGETHIGH32);
-                                       ins->dreg = mono_alloc_ireg (cfg);
-                                       ins->sreg1 = in->dreg;
-                                       MONO_ADD_INS (cfg->cbb, ins);
-                                       mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg + 1, FALSE);
-#else
-                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
-                                       creg = mono_alloc_ireg (cfg);
-                                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
-                                       mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg, FALSE);
-                                       creg = mono_alloc_ireg (cfg);
-                                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8 + 4));
-                                       mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg + 1, FALSE);
-#endif
+                                       if (IS_SOFT_FLOAT) {
+                                               MONO_INST_NEW (cfg, ins, OP_FGETLOW32);
+                                               ins->dreg = mono_alloc_ireg (cfg);
+                                               ins->sreg1 = in->dreg;
+                                               MONO_ADD_INS (cfg->cbb, ins);
+                                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+
+                                               MONO_INST_NEW (cfg, ins, OP_FGETHIGH32);
+                                               ins->dreg = mono_alloc_ireg (cfg);
+                                               ins->sreg1 = in->dreg;
+                                               MONO_ADD_INS (cfg->cbb, ins);
+                                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg + 1, FALSE);
+                                       } else {
+                                               int creg;
+
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, (cfg->param_area - 8), in->dreg);
+                                               creg = mono_alloc_ireg (cfg);
+                                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8));
+                                               mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg, FALSE);
+                                               creg = mono_alloc_ireg (cfg);
+                                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOAD_MEMBASE, creg, ARMREG_SP, (cfg->param_area - 8 + 4));
+                                               mono_call_inst_add_outarg_reg (cfg, call, creg, ainfo->reg + 1, FALSE);
+                                       }
                                }
                                cfg->flags |= MONO_CFG_HAS_FPOUT;
                        } else {
@@ -1709,11 +1894,10 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                                if (t->type == MONO_TYPE_R8) {
                                        MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
                                } else {
-#ifdef MONO_ARCH_SOFT_FLOAT
-                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
-#else
-                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
-#endif
+                                       if (IS_SOFT_FLOAT)
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
+                                       else
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
                                }
                        } else {
                                MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, in->dreg);
@@ -1853,40 +2037,45 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
                        }
                        return;
                }
-#ifdef MONO_ARCH_SOFT_FLOAT
-               if (ret->type == MONO_TYPE_R8) {
-                       MonoInst *ins;
-
-                       MONO_INST_NEW (cfg, ins, OP_SETFRET);
-                       ins->dreg = cfg->ret->dreg;
-                       ins->sreg1 = val->dreg;
-                       MONO_ADD_INS (cfg->cbb, ins);
-                       return;
-               }
-               if (ret->type == MONO_TYPE_R4) {
-                       /* Already converted to an int in method_to_ir () */
-                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
-                       return;
-               }                       
-#elif defined(ARM_FPU_VFP)
-               if (ret->type == MONO_TYPE_R8 || ret->type == MONO_TYPE_R4) {
-                       MonoInst *ins;
+               switch (arm_fpu) {
+               case MONO_ARM_FPU_NONE:
+                       if (ret->type == MONO_TYPE_R8) {
+                               MonoInst *ins;
+
+                               MONO_INST_NEW (cfg, ins, OP_SETFRET);
+                               ins->dreg = cfg->ret->dreg;
+                               ins->sreg1 = val->dreg;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               return;
+                       }
+                       if (ret->type == MONO_TYPE_R4) {
+                               /* Already converted to an int in method_to_ir () */
+                               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+                               return;
+                       }
+                       break;
+               case MONO_ARM_FPU_VFP:
+                       if (ret->type == MONO_TYPE_R8 || ret->type == MONO_TYPE_R4) {
+                               MonoInst *ins;
 
-                       MONO_INST_NEW (cfg, ins, OP_SETFRET);
-                       ins->dreg = cfg->ret->dreg;
-                       ins->sreg1 = val->dreg;
-                       MONO_ADD_INS (cfg->cbb, ins);
-                       return;
-               }
-#else
-               if (ret->type == MONO_TYPE_R4 || ret->type == MONO_TYPE_R8) {
-                       MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
-                       return;
+                               MONO_INST_NEW (cfg, ins, OP_SETFRET);
+                               ins->dreg = cfg->ret->dreg;
+                               ins->sreg1 = val->dreg;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               return;
+                       }
+                       break;
+               case MONO_ARM_FPU_FPA:
+                       if (ret->type == MONO_TYPE_R4 || ret->type == MONO_TYPE_R8) {
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
+                               return;
+                       }
+                       break;
+               default:
+                       g_assert_not_reached ();
                }
-#endif
        }
 
-       /* FIXME: */
        MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
 }
 
@@ -1926,13 +2115,12 @@ dyn_call_supported (CallInfo *cinfo, MonoMethodSignature *sig)
        case RegTypeStructByAddr:
                break;
        case RegTypeFP:
-#ifdef ARM_FPU_FPA
-               return FALSE;
-#elif defined(ARM_FPU_VFP)
-               break;
-#else
-               return FALSE;
-#endif
+               if (IS_FPA)
+                       return FALSE;
+               else if (IS_VFP)
+                       break;
+               else
+                       return FALSE;
        default:
                return FALSE;
        }
@@ -1966,11 +2154,10 @@ dyn_call_supported (CallInfo *cinfo, MonoMethodSignature *sig)
                switch (t->type) {
                case MONO_TYPE_R4:
                case MONO_TYPE_R8:
-#ifdef MONO_ARCH_SOFT_FLOAT
-                       return FALSE;
-#else
-                       break;
-#endif
+                       if (IS_SOFT_FLOAT)
+                               return FALSE;
+                       else
+                               break;
                        /*
                case MONO_TYPE_I8:
                case MONO_TYPE_U8:
@@ -2185,20 +2372,20 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
                g_assert (ainfo->cinfo->vtype_retaddr);
                /* Nothing to do */
                break;
-#if defined(ARM_FPU_VFP)
        case MONO_TYPE_R4:
+               g_assert (IS_VFP);
                *(float*)ret = *(float*)&res;
                break;
        case MONO_TYPE_R8: {
                mgreg_t regs [2];
 
+               g_assert (IS_VFP);
                regs [0] = res;
                regs [1] = res2;
 
                *(double*)ret = *(double*)&regs;
                break;
        }
-#endif
        default:
                g_assert_not_reached ();
        }
@@ -2507,7 +2694,7 @@ branch_cc_table [] = {
        ARMCOND_LO
 };
 
-#define NEW_INS(cfg,dest,op) do {       \
+#define ADD_NEW_INS(cfg,dest,op) do {       \
                MONO_INST_NEW ((cfg), (dest), (op)); \
         mono_bblock_insert_before_ins (bb, ins, (dest)); \
        } while (0)
@@ -2609,7 +2796,7 @@ loop_start:
                case OP_IOR_IMM:
                case OP_IXOR_IMM:
                        if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_imm, &rot_amount)) < 0) {
-                               NEW_INS (cfg, temp, OP_ICONST);
+                               ADD_NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
                                temp->dreg = mono_alloc_ireg (cfg);
                                ins->sreg2 = temp->dreg;
@@ -2636,7 +2823,7 @@ loop_start:
                                ins->inst_imm = imm8;
                                break;
                        }
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
@@ -2651,7 +2838,7 @@ loop_start:
                                ins->next->opcode = OP_COND_EXC_NC;
                        break;
                case OP_LOCALLOC_IMM:
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg1 = temp->dreg;
@@ -2668,7 +2855,7 @@ loop_start:
                         */
                        if (arm_is_imm12 (ins->inst_offset))
                                break;
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
@@ -2679,7 +2866,7 @@ loop_start:
                case OP_LOADI1_MEMBASE:
                        if (arm_is_imm8 (ins->inst_offset))
                                break;
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
@@ -2691,23 +2878,34 @@ loop_start:
                                break;
                        low_imm = ins->inst_offset & 0x1ff;
                        if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_offset & ~0x1ff, &rot_amount)) >= 0) {
-                               NEW_INS (cfg, temp, OP_ADD_IMM);
+                               ADD_NEW_INS (cfg, temp, OP_ADD_IMM);
                                temp->inst_imm = ins->inst_offset & ~0x1ff;
                                temp->sreg1 = ins->inst_basereg;
                                temp->dreg = mono_alloc_ireg (cfg);
                                ins->inst_basereg = temp->dreg;
                                ins->inst_offset = low_imm;
-                               break;
+                       } else {
+                               MonoInst *add_ins;
+
+                               ADD_NEW_INS (cfg, temp, OP_ICONST);
+                               temp->inst_c0 = ins->inst_offset;
+                               temp->dreg = mono_alloc_ireg (cfg);
+
+                               ADD_NEW_INS (cfg, add_ins, OP_IADD);
+                               add_ins->sreg1 = ins->inst_basereg;
+                               add_ins->sreg2 = temp->dreg;
+                               add_ins->dreg = mono_alloc_ireg (cfg);
+
+                               ins->inst_basereg = add_ins->dreg;
+                               ins->inst_offset = 0;
                        }
-                       /* VFP/FPA doesn't have indexed load instructions */
-                       g_assert_not_reached ();
                        break;
                case OP_STORE_MEMBASE_REG:
                case OP_STOREI4_MEMBASE_REG:
                case OP_STOREI1_MEMBASE_REG:
                        if (arm_is_imm12 (ins->inst_offset))
                                break;
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
@@ -2716,7 +2914,7 @@ loop_start:
                case OP_STOREI2_MEMBASE_REG:
                        if (arm_is_imm8 (ins->inst_offset))
                                break;
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
@@ -2728,23 +2926,33 @@ loop_start:
                                break;
                        low_imm = ins->inst_offset & 0x1ff;
                        if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_offset & ~ 0x1ff, &rot_amount)) >= 0 && arm_is_fpimm8 (low_imm)) {
-                               NEW_INS (cfg, temp, OP_ADD_IMM);
+                               ADD_NEW_INS (cfg, temp, OP_ADD_IMM);
                                temp->inst_imm = ins->inst_offset & ~0x1ff;
                                temp->sreg1 = ins->inst_destbasereg;
                                temp->dreg = mono_alloc_ireg (cfg);
                                ins->inst_destbasereg = temp->dreg;
                                ins->inst_offset = low_imm;
-                               break;
+                       } else {
+                               MonoInst *add_ins;
+
+                               ADD_NEW_INS (cfg, temp, OP_ICONST);
+                               temp->inst_c0 = ins->inst_offset;
+                               temp->dreg = mono_alloc_ireg (cfg);
+
+                               ADD_NEW_INS (cfg, add_ins, OP_IADD);
+                               add_ins->sreg1 = ins->inst_destbasereg;
+                               add_ins->sreg2 = temp->dreg;
+                               add_ins->dreg = mono_alloc_ireg (cfg);
+
+                               ins->inst_destbasereg = add_ins->dreg;
+                               ins->inst_offset = 0;
                        }
-                       /*g_print ("fail with: %d (%d, %d)\n", ins->inst_offset, ins->inst_offset & ~0x1ff, low_imm);*/
-                       /* VFP/FPA doesn't have indexed store instructions */
-                       g_assert_not_reached ();
                        break;
                case OP_STORE_MEMBASE_IMM:
                case OP_STOREI1_MEMBASE_IMM:
                case OP_STOREI2_MEMBASE_IMM:
                case OP_STOREI4_MEMBASE_IMM:
-                       NEW_INS (cfg, temp, OP_ICONST);
+                       ADD_NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
                        temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg1 = temp->dreg;
@@ -2814,15 +3022,15 @@ static guchar*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
 {
        /* sreg is a float, dreg is an integer reg  */
-#ifdef ARM_FPU_FPA
-       ARM_FIXZ (code, dreg, sreg);
-#elif defined(ARM_FPU_VFP)
-       if (is_signed)
-               ARM_TOSIZD (code, ARM_VFP_F0, sreg);
-       else
-               ARM_TOUIZD (code, ARM_VFP_F0, sreg);
-       ARM_FMRS (code, dreg, ARM_VFP_F0);
-#endif
+       if (IS_FPA)
+               ARM_FPA_FIXZ (code, dreg, sreg);
+       else if (IS_VFP) {
+               if (is_signed)
+                       ARM_TOSIZD (code, ARM_VFP_F0, sreg);
+               else
+                       ARM_TOUIZD (code, ARM_VFP_F0, sreg);
+               ARM_FMRS (code, dreg, ARM_VFP_F0);
+       }
        if (!is_signed) {
                if (size == 1)
                        ARM_AND_REG_IMM8 (code, dreg, dreg, 0xff);
@@ -3545,9 +3753,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        int i;
                        MonoInst *info_var = cfg->arch.seq_point_info_var;
                        MonoInst *ss_trigger_page_var = cfg->arch.ss_trigger_page_var;
+                       MonoInst *ss_read_var = cfg->arch.seq_point_read_var;
+                       MonoInst *ss_method_var = cfg->arch.seq_point_ss_method_var;
+                       MonoInst *bp_method_var = cfg->arch.seq_point_bp_method_var;
                        MonoInst *var;
                        int dreg = ARMREG_LR;
 
+                       if (cfg->soft_breakpoints) {
+                               g_assert (!cfg->compile_aot);
+                       }
+
                        /*
                         * For AOT, we use one got slot per method, which will point to a
                         * SeqPointInfo structure, containing all the information required
@@ -3559,34 +3774,72 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                g_assert (arm_is_imm12 (info_var->inst_offset));
                        }
 
-                       /* 
-                        * Read from the single stepping trigger page. This will cause a
-                        * SIGSEGV when single stepping is enabled.
-                        * We do this _before_ the breakpoint, so single stepping after
-                        * a breakpoint is hit will step to the next IL offset.
-                        */
-                       g_assert (((guint64)(gsize)ss_trigger_page >> 32) == 0);
+                       if (!cfg->soft_breakpoints) {
+                               /*
+                                * Read from the single stepping trigger page. This will cause a
+                                * SIGSEGV when single stepping is enabled.
+                                * We do this _before_ the breakpoint, so single stepping after
+                                * a breakpoint is hit will step to the next IL offset.
+                                */
+                               g_assert (((guint64)(gsize)ss_trigger_page >> 32) == 0);
+                       }
 
                        if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
-                               if (cfg->compile_aot) {
-                                       /* Load the trigger page addr from the variable initialized in the prolog */
-                                       var = ss_trigger_page_var;
+                               if (cfg->soft_breakpoints) {
+                                       /* Load the address of the sequence point trigger variable. */
+                                       var = ss_read_var;
+                                       g_assert (var);
+                                       g_assert (var->opcode == OP_REGOFFSET);
+                                       g_assert (arm_is_imm12 (var->inst_offset));
+                                       ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+
+                                       /* Read the value and check whether it is non-zero. */
+                                       ARM_LDR_IMM (code, dreg, dreg, 0);
+                                       ARM_CMP_REG_IMM (code, dreg, 0, 0);
+
+                                       /* Load the address of the sequence point method. */
+                                       var = ss_method_var;
                                        g_assert (var);
                                        g_assert (var->opcode == OP_REGOFFSET);
                                        g_assert (arm_is_imm12 (var->inst_offset));
                                        ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+
+                                       /* Call it conditionally. */
+                                       ARM_BLX_REG_COND (code, ARMCOND_NE, dreg);
                                } else {
-                                       ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
-                                       ARM_B (code, 0);
-                                       *(int*)code = (int)ss_trigger_page;
-                                       code += 4;
+                                       if (cfg->compile_aot) {
+                                               /* Load the trigger page addr from the variable initialized in the prolog */
+                                               var = ss_trigger_page_var;
+                                               g_assert (var);
+                                               g_assert (var->opcode == OP_REGOFFSET);
+                                               g_assert (arm_is_imm12 (var->inst_offset));
+                                               ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+                                       } else {
+                                               ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
+                                               ARM_B (code, 0);
+                                               *(int*)code = (int)ss_trigger_page;
+                                               code += 4;
+                                       }
+                                       ARM_LDR_IMM (code, dreg, dreg, 0);
                                }
-                               ARM_LDR_IMM (code, dreg, dreg, 0);
                        }
 
                        mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
 
-                       if (cfg->compile_aot) {
+                       if (cfg->soft_breakpoints) {
+                               /* Load the address of the breakpoint method into dreg (lr). */
+                               var = bp_method_var;
+                               g_assert (var);
+                               g_assert (var->opcode == OP_REGOFFSET);
+                               g_assert (arm_is_imm12 (var->inst_offset));
+                               ARM_LDR_IMM (code, dreg, var->inst_basereg, var->inst_offset);
+
+                               /*
+                                * A placeholder for a possible breakpoint inserted by
+                                * mono_arch_set_breakpoint ().
+                                */
+                               ARM_NOP (code);
+                       } else if (cfg->compile_aot) {
                                guint32 offset = code - cfg->native_code;
                                guint32 val;
 
@@ -3594,19 +3847,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                /* Add the offset */
                                val = ((offset / 4) * sizeof (guint8*)) + G_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
                                ARM_ADD_REG_IMM (code, dreg, dreg, (val & 0xFF), 0);
-                               /* 
-                                * Have to emit nops to keep the difference between the offset
-                                * stored in seq_points and breakpoint instruction constant,
-                                * mono_arch_get_ip_for_breakpoint () depends on this.
-                                */
                                if (val & 0xFF00)
                                        ARM_ADD_REG_IMM (code, dreg, dreg, (val & 0xFF00) >> 8, 24);
-                               else
-                                       ARM_NOP (code);
                                if (val & 0xFF0000)
                                        ARM_ADD_REG_IMM (code, dreg, dreg, (val & 0xFF0000) >> 16, 16);
-                               else
-                                       ARM_NOP (code);
                                g_assert (!(val & 0xFF000000));
                                /* Load the info->bp_addrs [offset], which is either 0 or the address of a trigger page */
                                ARM_LDR_IMM (code, dreg, dreg, 0);
@@ -3845,19 +4089,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_FMOVE:
-#ifdef ARM_FPU_FPA
-                       ARM_MVFD (code, ins->dreg, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CPYD (code, ins->dreg, ins->sreg1);
-#endif
+                       if (IS_FPA)
+                               ARM_FPA_MVFD (code, ins->dreg, ins->sreg1);
+                       else if (IS_VFP)
+                               ARM_CPYD (code, ins->dreg, ins->sreg1);
                        break;
                case OP_FCONV_TO_R4:
-#ifdef ARM_FPU_FPA
-                       ARM_MVFS (code, ins->dreg, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CVTD (code, ins->dreg, ins->sreg1);
-                       ARM_CVTS (code, ins->dreg, ins->dreg);
-#endif
+                       if (IS_FPA)
+                               ARM_FPA_MVFS (code, ins->dreg, ins->sreg1);
+                       else if (IS_VFP) {
+                               ARM_CVTD (code, ins->dreg, ins->sreg1);
+                               ARM_CVTS (code, ins->dreg, ins->dreg);
+                       }
                        break;
                case OP_JMP:
                        /*
@@ -3908,6 +4151,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        else
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
                        code = emit_call_seq (cfg, code);
+                       ins->flags |= MONO_INST_GC_CALLSITE;
+                       ins->backend.pc_offset = code - cfg->native_code;
                        code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_FCALL_REG:
@@ -3917,6 +4162,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VOIDCALL_REG:
                case OP_CALL_REG:
                        code = emit_call_reg (code, ins->sreg1);
+                       ins->flags |= MONO_INST_GC_CALLSITE;
+                       ins->backend.pc_offset = code - cfg->native_code;
                        code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_FCALL_MEMBASE:
@@ -3947,6 +4194,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
                                ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
                        }
+                       ins->flags |= MONO_INST_GC_CALLSITE;
+                       ins->backend.pc_offset = code - cfg->native_code;
                        code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_LOCALLOC: {
@@ -4031,6 +4280,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                case OP_START_HANDLER: {
                        MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       int i, rot_amount;
+
+                       /* Reserve a param area, see filter-stack.exe */
+                       if (cfg->param_area) {
+                               if ((i = mono_arm_is_rotated_imm8 (cfg->param_area, &rot_amount)) >= 0) {
+                                       ARM_SUB_REG_IMM (code, ARMREG_SP, ARMREG_SP, i, rot_amount);
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, cfg->param_area);
+                                       ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
+                               }
+                       }
 
                        if (arm_is_imm12 (spvar->inst_offset)) {
                                ARM_STR_IMM (code, ARMREG_LR, spvar->inst_basereg, spvar->inst_offset);
@@ -4042,6 +4302,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                case OP_ENDFILTER: {
                        MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       int i, rot_amount;
+
+                       /* Free the param area */
+                       if (cfg->param_area) {
+                               if ((i = mono_arm_is_rotated_imm8 (cfg->param_area, &rot_amount)) >= 0) {
+                                       ARM_ADD_REG_IMM (code, ARMREG_SP, ARMREG_SP, i, rot_amount);
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, cfg->param_area);
+                                       ARM_ADD_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
+                               }
+                       }
 
                        if (ins->sreg1 != ARMREG_R0)
                                ARM_MOV_REG_REG (code, ARMREG_R0, ins->sreg1);
@@ -4057,6 +4328,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                case OP_ENDFINALLY: {
                        MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       int i, rot_amount;
+
+                       /* Free the param area */
+                       if (cfg->param_area) {
+                               if ((i = mono_arm_is_rotated_imm8 (cfg->param_area, &rot_amount)) >= 0) {
+                                       ARM_ADD_REG_IMM (code, ARMREG_SP, ARMREG_SP, i, rot_amount);
+                               } else {
+                                       code = mono_arm_emit_load_imm (code, ARMREG_IP, cfg->param_area);
+                                       ARM_ADD_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
+                               }
+                       }
 
                        if (arm_is_imm12 (spvar->inst_offset)) {
                                ARM_LDR_IMM (code, ARMREG_IP, spvar->inst_basereg, spvar->inst_offset);
@@ -4193,7 +4475,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 #ifdef ARM_FPU_FPA
                case OP_R8CONST:
                        if (cfg->compile_aot) {
-                               ARM_LDFD (code, ins->dreg, ARMREG_PC, 0);
+                               ARM_FPA_LDFD (code, ins->dreg, ARMREG_PC, 0);
                                ARM_B (code, 1);
                                *(guint32*)code = ((guint32*)(ins->inst_p0))[0];
                                code += 4;
@@ -4204,18 +4486,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                 * the displacement in LDFD (aligning to 512).
                                 */
                                code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
-                               ARM_LDFD (code, ins->dreg, ARMREG_LR, 0);
+                               ARM_FPA_LDFD (code, ins->dreg, ARMREG_LR, 0);
                        }
                        break;
                case OP_R4CONST:
                        if (cfg->compile_aot) {
-                               ARM_LDFS (code, ins->dreg, ARMREG_PC, 0);
+                               ARM_FPA_LDFS (code, ins->dreg, ARMREG_PC, 0);
                                ARM_B (code, 0);
                                *(guint32*)code = ((guint32*)(ins->inst_p0))[0];
                                code += 4;
                        } else {
                                code = mono_arm_emit_load_imm (code, ARMREG_LR, (guint32)ins->inst_p0);
-                               ARM_LDFS (code, ins->dreg, ARMREG_LR, 0);
+                               ARM_FPA_LDFS (code, ins->dreg, ARMREG_LR, 0);
                        }
                        break;
                case OP_STORER8_MEMBASE_REG:
@@ -4223,9 +4505,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (!arm_is_fpimm8 (ins->inst_offset)) {
                                code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
                                ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ins->inst_destbasereg);
-                               ARM_STFD (code, ins->sreg1, ARMREG_LR, 0);
+                               ARM_FPA_STFD (code, ins->sreg1, ARMREG_LR, 0);
                        } else {
-                               ARM_STFD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                               ARM_FPA_STFD (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
                        }
                        break;
                case OP_LOADR8_MEMBASE:
@@ -4233,31 +4515,31 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (!arm_is_fpimm8 (ins->inst_offset)) {
                                code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
                                ARM_ADD_REG_REG (code, ARMREG_LR, ARMREG_LR, ins->inst_basereg);
-                               ARM_LDFD (code, ins->dreg, ARMREG_LR, 0);
+                               ARM_FPA_LDFD (code, ins->dreg, ARMREG_LR, 0);
                        } else {
-                               ARM_LDFD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                               ARM_FPA_LDFD (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        }
                        break;
                case OP_STORER4_MEMBASE_REG:
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_STFS (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       ARM_FPA_STFS (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
                        break;
                case OP_LOADR4_MEMBASE:
                        g_assert (arm_is_fpimm8 (ins->inst_offset));
-                       ARM_LDFS (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       ARM_FPA_LDFS (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        break;
                case OP_ICONV_TO_R_UN: {
                        int tmpreg;
                        tmpreg = ins->dreg == 0? 1: 0;
                        ARM_CMP_REG_IMM8 (code, ins->sreg1, 0);
-                       ARM_FLTD (code, ins->dreg, ins->sreg1);
+                       ARM_FPA_FLTD (code, ins->dreg, ins->sreg1);
                        ARM_B_COND (code, ARMCOND_GE, 8);
                        /* save the temp register */
                        ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 8);
-                       ARM_STFD (code, tmpreg, ARMREG_SP, 0);
-                       ARM_LDFD (code, tmpreg, ARMREG_PC, 12);
+                       ARM_FPA_STFD (code, tmpreg, ARMREG_SP, 0);
+                       ARM_FPA_LDFD (code, tmpreg, ARMREG_PC, 12);
                        ARM_FPA_ADFD (code, ins->dreg, ins->dreg, tmpreg);
-                       ARM_LDFD (code, tmpreg, ARMREG_SP, 0);
+                       ARM_FPA_LDFD (code, tmpreg, ARMREG_SP, 0);
                        ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 8);
                        /* skip the constant pool */
                        ARM_B (code, 8);
@@ -4273,10 +4555,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_ICONV_TO_R4:
-                       ARM_FLTS (code, ins->dreg, ins->sreg1);
+                       ARM_FPA_FLTS (code, ins->dreg, ins->sreg1);
                        break;
                case OP_ICONV_TO_R8:
-                       ARM_FLTD (code, ins->dreg, ins->sreg1);
+                       ARM_FPA_FLTD (code, ins->dreg, ins->sreg1);
                        break;
 
 #elif defined(ARM_FPU_VFP)
@@ -4440,7 +4722,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_FPA_DVFD (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;          
                case OP_FNEG:
-                       ARM_MNFD (code, ins->dreg, ins->sreg1);
+                       ARM_FPA_MNFD (code, ins->dreg, ins->sreg1);
                        break;
 #elif defined(ARM_FPU_VFP)
                case OP_FADD:
@@ -4464,63 +4746,63 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_FCOMPARE:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-                       ARM_FMSTAT (code);
-#endif
+                       if (IS_FPA) {
+                               ARM_FPA_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
+                       } else if (IS_VFP) {
+                               ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                               ARM_FMSTAT (code);
+                       }
                        break;
                case OP_FCEQ:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-                       ARM_FMSTAT (code);
-#endif
+                       if (IS_FPA) {
+                               ARM_FPA_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
+                       } else if (IS_VFP) {
+                               ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                               ARM_FMSTAT (code);
+                       }
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 0, ARMCOND_NE);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_EQ);
                        break;
                case OP_FCLT:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-                       ARM_FMSTAT (code);
-#endif
+                       if (IS_FPA) {
+                               ARM_FPA_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
+                       } else {
+                               ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                               ARM_FMSTAT (code);
+                       }
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
                        break;
                case OP_FCLT_UN:
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg1, ins->sreg2);
-                       ARM_FMSTAT (code);
-#endif
+                       if (IS_FPA) {
+                               ARM_FPA_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
+                       } else if (IS_VFP) {
+                               ARM_CMPD (code, ins->sreg1, ins->sreg2);
+                               ARM_FMSTAT (code);
+                       }
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_VS);
                        break;
                case OP_FCGT:
                        /* swapped */
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg2, ins->sreg1);
-                       ARM_FMSTAT (code);
-#endif
+                       if (IS_FPA) {
+                               ARM_FPA_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
+                       } else if (IS_VFP) {
+                               ARM_CMPD (code, ins->sreg2, ins->sreg1);
+                               ARM_FMSTAT (code);
+                       }
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
                        break;
                case OP_FCGT_UN:
                        /* swapped */
-#ifdef ARM_FPU_FPA
-                       ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_CMPD (code, ins->sreg2, ins->sreg1);
-                       ARM_FMSTAT (code);
-#endif
+                       if (IS_FPA) {
+                               ARM_FPA_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
+                       } else if (IS_VFP) {
+                               ARM_CMPD (code, ins->sreg2, ins->sreg1);
+                               ARM_FMSTAT (code);
+                       }
                        ARM_MOV_REG_IMM8 (code, ins->dreg, 0);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_MI);
                        ARM_MOV_REG_IMM8_COND (code, ins->dreg, 1, ARMCOND_VS);
@@ -4551,13 +4833,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_FBGE:
-#ifdef ARM_FPU_VFP
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_GE);
-#else
-                       /* FPA requires EQ even thou the docs suggests that just CS is enough */                         
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_EQ);
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS);
-#endif
+                       if (IS_VFP) {
+                               EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_GE);
+                       } else {
+                               /* FPA requires EQ even though the docs suggest that just CS is enough */
+                               EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_EQ);
+                               EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS);
+                       }
                        break;
                case OP_FBGE_UN:
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
@@ -4565,28 +4847,38 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
 
                case OP_CKFINITE: {
-#ifdef ARM_FPU_FPA
-                       if (ins->dreg != ins->sreg1)
-                               ARM_MVFD (code, ins->dreg, ins->sreg1);
-#elif defined(ARM_FPU_VFP)
-                       ARM_ABSD (code, ARM_VFP_D1, ins->sreg1);
-                       ARM_FLDD (code, ARM_VFP_D0, ARMREG_PC, 0);
-                       ARM_B (code, 1);
-                       *(guint32*)code = 0xffffffff;
-                       code += 4;
-                       *(guint32*)code = 0x7fefffff;
-                       code += 4;
-                       ARM_CMPD (code, ARM_VFP_D1, ARM_VFP_D0);
-                       ARM_FMSTAT (code);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_GT, "ArithmeticException");
-                       ARM_CMPD (code, ins->sreg1, ins->sreg1);
-                       ARM_FMSTAT (code);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_VS, "ArithmeticException");                   
-
-                       ARM_CPYD (code, ins->dreg, ins->sreg1);
-#endif
+                       if (IS_FPA) {
+                               if (ins->dreg != ins->sreg1)
+                                       ARM_FPA_MVFD (code, ins->dreg, ins->sreg1);
+                       } else if (IS_VFP) {
+                               ARM_ABSD (code, ARM_VFP_D1, ins->sreg1);
+                               ARM_FLDD (code, ARM_VFP_D0, ARMREG_PC, 0);
+                               ARM_B (code, 1);
+                               *(guint32*)code = 0xffffffff;
+                               code += 4;
+                               *(guint32*)code = 0x7fefffff;
+                               code += 4;
+                               ARM_CMPD (code, ARM_VFP_D1, ARM_VFP_D0);
+                               ARM_FMSTAT (code);
+                               EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_GT, "ArithmeticException");
+                               ARM_CMPD (code, ins->sreg1, ins->sreg1);
+                               ARM_FMSTAT (code);
+                               EMIT_COND_SYSTEM_EXCEPTION_FLAGS (ARMCOND_VS, "ArithmeticException");
+                               ARM_CPYD (code, ins->dreg, ins->sreg1);
+                       }
                        break;
                }
+
+               case OP_GC_LIVENESS_DEF:
+               case OP_GC_LIVENESS_USE:
+               case OP_GC_PARAM_SLOT_LIVENESS_DEF:
+                       ins->backend.pc_offset = code - cfg->native_code;
+                       break;
+               case OP_GC_SPILL_SLOT_LIVENESS_DEF:
+                       ins->backend.pc_offset = code - cfg->native_code;
+                       bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins);
+                       break;
+
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
                        g_assert_not_reached ();
@@ -4735,7 +5027,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        MonoBasicBlock *bb;
        MonoMethodSignature *sig;
        MonoInst *inst;
-       int alloc_size, pos, max_offset, i, rot_amount;
+       int alloc_size, orig_alloc_size, pos, max_offset, i, rot_amount;
        guint8 *code;
        CallInfo *cinfo;
        int tracing = 0;
@@ -4790,13 +5082,16 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                for (i = 0; i < 16; ++i) {
                        if ((cfg->used_int_regs & (1 << i))) {
                                mono_emit_unwind_op_offset (cfg, code, i, (- prev_sp_offset) + reg_offset);
+                               mini_gc_set_slot_type_from_cfa (cfg, (- prev_sp_offset) + reg_offset, SLOT_NOREF);
                                reg_offset += 4;
                        }
                }
                if (iphone_abi) {
                        mono_emit_unwind_op_offset (cfg, code, ARMREG_LR, -4);
+                       mini_gc_set_slot_type_from_cfa (cfg, -4, SLOT_NOREF);
                } else {
                        mono_emit_unwind_op_offset (cfg, code, ARMREG_LR, -4);
+                       mini_gc_set_slot_type_from_cfa (cfg, -4, SLOT_NOREF);
                }
        } else {
                ARM_MOV_REG_REG (code, ARMREG_IP, ARMREG_SP);
@@ -4814,6 +5109,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                lmf_offset = pos;
        }
        alloc_size += pos;
+       orig_alloc_size = alloc_size;
        // align to MONO_ARCH_FRAME_ALIGNMENT bytes
        if (alloc_size & (MONO_ARCH_FRAME_ALIGNMENT - 1)) {
                alloc_size += MONO_ARCH_FRAME_ALIGNMENT - 1;
@@ -4840,6 +5136,9 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        //g_print ("prev_sp_offset: %d, alloc_size:%d\n", prev_sp_offset, alloc_size);
        prev_sp_offset += alloc_size;
 
+       for (i = 0; i < alloc_size - orig_alloc_size; i += 4)
+               mini_gc_set_slot_type_from_cfa (cfg, (- prev_sp_offset) + orig_alloc_size + i, SLOT_NOREF);
+
         /* compute max_offset in order to use short forward jumps
         * we could skip do it on arm because the immediate displacement
         * for jumps is large enough, it may be useful later for constant pools
@@ -4938,10 +5237,18 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        }
                                        break;
                                case 8:
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_STR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
-                                       g_assert (arm_is_imm12 (inst->inst_offset + 4));
-                                       ARM_STR_IMM (code, ainfo->reg + 1, inst->inst_basereg, inst->inst_offset + 4);
+                                       if (arm_is_imm12 (inst->inst_offset)) {
+                                               ARM_STR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_STR_REG_REG (code, ainfo->reg, inst->inst_basereg, ARMREG_IP);
+                                       }
+                                       if (arm_is_imm12 (inst->inst_offset + 4)) {
+                                               ARM_STR_IMM (code, ainfo->reg + 1, inst->inst_basereg, inst->inst_offset + 4);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset + 4);
+                                               ARM_STR_REG_REG (code, ainfo->reg + 1, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        break;
                                default:
                                        if (arm_is_imm12 (inst->inst_offset)) {
@@ -5045,17 +5352,6 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                pos++;
        }
 
-       if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
-               if (cfg->compile_aot)
-                       /* AOT code is only used in the root domain */
-                       code = mono_arm_emit_load_imm (code, ARMREG_R0, 0);
-               else
-                       code = mono_arm_emit_load_imm (code, ARMREG_R0, (guint32)cfg->domain);
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
-                            (gpointer)"mono_jit_thread_attach");
-               code = emit_call_seq (cfg, code);
-       }
-
        if (method->save_lmf)
                code = emit_save_lmf (cfg, code, alloc_size - lmf_offset);
 
@@ -5084,7 +5380,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        }
 
        /* Initialize ss_trigger_page_var */
-       {
+       if (!cfg->soft_breakpoints) {
                MonoInst *info_var = cfg->arch.seq_point_info_var;
                MonoInst *ss_trigger_page_var = cfg->arch.ss_trigger_page_var;
                int dreg = ARMREG_LR;
@@ -5100,6 +5396,35 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                }
        }
 
+       if (cfg->arch.seq_point_read_var) {
+               MonoInst *read_ins = cfg->arch.seq_point_read_var;
+               MonoInst *ss_method_ins = cfg->arch.seq_point_ss_method_var;
+               MonoInst *bp_method_ins = cfg->arch.seq_point_bp_method_var;
+
+               g_assert (read_ins->opcode == OP_REGOFFSET);
+               g_assert (arm_is_imm12 (read_ins->inst_offset));
+               g_assert (ss_method_ins->opcode == OP_REGOFFSET);
+               g_assert (arm_is_imm12 (ss_method_ins->inst_offset));
+               g_assert (bp_method_ins->opcode == OP_REGOFFSET);
+               g_assert (arm_is_imm12 (bp_method_ins->inst_offset));
+
+               ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
+               ARM_B (code, 2);
+               *(volatile int **)code = &ss_trigger_var;
+               code += 4;
+               *(gpointer*)code = single_step_func_wrapper;
+               code += 4;
+               *(gpointer*)code = breakpoint_func_wrapper;
+               code += 4;
+
+               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 0);
+               ARM_STR_IMM (code, ARMREG_IP, read_ins->inst_basereg, read_ins->inst_offset);
+               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 4);
+               ARM_STR_IMM (code, ARMREG_IP, ss_method_ins->inst_basereg, ss_method_ins->inst_offset);
+               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_LR, 8);
+               ARM_STR_IMM (code, ARMREG_IP, bp_method_ins->inst_basereg, bp_method_ins->inst_offset);
+       }
+
        cfg->code_len = code - cfg->native_code;
        g_assert (cfg->code_len < cfg->code_size);
        g_free (cinfo);
@@ -5128,7 +5453,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
-               mono_jit_stats.code_reallocs++;
+               cfg->stat_code_reallocs++;
        }
 
        /*
@@ -5258,7 +5583,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
        while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
-               mono_jit_stats.code_reallocs++;
+               cfg->stat_code_reallocs++;
        }
 
        code = cfg->native_code + cfg->code_len;
@@ -5307,17 +5632,11 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 
 #endif /* #ifndef DISABLE_JIT */
 
-static gboolean tls_offset_inited = FALSE;
-
 void
-mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
+mono_arch_finish_init (void)
 {
-       if (!tls_offset_inited) {
-               tls_offset_inited = TRUE;
-
-               lmf_tls_offset = mono_get_lmf_tls_offset ();
-               lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
-       }
+       lmf_tls_offset = mono_get_lmf_tls_offset ();
+       lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
 }
 
 void
@@ -5672,13 +5991,16 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
 
 #endif
 
-gpointer
+mgreg_t
 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
 {
-       if (reg == ARMREG_SP)
-               return (gpointer)ctx->esp;
-       else
-               return (gpointer)ctx->regs [reg];
+       return ctx->regs [reg];
+}
+
+void
+mono_arch_context_set_int_reg (MonoContext *ctx, int reg, mgreg_t val)
+{
+       ctx->regs [reg] = val;
 }
 
 /*
@@ -5704,8 +6026,14 @@ mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
 {
        guint8 *code = ip;
        guint32 native_offset = ip - (guint8*)ji->code_start;
+       MonoDebugOptions *opt = mini_get_debug_options ();
 
-       if (ji->from_aot) {
+       if (opt->soft_breakpoints) {
+               g_assert (!ji->from_aot);
+               code += 4;
+               ARM_BLX_REG (code, ARMREG_LR);
+               mono_arch_flush_icache (code - 4, 4);
+       } else if (ji->from_aot) {
                SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
 
                g_assert (native_offset % 4 == 0);
@@ -5742,10 +6070,16 @@ mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
 void
 mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
 {
+       MonoDebugOptions *opt = mini_get_debug_options ();
        guint8 *code = ip;
        int i;
 
-       if (ji->from_aot) {
+       if (opt->soft_breakpoints) {
+               g_assert (!ji->from_aot);
+               code += 4;
+               ARM_NOP (code);
+               mono_arch_flush_icache (code - 4, 4);
+       } else if (ji->from_aot) {
                guint32 native_offset = ip - (guint8*)ji->code_start;
                SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
 
@@ -5768,7 +6102,10 @@ mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
 void
 mono_arch_start_single_stepping (void)
 {
-       mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
+       if (ss_trigger_page)
+               mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
+       else
+               ss_trigger_var = 1;
 }
        
 /*
@@ -5779,7 +6116,10 @@ mono_arch_start_single_stepping (void)
 void
 mono_arch_stop_single_stepping (void)
 {
-       mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
+       if (ss_trigger_page)
+               mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
+       else
+               ss_trigger_var = 0;
 }
 
 #if __APPLE__
@@ -5799,6 +6139,9 @@ mono_arch_is_single_step_event (void *info, void *sigctx)
 {
        siginfo_t *sinfo = info;
 
+       if (!ss_trigger_page)
+               return FALSE;
+
        /* Sometimes the address is off by 4 */
        if (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
                return TRUE;
@@ -5816,6 +6159,9 @@ mono_arch_is_breakpoint_event (void *info, void *sigctx)
 {
        siginfo_t *sinfo = info;
 
+       if (!ss_trigger_page)
+               return FALSE;
+
        if (sinfo->si_signo == DBG_SIGNAL) {
                /* Sometimes the address is off by 4 */
                if (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
@@ -5827,36 +6173,13 @@ mono_arch_is_breakpoint_event (void *info, void *sigctx)
        }
 }
 
-guint8*
-mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
-{
-       guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
-
-       if (ji->from_aot)
-               ip -= 6 * 4;
-       else
-               ip -= 12;
-
-       return ip;
-}
-
-guint8*
-mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
-{
-       guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
-
-       ip += 4;
-
-       return ip;
-}
-
 /*
  * mono_arch_skip_breakpoint:
  *
  *   See mini-amd64.c for docs.
  */
 void
-mono_arch_skip_breakpoint (MonoContext *ctx)
+mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
 {
        MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
 }
@@ -5934,4 +6257,3 @@ mono_arch_set_target (char *mtriple)
        if (strstr (mtriple, "gnueabi"))
                eabi_supported = TRUE;
 }
-