2009-06-10 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-amd64.c
index 3c51073fa843c76bfa8c67a9431d2c14963d9da9..b083dbf2f0632bb4a7e5c325ae95c227a8ddcca0 100644 (file)
@@ -389,15 +389,16 @@ merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
 
 static void
 add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
-              gboolean is_return,
-              guint32 *gr, guint32 *fr, guint32 *stack_size)
+                          gboolean is_return,
+                          guint32 *gr, guint32 *fr, guint32 *stack_size)
 {
        guint32 size, quad, nquads, i;
        ArgumentClass args [2];
        MonoMarshalType *info = NULL;
        MonoClass *klass;
        MonoGenericSharingContext tmp_gsctx;
-
+       gboolean pass_on_stack = FALSE;
+       
        /* 
         * The gsctx currently contains no data, it is only used for checking whenever
         * open types are allowed, some callers like mono_arch_get_argument_info ()
@@ -412,9 +413,15 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn
        if (!sig->pinvoke && !disable_vtypes_in_regs && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
                /* We pass and return vtypes of size 8 in a register */
        } else if (!sig->pinvoke || (size == 0) || (size > 16)) {
+               pass_on_stack = TRUE;
+       }
 #else
        if (!sig->pinvoke) {
+               pass_on_stack = TRUE;
+       }
 #endif
+
+       if (pass_on_stack) {
                /* Allways pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
@@ -1241,7 +1248,18 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        /*
         * We use the ABI calling conventions for managed code as well.
-        * Exception: valuetypes are never passed or returned in registers.
+        * Exception: valuetypes are only sometimes passed or returned in registers.
+        */
+
+       /*
+        * The stack looks like this:
+        * <incoming arguments passed on the stack>
+        * <return value>
+        * <lmf/caller saved registers>
+        * <locals>
+        * <spill area>
+        * <localloc area>  -> grows dynamically
+        * <params area>
         */
 
        if (cfg->arch.omit_fp) {
@@ -1256,8 +1274,6 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        if (cfg->method->save_lmf) {
                /* Reserve stack space for saving LMF */
-               /* mono_arch_find_jit_info () expects to find the LMF at a fixed offset */
-               g_assert (offset == 0);
                if (cfg->arch.omit_fp) {
                        cfg->arch.lmf_offset = offset;
                        offset += sizeof (MonoLMF);
@@ -1333,6 +1349,14 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                        offset += (locals_stack_align - 1);
                        offset &= ~(locals_stack_align - 1);
                }
+               if (cfg->arch.omit_fp) {
+                       cfg->locals_min_stack_offset = offset;
+                       cfg->locals_max_stack_offset = offset + locals_stack_size;
+               } else {
+                       cfg->locals_min_stack_offset = - (offset + locals_stack_size);
+                       cfg->locals_max_stack_offset = - offset;
+               }
+               
                for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                        if (offsets [i] != -1) {
                                MonoInst *ins = cfg->varinfo [i];
@@ -1499,6 +1523,19 @@ mono_arch_create_vars (MonoCompile *cfg)
                        mono_print_ins (cfg->vret_addr);
                }
        }
+
+#ifdef MONO_AMD64_NO_PUSHES
+       /*
+        * When this is set, we pass arguments on the stack by moves, and by allocating 
+        * a bigger stack frame, instead of pushes.
+        * Pushes complicate exception handling because the arguments on the stack have
+        * to be popped each time a frame is unwound. They also make fp elimination
+        * impossible.
+        * FIXME: This doesn't work inside filter/finally clauses, since those execute
+        * on a new frame which doesn't include a param area.
+        */
+       cfg->arch.no_pushes = TRUE;
+#endif
 }
 
 static void
@@ -1584,10 +1621,116 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
        sig_arg->inst_p0 = tmp_sig;
        MONO_ADD_INS (cfg->cbb, sig_arg);
 
-       MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
-       arg->sreg1 = sig_arg->dreg;
-       MONO_ADD_INS (cfg->cbb, arg);
+       if (cfg->arch.no_pushes) {
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, cinfo->sig_cookie.offset, sig_arg->dreg);
+       } else {
+               MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
+               arg->sreg1 = sig_arg->dreg;
+               MONO_ADD_INS (cfg->cbb, arg);
+       }
+}
+
+static inline LLVMArgStorage
+arg_storage_to_llvm_arg_storage (MonoCompile *cfg, ArgStorage storage)
+{
+       switch (storage) {
+       case ArgInIReg:
+               return LLVMArgInIReg;
+       case ArgNone:
+               return LLVMArgNone;
+       default:
+               g_assert_not_reached ();
+               return LLVMArgNone;
+       }
+}
+
+#ifdef ENABLE_LLVM
+LLVMCallInfo*
+mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
+{
+       int i, n;
+       CallInfo *cinfo;
+       ArgInfo *ainfo;
+       int j;
+       LLVMCallInfo *linfo;
+
+       n = sig->param_count + sig->hasthis;
+
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);
+
+       linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
+
+       /*
+        * LLVM always uses the native ABI while we use our own ABI, the
+        * only difference is the handling of vtypes:
+        * - we only pass/receive them in registers in some cases, and only 
+        *   in 1 or 2 integer registers.
+        */
+       if (cinfo->ret.storage == ArgValuetypeInReg) {
+               if (sig->pinvoke) {
+                       cfg->exception_message = g_strdup ("pinvoke + vtypes");
+                       cfg->disable_llvm = TRUE;
+                       return linfo;
+               }
+
+               linfo->ret.storage = LLVMArgVtypeInReg;
+               for (j = 0; j < 2; ++j)
+                       linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
+       }
+
+       if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
+               /* Vtype returned using a hidden argument */
+               linfo->ret.storage = LLVMArgVtypeRetAddr;
+       }
+
+       for (i = 0; i < n; ++i) {
+               ainfo = cinfo->args + i;
+
+               linfo->args [i].storage = LLVMArgNone;
+
+               switch (ainfo->storage) {
+               case ArgInIReg:
+                       linfo->args [i].storage = LLVMArgInIReg;
+                       break;
+               case ArgInDoubleSSEReg:
+               case ArgInFloatSSEReg:
+                       linfo->args [i].storage = LLVMArgInFPReg;
+                       break;
+               case ArgOnStack:
+                       if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
+                               linfo->args [i].storage = LLVMArgVtypeByVal;
+                       } else {
+                               linfo->args [i].storage = LLVMArgInIReg;
+                               if (!sig->params [i - sig->hasthis]->byref) {
+                                       if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
+                                               linfo->args [i].storage = LLVMArgInFPReg;
+                                       } else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
+                                               linfo->args [i].storage = LLVMArgInFPReg;
+                                       }
+                               }
+                       }
+                       break;
+               case ArgValuetypeInReg:
+                       if (sig->pinvoke) {
+                               cfg->exception_message = g_strdup ("pinvoke + vtypes");
+                               cfg->disable_llvm = TRUE;
+                               return linfo;
+                       }
+
+                       linfo->args [i].storage = LLVMArgVtypeInReg;
+                       for (j = 0; j < 2; ++j)
+                               linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
+                       break;
+               default:
+                       cfg->exception_message = g_strdup ("ainfo->storage");
+                       cfg->disable_llvm = TRUE;
+                       break;
+               }
+       }
+
+       return linfo;
 }
+#endif
 
 void
 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
@@ -1605,53 +1748,46 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 
        cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);
 
-       if (cfg->compile_llvm) {
-               for (i = 0; i < n; ++i) {
-                       MonoInst *ins;
+       if (COMPILE_LLVM (cfg)) {
+               /* We shouldn't be called in the llvm case */
+               cfg->disable_llvm = TRUE;
+               return;
+       }
 
+       if (cinfo->need_stack_align) {
+               if (!cfg->arch.no_pushes)
+                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
+       }
+
+       /* 
+        * Emit all arguments which are passed on the stack to prevent register
+        * allocation problems.
+        */
+       if (cfg->arch.no_pushes) {
+               for (i = 0; i < n; ++i) {
+                       MonoType *t;
                        ainfo = cinfo->args + i;
 
                        in = call->args [i];
 
-                       /* Simply remember the arguments */
-                       switch (ainfo->storage) {
-                       case ArgInIReg:
-                               MONO_INST_NEW (cfg, ins, OP_MOVE);
-                               ins->dreg = mono_alloc_ireg (cfg);
-                               ins->sreg1 = in->dreg;
-                               break;
-                       case ArgInDoubleSSEReg:
-                       case ArgInFloatSSEReg:
-                               MONO_INST_NEW (cfg, ins, OP_FMOVE);
-                               ins->dreg = mono_alloc_freg (cfg);
-                               ins->sreg1 = in->dreg;
-                               break;
-                       case ArgOnStack:
-                               if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
-                                       cfg->exception_message = g_strdup ("vtype argument");
-                                       cfg->disable_llvm = TRUE;
+                       if (sig->hasthis && i == 0)
+                               t = &mono_defaults.object_class->byval_arg;
+                       else
+                               t = sig->params [i - sig->hasthis];
+
+                       if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call) {
+                               if (!t->byref) {
+                                       if (t->type == MONO_TYPE_R4)
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
+                                       else if (t->type == MONO_TYPE_R8)
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
+                                       else
+                                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
                                } else {
-                                       MONO_INST_NEW (cfg, ins, OP_MOVE);
-                                       ins->dreg = mono_alloc_ireg (cfg);
-                                       ins->sreg1 = in->dreg;
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
                                }
-                               break;
-                       default:
-                               cfg->exception_message = g_strdup ("ainfo->storage");
-                               cfg->disable_llvm = TRUE;
-                               return;
-                       }
-
-                       if (!cfg->disable_llvm) {
-                               MONO_ADD_INS (cfg->cbb, ins);
-                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, 0, FALSE);
                        }
                }
-               return;
-       }
-
-       if (cinfo->need_stack_align) {
-               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
        }
 
        /*
@@ -1722,22 +1858,26 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                                        MONO_ADD_INS (cfg->cbb, arg);
                                }
                        } else {
-                               MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
-                               arg->sreg1 = in->dreg;
-                               if (!sig->params [i - sig->hasthis]->byref) {
-                                       if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
-                                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
-                                               arg->opcode = OP_STORER4_MEMBASE_REG;
-                                               arg->inst_destbasereg = X86_ESP;
-                                               arg->inst_offset = 0;
-                                       } else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
-                                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
-                                               arg->opcode = OP_STORER8_MEMBASE_REG;
-                                               arg->inst_destbasereg = X86_ESP;
-                                               arg->inst_offset = 0;
+                               if (cfg->arch.no_pushes) {
+                                       /* Already done */
+                               } else {
+                                       MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
+                                       arg->sreg1 = in->dreg;
+                                       if (!sig->params [i - sig->hasthis]->byref) {
+                                               if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
+                                                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
+                                                       arg->opcode = OP_STORER4_MEMBASE_REG;
+                                                       arg->inst_destbasereg = X86_ESP;
+                                                       arg->inst_offset = 0;
+                                               } else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
+                                                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
+                                                       arg->opcode = OP_STORER8_MEMBASE_REG;
+                                                       arg->inst_destbasereg = X86_ESP;
+                                                       arg->inst_offset = 0;
+                                               }
                                        }
+                                       MONO_ADD_INS (cfg->cbb, arg);
                                }
-                               MONO_ADD_INS (cfg->cbb, arg);
                        }
                        break;
                default:
@@ -1850,6 +1990,8 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
                MonoInst *vtaddr, *load;
                vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
                
+               g_assert (!cfg->arch.no_pushes);
+
                MONO_INST_NEW (cfg, load, OP_LDADDR);
                load->inst_p0 = vtaddr;
                vtaddr->flags |= MONO_INST_INDIRECT;
@@ -1873,20 +2015,36 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
                }
        } else {
                if (size == 8) {
-                       /* Can't use this for < 8 since it does an 8 byte memory load */
-                       MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
-                       arg->inst_basereg = src->dreg;
-                       arg->inst_offset = 0;
-                       MONO_ADD_INS (cfg->cbb, arg);
+                       if (cfg->arch.no_pushes) {
+                               int dreg = mono_alloc_ireg (cfg);
+
+                               MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, dreg);
+                       } else {
+                               /* Can't use this for < 8 since it does an 8 byte memory load */
+                               MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
+                               arg->inst_basereg = src->dreg;
+                               arg->inst_offset = 0;
+                               MONO_ADD_INS (cfg->cbb, arg);
+                       }
                } else if (size <= 40) {
-                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 8));
-                       mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
+                       if (cfg->arch.no_pushes) {
+                               mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
+                       } else {
+                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 8));
+                               mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
+                       }
                } else {
-                       MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
-                       arg->inst_basereg = src->dreg;
-                       arg->inst_offset = 0;
-                       arg->inst_imm = size;
-                       MONO_ADD_INS (cfg->cbb, arg);
+                       if (cfg->arch.no_pushes) {
+                               // FIXME: Code growth
+                               mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
+                       } else {
+                               MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
+                               arg->inst_basereg = src->dreg;
+                               arg->inst_offset = 0;
+                               arg->inst_imm = size;
+                               MONO_ADD_INS (cfg->cbb, arg);
+                       }
                }
        }
 }
@@ -1898,7 +2056,10 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
 
        if (!ret->byref) {
                if (ret->type == MONO_TYPE_R4) {
-                       MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
+                       if (COMPILE_LLVM (cfg))
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
+                       else
+                               MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
                        return;
                } else if (ret->type == MONO_TYPE_R8) {
                        MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
@@ -1910,28 +2071,15 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
 }
 
 #define EMIT_COND_BRANCH(ins,cond,sign) \
-if (ins->flags & MONO_INST_BRLABEL) { \
-        if (ins->inst_i0->inst_c0) { \
-               x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
-        } else { \
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
-               if ((cfg->opt & MONO_OPT_BRANCH) && \
-                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
-                       x86_branch8 (code, cond, 0, sign); \
-                else \
-                       x86_branch32 (code, cond, 0, sign); \
-        } \
-} else { \
         if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
         } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
-                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
+            x86_is_imm8 (ins->inst_true_bb->max_offset - offset)) \
                        x86_branch8 (code, cond, 0, sign); \
                 else \
                        x86_branch32 (code, cond, 0, sign); \
-        } \
 }
 
 /* emit an exception if condition is fail */
@@ -2376,7 +2524,7 @@ emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size,
 }
 
 static unsigned char*
-mono_emit_stack_alloc (guchar *code, MonoInst* tree)
+mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree)
 {
        int sreg = tree->sreg1;
        int need_touch = FALSE;
@@ -2448,6 +2596,8 @@ mono_emit_stack_alloc (guchar *code, MonoInst* tree)
                amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
                                
                amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
+               if (cfg->param_area && cfg->arch.no_pushes)
+                       amd64_alu_reg_imm (code, X86_ADD, AMD64_RDI, cfg->param_area);
                amd64_cld (code);
                amd64_prefix (code, X86_REP_PREFIX);
                amd64_stosl (code);
@@ -2590,7 +2740,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        guint8 *code = cfg->native_code + cfg->code_len;
        MonoInst *last_ins = NULL;
        guint last_offset = 0;
-       int max_len, cpos;
+       int max_len;
+
+       /* Fix max_offset estimate for each successor bb */
+       if (cfg->opt & MONO_OPT_BRANCH) {
+               int current_offset = cfg->code_len;
+               MonoBasicBlock *current_bb;
+               for (current_bb = bb; current_bb != NULL; current_bb = current_bb->next_bb) {
+                       current_bb->max_offset = current_offset;
+                       current_offset += current_bb->max_length;
+               }
+       }
 
        if (cfg->opt & MONO_OPT_LOOP) {
                int pad, align = LOOP_ALIGNMENT;
@@ -2607,12 +2767,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        if (cfg->verbose_level > 2)
                g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
 
-       cpos = bb->max_offset;
-
        if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
                MonoProfileCoverageInfo *cov = cfg->coverage_info;
                g_assert (!cfg->compile_aot);
-               cpos += 6;
 
                cov->data [bb->dfn].cil_code = bb->cil_code;
                amd64_mov_reg_imm (code, AMD64_R11, (guint64)&cov->data [bb->dfn].count);
@@ -3390,7 +3547,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        /* FIXME: no tracing support... */
                        if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-                               code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
+                               code = mono_arch_instrument_epilog_full (cfg, mono_profiler_method_leave, code, FALSE, FALSE);
 
                        g_assert (!cfg->method->save_lmf);
 
@@ -3472,7 +3629,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE);
                        else
                                code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE);
-                       if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
+                       if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention) && !cfg->arch.no_pushes)
                                amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
                        code = emit_move_return_value (cfg, ins, code);
                        break;
@@ -3520,7 +3677,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
 
                        amd64_call_reg (code, ins->sreg1);
-                       if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
+                       if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention) && !cfg->arch.no_pushes)
                                amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
                        code = emit_move_return_value (cfg, ins, code);
                        break;
@@ -3549,7 +3706,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        amd64_nop (code);
 
                        amd64_call_membase (code, ins->sreg1, ins->inst_offset);
-                       if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
+                       if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention) && !cfg->arch.no_pushes)
                                amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
                        code = emit_move_return_value (cfg, ins, code);
                        break;
@@ -3557,17 +3714,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        amd64_mov_membase_reg (code, cfg->frame_reg, cfg->arch.lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
                        break;
                case OP_X86_PUSH:
+                       g_assert (!cfg->arch.no_pushes);
                        amd64_push_reg (code, ins->sreg1);
                        break;
                case OP_X86_PUSH_IMM:
+                       g_assert (!cfg->arch.no_pushes);
                        g_assert (amd64_is_imm32 (ins->inst_imm));
                        amd64_push_imm (code, ins->inst_imm);
                        break;
                case OP_X86_PUSH_MEMBASE:
+                       g_assert (!cfg->arch.no_pushes);
                        amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
                        break;
                case OP_X86_PUSH_OBJ: {
                        int size = ALIGN_TO (ins->inst_imm, 8);
+
+                       g_assert (!cfg->arch.no_pushes);
+
                        amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
                        amd64_push_reg (code, AMD64_RDI);
                        amd64_push_reg (code, AMD64_RSI);
@@ -3599,8 +3762,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* keep alignment */
                        amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
                        amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
-                       code = mono_emit_stack_alloc (code, ins);
+                       code = mono_emit_stack_alloc (cfg, code, ins);
                        amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
+                       if (cfg->param_area && cfg->arch.no_pushes)
+                               amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
                        break;
                case OP_LOCALLOC_IMM: {
                        guint32 size = ins->inst_imm;
@@ -3620,13 +3785,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                        amd64_mov_reg_imm (code, ins->dreg, size);
                                        ins->sreg1 = ins->dreg;
 
-                                       code = mono_emit_stack_alloc (code, ins);
+                                       code = mono_emit_stack_alloc (cfg, code, ins);
                                        amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
                                }
                        } else {
                                amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
                                amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
                        }
+                       if (cfg->param_area && cfg->arch.no_pushes)
+                               amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
                        break;
                }
                case OP_THROW: {
@@ -3652,6 +3819,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_START_HANDLER: {
                        MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
                        amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, 8);
+
+                       if ((MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY) ||
+                                MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY)) &&
+                               cfg->param_area && cfg->arch.no_pushes) {
+                               amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
+                       }
                        break;
                }
                case OP_ENDFINALLY: {
@@ -3675,28 +3848,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
                        //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
                        //break;
-                       if (ins->flags & MONO_INST_BRLABEL) {
-                               if (ins->inst_i0->inst_c0) {
-                                       amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
-                               } else {
-                                       mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
-                                       if ((cfg->opt & MONO_OPT_BRANCH) &&
-                                           x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
-                                               x86_jump8 (code, 0);
-                                       else 
-                                               x86_jump32 (code, 0);
-                               }
-                       } else {
                                if (ins->inst_target_bb->native_offset) {
                                        amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
                                } else {
                                        mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                                        if ((cfg->opt & MONO_OPT_BRANCH) &&
-                                           x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
+                                           x86_is_imm8 (ins->inst_target_bb->max_offset - offset))
                                                x86_jump8 (code, 0);
                                        else 
                                                x86_jump32 (code, 0);
-                               } 
                        }
                        break;
                case OP_BR_REG:
@@ -4314,8 +4474,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                }
               
-               cpos += max_len;
-
                last_ins = ins;
                last_offset = offset;
        }
@@ -4435,7 +4593,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        MonoBasicBlock *bb;
        MonoMethodSignature *sig;
        MonoInst *ins;
-       int alloc_size, pos, max_offset, i, cfa_offset, quad, max_epilog_size;
+       int alloc_size, pos, i, cfa_offset, quad, max_epilog_size;
        guint8 *code;
        CallInfo *cinfo;
        gint32 lmf_offset = cfg->arch.lmf_offset;
@@ -4509,6 +4667,15 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        }
        }
 
+       /* The param area is always at offset 0 from sp */
+       /* This needs to be allocated here, since it has to come after the spill area */
+       if (cfg->arch.no_pushes && cfg->param_area) {
+               if (cfg->arch.omit_fp)
+                       // FIXME:
+                       g_assert_not_reached ();
+               cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof (gpointer));
+       }
+
        if (cfg->arch.omit_fp) {
                /* 
                 * On enter, the stack is misaligned by the the pushing of the return
@@ -4638,30 +4805,28 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 8);
        }
 
-       /* compute max_offset in order to use short forward jumps */
-       max_offset = 0;
+       /* compute max_length in order to use short forward jumps */
        max_epilog_size = get_max_epilog_size (cfg);
        if (cfg->opt & MONO_OPT_BRANCH) {
                for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
                        MonoInst *ins;
-                       bb->max_offset = max_offset;
+                       int max_length = 0;
 
                        if (cfg->prof_options & MONO_PROFILE_COVERAGE)
-                               max_offset += 6;
+                               max_length += 6;
                        /* max alignment for loops */
                        if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
-                               max_offset += LOOP_ALIGNMENT;
+                               max_length += LOOP_ALIGNMENT;
 
                        MONO_BB_FOR_EACH_INS (bb, ins) {
-                               if (ins->opcode == OP_LABEL)
-                                       ins->inst_c1 = max_offset;
-                               
-                               max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
+                               max_length += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
                        }
 
-                       if (mono_jit_trace_calls && bb == cfg->bb_exit)
-                               /* The tracing code can be quite large */
-                               max_offset += max_epilog_size;
+                       /* Take prolog and epilog instrumentation into account */
+                       if (bb == cfg->bb_entry || bb == cfg->bb_exit)
+                               max_length += max_epilog_size;
+                       
+                       bb->max_length = max_length;
                }
        }
 
@@ -5329,7 +5494,7 @@ enum {
 };
 
 void*
-mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
 {
        guchar *code = p;
        int save_mode = SAVE_NONE;
@@ -5402,10 +5567,20 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
        else
                amd64_mov_reg_imm (code, AMD64_RAX, 0);
 
+       if (preserve_argument_registers) {
+               amd64_push_reg (code, MONO_AMD64_ARG_REG1);
+               amd64_push_reg (code, MONO_AMD64_ARG_REG2);
+       }
+
        mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
        amd64_set_reg_template (code, AMD64_ARG_REG1);
        code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
 
+       if (preserve_argument_registers) {
+               amd64_pop_reg (code, MONO_AMD64_ARG_REG2);
+               amd64_pop_reg (code, MONO_AMD64_ARG_REG1);
+       }
+
        /* Restore result */
        switch (save_mode) {
        case SAVE_EAX:
@@ -5600,11 +5775,14 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
                /* call OFFSET(%rip) */
                disp = *(guint32*)(code + 3);
                return (gpointer*)(code + disp + 7);
-       } else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2) && (amd64_modrm_reg (code [2]) == X86_ESP) && (amd64_modrm_mod (code [2]) == 0) && (amd64_modrm_rm (code [2]) == X86_ESP)) {
-               /* call *[r12+disp32] */
-               if (IS_REX (code [-1]))
+       } else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2) && (amd64_sib_index (code [2]) == 4) && (amd64_sib_scale (code [2]) == 0)) {
+               /* call *[reg+disp32] using indexed addressing */
+               /* The LLVM JIT emits this, and we emit it too for %r12 */
+               if (IS_REX (code [-1])) {
                        rex = code [-1];
-               reg = AMD64_RSP;
+                       g_assert (amd64_rex_x (rex) == 0);
+               }                       
+               reg = amd64_sib_base (code [2]);
                disp = *(gint32*)(code + 3);
        } else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) {
                /* call *[reg+disp32] */
@@ -5617,11 +5795,11 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
        } else if (code [2] == 0xe8) {
                /* call <ADDR> */
                return NULL;
-       } else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1) && (amd64_modrm_reg (code [5]) == X86_ESP) && (amd64_modrm_mod (code [5]) == 0) && (amd64_modrm_rm (code [5]) == X86_ESP)) {
-               /* call *[r12+disp32] */
+       } else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1) && (amd64_sib_index (code [5]) == 4) && (amd64_sib_scale (code [5]) == 0)) {
+               /* call *[r12+disp8] using indexed addressing */
                if (IS_REX (code [2]))
                        rex = code [2];
-               reg = AMD64_RSP;
+               reg = amd64_sib_base (code [5]);
                disp = *(gint8*)(code + 6);
        } else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) {
                /* call *%reg */
@@ -5653,17 +5831,6 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
        return regs [reg];
 }
 
-gpointer*
-mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
-{
-       gpointer vt;
-       int displacement;
-       vt = mono_arch_get_vcall_slot (code, regs, &displacement);
-       if (!vt)
-               return NULL;
-       return (gpointer*)((char*)vt + displacement);
-}
-
 int
 mono_arch_get_this_arg_reg (MonoMethodSignature *sig, MonoGenericSharingContext *gsctx, guint8 *code)
 {
@@ -5693,6 +5860,78 @@ mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSi
 
 #define MAX_ARCH_DELEGATE_PARAMS 10
 
+static gpointer
+get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len)
+{
+       guint8 *code, *start;
+       int i;
+
+       if (has_target) {
+               start = code = mono_global_codeman_reserve (64);
+
+               /* Replace the this argument with the target */
+               amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
+               amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, target), 8);
+               amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+
+               g_assert ((code - start) < 64);
+       } else {
+               start = code = mono_global_codeman_reserve (64);
+
+               if (param_count == 0) {
+                       amd64_jump_membase (code, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+               } else {
+                       /* We have to shift the arguments left */
+                       amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
+                       for (i = 0; i < param_count; ++i) {
+#ifdef PLATFORM_WIN32
+                               if (i < 3)
+                                       amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
+                               else
+                                       amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
+#else
+                               amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
+#endif
+                       }
+
+                       amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+               }
+               g_assert ((code - start) < 64);
+       }
+
+       mono_debug_add_delegate_trampoline (start, code - start);
+
+       if (code_len)
+               *code_len = code - start;
+
+       return start;
+}
+
+/*
+ * mono_arch_get_delegate_invoke_impls:
+ *
+ *   Return a list of MonoAotTrampInfo structures for the delegate invoke impl
+ * trampolines.
+ */
+GSList*
+mono_arch_get_delegate_invoke_impls (void)
+{
+       GSList *res = NULL;
+       guint8 *code;
+       guint32 code_len;
+       int i;
+
+       code = get_delegate_invoke_impl (TRUE, 0, &code_len);
+       res = g_slist_prepend (res, mono_aot_tramp_info_create (g_strdup ("delegate_invoke_impl_has_target"), code, code_len));
+
+       for (i = 0; i < MAX_ARCH_DELEGATE_PARAMS; ++i) {
+               code = get_delegate_invoke_impl (FALSE, i, &code_len);
+               res = g_slist_prepend (res, mono_aot_tramp_info_create (g_strdup_printf ("delegate_invoke_impl_target_%d", i), code, code_len));
+       }
+
+       return res;
+}
+
 gpointer
 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
 {
@@ -5712,16 +5951,10 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
                if (cached)
                        return cached;
 
-               start = code = mono_global_codeman_reserve (64);
-
-               /* Replace the this argument with the target */
-               amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
-               amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, target), 8);
-               amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
-
-               g_assert ((code - start) < 64);
-
-               mono_debug_add_delegate_trampoline (start, code - start);
+               if (mono_aot_only)
+                       start = mono_aot_get_named_code ("delegate_invoke_impl_has_target");
+               else
+                       start = get_delegate_invoke_impl (TRUE, 0, NULL);
 
                mono_memory_barrier ();
 
@@ -5738,29 +5971,13 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
                if (code)
                        return code;
 
-               start = code = mono_global_codeman_reserve (64);
-
-               if (sig->param_count == 0) {
-                       amd64_jump_membase (code, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+               if (mono_aot_only) {
+                       char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
+                       start = mono_aot_get_named_code (name);
+                       g_free (name);
                } else {
-                       /* We have to shift the arguments left */
-                       amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
-                       for (i = 0; i < sig->param_count; ++i) {
-#ifdef PLATFORM_WIN32
-                               if (i < 3)
-                                       amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
-                               else
-                                       amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
-#else
-                               amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
-#endif
-                       }
-
-                       amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+                       start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL);
                }
-               g_assert ((code - start) < 64);
-
-               mono_debug_add_delegate_trampoline (start, code - start);
 
                mono_memory_barrier ();