2009-03-11 Zoltan Varga <vargaz@gmail.com>

[mono.git] / mono / mini / mini-amd64.c
diff --git a/mono/mini/mini-amd64.c b/mono/mini/mini-amd64.c

index ad4e5836688cec05993c0694ea66ac9ff608d3d1..b33714554e12e8b7f4837f7a2ac08bc97728e433 100644 (file)
--- a/mono/mini/mini-amd64.c
+++ b/mono/mini/mini-amd64.c
@@ -326,7 +326,7 @@ merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
         ArgumentClass class2 = ARG_CLASS_NO_CLASS;
         MonoType *ptype;
  
-       ptype = mono_type_get_underlying_type (type);
+       ptype = mini_type_get_underlying_type (NULL, type);
         switch (ptype->type) {
         case MONO_TYPE_BOOLEAN:
         case MONO_TYPE_CHAR:
@@ -403,14 +403,20 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn
  {
         guint32 size, quad, nquads, i;
         ArgumentClass args [2];
-       MonoMarshalType *info;
+       MonoMarshalType *info = NULL;
         MonoClass *klass;
+       MonoGenericSharingContext tmp_gsctx;
+
+       /* 
+        * The gsctx currently contains no data, it is only used for checking whether
+        * open types are allowed, some callers like mono_arch_get_argument_info ()
+        * don't pass it to us, so work around that.
+        */
+       if (!gsctx)
+               gsctx = &tmp_gsctx;
  
         klass = mono_class_from_mono_type (type);
-       if (sig->pinvoke) 
-               size = mono_type_native_stack_size (&klass->byval_arg, NULL);
-       else 
-               size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL);
+       size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);
  #ifndef PLATFORM_WIN32
         if (!sig->pinvoke && !disable_vtypes_in_regs && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
                 /* We pass and return vtypes of size 8 in a register */
@@ -605,8 +611,7 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
  
         /* return value */
         {
-               ret_type = mono_type_get_underlying_type (sig->ret);
-               ret_type = mini_get_basic_type_from_generic (gsctx, ret_type);
+               ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
                 switch (ret_type->type) {
                 case MONO_TYPE_BOOLEAN:
                 case MONO_TYPE_I1:
@@ -710,8 +715,7 @@ get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSign
                         add_general (&gr, &stack_size, ainfo);
                         continue;
                 }
-               ptype = mono_type_get_underlying_type (sig->params [i]);
-               ptype = mini_get_basic_type_from_generic (gsctx, ptype);
+               ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
                 switch (ptype->type) {
                 case MONO_TYPE_BOOLEAN:
                 case MONO_TYPE_I1:
@@ -910,10 +914,7 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
                 } else
                         *exclude_mask |= MONO_OPT_CMOV;
         }
-#ifdef PLATFORM_WIN32
-       /* FIXME */
-       *exclude_mask |= (MONO_OPT_PEEPHOLE | MONO_OPT_BRANCH);
-#endif
+
         return opts;
  }
  
@@ -1934,6 +1935,19 @@ emit_sig_cookie2 (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
         MONO_ADD_INS (cfg->cbb, arg);
  }
  
+#define NEW_VARSTORE(cfg,dest,var,vartype,inst) do {   \
+        MONO_INST_NEW ((cfg), (dest), OP_MOVE); \
+               (dest)->opcode = mono_type_to_regmove ((cfg), (vartype));    \
+               (dest)->klass = (var)->klass;   \
+        (dest)->sreg1 = (inst)->dreg; \
+               (dest)->dreg = (var)->dreg;   \
+        if ((dest)->opcode == OP_VMOVE) (dest)->klass = mono_class_from_mono_type ((vartype)); \
+       } while (0)
+
+#define NEW_ARGSTORE(cfg,dest,num,inst) NEW_VARSTORE ((cfg), (dest), cfg->args [(num)], cfg->arg_types [(num)], (inst))
+
+#define EMIT_NEW_ARGSTORE(cfg,dest,num,inst) do { NEW_ARGSTORE ((cfg), (dest), (num), (inst)); MONO_ADD_INS ((cfg)->cbb, (dest)); } while (0)
+
  void
  mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
  {
@@ -1983,9 +1997,11 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                 case ArgOnStack:
                 case ArgValuetypeInReg:
                 case ArgValuetypeAddrInIReg:
-                       if (ainfo->storage == ArgOnStack && call->tail_call)
-                               NOT_IMPLEMENTED;
-                       if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
+                       if (ainfo->storage == ArgOnStack && call->tail_call) {
+                               MonoInst *call_inst = (MonoInst*)call;
+                               cfg->args [i]->flags |= MONO_INST_VOLATILE;
+                               EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
+                       } else if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
                                 guint32 align;
                                 guint32 size;
  
@@ -2161,10 +2177,11 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
  
                 if (ainfo->pair_storage [0] == ArgInIReg) {
                         MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE);
-                       arg->dreg = ainfo->pair_regs [0];
+                       arg->dreg = mono_alloc_ireg (cfg);
                         arg->sreg1 = load->dreg;
                         arg->inst_imm = 0;
                         MONO_ADD_INS (cfg->cbb, arg);
+                       mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE);
                 } else {
                         MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
                         arg->sreg1 = load->dreg;
@@ -2193,7 +2210,7 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
  void
  mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
  {
-       MonoType *ret = mono_type_get_underlying_type (mono_method_signature (method)->ret);
+       MonoType *ret = mini_type_get_underlying_type (NULL, mono_method_signature (method)->ret);
  
         if (!ret->byref) {
                 if (ret->type == MONO_TYPE_R4) {
@@ -2303,9 +2320,15 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe
                         }
                 }
                 else {
-                       if (mono_find_class_init_trampoline_by_addr (data))
+                       if (!cfg->new_ir && mono_find_class_init_trampoline_by_addr (data))
                                 near_call = TRUE;
-                       else {
+                       else if (cfg->abs_patches && g_hash_table_lookup (cfg->abs_patches, data)) {
+                               /* 
+                                * This is not really an optimization, but required because the
+                                * generic class init trampolines use R11 to pass the vtable.
+                                */
+                               near_call = TRUE;
+                       } else {
                                 MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
                                 if (info) {
                                         if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && 
@@ -2590,11 +2613,15 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                 case OP_DIV_IMM:
                 case OP_REM_IMM:
                 case OP_IDIV_IMM:
-               case OP_IREM_IMM:
                 case OP_IDIV_UN_IMM:
                 case OP_IREM_UN_IMM:
                         mono_decompose_op_imm (cfg, bb, ins);
                         break;
+               case OP_IREM_IMM:
+                       /* Keep the opcode if we can implement it efficiently */
+                       if (!((ins->inst_imm > 0) && (mono_is_power_of_two (ins->inst_imm) != -1)))
+                               mono_decompose_op_imm (cfg, bb, ins);
+                       break;
                 case OP_COMPARE_IMM:
                 case OP_LCOMPARE_IMM:
                         if (!amd64_is_imm32 (ins->inst_imm)) {
@@ -2830,18 +2857,19 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
  }
  
  /*
- * emit_tls_get:
+ * mono_amd64_emit_tls_get:
   * @code: buffer to store code to
   * @dreg: hard register where to place the result
   * @tls_offset: offset info
   *
- * emit_tls_get emits in @code the native code that puts in the dreg register
- * the item in the thread local storage identified by tls_offset.
+ * mono_amd64_emit_tls_get emits in @code the native code that puts in
+ * the dreg register the item in the thread local storage identified
+ * by tls_offset.
   *
   * Returns: a pointer to the end of the stored code
   */
-static guint8*
-emit_tls_get (guint8* code, int dreg, int tls_offset)
+guint8*
+mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
  {
  #ifdef PLATFORM_WIN32
         g_assert (tls_offset < 64);
@@ -2966,6 +2994,8 @@ amd64_pop_reg (code, AMD64_RAX);
  #define LOOP_ALIGNMENT 8
  #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
  
+#ifndef DISABLE_JIT
+
  void
  mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
  {
@@ -3009,6 +3039,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
  
         mono_debug_open_block (cfg, bb, offset);
  
+    if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
+               x86_breakpoint (code);
+
         MONO_BB_FOR_EACH_INS (bb, ins) {
                 offset = code - cfg->native_code;
  
@@ -3315,6 +3348,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                 case OP_BREAK:
                         amd64_breakpoint (code);
                         break;
+               case OP_RELAXED_NOP:
+                       x86_prefix (code, X86_REP_PREFIX);
+                       x86_nop (code);
+                       break;
+               case OP_HARD_NOP:
+                       x86_nop (code);
+                       break;
                 case OP_NOP:
                 case OP_DUMMY_USE:
                 case OP_DUMMY_STORE:
@@ -3472,6 +3512,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                 amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
                         }
                         break;
+               case OP_IREM_IMM: {
+                       int power = mono_is_power_of_two (ins->inst_imm);
+
+                       g_assert (ins->sreg1 == X86_EAX);
+                       g_assert (ins->dreg == X86_EAX);
+                       g_assert (power >= 0);
+
+                       if (power == 0) {
+                               amd64_mov_reg_imm (code, ins->dreg, 0);
+                               break;
+                       }
+
+                       /* Based on gcc code */
+
+                       /* Add compensation for negative dividends */
+                       amd64_mov_reg_reg_size (code, AMD64_RDX, AMD64_RAX, 4);
+                       if (power > 1)
+                               amd64_shift_reg_imm_size (code, X86_SAR, AMD64_RDX, 31, 4);
+                       amd64_shift_reg_imm_size (code, X86_SHR, AMD64_RDX, 32 - power, 4);
+                       amd64_alu_reg_reg_size (code, X86_ADD, AMD64_RAX, AMD64_RDX, 4);
+                       /* Compute remainder */
+                       amd64_alu_reg_imm_size (code, X86_AND, AMD64_RAX, (1 << power) - 1, 4);
+                       /* Remove compensation */
+                       amd64_alu_reg_reg_size (code, X86_SUB, AMD64_RAX, AMD64_RDX, 4);
+                       break;
+               }
                 case OP_LMUL_OVF:
                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
@@ -3943,8 +4009,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                 case OP_X86_PUSH_MEMBASE:
                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
                         break;
-               case OP_X86_PUSH_OBJ: 
-                       amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
+               case OP_X86_PUSH_OBJ: {
+                       int size = ALIGN_TO (ins->inst_imm, 8);
+                       amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
                         amd64_push_reg (code, AMD64_RDI);
                         amd64_push_reg (code, AMD64_RSI);
                         amd64_push_reg (code, AMD64_RCX);
@@ -3952,8 +4019,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
                         else
                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
-                       amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
-                       amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
+                       amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8));
+                       amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3));
                         amd64_cld (code);
                         amd64_prefix (code, X86_REP_PREFIX);
                         amd64_movsd (code);
@@ -3961,6 +4028,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         amd64_pop_reg (code, AMD64_RSI);
                         amd64_pop_reg (code, AMD64_RDI);
                         break;
+               }
                 case OP_X86_LEA:
                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
                         break;
@@ -4537,7 +4605,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
                         break;
                 case OP_TLS_GET: {
-                       code = emit_tls_get (code, ins->dreg, ins->inst_offset);
+                       code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
                         break;
                 }
                 case OP_MEMORY_BARRIER: {
@@ -4673,6 +4741,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
         cfg->code_len = code - cfg->native_code;
  }
  
+#endif /* DISABLE_JIT */
+
  void
  mono_arch_register_lowlevel_calls (void)
  {
@@ -5107,7 +5177,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
                         guint8 *buf, *no_domain_branch;
  
-                       code = emit_tls_get (code, AMD64_RAX, appdomain_tls_offset);
+                       code = mono_amd64_emit_tls_get (code, AMD64_RAX, appdomain_tls_offset);
                         if ((domain >> 32) == 0)
                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
                         else
@@ -5115,7 +5185,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                         amd64_alu_reg_reg (code, X86_CMP, AMD64_RAX, AMD64_ARG_REG1);
                         no_domain_branch = code;
                         x86_branch8 (code, X86_CC_NE, 0, 0);
-                       code = emit_tls_get ( code, AMD64_RAX, lmf_addr_tls_offset);
+                       code = mono_amd64_emit_tls_get ( code, AMD64_RAX, lmf_addr_tls_offset);
                         amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
                         buf = code;
                         x86_branch8 (code, X86_CC_NE, 0, 0);
@@ -5163,7 +5233,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                 } else {
                         if (lmf_addr_tls_offset != -1) {
                                 /* Load lmf quicky using the FS register */
-                               code = emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset);
+                               code = mono_amd64_emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset);
  #ifdef PLATFORM_WIN32
                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
                                 /* FIXME: Add a separate key for LMF to avoid this */
@@ -5300,6 +5370,23 @@ mono_arch_emit_epilog (MonoCompile *cfg)
         pos = 0;
         
         if (method->save_lmf) {
+               /* check if we need to restore protection of the stack after a stack overflow */
+               if (mono_get_jit_tls_offset () != -1) {
+                       guint8 *patch;
+                       code = mono_amd64_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
+                       /* we load the value in a separate instruction: this mechanism may be
+                        * used later as a safer way to do thread interruption
+                        */
+                       amd64_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 8);
+                       x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
+                       patch = code;
+                       x86_branch8 (code, X86_CC_Z, 0, FALSE);
+                       /* note that the call trampoline will preserve eax/edx */
+                       x86_call_reg (code, X86_ECX);
+                       x86_patch (patch, code);
+               } else {
+                       /* FIXME: maybe save the jit tls in the prolog */
+               }
                 if ((lmf_tls_offset != -1) && !optimize_for_xen) {
                         /*
                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
@@ -5618,7 +5705,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
         guchar *code = p;
         int save_mode = SAVE_NONE;
         MonoMethod *method = cfg->method;
-       int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
+       int rtype = mini_type_get_underlying_type (NULL, mono_method_signature (method)->ret)->type;
         
         switch (rtype) {
         case MONO_TYPE_VOID:
@@ -6062,8 +6149,16 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
                 } else {
                         /* We have to shift the arguments left */
                         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
-                       for (i = 0; i < sig->param_count; ++i)
+                       for (i = 0; i < sig->param_count; ++i) {
+#ifdef PLATFORM_WIN32
+                               if (i < 3)
+                                       amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
+                               else
+                                       amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
+#else
                                 amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
+#endif
+                       }
  
                         amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
                 }
@@ -6192,7 +6287,8 @@ imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
   * LOCKING: called with the domain lock held
   */
  gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+       gpointer fail_tramp)
  {
         int i;
         int size = 0;
@@ -6204,7 +6300,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                 if (item->is_equals) {
                         if (item->check_target_idx) {
                                 if (!item->compare_done) {
-                                       if (amd64_is_imm32 (item->method))
+                                       if (amd64_is_imm32 (item->key))
                                                 item->chunk_size += CMP_SIZE;
                                         else
                                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
@@ -6215,17 +6311,22 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                         item->chunk_size += MOV_REG_IMM_SIZE;
                                 item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
                         } else {
-                               if (vtable_is_32bit)
-                                       item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
-                               else
-                                       item->chunk_size += MOV_REG_IMM_SIZE;
-                               item->chunk_size += JUMP_REG_SIZE;
-                               /* with assert below:
-                                * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
-                                */
+                               if (fail_tramp) {
+                                       item->chunk_size += MOV_REG_IMM_SIZE * 3 + CMP_REG_REG_SIZE +
+                                               BR_SMALL_SIZE + JUMP_REG_SIZE * 2;
+                               } else {
+                                       if (vtable_is_32bit)
+                                               item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
+                                       else
+                                               item->chunk_size += MOV_REG_IMM_SIZE;
+                                       item->chunk_size += JUMP_REG_SIZE;
+                                       /* with assert below:
+                                        * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
+                                        */
+                               }
                         }
                 } else {
-                       if (amd64_is_imm32 (item->method))
+                       if (amd64_is_imm32 (item->key))
                                 item->chunk_size += CMP_SIZE;
                         else
                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
@@ -6234,7 +6335,10 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                 }
                 size += item->chunk_size;
         }
-       code = mono_code_manager_reserve (domain->code_mp, size);
+       if (fail_tramp)
+               code = mono_method_alloc_generic_virtual_thunk (domain, size);
+       else
+               code = mono_code_manager_reserve (domain->code_mp, size);
         start = code;
         for (i = 0; i < count; ++i) {
                 MonoIMTCheckItem *item = imt_entries [i];
@@ -6242,50 +6346,73 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                 if (item->is_equals) {
                         if (item->check_target_idx) {
                                 if (!item->compare_done) {
-                                       if (amd64_is_imm32 (item->method))
-                                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method);
+                                       if (amd64_is_imm32 (item->key))
+                                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
                                         else {
-                                               amd64_mov_reg_imm (code, AMD64_R10, item->method);
+                                               amd64_mov_reg_imm (code, AMD64_R10, item->key);
                                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
                                         }
                                 }
                                 item->jmp_code = code;
                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
                                 /* See the comment below about R10 */
-                               amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->vtable_slot]));
-                               amd64_jump_membase (code, AMD64_R10, 0);
+                               if (fail_tramp) {
+                                       amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code);
+                                       amd64_jump_reg (code, AMD64_R10);
+                               } else {
+                                       amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
+                                       amd64_jump_membase (code, AMD64_R10, 0);
+                               }
                         } else {
-                               /* enable the commented code to assert on wrong method */
+                               if (fail_tramp) {
+                                       if (amd64_is_imm32 (item->key))
+                                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
+                                       else {
+                                               amd64_mov_reg_imm (code, AMD64_R10, item->key);
+                                               amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
+                                       }
+                                       item->jmp_code = code;
+                                       amd64_branch8 (code, X86_CC_NE, 0, FALSE);
+                                       amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code);
+                                       amd64_jump_reg (code, AMD64_R10);
+                                       amd64_patch (item->jmp_code, code);
+                                       amd64_mov_reg_imm (code, AMD64_R10, fail_tramp);
+                                       amd64_jump_reg (code, AMD64_R10);
+                                       item->jmp_code = NULL;
+                                               
+                               } else {
+                                       /* enable the commented code to assert on wrong method */
  #if 0
-                               if (amd64_is_imm32 (item->method))
-                                       amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method);
-                               else {
-                                       amd64_mov_reg_imm (code, AMD64_R10, item->method);
-                                       amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
-                               }
-                               item->jmp_code = code;
-                               amd64_branch8 (code, X86_CC_NE, 0, FALSE);
-                               /* See the comment below about R10 */
-                               amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->vtable_slot]));
-                               amd64_jump_membase (code, AMD64_R10, 0);
-                               amd64_patch (item->jmp_code, code);
-                               amd64_breakpoint (code);
-                               item->jmp_code = NULL;
+                                       if (amd64_is_imm32 (item->key))
+                                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
+                                       else {
+                                               amd64_mov_reg_imm (code, AMD64_R10, item->key);
+                                               amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
+                                       }
+                                       item->jmp_code = code;
+                                       amd64_branch8 (code, X86_CC_NE, 0, FALSE);
+                                       /* See the comment below about R10 */
+                                       amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
+                                       amd64_jump_membase (code, AMD64_R10, 0);
+                                       amd64_patch (item->jmp_code, code);
+                                       amd64_breakpoint (code);
+                                       item->jmp_code = NULL;
  #else
-                               /* We're using R10 here because R11
-                                  needs to be preserved.  R10 needs
-                                  to be preserved for calls which
-                                  require a runtime generic context,
-                                  but interface calls don't. */
-                               amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->vtable_slot]));
-                               amd64_jump_membase (code, AMD64_R10, 0);
+                                       /* We're using R10 here because R11
+                                          needs to be preserved.  R10 needs
+                                          to be preserved for calls which
+                                          require a runtime generic context,
+                                          but interface calls don't. */
+                                       amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
+                                       amd64_jump_membase (code, AMD64_R10, 0);
  #endif
+                               }
                         }
                 } else {
-                       if (amd64_is_imm32 (item->method))
-                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method);
+                       if (amd64_is_imm32 (item->key))
+                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
                         else {
-                               amd64_mov_reg_imm (code, AMD64_R10, item->method);
+                               amd64_mov_reg_imm (code, AMD64_R10, item->key);
                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
                         }
                         item->jmp_code = code;
@@ -6305,8 +6432,9 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                         }
                 }
         }
-               
-       mono_stats.imt_thunks_size += code - start;
+
+       if (!fail_tramp)
+               mono_stats.imt_thunks_size += code - start;
         g_assert (code - start <= size);
  
         return start;
@@ -6325,7 +6453,7 @@ mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSha
  }
  
  void
-mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call)
+mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
  {
         /* Done by the implementation of the CALL_MEMBASE opcodes */
  }