2009-04-03 Zoltan Varga <vargaz@gmail.com>
diff --git a/mono/mini/mini-amd64.c b/mono/mini/mini-amd64.c
index 441adaaa40eff7725efb3b0828bda8613c3f93d3..92183fceb14ac6a5354fe9607b6c8b485bd7100f 100644
--- a/mono/mini/mini-amd64.c
+++ b/mono/mini/mini-amd64.c
@@ -1008,11 +1008,6 @@ mono_arch_compute_omit_fp (MonoCompile *cfg)
 
                locals_size += mono_type_size (ins->inst_vtype, &ialign);
        }
-
-       if ((cfg->num_varinfo > 10000) || (locals_size >= (1 << 15))) {
-               /* Avoid hitting the stack_alloc_size < (1 << 16) assertion in emit_epilog () */
-               cfg->arch.omit_fp = FALSE;
-       }
 }
 
 GList *
@@ -1050,6 +1045,10 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
                regs = g_list_prepend (regs, (gpointer)AMD64_R13);
                regs = g_list_prepend (regs, (gpointer)AMD64_R14);
                regs = g_list_prepend (regs, (gpointer)AMD64_R15);
+#ifdef PLATFORM_WIN32
+               regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
+               regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
+#endif
        }
 
        return regs;
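
The hunk above relies on an ABI difference: the Win64 calling convention treats %rdi and %rsi as callee-saved registers, whereas SysV AMD64 uses them for the first two integer arguments, so only on Windows can they be handed to the global register allocator. A minimal sketch of the two register sets (a toy table for illustration, not mono's internal representation):

    #include <stdio.h>

    /* Callee-saved integer registers under each AMD64 ABI. On Win64,
       rdi/rsi join the set, which is why the #ifdef above makes them
       available as global registers there. */
    static const char *sysv_saved[]  = { "rbx", "rbp", "r12", "r13", "r14", "r15" };
    static const char *win64_saved[] = { "rbx", "rbp", "rdi", "rsi",
                                         "r12", "r13", "r14", "r15" };

    int main (void)
    {
        printf ("sysv: %zu callee-saved, win64: %zu\n",
                sizeof (sysv_saved) / sizeof (*sysv_saved),
                sizeof (win64_saved) / sizeof (*win64_saved));
        return 0;
    }
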
@@ -1590,19 +1589,6 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
        MONO_ADD_INS (cfg->cbb, arg);
 }
 
-#define NEW_VARSTORE(cfg,dest,var,vartype,inst) do {   \
-        MONO_INST_NEW ((cfg), (dest), OP_MOVE); \
-               (dest)->opcode = mono_type_to_regmove ((cfg), (vartype));    \
-               (dest)->klass = (var)->klass;   \
-        (dest)->sreg1 = (inst)->dreg; \
-               (dest)->dreg = (var)->dreg;   \
-        if ((dest)->opcode == OP_VMOVE) (dest)->klass = mono_class_from_mono_type ((vartype)); \
-       } while (0)
-
-#define NEW_ARGSTORE(cfg,dest,num,inst) NEW_VARSTORE ((cfg), (dest), cfg->args [(num)], cfg->arg_types [(num)], (inst))
-
-#define EMIT_NEW_ARGSTORE(cfg,dest,num,inst) do { NEW_ARGSTORE ((cfg), (dest), (num), (inst)); MONO_ADD_INS ((cfg)->cbb, (dest)); } while (0)
-
 void
 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 {
@@ -1619,6 +1605,51 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 
        cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);
 
+       if (cfg->compile_llvm) {
+               for (i = 0; i < n; ++i) {
+                       MonoInst *ins;
+
+                       ainfo = cinfo->args + i;
+
+                       in = call->args [i];
+
+                       /* Simply remember the arguments */
+                       switch (ainfo->storage) {
+                       case ArgInIReg:
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg;
+                               break;
+                       case ArgInDoubleSSEReg:
+                       case ArgInFloatSSEReg:
+                               MONO_INST_NEW (cfg, ins, OP_FMOVE);
+                               ins->dreg = mono_alloc_freg (cfg);
+                               ins->sreg1 = in->dreg;
+                               break;
+                       case ArgOnStack:
+                               if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
+                                       cfg->exception_message = g_strdup ("vtype argument");
+                                       cfg->disable_llvm = TRUE;
+                               } else {
+                                       MONO_INST_NEW (cfg, ins, OP_MOVE);
+                                       ins->dreg = mono_alloc_ireg (cfg);
+                                       ins->sreg1 = in->dreg;
+                               }
+                               break;
+                       default:
+                               cfg->exception_message = g_strdup ("ainfo->storage");
+                               cfg->disable_llvm = TRUE;
+                               return;
+                       }
+
+                       if (!cfg->disable_llvm) {
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, 0, FALSE);
+                       }
+               }
+               return;
+       }
+
        if (cinfo->need_stack_align) {
                MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
        }
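
Note that the new compile_llvm path never asserts on an unsupported construct; it records a reason in exception_message, sets disable_llvm, and lets the driver retry the method with the regular JIT. A toy model of that convention (hypothetical types, not the mono API):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stand-in for the MonoCompile fallback flags. */
    typedef struct {
        int   disable_llvm;
        char *exception_message;
    } Cfg;

    static void lower_arg (Cfg *cfg, int is_vtype)
    {
        if (is_vtype) {
            /* record why LLVM can't handle this and bail out gracefully */
            cfg->exception_message = strdup ("vtype argument");
            cfg->disable_llvm = 1;
        }
    }

    int main (void)
    {
        Cfg cfg = { 0, NULL };
        lower_arg (&cfg, 1);
        if (cfg.disable_llvm)
            printf ("LLVM disabled: %s; falling back to the JIT\n",
                    cfg.exception_message);
        free (cfg.exception_message);
        return 0;
    }
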
@@ -2253,9 +2284,6 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 {
        MonoInst *ins, *n, *temp;
 
-       if (bb->max_vreg > cfg->rs->next_vreg)
-               cfg->rs->next_vreg = bb->max_vreg;
-
        /*
         * FIXME: Need to add more instructions, but the current machine 
         * description can't model some parts of the composite instructions like
@@ -2280,10 +2308,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (!amd64_is_imm32 (ins->inst_imm)) {
                                NEW_INS (cfg, ins, temp, OP_I8CONST);
                                temp->inst_c0 = ins->inst_imm;
-                               if (cfg->globalra)
-                                       temp->dreg = mono_alloc_ireg (cfg);
-                               else
-                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->opcode = OP_COMPARE;
                                ins->sreg2 = temp->dreg;
                        }
@@ -2293,10 +2318,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (!amd64_is_imm32 (ins->inst_offset)) {
                                NEW_INS (cfg, ins, temp, OP_I8CONST);
                                temp->inst_c0 = ins->inst_offset;
-                               if (cfg->globalra)
-                                       temp->dreg = mono_alloc_ireg (cfg);
-                               else
-                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
                                ins->inst_indexreg = temp->dreg;
                        }
@@ -2306,10 +2328,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (!amd64_is_imm32 (ins->inst_imm)) {
                                NEW_INS (cfg, ins, temp, OP_I8CONST);
                                temp->inst_c0 = ins->inst_imm;
-                               if (cfg->globalra)
-                                       temp->dreg = mono_alloc_ireg (cfg);
-                               else
-                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->opcode = OP_STOREI8_MEMBASE_REG;
                                ins->sreg1 = temp->dreg;
                        }
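
All three lowering hunks above follow the same rule: x86-64 ALU and addressing forms sign-extend a 32-bit immediate, so any 64-bit constant outside that range has to be materialized into a scratch register (OP_I8CONST), now always allocated with mono_alloc_ireg (). A self-contained sketch of the imm32 test this hinges on (written from the encoding rule; the assumed semantics of amd64_is_imm32):

    #include <stdint.h>
    #include <stdio.h>

    /* A value fits an x86-64 imm32 field iff it survives a round-trip
       through a sign-extended 32-bit integer. */
    static int is_imm32 (int64_t imm)
    {
        return (int64_t)(int32_t)imm == imm;
    }

    int main (void)
    {
        printf ("%d\n", is_imm32 (INT32_MIN));        /* 1: stays inline */
        printf ("%d\n", is_imm32 (1LL << 31));        /* 0: needs a temp */
        printf ("%d\n", is_imm32 (-(1LL << 31) - 1)); /* 0: needs a temp */
        return 0;
    }
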
@@ -2319,7 +2338,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                }
        }
 
-       bb->max_vreg = cfg->rs->next_vreg;
+       bb->max_vreg = cfg->next_vreg;
 }
 
 static const int 
@@ -2541,93 +2560,6 @@ mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
        return code;
 }
 
-/*
- * emit_load_volatile_arguments:
- *
- *  Load volatile arguments from the stack to the original input registers.
- * Required before a tail call.
- */
-static guint8*
-emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
-{
-       MonoMethod *method = cfg->method;
-       MonoMethodSignature *sig;
-       MonoInst *ins;
-       CallInfo *cinfo;
-       guint32 i, quad;
-
-       /* FIXME: Generate intermediate code instead */
-
-       sig = mono_method_signature (method);
-
-       cinfo = cfg->arch.cinfo;
-       
-       /* This is the opposite of the code in emit_prolog */
-       if (sig->ret->type != MONO_TYPE_VOID) {
-               if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
-                       amd64_mov_reg_membase (code, cinfo->ret.reg, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, 8);
-       }
-
-       for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
-               ArgInfo *ainfo = cinfo->args + i;
-               MonoType *arg_type;
-               ins = cfg->args [i];
-
-               if (sig->hasthis && (i == 0))
-                       arg_type = &mono_defaults.object_class->byval_arg;
-               else
-                       arg_type = sig->params [i - sig->hasthis];
-
-               if (ins->opcode != OP_REGVAR) {
-                       switch (ainfo->storage) {
-                       case ArgInIReg: {
-                               guint32 size = 8;
-
-                               /* FIXME: I1 etc */
-                               amd64_mov_reg_membase (code, ainfo->reg, ins->inst_basereg, ins->inst_offset, size);
-                               break;
-                       }
-                       case ArgInFloatSSEReg:
-                               amd64_movss_reg_membase (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
-                               break;
-                       case ArgInDoubleSSEReg:
-                               amd64_movsd_reg_membase (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
-                               break;
-                       case ArgValuetypeInReg:
-                               for (quad = 0; quad < 2; quad ++) {
-                                       switch (ainfo->pair_storage [quad]) {
-                                       case ArgInIReg:
-                                               amd64_mov_reg_membase (code, ainfo->pair_regs [quad], ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer));
-                                               break;
-                                       case ArgInFloatSSEReg:
-                                       case ArgInDoubleSSEReg:
-                                               g_assert_not_reached ();
-                                               break;
-                                       case ArgNone:
-                                               break;
-                                       default:
-                                               g_assert_not_reached ();
-                                       }
-                               }
-                               break;
-                       case ArgValuetypeAddrInIReg:
-                               if (ainfo->pair_storage [0] == ArgInIReg)
-                                       amd64_mov_reg_membase (code, ainfo->pair_regs [0], ins->inst_left->inst_basereg, ins->inst_left->inst_offset,  sizeof (gpointer));
-                               break;
-                       default:
-                               break;
-                       }
-               }
-               else {
-                       g_assert (ainfo->storage == ArgInIReg);
-
-                       amd64_mov_reg_reg (code, ainfo->reg, ins->dreg, 8);
-               }
-       }
-
-       return code;
-}
-
 #define REAL_PRINT_REG(text,reg) \
 mono_assert (reg >= 0); \
 amd64_push_reg (code, AMD64_RAX); \
@@ -3167,6 +3099,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert (ins->dreg == X86_EAX);
                        g_assert (power >= 0);
 
+                       if (power == 0) {
+                               amd64_mov_reg_imm (code, ins->dreg, 0);
+                               break;
+                       }
+
                        /* Based on gcc code */
 
                        /* Add compensation for negative dividends */
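
The new power == 0 early-out works because the sequence here computes x % (1 << power) with the gcc-style compensation for negative dividends, and with power == 0 the mask is zero, so the result is always zero. A C model of the technique (a sketch, not the emitted bytes):

    #include <stdint.h>
    #include <assert.h>

    /* Branchless signed remainder by a power of two: add the mask to
       negative dividends before masking so the result matches C's '%'. */
    static int32_t srem_pow2 (int32_t x, int power)
    {
        int32_t mask = (1 << power) - 1;
        int32_t comp = (x >> 31) & mask;   /* mask if x < 0, else 0 */
        return ((x + comp) & mask) - comp;
    }

    int main (void)
    {
        assert (srem_pow2 (-5, 2) == -5 % 4);
        assert (srem_pow2 ( 7, 2) ==  7 % 4);
        assert (srem_pow2 (-8, 3) == -8 % 8);
        assert (srem_pow2 ( 9, 0) ==  0);   /* power == 0: always 0 */
        return 0;
    }
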
@@ -3444,7 +3381,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
                        break;
                }
-               case OP_JMP:
                case OP_TAILCALL: {
                        /*
                         * Note: this 'frame destruction' logic is useful for tail calls, too.
@@ -3458,9 +3394,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        g_assert (!cfg->method->save_lmf);
 
-                       if (ins->opcode == OP_JMP)
-                               code = emit_load_volatile_arguments (cfg, code);
-
                        if (cfg->arch.omit_fp) {
                                guint32 save_offset = 0;
                                /* Pop callee-saved registers */
@@ -3608,30 +3541,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ins->sreg1 = AMD64_RAX;
                        }
 
-                       if (call->method && ins->inst_offset < 0) {
-                               gssize val;
-
-                               /* 
-                                * This is a possible IMT call so save the IMT method in the proper
-                                * register. We don't use the generic code in method-to-ir.c, because
-                                * we need to disassemble this in get_vcall_slot_addr (), so we have to
-                                * maintain control over the layout of the code.
-                                * Also put the base reg in %rax to simplify find_imt_method ().
-                                */
-                               if (ins->sreg1 != AMD64_RAX) {
-                                       amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
-                                       ins->sreg1 = AMD64_RAX;
-                               }
-                               val = (gssize)(gpointer)call->method;
-
-                               // FIXME: Generics sharing
-#if 0
-                               if ((((guint64)val) >> 32) == 0)
-                                       amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_REG, val, 4);
-                               else
-                                       amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_REG, val, 8);
-#endif
-                       }
+                       /* 
+                        * Emit a few nops to simplify get_vcall_slot ().
+                        */
+                       amd64_nop (code);
+                       amd64_nop (code);
+                       amd64_nop (code);
 
                        amd64_call_membase (code, ins->sreg1, ins->inst_offset);
                        if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
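
The three nops exist purely for the benefit of mono_arch_get_vcall_slot (), which disassembles backwards from the return address: padding every call *membase with nops guarantees that the bytes preceding it can never be mistaken for another call encoding. An illustrative sketch of the kind of fixed-window check this enables (simplified from the matcher further below):

    #include <stdint.h>

    /* Inspect the bytes before a return address; the nop padding makes
       the window before an indirect call predictable. */
    static int preceded_by_nop_pad (const uint8_t *ret_addr, int call_len)
    {
        const uint8_t *call_start = ret_addr - call_len;
        return call_start[-1] == 0x90 && call_start[-2] == 0x90 &&
               call_start[-3] == 0x90;
    }

    int main (void)
    {
        /* 90 90 90 ff 50 10 : nop; nop; nop; call *0x10(%rax) */
        static const uint8_t code[] = { 0x90, 0x90, 0x90, 0xff, 0x50, 0x10 };
        const uint8_t *ret = code + sizeof (code); /* address after the call */
        return preceded_by_nop_pad (ret, 3) ? 0 : 1;
    }
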
@@ -4293,8 +4208,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_ATOMIC_EXCHANGE_I4:
-               case OP_ATOMIC_EXCHANGE_I8:
-               case OP_ATOMIC_CAS_IMM_I4: {
+               case OP_ATOMIC_EXCHANGE_I8: {
                        guchar *br[2];
                        int sreg2 = ins->sreg2;
                        int breg = ins->inst_basereg;
@@ -4341,28 +4255,54 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                }
                        }
 
-                       if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
-                               if (ins->backend.data == NULL)
-                                       amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
-                               else
-                                       amd64_mov_reg_imm (code, AMD64_RAX, ins->backend.data);
-
-                               amd64_prefix (code, X86_LOCK_PREFIX);
-                               amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
-                       } else {
-                               amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
+                       amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
 
-                               br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
-                               amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
-                               br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
-                               amd64_patch (br [1], br [0]);
-                       }
+                       br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
+                       amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
+                       br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
+                       amd64_patch (br [1], br [0]);
 
                        if (rdx_pushed)
                                amd64_pop_reg (code, AMD64_RDX);
 
                        break;
                }
+               case OP_ATOMIC_CAS_I4:
+               case OP_ATOMIC_CAS_I8: {
+                       guint32 size;
+
+                       if (ins->opcode == OP_ATOMIC_CAS_I8)
+                               size = 8;
+                       else
+                               size = 4;
+
+                       /* 
+                        * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
+                        * an explanation of how this works.
+                        */
+                       g_assert (ins->sreg3 == AMD64_RAX);
+                       g_assert (ins->sreg1 != AMD64_RAX);
+                       g_assert (ins->sreg1 != ins->sreg2);
+
+                       amd64_prefix (code, X86_LOCK_PREFIX);
+                       amd64_cmpxchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);
+
+                       if (ins->dreg != AMD64_RAX)
+                               amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
+                       break;
+               }
+               case OP_LIVERANGE_START: {
+                       if (cfg->verbose_level > 1)
+                               printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
+                       MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
+                       break;
+               }
+               case OP_LIVERANGE_END: {
+                       if (cfg->verbose_level > 1)
+                               printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
+                       MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
+                       break;
+               }
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
                        g_assert_not_reached ();
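
For reference, the contract the new OP_ATOMIC_CAS_I4/I8 lowering relies on: lock cmpxchg compares RAX (sreg3) with the memory operand, stores sreg2 there on a match, and leaves the value that was observed in RAX either way, which is then copied to dreg. The same semantics in portable C11 (a model of the operation, not the mono helper):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <assert.h>

    /* Returns the value observed at *dest, exactly like RAX after
       lock cmpxchg: the unchanged 'expected' on success, the competing
       value on failure. */
    static int32_t cas_i4 (_Atomic int32_t *dest, int32_t newval, int32_t expected)
    {
        atomic_compare_exchange_strong (dest, &expected, newval);
        return expected;
    }

    int main (void)
    {
        _Atomic int32_t v = 5;
        assert (cas_i4 (&v, 9, 5) == 5 && v == 9);  /* swap succeeded */
        assert (cas_i4 (&v, 7, 5) == 9 && v == 9);  /* swap failed    */
        return 0;
    }
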
@@ -4647,12 +4587,31 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                /* sp is saved right before calls */
                /* Skip method (only needed for trampoline LMF frames) */
                /* Save callee saved regs */
-               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
-               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbp), AMD64_RBP, 8);
-               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
-               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
-               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
-               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
+               for (i = 0; i < MONO_MAX_IREGS; ++i) {
+                       int offset;
+
+                       switch (i) {
+                       case AMD64_RBX: offset = G_STRUCT_OFFSET (MonoLMF, rbx); break;
+                       case AMD64_RBP: offset = G_STRUCT_OFFSET (MonoLMF, rbp); break;
+                       case AMD64_R12: offset = G_STRUCT_OFFSET (MonoLMF, r12); break;
+                       case AMD64_R13: offset = G_STRUCT_OFFSET (MonoLMF, r13); break;
+                       case AMD64_R14: offset = G_STRUCT_OFFSET (MonoLMF, r14); break;
+                       case AMD64_R15: offset = G_STRUCT_OFFSET (MonoLMF, r15); break;
+#ifdef PLATFORM_WIN32
+                       case AMD64_RDI: offset = G_STRUCT_OFFSET (MonoLMF, rdi); break;
+                       case AMD64_RSI: offset = G_STRUCT_OFFSET (MonoLMF, rsi); break;
+#endif
+                       default:
+                               offset = -1;
+                               break;
+                       }
+
+                       if (offset != -1) {
+                               amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + offset, i, 8);
+                               if (cfg->arch.omit_fp || (i != AMD64_RBP))
+                                       mono_emit_unwind_op_offset (cfg, code, i, - (cfa_offset - (lmf_offset + offset)));
+                       }
+               }
        }
 
        /* Save callee saved registers */
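
The unwind call in the loop above records, for each register stored into the LMF, where its save slot sits relative to the CFA: the slot lives at frame_reg + lmf_offset + offset, i.e. cfa_offset - (lmf_offset + offset) bytes below the CFA, hence the negated argument. A one-line sketch of that arithmetic, assuming cfa_offset measures the distance from the CFA down to frame_reg:

    #include <stdio.h>

    /* DWARF-style register rule: the register is saved at CFA plus the
       returned value (negative, since saves sit below the CFA). */
    static long saved_reg_rule (long cfa_offset, long lmf_offset, long slot_offset)
    {
        return -(cfa_offset - (lmf_offset + slot_offset));
    }

    int main (void)
    {
        printf ("%ld\n", saved_reg_rule (128, 40, 8)); /* -80: 80 bytes below CFA */
        return 0;
    }
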
@@ -5093,6 +5052,14 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                if (cfg->used_int_regs & (1 << AMD64_R15)) {
                        amd64_mov_reg_membase (code, AMD64_R15, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
                }
+#ifdef PLATFORM_WIN32
+               if (cfg->used_int_regs & (1 << AMD64_RDI)) {
+                       amd64_mov_reg_membase (code, AMD64_RDI, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rdi), 8);
+               }
+               if (cfg->used_int_regs & (1 << AMD64_RSI)) {
+                       amd64_mov_reg_membase (code, AMD64_RSI, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsi), 8);
+               }
+#endif
        } else {
 
                if (cfg->arch.omit_fp) {
@@ -5167,15 +5134,6 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        cfg->code_len = code - cfg->native_code;
 
        g_assert (cfg->code_len < cfg->code_size);
-
-       if (cfg->arch.omit_fp) {
-               /* 
-                * Encode the stack size into used_int_regs so the exception handler
-                * can access it.
-                */
-               g_assert (cfg->arch.stack_alloc_size < (1 << 16));
-               cfg->used_int_regs |= (1 << 31) | (cfg->arch.stack_alloc_size << 16);
-       }
 }
 
 void
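
This deletion pairs with the first hunk of the patch: the epilog no longer smuggles the omit-fp stack size into the top bits of used_int_regs for the exception handler, so the stack_alloc_size < (1 << 16) assertion, and with it the omit_fp workaround in mono_arch_compute_omit_fp (), can go. For reference, a sketch of the packing the deleted block performed:

    #include <stdint.h>
    #include <assert.h>

    /* Old encoding: bit 31 flags an omit-fp frame and bits 16-30 carry
       the stack allocation size; compute_omit_fp kept locals below
       1 << 15 so the size never collided with the flag bit. */
    static uint32_t pack_stack_size (uint32_t used_int_regs, uint32_t stack_alloc_size)
    {
        assert (stack_alloc_size < (1u << 15));
        return used_int_regs | (1u << 31) | (stack_alloc_size << 16);
    }

    int main (void)
    {
        uint32_t v = pack_stack_size (0x3f, 0x100);
        assert ((v >> 31) && ((v >> 16) & 0x7fff) == 0x100 && (v & 0xffff) == 0x3f);
        return 0;
    }
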
@@ -5626,56 +5584,30 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
 
        *displacement = 0;
 
-       /* go to the start of the call instruction
-        *
-        * address_byte = (m << 6) | (o << 3) | reg
-        * call opcode: 0xff address_byte displacement
-        * 0xff m=1,o=2 imm8
-        * 0xff m=2,o=2 imm32
-        */
        code -= 7;
 
        /* 
        * A given byte sequence can match more than one case here, so we have to be
         * really careful about the ordering of the cases. Longer sequences
         * come first.
+        * There are two types of calls:
+        * - direct calls: 0xff address_byte 8/32 bits displacement
+        * - indirect calls: nop nop nop <call>
+        * The nops make sure we don't confuse the instruction preceding an indirect
+        * call with a direct call.
         */
-#ifdef MONO_ARCH_HAVE_IMT
-       if ((code [-2] == 0x41) && (code [-1] == 0xbb) && (code [4] == 0xff) && (x86_modrm_mod (code [5]) == 1) && (x86_modrm_reg (code [5]) == 2) && ((signed char)code [6] < 0)) {
-               /* IMT-based interface calls: with MONO_ARCH_IMT_REG == r11
-                * 41 bb 14 f8 28 08       mov    $0x828f814,%r11d
-                * ff 50 fc                call   *0xfffffffc(%rax)
-                */
-               reg = amd64_modrm_rm (code [5]);
-               disp = (signed char)code [6];
-               /* R10 is clobbered by the IMT thunk code */
-               g_assert (reg != AMD64_R10);
-       }
-#else
-       if (0) {
-       }
-#endif
-       else if ((code [-1] == 0x8b) && (amd64_modrm_mod (code [0]) == 0x2) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
-                       /*
-                        * This is a interface call
-                        * 48 8b 80 f0 e8 ff ff   mov    0xffffffffffffe8f0(%rax),%rax
-                        * ff 10                  callq  *(%rax)
-                        */
-               if (IS_REX (code [4]))
-                       rex = code [4];
-               reg = amd64_modrm_rm (code [6]);
-               disp = 0;
-               /* R10 is clobbered by the IMT thunk code */
-               g_assert (reg != AMD64_R10);
-       } else if ((code [0] == 0x41) && (code [1] == 0xff) && (code [2] == 0x15)) {
+       if ((code [0] == 0x41) && (code [1] == 0xff) && (code [2] == 0x15)) {
                /* call OFFSET(%rip) */
                disp = *(guint32*)(code + 3);
                return (gpointer*)(code + disp + 7);
-       } else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2) && (amd64_modrm_reg (code [2]) == X86_ESP) && (amd64_modrm_mod (code [2]) == 0) && (amd64_modrm_rm (code [2]) == X86_ESP)) {
-               /* call *[r12+disp32] */
-               if (IS_REX (code [-1]))
+       } else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2) && (amd64_sib_index (code [2]) == 4) && (amd64_sib_scale (code [2]) == 0)) {
+               /* call *[reg+disp32] using indexed addressing */
+               /* The LLVM JIT emits this, and we emit it too for %r12 */
+               if (IS_REX (code [-1])) {
                        rex = code [-1];
-               reg = AMD64_RSP;
+                       g_assert (amd64_rex_x (rex) == 0);
+               }
+               reg = amd64_sib_base (code [2]);
                disp = *(gint32*)(code + 3);
        } else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) {
                /* call *[reg+disp32] */
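
The matcher above leans on the x86 ModRM/SIB layout: when rm == 4 a SIB byte follows, and a SIB index of 4 means "no index register", which is why %rsp and %r12 bases always need a SIB byte. The field extractors written out (standard x86 encoding, equivalent to the amd64_modrm_*/amd64_sib_* macros assumed above):

    #include <stdint.h>
    #include <stdio.h>

    /* ModRM: mod reg rm ; SIB: scale index base (2/3/3 bits each). */
    #define MODRM_MOD(b)  (((b) >> 6) & 0x3)
    #define MODRM_REG(b)  (((b) >> 3) & 0x7)
    #define MODRM_RM(b)   ((b) & 0x7)
    #define SIB_SCALE(b)  (((b) >> 6) & 0x3)
    #define SIB_INDEX(b)  (((b) >> 3) & 0x7)
    #define SIB_BASE(b)   ((b) & 0x7)

    int main (void)
    {
        /* ff 94 24 44 33 22 11 : call *0x11223344(%rsp) */
        uint8_t modrm = 0x94, sib = 0x24;
        printf ("mod=%d reg=%d rm=%d index=%d base=%d\n",
                MODRM_MOD (modrm), MODRM_REG (modrm), MODRM_RM (modrm),
                SIB_INDEX (sib), SIB_BASE (sib));
        return 0;
    }
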
@@ -5688,11 +5620,11 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
        } else if (code [2] == 0xe8) {
                /* call <ADDR> */
                return NULL;
-       } else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1) && (amd64_modrm_reg (code [5]) == X86_ESP) && (amd64_modrm_mod (code [5]) == 0) && (amd64_modrm_rm (code [5]) == X86_ESP)) {
-               /* call *[r12+disp32] */
+       } else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1) && (amd64_sib_index (code [5]) == 4) && (amd64_sib_scale (code [5]) == 0)) {
+               /* call *[r12+disp8] using indexed addressing */
                if (IS_REX (code [2]))
                        rex = code [2];
-               reg = AMD64_RSP;
+               reg = amd64_sib_base (code [5]);
                disp = *(gint8*)(code + 6);
        } else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) {
                /* call *%reg */
@@ -5706,11 +5638,7 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
                //printf ("B: [%%r%d+0x%x]\n", reg, disp);
        }
        else if ((code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
-                       /*
-                        * This is a interface call: should check the above code can't catch it earlier 
-                        * 8b 40 30   mov    0x30(%eax),%eax
-                        * ff 10      call   *(%eax)
-                        */
+               /* call *%reg */
                if (IS_REX (code [4]))
                        rex = code [4];
                reg = amd64_modrm_rm (code [6]);
@@ -5932,10 +5860,14 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                        else
                                                item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
                                }
-                               if (vtable_is_32bit)
-                                       item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
-                               else
+                               if (item->has_target_code) {
                                        item->chunk_size += MOV_REG_IMM_SIZE;
+                               } else {
+                                       if (vtable_is_32bit)
+                                               item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
+                                       else
+                                               item->chunk_size += MOV_REG_IMM_SIZE;
+                               }
                                item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
                        } else {
                                if (fail_tramp) {
@@ -5965,14 +5897,16 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
        if (fail_tramp)
                code = mono_method_alloc_generic_virtual_thunk (domain, size);
        else
-               code = mono_code_manager_reserve (domain->code_mp, size);
+               code = mono_domain_code_reserve (domain, size);
        start = code;
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                item->code_target = code;
                if (item->is_equals) {
-                       if (item->check_target_idx) {
-                               if (!item->compare_done) {
+                       gboolean fail_case = !item->check_target_idx && fail_tramp;
+
+                       if (item->check_target_idx || fail_case) {
+                               if (!item->compare_done || fail_case) {
                                        if (amd64_is_imm32 (item->key))
                                                amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
                                        else {
@@ -5983,57 +5917,46 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                item->jmp_code = code;
                                amd64_branch8 (code, X86_CC_NE, 0, FALSE);
                                /* See the comment below about R10 */
-                               if (fail_tramp) {
+                               if (item->has_target_code) {
                                        amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code);
                                        amd64_jump_reg (code, AMD64_R10);
                                } else {
                                        amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
                                        amd64_jump_membase (code, AMD64_R10, 0);
                                }
-                       } else {
-                               if (fail_tramp) {
-                                       if (amd64_is_imm32 (item->key))
-                                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
-                                       else {
-                                               amd64_mov_reg_imm (code, AMD64_R10, item->key);
-                                               amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
-                                       }
-                                       item->jmp_code = code;
-                                       amd64_branch8 (code, X86_CC_NE, 0, FALSE);
-                                       amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code);
-                                       amd64_jump_reg (code, AMD64_R10);
+
+                               if (fail_case) {
                                        amd64_patch (item->jmp_code, code);
                                        amd64_mov_reg_imm (code, AMD64_R10, fail_tramp);
                                        amd64_jump_reg (code, AMD64_R10);
                                        item->jmp_code = NULL;
-                                               
-                               } else {
-                                       /* enable the commented code to assert on wrong method */
+                               }
+                       } else {
+                               /* enable the commented code to assert on wrong method */
 #if 0
-                                       if (amd64_is_imm32 (item->key))
-                                               amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
-                                       else {
-                                               amd64_mov_reg_imm (code, AMD64_R10, item->key);
-                                               amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
-                                       }
-                                       item->jmp_code = code;
-                                       amd64_branch8 (code, X86_CC_NE, 0, FALSE);
-                                       /* See the comment below about R10 */
-                                       amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
-                                       amd64_jump_membase (code, AMD64_R10, 0);
-                                       amd64_patch (item->jmp_code, code);
-                                       amd64_breakpoint (code);
-                                       item->jmp_code = NULL;
+                               if (amd64_is_imm32 (item->key))
+                                       amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
+                               else {
+                                       amd64_mov_reg_imm (code, AMD64_R10, item->key);
+                                       amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
+                               }
+                               item->jmp_code = code;
+                               amd64_branch8 (code, X86_CC_NE, 0, FALSE);
+                               /* See the comment below about R10 */
+                               amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
+                               amd64_jump_membase (code, AMD64_R10, 0);
+                               amd64_patch (item->jmp_code, code);
+                               amd64_breakpoint (code);
+                               item->jmp_code = NULL;
 #else
-                                       /* We're using R10 here because R11
-                                          needs to be preserved.  R10 needs
-                                          to be preserved for calls which
-                                          require a runtime generic context,
-                                          but interface calls don't. */
-                                       amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
-                                       amd64_jump_membase (code, AMD64_R10, 0);
+                               /* We're using R10 here because R11 needs to
+                                  be preserved; R10 only needs to be preserved
+                                  for calls which require a runtime generic
+                                  context, and interface calls don't. */
+                               amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
+                               amd64_jump_membase (code, AMD64_R10, 0);
 #endif
-                               }
                        }
                } else {
                        if (amd64_is_imm32 (item->key))
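
Stepping back, each entry emitted here is one arm of a key search: compare the IMT register with the entry's key, jump to the vtable slot or target code on a match, fall through otherwise, and for the last entry of a generic-virtual thunk (the new fail_case) jump to fail_tramp instead of asserting. In C the thunk behaves roughly like the linear model below (the emitted code also uses branch-over checks for larger tables, but the per-entry logic is the same):

    #include <stddef.h>

    typedef struct {
        size_t key;      /* interface method key */
        void  *target;   /* vtable slot contents or target code */
    } ImtEntry;

    static void *imt_lookup (const ImtEntry *entries, int count,
                             size_t key, void *fail_tramp)
    {
        for (int i = 0; i < count; ++i)
            if (entries [i].key == key)
                return entries [i].target;
        return fail_tramp;   /* fail_case: generic-virtual miss */
    }

    int main (void)
    {
        static const ImtEntry table[] = { { 3, (void *)0x30 }, { 7, (void *)0x70 } };
        return imt_lookup (table, 2, 7, NULL) == (void *)0x70 ? 0 : 1;
    }
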
@@ -6078,12 +6001,6 @@ mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSha
 {
        return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
 }
-
-void
-mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
-{
-       /* Done by the implementation of the CALL_MEMBASE opcodes */
-}
 #endif
 
 MonoVTable*