2009-05-26 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mono / mini / mini-x86.c
index a1d6b42cc0b5d6be8a54ec7f43bdcf4527845155..e8239ff80f79a0ba985129bde20dc2e29da42fde 100644 (file)
@@ -1105,6 +1105,108 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
        MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
 }
 
+#ifdef ENABLE_LLVM
+/*
+ * mono_arch_get_llvm_call_info:
+ *   Describe how SIG's arguments and return value are passed, in the form used
+ * by the LLVM backend. The result is allocated from cfg->mempool. Unsupported
+ * cases set cfg->exception_message and cfg->disable_llvm.
+ */
+LLVMCallInfo*
+mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
+{
+       int i, n;
+       CallInfo *cinfo;
+       ArgInfo *ainfo;
+       MonoType *t;
+       LLVMCallInfo *linfo;
+
+       n = sig->param_count + sig->hasthis;
+       cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);
+       linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
+
+       /*
+        * LLVM always uses the native ABI while we use our own ABI, the
+        * only difference is the handling of vtypes:
+        * - we only pass/receive them in registers in some cases, and only
+        *   in 1 or 2 integer registers.
+        */
+       if (cinfo->ret.storage == ArgValuetypeInReg) {
+               if (sig->pinvoke) {
+                       cfg->exception_message = g_strdup ("pinvoke + vtypes");
+                       cfg->disable_llvm = TRUE;
+                       return linfo;
+               }
+
+               cfg->exception_message = g_strdup ("vtype ret in call");
+               cfg->disable_llvm = TRUE;
+               /* Disabled, re-enabling it needs an 'int j' local:
+               linfo->ret.storage = LLVMArgVtypeInReg;
+               for (j = 0; j < 2; ++j)
+                       linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
+               */
+       } else if (MONO_TYPE_ISSTRUCT (sig->ret)) {
+               if (cinfo->ret.storage == ArgInIReg) {
+                       /* Vtype returned using a hidden argument */
+                       linfo->ret.storage = LLVMArgVtypeRetAddr;
+               } else {
+                       // FIXME: other vtype return locations are not handled
+                       cfg->exception_message = g_strdup ("vtype ret in call");
+                       cfg->disable_llvm = TRUE;
+               }
+       }
+
+       for (i = 0; i < n; ++i) {
+               ainfo = cinfo->args + i;
+               /* The implicit 'this' argument has no entry in sig->params */
+               t = (i >= sig->hasthis) ? sig->params [i - sig->hasthis] : NULL;
+
+               linfo->args [i].storage = LLVMArgNone;
+
+               switch (ainfo->storage) {
+               case ArgInIReg:
+                       linfo->args [i].storage = LLVMArgInIReg;
+                       break;
+               case ArgInDoubleSSEReg:
+               case ArgInFloatSSEReg:
+                       linfo->args [i].storage = LLVMArgInFPReg;
+                       break;
+               case ArgOnStack:
+                       if (t && MONO_TYPE_ISSTRUCT (t)) {
+                               linfo->args [i].storage = LLVMArgVtypeByVal;
+                       } else if (t && !t->byref && (t->type == MONO_TYPE_R4 || t->type == MONO_TYPE_R8)) {
+                               linfo->args [i].storage = LLVMArgInFPReg;
+                       } else {
+                               /* 'this', byref and all remaining non-FP arguments */
+                               linfo->args [i].storage = LLVMArgInIReg;
+                       }
+                       break;
+               case ArgValuetypeInReg:
+                       if (sig->pinvoke) {
+                               cfg->exception_message = g_strdup ("pinvoke + vtypes");
+                               cfg->disable_llvm = TRUE;
+                               return linfo;
+                       }
+
+                       cfg->exception_message = g_strdup ("vtype arg");
+                       cfg->disable_llvm = TRUE;
+                       /* Disabled, re-enabling it needs an 'int j' local:
+                       linfo->args [i].storage = LLVMArgVtypeInReg;
+                       for (j = 0; j < 2; ++j)
+                               linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
+                       */
+                       break;
+               default:
+                       cfg->exception_message = g_strdup ("ainfo->storage");
+                       cfg->disable_llvm = TRUE;
+                       break;
+               }
+       }
+
+       return linfo;
+}
+#endif
+
 void
 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 {
@@ -1131,26 +1233,15 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
        }
 
        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
-               MonoInst *vtarg;
-
                if (cinfo->ret.storage == ArgValuetypeInReg) {
-                       if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
-                               /*
-                                * Tell the JIT to use a more efficient calling convention: call using
-                                * OP_CALL, compute the result location after the call, and save the 
-                                * result there.
-                                */
-                               call->vret_in_reg = TRUE;
-                       } else {
-                               /*
-                                * The valuetype is in EAX:EDX after the call, needs to be copied to
-                                * the stack. Save the address here, so the call instruction can
-                                * access it.
-                                */
-                               MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
-                               vtarg->sreg1 = call->vret_var->dreg;
-                               MONO_ADD_INS (cfg->cbb, vtarg);
-                       }
+                       /*
+                        * Tell the JIT to use a more efficient calling convention: call using
+                        * OP_CALL, compute the result location after the call, and save the 
+                        * result there.
+                        */
+                       call->vret_in_reg = TRUE;
+                       if (call->vret_var)
+                               NULLIFY_INS (call->vret_var);
                }
        }
 
@@ -1297,14 +1388,22 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
 
        if (!ret->byref) {
                if (ret->type == MONO_TYPE_R4) {
+                       if (COMPILE_LLVM (cfg))
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
                        /* Nothing to do */
                        return;
                } else if (ret->type == MONO_TYPE_R8) {
+                       if (COMPILE_LLVM (cfg))
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
                        /* Nothing to do */
                        return;
                } else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
-                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
-                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
+                       if (COMPILE_LLVM (cfg))
+                               MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
+                       else {
+                               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
+                               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
+                       }
                        return;
                }
        }
@@ -1350,7 +1449,7 @@ enum {
 };
 
 void*
-mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
 {
        guchar *code = p;
        int arg_size = 0, save_mode = SAVE_NONE;
@@ -1456,28 +1555,15 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
 }
 
 #define EMIT_COND_BRANCH(ins,cond,sign) \
-if (ins->flags & MONO_INST_BRLABEL) { \
-        if (ins->inst_i0->inst_c0) { \
-               x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
-        } else { \
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
-               if ((cfg->opt & MONO_OPT_BRANCH) && \
-                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
-                       x86_branch8 (code, cond, 0, sign); \
-                else \
-                       x86_branch32 (code, cond, 0, sign); \
-        } \
+if (ins->inst_true_bb->native_offset) { \
+       x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
 } else { \
-        if (ins->inst_true_bb->native_offset) { \
-               x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
-        } else { \
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
-               if ((cfg->opt & MONO_OPT_BRANCH) && \
-                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
-                       x86_branch8 (code, cond, 0, sign); \
-                else \
-                       x86_branch32 (code, cond, 0, sign); \
-        } \
+       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
+       if ((cfg->opt & MONO_OPT_BRANCH) && \
+            x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
+               x86_branch8 (code, cond, 0, sign); \
+        else \
+               x86_branch32 (code, cond, 0, sign); \
 }
 
 /*  
@@ -1865,9 +1951,6 @@ mono_emit_stack_alloc (guchar *code, MonoInst* tree)
 static guint8*
 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
 {
-       CallInfo *cinfo;
-       int quad;
-
        /* Move return value to the target register */
        switch (ins->opcode) {
        case OP_CALL:
@@ -1876,42 +1959,6 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
                if (ins->dreg != X86_EAX)
                        x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
                break;
-       case OP_VCALL:
-       case OP_VCALL_REG:
-       case OP_VCALL_MEMBASE:
-       case OP_VCALL2:
-       case OP_VCALL2_REG:
-       case OP_VCALL2_MEMBASE:
-               cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
-               if (cinfo->ret.storage == ArgValuetypeInReg) {
-                       /* Pop the destination address from the stack */
-                       x86_pop_reg (code, X86_ECX);
-                       
-                       for (quad = 0; quad < 2; quad ++) {
-                               switch (cinfo->ret.pair_storage [quad]) {
-                               case ArgInIReg:
-                                       g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
-                                       x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
-                                       break;
-                               case ArgNone:
-                                       break;
-                               default:
-                                       g_assert_not_reached ();
-                               }
-                       }
-               }
-               break;
-       case OP_FCALL: {
-               MonoCallInst *call = (MonoCallInst*)ins;
-               if (call->method && !mono_method_signature (call->method)->ret->byref && mono_method_signature (call->method)->ret->type == MONO_TYPE_R4) {
-                       /* Avoid some precision issues by saving/reloading the return value */
-                       x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
-                       x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
-                       x86_fld_membase (code, X86_ESP, 0, FALSE);
-                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
-               }
-               break;
-       }
        default:
                break;
        }
@@ -2749,6 +2796,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
                        call = (MonoCallInst*)ins;
+
+                       /* 
+                        * Emit a few nops to simplify get_vcall_slot ().
+                        */
+                       x86_nop (code);
+                       x86_nop (code);
+                       x86_nop (code);
+
                        x86_call_membase (code, ins->sreg1, ins->inst_offset);
                        if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
                                if (call->stack_usage == 4)
@@ -2859,28 +2914,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ins->inst_c0 = code - cfg->native_code;
                        break;
                case OP_BR:
-                       if (ins->flags & MONO_INST_BRLABEL) {
-                               if (ins->inst_i0->inst_c0) {
-                                       x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
-                               } else {
-                                       mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
-                                       if ((cfg->opt & MONO_OPT_BRANCH) &&
-                                           x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
-                                               x86_jump8 (code, 0);
-                                       else 
-                                               x86_jump32 (code, 0);
-                               }
+                       if (ins->inst_target_bb->native_offset) {
+                               x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
                        } else {
-                               if (ins->inst_target_bb->native_offset) {
-                                       x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
-                               } else {
-                                       mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
-                                       if ((cfg->opt & MONO_OPT_BRANCH) &&
-                                           x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
-                                               x86_jump8 (code, 0);
-                                       else 
-                                               x86_jump32 (code, 0);
-                               } 
+                               mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
+                               if ((cfg->opt & MONO_OPT_BRANCH) &&
+                                   x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
+                                       x86_jump8 (code, 0);
+                               else 
+                                       x86_jump32 (code, 0);
                        }
                        break;
                case OP_BR_REG:
@@ -3021,7 +3063,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LOADR4_MEMBASE:
                        x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
                        break;
-               case OP_ICONV_TO_R4: /* FIXME: change precision */
+               case OP_ICONV_TO_R4:
+                       x86_push_reg (code, ins->sreg1);
+                       x86_fild_membase (code, X86_ESP, 0, FALSE);
+                       /* Change precision */
+                       x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
+                       x86_fld_membase (code, X86_ESP, 0, FALSE);
+                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
+                       break;
                case OP_ICONV_TO_R8:
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, FALSE);
@@ -3080,6 +3129,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_push_reg (code, ins->sreg2);
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, TRUE);
+                       /* Change precision */
+                       x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
+                       x86_fld_membase (code, X86_ESP, 0, TRUE);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
                        break;
                case OP_LCONV_TO_R4_2:
@@ -3097,12 +3149,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        guint8 *br;
 
                        /* load 64bit integer to FP stack */
-                       x86_push_imm (code, 0);
                        x86_push_reg (code, ins->sreg2);
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, TRUE);
-                       /* store as 80bit FP value */
-                       x86_fst80_membase (code, X86_ESP, 0);
                        
                        /* test if lreg is negative */
                        x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
@@ -3110,14 +3159,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        
                        /* add correction constant mn */
                        x86_fld80_mem (code, mn);
-                       x86_fld80_membase (code, X86_ESP, 0);
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
-                       x86_fst80_membase (code, X86_ESP, 0);
 
                        x86_patch (br, code);
 
-                       x86_fld80_membase (code, X86_ESP, 0);
-                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
+                       /* Change precision */
+                       x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
+                       x86_fld_membase (code, X86_ESP, 0, TRUE);
+
+                       x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
 
                        break;
                }
@@ -3647,8 +3697,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        break;
                }
-               case OP_ATOMIC_EXCHANGE_I4:
-               case OP_ATOMIC_CAS_IMM_I4: {
+               case OP_ATOMIC_EXCHANGE_I4: {
                        guchar *br[2];
                        int sreg2 = ins->sreg2;
                        int breg = ins->inst_basereg;
@@ -3661,39 +3710,44 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        
                        /* We need the EAX reg for the cmpxchg */
                        if (ins->sreg2 == X86_EAX) {
-                               x86_push_reg (code, X86_EDX);
-                               x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
-                               sreg2 = X86_EDX;
+                               sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
+                               x86_push_reg (code, sreg2);
+                               x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
                        }
 
                        if (breg == X86_EAX) {
-                               x86_push_reg (code, X86_ESI);
-                               x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
-                               breg = X86_ESI;
+                               breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
+                               x86_push_reg (code, breg);
+                               x86_mov_reg_reg (code, breg, X86_EAX, 4);
                        }
 
-                       if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
-                               x86_mov_reg_imm (code, X86_EAX, ins->backend.data);
+                       x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
 
-                               x86_prefix (code, X86_LOCK_PREFIX);
-                               x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
-                       } else {
-                               x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
-
-                               br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
-                               x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
-                               br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
-                               x86_patch (br [1], br [0]);
-                       }
+                       br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
+                       x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
+                       br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
+                       x86_patch (br [1], br [0]);
 
                        if (breg != ins->inst_basereg)
-                               x86_pop_reg (code, X86_ESI);
+                               x86_pop_reg (code, breg);
 
                        if (ins->sreg2 != sreg2)
-                               x86_pop_reg (code, X86_EDX);
+                               x86_pop_reg (code, sreg2);
 
                        break;
                }
+               case OP_ATOMIC_CAS_I4: {
+                       g_assert (ins->sreg3 == X86_EAX);
+                       g_assert (ins->sreg1 != X86_EAX);
+                       g_assert (ins->sreg1 != ins->sreg2);
+
+                       x86_prefix (code, X86_LOCK_PREFIX);
+                       x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
+
+                       if (ins->dreg != X86_EAX)
+                               x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
+                       break;
+               }
 #ifdef MONO_ARCH_SIMD_INTRINSICS
                case OP_ADDPS:
                        x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
@@ -4999,7 +5053,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                        x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
                                item->jmp_code = code;
                                x86_branch8 (code, X86_CC_NE, 0, FALSE);
-                               if (fail_tramp)
+                               if (item->has_target_code)
                                        x86_jump_code (code, item->value.target_code);
                                else
                                        x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
@@ -5008,7 +5062,10 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                        x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
                                        item->jmp_code = code;
                                        x86_branch8 (code, X86_CC_NE, 0, FALSE);
-                                       x86_jump_code (code, item->value.target_code);
+                                       if (item->has_target_code)
+                                               x86_jump_code (code, item->value.target_code);
+                                       else
+                                               x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
                                        x86_patch (item->jmp_code, code);
                                        x86_jump_code (code, fail_tramp);
                                        item->jmp_code = NULL;
@@ -5019,7 +5076,10 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                        item->jmp_code = code;
                                        x86_branch8 (code, X86_CC_NE, 0, FALSE);
 #endif
-                                       x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
+                                       if (item->has_target_code)
+                                               x86_jump_code (code, item->value.target_code);
+                                       else
+                                               x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
 #if ENABLE_WRONG_METHOD_CHECK
                                        x86_patch (item->jmp_code, code);
                                        x86_breakpoint (code);
@@ -5273,95 +5333,64 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
 
        *displacement = 0;
 
-       /* go to the start of the call instruction
-        *
-        * address_byte = (m << 6) | (o << 3) | reg
-        * call opcode: 0xff address_byte displacement
-        * 0xff m=1,o=2 imm8
-        * 0xff m=2,o=2 imm32
-        */
        code -= 6;
 
        /* 
         * A given byte sequence can match more than case here, so we have to be
         * really careful about the ordering of the cases. Longer sequences
         * come first.
-        * Some of the rules are only needed because the imm in the mov could 
-        * match the
-        * code [2] == 0xe8 case below.
+        * There are two types of calls:
+        * - direct calls: 0xff address_byte 8/32 bits displacement
+        * - indirect calls: nop nop nop <call>
+        * The nops make sure we don't confuse the instruction preceding an indirect
+        * call with a direct call.
         */
-       if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
-               /*
-                * This is an interface call
-                * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
-                * ff 10                   call   *(%eax)
-                */
-               reg = x86_modrm_rm (code [5]);
-               disp = 0;
-#ifdef MONO_ARCH_HAVE_IMT
-       } else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
-               /* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
-                * ba 14 f8 28 08          mov    $0x828f814,%edx
-                * ff 50 fc                call   *0xfffffffc(%eax)
-                */
-               reg = code [4] & 0x07;
-               disp = (signed char)code [5];
-#endif
-       } else if ((code [-2] >= 0xb8) && (code [-2] < 0xb8 + 8) && (code [3] == 0xff) && (x86_modrm_reg (code [4]) == 0x2) && (x86_modrm_mod (code [4]) == 0x1)) {
-               /* 
-                * ba e8 e8 e8 e8     mov    $0xe8e8e8e8,%edx
-                * ff 50 60              callq  *0x60(%eax)
-                */
-               reg = x86_modrm_rm (code [4]);
-               disp = *(gint8*)(code + 5);
-       } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
+       if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
                reg = code [4] & 0x07;
                disp = (signed char)code [5];
-       } else {
-               if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
-                       reg = code [1] & 0x07;
-                       disp = *((gint32*)(code + 2));
-               } else if ((code [1] == 0xe8)) {
-                       return NULL;
-               } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
-                       /*
-                        * This is a interface call
-                        * 8b 40 30   mov    0x30(%eax),%eax
-                        * ff 10      call   *(%eax)
-                        */
-                       disp = 0;
-                       reg = code [5] & 0x07;
-               }
-               else
+       } else if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
+               reg = code [1] & 0x07;
+               disp = *((gint32*)(code + 2));
+       } else if ((code [1] == 0xe8)) {
                        return NULL;
+       } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
+               /*
+                * This is an interface call
+                * 8b 40 30   mov    0x30(%eax),%eax
+                * ff 10      call   *(%eax)
+                */
+               disp = 0;
+               reg = code [5] & 0x07;
        }
+       else
+               return NULL;
 
        *displacement = disp;
        return regs [reg];
 }
 
-gpointer*
-mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
-{
-       gpointer vt;
-       int displacement;
-       vt = mono_arch_get_vcall_slot (code, regs, &displacement);
-       if (!vt)
-               return NULL;
-       return (gpointer*)((char*)vt + displacement);
-}
-
 gpointer
 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
                gssize *regs, guint8 *code)
 {
        guint32 esp = regs [X86_ESP];
-       CallInfo *cinfo;
+       CallInfo *cinfo = NULL;
        gpointer res;
+       int offset;
 
-       if (!gsctx && code)
-               gsctx = mono_get_generic_context_from_code (code);
-       cinfo = get_call_info (gsctx, NULL, sig, FALSE);
+       /* 
+        * Avoid expensive calls to get_generic_context_from_code () + get_call_info 
+        * if possible.
+        */
+       if (MONO_TYPE_ISSTRUCT (sig->ret)) {
+               if (!gsctx && code)
+                       gsctx = mono_get_generic_context_from_code (code);
+               cinfo = get_call_info (gsctx, NULL, sig, FALSE);
+
+               offset = cinfo->args [0].offset;
+       } else {
+               offset = 0;
+       }
 
        /*
         * The stack looks like:
@@ -5371,8 +5400,9 @@ mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSi
         * <return addr>
         * <4 pointers pushed by mono_arch_create_trampoline_code ()>
         */
-       res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
-       g_free (cinfo);
+       res = (((MonoObject**)esp) [5 + (offset / 4)]);
+       if (cinfo)
+               g_free (cinfo);
        return res;
 }
 
@@ -5508,7 +5538,7 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
        MonoInst *fconv;
        int dreg, src_opcode;
 
-       if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD))
+       if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD) || COMPILE_LLVM (cfg))
                return;
 
        switch (src_opcode = ins->opcode) {
@@ -5544,11 +5574,25 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
        ins->backend.source_opcode = src_opcode;
 }
 
+#endif /* #ifdef MONO_ARCH_SIMD_INTRINSICS */
+
 void
 mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
 {
        MonoInst *ins;
        int vreg;
+
+       if (long_ins->opcode == OP_LNEG) {
+               ins = long_ins;
+               MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 1, ins->sreg1 + 1);
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->sreg1 + 2, 0);
+               MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 2, ins->dreg + 2);
+               NULLIFY_INS (ins);
+               return;
+       }
+
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+
        if (!(cfg->opt & MONO_OPT_SIMD))
                return;
        
@@ -5636,6 +5680,6 @@ mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
                long_ins->opcode = OP_NOP;
                break;
        }
+#endif /* MONO_ARCH_SIMD_INTRINSICS */
 }
-#endif