2007-07-09 Mark Probst <mark.probst@gmail.com>
[mono.git] / mono / mini / mini-amd64.c
index 418b757ec52b4463fb509ff3d874fe81b2aea01e..29b6f3b029b8c854dfd1207c05b255c6205f9bf5 100644 (file)
@@ -44,6 +44,8 @@ static gboolean use_sse2 = !MONO_ARCH_USE_FPSTACK;
 
 #define IS_IMM32(val) ((((guint64)val) >> 32) == 0)
 
+#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f)) /* non-zero when the opcode byte INST is in 0x40..0x4f, the AMD64 REX prefix range; note that INST is evaluated twice */
+
 #ifdef PLATFORM_WIN32
 /* Under windows, the default pinvoke calling convention is stdcall */
 #define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
@@ -1749,6 +1751,7 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_CONV_I4:
                case CEE_CONV_U4:
                case OP_MOVE:
+               case OP_FMOVE:
                        /*
                         * Removes:
                         *
@@ -1757,6 +1760,8 @@ peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->dreg == ins->sreg1) {
                                if (last_ins)
                                        last_ins->next = ins->next;                             
+                               else
+                                       bb->code = ins->next;
                                ins = ins->next;
                                continue;
                        }
@@ -1794,8 +1799,8 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_I8CONST:
                        /* reg = 0 -> XOR (reg, reg) */
                        /* XOR sets cflags on x86, so we cant do it always */
-                       if (ins->inst_c0 == 0 && (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode))) {
-                               ins->opcode = CEE_XOR;
+                       if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
+                               ins->opcode = OP_LXOR;
                                ins->sreg1 = ins->dreg;
                                ins->sreg2 = ins->dreg;
                                /* Fall through */
@@ -1803,6 +1808,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        else
                                break;
                case CEE_XOR:
+               case OP_LXOR:
                        if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
                                MonoInst *ins2;
 
@@ -1827,6 +1833,14 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                }
                        }
                        break;
+               case OP_IADD_IMM:
+                       if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
+                               ins->opcode = OP_X86_INC_REG;
+                       break;
+               case OP_ISUB_IMM:
+                       if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
+                               ins->opcode = OP_X86_DEC_REG;
+                       break;
                case OP_MUL_IMM: 
                        /* remove unnecessary multiplication with 1 */
                        if (ins->inst_imm == 1) {
@@ -1995,6 +2009,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_CONV_I4:
                case CEE_CONV_U4:
                case OP_MOVE:
+               case OP_FMOVE:
                        /*
                         * Removes:
                         *
@@ -2002,7 +2017,9 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                         */
                        if (ins->dreg == ins->sreg1) {
                                if (last_ins)
-                                       last_ins->next = ins->next;                             
+                                       last_ins->next = ins->next;
+                               else
+                                       bb->code = ins->next;
                                ins = ins->next;
                                continue;
                        }
@@ -2798,23 +2815,52 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                case CEE_DIV:
                case OP_LDIV:
-                       amd64_cdq (code);
-                       amd64_div_reg (code, ins->sreg2, TRUE);
-                       break;
-               case CEE_DIV_UN:
-               case OP_LDIV_UN:
-                       amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
-                       amd64_div_reg (code, ins->sreg2, FALSE);
-                       break;
                case CEE_REM:
                case OP_LREM:
-                       amd64_cdq (code);
-                       amd64_div_reg (code, ins->sreg2, TRUE);
+                       /* Regalloc magic makes the div/rem cases the same */
+                       if (ins->sreg2 == AMD64_RDX) {
+                               amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
+                               amd64_cdq (code);
+                               amd64_div_membase (code, AMD64_RSP, -8, TRUE);
+                       } else {
+                               amd64_cdq (code);
+                               amd64_div_reg (code, ins->sreg2, TRUE);
+                       }
                        break;
+               case CEE_DIV_UN:
+               case OP_LDIV_UN:
                case CEE_REM_UN:
                case OP_LREM_UN:
-                       amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
-                       amd64_div_reg (code, ins->sreg2, FALSE);
+                       if (ins->sreg2 == AMD64_RDX) {
+                               amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
+                               amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
+                               amd64_div_membase (code, AMD64_RSP, -8, FALSE);
+                       } else {
+                               amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
+                               amd64_div_reg (code, ins->sreg2, FALSE);
+                       }
+                       break;
+               case OP_IDIV:
+               case OP_IREM:
+                       if (ins->sreg2 == AMD64_RDX) {
+                               amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
+                               amd64_cdq_size (code, 4);
+                               amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
+                       } else {
+                               amd64_cdq_size (code, 4);
+                               amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
+                       }
+                       break;
+               case OP_IDIV_UN:
+               case OP_IREM_UN:
+                       if (ins->sreg2 == AMD64_RDX) {
+                               amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
+                               amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
+                               amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
+                       } else {
+                               amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
+                               amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
+                       }
                        break;
                case OP_LMUL_OVF:
                        amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
@@ -2828,6 +2874,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
                        break;
                case CEE_XOR:
+               case OP_LXOR:
                        amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
                        break;
                case OP_XOR_IMM:
@@ -2995,22 +3042,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
                        break;
                }
-               case OP_IDIV:
-                       amd64_cdq_size (code, 4);
-                       amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
-                       break;
-               case OP_IDIV_UN:
-                       amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
-                       amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
-                       break;
-               case OP_IREM:
-                       amd64_cdq_size (code, 4);
-                       amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
-                       break;
-               case OP_IREM_UN:
-                       amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
-                       amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
-                       break;
                case OP_ICOMPARE:
                        amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
                        break;
@@ -4259,6 +4290,10 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        gint32 lmf_offset = cfg->arch.lmf_offset;
 
        cfg->code_size =  MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
+
+       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
+               cfg->code_size += 512;
+
        code = cfg->native_code = g_malloc (cfg->code_size);
 
        /* Amount of stack space allocated by register saving code */
@@ -4821,10 +4856,15 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 
                        pos = cfg->native_code + patch_info->ip.i;
 
-                       if (use_sse2)
-                               *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
-                       else
+
+                       if (use_sse2) {
+                               if (IS_REX (pos [1]))
+                                       *(guint32*)(pos + 5) = (guint8*)code - pos - 9;
+                               else
+                                       *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
+                       } else {
                                *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
+                       }
 
                        if (patch_info->type == MONO_PATCH_INFO_R8) {
                                *(double*)code = *(double*)patch_info->data.target;
@@ -5032,8 +5072,6 @@ mono_arch_is_inst_imm (gint64 imm)
        return amd64_is_imm32 (imm);
 }
 
-#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
-
 /*
  * Determine whenever the trap whose info is in SIGINFO is caused by
  * integer overflow.
@@ -5199,26 +5237,62 @@ mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
        return (gpointer)(((guint64)(regs [reg])) + disp);
 }
 
-gpointer*
-mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
+gpointer
+mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code) /* CODE is not used by this implementation */
 {
-       guint32 reg;
-       guint32 disp;
+       if (MONO_TYPE_ISSTRUCT (sig->ret)) /* choose which entry of REGS (indexed by AMD64 register number) is returned as the 'this' argument */
+               return (gpointer)regs [AMD64_RSI]; /* struct-typed return value: take the RSI slot; presumably RDI then carries a hidden return-buffer pointer -- TODO confirm against the call-emitting code */
+       else
+               return (gpointer)regs [AMD64_RDI]; /* any other return type: take the RDI slot */
+}
+/*
+ * mono_arch_get_delegate_invoke_impl:
+ *
+ *   Emit a small native stub for invoking a delegate with signature SIG and
+ * return the address of its first instruction, or NULL when the signature
+ * is not handled here.
+ *
+ * The emitted instructions use RDI as the base register for the MonoDelegate
+ * field offsets, i.e. the stub expects the delegate object in RDI on entry.
+ * Every stub ends with an indirect jump through MonoDelegate.method_ptr.
+ *
+ * NULL is returned when:
+ *   - SIG returns a struct (see the FIXME below), or
+ *   - HAS_TARGET is FALSE and either some parameter fails
+ *     mono_is_regsize_var () or SIG has more than 4 parameters.
+ *
+ * Otherwise 64 bytes are reserved from the current domain's code manager
+ * (the reservation itself is done with the domain lock held) and the stub
+ * is written into them.
+ */
+gpointer
+mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
+{
+       guint8 *code, *start;
+       MonoDomain *domain = mono_domain_get ();
+       int i;
+
+       /*
+        * FIXME: Support more cases
+        * Signatures with a struct-typed return value get no stub: NULL is returned.
+        */
+       if (MONO_TYPE_ISSTRUCT (sig->ret))
+               return NULL;
 
-       code -= 10;
+       if (has_target) {
+               mono_domain_lock (domain);
+               start = code = mono_code_manager_reserve (domain->code_mp, 64);
+               mono_domain_unlock (domain);
 
-       if (IS_REX (code [0]) && (code [1] == 0x8b) && (code [3] == 0x48) && (code [4] == 0x8b) && (code [5] == 0x40) && (code [7] == 0x48) && (code [8] == 0xff) && (code [9] == 0xd0)) {
-               /* mov REG, %rax; mov <OFFSET>(%rax), %rax; call *%rax */
-               reg = amd64_rex_b (code [0]) + amd64_modrm_rm (code [2]);
-               disp = code [6];
+               /*
+                * Replace the this argument with the target:
+                * keep the delegate pointer in RAX, load MonoDelegate.target into
+                * RDI, then jump through MonoDelegate.method_ptr.
+                */
+               amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RDI, 8);
+               amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, target), 8);
+               amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
 
-               if (reg == AMD64_RAX)
+               g_assert ((code - start) < 64); /* checked after emission: the stub must be smaller than the 64 bytes reserved above */
+       } else {
+               for (i = 0; i < sig->param_count; ++i)
+                       if (!mono_is_regsize_var (sig->params [i]))
+                               return NULL;
+               if (sig->param_count > 4) /* with this limit, param_regs [i + 1] below is read at index 4 at most */
                        return NULL;
-               else
-                       return (gpointer*)(((guint64)(regs [reg])) + disp);
+
+               mono_domain_lock (domain);
+               start = code = mono_code_manager_reserve (domain->code_mp, 64);
+               mono_domain_unlock (domain);
+
+               if (sig->param_count == 0) {
+                       amd64_jump_membase (code, AMD64_RDI, G_STRUCT_OFFSET (MonoDelegate, method_ptr)); /* nothing to shift: jump straight through the delegate still held in RDI */
+               } else {
+                       /*
+                        * We have to shift the arguments left:
+                        * the delegate pointer is saved in RAX first because the
+                        * first move below overwrites param_regs [0]; each argument
+                        * register then receives the value of the next one.
+                        * NOTE(review): param_regs is defined elsewhere in this file;
+                        * its first entry is presumably RDI -- confirm.
+                        */
+                       amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RDI, 8);
+                       for (i = 0; i < sig->param_count; ++i)
+                               amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
+
+                       amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
+               }
+               g_assert ((code - start) < 64); /* same post-emission size check as in the has_target branch */
        }
 
-       return NULL;
+       return start;
 }
 
 /*