2004-05-13 Patrik Torstensson
[mono.git] / mono / mini / mini-x86.c
index e83007a73343d7821cfb7bf76a80545983288267..db2f3a206060f99f55f68cf63aec44e9c342f572 100644 (file)
@@ -217,6 +217,7 @@ is_regsize_var (MonoType *t) {
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
+       case MONO_TYPE_PTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
@@ -756,25 +757,58 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                if (ins->dreg != ins->sreg1) {
                                        ins->opcode = OP_MOVE;
                                } else {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       last_ins->next = ins->next;
+                                       ins = ins->next;
                                        continue;
                                }
                        }
                        break;
                case OP_COMPARE_IMM:
-                       /* OP_COMPARE_IMM (reg, 0) --> OP_X86_TEST_NULL (reg) */
+                       /* OP_COMPARE_IMM (reg, 0) 
+                        * --> 
+                        * OP_X86_TEST_NULL (reg) 
+                        */
                        if (ins->inst_imm == 0 && ins->next &&
                            (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
                             ins->next->opcode == OP_CEQ)) {
                                ins->opcode = OP_X86_TEST_NULL;
                        }     
                        break;
+               case OP_X86_COMPARE_MEMBASE_IMM:
+                       /* 
+                        * OP_STOREI4_MEMBASE_REG reg, offset(basereg)
+                        * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
+                        * -->
+                        * OP_STOREI4_MEMBASE_REG reg, offset(basereg)
+                        * OP_COMPARE_IMM reg, imm
+                        *
+                        * Note: if imm = 0 and a BEQ/BNE_UN/CEQ follows, OP_X86_TEST_NULL is used instead of OP_COMPARE_IMM
+                        */
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
+                           ins->inst_basereg == last_ins->inst_destbasereg &&
+                           ins->inst_offset == last_ins->inst_offset) {
+                                       ins->opcode = OP_COMPARE_IMM;
+                                       ins->sreg1 = last_ins->sreg1;
+
+                                       /* check if we can replace cmp reg,0 with test null */
+                                       if (ins->inst_imm == 0 && ins->next &&
+                                               (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
+                                               ins->next->opcode == OP_CEQ)) {
+                                               ins->opcode = OP_X86_TEST_NULL;
+                                       }     
+                               }
+
+                       break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
                        /* 
-                        * OP_STORE_MEMBASE_REG reg, offset(basereg) 
-                        * OP_LOAD_MEMBASE offset(basereg), reg
+                        * Note: if reg1 = reg2 the load op is removed
+                        *
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
+                        * OP_LOAD_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+                        * OP_MOVE reg1, reg2
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
                                         || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
@@ -792,6 +826,8 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        /* 
                         * Note: reg1 must be different from the basereg in the second load
+                        * Note: if reg1 = reg2 then the second load is removed
+                        *
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
@@ -836,9 +872,15 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_LOADU1_MEMBASE:
                case OP_LOADI1_MEMBASE:
-                 /*
-                  * FIXME: Missing explanation
-                  */
+                       /* 
+                        * Note: if reg1 = reg2 the load op is removed
+                        *
+                        * OP_STOREI1_MEMBASE_REG reg1, offset(basereg)
+                        * OP_LOADU1_MEMBASE/OP_LOADI1_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_STOREI1_MEMBASE_REG reg1, offset(basereg)
+                        * OP_MOVE reg1, reg2
+                        */
                        if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
@@ -855,9 +897,15 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_LOADU2_MEMBASE:
                case OP_LOADI2_MEMBASE:
-                 /*
-                  * FIXME: Missing explanation
-                  */
+                       /* 
+                        * Note: if reg1 = reg2 the load op is removed
+                        *
+                        * OP_STOREI2_MEMBASE_REG reg1, offset(basereg)
+                        * OP_LOADU2_MEMBASE/OP_LOADI2_MEMBASE offset(basereg), reg2
+                        * -->
+                        * OP_STOREI2_MEMBASE_REG reg1, offset(basereg)
+                        * OP_MOVE reg1, reg2
+                        */
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
@@ -875,7 +923,9 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_CONV_I4:
                case CEE_CONV_U4:
                case OP_MOVE:
-                       /* 
+                       /*
+                        * Removes:
+                        *
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
@@ -885,6 +935,8 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                continue;
                        }
                        /* 
+                        * Removes:
+                        *
                         * OP_MOVE sreg, dreg 
                         * OP_MOVE dreg, sreg
                         */
@@ -1443,8 +1495,6 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        int new_dest = mono_regstate_alloc_int (rs, dest_mask);
                                        if (new_dest < 0)
                                                new_dest = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
-                                       /* new_dest is only used inside this opcode */
-                                       mono_regstate_free_int (cfg->rs, new_dest);
                                        g_assert (new_dest >= 0);
                                        DEBUG (g_print ("\tclob:s changing dreg from R%d to %s (val = %d)\n", ins->dreg, mono_arch_regname (new_dest), val));
                                        clob_dreg = ins->dreg;
@@ -2033,28 +2083,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        if (cfg->opt & MONO_OPT_PEEPHOLE)
                peephole_pass (cfg, bb);
 
-#if 0
-       /* 
-        * various stratgies to align BBs. Using real loop detection or simply
-        * aligning every block leads to more consistent benchmark results,
-        * but usually slows down the code
-        * we should do the alignment outside this function or we should adjust
-        * bb->native offset as well or the code is effectively slowed down!
-        */
-       /* align all blocks */
-//     if ((pad = (cfg->code_len & (align - 1)))) {
-       /* poor man loop start detection */
-//     if (bb->code && bb->in_count && bb->in_bb [0]->cil_code > bb->cil_code && (pad = (cfg->code_len & (align - 1)))) {
-       /* consider real loop detection and nesting level */
-//     if (bb->loop_blocks && bb->nesting < 3 && (pad = (cfg->code_len & (align - 1)))) {
-       /* consider real loop detection */
-       if (bb->loop_blocks && (pad = (cfg->code_len & (align - 1)))) {
-               pad = align - pad;
-               x86_padding (code, pad);
-               cfg->code_len += pad;
-               bb->native_offset = cfg->code_len;
+       if (cfg->opt & MONO_OPT_LOOP) {
+               int pad, align = 8;
+               /* TODO: set alignment depending on cpu; currently fixed at 8 bytes */
+               if (bb->nesting && (bb->in_count == 1) && (pad = (cfg->code_len & (align - 1)))) {
+                       pad = align - pad;
+                       /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
+                       x86_padding (code, pad);
+                       cfg->code_len += pad;
+                       bb->native_offset = cfg->code_len;
+               }
        }
-#endif
 
        if (cfg->verbose_level > 2)
                g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
@@ -2296,73 +2335,52 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
                        break;
                case OP_LSHL: {
-                       guint8 *jump_to_large_shift;
                        guint8 *jump_to_end;
 
-                       /* handle shifts bellow 32 bits */
-                       x86_alu_reg_imm (code, X86_CMP, X86_ECX, 32);
-                       jump_to_large_shift = code; x86_branch8 (code, X86_CC_GE, 0, TRUE);
-
+                       /* handle shifts below 32 bits */
                        x86_shld_reg (code, ins->unused, ins->sreg1);
                        x86_shift_reg (code, X86_SHL, ins->sreg1);
 
-                       jump_to_end = code; x86_jump8 (code, 0);
+                       x86_test_reg_imm (code, X86_ECX, 32);
+                       jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
 
-                       x86_patch (jump_to_large_shift, code);
-
-                       /* handle shifts over 31 bits */
+                       /* handle shifts of 32 bits or more */
                        x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
                        x86_clear_reg (code, ins->sreg1);
-                       x86_alu_reg_imm (code, X86_AND, X86_ECX, 0x1f);
-                       x86_shift_reg (code, X86_SHL, ins->unused);
                        
                        x86_patch (jump_to_end, code);
                        }
                        break;
                case OP_LSHR: {
-                       guint8 *jump_to_large_shift;
                        guint8 *jump_to_end;
 
-                       /* handle shifts bellow 32 bits */
-                       x86_alu_reg_imm (code, X86_CMP, X86_ECX, 32);
-                       jump_to_large_shift = code; x86_branch8 (code, X86_CC_GE, 0, TRUE);
-
+                       /* handle shifts below 32 bits */
                        x86_shrd_reg (code, ins->sreg1, ins->unused);
                        x86_shift_reg (code, X86_SAR, ins->unused);
 
-                       jump_to_end = code; x86_jump8 (code, 0);
-
-                       x86_patch (jump_to_large_shift, code);
+                       x86_test_reg_imm (code, X86_ECX, 32);
+                       jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
 
                        /* handle shifts over 31 bits */
-                       x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
-                       x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
-                       x86_alu_reg_imm (code, X86_AND, X86_ECX, 0x1f);
-                       x86_shift_reg (code, X86_SAR, ins->sreg1);
+                       x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
+                       x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
                        
                        x86_patch (jump_to_end, code);
                        }
                        break;
                case OP_LSHR_UN: {
-                       guint8 *jump_to_large_shift;
                        guint8 *jump_to_end;
 
-                       /* handle shifts bellow 32 bits */
-                       x86_alu_reg_imm (code, X86_CMP, X86_ECX, 32);
-                       jump_to_large_shift = code; x86_branch8 (code, X86_CC_GE, 0, TRUE);
-
+                       /* handle shifts below 32 bits */
                        x86_shrd_reg (code, ins->sreg1, ins->unused);
                        x86_shift_reg (code, X86_SHR, ins->unused);
 
-                       jump_to_end = code; x86_jump8 (code, 0);
-
-                       x86_patch (jump_to_large_shift, code);
+                       x86_test_reg_imm (code, X86_ECX, 32);
+                       jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
 
                        /* handle shifts over 31 bits */
                        x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
-                       x86_clear_reg (code, ins->unused);
-                       x86_alu_reg_imm (code, X86_AND, X86_ECX, 0x1f);
-                       x86_shift_reg (code, X86_SHR, ins->sreg1);
+                       x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
                        
                        x86_patch (jump_to_end, code);
                        }
@@ -2371,7 +2389,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->inst_imm >= 32) {
                                x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
                                x86_clear_reg (code, ins->sreg1);
-                               x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm & 0x1f);
+                               x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
                        } else {
                                x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
                                x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
@@ -2381,7 +2399,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->inst_imm >= 32) {
                                x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
                                x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
-                               x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm & 0x1f);
+                               x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
                        } else {
                                x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
                                x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
@@ -2391,7 +2409,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->inst_imm >= 32) {
                                x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
                                x86_clear_reg (code, ins->unused);
-                               x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm & 0x1f);
+                               x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
                        } else {
                                x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
                                x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);