case MONO_TYPE_U4:
case MONO_TYPE_I:
case MONO_TYPE_U:
+ case MONO_TYPE_PTR:
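+ /* pointers are register-sized on x86, so treat them like MONO_TYPE_I/U */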
return TRUE;
case MONO_TYPE_OBJECT:
case MONO_TYPE_STRING:
if (ins->dreg != ins->sreg1) {
ins->opcode = OP_MOVE;
} else {
- last_ins->next = ins->next;
- ins = ins->next;
+ last_ins->next = ins->next;
+ ins = ins->next;
continue;
}
}
break;
case OP_COMPARE_IMM:
- /* OP_COMPARE_IMM (reg, 0) --> OP_X86_TEST_NULL (reg) */
+ /* OP_COMPARE_IMM (reg, 0)
+ * -->
+ * OP_X86_TEST_NULL (reg)
+ */
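+ /* test reg,reg sets ZF the same way as cmp reg,0 but encodes
+ * shorter; ZF is all that BEQ/BNE_UN/CEQ look at */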
if (ins->inst_imm == 0 && ins->next &&
(ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
ins->next->opcode == OP_CEQ)) {
ins->opcode = OP_X86_TEST_NULL;
}
break;
+ case OP_X86_COMPARE_MEMBASE_IMM:
+ /*
+ * OP_STORE_MEMBASE_REG reg, offset(basereg)
+ * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
+ * -->
+ * OP_STORE_MEMBASE_REG reg, offset(basereg)
+ * OP_COMPARE_IMM reg, imm
+ *
+ * Note: if imm == 0, OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
+ */
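+ /* the register stored by last_ins still holds the value at
+ * offset(basereg), so comparing it directly avoids a memory access */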
+ if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
+ ins->inst_basereg == last_ins->inst_destbasereg &&
+ ins->inst_offset == last_ins->inst_offset) {
+ ins->opcode = OP_COMPARE_IMM;
+ ins->sreg1 = last_ins->sreg1;
+
+ /* check if cmp reg,0 can be replaced with test null */
+ if (ins->inst_imm == 0 && ins->next &&
+ (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
+ ins->next->opcode == OP_CEQ)) {
+ ins->opcode = OP_X86_TEST_NULL;
+ }
+ }
+
+ break;
case OP_LOAD_MEMBASE:
case OP_LOADI4_MEMBASE:
/*
- * OP_STORE_MEMBASE_REG reg, offset(basereg)
- * OP_LOAD_MEMBASE offset(basereg), reg
+ * Note: if reg1 == reg2, the load op is removed
+ *
+ * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+ * OP_LOAD_MEMBASE offset(basereg), reg2
+ * -->
+ * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+ * OP_MOVE reg1, reg2
*/
if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG
|| last_ins->opcode == OP_STORE_MEMBASE_REG) &&
/*
* Note: reg1 must be different from the basereg in the second load
+ * Note: if reg1 == reg2, the second load is removed
+ *
* OP_LOAD_MEMBASE offset(basereg), reg1
* OP_LOAD_MEMBASE offset(basereg), reg2
* -->
break;
case OP_LOADU1_MEMBASE:
case OP_LOADI1_MEMBASE:
- /*
- * FIXME: Missing explanation
- */
+ /*
+ * Note: if reg1 == reg2, the load op is removed
+ *
+ * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+ * OP_LOAD_MEMBASE offset(basereg), reg2
+ * -->
+ * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+ * OP_MOVE reg1, reg2
+ */
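+ /* note: only a store of the same width (OP_STOREI1_MEMBASE_REG) to
+ * the same offset(basereg) is matched */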
if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
ins->inst_basereg == last_ins->inst_destbasereg &&
ins->inst_offset == last_ins->inst_offset) {
break;
case OP_LOADU2_MEMBASE:
case OP_LOADI2_MEMBASE:
- /*
- * FIXME: Missing explanation
- */
+ /*
+ * Note: if reg1 == reg2, the load op is removed
+ *
+ * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+ * OP_LOAD_MEMBASE offset(basereg), reg2
+ * -->
+ * OP_STORE_MEMBASE_REG reg1, offset(basereg)
+ * OP_MOVE reg1, reg2
+ */
if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
ins->inst_basereg == last_ins->inst_destbasereg &&
ins->inst_offset == last_ins->inst_offset) {
case CEE_CONV_I4:
case CEE_CONV_U4:
case OP_MOVE:
- /*
+ /*
+ * Removes:
+ *
* OP_MOVE reg, reg
*/
if (ins->dreg == ins->sreg1) {
continue;
}
/*
+ * Removes:
+ *
* OP_MOVE sreg, dreg
* OP_MOVE dreg, sreg
*/
int new_dest = mono_regstate_alloc_int (rs, dest_mask);
if (new_dest < 0)
new_dest = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
- /* new_dest is only used inside this opcode */
- mono_regstate_free_int (cfg->rs, new_dest);
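+ /* note: new_dest replaces ins->dreg below, so it has to stay
+ * allocated; freeing it here would let it be handed out again
+ * while still live */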
g_assert (new_dest >= 0);
DEBUG (g_print ("\tclob:s changing dreg from R%d to %s (val = %d)\n", ins->dreg, mono_arch_regname (new_dest), val));
clob_dreg = ins->dreg;
if (cfg->opt & MONO_OPT_PEEPHOLE)
peephole_pass (cfg, bb);
-#if 0
- /*
- * various stratgies to align BBs. Using real loop detection or simply
- * aligning every block leads to more consistent benchmark results,
- * but usually slows down the code
- * we should do the alignment outside this function or we should adjust
- * bb->native offset as well or the code is effectively slowed down!
- */
- /* align all blocks */
-// if ((pad = (cfg->code_len & (align - 1)))) {
- /* poor man loop start detection */
-// if (bb->code && bb->in_count && bb->in_bb [0]->cil_code > bb->cil_code && (pad = (cfg->code_len & (align - 1)))) {
- /* consider real loop detection and nesting level */
-// if (bb->loop_blocks && bb->nesting < 3 && (pad = (cfg->code_len & (align - 1)))) {
- /* consider real loop detection */
- if (bb->loop_blocks && (pad = (cfg->code_len & (align - 1)))) {
- pad = align - pad;
- x86_padding (code, pad);
- cfg->code_len += pad;
- bb->native_offset = cfg->code_len;
+ if (cfg->opt & MONO_OPT_LOOP) {
+ int pad, align = 8;
+ /* set alignment depending on cpu */
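+ /* a nested block with a single incoming edge is likely a loop
+ * header, so aligning its start is worthwhile */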
+ if (bb->nesting && (bb->in_count == 1) && (pad = (cfg->code_len & (align - 1)))) {
+ pad = align - pad;
+ /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
+ x86_padding (code, pad);
+ cfg->code_len += pad;
+ bb->native_offset = cfg->code_len;
+ }
}
-#endif
if (cfg->verbose_level > 2)
g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
break;
case OP_LSHL: {
- guint8 *jump_to_large_shift;
guint8 *jump_to_end;
- /* handle shifts bellow 32 bits */
- x86_alu_reg_imm (code, X86_CMP, X86_ECX, 32);
- jump_to_large_shift = code; x86_branch8 (code, X86_CC_GE, 0, TRUE);
-
+ /* handle shifts below 32 bits */
x86_shld_reg (code, ins->unused, ins->sreg1);
x86_shift_reg (code, X86_SHL, ins->sreg1);
- jump_to_end = code; x86_jump8 (code, 0);
+ x86_test_reg_imm (code, X86_ECX, 32);
+ jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
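+ /* x86 masks shift counts to 5 bits, so the shld/shl above are only
+ * correct for counts below 32; bit 5 of ECX tells whether the count
+ * was 32 or more and a fixup is needed */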
- x86_patch (jump_to_large_shift, code);
-
- /* handle shifts over 31 bits */
+ /* handle shifts over 31 bits */
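+ /* the masked shl above already computed low << (count - 32) in
+ * sreg1; make it the new high word and clear the low word */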
x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
x86_clear_reg (code, ins->sreg1);
- x86_alu_reg_imm (code, X86_AND, X86_ECX, 0x1f);
- x86_shift_reg (code, X86_SHL, ins->unused);
x86_patch (jump_to_end, code);
}
break;
case OP_LSHR: {
- guint8 *jump_to_large_shift;
guint8 *jump_to_end;
- /* handle shifts bellow 32 bits */
- x86_alu_reg_imm (code, X86_CMP, X86_ECX, 32);
- jump_to_large_shift = code; x86_branch8 (code, X86_CC_GE, 0, TRUE);
-
+ /* handle shifts below 32 bits */
x86_shrd_reg (code, ins->sreg1, ins->unused);
x86_shift_reg (code, X86_SAR, ins->unused);
- jump_to_end = code; x86_jump8 (code, 0);
-
- x86_patch (jump_to_large_shift, code);
+ x86_test_reg_imm (code, X86_ECX, 32);
+ jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
/* handle shifts over 31 bits */
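+ /* the masked sar above already computed high >> (count - 32) in
+ * ins->unused; make it the new low word and sign-fill the high word */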
- x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
- x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
- x86_alu_reg_imm (code, X86_AND, X86_ECX, 0x1f);
- x86_shift_reg (code, X86_SAR, ins->sreg1);
+ x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
+ x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
x86_patch (jump_to_end, code);
}
break;
case OP_LSHR_UN: {
- guint8 *jump_to_large_shift;
guint8 *jump_to_end;
- /* handle shifts bellow 32 bits */
- x86_alu_reg_imm (code, X86_CMP, X86_ECX, 32);
- jump_to_large_shift = code; x86_branch8 (code, X86_CC_GE, 0, TRUE);
-
+ /* handle shifts below 32 bits */
x86_shrd_reg (code, ins->sreg1, ins->unused);
x86_shift_reg (code, X86_SHR, ins->unused);
- jump_to_end = code; x86_jump8 (code, 0);
-
- x86_patch (jump_to_large_shift, code);
+ x86_test_reg_imm (code, X86_ECX, 32);
+ jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
/* handle shifts over 31 bits */
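+ /* the masked shr above already computed high >> (count - 32) in
+ * ins->unused; make it the new low word and clear the high word */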
x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
x86_clear_reg (code, ins->unused);
- x86_alu_reg_imm (code, X86_AND, X86_ECX, 0x1f);
- x86_shift_reg (code, X86_SHR, ins->sreg1);
x86_patch (jump_to_end, code);
}
if (ins->inst_imm >= 32) {
x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
x86_clear_reg (code, ins->sreg1);
- x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm & 0x1f);
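+ /* assuming inst_imm < 64, only the high word is produced:
+ * low << (inst_imm - 32) */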
+ x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
} else {
x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
if (ins->inst_imm >= 32) {
x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
- x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm & 0x1f);
+ x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
} else {
x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
if (ins->inst_imm >= 32) {
x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
x86_clear_reg (code, ins->unused);
- x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm & 0x1f);
+ x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
} else {
x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);