2 * local-propagation.c: Local constant, copy and tree propagation.
4 * To make some sense of the tree mover, read mono/docs/tree-mover.txt
7 * Paolo Molaro (lupus@ximian.com)
8 * Dietmar Maurer (dietmar@ximian.com)
9 * Massimiliano Mantione (massi@ximian.com)
11 * (C) 2006 Novell, Inc. http://www.novell.com
12 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
24 #include <mono/metadata/debug-helpers.h>
25 #include <mono/metadata/mempool.h>
26 #include <mono/metadata/opcodes.h>
/*
 * Fallback definition: when MONO_ARCH_IS_OP_MEMBASE has not been defined
 * already (presumably by an architecture specific header), it evaluates to
 * FALSE for every opcode.
 */
30 #ifndef MONO_ARCH_IS_OP_MEMBASE
31 #define MONO_ARCH_IS_OP_MEMBASE(opcode) FALSE
/*
 * mono_bitset_mp_new_noinit:
 *
 *   Build a bitset for MAX_SIZE on top of memory obtained from the mempool MP.
 * The memory is handed to mono_bitset_mem_new () exactly as returned by
 * mono_mempool_alloc (); nothing in this function clears it, so callers have
 * to initialize the bits they rely on. MONO_BITSET_DONT_FREE is passed as the
 * flags argument (presumably because the mempool owns the storage).
 */
34 static inline MonoBitSet*
35 mono_bitset_mp_new_noinit (MonoMemPool *mp, guint32 max_size)
/* The value reported by mono_bitset_alloc_size () is used as the allocation size below */
37 int size = mono_bitset_alloc_size (max_size, 0);
40 mem = mono_mempool_alloc (mp, size);
41 return mono_bitset_mem_new (mem, max_size, MONO_BITSET_DONT_FREE);
45 * Replaces ins with optimized opcodes.
46 * Returns TRUE if additional vregs were allocated.
49 mono_strength_reduction_ins (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, const char **spec)
/*
 * Summary of what the visible code does:
 * - INS is rewritten in place (opcode and/or immediate changed) or deleted
 *   from BB when a cheaper equivalent exists for its opcode/immediate.
 * - For the shift based division sequences, helper instructions are inserted
 *   into BB after INS and a temporary vreg is allocated from CFG.
 * - *SPEC is reloaded with INS_INFO () for the final opcode before returning.
 * - The return value is TRUE only on the paths which call mono_alloc_ireg ().
 */
51 gboolean allocated_vregs = FALSE;
53 /* FIXME: Add long/float */
54 switch (ins->opcode) {
/*
 * An instruction whose destination equals its first source is removed.
 * NOTE(review): the case label is not visible here - presumably a move.
 */
57 if (ins->dreg == ins->sreg1) {
58 MONO_DELETE_INS (bb, ins);
65 #if SIZEOF_REGISTER == 8
/* A zero immediate turns the instruction into a plain move */
69 if (ins->inst_imm == 0) {
70 ins->opcode = OP_MOVE;
75 #if SIZEOF_REGISTER == 8
/*
 * Multiplication by an immediate:
 *   0           -> constant opcode (I8CONST for OP_LMUL_IMM, ICONST otherwise)
 *   1           -> move
 *   -1          -> negate (INEG/LNEG depending on the opcode)
 *   power of 2  -> left shift by the value mono_is_power_of_two () returned
 */
78 if (ins->inst_imm == 0) {
79 ins->opcode = (ins->opcode == OP_LMUL_IMM) ? OP_I8CONST : OP_ICONST;
82 } else if (ins->inst_imm == 1) {
83 ins->opcode = OP_MOVE;
84 } else if ((ins->opcode == OP_IMUL_IMM) && (ins->inst_imm == -1)) {
85 ins->opcode = OP_INEG;
86 } else if ((ins->opcode == OP_LMUL_IMM) && (ins->inst_imm == -1)) {
87 ins->opcode = OP_LNEG;
89 int power2 = mono_is_power_of_two (ins->inst_imm);
/* The shift opcode is chosen to match the width of the multiply being replaced */
91 ins->opcode = (ins->opcode == OP_MUL_IMM) ? OP_SHL_IMM : ((ins->opcode == OP_LMUL_IMM) ? OP_LSHL_IMM : OP_ISHL_IMM);
92 ins->inst_imm = power2;
/*
 * Unsigned remainder/division by an immediate:
 *   remainder -> AND with (1 << power2) - 1
 *   division  -> unsigned right shift by power2
 * NOTE(review): (1 << power2) is computed in int; if power2 can be 31 here
 * the shift overflows a signed int - confirm the guard on power2.
 */
97 case OP_IDIV_UN_IMM: {
98 int c = ins->inst_imm;
99 int power2 = mono_is_power_of_two (c);
102 if (ins->opcode == OP_IREM_UN_IMM) {
103 ins->opcode = OP_IAND_IMM;
105 ins->inst_imm = (1 << power2) - 1;
106 } else if (ins->opcode == OP_IDIV_UN_IMM) {
107 ins->opcode = OP_ISHR_UN_IMM;
109 ins->inst_imm = power2;
115 int c = ins->inst_imm;
116 int power2 = mono_is_power_of_two (c);
117 MonoInst *tmp1, *tmp2, *tmp3, *tmp4;
119 /* FIXME: Move this elsewhere cause its hard to implement it here */
/*
 * First sequence, each instruction inserted after the previous one:
 *   tmp1: r1   = sreg1 >> 31 (unsigned shift)
 *   tmp2: r1   = r1 + sreg1
 *   tmp3: dreg = r1 >> 1 (signed shift)
 * NOTE(review): presumably the signed division by 2 case; the opcode and the
 * condition selecting this branch are not visible here.
 */
121 int r1 = mono_alloc_ireg (cfg);
123 NEW_BIALU_IMM (cfg, tmp1, OP_ISHR_UN_IMM, r1, ins->sreg1, 31);
124 mono_bblock_insert_after_ins (bb, ins, tmp1);
125 NEW_BIALU (cfg, tmp2, OP_IADD, r1, r1, ins->sreg1);
126 mono_bblock_insert_after_ins (bb, tmp1, tmp2);
127 NEW_BIALU_IMM (cfg, tmp3, OP_ISHR_IMM, ins->dreg, r1, 1);
128 mono_bblock_insert_after_ins (bb, tmp2, tmp3);
/* A new vreg (r1) was created, report it to the caller */
131 allocated_vregs = TRUE;
/*
 * Second sequence, used for 0 < power2 < 31:
 *   tmp1: r1   = sreg1 >> 31 (signed shift)
 *   tmp2: r1   = r1 >> (32 - power2) (unsigned shift)
 *   tmp3: r1   = r1 + sreg1
 *   tmp4: dreg = r1 >> power2 (signed shift)
 */
132 } else if (power2 > 0 && power2 < 31) {
133 int r1 = mono_alloc_ireg (cfg);
135 NEW_BIALU_IMM (cfg, tmp1, OP_ISHR_IMM, r1, ins->sreg1, 31);
136 mono_bblock_insert_after_ins (bb, ins, tmp1);
137 NEW_BIALU_IMM (cfg, tmp2, OP_ISHR_UN_IMM, r1, r1, (32 - power2));
138 mono_bblock_insert_after_ins (bb, tmp1, tmp2);
139 NEW_BIALU (cfg, tmp3, OP_IADD, r1, r1, ins->sreg1);
140 mono_bblock_insert_after_ins (bb, tmp2, tmp3);
141 NEW_BIALU_IMM (cfg, tmp4, OP_ISHR_IMM, ins->dreg, r1, power2);
142 mono_bblock_insert_after_ins (bb, tmp3, tmp4);
145 allocated_vregs = TRUE;
/* The opcode may have changed above, so hand the matching spec back to the caller */
153 *spec = INS_INFO (ins->opcode);
154 return allocated_vregs;
160 * A combined local copy and constant propagation pass.
163 mono_local_cprop (MonoCompile *cfg)
/*
 * Overview of the visible logic:
 * - Every bblock reachable through next_bb from cfg->bb_entry is processed.
 * - defs [vreg] holds the instruction recorded as the latest definition of
 *   vreg inside the current bblock, def_index [vreg] the ins_index stored
 *   together with it (see the end of the per instruction loop).
 * - For each source register with a recorded definition, the instruction is
 *   rewritten: moves are looked through, constants are folded or turned into
 *   immediates, and ADD_IMM definitions are merged into membase offsets.
 * - filter is passed to mono_inst_next () (presumably so IL sequence points
 *   are skipped when checking that two instructions are adjacent).
 */
169 int filter = FILTER_IL_SEQ_POINT;
173 max = cfg->next_vreg;
/*
 * Both tables have one slot per vreg existing at this point. They are not
 * cleared here; the entries a bblock uses are reset at the top of the bblock loop.
 * NOTE(review): def_index is a gint32 table sized with sizeof (guint32); the
 * sizes match, only the type names differ.
 */
174 defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * cfg->next_vreg);
175 def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * cfg->next_vreg);
177 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
182 /* Manually init the defs entries used by the bblock */
183 MONO_BB_FOR_EACH_INS (bb, ins) {
184 int sregs [MONO_MAX_SRC_REGS];
/*
 * Only dregs below max have a slot. With 4 byte registers the following
 * slot is reset too (presumably the second half of a 64 bit vreg pair).
 */
187 if ((ins->dreg != -1) && (ins->dreg < max)) {
188 defs [ins->dreg] = NULL;
189 #if SIZEOF_REGISTER == 4
190 defs [ins->dreg + 1] = NULL;
194 num_sregs = mono_inst_get_src_registers (ins, sregs);
195 for (i = 0; i < num_sregs; ++i) {
196 int sreg = sregs [i];
199 #if SIZEOF_REGISTER == 4
200 defs [sreg + 1] = NULL;
/* -1: no call has been seen yet in this bblock */
207 last_call_index = -1;
208 MONO_BB_FOR_EACH_INS (bb, ins) {
209 const char *spec = INS_INFO (ins->opcode);
210 int regtype, srcindex, sreg;
212 int sregs [MONO_MAX_SRC_REGS];
/* NOPs are removed from the bblock */
214 if (ins->opcode == OP_NOP) {
215 MONO_DELETE_INS (bb, ins);
/* Opcodes up to MONO_CEE_LAST are not expected at this stage */
219 g_assert (ins->opcode > MONO_CEE_LAST);
221 /* FIXME: Optimize this */
/*
 * The address of a variable is taken: drop the recorded definition of its
 * vreg (presumably because it can now change through the pointer).
 */
222 if (ins->opcode == OP_LDADDR) {
223 MonoInst *var = (MonoInst *)ins->inst_p0;
225 defs [var->dreg] = NULL;
227 if (!MONO_TYPE_ISSTRUCT (var->inst_vtype))
/*
 * Stores: if sreg was defined by an OP_MOVE whose source has no recorded
 * definition or was defined before the move, and that source is not
 * volatile, the move's source vreg can be used instead (logged as CCOPY).
 */
232 if (MONO_IS_STORE_MEMBASE (ins)) {
236 if ((regtype == 'i') && (sreg != -1) && defs [sreg]) {
237 MonoInst *def = defs [sreg];
239 if ((def->opcode == OP_MOVE) && (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) && !vreg_is_volatile (cfg, def->sreg1)) {
240 int vreg = def->sreg1;
241 if (cfg->verbose_level > 2) printf ("CCOPY: R%d -> R%d\n", sreg, vreg);
/* Examine every source register of the instruction */
247 num_sregs = mono_inst_get_src_registers (ins, sregs);
248 for (srcindex = 0; srcindex < num_sregs; ++srcindex) {
/* Re-read the sources, earlier iterations may have changed them */
251 mono_inst_get_src_registers (ins, sregs);
253 regtype = spec [MONO_INST_SRC1 + srcindex];
254 sreg = sregs [srcindex];
/* Nothing to do for unused sources or sources without a recorded definition */
256 if ((regtype == ' ') || (sreg == -1) || (!defs [sreg]))
/* When all conditions below hold, the source is redirected to the source vreg of the move (logged as CCOPY/2) */
261 /* Copy propagation */
263 * The first check makes sure the source of the copy did not change since
265 * The second check avoids volatile variables.
266 * The third check avoids copy propagating local vregs through a call,
267 * since the lvreg will be spilled
268 * The fourth check avoids copy propagating a vreg in cases where
269 * it would be eliminated anyway by reverse copy propagation later,
270 * because propagating it would create another use for it, thus making
271 * it impossible to use reverse copy propagation.
273 /* Enabling this for floats trips up the fp stack */
275 * Enabling this for floats on amd64 seems to cause a failure in
276 * basic-math.cs, most likely because it gets rid of some r8->r4
279 if (MONO_IS_MOVE (def) &&
280 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) &&
281 !vreg_is_volatile (cfg, def->sreg1) &&
282 /* This avoids propagating local vregs across calls */
283 ((get_vreg_to_inst (cfg, def->sreg1) || !defs [def->sreg1] || (def_index [def->sreg1] >= last_call_index) || (def->opcode == OP_VMOVE))) &&
284 !(defs [def->sreg1] && mono_inst_next (defs [def->sreg1], filter) == def) &&
285 (!MONO_ARCH_USE_FPSTACK || (def->opcode != OP_FMOVE)) &&
286 (def->opcode != OP_FMOVE)) {
287 int vreg = def->sreg1;
289 if (cfg->verbose_level > 2) printf ("CCOPY/2: R%d -> R%d\n", sreg, vreg);
290 sregs [srcindex] = vreg;
291 mono_inst_set_src_registers (ins, sregs);
293 /* Allow further iterations */
/*
 * The definition qualifies when it is an integer constant (I8CONST only
 * with 8 byte pointers) which is either the single operand of ins or
 * accepted by mono_arch_is_inst_imm (), or when it is an R8 constant on
 * targets which do not use the fp stack.
 */
298 /* Constant propagation */
299 /* FIXME: Make is_inst_imm a macro */
300 /* FIXME: Make is_inst_imm take an opcode argument */
301 /* is_inst_imm is only needed for binops */
302 if ((((def->opcode == OP_ICONST) || ((sizeof (gpointer) == 8) && (def->opcode == OP_I8CONST))) &&
303 (((srcindex == 0) && (ins->sreg2 == -1)) || mono_arch_is_inst_imm (def->inst_c0))) ||
304 (!MONO_ARCH_USE_FPSTACK && (def->opcode == OP_R8CONST))) {
307 /* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
308 if ((srcindex == 1) && (ins->sreg1 != -1) && defs [ins->sreg1] && (defs [ins->sreg1]->opcode == OP_ICONST) && defs [ins->sreg2]) {
309 /* Both arguments are constants, perform cfold */
310 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
311 } else if ((srcindex == 0) && (ins->sreg2 != -1) && defs [ins->sreg2]) {
312 /* Arg 1 is constant, swap arguments if possible */
313 int opcode = ins->opcode;
314 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
/* A changed opcode means the folding routine rewrote the instruction */
315 if (ins->opcode != opcode) {
316 /* Allow further iterations */
320 } else if ((srcindex == 0) && (ins->sreg2 == -1)) {
321 /* Constant unop, perform cfold */
322 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], NULL, TRUE);
/*
 * Switch to the immediate form of the opcode when one exists: the constant
 * becomes the immediate and the source register slot is set to -1.
 */
325 opcode2 = mono_op_to_op_imm (ins->opcode);
326 if ((opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0) && ((srcindex == 1) || (ins->sreg2 == -1))) {
327 ins->opcode = opcode2;
/* An I8CONST with 4 byte pointers is copied as two words */
328 if ((def->opcode == OP_I8CONST) && (sizeof (gpointer) == 4)) {
329 ins->inst_ls_word = def->inst_ls_word;
330 ins->inst_ms_word = def->inst_ms_word;
332 ins->inst_imm = def->inst_c0;
334 sregs [srcindex] = -1;
335 mono_inst_set_src_registers (ins, sregs);
/* For direct call opcodes the immediate is stored as the call target */
337 if ((opcode2 == OP_VOIDCALL) || (opcode2 == OP_CALL) || (opcode2 == OP_LCALL) || (opcode2 == OP_FCALL))
338 ((MonoCallInst*)ins)->fptr = (gpointer)ins->inst_imm;
340 /* Allow further iterations */
/*
 * LEA whose second source is constant: turned into an add-immediate, with
 * the constant shifted by shift_amount and added to the existing immediate.
 */
346 #if defined(TARGET_X86) || defined(TARGET_AMD64)
347 if ((ins->opcode == OP_X86_LEA) && (srcindex == 1)) {
348 #if SIZEOF_REGISTER == 8
349 /* FIXME: Use OP_PADD_IMM when the new JIT is done */
350 ins->opcode = OP_LADD_IMM;
352 ins->opcode = OP_ADD_IMM;
354 ins->inst_imm += def->inst_c0 << ins->backend.shift_amount;
/* Load with a constant base register: load from the sum of constant and offset instead */
358 opcode2 = mono_load_membase_to_load_mem (ins->opcode);
359 if ((srcindex == 0) && (opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0)) {
360 ins->opcode = opcode2;
361 ins->inst_imm = def->inst_c0 + ins->inst_offset;
366 else if (((def->opcode == OP_ADD_IMM) || (def->opcode == OP_LADD_IMM)) && (MONO_IS_LOAD_MEMBASE (ins) || MONO_ARCH_IS_OP_MEMBASE (ins->opcode))) {
367 /* ADD_IMM is created by spill_global_vars */
369 * We have to guarantee that def->sreg1 haven't changed since def->dreg
370 * was defined. cfg->frame_reg is assumed to remain constant.
372 if ((def->sreg1 == cfg->frame_reg) || ((mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1))) {
373 ins->inst_basereg = def->sreg1;
374 ins->inst_offset += def->inst_imm;
/*
 * Adjacent add/sub immediate pairs are merged into ins:
 *   x = y + b; z = x - a   becomes   z = y - (a - b)
 *   x = y - b; z = x + a   becomes   z = y + (a - b)
 */
376 } else if ((ins->opcode == OP_ISUB_IMM) && (def->opcode == OP_IADD_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
377 ins->sreg1 = def->sreg1;
378 ins->inst_imm -= def->inst_imm;
379 } else if ((ins->opcode == OP_IADD_IMM) && (def->opcode == OP_ISUB_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
380 ins->sreg1 = def->sreg1;
381 ins->inst_imm -= def->inst_imm;
/*
 * Narrow stores: when the stored value comes from one of the listed
 * conversions and the conversion's source is still current, store that
 * source directly.
 */
382 } else if (ins->opcode == OP_STOREI1_MEMBASE_REG &&
383 (def->opcode == OP_ICONV_TO_U1 || def->opcode == OP_ICONV_TO_I1 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_U1)) &&
384 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
385 /* Avoid needless sign extension */
386 ins->sreg1 = def->sreg1;
387 } else if (ins->opcode == OP_STOREI2_MEMBASE_REG &&
388 (def->opcode == OP_ICONV_TO_U2 || def->opcode == OP_ICONV_TO_I2 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_I2)) &&
389 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
390 /* Avoid needless sign extension */
391 ins->sreg1 = def->sreg1;
/* spec is updated by the callee; a TRUE result means new vregs were allocated */
395 /* Do strength reduction here */
396 if (mono_strength_reduction_ins (cfg, bb, ins, &spec))
/* Store whose base register was computed as frame_reg + imm: address the frame register directly */
399 if (spec [MONO_INST_DEST] != ' ') {
400 MonoInst *def = defs [ins->dreg];
402 if (def && (def->opcode == OP_ADD_IMM) && (def->sreg1 == cfg->frame_reg) && (MONO_IS_STORE_MEMBASE (ins))) {
403 /* ADD_IMM is created by spill_global_vars */
404 /* cfg->frame_reg is assumed to remain constant */
405 ins->inst_destbasereg = def->sreg1;
406 ins->inst_offset += def->inst_imm;
/* Record ins as the current definition of its dreg; stores and volatile vregs are excluded */
410 if ((spec [MONO_INST_DEST] != ' ') && !MONO_IS_STORE_MEMBASE (ins) && !vreg_is_volatile (cfg, ins->dreg)) {
411 defs [ins->dreg] = ins;
412 def_index [ins->dreg] = ins_index;
/* Remember where the latest call is, the copy propagation check above compares against it */
415 if (MONO_IS_CALL (ins))
416 last_call_index = ins_index;
/*
 * reg_is_softreg_no_fpstack:
 *
 *   Check REG against the limit of the register class named by the spec
 * character SPEC: 'i' against MONO_MAX_IREGS, 'f' against MONO_MAX_FREGS and,
 * when MONO_ARCH_SIMD_INTRINSICS is defined, 'x' against MONO_MAX_XREGS.
 * The result is true when REG is at or above the limit (presumably meaning it
 * is a vreg rather than a hard register). The 'f' class never matches when
 * MONO_ARCH_USE_FPSTACK is set.
 */
423 static inline gboolean
424 reg_is_softreg_no_fpstack (int reg, const char spec)
426 return (spec == 'i' && reg >= MONO_MAX_IREGS)
427 || ((spec == 'f' && reg >= MONO_MAX_FREGS) && !MONO_ARCH_USE_FPSTACK)
428 #ifdef MONO_ARCH_SIMD_INTRINSICS
429 || (spec == 'x' && reg >= MONO_MAX_XREGS)
/*
 * reg_is_softreg:
 *
 *   Check REG against the limit of the register class named by the spec
 * character SPEC: 'i' against MONO_MAX_IREGS, 'f' against MONO_MAX_FREGS and,
 * when MONO_ARCH_SIMD_INTRINSICS is defined, 'x' against MONO_MAX_XREGS.
 * The result is true when REG is at or above the limit (presumably meaning it
 * is a vreg rather than a hard register). Unlike the _no_fpstack variant, the
 * 'f' class is checked regardless of MONO_ARCH_USE_FPSTACK.
 */
434 static inline gboolean
435 reg_is_softreg (int reg, const char spec)
437 return (spec == 'i' && reg >= MONO_MAX_IREGS)
438 || (spec == 'f' && reg >= MONO_MAX_FREGS)
439 #ifdef MONO_ARCH_SIMD_INTRINSICS
440 || (spec == 'x' && reg >= MONO_MAX_XREGS)
/*
 * mono_is_simd_accessor:
 *
 *   Classify INS by its opcode. The opcodes listed under
 * MONO_ARCH_SIMD_INTRINSICS include the OP_INSERTX_*_SLOW family.
 * NOTE(review): presumably TRUE is returned for the listed opcodes and FALSE
 * otherwise; the return statements are not visible here - confirm.
 */
445 static inline gboolean
446 mono_is_simd_accessor (MonoInst *ins)
448 switch (ins->opcode) {
449 #ifdef MONO_ARCH_SIMD_INTRINSICS
457 case OP_INSERTX_U1_SLOW:
458 case OP_INSERTX_I4_SLOW:
459 case OP_INSERTX_R4_SLOW:
460 case OP_INSERTX_R8_SLOW:
461 case OP_INSERTX_I8_SLOW:
472 * Get rid of the dead assignments to local vregs like the ones created by the
476 mono_local_deadce (MonoCompile *cfg)
/*
 * Overview of the visible logic, repeated for every bblock reachable through
 * next_bb from cfg->bb_entry:
 * - the used/defined bits of every vreg mentioned in the bblock are cleared;
 * - the instructions are walked backwards; used collects the vregs read by
 *   the instructions already visited (i.e. later in the bblock), defined the
 *   vregs written by them;
 * - a move directly following the definition of its source may be merged
 *   into that definition (reverse copy propagation);
 * - an instruction without side effects whose result is not in used is
 *   deleted.
 */
479 MonoInst *ins, *prev;
480 MonoBitSet *used, *defined;
482 //mono_print_code (cfg, "BEFORE LOCAL-DEADCE");
485 * Assignments to global vregs can't be eliminated so this pass must come
486 * after the handle_global_vregs () pass.
489 used = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
490 defined = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
/* The bitsets come back uninitialized, hence the explicit clearing done per bblock below */
492 /* First pass: collect liveness info */
493 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
494 /* Manually init the defs entries used by the bblock */
495 MONO_BB_FOR_EACH_INS (bb, ins) {
496 const char *spec = INS_INFO (ins->opcode);
497 int sregs [MONO_MAX_SRC_REGS];
/* Destination: clear both bits; with 4 byte registers the next vreg number is cleared too */
500 if (spec [MONO_INST_DEST] != ' ') {
501 mono_bitset_clear_fast (used, ins->dreg);
502 mono_bitset_clear_fast (defined, ins->dreg);
503 #if SIZEOF_REGISTER == 4
505 mono_bitset_clear_fast (used, ins->dreg + 1);
506 mono_bitset_clear_fast (defined, ins->dreg + 1);
/* Sources: only the used bit is cleared */
509 num_sregs = mono_inst_get_src_registers (ins, sregs);
510 for (i = 0; i < num_sregs; ++i) {
511 mono_bitset_clear_fast (used, sregs [i]);
512 #if SIZEOF_REGISTER == 4
513 mono_bitset_clear_fast (used, sregs [i] + 1);
519 * Make a reverse pass over the instruction list
521 MONO_BB_FOR_EACH_INS_REVERSE_SAFE (bb, prev, ins) {
/* spec may be reloaded below after ins has been deleted */
522 const char *spec = INS_INFO (ins->opcode);
523 int sregs [MONO_MAX_SRC_REGS];
/* Previous instruction according to mono_inst_prev () with the NOP and IL sequence point filters */
525 MonoInst *prev_f = mono_inst_prev (ins, FILTER_NOP | FILTER_IL_SEQ_POINT);
527 if (ins->opcode == OP_NOP) {
528 MONO_DELETE_INS (bb, ins);
532 g_assert (ins->opcode > MONO_CEE_LAST);
/* Candidate for reverse copy propagation: a non fp move which has a preceding instruction */
534 if (MONO_IS_NON_FP_MOVE (ins) && prev_f) {
539 spec2 = INS_INFO (def->opcode);
542 * Perform a limited kind of reverse copy propagation, i.e.
543 * transform B <- FOO; A <- B into A <- FOO
544 * This isn't copyprop, not deadce, but it can only be performed
545 * after handle_global_vregs () has run.
547 if (!get_vreg_to_inst (cfg, ins->sreg1) && (spec2 [MONO_INST_DEST] != ' ') && (def->dreg == ins->sreg1) && !mono_bitset_test_fast (used, ins->sreg1) && !MONO_IS_STORE_MEMBASE (def) && reg_is_softreg (ins->sreg1, spec [MONO_INST_DEST]) && !mono_is_simd_accessor (def)) {
548 if (cfg->verbose_level > 2) {
549 printf ("\tReverse copyprop in BB%d on ", bb->block_num);
550 mono_print_ins (ins);
/* def now writes the destination of the move directly and the move is deleted */
553 def->dreg = ins->dreg;
554 MONO_DELETE_INS (bb, ins);
555 spec = INS_INFO (ins->opcode);
/* Dead code elimination proper */
559 /* Enabling this on x86 could screw up the fp stack */
560 if (reg_is_softreg_no_fpstack (ins->dreg, spec [MONO_INST_DEST])) {
562 * Assignments to global vregs can only be eliminated if there is another
563 * assignment to the same vreg later in the same bblock.
565 if (!mono_bitset_test_fast (used, ins->dreg) &&
566 (!get_vreg_to_inst (cfg, ins->dreg) || (!bb->extended && !vreg_is_volatile (cfg, ins->dreg) && mono_bitset_test_fast (defined, ins->dreg))) &&
567 MONO_INS_HAS_NO_SIDE_EFFECT (ins)) {
/* A directly preceding OP_ICOMPARE_IMM is handled together with the dead instruction */
568 /* Happens with CMOV instructions */
569 if (prev_f && prev_f->opcode == OP_ICOMPARE_IMM) {
570 MonoInst *prev = prev_f;
572 * Can't use DELETE_INS since that would interfere with the
577 //printf ("DEADCE: "); mono_print_ins (ins);
578 MONO_DELETE_INS (bb, ins);
579 spec = INS_INFO (ins->opcode);
/* Checked against the spec reloaded after the deletion */
582 if (spec [MONO_INST_DEST] != ' ')
583 mono_bitset_clear_fast (used, ins->dreg);
/* Account for this instruction: its dreg is defined, its sources are used */
586 if (spec [MONO_INST_DEST] != ' ')
587 mono_bitset_set_fast (defined, ins->dreg);
588 num_sregs = mono_inst_get_src_registers (ins, sregs);
589 for (i = 0; i < num_sregs; ++i)
590 mono_bitset_set_fast (used, sregs [i]);
/* For membase stores dreg is marked as used as well */
591 if (MONO_IS_STORE_MEMBASE (ins))
592 mono_bitset_set_fast (used, ins->dreg);
/*
 * Calls: every entry of out_ireg_args and out_freg_args counts as a use.
 * The register number is taken from the low 24 bits of the list entry.
 */
594 if (MONO_IS_CALL (ins)) {
595 MonoCallInst *call = (MonoCallInst*)ins;
598 if (call->out_ireg_args) {
599 for (l = call->out_ireg_args; l; l = l->next) {
600 guint32 regpair, reg;
602 regpair = (guint32)(gssize)(l->data);
603 reg = regpair & 0xffffff;
605 mono_bitset_set_fast (used, reg);
609 if (call->out_freg_args) {
610 for (l = call->out_freg_args; l; l = l->next) {
611 guint32 regpair, reg;
613 regpair = (guint32)(gssize)(l->data);
614 reg = regpair & 0xffffff;
616 mono_bitset_set_fast (used, reg);
623 //mono_print_code (cfg, "AFTER LOCAL-DEADCE");
626 #endif /* DISABLE_JIT */