3 * Local constant, copy and tree propagation.
5 * To make some sense of the tree mover, read mono/docs/tree-mover.txt
8 * Paolo Molaro (lupus@ximian.com)
9 * Dietmar Maurer (dietmar@ximian.com)
10 * Massimiliano Mantione (massi@ximian.com)
12 * (C) 2006 Novell, Inc. http://www.novell.com
13 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
18 #include <mono/utils/mono-compiler.h>
28 #include <mono/metadata/debug-helpers.h>
29 #include <mono/metadata/mempool.h>
30 #include <mono/metadata/opcodes.h>
34 #ifndef MONO_ARCH_IS_OP_MEMBASE
35 #define MONO_ARCH_IS_OP_MEMBASE(opcode) FALSE
/*
 * mono_bitset_mp_new_noinit:
 *
 *   Create a bitset able to hold MAX_SIZE bits whose storage is taken from
 * the mempool MP. The byte size is obtained from mono_bitset_alloc_size ()
 * and the memory is handed to mono_bitset_mem_new () together with
 * MONO_BITSET_DONT_FREE.
 * NOTE(review): as the _noinit suffix suggests, nothing in this function
 * clears the memory; presumably callers must clear every bit they rely on
 * (mono_local_deadce () does so per bblock) - confirm against
 * mono_mempool_alloc ().
 */
38 static inline MonoBitSet*
39 mono_bitset_mp_new_noinit (MonoMemPool *mp, guint32 max_size)
41 int size = mono_bitset_alloc_size (max_size, 0);
44 mem = mono_mempool_alloc (mp, size);
45 return mono_bitset_mem_new (mem, max_size, MONO_BITSET_DONT_FREE);
/*
 * Result type of compute_magic_unsigned (). The member list is not visible
 * in this excerpt; code in this file reads at least the members
 * magic_number and shift.
 */
48 struct magic_unsigned {
59 /* http://www.hackersdelight.org/hdcodetxt/magicu.c.txt */
/*
 * compute_magic_unsigned:
 *
 *   Compute the constants used to replace an unsigned 32 bit division by
 * DIVISOR with a multiplication plus shifts/adds, following the reference
 * implementation linked above. The result is returned by value in a
 * struct magic_unsigned; magic_number is set to q2 + 1 once the loop
 * terminates.
 * NOTE(review): most of the loop body is missing from this excerpt. The
 * visible caller tests for a zero divisor and for powers of two before
 * calling this function - presumably those inputs never reach here; confirm.
 */
60 static struct magic_unsigned
61 compute_magic_unsigned (guint32 divisor) {
62 guint32 nc, delta, q1, r1, q2, r2;
63 struct magic_unsigned magu;
/* divisor is a guint32, so -divisor wraps modulo 2^32 (intended). */
68 nc = -1 - (-divisor) % divisor;
71 r1 = 0x80000000 - q1 * nc;
72 q2 = 0x7FFFFFFF / divisor;
73 r2 = 0x7FFFFFFF - q2 * divisor;
85 if (r2 + 1 >= divisor - r2) {
89 r2 = 2 * r2 + 1 - divisor;
96 delta = divisor - 1 - r2;
97 } while (!gt && (q1 < delta || (q1 == delta && r1 == 0)));
99 magu.magic_number = q2 + 1;
104 /* http://www.hackersdelight.org/hdcodetxt/magic.c.txt */
/*
 * compute_magic_signed:
 *
 *   Signed counterpart of compute_magic_unsigned (): compute the constants
 * used to replace a signed 32 bit division by DIVISOR with a multiplication
 * plus shifts/adds, following the reference implementation linked above.
 * The result is returned by value in a struct magic_signed; magic_number is
 * set to q2 + 1 and is negated on a path whose condition is not visible in
 * this excerpt (presumably a negative divisor - confirm).
 * NOTE(review): the visible caller filters out the divisors 0, -1 and 1 and
 * handles some power-of-two divisors with shifts before calling this
 * function.
 */
105 static struct magic_signed
106 compute_magic_signed (gint32 divisor) {
108 guint32 ad, anc, delta, q1, r1, q2, r2, t;
109 const guint32 two31 = 0x80000000;
110 struct magic_signed mag;
/* t is 2^31 plus the sign bit of divisor. */
113 t = two31 + ((unsigned)divisor >> 31);
114 anc = t - 1 - t % ad;
117 r1 = two31 - q1 * anc;
119 r2 = two31 - q2 * ad;
138 } while (q1 < delta || (q1 == delta && r1 == 0));
140 mag.magic_number = q2 + 1;
142 mag.magic_number = -mag.magic_number;
/*
 * mono_strength_reduction_division:
 *
 *   Try to replace the integer division by an immediate in INS with cheaper
 * code. In the visible cases an unsigned power-of-two divisor turns INS into
 * OP_ISHR_UN_IMM, some signed power-of-two divisors become shift/add
 * sequences, and other divisors become a multiplication by the magic number
 * from compute_magic_unsigned () / compute_magic_signed () followed by
 * shifts and adds. The SIZEOF_REGISTER == 8 paths work on 64 bit (L*) ops;
 * the other paths use OP_BIGMUL_UN / OP_BIGMUL and the two halves of a long
 * vreg (MONO_LVREG_LS / MONO_LVREG_MS).
 *
 * Returns the allocated_vregs flag, which is set to TRUE on the paths that
 * allocate new vregs.
 *
 * NOTE(review): this excerpt omits lines (braces, the signed case label,
 * early exits, #else/#endif), so the exact control flow must be checked
 * against the complete file.
 */
148 mono_strength_reduction_division (MonoCompile *cfg, MonoInst *ins)
150 gboolean allocated_vregs = FALSE;
152 * We don't use it on 32bit systems because on those
153 * platforms we emulate long multiplication, driving the
154 * performance back down.
156 switch (ins->opcode) {
157 case OP_IDIV_UN_IMM: {
159 #if SIZEOF_REGISTER == 8
160 guint32 dividend_reg;
164 struct magic_unsigned mag;
165 int power2 = mono_is_power_of_two (ins->inst_imm);
167 /* The decomposition doesn't handle exception throwing */
168 if (ins->inst_imm == 0)
172 ins->opcode = OP_ISHR_UN_IMM;
174 ins->inst_imm = power2;
/* The backend can opt out of the multiplication based replacement. */
177 if (cfg->backend->disable_div_with_mul)
179 allocated_vregs = TRUE;
181 * Replacement of unsigned division with multiplication,
182 * shifts and additions Hacker's Delight, chapter 10-10.
184 mag = compute_magic_unsigned (ins->inst_imm);
185 tmp_regl = alloc_lreg (cfg);
186 #if SIZEOF_REGISTER == 8
187 dividend_reg = alloc_lreg (cfg);
188 MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
189 MONO_EMIT_NEW_UNALU (cfg, OP_ZEXT_I4, dividend_reg, ins->sreg1);
190 MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
192 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
193 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
194 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, mag.shift);
196 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, 32 + mag.shift);
199 tmp_regi = alloc_ireg (cfg);
200 MONO_EMIT_NEW_ICONST (cfg, tmp_regi, mag.magic_number);
201 MONO_EMIT_NEW_BIALU (cfg, OP_BIGMUL_UN, tmp_regl, ins->sreg1, tmp_regi);
202 /* Long shifts below will be decomposed during cprop */
204 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
205 MONO_EMIT_NEW_BIALU (cfg, OP_IADDCC, MONO_LVREG_LS (tmp_regl), MONO_LVREG_LS (tmp_regl), ins->sreg1);
206 /* MONO_LVREG_MS (tmp_reg) is 0, save in it the carry */
207 MONO_EMIT_NEW_BIALU (cfg, OP_IADC, MONO_LVREG_MS (tmp_regl), MONO_LVREG_MS (tmp_regl), MONO_LVREG_MS (tmp_regl));
208 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, mag.shift);
210 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
212 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ins->dreg, MONO_LVREG_LS (tmp_regl));
214 mono_jit_stats.optimized_divisions++;
219 #if SIZEOF_REGISTER == 8
220 guint32 dividend_reg;
224 struct magic_signed mag;
225 int power2 = mono_is_power_of_two (ins->inst_imm);
226 /* The decomposition doesn't handle exception throwing */
227 /* Optimization with MUL does not apply for -1, 0 and 1 divisors */
228 if (ins->inst_imm == 0 || ins->inst_imm == -1) {
230 } else if (ins->inst_imm == 1) {
231 ins->opcode = OP_MOVE;
235 allocated_vregs = TRUE;
/* Emits dreg = (sreg1 + (sreg1 >>> 31)) >> 1: the sign bit is added before the arithmetic shift. */
237 guint32 r1 = alloc_ireg (cfg);
238 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, ins->sreg1, 31);
239 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
240 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, 1);
242 } else if (power2 > 0 && power2 < 31) {
/* Emits dreg = (sreg1 + ((sreg1 >> 31) >>> (32 - power2))) >> power2, i.e. 2^power2 - 1 is added to negative dividends first. */
243 guint32 r1 = alloc_ireg (cfg);
244 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, r1, ins->sreg1, 31);
245 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, r1, (32 - power2));
246 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
247 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, power2);
251 if (cfg->backend->disable_div_with_mul)
254 * Replacement of signed division with multiplication,
255 * shifts and additions Hacker's Delight, chapter 10-6.
257 mag = compute_magic_signed (ins->inst_imm);
258 tmp_regl = alloc_lreg (cfg);
259 #if SIZEOF_REGISTER == 8
260 dividend_reg = alloc_lreg (cfg);
261 MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
262 MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, dividend_reg, ins->sreg1);
263 MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
264 if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
265 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32);
266 if (ins->inst_imm > 0 && mag.magic_number < 0) {
267 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
268 } else if (ins->inst_imm < 0 && mag.magic_number > 0) {
269 MONO_EMIT_NEW_BIALU (cfg, OP_LSUB, tmp_regl, tmp_regl, dividend_reg);
271 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, mag.shift);
273 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
/* Add the sign bit of the intermediate result to itself to form the final quotient. */
275 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, SIZEOF_REGISTER * 8 - 1);
276 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, ins->dreg, ins->dreg, tmp_regl);
278 tmp_regi = alloc_ireg (cfg);
279 MONO_EMIT_NEW_ICONST (cfg, tmp_regi, mag.magic_number);
280 MONO_EMIT_NEW_BIALU (cfg, OP_BIGMUL, tmp_regl, ins->sreg1, tmp_regi);
281 if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
282 if (ins->inst_imm > 0 && mag.magic_number < 0) {
283 /* Opposite sign, cannot overflow */
284 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, tmp_regi, MONO_LVREG_MS (tmp_regl), ins->sreg1);
285 } else if (ins->inst_imm < 0 && mag.magic_number > 0) {
286 /* Same sign, cannot overflow */
287 MONO_EMIT_NEW_BIALU (cfg, OP_ISUB, tmp_regi, MONO_LVREG_MS (tmp_regl), ins->sreg1);
289 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, tmp_regi, tmp_regi, mag.shift);
291 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, tmp_regi, MONO_LVREG_MS (tmp_regl), mag.shift);
293 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, ins->dreg, tmp_regi, SIZEOF_REGISTER * 8 - 1);
294 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, ins->dreg, ins->dreg, tmp_regi);
296 mono_jit_stats.optimized_divisions++;
300 return allocated_vregs;
304 * Replaces ins with optimized opcodes.
306 * We can emit to cbb the equivalent instructions which will be used as
307 * replacement for ins, or simply change the fields of ins. Spec needs to
308 * be updated if we silently change the opcode of ins.
310 * Returns TRUE if additional vregs were allocated.
/*
 * Additional notes on mono_strength_reduction_ins ():
 * - Before returning, *SPEC is reloaded with INS_INFO (ins->opcode), so the
 *   caller sees the spec of whatever opcode INS ends up with.
 * - Division by an immediate is delegated to
 *   mono_strength_reduction_division () unless COMPILE_LLVM (cfg) is set.
 * - The SIZEOF_REGISTER == 4 section splits 64 bit shifts by an immediate
 *   into operations on the two 32 bit halves (MONO_LVREG_LS / MONO_LVREG_MS)
 *   and does nothing of the sort when compiling with LLVM.
 * NOTE(review): several case labels, braces and break statements are
 * missing from this excerpt; check the control flow against the full file.
 */
313 mono_strength_reduction_ins (MonoCompile *cfg, MonoInst *ins, const char **spec)
315 gboolean allocated_vregs = FALSE;
317 /* FIXME: Add long/float */
318 switch (ins->opcode) {
321 if (ins->dreg == ins->sreg1) {
329 #if SIZEOF_REGISTER == 8
333 if (ins->inst_imm == 0) {
334 ins->opcode = OP_MOVE;
339 #if SIZEOF_REGISTER == 8
342 if (ins->inst_imm == 0) {
343 ins->opcode = (ins->opcode == OP_LMUL_IMM) ? OP_I8CONST : OP_ICONST;
346 } else if (ins->inst_imm == 1) {
347 ins->opcode = OP_MOVE;
348 } else if ((ins->opcode == OP_IMUL_IMM) && (ins->inst_imm == -1)) {
349 ins->opcode = OP_INEG;
350 } else if ((ins->opcode == OP_LMUL_IMM) && (ins->inst_imm == -1)) {
351 ins->opcode = OP_LNEG;
353 int power2 = mono_is_power_of_two (ins->inst_imm);
/* Multiplication by a power of two becomes a left shift of the matching width. */
355 ins->opcode = (ins->opcode == OP_MUL_IMM) ? OP_SHL_IMM : ((ins->opcode == OP_LMUL_IMM) ? OP_LSHL_IMM : OP_ISHL_IMM);
356 ins->inst_imm = power2;
360 case OP_IREM_UN_IMM: {
361 int power2 = mono_is_power_of_two (ins->inst_imm);
364 ins->opcode = OP_IAND_IMM;
/* NOTE(review): 1 << power2 is evaluated as int; if power2 can be 31 here the shift overflows a signed int (undefined behaviour in C). The guard is on a line missing from this excerpt - confirm. */
366 ins->inst_imm = (1 << power2) - 1;
372 if (!COMPILE_LLVM (cfg))
373 allocated_vregs = mono_strength_reduction_division (cfg, ins);
376 #if SIZEOF_REGISTER == 8
380 int power = mono_is_power_of_two (ins->inst_imm);
381 if (ins->inst_imm == 1) {
382 ins->opcode = OP_ICONST;
383 MONO_INST_NULLIFY_SREGS (ins);
388 } else if ((ins->inst_imm > 0) && (ins->inst_imm < (1LL << 32)) && (power != -1)) {
389 gboolean is_long = ins->opcode == OP_LREM_IMM;
390 int compensator_reg = alloc_ireg (cfg);
391 int intermediate_reg;
393 /* Based on gcc code */
395 /* Add compensation for negative numerators */
398 intermediate_reg = compensator_reg;
399 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_IMM : OP_ISHR_IMM, intermediate_reg, ins->sreg1, is_long ? 63 : 31);
401 intermediate_reg = ins->sreg1;
404 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_UN_IMM : OP_ISHR_UN_IMM, compensator_reg, intermediate_reg, (is_long ? 64 : 32) - power);
405 MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LADD : OP_IADD, ins->dreg, ins->sreg1, compensator_reg);
406 /* Compute remainder */
/* NOTE(review): the guard above admits inst_imm == 1LL << 31 when inst_imm is wider than 32 bits; power would then be 31 and 1 << power overflows a signed int - confirm. Also note the non-long arm uses OP_AND_IMM while the neighbouring ops use the I-prefixed forms - presumably intentional, verify. */
407 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LAND_IMM : OP_AND_IMM, ins->dreg, ins->dreg, (1 << power) - 1);
408 /* Remove compensation */
409 MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LSUB : OP_ISUB, ins->dreg, ins->dreg, compensator_reg);
411 allocated_vregs = TRUE;
416 #if SIZEOF_REGISTER == 4
418 if (COMPILE_LLVM (cfg))
/* Arithmetic right shift of a 64 bit value held in two 32 bit halves; the upper half is filled with the sign (shift by 31) when the count is 32 or more. */
420 if (ins->inst_c1 == 32) {
421 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
422 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
423 } else if (ins->inst_c1 == 0) {
424 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
425 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
426 } else if (ins->inst_c1 > 32) {
427 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
428 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
430 guint32 tmpreg = alloc_ireg (cfg);
431 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
432 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
433 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
434 MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
435 allocated_vregs = TRUE;
439 case OP_LSHR_UN_IMM: {
440 if (COMPILE_LLVM (cfg))
/* Logical right shift: same split as above, but the upper half is zero filled. */
442 if (ins->inst_c1 == 32) {
443 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
444 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
445 } else if (ins->inst_c1 == 0) {
446 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
447 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
448 } else if (ins->inst_c1 > 32) {
449 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
450 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
452 guint32 tmpreg = alloc_ireg (cfg);
453 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
454 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
455 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
456 MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
457 allocated_vregs = TRUE;
462 if (COMPILE_LLVM (cfg))
464 if (ins->inst_c1 == 32) {
465 /* just move the lower half to the upper and zero the lower word */
466 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
467 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
468 } else if (ins->inst_c1 == 0) {
469 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
470 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
471 } else if (ins->inst_c1 > 32) {
472 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1 - 32);
473 MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
475 guint32 tmpreg = alloc_ireg (cfg);
476 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, tmpreg, MONO_LVREG_LS (ins->sreg1), 32 - ins->inst_c1);
477 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
478 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
479 MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->dreg), tmpreg);
480 allocated_vregs = TRUE;
/* Reload the spec: the opcode of ins may have been changed above. */
490 *spec = INS_INFO (ins->opcode);
491 return allocated_vregs;
497 * A combined local copy and constant propagation pass.
/*
 * Additional notes on mono_local_cprop ():
 * - defs [vreg] holds the instruction that last defined VREG in the current
 *   bblock (or NULL) and def_index [vreg] the index of that instruction;
 *   both arrays come from cfg->mempool and are sized by cfg->next_vreg.
 *   They are not cleared wholesale: for every bblock, the entries of the
 *   vregs mentioned by its instructions are reset first.
 * - bb_opt is a zeroed scratch bblock installed as cfg->cbb. Strength
 *   reduction emits replacement code into it, and that code is spliced in
 *   place of the instruction with mono_replace_ins ().
 * - When strength reduction allocates vregs beyond MAX, defs and def_index
 *   are reallocated and the old contents copied over.
 * NOTE(review): many lines (braces, declarations, continue/goto statements)
 * are missing from this excerpt; check the control flow against the full
 * file.
 */
500 mono_local_cprop (MonoCompile *cfg)
502 MonoBasicBlock *bb, *bb_opt;
506 int filter = FILTER_IL_SEQ_POINT;
507 int initial_max_vregs = cfg->next_vreg;
509 max = cfg->next_vreg;
510 defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * cfg->next_vreg);
/* NOTE(review): the elements are gint32 while the size uses sizeof (guint32); both are 32 bits wide, so the allocation size is right. */
511 def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * cfg->next_vreg);
512 cfg->cbb = bb_opt = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoBasicBlock));
514 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
519 /* Manually init the defs entries used by the bblock */
520 MONO_BB_FOR_EACH_INS (bb, ins) {
521 int sregs [MONO_MAX_SRC_REGS];
524 if (ins->dreg != -1) {
525 #if SIZEOF_REGISTER == 4
526 const char *spec = INS_INFO (ins->opcode);
527 if (spec [MONO_INST_DEST] == 'l') {
528 defs [ins->dreg + 1] = NULL;
529 defs [ins->dreg + 2] = NULL;
532 defs [ins->dreg] = NULL;
535 num_sregs = mono_inst_get_src_registers (ins, sregs);
536 for (i = 0; i < num_sregs; ++i) {
537 int sreg = sregs [i];
538 #if SIZEOF_REGISTER == 4
539 const char *spec = INS_INFO (ins->opcode);
540 if (spec [MONO_INST_SRC1 + i] == 'l') {
541 defs [sreg + 1] = NULL;
542 defs [sreg + 2] = NULL;
550 last_call_index = -1;
551 MONO_BB_FOR_EACH_INS (bb, ins) {
552 const char *spec = INS_INFO (ins->opcode);
553 int regtype, srcindex, sreg;
555 int sregs [MONO_MAX_SRC_REGS];
557 if (ins->opcode == OP_NOP) {
558 MONO_DELETE_INS (bb, ins);
562 g_assert (ins->opcode > MONO_CEE_LAST);
564 /* FIXME: Optimize this */
565 if (ins->opcode == OP_LDADDR) {
566 MonoInst *var = (MonoInst *)ins->inst_p0;
568 defs [var->dreg] = NULL;
570 if (!MONO_TYPE_ISSTRUCT (var->inst_vtype))
575 if (MONO_IS_STORE_MEMBASE (ins)) {
579 if ((regtype == 'i') && (sreg != -1) && defs [sreg]) {
580 MonoInst *def = defs [sreg];
582 if ((def->opcode == OP_MOVE) && (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) && !vreg_is_volatile (cfg, def->sreg1)) {
583 int vreg = def->sreg1;
584 if (cfg->verbose_level > 2) printf ("CCOPY: R%d -> R%d\n", sreg, vreg);
590 num_sregs = mono_inst_get_src_registers (ins, sregs);
591 for (srcindex = 0; srcindex < num_sregs; ++srcindex) {
594 mono_inst_get_src_registers (ins, sregs);
596 regtype = spec [MONO_INST_SRC1 + srcindex];
597 sreg = sregs [srcindex];
599 if ((regtype == ' ') || (sreg == -1) || (!defs [sreg]))
604 /* Copy propagation */
606 * The first check makes sure the source of the copy did not change since
608 * The second check avoids volatile variables.
609 * The third check avoids copy propagating local vregs through a call,
610 * since the lvreg will be spilled
611 * The fourth check avoids copy propagating a vreg in cases where
612 * it would be eliminated anyway by reverse copy propagation later,
613 * because propagating it would create another use for it, thus making
614 * it impossible to use reverse copy propagation.
616 /* Enabling this for floats trips up the fp stack */
618 * Enabling this for floats on amd64 seems to cause a failure in
619 * basic-math.cs, most likely because it gets rid of some r8->r4
622 if (MONO_IS_MOVE (def) &&
623 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) &&
624 !vreg_is_volatile (cfg, def->sreg1) &&
625 /* This avoids propagating local vregs across calls */
626 ((get_vreg_to_inst (cfg, def->sreg1) || !defs [def->sreg1] || (def_index [def->sreg1] >= last_call_index) || (def->opcode == OP_VMOVE))) &&
627 !(defs [def->sreg1] && mono_inst_next (defs [def->sreg1], filter) == def) &&
628 (!MONO_ARCH_USE_FPSTACK || (def->opcode != OP_FMOVE)) &&
629 (def->opcode != OP_FMOVE)) {
630 int vreg = def->sreg1;
632 if (cfg->verbose_level > 2) printf ("CCOPY/2: R%d -> R%d\n", sreg, vreg);
633 sregs [srcindex] = vreg;
634 mono_inst_set_src_registers (ins, sregs);
636 /* Allow further iterations */
641 /* Constant propagation */
642 /* FIXME: Make is_inst_imm a macro */
643 /* FIXME: Make is_inst_imm take an opcode argument */
644 /* is_inst_imm is only needed for binops */
645 if ((((def->opcode == OP_ICONST) || ((sizeof (gpointer) == 8) && (def->opcode == OP_I8CONST)) || (def->opcode == OP_PCONST)) &&
646 (((srcindex == 0) && (ins->sreg2 == -1)) || mono_arch_is_inst_imm (def->inst_c0))) ||
647 (!MONO_ARCH_USE_FPSTACK && (def->opcode == OP_R8CONST))) {
650 /* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
651 if ((srcindex == 1) && (ins->sreg1 != -1) && defs [ins->sreg1] &&
652 ((defs [ins->sreg1]->opcode == OP_ICONST) || defs [ins->sreg1]->opcode == OP_PCONST) &&
654 /* Both arguments are constants, perform cfold */
655 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
656 } else if ((srcindex == 0) && (ins->sreg2 != -1) && defs [ins->sreg2]) {
657 /* Arg 1 is constant, swap arguments if possible */
658 int opcode = ins->opcode;
659 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
660 if (ins->opcode != opcode) {
661 /* Allow further iterations */
665 } else if ((srcindex == 0) && (ins->sreg2 == -1)) {
666 /* Constant unop, perform cfold */
667 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], NULL, TRUE);
/* Otherwise try to switch ins to the immediate form of its opcode and fold the constant into it. */
670 opcode2 = mono_op_to_op_imm (ins->opcode);
671 if ((opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0) && ((srcindex == 1) || (ins->sreg2 == -1))) {
672 ins->opcode = opcode2;
673 if ((def->opcode == OP_I8CONST) && (sizeof (gpointer) == 4)) {
674 ins->inst_ls_word = def->inst_ls_word;
675 ins->inst_ms_word = def->inst_ms_word;
677 ins->inst_imm = def->inst_c0;
679 sregs [srcindex] = -1;
680 mono_inst_set_src_registers (ins, sregs);
682 if ((opcode2 == OP_VOIDCALL) || (opcode2 == OP_CALL) || (opcode2 == OP_LCALL) || (opcode2 == OP_FCALL))
683 ((MonoCallInst*)ins)->fptr = (gpointer)ins->inst_imm;
685 /* Allow further iterations */
691 #if defined(TARGET_X86) || defined(TARGET_AMD64)
692 if ((ins->opcode == OP_X86_LEA) && (srcindex == 1)) {
693 #if SIZEOF_REGISTER == 8
694 /* FIXME: Use OP_PADD_IMM when the new JIT is done */
695 ins->opcode = OP_LADD_IMM;
697 ins->opcode = OP_ADD_IMM;
699 ins->inst_imm += def->inst_c0 << ins->backend.shift_amount;
703 opcode2 = mono_load_membase_to_load_mem (ins->opcode);
704 if ((srcindex == 0) && (opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0)) {
705 ins->opcode = opcode2;
706 ins->inst_imm = def->inst_c0 + ins->inst_offset;
711 else if (((def->opcode == OP_ADD_IMM) || (def->opcode == OP_LADD_IMM)) && (MONO_IS_LOAD_MEMBASE (ins) || MONO_ARCH_IS_OP_MEMBASE (ins->opcode))) {
712 /* ADD_IMM is created by spill_global_vars */
714 * We have to guarantee that def->sreg1 haven't changed since def->dreg
715 * was defined. cfg->frame_reg is assumed to remain constant.
717 if ((def->sreg1 == cfg->frame_reg) || ((mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1))) {
718 ins->inst_basereg = def->sreg1;
719 ins->inst_offset += def->inst_imm;
721 } else if ((ins->opcode == OP_ISUB_IMM) && (def->opcode == OP_IADD_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
722 ins->sreg1 = def->sreg1;
723 ins->inst_imm -= def->inst_imm;
724 } else if ((ins->opcode == OP_IADD_IMM) && (def->opcode == OP_ISUB_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
725 ins->sreg1 = def->sreg1;
726 ins->inst_imm -= def->inst_imm;
727 } else if (ins->opcode == OP_STOREI1_MEMBASE_REG &&
728 (def->opcode == OP_ICONV_TO_U1 || def->opcode == OP_ICONV_TO_I1 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_U1)) &&
729 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
730 /* Avoid needless sign extension */
731 ins->sreg1 = def->sreg1;
732 } else if (ins->opcode == OP_STOREI2_MEMBASE_REG &&
733 (def->opcode == OP_ICONV_TO_U2 || def->opcode == OP_ICONV_TO_I2 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_I2)) &&
734 (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
735 /* Avoid needless sign extension */
736 ins->sreg1 = def->sreg1;
737 } else if (ins->opcode == OP_COMPARE_IMM && def->opcode == OP_LDADDR && ins->inst_imm == 0) {
/* A stack-local dummy constant 1 stands in for the address so the comparison against 0 can be folded. */
740 memset (&dummy_arg1, 0, sizeof (MonoInst));
741 dummy_arg1.opcode = OP_ICONST;
742 dummy_arg1.inst_c0 = 1;
744 mono_constant_fold_ins (cfg, ins, &dummy_arg1, NULL, TRUE);
745 } else if (srcindex == 0 && ins->opcode == OP_COMPARE && defs [ins->sreg1]->opcode == OP_PCONST && defs [ins->sreg2] && defs [ins->sreg2]->opcode == OP_PCONST) {
746 /* typeof(T) == typeof(..) */
747 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
751 g_assert (cfg->cbb == bb_opt);
752 g_assert (!bb_opt->code);
753 /* Do strength reduction here */
754 if (mono_strength_reduction_ins (cfg, ins, &spec) && max < cfg->next_vreg) {
755 MonoInst **defs_prev = defs;
756 gint32 *def_index_prev = def_index;
757 guint32 prev_max = max;
758 guint32 additional_vregs = cfg->next_vreg - initial_max_vregs;
760 /* We have more vregs so we need to reallocate defs and def_index arrays */
/* New capacity: the initial vreg count plus twice the number of vregs added since the pass started. */
761 max = initial_max_vregs + additional_vregs * 2;
762 defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * max);
763 def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * max);
765 /* Keep the entries for the previous vregs, zero the rest */
766 memcpy (defs, defs_prev, sizeof (MonoInst*) * prev_max);
767 memset (defs + prev_max, 0, sizeof (MonoInst*) * (max - prev_max));
768 memcpy (def_index, def_index_prev, sizeof (guint32) * prev_max);
769 memset (def_index + prev_max, 0, sizeof (guint32) * (max - prev_max));
772 if (cfg->cbb->code || (cfg->cbb != bb_opt)) {
773 MonoInst *saved_prev = ins->prev;
775 /* If we have code in cbb, we need to replace ins with the decomposition */
776 mono_replace_ins (cfg, bb, ins, &ins->prev, bb_opt, cfg->cbb);
/* Reset the scratch bblock so it can be reused for the next instruction. */
777 bb_opt->code = bb_opt->last_ins = NULL;
778 bb_opt->in_count = bb_opt->out_count = 0;
781 /* ins is hanging, continue scanning the emitted code */
786 if (spec [MONO_INST_DEST] != ' ') {
787 MonoInst *def = defs [ins->dreg];
789 if (def && (def->opcode == OP_ADD_IMM) && (def->sreg1 == cfg->frame_reg) && (MONO_IS_STORE_MEMBASE (ins))) {
790 /* ADD_IMM is created by spill_global_vars */
791 /* cfg->frame_reg is assumed to remain constant */
792 ins->inst_destbasereg = def->sreg1;
793 ins->inst_offset += def->inst_imm;
/* Record ins as the latest definition of its dreg, except for membase stores and volatile vregs. */
796 if (!MONO_IS_STORE_MEMBASE (ins) && !vreg_is_volatile (cfg, ins->dreg)) {
797 defs [ins->dreg] = ins;
798 def_index [ins->dreg] = ins_index;
802 if (MONO_IS_CALL (ins))
803 last_call_index = ins_index;
/*
 * reg_is_softreg_no_fpstack:
 *
 *   Return whether REG, whose register class is given by the spec character
 * SPEC, lies at or above the MONO_MAX_*REGS limit of that class: 'i' is
 * checked against MONO_MAX_IREGS, 'f' against MONO_MAX_FREGS and, when
 * MONO_ARCH_SIMD_INTRINSICS is defined, 'x' against MONO_MAX_XREGS. Unlike
 * reg_is_softreg (), the 'f' test only counts when MONO_ARCH_USE_FPSTACK is
 * false.
 * NOTE(review): presumably register numbers below those limits denote hard
 * registers - confirm.
 */
810 static inline gboolean
811 reg_is_softreg_no_fpstack (int reg, const char spec)
813 return (spec == 'i' && reg >= MONO_MAX_IREGS)
814 || ((spec == 'f' && reg >= MONO_MAX_FREGS) && !MONO_ARCH_USE_FPSTACK)
815 #ifdef MONO_ARCH_SIMD_INTRINSICS
816 || (spec == 'x' && reg >= MONO_MAX_XREGS)
/*
 * reg_is_softreg:
 *
 *   Return whether REG, whose register class is given by the spec character
 * SPEC, lies at or above the MONO_MAX_*REGS limit of that class: 'i' is
 * checked against MONO_MAX_IREGS, 'f' against MONO_MAX_FREGS and, when
 * MONO_ARCH_SIMD_INTRINSICS is defined, 'x' against MONO_MAX_XREGS.
 * NOTE(review): presumably register numbers below those limits denote hard
 * registers - confirm.
 */
821 static inline gboolean
822 reg_is_softreg (int reg, const char spec)
824 return (spec == 'i' && reg >= MONO_MAX_IREGS)
825 || (spec == 'f' && reg >= MONO_MAX_FREGS)
826 #ifdef MONO_ARCH_SIMD_INTRINSICS
827 || (spec == 'x' && reg >= MONO_MAX_XREGS)
/*
 * mono_is_simd_accessor:
 *
 *   Classify INS by opcode. When MONO_ARCH_SIMD_INTRINSICS is defined, the
 * switch lists SIMD opcodes, of which the OP_INSERTX_*_SLOW ones are visible
 * here. mono_local_deadce () refuses reverse copy propagation into
 * instructions for which this returns true.
 * NOTE(review): the return statements and the remaining case labels are
 * missing from this excerpt; presumably the listed opcodes yield TRUE and
 * everything else FALSE - confirm.
 */
832 static inline gboolean
833 mono_is_simd_accessor (MonoInst *ins)
835 switch (ins->opcode) {
836 #ifdef MONO_ARCH_SIMD_INTRINSICS
844 case OP_INSERTX_U1_SLOW:
845 case OP_INSERTX_I4_SLOW:
846 case OP_INSERTX_R4_SLOW:
847 case OP_INSERTX_R8_SLOW:
848 case OP_INSERTX_I8_SLOW:
859 * Get rid of the dead assignments to local vregs like the ones created by the
/*
 * Additional notes on mono_local_deadce ():
 * - Two bitsets indexed by vreg, USED and DEFINED, are taken from
 *   cfg->mempool through mono_bitset_mp_new_noinit (). For every bblock, a
 *   forward walk first clears the bits of each register the bblock mentions.
 * - A reverse walk then deletes OP_NOPs, performs the limited reverse copy
 *   propagation described below, and deletes side-effect free instructions
 *   whose destination is not marked as used. It marks destinations in
 *   DEFINED and sources, membase store base registers and call argument
 *   registers in USED as it goes.
 * NOTE(review): lines are missing from this excerpt (braces, declarations,
 * continue statements); check the control flow against the full file.
 */
863 mono_local_deadce (MonoCompile *cfg)
866 MonoInst *ins, *prev;
867 MonoBitSet *used, *defined;
869 //mono_print_code (cfg, "BEFORE LOCAL-DEADCE");
872 * Assignments to global vregs can't be eliminated so this pass must come
873 * after the handle_global_vregs () pass.
/* One bit per vreg, plus one spare. */
876 used = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
877 defined = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
879 /* First pass: collect liveness info */
880 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
881 /* Manually init the defs entries used by the bblock */
882 MONO_BB_FOR_EACH_INS (bb, ins) {
883 const char *spec = INS_INFO (ins->opcode);
884 int sregs [MONO_MAX_SRC_REGS];
887 if (spec [MONO_INST_DEST] != ' ') {
888 mono_bitset_clear_fast (used, ins->dreg);
889 mono_bitset_clear_fast (defined, ins->dreg);
890 #if SIZEOF_REGISTER == 4
892 mono_bitset_clear_fast (used, ins->dreg + 1);
893 mono_bitset_clear_fast (defined, ins->dreg + 1);
896 num_sregs = mono_inst_get_src_registers (ins, sregs);
897 for (i = 0; i < num_sregs; ++i) {
898 mono_bitset_clear_fast (used, sregs [i]);
899 #if SIZEOF_REGISTER == 4
900 mono_bitset_clear_fast (used, sregs [i] + 1);
906 * Make a reverse pass over the instruction list
908 MONO_BB_FOR_EACH_INS_REVERSE_SAFE (bb, prev, ins) {
909 const char *spec = INS_INFO (ins->opcode);
910 int sregs [MONO_MAX_SRC_REGS];
/* Previous instruction, skipping the kinds selected by FILTER_NOP | FILTER_IL_SEQ_POINT. */
912 MonoInst *prev_f = mono_inst_prev (ins, FILTER_NOP | FILTER_IL_SEQ_POINT);
914 if (ins->opcode == OP_NOP) {
915 MONO_DELETE_INS (bb, ins);
919 g_assert (ins->opcode > MONO_CEE_LAST);
921 if (MONO_IS_NON_FP_MOVE (ins) && prev_f) {
926 spec2 = INS_INFO (def->opcode);
929 * Perform a limited kind of reverse copy propagation, i.e.
930 * transform B <- FOO; A <- B into A <- FOO
931 * This isn't copyprop, not deadce, but it can only be performed
932 * after handle_global_vregs () has run.
934 if (!get_vreg_to_inst (cfg, ins->sreg1) && (spec2 [MONO_INST_DEST] != ' ') && (def->dreg == ins->sreg1) && !mono_bitset_test_fast (used, ins->sreg1) && !MONO_IS_STORE_MEMBASE (def) && reg_is_softreg (ins->sreg1, spec [MONO_INST_DEST]) && !mono_is_simd_accessor (def)) {
935 if (cfg->verbose_level > 2) {
936 printf ("\tReverse copyprop in BB%d on ", bb->block_num);
937 mono_print_ins (ins);
/* Retarget the defining instruction at the destination of the move, then drop the move. */
940 def->dreg = ins->dreg;
941 MONO_DELETE_INS (bb, ins);
942 spec = INS_INFO (ins->opcode);
946 /* Enabling this on x86 could screw up the fp stack */
947 if (reg_is_softreg_no_fpstack (ins->dreg, spec [MONO_INST_DEST])) {
949 * Assignments to global vregs can only be eliminated if there is another
950 * assignment to the same vreg later in the same bblock.
952 if (!mono_bitset_test_fast (used, ins->dreg) &&
953 (!get_vreg_to_inst (cfg, ins->dreg) || (!bb->extended && !vreg_is_volatile (cfg, ins->dreg) && mono_bitset_test_fast (defined, ins->dreg))) &&
954 MONO_INS_HAS_NO_SIDE_EFFECT (ins)) {
955 /* Happens with CMOV instructions */
956 if (prev_f && prev_f->opcode == OP_ICOMPARE_IMM) {
957 MonoInst *prev = prev_f;
959 * Can't use DELETE_INS since that would interfere with the
964 //printf ("DEADCE: "); mono_print_ins (ins);
965 MONO_DELETE_INS (bb, ins);
966 spec = INS_INFO (ins->opcode);
969 if (spec [MONO_INST_DEST] != ' ')
970 mono_bitset_clear_fast (used, ins->dreg);
973 if (spec [MONO_INST_DEST] != ' ')
974 mono_bitset_set_fast (defined, ins->dreg);
975 num_sregs = mono_inst_get_src_registers (ins, sregs);
976 for (i = 0; i < num_sregs; ++i)
977 mono_bitset_set_fast (used, sregs [i]);
/* For membase stores the dreg field is read (it is the base register), so it counts as a use. */
978 if (MONO_IS_STORE_MEMBASE (ins))
979 mono_bitset_set_fast (used, ins->dreg);
981 if (MONO_IS_CALL (ins)) {
982 MonoCallInst *call = (MonoCallInst*)ins;
/* Registers recorded in the out-argument lists of the call are uses as well; the register number is the low 24 bits of each list entry. */
985 if (call->out_ireg_args) {
986 for (l = call->out_ireg_args; l; l = l->next) {
987 guint32 regpair, reg;
989 regpair = (guint32)(gssize)(l->data);
990 reg = regpair & 0xffffff;
992 mono_bitset_set_fast (used, reg);
996 if (call->out_freg_args) {
997 for (l = call->out_freg_args; l; l = l->next) {
998 guint32 regpair, reg;
1000 regpair = (guint32)(gssize)(l->data);
1001 reg = regpair & 0xffffff;
1003 mono_bitset_set_fast (used, reg);
1010 //mono_print_code (cfg, "AFTER LOCAL-DEADCE");
1013 #else /* !DISABLE_JIT */
1015 MONO_EMPTY_SOURCE_FILE (local_propagation);
1017 #endif /* !DISABLE_JIT */