[jit] Optimize signed division by constant
[mono.git] / mono / mini / local-propagation.c
1 /*
2  * local-propagation.c: Local constant, copy and tree propagation.
3  *
4  * To make some sense of the tree mover, read mono/docs/tree-mover.txt
5  *
6  * Author:
7  *   Paolo Molaro (lupus@ximian.com)
8  *   Dietmar Maurer (dietmar@ximian.com)
9  *   Massimiliano Mantione (massi@ximian.com)
10  *
11  * (C) 2006 Novell, Inc.  http://www.novell.com
12  * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include <config.h>
17 #ifndef DISABLE_JIT
18
19 #include <string.h>
20 #include <stdio.h>
21 #ifdef HAVE_ALLOCA_H
22 #include <alloca.h>
23 #endif
24
25 #include <mono/metadata/debug-helpers.h>
26 #include <mono/metadata/mempool.h>
27 #include <mono/metadata/opcodes.h>
28 #include "mini.h"
29 #include "ir-emit.h"
30
31 #ifndef MONO_ARCH_IS_OP_MEMBASE
32 #define MONO_ARCH_IS_OP_MEMBASE(opcode) FALSE
33 #endif
34
35 static inline MonoBitSet* 
36 mono_bitset_mp_new_noinit (MonoMemPool *mp,  guint32 max_size)
37 {
38         int size = mono_bitset_alloc_size (max_size, 0);
39         gpointer mem;
40
41         mem = mono_mempool_alloc (mp, size);
42         return mono_bitset_mem_new (mem, max_size, MONO_BITSET_DONT_FREE);
43 }
44
45 #if SIZEOF_REGISTER == 8
46 struct magic_unsigned {
47         guint32 magic_number;
48         gboolean addition;
49         int shift;
50 };
51
52 struct magic_signed {
53         gint32 magic_number;
54         int shift;
55 };
56
57 /* http://www.hackersdelight.org/hdcodetxt/magicu.c.txt */
58 static struct magic_unsigned
59 compute_magic_unsigned (guint32 divisor) {
60         guint32 nc, delta, q1, r1, q2, r2;
61         struct magic_unsigned magu;
62         gboolean gt = FALSE;
63         int p;
64
65         magu.addition = 0;
66         nc = -1 - (-divisor) % divisor;
67         p = 31;
68         q1 = 0x80000000 / nc;
69         r1 = 0x80000000 - q1 * nc;
70         q2 = 0x7FFFFFFF / divisor;
71         r2 = 0x7FFFFFFF - q2 * divisor;
72         do {
73                 p = p + 1;
74                 if (q1 >= 0x80000000)
75                         gt = TRUE;
76                 if (r1 >= nc - r1) {
77                         q1 = 2 * q1 + 1;
78                         r1 = 2 * r1 - nc;
79                 } else {
80                         q1 = 2 * q1;
81                         r1 = 2 * r1;
82                 }
83                 if (r2 + 1 >= divisor - r2) {
84                         if (q2 >= 0x7FFFFFFF)
85                                 magu.addition = 1;
86                         q2 = 2 * q2 + 1;
87                         r2 = 2 * r2 + 1 - divisor;
88                 } else {
89                         if (q2 >= 0x80000000)
90                                 magu.addition = 1;
91                         q2 = 2 * q2;
92                         r2 = 2 * r2 + 1;
93                 }
94                 delta = divisor - 1 - r2;
95         } while (!gt && (q1 < delta || (q1 == delta && r1 == 0)));
96
97         magu.magic_number = q2 + 1;
98         magu.shift = p - 32;
99         return magu;
100 }
101
102 /* http://www.hackersdelight.org/hdcodetxt/magic.c.txt */
103 static struct magic_signed
104 compute_magic_signed (gint32 divisor) {
105         int p;
106         guint32 ad, anc, delta, q1, r1, q2, r2, t;
107         const guint32 two31 = 0x80000000;
108         struct magic_signed mag;
109
110         ad = abs (divisor);
111         t = two31 + ((unsigned)divisor >> 31);
112         anc = t - 1 - t % ad;
113         p = 31;
114         q1 = two31 / anc;
115         r1 = two31 - q1 * anc;
116         q2 = two31 / ad;
117         r2 = two31 - q2 * ad;
118         do {
119                 p++;
120                 q1 *= 2;
121                 r1 *= 2;
122                 if (r1 >= anc) {
123                         q1++;
124                         r1 -= anc;
125                 }
126
127                 q2 *= 2;
128                 r2 *= 2;
129
130                 if (r2 >= ad) {
131                         q2++;
132                         r2 -= ad;
133                 }
134
135                 delta = ad - r2;
136         } while (q1 < delta || (q1 == delta && r1 == 0));
137
138         mag.magic_number = q2 + 1;
139         if (divisor < 0)
140                 mag.magic_number = -mag.magic_number;
141         mag.shift = p - 32;
142         return mag;
143 }
144 #endif
145
146 static gboolean
147 mono_strength_reduction_division (MonoCompile *cfg, MonoInst *ins)
148 {
149         gboolean allocated_vregs = FALSE;
150         /*
151          * We don't use it on 32bit systems because on those
152          * platforms we emulate long multiplication, driving the
153          * performance back down.
154          */
155 #if SIZEOF_REGISTER == 8
156         switch (ins->opcode) {
157                 case OP_IDIV_UN_IMM: {
158                         guint32 tmp_regl, dividend_reg;
159                         struct magic_unsigned mag;
160                         int power2 = mono_is_power_of_two (ins->inst_imm);
161
162                         /* The decomposition doesn't handle exception throwing */
163                         if (ins->inst_imm == 0)
164                                 break;
165
166                         if (power2 >= 0) {
167                                 ins->opcode = OP_ISHR_UN_IMM;
168                                 ins->sreg2 = -1;
169                                 ins->inst_imm = power2;
170                                 break;
171                         }
172                         allocated_vregs = TRUE;
173                         /*
174                          * Replacement of unsigned division with multiplication,
175                          * shifts and additions Hacker's Delight, chapter 10-10.
176                          */
177                         mag = compute_magic_unsigned (ins->inst_imm);
178                         tmp_regl = alloc_lreg (cfg);
179                         dividend_reg = alloc_lreg (cfg);
180                         MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
181                         MONO_EMIT_NEW_UNALU (cfg, OP_ZEXT_I4, dividend_reg, ins->sreg1);
182                         MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
183                         if (mag.addition) {
184                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, tmp_regl, tmp_regl, 32);
185                                 MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
186                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, mag.shift);
187                         } else {
188                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, 32 + mag.shift);
189                         }
190                         break;
191                 }
192                 case OP_IDIV_IMM: {
193                         guint32 tmp_regl, dividend_reg;
194                         struct magic_signed mag;
195                         int power2 = mono_is_power_of_two (ins->inst_imm);
196
197                         /* The decomposition doesn't handle exception throwing */
198                         if (ins->inst_imm == 0 || ins->inst_imm == -1)
199                                 break;
200                         allocated_vregs = TRUE;
201                         if (power2 == 1) {
202                                 guint32 r1 = alloc_ireg (cfg);
203                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, ins->sreg1, 31);
204                                 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
205                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, 1);
206                                 break;
207                         } else if (power2 > 0 && power2 < 31) {
208                                 guint32 r1 = alloc_ireg (cfg);
209                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, r1, ins->sreg1, 31);
210                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, r1, r1, (32 - power2));
211                                 MONO_EMIT_NEW_BIALU (cfg, OP_IADD, r1, r1, ins->sreg1);
212                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, ins->dreg, r1, power2);
213                                 break;
214                         }
215
216                         /*
217                          * Replacement of signed division with multiplication,
218                          * shifts and additions Hacker's Delight, chapter 10-6.
219                          */
220                         mag = compute_magic_signed (ins->inst_imm);
221                         tmp_regl = alloc_lreg (cfg);
222                         dividend_reg = alloc_lreg (cfg);
223                         MONO_EMIT_NEW_I8CONST (cfg, tmp_regl, mag.magic_number);
224                         MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, dividend_reg, ins->sreg1);
225                         MONO_EMIT_NEW_BIALU (cfg, OP_LMUL, tmp_regl, dividend_reg, tmp_regl);
226                         if ((ins->inst_imm > 0 && mag.magic_number < 0) || (ins->inst_imm < 0 && mag.magic_number > 0)) {
227                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32);
228                                 if (ins->inst_imm > 0 && mag.magic_number < 0) {
229                                         MONO_EMIT_NEW_BIALU (cfg, OP_LADD, tmp_regl, tmp_regl, dividend_reg);
230                                 } else if (ins->inst_imm < 0 && mag.magic_number > 0) {
231                                         MONO_EMIT_NEW_BIALU (cfg, OP_LSUB, tmp_regl, tmp_regl, dividend_reg);
232                                 }
233                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, mag.shift);
234                         } else {
235                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_IMM, tmp_regl, tmp_regl, 32 + mag.shift);
236                         }
237                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_LSHR_UN_IMM, ins->dreg, tmp_regl, SIZEOF_REGISTER * 8 - 1);
238                         MONO_EMIT_NEW_BIALU (cfg, OP_LADD, ins->dreg, ins->dreg, tmp_regl);
239                         break;
240                 }
241         }
242 #endif
243         return allocated_vregs;
244 }
245
246 /*
247  * Replaces ins with optimized opcodes.
248  *
249  * We can emit to cbb the equivalent instructions which will be used as
250  * replacement for ins, or simply change the fields of ins. Spec needs to
251  * be updated if we silently change the opcode of ins.
252  *
253  * Returns TRUE if additional vregs were allocated.
254  */
255 static gboolean
256 mono_strength_reduction_ins (MonoCompile *cfg, MonoInst *ins, const char **spec)
257 {
258         gboolean allocated_vregs = FALSE;
259
260         /* FIXME: Add long/float */
261         switch (ins->opcode) {
262         case OP_MOVE:
263         case OP_XMOVE:
264                 if (ins->dreg == ins->sreg1) {
265                         NULLIFY_INS (ins);
266                 }
267                 break;
268         case OP_ADD_IMM:
269         case OP_IADD_IMM:
270         case OP_SUB_IMM:
271         case OP_ISUB_IMM:
272 #if SIZEOF_REGISTER == 8
273         case OP_LADD_IMM:
274         case OP_LSUB_IMM:
275 #endif
276                 if (ins->inst_imm == 0) {
277                         ins->opcode = OP_MOVE;
278                 }
279                 break;
280         case OP_MUL_IMM:
281         case OP_IMUL_IMM:
282 #if SIZEOF_REGISTER == 8
283         case OP_LMUL_IMM:
284 #endif
285                 if (ins->inst_imm == 0) {
286                         ins->opcode = (ins->opcode == OP_LMUL_IMM) ? OP_I8CONST : OP_ICONST;
287                         ins->inst_c0 = 0;
288                         ins->sreg1 = -1;
289                 } else if (ins->inst_imm == 1) {
290                         ins->opcode = OP_MOVE;
291                 } else if ((ins->opcode == OP_IMUL_IMM) && (ins->inst_imm == -1)) {
292                         ins->opcode = OP_INEG;
293                 } else if ((ins->opcode == OP_LMUL_IMM) && (ins->inst_imm == -1)) {
294                         ins->opcode = OP_LNEG;
295                 } else {
296                         int power2 = mono_is_power_of_two (ins->inst_imm);
297                         if (power2 >= 0) {
298                                 ins->opcode = (ins->opcode == OP_MUL_IMM) ? OP_SHL_IMM : ((ins->opcode == OP_LMUL_IMM) ? OP_LSHL_IMM : OP_ISHL_IMM);
299                                 ins->inst_imm = power2;
300                         }
301                 }
302                 break;
303         case OP_IREM_UN_IMM: {
304                 int power2 = mono_is_power_of_two (ins->inst_imm);
305
306                 if (power2 >= 0) {
307                         ins->opcode = OP_IAND_IMM;
308                         ins->sreg2 = -1;
309                         ins->inst_imm = (1 << power2) - 1;
310                 }
311                 break;
312         }
313         case OP_IDIV_UN_IMM:
314         case OP_IDIV_IMM: {
315                 allocated_vregs = mono_strength_reduction_division (cfg, ins);
316                 break;
317         }
318 #if SIZEOF_REGISTER == 8
319         case OP_LREM_IMM:
320 #endif
321         case OP_IREM_IMM: {
322                 int power = mono_is_power_of_two (ins->inst_imm);
323                 if (ins->inst_imm == 1) {
324                         ins->opcode = OP_ICONST;
325                         MONO_INST_NULLIFY_SREGS (ins);
326                         ins->inst_c0 = 0;
327 #if __s390__
328                 }
329 #else
330                 } else if ((ins->inst_imm > 0) && (ins->inst_imm < (1LL << 32)) && (power != -1)) {
331                         gboolean is_long = ins->opcode == OP_LREM_IMM;
332                         int compensator_reg = alloc_ireg (cfg);
333                         int intermediate_reg;
334
335                         /* Based on gcc code */
336
337                         /* Add compensation for negative numerators */
338
339                         if (power > 1) {
340                                 intermediate_reg = compensator_reg;
341                                 MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_IMM : OP_ISHR_IMM, intermediate_reg, ins->sreg1, is_long ? 63 : 31);
342                         } else {
343                                 intermediate_reg = ins->sreg1;
344                         }
345
346                         MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LSHR_UN_IMM : OP_ISHR_UN_IMM, compensator_reg, intermediate_reg, (is_long ? 64 : 32) - power);
347                         MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LADD : OP_IADD, ins->dreg, ins->sreg1, compensator_reg);
348                         /* Compute remainder */
349                         MONO_EMIT_NEW_BIALU_IMM (cfg, is_long ? OP_LAND_IMM : OP_AND_IMM, ins->dreg, ins->dreg, (1 << power) - 1);
350                         /* Remove compensation */
351                         MONO_EMIT_NEW_BIALU (cfg, is_long ? OP_LSUB : OP_ISUB, ins->dreg, ins->dreg, compensator_reg);
352
353                         allocated_vregs = TRUE;
354                 }
355 #endif
356                 break;
357         }
358 #if SIZEOF_REGISTER == 4
359         case OP_LSHR_IMM: {
360                 if (COMPILE_LLVM (cfg))
361                         break;
362                 if (ins->inst_c1 == 32) {
363                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
364                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
365                 } else if (ins->inst_c1 == 0) {
366                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
367                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
368                 } else if (ins->inst_c1 > 32) {
369                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
370                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), 31);
371                 } else {
372                         guint32 tmpreg = alloc_ireg (cfg);
373                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
374                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
375                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
376                         MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
377                         allocated_vregs = TRUE;
378                 }
379                 break;
380         }
381         case OP_LSHR_UN_IMM: {
382                 if (COMPILE_LLVM (cfg))
383                         break;
384                 if (ins->inst_c1 == 32) {
385                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
386                         MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
387                 } else if (ins->inst_c1 == 0) {
388                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
389                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
390                 } else if (ins->inst_c1 > 32) {
391                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1 - 32);
392                         MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_MS (ins->dreg), 0);
393                 } else {
394                         guint32 tmpreg = alloc_ireg (cfg);
395                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, tmpreg, MONO_LVREG_MS (ins->sreg1), 32 - ins->inst_c1);
396                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
397                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
398                         MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->dreg), tmpreg);
399                         allocated_vregs = TRUE;
400                 }
401                 break;
402         }
403         case OP_LSHL_IMM: {
404                 if (COMPILE_LLVM (cfg))
405                         break;
406                 if (ins->inst_c1 == 32) {
407                         /* just move the lower half to the upper and zero the lower word */
408                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
409                         MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
410                 } else if (ins->inst_c1 == 0) {
411                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1));
412                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1));
413                 } else if (ins->inst_c1 > 32) {
414                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1 - 32);
415                         MONO_EMIT_NEW_ICONST (cfg, MONO_LVREG_LS (ins->dreg), 0);
416                 } else {
417                         guint32 tmpreg = alloc_ireg (cfg);
418                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_UN_IMM, tmpreg, MONO_LVREG_LS (ins->sreg1), 32 - ins->inst_c1);
419                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->sreg1), ins->inst_c1);
420                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, MONO_LVREG_LS (ins->dreg), MONO_LVREG_LS (ins->sreg1), ins->inst_c1);
421                         MONO_EMIT_NEW_BIALU (cfg, OP_IOR, MONO_LVREG_MS (ins->dreg), MONO_LVREG_MS (ins->dreg), tmpreg);
422                         allocated_vregs = TRUE;
423                 }
424                 break;
425         }
426 #endif
427
428         default:
429                 break;
430         }
431
432         *spec = INS_INFO (ins->opcode);
433         return allocated_vregs;
434 }
435
436 /*
437  * mono_local_cprop:
438  *
439  *  A combined local copy and constant propagation pass.
440  */
441 void
442 mono_local_cprop (MonoCompile *cfg)
443 {
444         MonoBasicBlock *bb, *bb_opt;
445         MonoInst **defs;
446         gint32 *def_index;
447         int max;
448         int filter = FILTER_IL_SEQ_POINT;
449         int initial_max_vregs = cfg->next_vreg;
450
451         max = cfg->next_vreg;
452         defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * cfg->next_vreg);
453         def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * cfg->next_vreg);
454         cfg->cbb = bb_opt = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoBasicBlock));
455
456         for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
457                 MonoInst *ins;
458                 int ins_index;
459                 int last_call_index;
460
461                 /* Manually init the defs entries used by the bblock */
462                 MONO_BB_FOR_EACH_INS (bb, ins) {
463                         int sregs [MONO_MAX_SRC_REGS];
464                         int num_sregs, i;
465
466                         if (ins->dreg != -1) {
467 #if SIZEOF_REGISTER == 4
468                                 const char *spec = INS_INFO (ins->opcode);
469                                 if (spec [MONO_INST_DEST] == 'l') {
470                                         defs [ins->dreg + 1] = NULL;
471                                         defs [ins->dreg + 2] = NULL;
472                                 }
473 #endif
474                                 defs [ins->dreg] = NULL;
475                         }
476
477                         num_sregs = mono_inst_get_src_registers (ins, sregs);
478                         for (i = 0; i < num_sregs; ++i) {
479                                 int sreg = sregs [i];
480 #if SIZEOF_REGISTER == 4
481                                 const char *spec = INS_INFO (ins->opcode);
482                                 if (spec [MONO_INST_SRC1 + i] == 'l') {
483                                         defs [sreg + 1] = NULL;
484                                         defs [sreg + 2] = NULL;
485                                 }
486 #endif
487                                 defs [sreg] = NULL;
488                         }
489                 }
490
491                 ins_index = 0;
492                 last_call_index = -1;
493                 MONO_BB_FOR_EACH_INS (bb, ins) {
494                         const char *spec = INS_INFO (ins->opcode);
495                         int regtype, srcindex, sreg;
496                         int num_sregs;
497                         int sregs [MONO_MAX_SRC_REGS];
498
499                         if (ins->opcode == OP_NOP) {
500                                 MONO_DELETE_INS (bb, ins);
501                                 continue;
502                         }
503
504                         g_assert (ins->opcode > MONO_CEE_LAST);
505
506                         /* FIXME: Optimize this */
507                         if (ins->opcode == OP_LDADDR) {
508                                 MonoInst *var = (MonoInst *)ins->inst_p0;
509
510                                 defs [var->dreg] = NULL;
511                                 /*
512                                 if (!MONO_TYPE_ISSTRUCT (var->inst_vtype))
513                                         break;
514                                 */
515                         }
516
517                         if (MONO_IS_STORE_MEMBASE (ins)) {
518                                 sreg = ins->dreg;
519                                 regtype = 'i';
520
521                                 if ((regtype == 'i') && (sreg != -1) && defs [sreg]) {
522                                         MonoInst *def = defs [sreg];
523
524                                         if ((def->opcode == OP_MOVE) && (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) && !vreg_is_volatile (cfg, def->sreg1)) {
525                                                 int vreg = def->sreg1;
526                                                 if (cfg->verbose_level > 2) printf ("CCOPY: R%d -> R%d\n", sreg, vreg);
527                                                 ins->dreg = vreg;
528                                         }
529                                 }
530                         }
531
532                         num_sregs = mono_inst_get_src_registers (ins, sregs);
533                         for (srcindex = 0; srcindex < num_sregs; ++srcindex) {
534                                 MonoInst *def;
535
536                                 mono_inst_get_src_registers (ins, sregs);
537
538                                 regtype = spec [MONO_INST_SRC1 + srcindex];
539                                 sreg = sregs [srcindex];
540
541                                 if ((regtype == ' ') || (sreg == -1) || (!defs [sreg]))
542                                         continue;
543
544                                 def = defs [sreg];
545
546                                 /* Copy propagation */
547                                 /* 
548                                  * The first check makes sure the source of the copy did not change since 
549                                  * the copy was made.
550                                  * The second check avoids volatile variables.
551                                  * The third check avoids copy propagating local vregs through a call, 
552                                  * since the lvreg will be spilled 
553                                  * The fourth check avoids copy propagating a vreg in cases where
554                                  * it would be eliminated anyway by reverse copy propagation later,
555                                  * because propagating it would create another use for it, thus making 
556                                  * it impossible to use reverse copy propagation.
557                                  */
558                                 /* Enabling this for floats trips up the fp stack */
559                                 /* 
560                                  * Enabling this for floats on amd64 seems to cause a failure in 
561                                  * basic-math.cs, most likely because it gets rid of some r8->r4 
562                                  * conversions.
563                                  */
564                                 if (MONO_IS_MOVE (def) &&
565                                         (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg])) &&
566                                         !vreg_is_volatile (cfg, def->sreg1) &&
567                                         /* This avoids propagating local vregs across calls */
568                                         ((get_vreg_to_inst (cfg, def->sreg1) || !defs [def->sreg1] || (def_index [def->sreg1] >= last_call_index) || (def->opcode == OP_VMOVE))) &&
569                                         !(defs [def->sreg1] && mono_inst_next (defs [def->sreg1], filter) == def) &&
570                                         (!MONO_ARCH_USE_FPSTACK || (def->opcode != OP_FMOVE)) &&
571                                         (def->opcode != OP_FMOVE)) {
572                                         int vreg = def->sreg1;
573
574                                         if (cfg->verbose_level > 2) printf ("CCOPY/2: R%d -> R%d\n", sreg, vreg);
575                                         sregs [srcindex] = vreg;
576                                         mono_inst_set_src_registers (ins, sregs);
577
578                                         /* Allow further iterations */
579                                         srcindex = -1;
580                                         continue;
581                                 }
582
583                                 /* Constant propagation */
584                                 /* FIXME: Make is_inst_imm a macro */
585                                 /* FIXME: Make is_inst_imm take an opcode argument */
586                                 /* is_inst_imm is only needed for binops */
587                                 if ((((def->opcode == OP_ICONST) || ((sizeof (gpointer) == 8) && (def->opcode == OP_I8CONST))) &&
588                                          (((srcindex == 0) && (ins->sreg2 == -1)) || mono_arch_is_inst_imm (def->inst_c0))) || 
589                                         (!MONO_ARCH_USE_FPSTACK && (def->opcode == OP_R8CONST))) {
590                                         guint32 opcode2;
591
592                                         /* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
593                                         if ((srcindex == 1) && (ins->sreg1 != -1) && defs [ins->sreg1] && (defs [ins->sreg1]->opcode == OP_ICONST) && defs [ins->sreg2]) {
594                                                 /* Both arguments are constants, perform cfold */
595                                                 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
596                                         } else if ((srcindex == 0) && (ins->sreg2 != -1) && defs [ins->sreg2]) {
597                                                 /* Arg 1 is constant, swap arguments if possible */
598                                                 int opcode = ins->opcode;
599                                                 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], defs [ins->sreg2], TRUE);
600                                                 if (ins->opcode != opcode) {
601                                                         /* Allow further iterations */
602                                                         srcindex = -1;
603                                                         continue;
604                                                 }
605                                         } else if ((srcindex == 0) && (ins->sreg2 == -1)) {
606                                                 /* Constant unop, perform cfold */
607                                                 mono_constant_fold_ins (cfg, ins, defs [ins->sreg1], NULL, TRUE);
608                                         }
609
610                                         opcode2 = mono_op_to_op_imm (ins->opcode);
611                                         if ((opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0) && ((srcindex == 1) || (ins->sreg2 == -1))) {
612                                                 ins->opcode = opcode2;
613                                                 if ((def->opcode == OP_I8CONST) && (sizeof (gpointer) == 4)) {
614                                                         ins->inst_ls_word = def->inst_ls_word;
615                                                         ins->inst_ms_word = def->inst_ms_word;
616                                                 } else {
617                                                         ins->inst_imm = def->inst_c0;
618                                                 }
619                                                 sregs [srcindex] = -1;
620                                                 mono_inst_set_src_registers (ins, sregs);
621
622                                                 if ((opcode2 == OP_VOIDCALL) || (opcode2 == OP_CALL) || (opcode2 == OP_LCALL) || (opcode2 == OP_FCALL))
623                                                         ((MonoCallInst*)ins)->fptr = (gpointer)ins->inst_imm;
624
625                                                 /* Allow further iterations */
626                                                 srcindex = -1;
627                                                 continue;
628                                         }
629                                         else {
630                                                 /* Special cases */
631 #if defined(TARGET_X86) || defined(TARGET_AMD64)
632                                                 if ((ins->opcode == OP_X86_LEA) && (srcindex == 1)) {
633 #if SIZEOF_REGISTER == 8
634                                                         /* FIXME: Use OP_PADD_IMM when the new JIT is done */
635                                                         ins->opcode = OP_LADD_IMM;
636 #else
637                                                         ins->opcode = OP_ADD_IMM;
638 #endif
639                                                         ins->inst_imm += def->inst_c0 << ins->backend.shift_amount;
640                                                         ins->sreg2 = -1;
641                                                 }
642 #endif
643                                                 opcode2 = mono_load_membase_to_load_mem (ins->opcode);
644                                                 if ((srcindex == 0) && (opcode2 != -1) && mono_arch_is_inst_imm (def->inst_c0)) {
645                                                         ins->opcode = opcode2;
646                                                         ins->inst_imm = def->inst_c0 + ins->inst_offset;
647                                                         ins->sreg1 = -1;
648                                                 }
649                                         }
650                                 }
651                                 else if (((def->opcode == OP_ADD_IMM) || (def->opcode == OP_LADD_IMM)) && (MONO_IS_LOAD_MEMBASE (ins) || MONO_ARCH_IS_OP_MEMBASE (ins->opcode))) {
652                                         /* ADD_IMM is created by spill_global_vars */
653                                         /* 
654                                          * We have to guarantee that def->sreg1 haven't changed since def->dreg
655                                          * was defined. cfg->frame_reg is assumed to remain constant.
656                                          */
657                                         if ((def->sreg1 == cfg->frame_reg) || ((mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1))) {
658                                                 ins->inst_basereg = def->sreg1;
659                                                 ins->inst_offset += def->inst_imm;
660                                         }
661                                 } else if ((ins->opcode == OP_ISUB_IMM) && (def->opcode == OP_IADD_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
662                                         ins->sreg1 = def->sreg1;
663                                         ins->inst_imm -= def->inst_imm;
664                                 } else if ((ins->opcode == OP_IADD_IMM) && (def->opcode == OP_ISUB_IMM) && (mono_inst_next (def, filter) == ins) && (def->dreg != def->sreg1)) {
665                                         ins->sreg1 = def->sreg1;
666                                         ins->inst_imm -= def->inst_imm;
667                                 } else if (ins->opcode == OP_STOREI1_MEMBASE_REG &&
668                                                    (def->opcode == OP_ICONV_TO_U1 || def->opcode == OP_ICONV_TO_I1 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_U1)) &&
669                                                    (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
670                                         /* Avoid needless sign extension */
671                                         ins->sreg1 = def->sreg1;
672                                 } else if (ins->opcode == OP_STOREI2_MEMBASE_REG &&
673                                                    (def->opcode == OP_ICONV_TO_U2 || def->opcode == OP_ICONV_TO_I2 || def->opcode == OP_SEXT_I4 || (SIZEOF_REGISTER == 8 && def->opcode == OP_LCONV_TO_I2)) &&
674                                                    (!defs [def->sreg1] || (def_index [def->sreg1] < def_index [sreg]))) {
675                                         /* Avoid needless sign extension */
676                                         ins->sreg1 = def->sreg1;
677                                 } else if (ins->opcode == OP_COMPARE_IMM && def->opcode == OP_LDADDR && ins->inst_imm == 0) {
678                                         MonoInst dummy_arg1;
679
680                                         memset (&dummy_arg1, 0, sizeof (MonoInst));
681                                         dummy_arg1.opcode = OP_ICONST;
682                                         dummy_arg1.inst_c0 = 1;
683
684                                         mono_constant_fold_ins (cfg, ins, &dummy_arg1, NULL, TRUE);
685                                 }
686                         }
687
688                         g_assert (cfg->cbb == bb_opt);
689                         g_assert (!bb_opt->code);
690                         /* Do strength reduction here */
691                         if (mono_strength_reduction_ins (cfg, ins, &spec) && max < cfg->next_vreg) {
692                                 MonoInst **defs_prev = defs;
693                                 gint32 *def_index_prev = def_index;
694                                 guint32 prev_max = max;
695                                 guint32 additional_vregs = cfg->next_vreg - initial_max_vregs;
696
697                                 /* We have more vregs so we need to reallocate defs and def_index arrays */
698                                 max  = initial_max_vregs + additional_vregs * 2;
699                                 defs = (MonoInst **)mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * max);
700                                 def_index = (gint32 *)mono_mempool_alloc (cfg->mempool, sizeof (guint32) * max);
701
702                                 /* Keep the entries for the previous vregs, zero the rest */
703                                 memcpy (defs, defs_prev, sizeof (MonoInst*) * prev_max);
704                                 memset (defs + prev_max, 0, sizeof (MonoInst*) * (max - prev_max));
705                                 memcpy (def_index, def_index_prev, sizeof (guint32) * prev_max);
706                                 memset (def_index + prev_max, 0, sizeof (guint32) * (max - prev_max));
707                         }
708
709                         if (cfg->cbb->code || (cfg->cbb != bb_opt)) {
710                                 MonoInst *saved_prev = ins->prev;
711
712                                 /* If we have code in cbb, we need to replace ins with the decomposition */
713                                 mono_replace_ins (cfg, bb, ins, &ins->prev, bb_opt, cfg->cbb);
714                                 bb_opt->code = bb_opt->last_ins = NULL;
715                                 bb_opt->in_count = bb_opt->out_count = 0;
716                                 cfg->cbb = bb_opt;
717
718                                 /* ins is hanging, continue scanning the emitted code */
719                                 ins = saved_prev;
720                                 continue;
721                         }
722
723                         if (spec [MONO_INST_DEST] != ' ') {
724                                 MonoInst *def = defs [ins->dreg];
725
726                                 if (def && (def->opcode == OP_ADD_IMM) && (def->sreg1 == cfg->frame_reg) && (MONO_IS_STORE_MEMBASE (ins))) {
727                                         /* ADD_IMM is created by spill_global_vars */
728                                         /* cfg->frame_reg is assumed to remain constant */
729                                         ins->inst_destbasereg = def->sreg1;
730                                         ins->inst_offset += def->inst_imm;
731                                 }
732
733                                 if (!MONO_IS_STORE_MEMBASE (ins) && !vreg_is_volatile (cfg, ins->dreg)) {
734                                         defs [ins->dreg] = ins;
735                                         def_index [ins->dreg] = ins_index;
736                                 }
737                         }
738                         
739                         if (MONO_IS_CALL (ins))
740                                 last_call_index = ins_index;
741
742                         ins_index ++;
743                 }
744         }
745 }
746
747 static inline gboolean
748 reg_is_softreg_no_fpstack (int reg, const char spec)
749 {
750         return (spec == 'i' && reg >= MONO_MAX_IREGS)
751                 || ((spec == 'f' && reg >= MONO_MAX_FREGS) && !MONO_ARCH_USE_FPSTACK)
752 #ifdef MONO_ARCH_SIMD_INTRINSICS
753                 || (spec == 'x' && reg >= MONO_MAX_XREGS)
754 #endif
755                 || (spec == 'v');
756 }
757                 
758 static inline gboolean
759 reg_is_softreg (int reg, const char spec)
760 {
761         return (spec == 'i' && reg >= MONO_MAX_IREGS)
762                 || (spec == 'f' && reg >= MONO_MAX_FREGS)
763 #ifdef MONO_ARCH_SIMD_INTRINSICS
764                 || (spec == 'x' && reg >= MONO_MAX_XREGS)
765 #endif
766                 || (spec == 'v');
767 }
768
769 static inline gboolean
770 mono_is_simd_accessor (MonoInst *ins)
771 {
772         switch (ins->opcode) {
773 #ifdef MONO_ARCH_SIMD_INTRINSICS
774         case OP_INSERT_I1:
775         case OP_INSERT_I2:
776         case OP_INSERT_I4:
777         case OP_INSERT_I8:
778         case OP_INSERT_R4:
779         case OP_INSERT_R8:
780
781         case OP_INSERTX_U1_SLOW:
782         case OP_INSERTX_I4_SLOW:
783         case OP_INSERTX_R4_SLOW:
784         case OP_INSERTX_R8_SLOW:
785         case OP_INSERTX_I8_SLOW:
786                 return TRUE;
787 #endif
788         default:
789                 return FALSE;
790         }
791 }
792
793 /**
794  * mono_local_deadce:
795  *
796  *   Get rid of the dead assignments to local vregs like the ones created by the 
797  * copyprop pass.
798  */
799 void
800 mono_local_deadce (MonoCompile *cfg)
801 {
802         MonoBasicBlock *bb;
803         MonoInst *ins, *prev;
804         MonoBitSet *used, *defined;
805
806         //mono_print_code (cfg, "BEFORE LOCAL-DEADCE");
807
808         /*
809          * Assignments to global vregs can't be eliminated so this pass must come
810          * after the handle_global_vregs () pass.
811          */
812
813         used = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
814         defined = mono_bitset_mp_new_noinit (cfg->mempool, cfg->next_vreg + 1);
815
816         /* First pass: collect liveness info */
817         for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
818                 /* Manually init the defs entries used by the bblock */
819                 MONO_BB_FOR_EACH_INS (bb, ins) {
820                         const char *spec = INS_INFO (ins->opcode);
821                         int sregs [MONO_MAX_SRC_REGS];
822                         int num_sregs, i;
823
824                         if (spec [MONO_INST_DEST] != ' ') {
825                                 mono_bitset_clear_fast (used, ins->dreg);
826                                 mono_bitset_clear_fast (defined, ins->dreg);
827 #if SIZEOF_REGISTER == 4
828                                 /* Regpairs */
829                                 mono_bitset_clear_fast (used, ins->dreg + 1);
830                                 mono_bitset_clear_fast (defined, ins->dreg + 1);
831 #endif
832                         }
833                         num_sregs = mono_inst_get_src_registers (ins, sregs);
834                         for (i = 0; i < num_sregs; ++i) {
835                                 mono_bitset_clear_fast (used, sregs [i]);
836 #if SIZEOF_REGISTER == 4
837                                 mono_bitset_clear_fast (used, sregs [i] + 1);
838 #endif
839                         }
840                 }
841
842                 /*
843                  * Make a reverse pass over the instruction list
844                  */
845                 MONO_BB_FOR_EACH_INS_REVERSE_SAFE (bb, prev, ins) {
846                         const char *spec = INS_INFO (ins->opcode);
847                         int sregs [MONO_MAX_SRC_REGS];
848                         int num_sregs, i;
849                         MonoInst *prev_f = mono_inst_prev (ins, FILTER_NOP | FILTER_IL_SEQ_POINT);
850
851                         if (ins->opcode == OP_NOP) {
852                                 MONO_DELETE_INS (bb, ins);
853                                 continue;
854                         }
855
856                         g_assert (ins->opcode > MONO_CEE_LAST);
857
858                         if (MONO_IS_NON_FP_MOVE (ins) && prev_f) {
859                                 MonoInst *def;
860                                 const char *spec2;
861
862                                 def = prev_f;
863                                 spec2 = INS_INFO (def->opcode);
864
865                                 /* 
866                                  * Perform a limited kind of reverse copy propagation, i.e.
867                                  * transform B <- FOO; A <- B into A <- FOO
868                                  * This isn't copyprop, not deadce, but it can only be performed
869                                  * after handle_global_vregs () has run.
870                                  */
871                                 if (!get_vreg_to_inst (cfg, ins->sreg1) && (spec2 [MONO_INST_DEST] != ' ') && (def->dreg == ins->sreg1) && !mono_bitset_test_fast (used, ins->sreg1) && !MONO_IS_STORE_MEMBASE (def) && reg_is_softreg (ins->sreg1, spec [MONO_INST_DEST]) && !mono_is_simd_accessor (def)) {
872                                         if (cfg->verbose_level > 2) {
873                                                 printf ("\tReverse copyprop in BB%d on ", bb->block_num);
874                                                 mono_print_ins (ins);
875                                         }
876
877                                         def->dreg = ins->dreg;
878                                         MONO_DELETE_INS (bb, ins);
879                                         spec = INS_INFO (ins->opcode);
880                                 }
881                         }
882
883                         /* Enabling this on x86 could screw up the fp stack */
884                         if (reg_is_softreg_no_fpstack (ins->dreg, spec [MONO_INST_DEST])) {
885                                 /* 
886                                  * Assignments to global vregs can only be eliminated if there is another
887                                  * assignment to the same vreg later in the same bblock.
888                                  */
889                                 if (!mono_bitset_test_fast (used, ins->dreg) && 
890                                         (!get_vreg_to_inst (cfg, ins->dreg) || (!bb->extended && !vreg_is_volatile (cfg, ins->dreg) && mono_bitset_test_fast (defined, ins->dreg))) &&
891                                         MONO_INS_HAS_NO_SIDE_EFFECT (ins)) {
892                                         /* Happens with CMOV instructions */
893                                         if (prev_f && prev_f->opcode == OP_ICOMPARE_IMM) {
894                                                 MonoInst *prev = prev_f;
895                                                 /* 
896                                                  * Can't use DELETE_INS since that would interfere with the
897                                                  * FOR_EACH_INS loop.
898                                                  */
899                                                 NULLIFY_INS (prev);
900                                         }
901                                         //printf ("DEADCE: "); mono_print_ins (ins);
902                                         MONO_DELETE_INS (bb, ins);
903                                         spec = INS_INFO (ins->opcode);
904                                 }
905
906                                 if (spec [MONO_INST_DEST] != ' ')
907                                         mono_bitset_clear_fast (used, ins->dreg);
908                         }
909
910                         if (spec [MONO_INST_DEST] != ' ')
911                                 mono_bitset_set_fast (defined, ins->dreg);
912                         num_sregs = mono_inst_get_src_registers (ins, sregs);
913                         for (i = 0; i < num_sregs; ++i)
914                                 mono_bitset_set_fast (used, sregs [i]);
915                         if (MONO_IS_STORE_MEMBASE (ins))
916                                 mono_bitset_set_fast (used, ins->dreg);
917
918                         if (MONO_IS_CALL (ins)) {
919                                 MonoCallInst *call = (MonoCallInst*)ins;
920                                 GSList *l;
921
922                                 if (call->out_ireg_args) {
923                                         for (l = call->out_ireg_args; l; l = l->next) {
924                                                 guint32 regpair, reg;
925
926                                                 regpair = (guint32)(gssize)(l->data);
927                                                 reg = regpair & 0xffffff;
928                                         
929                                                 mono_bitset_set_fast (used, reg);
930                                         }
931                                 }
932
933                                 if (call->out_freg_args) {
934                                         for (l = call->out_freg_args; l; l = l->next) {
935                                                 guint32 regpair, reg;
936
937                                                 regpair = (guint32)(gssize)(l->data);
938                                                 reg = regpair & 0xffffff;
939                                         
940                                                 mono_bitset_set_fast (used, reg);
941                                         }
942                                 }
943                         }
944                 }
945         }
946
947         //mono_print_code (cfg, "AFTER LOCAL-DEADCE");
948 }
949
950 #endif /* DISABLE_JIT */