First set of licensing changes
[mono.git] / mono / mini / dominators.c
index e8df3c2c1366195dd098349d99074d0e66a08186..b4c8dc39056147e947786a6c25efdbeb80096296 100644 (file)
  *   Paolo Molaro (lupus@ximian.com)
  *
  * (C) 2003 Ximian, Inc.
+ * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
  */
 #include <string.h>
 #include <mono/metadata/debug-helpers.h>
+#include <mono/metadata/mempool.h>
+#include <mono/metadata/mempool-internals.h>
 
 #include "mini.h"
 
-//#define DEBUG_DOMINATORS
+#ifndef DISABLE_JIT
 
-/* the simpler, dumber algorithm */
-static void
-compute_dominators (MonoCompile *m) {
-       int change = TRUE;
-       int i, j, bitsize;
-       MonoBasicBlock *bb;
-       MonoBitSet *T;
-       char* mem;
+/*
+ * bb->dfn == 0 means either the bblock is ignored by the dfn calculation, or
+ * it is the entry bblock.
+ */
+#define HAS_DFN(bb, entry) ((bb)->dfn || ((bb) == entry))
 
-       g_assert (!(m->comp_done & MONO_COMP_DOM));
+/*
+ * Compute dominators and immediate dominators using the algorithm in the
+ * paper "A Simple, Fast Dominance Algorithm" by Keith D. Cooper, 
+ * Timothy J. Harvey, and Ken Kennedy:
+ * http://citeseer.ist.psu.edu/cooper01simple.html
+ */
+static void
+compute_dominators (MonoCompile *cfg)
+{
+       int bindex, i, bitsize;
+       MonoBasicBlock *entry;
+       MonoBasicBlock **doms;
+       gboolean changed;
 
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       /* the first is always the entry */
-       bb = m->bblocks [0];
-       mem = mono_mempool_alloc0 (m->mempool, bitsize * (m->num_bblocks + 1));
-       bb->dominators = mono_bitset_mem_new (mem, m->num_bblocks, 0);
+       g_assert (!(cfg->comp_done & MONO_COMP_DOM));
 
-       mem += bitsize;
-       mono_bitset_set (bb->dominators, 0);
+       bitsize = mono_bitset_alloc_size (cfg->num_bblocks, 0);
 
-       T = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-       mem += bitsize;
+       entry = cfg->bblocks [0];
 
+       doms = g_new0 (MonoBasicBlock*, cfg->num_bblocks);
+       doms [entry->dfn] = entry;
 
-       for (i = 1; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               bb->dominators = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-               mem += bitsize;
-               mono_bitset_invert (bb->dominators);
+       if (cfg->verbose_level > 1) {
+               for (i = 0; i < cfg->num_bblocks; ++i) {
+                       int j;
+                       MonoBasicBlock *bb = cfg->bblocks [i];
 
-#ifdef DEBUG_DOMINATORS
-               printf ("BB%d IN: ", bb->block_num);
-               for (j = 0; j < bb->in_count; ++j) 
-                       printf ("%d ", bb->in_bb [j]->block_num);
-               printf ("\n");
-#endif
+                       printf ("BB%d IN: ", bb->block_num);
+                       for (j = 0; j < bb->in_count; ++j)
+                               printf ("%d ", bb->in_bb [j]->block_num);
+                       printf ("\n");
+               }
        }
 
-       do {
-               change = FALSE;
-               for (i = 1; i < m->num_bblocks; ++i) {
-                       bb = m->bblocks [i];
-                       mono_bitset_set_all (T);
-                       for (j = 0; j < bb->in_count; ++j) {
-                               if (bb->in_bb [j]->dominators)
-                                       mono_bitset_intersection (T, bb->in_bb [j]->dominators);
+       changed = TRUE;
+       while (changed) {
+               changed = FALSE;
+
+               for (bindex = 0; bindex < cfg->num_bblocks; ++bindex) {
+                       MonoBasicBlock *bb = cfg->bblocks [bindex];
+                       MonoBasicBlock *idom;
+
+                       idom = NULL;
+                       for (i = 0; i < bb->in_count; ++i) {
+                               MonoBasicBlock *in_bb = bb->in_bb [i];
+                               if ((in_bb != bb) && doms [in_bb->dfn]) {
+                                       idom = in_bb;
+                                       break;
+                               }
                        }
-                       mono_bitset_set (T, i);
-                       if (!mono_bitset_equal (T, bb->dominators)) {
-                               change = TRUE;
-                               mono_bitset_copyto (T, bb->dominators);
+                       if (bb != cfg->bblocks [0])
+                               g_assert (idom);
+
+                       while (i < bb->in_count) {
+                               MonoBasicBlock *in_bb = bb->in_bb [i];
+
+                               if (HAS_DFN (in_bb, entry) && doms [in_bb->dfn]) {
+                                       /* Intersect */
+                                       MonoBasicBlock *f1 = idom;
+                                       MonoBasicBlock *f2 = in_bb;
+
+                                       while (f1 != f2) {
+                                               if (f1->dfn < f2->dfn)
+                                                       f2 = doms [f2->dfn];
+                                               else
+                                                       f1 = doms [f1->dfn];
+                                       }
+
+                                       idom = f1;
+                               }
+                               i ++;
                        }
-               }
-       } while (change);
 
-       m->comp_done |= MONO_COMP_DOM;
+                       if (idom != doms [bb->dfn]) {
+                               if (bb == cfg->bblocks [0])
+                                       doms [bb->dfn] = bb;
+                               else {
+                                       doms [bb->dfn] = idom;
+                                       changed = TRUE;
+                               }
 
-#ifdef DEBUG_DOMINATORS
-       printf ("DTREE %s %d\n", mono_method_full_name (m->method, TRUE), 
-               mono_method_get_header (m->method)->num_clauses);
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               printf ("BB%d: ", bb->block_num);
-               mono_blockset_print (m, bb->dominators, NULL, -1);
+                               //printf ("A: bb=%d dfn=%d dom:%d\n", bb->block_num, bb->dfn, doms [bb->dfn]->block_num);
+                       }
+               }
        }
-#endif
-}
 
-static void
-compute_idominators (MonoCompile* m) {
-       char *mem;
-       int bitsize, i, s, t;
-       MonoBitSet **T, *temp;
-       MonoBasicBlock *bb;
+       /* Compute bb->dominators for each bblock */
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+               MonoBasicBlock *cbb;
+               MonoBitSet *dominators;
+               char *mem;
 
-       g_assert (!(m->comp_done & MONO_COMP_IDOM));
+               mem = (char *)mono_mempool_alloc0 (cfg->mempool, bitsize);
 
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       mem = mono_mempool_alloc (m->mempool, bitsize * (m->num_bblocks + 1));
-       T = mono_mempool_alloc (m->mempool, sizeof (MonoBitSet*) * m->num_bblocks);
-
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               T [i] = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-               mono_bitset_copyto (bb->dominators, T [i]);
-               mono_bitset_clear (T [i], i);
-               if (mono_bitset_count (bb->dominators) - 1 != mono_bitset_count (T [i])) {
-                       mono_blockset_print (m, bb->dominators, "dominators", -1);
-                       mono_blockset_print (m, T [i], "T [i]", -1);
-                       g_error ("problem at %d (%d)\n", i, bb->dfn);
-               }
+               bb->dominators = dominators = mono_bitset_mem_new (mem, cfg->num_bblocks, 0);
                mem += bitsize;
-       }
-       temp = mono_bitset_mem_new (mem, m->num_bblocks, 0);
 
-       for (i = 1; i < m->num_bblocks; ++i) {
+               mono_bitset_set_fast (dominators, bb->dfn);
 
-               temp = T [i];
-                       
-               mono_bitset_foreach_bit_rev (temp, s, m->num_bblocks) {
-
-                       mono_bitset_foreach_bit_rev (temp, t, m->num_bblocks) {
-                                               
-                               if (t == s)
-                                       continue;
+               if (bb->dfn) {
+                       for (cbb = doms [bb->dfn]; cbb->dfn; cbb = doms [cbb->dfn])
+                               mono_bitset_set_fast (dominators, cbb->dfn);
 
-                               //if (mono_bitset_test_fast (T [s], t))
-                               if (mono_bitset_test_fast (m->bblocks [s]->dominators, t))
-                                       mono_bitset_clear (temp, t);
-                       }
+                       bb->idom = doms [bb->dfn];
+                       if (bb->idom)
+                               bb->idom->dominated = g_slist_prepend_mempool (cfg->mempool, bb->idom->dominated, bb);
                }
 
-#ifdef DEBUG_DOMINATORS
-               printf ("IDOMSET BB%d %d: ", m->bblocks [i]->block_num, m->num_bblocks);
-               mono_blockset_print (m, T [i], NULL, -1);
-#endif
+               /* The entry bb */
+               mono_bitset_set_fast (dominators, 0);
        }
 
-       for (i = 1; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               s = mono_bitset_find_start (T [i]);
-               g_assert (s != -1);
-               /*fixme:mono_bitset_count does not really work */
-               //g_assert (mono_bitset_count (T [i]) == 1);
-               t = mono_bitset_find_first (T [i], s);
-               g_assert (t == -1 || t >=  m->num_bblocks);
-               bb->idom = m->bblocks [s];
-               bb->idom->dominated = g_list_prepend (bb->idom->dominated, bb);
-       }
-
-       m->comp_done |= MONO_COMP_IDOM;
-}
+       g_free (doms);
 
-static void
-postorder_visit (MonoBasicBlock *start, int *idx, MonoBasicBlock **array)
-{
-       int i;
+       cfg->comp_done |= MONO_COMP_DOM | MONO_COMP_IDOM;
 
-       /* we assume the flag was already cleared by the caller. */
-       start->flags |= BB_VISITED;
-       /*g_print ("visit %d at %p\n", *dfn, start->cil_code);*/
-       for (i = 0; i < start->out_count; ++i) {
-               if (start->out_bb [i]->flags & BB_VISITED)
-                       continue;
-               postorder_visit (start->out_bb [i], idx, array);
+       if (cfg->verbose_level > 1) {
+               printf ("DTREE %s %d\n", mono_method_full_name (cfg->method, TRUE),
+                       cfg->header->num_clauses);
+               for (i = 0; i < cfg->num_bblocks; ++i) {
+                       MonoBasicBlock *bb = cfg->bblocks [i];
+                       printf ("BB%d(dfn=%d) (IDOM=BB%d): ", bb->block_num, bb->dfn, bb->idom ? bb->idom->block_num : -1);
+                       mono_blockset_print (cfg, bb->dominators, NULL, -1);
+               }
        }
-       array [*idx] = start;
-       (*idx)++;
 }
 
+#if 0
+
 static void
 check_dominance_frontier (MonoBasicBlock *x, MonoBasicBlock *t)
 {
@@ -192,148 +183,58 @@ check_dominance_frontier (MonoBasicBlock *x, MonoBasicBlock *t)
        }
 } 
 
-#if 0
-/* there is a bug in this code */
-static void
-compute_dominance_frontier_old (MonoCompile *m) {
-       int i, j, bitsize;
-       MonoBasicBlock **postorder;
-       MonoBasicBlock *bb, *z;
-       char *mem;
-
-       g_assert (!(m->comp_done & MONO_COMP_DFRONTIER));
-
-       postorder = mono_mempool_alloc (m->mempool, sizeof (MonoBasicBlock*) * m->num_bblocks);
-       i = 0;
-       postorder_visit (m->bb_entry, &i, postorder);
-       /*g_print ("postorder traversal:");
-       for (i = 0; i < m->num_bblocks; ++i)
-               g_print (" B%d", postorder [i]->dfn);
-       g_print ("\n");*/
-       
-       /* we could reuse the bitsets allocated in compute_idominators() */
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       mem = mono_mempool_alloc0 (m->mempool, bitsize * m->num_bblocks);
-
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = postorder [i];
-               bb->dfrontier = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-               mem += bitsize;
-       }
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = postorder [i];
-               /* the local component */
-               for (j = 0; j < bb->out_count; ++j) {
-                       //if (bb->out_bb [j] != bb->idom)
-                       if (bb->out_bb [j]->idom != bb)
-                               mono_bitset_set (bb->dfrontier, bb->out_bb [j]->dfn);
-               }
-       }
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = postorder [i];
-               /* the up component */
-               if (bb->idom) {
-                       z = bb->idom;
-                       mono_bitset_foreach_bit (z->dfrontier, j, m->num_bblocks) {
-                               //if (m->bblocks [j] != bb->idom)
-                               if (m->bblocks [j]->idom != bb)
-                                       mono_bitset_set (bb->dfrontier, m->bblocks [j]->dfn);
-                       }
-               }
-       }
-
-       /* this is a check for the dominator frontier */
-       for (i = 0; i < m->num_bblocks; ++i) {
-               MonoBasicBlock *x = m->bblocks [i];
-
-               mono_bitset_foreach_bit ((x->dfrontier), j, (m->num_bblocks)) {
-                       MonoBasicBlock *w = m->bblocks [j];
-                       int k;
-                       /* x must not strictly dominates w */
-                       if (mono_bitset_test_fast (w->dominators, x->dfn) && w != x)
-                               g_assert_not_reached ();
-
-                       for (k = 0; k < m->num_bblocks; ++k)
-                               m->bblocks [k]->flags &= ~BB_VISITED;
-
-                       check_dominance_frontier (x, x);
-               }
-       }
-
-       m->comp_done |= MONO_COMP_DFRONTIER;
-}
 #endif
 
-/* this is an implementation of the dominance frontier algorithm described in
- * "modern compiler implementation in C" written by Andrew W. Appel
+/**
+ * Compute dominance frontiers using the algorithm from the same paper.
  */
 static void
-compute_dominance_frontier_appel (MonoCompile *m, int n) 
-{
-       int i, j;
-       MonoBasicBlock *bb;
-
-       bb = m->bblocks [n];
-       g_assert (!(bb->flags & BB_VISITED));
-       bb->flags |= BB_VISITED;
-
-       for (i = 0; i < bb->out_count; ++i) {
-               MonoBasicBlock *y = bb->out_bb [i];
-               if (y->idom != bb) {
-                       g_assert (!(mono_bitset_test_fast (y->dominators, bb->dfn) && bb->dfn != y->dfn));
-                       mono_bitset_set (bb->dfrontier, y->dfn);
-               }
-       }
-       
-       
-       for (i = 0; i < m->num_bblocks; ++i) {
-               MonoBasicBlock *c = m->bblocks [i];
-               if (c->idom == bb) {
-                       if (!(c->flags & BB_VISITED))
-                               compute_dominance_frontier_appel (m, c->dfn);
-                       mono_bitset_foreach_bit (c->dfrontier, j, m->num_bblocks) {
-                               MonoBasicBlock *w = m->bblocks [j];
-                               if (!(mono_bitset_test_fast (w->dominators, bb->dfn) && bb->dfn != w->dfn))
-                                       mono_bitset_set (bb->dfrontier, w->dfn);
-                       }
-               }
-       }
-}
-
-static void
-compute_dominance_frontier (MonoCompile *m) 
+compute_dominance_frontier (MonoCompile *cfg)
 {
-       MonoBasicBlock *bb;
        char *mem;
        int i, j, bitsize;
 
-       g_assert (!(m->comp_done & MONO_COMP_DFRONTIER));
+       g_assert (!(cfg->comp_done & MONO_COMP_DFRONTIER));
 
-       for (i = 0; i < m->num_bblocks; ++i)
-               m->bblocks [i]->flags &= ~BB_VISITED;
+       for (i = 0; i < cfg->num_bblocks; ++i)
+               cfg->bblocks [i]->flags &= ~BB_VISITED;
 
-       /* we could reuse the bitsets allocated in compute_idominators() */
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       mem = mono_mempool_alloc0 (m->mempool, bitsize * m->num_bblocks);
+       bitsize = mono_bitset_alloc_size (cfg->num_bblocks, 0);
+       mem = (char *)mono_mempool_alloc0 (cfg->mempool, bitsize * cfg->num_bblocks);
  
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               bb->dfrontier = mono_bitset_mem_new (mem, m->num_bblocks, 0);
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+               bb->dfrontier = mono_bitset_mem_new (mem, cfg->num_bblocks, 0);
                mem += bitsize;
        }
 
-       compute_dominance_frontier_appel (m, 0);
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+
+               if (bb->in_count > 1) {
+                       for (j = 0; j < bb->in_count; ++j) {
+                               MonoBasicBlock *p = bb->in_bb [j];
+
+                               if (p->dfn || (p == cfg->bblocks [0])) {
+                                       while (p != bb->idom) {
+                                               mono_bitset_set_fast (p->dfrontier, bb->dfn);
+                                               p = p->idom;
+                                       }
+                               }
+                       }
+               }
+       }
 
 #if 0
-       for (i = 0; i < m->num_bblocks; ++i) {
-               MonoBasicBlock *x = m->bblocks [i];
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
                
-               printf ("DFRONT %s BB%d: ", mono_method_full_name (m->method, TRUE), x->block_num);
-               mono_blockset_print (m, x->dfrontier, NULL, -1);
+               printf ("DFRONT %s BB%d: ", mono_method_full_name (cfg->method, TRUE), bb->block_num);
+               mono_blockset_print (cfg, bb->dfrontier, NULL, -1);
        }
 #endif
 
-#if 1
+#if 0
        /* this is a check for the dominator frontier */
        for (i = 0; i < m->num_bblocks; ++i) {
                MonoBasicBlock *x = m->bblocks [i];
@@ -353,58 +254,47 @@ compute_dominance_frontier (MonoCompile *m)
        }
 #endif
 
-       m->comp_done |= MONO_COMP_DFRONTIER;
+       cfg->comp_done |= MONO_COMP_DFRONTIER;
 }
 
-void    
-mono_compile_dominator_info (MonoCompile *cfg, int dom_flags)
-{
-       if ((dom_flags & MONO_COMP_DOM) && !(cfg->comp_done & MONO_COMP_DOM))
-               compute_dominators (cfg);
-       if ((dom_flags & MONO_COMP_IDOM) && !(cfg->comp_done & MONO_COMP_IDOM))
-               compute_idominators (cfg);
-       if ((dom_flags & MONO_COMP_DFRONTIER) && !(cfg->comp_done & MONO_COMP_DFRONTIER))
-               compute_dominance_frontier (cfg);
-}
-
-static void
+static inline void
 df_set (MonoCompile *m, MonoBitSet* dest, MonoBitSet *set) 
 {
        int i;
 
-       mono_bitset_clear_all (dest);
        mono_bitset_foreach_bit (set, i, m->num_bblocks) {
-               mono_bitset_union (dest, m->bblocks [i]->dfrontier);
+               mono_bitset_union_fast (dest, m->bblocks [i]->dfrontier);
        }
 }
 
-/* TODO: alloc tmp and D on the stack */
 MonoBitSet*
-mono_compile_iterated_dfrontier (MonoCompile *m, MonoBitSet *set) 
+mono_compile_iterated_dfrontier (MonoCompile *m, MonoBitSet *set)
 {
-       MonoBitSet *result, *D;
-       int bitsize, change = TRUE;
+       MonoBitSet *result;
+       int bitsize, count1, count2;
 
        bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       result = mono_bitset_mem_new (mono_mempool_alloc (m->mempool, bitsize), m->num_bblocks, 0);
-       D = mono_bitset_mem_new (mono_mempool_alloc (m->mempool, bitsize), m->num_bblocks, 0);
+       result = mono_bitset_mem_new (mono_mempool_alloc0 (m->mempool, bitsize), m->num_bblocks, 0);
 
        df_set (m, result, set);
+       count2 = mono_bitset_count (result);
        do {
-               change = FALSE;
-               df_set (m, D, result);
-               mono_bitset_union (D, result);
-
-               if (!mono_bitset_equal (D, result)) {
-                       mono_bitset_copyto (D, result);
-                       change = TRUE;
-               }
-       } while (change);
+               count1 = count2;
+               df_set (m, result, result);
+               count2 = mono_bitset_count (result);
+       } while (count2 > count1);
        
        return result;
 }
 
-//#define DEBUG_NATURAL_LOOPS
+void    
+mono_compile_dominator_info (MonoCompile *cfg, int dom_flags)
+{
+       if ((dom_flags & MONO_COMP_DOM) && !(cfg->comp_done & MONO_COMP_DOM))
+               compute_dominators (cfg);
+       if ((dom_flags & MONO_COMP_DFRONTIER) && !(cfg->comp_done & MONO_COMP_DFRONTIER))
+               compute_dominance_frontier (cfg);
+}
 
 /*
  * code to detect loops and loop nesting level
@@ -413,70 +303,119 @@ void
 mono_compute_natural_loops (MonoCompile *cfg)
 {
        int i, j, k;
+       MonoBitSet *in_loop_blocks;
+       int *bb_indexes;
 
        g_assert (!(cfg->comp_done & MONO_COMP_LOOPS));
 
+       in_loop_blocks = mono_bitset_new (cfg->num_bblocks + 1, 0);
        for (i = 0; i < cfg->num_bblocks; ++i) {
                MonoBasicBlock *n = cfg->bblocks [i];
 
                for (j = 0; j < n->out_count; j++) {
                        MonoBasicBlock *h = n->out_bb [j];
+                       /* check for single block loops */
+                       if (n == h) {
+                               h->loop_blocks = g_list_prepend_mempool (cfg->mempool, h->loop_blocks, h);
+                               h->nesting++;
+                       }
                        /* check for back-edge from n to h */
-                       if (n != h && mono_bitset_test (n->dominators, h->dfn)) {
-                               GList *todo;
-
-                               n->loop_body_start = 1;
+                       else if (n != h && mono_bitset_test_fast (n->dominators, h->dfn)) {
+                               GSList *todo;
 
                                /* already in loop_blocks? */
-                               if (h->loop_blocks && g_list_find (h->loop_blocks, n))
+                               if (h->loop_blocks && g_list_find (h->loop_blocks, n)) {
                                        continue;
-                               
-                               todo = g_list_prepend (NULL, n);
+                               }
 
+                               mono_bitset_clear_all (in_loop_blocks);
+                               if (h->loop_blocks) {
+                                       GList *l;
+
+                                       for (l = h->loop_blocks; l; l = l->next) {
+                                               MonoBasicBlock *b = (MonoBasicBlock *)l->data;
+                                               if (b->dfn)
+                                                       mono_bitset_set_fast (in_loop_blocks, b->dfn);
+                                       }
+                               }
+                               todo = g_slist_prepend (NULL, n);       
+                       
                                while (todo) {
                                        MonoBasicBlock *cb = (MonoBasicBlock *)todo->data;
-                                       todo = g_list_delete_link (todo, todo);
+                                       todo = g_slist_delete_link (todo, todo);
 
-                                       if (g_list_find (h->loop_blocks, cb))
+                                       if ((cb->dfn && mono_bitset_test_fast (in_loop_blocks, cb->dfn)) || (!cb->dfn && g_list_find (h->loop_blocks, cb)))
                                                continue;
 
-                                       h->loop_blocks = g_list_prepend (h->loop_blocks, cb);
+                                       h->loop_blocks = g_list_prepend_mempool (cfg->mempool, h->loop_blocks, cb);
                                        cb->nesting++;
+                                       if (cb->dfn)
+                                               mono_bitset_set_fast (in_loop_blocks, cb->dfn);
 
                                        for (k = 0; k < cb->in_count; k++) {
                                                MonoBasicBlock *prev = cb->in_bb [k];
                                                /* add all previous blocks */
-                                               if (prev != h && !g_list_find (h->loop_blocks, prev)) {
-                                                       todo = g_list_prepend (todo, prev);
+                                               if (prev != h && !((prev->dfn && mono_bitset_test_fast (in_loop_blocks, prev->dfn)) || (!prev->dfn && g_list_find (h->loop_blocks, prev)))) {
+                                                       todo = g_slist_prepend (todo, prev);
                                                }
                                        }
                                }
 
                                /* add the header if not already there */
-                               if (!g_list_find (h->loop_blocks, h)) {
-                                       h->loop_blocks = g_list_prepend (h->loop_blocks, h);
+                               if (!((h->dfn && mono_bitset_test_fast (in_loop_blocks, h->dfn)) || (!h->dfn && g_list_find (h->loop_blocks, h)))) {
+                                       h->loop_blocks = g_list_prepend_mempool (cfg->mempool, h->loop_blocks, h);
                                        h->nesting++;
                                }
                        }
                }
        }
+       mono_bitset_free (in_loop_blocks);
 
        cfg->comp_done |= MONO_COMP_LOOPS;
-       
-#ifdef DEBUG_NATURAL_LOOPS
+
+       /* Compute loop_body_start for each loop */
+       bb_indexes = g_new0 (int, cfg->num_bblocks);
+       {
+               MonoBasicBlock *bb;
+
+               for (i = 0, bb = cfg->bb_entry; bb; i ++, bb = bb->next_bb) {
+                       if (bb->dfn)
+                               bb_indexes [bb->dfn] = i;
+               }
+       }
        for (i = 0; i < cfg->num_bblocks; ++i) {
                if (cfg->bblocks [i]->loop_blocks) {
-                       MonoBasicBlock *h = (MonoBasicBlock *)cfg->bblocks [i]->loop_blocks->data;
+                       /* The loop body start is the first bblock in the order they will be emitted */
+                       MonoBasicBlock *h = cfg->bblocks [i];
+                       MonoBasicBlock *body_start = h;
                        GList *l;
-                       printf ("LOOP START %d\n", h->block_num);
+
                        for (l = h->loop_blocks; l; l = l->next) {
                                MonoBasicBlock *cb = (MonoBasicBlock *)l->data;
-                               printf (" BB%d %d %p\n", cb->block_num, cb->nesting, cb->loop_blocks);
+
+                               if (cb->dfn && bb_indexes [cb->dfn] < bb_indexes [body_start->dfn]) {
+                                       body_start = cb;
+                               }
+                       }
+
+                       body_start->loop_body_start = 1;
+               }
+       }
+       g_free (bb_indexes);
+
+       if (cfg->verbose_level > 1) {
+               for (i = 0; i < cfg->num_bblocks; ++i) {
+                       if (cfg->bblocks [i]->loop_blocks) {
+                               MonoBasicBlock *h = (MonoBasicBlock *)cfg->bblocks [i]->loop_blocks->data;
+                               GList *l;
+                               printf ("LOOP START %d\n", h->block_num);
+                               for (l = h->loop_blocks; l; l = l->next) {
+                                       MonoBasicBlock *cb = (MonoBasicBlock *)l->data;
+                                       printf ("\tBB%d %d %p\n", cb->block_num, cb->nesting, cb->loop_blocks);
+                               }
                        }
                }
        }
-#endif
-
 }
 
 static void
@@ -486,7 +425,6 @@ clear_idominators (MonoCompile *cfg)
     
        for (i = 0; i < cfg->num_bblocks; ++i) {
                if (cfg->bblocks[i]->dominated) {
-                       g_list_free (cfg->bblocks[i]->dominated);        
                        cfg->bblocks[i]->dominated = NULL;
                }
        }
@@ -501,10 +439,7 @@ clear_loops (MonoCompile *cfg)
     
        for (i = 0; i < cfg->num_bblocks; ++i) {
                cfg->bblocks[i]->nesting = 0;
-               if (cfg->bblocks[i]->loop_blocks) {
-                       g_list_free (cfg->bblocks[i]->loop_blocks);        
-                       cfg->bblocks[i]->loop_blocks = NULL;
-               }
+               cfg->bblocks[i]->loop_blocks = NULL;
        }
 
        cfg->comp_done &= ~MONO_COMP_LOOPS;   
@@ -518,4 +453,12 @@ mono_free_loop_info (MonoCompile *cfg)
     if (cfg->comp_done & MONO_COMP_LOOPS)
         clear_loops (cfg);
 }
-    
+
+#else /* DISABLE_JIT */
+
+void
+mono_free_loop_info (MonoCompile *cfg)
+{
+}
+
+#endif /* DISABLE_JIT */