In .:
[mono.git] / mono / mini / dominators.c
index 735f404a964c42bdd4608462780927915c69e5fa..a733189d1d10a841eec9e11af0d4e135e66c823a 100644 (file)
 
 //#define DEBUG_DOMINATORS
 
-/* the simpler, dumber algorithm */
+/*
+ * Compute dominators and immediate dominators using the algorithm in the
+ * paper "A Simple, Fast Dominance Algorithm" by Keith D. Cooper, 
+ * Timothy J. Harvey, and Ken Kennedy:
+ * http://citeseer.ist.psu.edu/cooper01simple.html
+ */
 static void
-compute_dominators (MonoCompile *m) {
-       int change = TRUE;
+compute_dominators (MonoCompile *cfg)
+{
        int i, j, bitsize;
-       MonoBasicBlock *bb;
-       MonoBitSet *T;
        char* mem;
+       gboolean *in_worklist;
+       MonoBasicBlock **worklist;
+       guint32 l_begin, l_end;
+       MonoBasicBlock **doms;
 
-       g_assert (!(m->comp_done & MONO_COMP_DOM));
-
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       /* the first is always the entry */
-       bb = m->bblocks [0];
-       mem = mono_mempool_alloc0 (m->mempool, bitsize * (m->num_bblocks + 1));
-       bb->dominators = mono_bitset_mem_new (mem, m->num_bblocks, 0);
+       g_assert (!(cfg->comp_done & MONO_COMP_DOM));
 
-       mem += bitsize;
-       mono_bitset_set (bb->dominators, 0);
+       bitsize = mono_bitset_alloc_size (cfg->num_bblocks, 0);
+       in_worklist = g_new0 (gboolean, cfg->num_bblocks);
+       worklist = g_new (MonoBasicBlock*, cfg->num_bblocks + 1);
+       l_begin = 0;
+       l_end = 0;
 
-       T = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-       mem += bitsize;
+       mem = mono_mempool_alloc0 (cfg->mempool, bitsize * cfg->num_bblocks);
 
+       /* the first is always the entry */
+       worklist [l_end ++] = cfg->bblocks [0];
 
-       for (i = 1; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               bb->dominators = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-               mem += bitsize;
-               mono_bitset_invert (bb->dominators);
+       doms = g_new0 (MonoBasicBlock*, cfg->num_bblocks);
+       doms [cfg->bblocks [0]->dfn] = cfg->bblocks [0];
 
 #ifdef DEBUG_DOMINATORS
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+
                printf ("BB%d IN: ", bb->block_num);
                for (j = 0; j < bb->in_count; ++j) 
                        printf ("%d ", bb->in_bb [j]->block_num);
                printf ("\n");
-#endif
-       }
-
-       do {
-               change = FALSE;
-               for (i = 1; i < m->num_bblocks; ++i) {
-                       bb = m->bblocks [i];
-                       mono_bitset_set_all (T);
-                       for (j = 0; j < bb->in_count; ++j) {
-                               if (bb->in_bb [j]->dominators)
-                                       mono_bitset_intersection (T, bb->in_bb [j]->dominators);
-                       }
-                       mono_bitset_set (T, i);
-                       if (!mono_bitset_equal (T, bb->dominators)) {
-                               change = TRUE;
-                               mono_bitset_copyto (T, bb->dominators);
-                       }
-               }
-       } while (change);
-
-       m->comp_done |= MONO_COMP_DOM;
-
-#ifdef DEBUG_DOMINATORS
-       printf ("DTREE %s %d\n", mono_method_full_name (m->method, TRUE), 
-               ((MonoMethodNormal *)m->method)->header->num_clauses);
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               printf ("BB%d: ", bb->block_num);
-               mono_blockset_print (m, bb->dominators, NULL, -1);
        }
 #endif
-}
 
-static void
-compute_idominators (MonoCompile* m) {
-       char *mem;
-       int bitsize, i, s, t;
-       MonoBitSet **T, *temp;
-       MonoBasicBlock *bb;
+       while (l_begin != l_end) {
+               MonoBasicBlock *bb = worklist [l_begin ++];
+               MonoBasicBlock *idom;
 
-       g_assert (!(m->comp_done & MONO_COMP_IDOM));
+               in_worklist [bb->dfn] = FALSE;
 
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       mem = mono_mempool_alloc (m->mempool, bitsize * (m->num_bblocks + 1));
-       T = mono_mempool_alloc (m->mempool, sizeof (MonoBitSet*) * m->num_bblocks);
+               if (l_begin == cfg->num_bblocks + 1)
+                       l_begin = 0;
 
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               T [i] = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-               mono_bitset_copyto (bb->dominators, T [i]);
-               mono_bitset_clear (T [i], i);
-               if (mono_bitset_count (bb->dominators) - 1 != mono_bitset_count (T [i])) {
-                       mono_blockset_print (m, bb->dominators, "dominators", -1);
-                       mono_blockset_print (m, T [i], "T [i]", -1);
-                       g_error ("problem at %d (%d)\n", i, bb->dfn);
+               idom = NULL;
+               for (i = 0; i < bb->in_count; ++i) {
+                       MonoBasicBlock *in_bb = bb->in_bb [i];
+                       if (doms [in_bb->dfn]) {
+                               idom = in_bb;
+                               break;
+                       }
                }
-               mem += bitsize;
-       }
-       temp = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-
-       for (i = 1; i < m->num_bblocks; ++i) {
-
-               temp = T [i];
+               if (bb != cfg->bblocks [0])
+                       g_assert (idom);
                        
-               mono_bitset_foreach_bit_rev (temp, s, m->num_bblocks) {
-
-                       mono_bitset_foreach_bit_rev (temp, t, m->num_bblocks) {
-                                               
-                               if (t == s)
-                                       continue;
+               while (i < bb->in_count) {
+                       MonoBasicBlock *in_bb = bb->in_bb [i];
+
+                       if (in_bb->dfn && doms [in_bb->dfn]) {
+                               /* Intersect */
+                               MonoBasicBlock *f1 = idom;
+                               MonoBasicBlock *f2 = in_bb;
+
+                               while (f1 != f2) {
+                                       if (f1->dfn < f2->dfn)
+                                               f2 = doms [f2->dfn];
+                                       else
+                                               f1 = doms [f1->dfn];
+                               }
 
-                               //if (mono_bitset_test_fast (T [s], t))
-                               if (mono_bitset_test_fast (m->bblocks [s]->dominators, t))
-                                       mono_bitset_clear (temp, t);
+                               idom = f1;
                        }
+                       i ++;
                }
 
+               if (idom != doms [bb->dfn]) {
+                       if (bb == cfg->bblocks [0])
+                               doms [bb->dfn] = bb;
+                       else
+                               doms [bb->dfn] = idom;
+                                       
+                       for (j = 0; j < bb->out_count; ++j) {
+                               MonoBasicBlock *out_bb = bb->out_bb [j];
+                               if (!in_worklist [out_bb->dfn]) {
 #ifdef DEBUG_DOMINATORS
-               printf ("IDOMSET BB%d %d: ", m->bblocks [i]->block_num, m->num_bblocks);
-               mono_blockset_print (m, T [i], NULL, -1);
+                                       printf ("\tADD %d to worklist.\n", out_bb->dfn);
 #endif
+                                       worklist [l_end ++] = out_bb;
+                                       if (l_end == cfg->num_bblocks + 1)
+                                               l_end = 0;
+                                       in_worklist [out_bb->dfn] = TRUE;
+                               }
+                       }
+               }
        }
 
-       for (i = 1; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               s = mono_bitset_find_start (T [i]);
-               g_assert (s != -1);
-               /*fixme:mono_bitset_count does not really work */
-               //g_assert (mono_bitset_count (T [i]) == 1);
-               t = mono_bitset_find_first (T [i], s);
-               g_assert (t == -1 || t >=  m->num_bblocks);
-               bb->idom = m->bblocks [s];
-               bb->idom->dominated = g_list_prepend (bb->idom->dominated, bb);
+       /* Compute bb->dominators for each bblock */
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+               MonoBasicBlock *cbb;
+               MonoBitSet *dominators;
+
+               bb->dominators = dominators = mono_bitset_mem_new (mem, cfg->num_bblocks, 0);
+               mem += bitsize;
+
+               mono_bitset_set_fast (dominators, bb->dfn);
+
+               if (bb->dfn) {
+                       for (cbb = doms [bb->dfn]; cbb->dfn; cbb = doms [cbb->dfn])
+                               mono_bitset_set_fast (dominators, cbb->dfn);
+
+                       bb->idom = doms [bb->dfn];
+                       if (bb->idom)
+                               bb->idom->dominated = g_list_prepend (bb->idom->dominated, bb);
+               }
+
+               /* The entry bb */
+               mono_bitset_set_fast (dominators, 0);
        }
 
-       m->comp_done |= MONO_COMP_IDOM;
-}
+       g_free (worklist);
+       g_free (in_worklist);
+       g_free (doms);
 
-static void
-postorder_visit (MonoBasicBlock *start, int *idx, MonoBasicBlock **array)
-{
-       int i;
+       cfg->comp_done |= MONO_COMP_DOM | MONO_COMP_IDOM;
 
-       /* we assume the flag was already cleared by the caller. */
-       start->flags |= BB_VISITED;
-       /*g_print ("visit %d at %p\n", *dfn, start->cil_code);*/
-       for (i = 0; i < start->out_count; ++i) {
-               if (start->out_bb [i]->flags & BB_VISITED)
-                       continue;
-               postorder_visit (start->out_bb [i], idx, array);
+#ifdef DEBUG_DOMINATORS
+       printf ("DTREE %s %d\n", mono_method_full_name (cfg->method, TRUE), 
+               mono_method_get_header (cfg->method)->num_clauses);
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+               printf ("BB%d(dfn=%d) (IDOM=BB%d): ", bb->block_num, bb->dfn, bb->idom ? bb->idom->block_num : -1);
+               mono_blockset_print (cfg, bb->dominators, NULL, -1);
        }
-       array [*idx] = start;
-       (*idx)++;
+#endif
 }
 
 static void
@@ -192,148 +187,56 @@ check_dominance_frontier (MonoBasicBlock *x, MonoBasicBlock *t)
        }
 } 
 
-#if 0
-/* there is a bug in this code */
+/**
+ * Compute dominance frontiers using the algorithm from the same paper.
+ */
 static void
-compute_dominance_frontier_old (MonoCompile *m) {
-       int i, j, bitsize;
-       MonoBasicBlock **postorder;
-       MonoBasicBlock *bb, *z;
+compute_dominance_frontier (MonoCompile *cfg)
+{
        char *mem;
+       int i, j, bitsize;
 
-       g_assert (!(m->comp_done & MONO_COMP_DFRONTIER));
+       g_assert (!(cfg->comp_done & MONO_COMP_DFRONTIER));
 
-       postorder = mono_mempool_alloc (m->mempool, sizeof (MonoBasicBlock*) * m->num_bblocks);
-       i = 0;
-       postorder_visit (m->bb_entry, &i, postorder);
-       /*g_print ("postorder traversal:");
-       for (i = 0; i < m->num_bblocks; ++i)
-               g_print (" B%d", postorder [i]->dfn);
-       g_print ("\n");*/
-       
-       /* we could reuse the bitsets allocated in compute_idominators() */
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       mem = mono_mempool_alloc0 (m->mempool, bitsize * m->num_bblocks);
+       for (i = 0; i < cfg->num_bblocks; ++i)
+               cfg->bblocks [i]->flags &= ~BB_VISITED;
 
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = postorder [i];
-               bb->dfrontier = mono_bitset_mem_new (mem, m->num_bblocks, 0);
+       bitsize = mono_bitset_alloc_size (cfg->num_bblocks, 0);
+       mem = mono_mempool_alloc0 (cfg->mempool, bitsize * cfg->num_bblocks);
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
+               bb->dfrontier = mono_bitset_mem_new (mem, cfg->num_bblocks, 0);
                mem += bitsize;
        }
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = postorder [i];
-               /* the local component */
-               for (j = 0; j < bb->out_count; ++j) {
-                       //if (bb->out_bb [j] != bb->idom)
-                       if (bb->out_bb [j]->idom != bb)
-                               mono_bitset_set (bb->dfrontier, bb->out_bb [j]->dfn);
-               }
-       }
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = postorder [i];
-               /* the up component */
-               if (bb->idom) {
-                       z = bb->idom;
-                       mono_bitset_foreach_bit (z->dfrontier, j, m->num_bblocks) {
-                               //if (m->bblocks [j] != bb->idom)
-                               if (m->bblocks [j]->idom != bb)
-                                       mono_bitset_set (bb->dfrontier, m->bblocks [j]->dfn);
-                       }
-               }
-       }
 
-       /* this is a check for the dominator frontier */
-       for (i = 0; i < m->num_bblocks; ++i) {
-               MonoBasicBlock *x = m->bblocks [i];
-
-               mono_bitset_foreach_bit ((x->dfrontier), j, (m->num_bblocks)) {
-                       MonoBasicBlock *w = m->bblocks [j];
-                       int k;
-                       /* x must not strictly dominates w */
-                       if (mono_bitset_test_fast (w->dominators, x->dfn) && w != x)
-                               g_assert_not_reached ();
-
-                       for (k = 0; k < m->num_bblocks; ++k)
-                               m->bblocks [k]->flags &= ~BB_VISITED;
-
-                       check_dominance_frontier (x, x);
-               }
-       }
-
-       m->comp_done |= MONO_COMP_DFRONTIER;
-}
-#endif
-
-/* this is an implementation of the dominance frontier algorithm described in
- * "modern compiler implementation in C" written by Andrew W. Appel
- */
-static void
-compute_dominance_frontier_appel (MonoCompile *m, int n) 
-{
-       int i, j;
-       MonoBasicBlock *bb;
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
 
-       bb = m->bblocks [n];
-       g_assert (!(bb->flags & BB_VISITED));
-       bb->flags |= BB_VISITED;
+               if (bb->in_count > 1) {
+                       for (j = 0; j < bb->in_count; ++j) {
+                               MonoBasicBlock *p = bb->in_bb [j];
 
-       for (i = 0; i < bb->out_count; ++i) {
-               MonoBasicBlock *y = bb->out_bb [i];
-               if (y->idom != bb) {
-                       g_assert (!(mono_bitset_test_fast (y->dominators, bb->dfn) && bb->dfn != y->dfn));
-                       mono_bitset_set (bb->dfrontier, y->dfn);
-               }
-       }
-       
-       
-       for (i = 0; i < m->num_bblocks; ++i) {
-               MonoBasicBlock *c = m->bblocks [i];
-               if (c->idom == bb) {
-                       if (!(c->flags & BB_VISITED))
-                               compute_dominance_frontier_appel (m, c->dfn);
-                       mono_bitset_foreach_bit (c->dfrontier, j, m->num_bblocks) {
-                               MonoBasicBlock *w = m->bblocks [j];
-                               if (!(mono_bitset_test_fast (w->dominators, bb->dfn) && bb->dfn != w->dfn))
-                                       mono_bitset_set (bb->dfrontier, w->dfn);
+                               if (p->dfn || (p == cfg->bblocks [0])) {
+                                       while (p != bb->idom) {
+                                               mono_bitset_set_fast (p->dfrontier, bb->dfn);
+                                               p = p->idom;
+                                       }
+                               }
                        }
                }
        }
-}
-
-static void
-compute_dominance_frontier (MonoCompile *m) 
-{
-       MonoBasicBlock *bb;
-       char *mem;
-       int i, j, bitsize;
-
-       g_assert (!(m->comp_done & MONO_COMP_DFRONTIER));
-
-       for (i = 0; i < m->num_bblocks; ++i)
-               m->bblocks [i]->flags &= ~BB_VISITED;
-
-       /* we could reuse the bitsets allocated in compute_idominators() */
-       bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       mem = mono_mempool_alloc0 (m->mempool, bitsize * m->num_bblocks);
-       for (i = 0; i < m->num_bblocks; ++i) {
-               bb = m->bblocks [i];
-               bb->dfrontier = mono_bitset_mem_new (mem, m->num_bblocks, 0);
-               mem += bitsize;
-       }
-
-       compute_dominance_frontier_appel (m, 0);
 
 #if 0
-       for (i = 0; i < m->num_bblocks; ++i) {
-               MonoBasicBlock *x = m->bblocks [i];
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               MonoBasicBlock *bb = cfg->bblocks [i];
                
-               printf ("DFRONT %s BB%d: ", mono_method_full_name (m->method, TRUE), x->block_num);
-               mono_blockset_print (m, x->dfrontier, NULL, -1);
+               printf ("DFRONT %s BB%d: ", mono_method_full_name (cfg->method, TRUE), bb->block_num);
+               mono_blockset_print (cfg, bb->dfrontier, NULL, -1);
        }
 #endif
 
-#if 1
+#if 0
        /* this is a check for the dominator frontier */
        for (i = 0; i < m->num_bblocks; ++i) {
                MonoBasicBlock *x = m->bblocks [i];
@@ -353,57 +256,48 @@ compute_dominance_frontier (MonoCompile *m)
        }
 #endif
 
-       m->comp_done |= MONO_COMP_DFRONTIER;
-}
-
-void    
-mono_compile_dominator_info (MonoCompile *cfg, int dom_flags)
-{
-       if ((dom_flags & MONO_COMP_DOM) && !(cfg->comp_done & MONO_COMP_DOM))
-               compute_dominators (cfg);
-       if ((dom_flags & MONO_COMP_IDOM) && !(cfg->comp_done & MONO_COMP_IDOM))
-               compute_idominators (cfg);
-       if ((dom_flags & MONO_COMP_DFRONTIER) && !(cfg->comp_done & MONO_COMP_DFRONTIER))
-               compute_dominance_frontier (cfg);
+       cfg->comp_done |= MONO_COMP_DFRONTIER;
 }
 
-static void
+static inline void
 df_set (MonoCompile *m, MonoBitSet* dest, MonoBitSet *set) 
 {
        int i;
 
-       mono_bitset_clear_all (dest);
        mono_bitset_foreach_bit (set, i, m->num_bblocks) {
                mono_bitset_union (dest, m->bblocks [i]->dfrontier);
        }
 }
 
-/* TODO: alloc tmp and D on the stack */
 MonoBitSet*
-mono_compile_iterated_dfrontier (MonoCompile *m, MonoBitSet *set) 
+mono_compile_iterated_dfrontier (MonoCompile *m, MonoBitSet *set)
 {
-       MonoBitSet *result, *D;
-       int bitsize, change = TRUE;
+       MonoBitSet *result;
+       int bitsize, count1, count2;
 
        bitsize = mono_bitset_alloc_size (m->num_bblocks, 0);
-       result = mono_bitset_mem_new (mono_mempool_alloc (m->mempool, bitsize), m->num_bblocks, 0);
-       D = mono_bitset_mem_new (mono_mempool_alloc (m->mempool, bitsize), m->num_bblocks, 0);
+       result = mono_bitset_mem_new (mono_mempool_alloc0 (m->mempool, bitsize), m->num_bblocks, 0);
 
        df_set (m, result, set);
+       count2 = mono_bitset_count (result);
        do {
-               change = FALSE;
-               df_set (m, D, result);
-               mono_bitset_union (D, result);
-
-               if (!mono_bitset_equal (D, result)) {
-                       mono_bitset_copyto (D, result);
-                       change = TRUE;
-               }
-       } while (change);
+               count1 = count2;
+               df_set (m, result, result);
+               count2 = mono_bitset_count (result);
+       } while (count2 > count1);
        
        return result;
 }
 
+void    
+mono_compile_dominator_info (MonoCompile *cfg, int dom_flags)
+{
+       if ((dom_flags & MONO_COMP_DOM) && !(cfg->comp_done & MONO_COMP_DOM))
+               compute_dominators (cfg);
+       if ((dom_flags & MONO_COMP_DFRONTIER) && !(cfg->comp_done & MONO_COMP_DFRONTIER))
+               compute_dominance_frontier (cfg);
+}
+
 //#define DEBUG_NATURAL_LOOPS
 
 /*
@@ -413,54 +307,101 @@ void
 mono_compute_natural_loops (MonoCompile *cfg)
 {
        int i, j, k;
+       MonoBitSet *in_loop_blocks;
+       int *bb_indexes;
 
        g_assert (!(cfg->comp_done & MONO_COMP_LOOPS));
 
+       in_loop_blocks = mono_bitset_new (cfg->num_bblocks + 1, 0);
        for (i = 0; i < cfg->num_bblocks; ++i) {
                MonoBasicBlock *n = cfg->bblocks [i];
 
                for (j = 0; j < n->out_count; j++) {
                        MonoBasicBlock *h = n->out_bb [j];
                        /* check for back-edge from n to h */
-                       if (n != h && mono_bitset_test (n->dominators, h->dfn)) {
-                               GList *todo;
+                       if (n != h && mono_bitset_test_fast (n->dominators, h->dfn)) {
+                               GSList *todo;
 
                                /* already in loop_blocks? */
-                               if (h->loop_blocks && g_list_find (h->loop_blocks, n))
+                               if (h->loop_blocks && g_list_find (h->loop_blocks, n)) {
                                        continue;
-                               
-                               todo = g_list_prepend (NULL, n);
+                               }
 
+                               mono_bitset_clear_all (in_loop_blocks);
+                               if (h->loop_blocks) {
+                                       GList *l;
+
+                                       for (l = h->loop_blocks; l; l = l->next) {
+                                               MonoBasicBlock *b = l->data;
+                                               if (b->dfn)
+                                                       mono_bitset_set_fast (in_loop_blocks, b->dfn);
+                                       }
+                               }
+                               todo = g_slist_prepend (NULL, n);       
+                       
                                while (todo) {
                                        MonoBasicBlock *cb = (MonoBasicBlock *)todo->data;
-                                       todo = g_list_delete_link (todo, todo);
+                                       todo = g_slist_delete_link (todo, todo);
 
-                                       if (g_list_find (h->loop_blocks, cb))
+                                       if ((cb->dfn && mono_bitset_test_fast (in_loop_blocks, cb->dfn)) || (!cb->dfn && g_list_find (h->loop_blocks, cb)))
                                                continue;
 
                                        h->loop_blocks = g_list_prepend (h->loop_blocks, cb);
                                        cb->nesting++;
+                                       if (cb->dfn)
+                                               mono_bitset_set_fast (in_loop_blocks, cb->dfn);
 
                                        for (k = 0; k < cb->in_count; k++) {
                                                MonoBasicBlock *prev = cb->in_bb [k];
                                                /* add all previous blocks */
-                                               if (prev != h && !g_list_find (h->loop_blocks, prev)) {
-                                                       todo = g_list_prepend (todo, prev);
+                                               if (prev != h && !((prev->dfn && mono_bitset_test_fast (in_loop_blocks, prev->dfn)) || (!prev->dfn && g_list_find (h->loop_blocks, prev)))) {
+                                                       todo = g_slist_prepend (todo, prev);
                                                }
                                        }
                                }
 
                                /* add the header if not already there */
-                               if (!g_list_find (h->loop_blocks, h)) {
+                               if (!((h->dfn && mono_bitset_test_fast (in_loop_blocks, h->dfn)) || (!h->dfn && g_list_find (h->loop_blocks, h)))) {
                                        h->loop_blocks = g_list_prepend (h->loop_blocks, h);
                                        h->nesting++;
                                }
                        }
                }
        }
+       mono_bitset_free (in_loop_blocks);
 
        cfg->comp_done |= MONO_COMP_LOOPS;
-       
+
+       /* Compute loop_body_start for each loop */
+       bb_indexes = g_new0 (int, cfg->num_bblocks);
+       {
+               MonoBasicBlock *bb;
+
+               for (i = 0, bb = cfg->bb_entry; bb; i ++, bb = bb->next_bb) {
+                       if (bb->dfn)
+                               bb_indexes [bb->dfn] = i;
+               }
+       }
+       for (i = 0; i < cfg->num_bblocks; ++i) {
+               if (cfg->bblocks [i]->loop_blocks) {
+                       /* The loop body start is the first bblock in the order they will be emitted */
+                       MonoBasicBlock *h = cfg->bblocks [i];
+                       MonoBasicBlock *body_start = h;
+                       GList *l;
+
+                       for (l = h->loop_blocks; l; l = l->next) {
+                               MonoBasicBlock *cb = (MonoBasicBlock *)l->data;
+
+                               if (cb->dfn && bb_indexes [cb->dfn] < bb_indexes [body_start->dfn]) {
+                                       body_start = cb;
+                               }
+                       }
+
+                       body_start->loop_body_start = 1;
+               }
+       }
+       g_free (bb_indexes);
+
 #ifdef DEBUG_NATURAL_LOOPS
        for (i = 0; i < cfg->num_bblocks; ++i) {
                if (cfg->bblocks [i]->loop_blocks) {
@@ -516,4 +457,3 @@ mono_free_loop_info (MonoCompile *cfg)
     if (cfg->comp_done & MONO_COMP_LOOPS)
         clear_loops (cfg);
 }
-