Merge pull request #2822 from BrzVlad/feature-lshift-decomposition
[mono.git] / mono / sgen / sgen-marksweep.c
index 02a212b76dfd84f8713f2cf7534b20b55054c027..38e127b8cd541fe4b304adf4f3e91d19135265db 100644 (file)
@@ -7,18 +7,7 @@
  * Copyright 2009-2010 Novell, Inc.
  * Copyright (C) 2012 Xamarin Inc
  *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License 2.0 as published by the Free Software Foundation;
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License 2.0 along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
  */
 
 #include "config.h"
@@ -41,7 +30,7 @@
 #include "mono/sgen/sgen-workers.h"
 #include "mono/sgen/sgen-thread-pool.h"
 #include "mono/sgen/sgen-client.h"
-#include "mono/utils/mono-membar.h"
+#include "mono/utils/mono-memory-model.h"
 
 #if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
 #define MS_BLOCK_SIZE  ARCH_MIN_MS_BLOCK_SIZE
@@ -110,6 +99,7 @@ struct _MSBlockInfo {
        guint16 obj_size_index;
        /* FIXME: Reduce this - it only needs a byte. */
        volatile gint32 state;
+       gint16 nused;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
        unsigned int has_pinned : 1;    /* means cannot evacuate */
@@ -1075,7 +1065,6 @@ mark_mod_union_card (GCObject *obj, void **ptr, GCObject *value_obj)
        } else {
                sgen_los_mark_mod_union_card (obj, ptr);
        }
-
        binary_protocol_mod_union_remset (obj, ptr, value_obj, SGEN_LOAD_VTABLE (value_obj));
 }
 
@@ -1186,6 +1175,7 @@ static guint64 stat_drain_loops;
 #define SCAN_OBJECT_FUNCTION_NAME      major_scan_object_with_evacuation
 #define SCAN_VTYPE_FUNCTION_NAME       major_scan_vtype_with_evacuation
 #define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_with_evacuation
+#define SCAN_PTR_FIELD_FUNCTION_NAME   major_scan_ptr_field_with_evacuation
 #include "sgen-marksweep-drain-gray-stack.h"
 
 #define COPY_OR_MARK_CONCURRENT
@@ -1197,6 +1187,8 @@ static guint64 stat_drain_loops;
 #define COPY_OR_MARK_CONCURRENT_WITH_EVACUATION
 #define COPY_OR_MARK_FUNCTION_NAME     major_copy_or_mark_object_concurrent_with_evacuation
 #define SCAN_OBJECT_FUNCTION_NAME      major_scan_object_concurrent_with_evacuation
+#define SCAN_VTYPE_FUNCTION_NAME       major_scan_vtype_concurrent_with_evacuation
+#define SCAN_PTR_FIELD_FUNCTION_NAME   major_scan_ptr_field_concurrent_with_evacuation
 #define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_concurrent_with_evacuation
 #include "sgen-marksweep-drain-gray-stack.h"
 
@@ -1523,6 +1515,7 @@ ensure_block_is_checked_for_sweeping (guint32 block_index, gboolean wait, gboole
        for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
                nused += bitcount (block->mark_words [i]);
 
+       block->nused = nused;
        if (nused)
                have_live = TRUE;
        if (nused < count)
@@ -1648,6 +1641,8 @@ sweep_finish (void)
                }
        }
 
+       sgen_memgov_major_post_sweep ();
+
        set_sweep_state (SWEEP_STATE_SWEPT, SWEEP_STATE_COMPACTING);
 }
 
@@ -1785,6 +1780,67 @@ major_finish_nursery_collection (void)
 #endif
 }
 
+static int
+block_usage_comparer (const void *bl1, const void *bl2)
+{
+       const gint16 nused1 = (*(MSBlockInfo**)bl1)->nused;
+       const gint16 nused2 = (*(MSBlockInfo**)bl2)->nused;
+
+       return nused2 - nused1;
+}
+
+static void
+sgen_evacuation_freelist_blocks (MSBlockInfo * volatile *block_list, int size_index)
+{
+       MSBlockInfo **evacuated_blocks;
+       size_t index = 0, count, num_blocks = 0, num_used = 0;
+       MSBlockInfo *info;
+       MSBlockInfo * volatile *prev;
+
+       for (info = *block_list; info != NULL; info = info->next_free) {
+               num_blocks++;
+               num_used += info->nused;
+       }
+
+       /*
+        * We have a set of blocks in the freelist which will be evacuated. Instead
+        * of evacuating all of the blocks into new ones, we traverse the freelist
+        * sorting it by the number of occupied slots, evacuating the objects from
+        * blocks with fewer used slots into fuller blocks.
+        *
+        * The number of used slots is set at the end of the previous sweep. Since
+        * we sequentially unlink slots from blocks, except for the head of the
+        * freelist, for blocks on the freelist, the number of used slots is the same
+        * as at the end of the previous sweep.
+        */
+       evacuated_blocks = (MSBlockInfo**)sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_blocks, INTERNAL_MEM_TEMPORARY, TRUE);
+
+       for (info = *block_list; info != NULL; info = info->next_free) {
+               evacuated_blocks [index++] = info;
+       }
+
+       SGEN_ASSERT (0, num_blocks == index, "Why did the freelist change ?");
+
+       qsort (evacuated_blocks, num_blocks, sizeof (gpointer), block_usage_comparer);
+
+       /*
+        * Form a new freelist with the fullest blocks. These blocks will also be
+        * marked as to_space so we don't evacuate from them.
+        */
+       count = MS_BLOCK_FREE / block_obj_sizes [size_index];
+       prev = block_list;
+       for (index = 0; index < (num_used + count - 1) / count; index++) {
+               SGEN_ASSERT (0, index < num_blocks, "Why do we need more blocks for compaction than we already had ?");
+               info = evacuated_blocks [index];
+               info->is_to_space = TRUE;
+               *prev = info;
+               prev = &info->next_free;
+       }
+       *prev = NULL;
+
+       sgen_free_internal_dynamic (evacuated_blocks, sizeof (MSBlockInfo*) * num_blocks, INTERNAL_MEM_TEMPORARY);
+}
+
 static void
 major_start_major_collection (void)
 {
@@ -1803,8 +1859,8 @@ major_start_major_collection (void)
 
                binary_protocol_evacuating_blocks (block_obj_sizes [i]);
 
-               free_block_lists [0][i] = NULL;
-               free_block_lists [MS_BLOCK_FLAG_REFS][i] = NULL;
+               sgen_evacuation_freelist_blocks (&free_block_lists [0][i], i);
+               sgen_evacuation_freelist_blocks (&free_block_lists [MS_BLOCK_FLAG_REFS][i], i);
        }
 
        if (lazy_sweep)
@@ -1816,6 +1872,13 @@ major_start_major_collection (void)
                        sweep_block (block);
                SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "All blocks must be swept when we're pinning.");
                set_block_state (block, BLOCK_STATE_MARKING, BLOCK_STATE_SWEPT);
+               /*
+                * Swept blocks that have a null free_list are full. Evacuation is not
+                * effective on these blocks since we expect them to have high usage anyway,
+                * given that the survival rate for majors is relatively high.
+                */
+               if (evacuate_block_obj_sizes [block->obj_size_index] && !block->free_list)
+                       block->is_to_space = TRUE;
        } END_FOREACH_BLOCK_NO_LOCK;
 
        if (lazy_sweep)
@@ -2180,13 +2243,14 @@ initial_skip_card (guint8 *card_data)
 #define MS_OBJ_ALLOCED_FAST(o,b)               (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + MS_BLOCK_SIZE))
 
 static void
-scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyContext ctx)
+scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, ScanCopyContext ctx)
 {
        SgenGrayQueue *queue = ctx.queue;
        ScanObjectFunc scan_func = ctx.ops->scan_object;
 #ifndef SGEN_HAVE_OVERLAPPING_CARDS
        guint8 cards_copy [CARDS_PER_BLOCK];
 #endif
+       guint8 cards_preclean [CARDS_PER_BLOCK];
        gboolean small_objects;
        int block_obj_size;
        char *block_start;
@@ -2194,6 +2258,10 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyConte
        guint8 *card_data_end;
        char *scan_front = NULL;
 
+       /* The concurrent mark doesn't enter evacuating blocks */
+       if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN && major_block_is_evacuating (block))
+               return;
+
        block_obj_size = block->obj_size;
        small_objects = block_obj_size < CARD_SIZE_IN_BYTES;
 
@@ -2206,7 +2274,7 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyConte
         * Cards aliasing happens in powers of two, so as long as major blocks are aligned to their
         * sizes, they won't overflow the cardtable overlap modulus.
         */
-       if (mod_union) {
+       if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
                card_data = card_base = block->cardtable_mod_union;
                /*
                 * This happens when the nursery collection that precedes finishing
@@ -2214,6 +2282,11 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyConte
                 */
                if (!card_data)
                        return;
+
+               if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN) {
+                       sgen_card_table_preclean_mod_union (card_data, cards_preclean, CARDS_PER_BLOCK);
+                       card_data = card_base = cards_preclean;
+               }
        } else {
 #ifdef SGEN_HAVE_OVERLAPPING_CARDS
                card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
@@ -2272,7 +2345,7 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyConte
                        if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
                                goto next_object;
 
-                       if (mod_union) {
+                       if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
                                /* FIXME: do this more efficiently */
                                int w, b;
                                MS_CALC_MARK_BIT (w, b, obj);
@@ -2287,7 +2360,7 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyConte
                                scan_func (object, sgen_obj_get_descriptor (object), queue);
                        } else {
                                size_t offset = sgen_card_table_get_card_offset (obj, block_start);
-                               sgen_cardtable_scan_object (object, block_obj_size, card_base + offset, mod_union, ctx);
+                               sgen_cardtable_scan_object (object, block_obj_size, card_base + offset, ctx);
                        }
                next_object:
                        obj += block_obj_size;
@@ -2305,23 +2378,23 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanCopyConte
 }
 
 static void
-major_scan_card_table (gboolean mod_union, ScanCopyContext ctx)
+major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
 {
        MSBlockInfo *block;
        gboolean has_references;
 
        if (!concurrent_mark)
-               g_assert (!mod_union);
+               g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);
 
        major_finish_sweep_checking ();
-       binary_protocol_major_card_table_scan_start (sgen_timestamp (), mod_union);
+       binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
        FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
 #ifdef PREFETCH_CARDS
                int prefetch_index = __index + 6;
                if (prefetch_index < allocated_blocks.next_slot) {
                        MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
                        PREFETCH_READ (prefetch_block);
-                       if (!mod_union) {
+                       if (scan_type == CARDTABLE_SCAN_GLOBAL) {
                                guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
                                PREFETCH_WRITE (prefetch_cards);
                                PREFETCH_WRITE (prefetch_cards + 32);
@@ -2332,9 +2405,9 @@ major_scan_card_table (gboolean mod_union, ScanCopyContext ctx)
                if (!has_references)
                        continue;
 
-               scan_card_table_for_block (block, mod_union, ctx);
+               scan_card_table_for_block (block, scan_type, ctx);
        } END_FOREACH_BLOCK_NO_LOCK;
-       binary_protocol_major_card_table_scan_end (sgen_timestamp (), mod_union);
+       binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
 }
 
 static void
@@ -2492,11 +2565,14 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        if (is_concurrent) {
                collector->major_ops_concurrent_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical;
                collector->major_ops_concurrent_start.scan_object = major_scan_object_concurrent_with_evacuation;
+               collector->major_ops_concurrent_start.scan_vtype = major_scan_vtype_concurrent_with_evacuation;
+               collector->major_ops_concurrent_start.scan_ptr_field = major_scan_ptr_field_concurrent_with_evacuation;
                collector->major_ops_concurrent_start.drain_gray_stack = drain_gray_stack_concurrent;
 
                collector->major_ops_concurrent_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_finish_canonical;
                collector->major_ops_concurrent_finish.scan_object = major_scan_object_with_evacuation;
                collector->major_ops_concurrent_finish.scan_vtype = major_scan_vtype_with_evacuation;
+               collector->major_ops_concurrent_finish.scan_ptr_field = major_scan_ptr_field_with_evacuation;
                collector->major_ops_concurrent_finish.drain_gray_stack = drain_gray_stack;
        }