Merge pull request #2871 from BrzVlad/feature-conc-sweep-nrs
author Mark Probst <mark.probst@gmail.com>
Tue, 31 May 2016 18:07:42 +0000 (11:07 -0700)
committer Mark Probst <mark.probst@gmail.com>
Tue, 31 May 2016 18:07:42 +0000 (11:07 -0700)
[sgen] Run sweep concurrently with nursery collections

1  2 
mono/sgen/sgen-gc.c
mono/sgen/sgen-gc.h
mono/sgen/sgen-marksweep.c
mono/sgen/sgen-pinning-stats.c

diff --combined mono/sgen/sgen-gc.c
index d6e7f11421683a808f5f23832a5f141f57b2a9aa,e4396227e166a2f08ebb280a2a7c2de4078eb3e7..eb65d09b9a30154a2d275d620878e374e7e97c1a
@@@ -692,7 -692,7 +692,7 @@@ pin_objects_from_nursery_pin_queue (gbo
  
                        pin_object (obj_to_pin);
                        GRAY_OBJECT_ENQUEUE (queue, obj_to_pin, desc);
 -                      sgen_pin_stats_register_object (obj_to_pin, obj_to_pin_size);
 +                      sgen_pin_stats_register_object (obj_to_pin, GENERATION_NURSERY);
                        definitely_pinned [count] = obj_to_pin;
                        count++;
                }
@@@ -727,8 -727,6 +727,8 @@@ pin_objects_in_nursery (gboolean do_sca
  void
  sgen_pin_object (GCObject *object, GrayQueue *queue)
  {
 +      SGEN_ASSERT (0, sgen_ptr_in_nursery (object), "We're only supposed to use this for pinning nursery objects when out of memory.");
 +
        /*
         * All pinned objects are assumed to have been staged, so we need to stage as well.
         * Also, the count of staged objects shows that "late pinning" happened.
        binary_protocol_pin (object, (gpointer)LOAD_VTABLE (object), safe_object_get_size (object));
  
        ++objects_pinned;
 -      sgen_pin_stats_register_object (object, safe_object_get_size (object));
 +      sgen_pin_stats_register_object (object, GENERATION_NURSERY);
  
        GRAY_OBJECT_ENQUEUE (queue, object, sgen_obj_get_descriptor_safe (object));
  }
@@@ -1476,7 -1474,7 +1476,7 @@@ enqueue_scan_from_roots_jobs (char *hea
   * Return whether any objects were late-pinned due to being out of memory.
   */
  static gboolean
 -collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
 +collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
  {
        gboolean needs_major;
        size_t max_garbage_amount;
        time_minor_scan_remsets += TV_ELAPSED (atv, btv);
        SGEN_LOG (2, "Old generation scan: %lld usecs", (long long)TV_ELAPSED (atv, btv));
  
 -      sgen_pin_stats_print_class_stats ();
 +      sgen_pin_stats_report ();
  
        /* FIXME: Why do we do this at this specific, seemingly random, point? */
        sgen_client_collecting_minor (&fin_ready_queue, &critical_fin_queue);
  
        binary_protocol_flush_buffers (FALSE);
  
 -      sgen_memgov_minor_collection_end ();
 +      sgen_memgov_minor_collection_end (reason, is_overflow);
  
        /*objects are late pinned because of lack of memory, so a major is a good call*/
        needs_major = objects_pinned > 0;
@@@ -1768,7 -1766,7 +1768,7 @@@ major_copy_or_mark_from_roots (size_t *
                        sgen_los_pin_object (bigobj->data);
                        if (SGEN_OBJECT_HAS_REFERENCES (bigobj->data))
                                GRAY_OBJECT_ENQUEUE (WORKERS_DISTRIBUTE_GRAY_QUEUE, bigobj->data, sgen_obj_get_descriptor ((GCObject*)bigobj->data));
 -                      sgen_pin_stats_register_object (bigobj->data, safe_object_get_size (bigobj->data));
 +                      sgen_pin_stats_register_object (bigobj->data, GENERATION_OLD);
                        SGEN_LOG (6, "Marked large object %p (%s) size: %lu from roots", bigobj->data,
                                        sgen_client_vtable_get_name (SGEN_LOAD_VTABLE (bigobj->data)),
                                        (unsigned long)sgen_los_object_size (bigobj));
                time_major_scan_mod_union += TV_ELAPSED (btv, atv);
        }
  
 -      sgen_pin_stats_print_class_stats ();
 +      sgen_pin_stats_report ();
  }
  
  static void
@@@ -1871,7 -1869,7 +1871,7 @@@ major_finish_copy_or_mark (CopyOrMarkFr
  }
  
  static void
 -major_start_collection (gboolean concurrent, size_t *old_next_pin_slot)
 +major_start_collection (const char *reason, gboolean concurrent, size_t *old_next_pin_slot)
  {
        SgenObjectOperations *object_ops;
  
  
        reset_pinned_from_failed_allocation ();
  
 -      sgen_memgov_major_collection_start ();
 +      sgen_memgov_major_collection_start (concurrent, reason);
  
        //count_ref_nonref_objs ();
        //consistency_check ();
  }
  
  static void
 -major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean forced)
 +major_finish_collection (const char *reason, gboolean is_overflow, size_t old_next_pin_slot, gboolean forced)
  {
        ScannedObjectCounts counts;
        SgenObjectOperations *object_ops;
  
        g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
  
 -      sgen_memgov_major_collection_end (forced);
 +      sgen_memgov_major_collection_end (forced, concurrent_collection_in_progress, reason, is_overflow);
        current_collection_generation = -1;
  
        memset (&counts, 0, sizeof (ScannedObjectCounts));
  }
  
  static gboolean
 -major_do_collection (const char *reason, gboolean forced)
 +major_do_collection (const char *reason, gboolean is_overflow, gboolean forced)
  {
        TV_DECLARE (time_start);
        TV_DECLARE (time_end);
        /* world must be stopped already */
        TV_GETTIME (time_start);
  
 -      major_start_collection (FALSE, &old_next_pin_slot);
 -      major_finish_collection (reason, old_next_pin_slot, forced);
 +      major_start_collection (reason, FALSE, &old_next_pin_slot);
 +      major_finish_collection (reason, is_overflow, old_next_pin_slot, forced);
  
        TV_GETTIME (time_end);
        gc_stats.major_gc_time += TV_ELAPSED (time_start, time_end);
@@@ -2098,7 -2096,7 +2098,7 @@@ major_start_concurrent_collection (cons
        binary_protocol_concurrent_start ();
  
        // FIXME: store reason and pass it when finishing
 -      major_start_collection (TRUE, NULL);
 +      major_start_collection (reason, TRUE, NULL);
  
        gray_queue_redirect (&gray_queue);
  
@@@ -2165,7 -2163,7 +2165,7 @@@ major_finish_concurrent_collection (gbo
  
        current_collection_generation = GENERATION_OLD;
        sgen_cement_reset ();
 -      major_finish_collection ("finishing", -1, forced);
 +      major_finish_collection ("finishing", FALSE, -1, forced);
  
        if (whole_heap_check_before_collection)
                sgen_check_whole_heap (FALSE);
@@@ -2225,8 -2223,11 +2225,8 @@@ sgen_ensure_free_space (size_t size, in
  void
  sgen_perform_collection (size_t requested_size, int generation_to_collect, const char *reason, gboolean wait_to_finish)
  {
 -      TV_DECLARE (gc_start);
 -      TV_DECLARE (gc_end);
        TV_DECLARE (gc_total_start);
        TV_DECLARE (gc_total_end);
 -      GGTimingInfo infos [2];
        int overflow_generation_to_collect = -1;
        int oldest_generation_collected = generation_to_collect;
        const char *overflow_reason = NULL;
  
        SGEN_ASSERT (0, generation_to_collect == GENERATION_NURSERY || generation_to_collect == GENERATION_OLD, "What generation is this?");
  
 -      TV_GETTIME (gc_start);
 -
        sgen_stop_world (generation_to_collect);
  
        TV_GETTIME (gc_total_start);
                if (concurrent_collection_in_progress)
                        major_update_concurrent_collection ();
  
 -              if (collect_nursery (NULL, FALSE) && !concurrent_collection_in_progress) {
 +              if (collect_nursery (reason, FALSE, NULL, FALSE) && !concurrent_collection_in_progress) {
                        overflow_generation_to_collect = GENERATION_OLD;
                        overflow_reason = "Minor overflow";
                }
        } else {
                SGEN_ASSERT (0, generation_to_collect == GENERATION_OLD, "We should have handled nursery collections above");
                if (major_collector.is_concurrent && !wait_to_finish) {
 -                      collect_nursery (NULL, FALSE);
 +                      collect_nursery ("Concurrent start", FALSE, NULL, FALSE);
                        major_start_concurrent_collection (reason);
                        oldest_generation_collected = GENERATION_NURSERY;
 -              } else if (major_do_collection (reason, wait_to_finish)) {
 +              } else if (major_do_collection (reason, FALSE, wait_to_finish)) {
                        overflow_generation_to_collect = GENERATION_NURSERY;
                        overflow_reason = "Excessive pinning";
                }
        }
  
 -      TV_GETTIME (gc_end);
 -
 -      memset (infos, 0, sizeof (infos));
 -      infos [0].generation = oldest_generation_collected;
 -      infos [0].reason = reason;
 -      infos [0].is_overflow = FALSE;
 -      infos [1].generation = -1;
 -      infos [0].total_time = SGEN_TV_ELAPSED (gc_start, gc_end);
 -
        if (overflow_generation_to_collect != -1) {
                SGEN_ASSERT (0, !concurrent_collection_in_progress, "We don't yet support overflow collections with the concurrent collector");
  
                 * or the nursery is fully pinned.
                 */
  
 -              infos [1].generation = overflow_generation_to_collect;
 -              infos [1].reason = overflow_reason;
 -              infos [1].is_overflow = TRUE;
 -              gc_start = gc_end;
 -
                if (overflow_generation_to_collect == GENERATION_NURSERY)
 -                      collect_nursery (NULL, FALSE);
 +                      collect_nursery (overflow_reason, TRUE, NULL, FALSE);
                else
 -                      major_do_collection (overflow_reason, wait_to_finish);
 -
 -              TV_GETTIME (gc_end);
 -              infos [1].total_time = SGEN_TV_ELAPSED (gc_start, gc_end);
 +                      major_do_collection (overflow_reason, TRUE, wait_to_finish);
  
                oldest_generation_collected = MAX (oldest_generation_collected, overflow_generation_to_collect);
        }
        TV_GETTIME (gc_total_end);
        time_max = MAX (time_max, TV_ELAPSED (gc_total_start, gc_total_end));
  
 -      sgen_restart_world (oldest_generation_collected, infos);
 +      sgen_restart_world (oldest_generation_collected);
  }
  
  /*
@@@ -3152,6 -3172,12 +3152,12 @@@ sgen_major_collector_iterate_live_block
        major_collector.iterate_live_block_ranges (callback);
  }
  
+ void
+ sgen_major_collector_iterate_block_ranges (sgen_cardtable_block_callback callback)
+ {
+       major_collector.iterate_block_ranges (callback);
+ }
  SgenMajorCollector*
  sgen_get_major_collector (void)
  {
@@@ -3194,10 -3220,9 +3200,10 @@@ sgen_stop_world (int generation
  
  /* LOCKING: assumes the GC lock is held */
  void
 -sgen_restart_world (int generation, GGTimingInfo *timing)
 +sgen_restart_world (int generation)
  {
        long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
 +      gint64 stw_time;
  
        SGEN_ASSERT (0, world_is_stopped, "Why are we restarting a running world?");
  
  
        world_is_stopped = FALSE;
  
 -      sgen_client_restart_world (generation, timing);
 +      sgen_client_restart_world (generation, &stw_time);
  
        binary_protocol_world_restarted (generation, sgen_timestamp ());
  
        if (sgen_client_bridge_need_processing ())
                sgen_client_bridge_processing_finish (generation);
  
 -      sgen_memgov_collection_end (generation, timing, timing ? 2 : 0);
 +      sgen_memgov_collection_end (generation, stw_time);
  }
  
  gboolean
@@@ -3229,7 -3254,7 +3235,7 @@@ sgen_check_whole_heap_stw (void
        sgen_stop_world (0);
        sgen_clear_nursery_fragments ();
        sgen_check_whole_heap (FALSE);
 -      sgen_restart_world (0, NULL);
 +      sgen_restart_world (0);
  }
  
  gint64
diff --combined mono/sgen/sgen-gc.h
index 6e7311e34b25b80c0d9736b9d3d973c32345445f,a3405727f159526eaf2240594c49d642ea5a331f..3a86cc44f5f9bd077365a6525a26fc1f1735c373
@@@ -315,7 -315,6 +315,7 @@@ enum 
        INTERNAL_MEM_CARDTABLE_MOD_UNION,
        INTERNAL_MEM_BINARY_PROTOCOL,
        INTERNAL_MEM_TEMPORARY,
 +      INTERNAL_MEM_LOG_ENTRY,
        INTERNAL_MEM_FIRST_CLIENT
  };
  
@@@ -432,9 -431,9 +432,9 @@@ void* sgen_alloc_internal_dynamic (size
  void sgen_free_internal_dynamic (void *addr, size_t size, int type);
  
  void sgen_pin_stats_enable (void);
 -void sgen_pin_stats_register_object (GCObject *obj, size_t size);
 +void sgen_pin_stats_register_object (GCObject *obj, int generation);
  void sgen_pin_stats_register_global_remset (GCObject *obj);
 -void sgen_pin_stats_print_class_stats (void);
 +void sgen_pin_stats_report (void);
  
  void sgen_sort_addresses (void **array, size_t size);
  void sgen_add_to_global_remset (gpointer ptr, GCObject *obj);
@@@ -571,6 -570,7 +571,7 @@@ sgen_update_reference (GCObject **p, GC
  
  typedef void (*sgen_cardtable_block_callback) (mword start, mword size);
  void sgen_major_collector_iterate_live_block_ranges (sgen_cardtable_block_callback callback);
+ void sgen_major_collector_iterate_block_ranges (sgen_cardtable_block_callback callback);
  
  typedef enum {
        ITERATE_OBJECTS_SWEEP = 1,
@@@ -625,6 -625,7 +626,7 @@@ struct _SgenMajorCollector 
        void (*pin_major_object) (GCObject *obj, SgenGrayQueue *queue);
        void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx);
        void (*iterate_live_block_ranges) (sgen_cardtable_block_callback callback);
+       void (*iterate_block_ranges) (sgen_cardtable_block_callback callback);
        void (*update_cardtable_mod_union) (void);
        void (*init_to_space) (void);
        void (*sweep) (void);
@@@ -812,8 -813,16 +814,8 @@@ size_t sgen_gc_get_total_heap_allocatio
  
  /* STW */
  
 -typedef struct {
 -      int generation;
 -      const char *reason;
 -      gboolean is_overflow;
 -      gint64 total_time;
 -      gint64 stw_time;
 -} GGTimingInfo;
 -
  void sgen_stop_world (int generation);
 -void sgen_restart_world (int generation, GGTimingInfo *timing);
 +void sgen_restart_world (int generation);
  gboolean sgen_is_world_stopped (void);
  
  gboolean sgen_set_allow_synchronous_major (gboolean flag);
diff --combined mono/sgen/sgen-marksweep.c
index 8611f6af9eb08e8033e63a40fc18ec55e097f292,6ea06e53de9001d27431e98c0508e6ea7162474a..f2c646b3952f678b513bd74b6f7667f110ca4a24
@@@ -194,16 -194,23 +194,23 @@@ static SgenArrayList allocated_blocks 
  static void *empty_blocks = NULL;
  static size_t num_empty_blocks = 0;
  
+ /*
+  * We can iterate the block list also while sweep is in progress but we
+  * need to account for blocks that will be checked for sweeping and even
+  * freed in the process.
+  */
  #define FOREACH_BLOCK_NO_LOCK(bl) {                                   \
        volatile gpointer *slot;                                                \
-       SGEN_ASSERT (0, !sweep_in_progress (), "Can't iterate blocks while sweep is in progress."); \
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {        \
-               (bl) = BLOCK_UNTAG (*slot);
+               (bl) = BLOCK_UNTAG (*slot);                             \
+               if (!(bl))                                              \
+                       continue;
  #define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr) {                 \
        volatile gpointer *slot;                                                \
-       SGEN_ASSERT (0, !sweep_in_progress (), "Can't iterate blocks while sweep is in progress."); \
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {        \
                (bl) = (MSBlockInfo *) (*slot);                 \
+               if (!(bl))                                              \
+                       continue;                                       \
                (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
                (bl) = BLOCK_UNTAG ((bl));
  #define END_FOREACH_BLOCK_NO_LOCK     } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
@@@ -549,16 -556,6 +556,6 @@@ ms_alloc_block (int size_index, gboolea
  
        add_free_block (free_blocks, size_index, info);
  
-       /*
-        * Adding to the allocated_blocks array is racy with the removal of nulls when
-        * sweeping. We wait for sweep to finish to avoid that.
-        *
-        * The memory barrier here and in `sweep_job_func()` are required because we need
-        * `allocated_blocks` synchronized between this and the sweep thread.
-        */
-       major_finish_sweep_checking ();
-       mono_memory_barrier ();
        sgen_array_list_add (&allocated_blocks, BLOCK_TAG (info), 0, FALSE);
  
        SGEN_ATOMIC_ADD_P (num_major_sections, 1);
@@@ -1084,6 -1081,19 +1081,6 @@@ major_block_is_evacuating (MSBlockInfo 
        return FALSE;
  }
  
 -#define LOAD_VTABLE   SGEN_LOAD_VTABLE
 -
 -#define MS_MARK_OBJECT_AND_ENQUEUE_CHECKED(obj,desc,block,queue) do { \
 -              int __word, __bit;                                      \
 -              MS_CALC_MARK_BIT (__word, __bit, (obj));                \
 -              if (!MS_MARK_BIT ((block), __word, __bit) && MS_OBJ_ALLOCED ((obj), (block))) { \
 -                      MS_SET_MARK_BIT ((block), __word, __bit);       \
 -                      if (sgen_gc_descr_has_references (desc))                        \
 -                              GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
 -                      binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
 -                      INC_NUM_MAJOR_OBJECTS_MARKED ();                \
 -              }                                                       \
 -      } while (0)
  #define MS_MARK_OBJECT_AND_ENQUEUE(obj,desc,block,queue) do {         \
                int __word, __bit;                                      \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
                        MS_SET_MARK_BIT ((block), __word, __bit);       \
                        if (sgen_gc_descr_has_references (desc))                        \
                                GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
 -                      binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
 +                      binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)
@@@ -1243,6 -1253,8 +1240,6 @@@ mark_pinned_objects_in_block (MSBlockIn
        if (first_entry == last_entry)
                return;
  
 -      block->has_pinned = TRUE;
 -
        entry = sgen_pinning_get_entry (first_entry);
        end = sgen_pinning_get_entry (last_entry);
  
                if (index == last_index)
                        continue;
                obj = MS_BLOCK_OBJ (block, index);
 -              MS_MARK_OBJECT_AND_ENQUEUE_CHECKED (obj, sgen_obj_get_descriptor (obj), block, queue);
 +              if (!MS_OBJ_ALLOCED (obj, block))
 +                      continue;
 +              MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
 +              sgen_pin_stats_register_object (obj, GENERATION_OLD);
                last_index = index;
        }
 +
 +      /*
 +       * There might have been potential pinning "pointers" into this block, but none of
 +       * them pointed to occupied slots, in which case we don't have to pin the block.
 +       */
 +      if (last_index >= 0)
 +              block->has_pinned = TRUE;
  }
  
  static inline void
@@@ -1313,6 -1315,7 +1310,7 @@@ set_block_state (MSBlockInfo *block, gi
  {
        SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
        block->state = new_state;
+       binary_protocol_block_set_state (block, MS_BLOCK_SIZE, expected_state, new_state);
  }
  
  /*
@@@ -1426,6 -1429,8 +1424,8 @@@ sweep_start (void
                for (j = 0; j < num_block_obj_sizes; ++j)
                        free_blocks [j] = NULL;
        }
+       sgen_array_list_remove_nulls (&allocated_blocks);
  }
  
  static void sweep_finish (void);
@@@ -1580,9 -1585,12 +1580,12 @@@ static voi
  sweep_blocks_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
  {
        volatile gpointer *slot;
+       MSBlockInfo *bl;
  
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {
-               sweep_block (BLOCK_UNTAG (*slot));
+               bl = BLOCK_UNTAG (*slot);
+               if (bl)
+                       sweep_block (bl);
        } SGEN_ARRAY_LIST_END_FOREACH_SLOT;
  
        mono_memory_write_barrier ();
@@@ -1629,8 -1637,6 +1632,6 @@@ sweep_job_func (void *thread_data_untyp
                }
        }
  
-       sgen_array_list_remove_nulls (&allocated_blocks);
        /*
         * Concurrently sweep all the blocks to reduce workload during minor
         * pauses where we need certain blocks to be swept. At the start of
@@@ -1709,7 -1715,7 +1710,7 @@@ static int count_nonpinned_nonref
  static void
  count_nonpinned_callback (GCObject *obj, size_t size, void *data)
  {
 -      GCVTable vtable = LOAD_VTABLE (obj);
 +      GCVTable vtable = SGEN_LOAD_VTABLE (obj);
  
        if (SGEN_VTABLE_HAS_REFERENCES (vtable))
                ++count_nonpinned_ref;
  static void
  count_pinned_callback (GCObject *obj, size_t size, void *data)
  {
 -      GCVTable vtable = LOAD_VTABLE (obj);
 +      GCVTable vtable = SGEN_LOAD_VTABLE (obj);
  
        if (SGEN_VTABLE_HAS_REFERENCES (vtable))
                ++count_pinned_ref;
@@@ -2219,6 -2225,18 +2220,18 @@@ major_print_gc_param_usage (void
  /*
   * This callback is used to clear cards, move cards to the shadow table and do counting.
   */
+ static void
+ major_iterate_block_ranges (sgen_cardtable_block_callback callback)
+ {
+       MSBlockInfo *block;
+       gboolean has_references;
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
+               if (has_references)
+                       callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
+       } END_FOREACH_BLOCK_NO_LOCK;
+ }
  static void
  major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
  {
@@@ -2418,12 -2436,15 +2431,15 @@@ static voi
  major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
  {
        MSBlockInfo *block;
-       gboolean has_references;
+       gboolean has_references, was_sweeping, skip_scan;
  
        if (!concurrent_mark)
                g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);
  
-       major_finish_sweep_checking ();
+       if (scan_type != CARDTABLE_SCAN_GLOBAL)
+               SGEN_ASSERT (0, !sweep_in_progress (), "Sweep should be finished when we scan mod union card table");
+       was_sweeping = sweep_in_progress ();
        binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
        FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
  #ifdef PREFETCH_CARDS
  
                if (!has_references)
                        continue;
+               skip_scan = FALSE;
  
-               scan_card_table_for_block (block, scan_type, ctx);
+               if (scan_type == CARDTABLE_SCAN_GLOBAL) {
+                       gpointer *card_start = (gpointer*) sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (block));
+                       gboolean has_dirty_cards = FALSE;
+                       int i;
+                       for (i = 0; i < CARDS_PER_BLOCK / sizeof(gpointer); i++) {
+                               if (card_start [i]) {
+                                       has_dirty_cards = TRUE;
+                                       break;
+                               }
+                       }
+                       if (!has_dirty_cards) {
+                               skip_scan = TRUE;
+                       } else {
+                               /*
+                                * After the start of the concurrent collections, blocks change state
+                                * to marking. We should not sweep it in that case. We can't race with
+                                * sweep start since we are in a nursery collection. Also avoid CAS-ing
+                                */
+                               if (sweep_in_progress ()) {
+                                       skip_scan = !ensure_block_is_checked_for_sweeping (__index, TRUE, NULL);
+                               } else if (was_sweeping) {
+                                       /* Recheck in case sweep finished after dereferencing the slot */
+                                       skip_scan = *sgen_array_list_get_slot (&allocated_blocks, __index) == 0;
+                               }
+                       }
+               }
+               if (!skip_scan)
+                       scan_card_table_for_block (block, scan_type, ctx);
        } END_FOREACH_BLOCK_NO_LOCK;
        binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
  }
@@@ -2579,6 -2628,7 +2623,7 @@@ sgen_marksweep_init_internal (SgenMajor
        collector->pin_major_object = pin_major_object;
        collector->scan_card_table = major_scan_card_table;
        collector->iterate_live_block_ranges = major_iterate_live_block_ranges;
+       collector->iterate_block_ranges = major_iterate_block_ranges;
        if (is_concurrent) {
                collector->update_cardtable_mod_union = update_cardtable_mod_union;
                collector->get_cardtable_mod_union_for_reference = major_get_cardtable_mod_union_for_reference;
diff --combined mono/sgen/sgen-pinning-stats.c
index 1808015c59ccd0e061a3fc91d982530256fb4074,23662f10be6e29b1d141e63ed33feb10d027cf66..ab00a56d66dd26cca6eb188000031cde4e95d3f0
@@@ -37,9 -37,6 +37,9 @@@ static gboolean do_pin_stats = FALSE
  static PinStatAddress *pin_stat_addresses = NULL;
  static size_t pinned_byte_counts [PIN_TYPE_MAX];
  
 +static size_t pinned_bytes_in_generation [GENERATION_MAX];
 +static int pinned_objects_in_generation [GENERATION_MAX];
 +
  static SgenPointerQueue pinned_objects = SGEN_POINTER_QUEUE_INIT (INTERNAL_MEM_STATISTICS);
  
  static SgenHashTable pinned_class_hash_table = SGEN_HASH_TABLE_INIT (INTERNAL_MEM_STATISTICS, INTERNAL_MEM_STAT_PINNED_CLASS, sizeof (PinnedClassEntry), g_str_hash, g_str_equal);
@@@ -69,10 -66,6 +69,10 @@@ sgen_pin_stats_reset (void
        pin_stat_addresses = NULL;
        for (i = 0; i < PIN_TYPE_MAX; ++i)
                pinned_byte_counts [i] = 0;
 +      for (i = 0; i < GENERATION_MAX; ++i) {
 +              pinned_bytes_in_generation [i] = 0;
 +              pinned_objects_in_generation [i] = 0;
 +      }
        sgen_pointer_queue_clear (&pinned_objects);
        sgen_hash_table_clean (&pinned_class_hash_table);
        sgen_hash_table_clean (&global_remset_class_hash_table);
@@@ -85,6 -78,8 +85,8 @@@ sgen_pin_stats_register_address (char *
        PinStatAddress *node;
        int pin_type_bit = 1 << pin_type;
  
+       if (!do_pin_stats)
+               return;
        while (*node_ptr) {
                node = *node_ptr;
                if (addr == node->addr) {
@@@ -160,23 -155,13 +162,23 @@@ register_vtable (GCVTable vtable, int p
  }
  
  void
 -sgen_pin_stats_register_object (GCObject *obj, size_t size)
 +sgen_pin_stats_register_object (GCObject *obj, int generation)
  {
        int pin_types = 0;
 +      size_t size = 0;
 +
 +      if (binary_protocol_is_enabled ()) {
 +              size = sgen_safe_object_get_size (obj);
 +              pinned_bytes_in_generation [generation] += size;
 +              ++pinned_objects_in_generation [generation];
 +      }
  
        if (!do_pin_stats)
                return;
  
 +      if (!size)
 +              size = sgen_safe_object_get_size (obj);
 +
        pin_stats_count_object_from_tree (obj, size, pin_stat_addresses, &pin_types);
        sgen_pointer_queue_add (&pinned_objects, obj);
  
@@@ -200,15 -185,12 +202,15 @@@ sgen_pin_stats_register_global_remset (
  }
  
  void
 -sgen_pin_stats_print_class_stats (void)
 +sgen_pin_stats_report (void)
  {
        char *name;
        PinnedClassEntry *pinned_entry;
        GlobalRemsetClassEntry *remset_entry;
  
 +      binary_protocol_pin_stats (pinned_objects_in_generation [GENERATION_NURSERY], pinned_bytes_in_generation [GENERATION_NURSERY],
 +                      pinned_objects_in_generation [GENERATION_OLD], pinned_bytes_in_generation [GENERATION_OLD]);
 +
        if (!do_pin_stats)
                return;