Merge pull request #5198 from BrzVlad/fix-binprot-stats
author Vlad Brezae <brezaevlad@gmail.com>
Thu, 20 Jul 2017 17:30:49 +0000 (20:30 +0300)
committer GitHub <noreply@github.com>
Thu, 20 Jul 2017 17:30:49 +0000 (20:30 +0300)
[sgen] Parallel gc improvements

1  2 
mono/metadata/sgen-client-mono.h
mono/sgen/sgen-marksweep.c

index e3cae00e7e605d20cd70630e0a534b4155ea0685,ab59e4f639d279bc496be1bc31c0f95d96e15ca6..a5ec02d480e21cb4477a3cf7e5f1eb4416465d0a
@@@ -198,7 -198,7 +198,7 @@@ sgen_client_update_copied_object (char 
                SGEN_LOG (9, "Array instance %p: size: %lu, rank: %d, length: %lu", array, (unsigned long)objsize, vt->rank, (unsigned long)mono_array_length (array));
        }
  
 -      if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
 +      if (MONO_PROFILER_ENABLED (gc_moves))
                mono_sgen_register_moved_object (obj, destination);
  }
  
@@@ -293,7 -293,7 +293,7 @@@ sgen_client_binary_protocol_collection_
  {
        MONO_GC_BEGIN (generation);
  
 -      mono_profiler_gc_event (MONO_GC_EVENT_START, generation);
 +      MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_START, generation));
  
  #ifndef DISABLE_PERFCOUNTERS
        if (generation == GENERATION_NURSERY)
@@@ -308,7 -308,7 +308,7 @@@ sgen_client_binary_protocol_collection_
  {
        MONO_GC_END (generation);
  
 -      mono_profiler_gc_event (MONO_GC_EVENT_END, generation);
 +      MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_END, generation));
  }
  
  static void G_GNUC_UNUSED
@@@ -383,21 -383,25 +383,21 @@@ sgen_client_binary_protocol_block_set_s
  static void G_GNUC_UNUSED
  sgen_client_binary_protocol_mark_start (int generation)
  {
 -      mono_profiler_gc_event (MONO_GC_EVENT_MARK_START, generation);
  }
  
  static void G_GNUC_UNUSED
  sgen_client_binary_protocol_mark_end (int generation)
  {
 -      mono_profiler_gc_event (MONO_GC_EVENT_MARK_END, generation);
  }
  
  static void G_GNUC_UNUSED
  sgen_client_binary_protocol_reclaim_start (int generation)
  {
 -      mono_profiler_gc_event (MONO_GC_EVENT_RECLAIM_START, generation);
  }
  
  static void G_GNUC_UNUSED
  sgen_client_binary_protocol_reclaim_end (int generation)
  {
 -      mono_profiler_gc_event (MONO_GC_EVENT_RECLAIM_END, generation);
  }
  
  static void
@@@ -686,6 -690,16 +686,16 @@@ sgen_client_binary_protocol_pin_stats (
  {
  }
  
+ static void G_GNUC_UNUSED
+ sgen_client_binary_protocol_worker_finish_stats (int worker_index, int generation, gboolean forced, long long major_scan, long long los_scan, long long work_time)
+ { /* Intentionally empty: none of the worker-finish statistics passed in are used here. */
+ }
+ static void G_GNUC_UNUSED
+ sgen_client_binary_protocol_collection_end_stats (long long major_scan, long long los_scan, long long finish_stack)
+ { /* Intentionally empty: the collection-end statistics passed in are not used here. */
+ }
  #define TLAB_ACCESS_INIT      SgenThreadInfo *__thread_info__ = (SgenThreadInfo*)mono_tls_get_sgen_thread_info ()
  #define IN_CRITICAL_REGION (__thread_info__->client_info.in_critical_region)
  
index 04feb680f2d77b9f6b239c0046cf4abf165cb589,f2d80765a97f2e65642712571d8dd774438debef..35e1195dbfbbfc27c96546a05684c22ad1b6fa3e
@@@ -212,6 -212,7 +212,7 @@@ static SgenArrayList allocated_blocks 
  /* non-allocated block free-list */
  static void *empty_blocks = NULL;
  static size_t num_empty_blocks = 0;
+ static gboolean compact_blocks = FALSE;
  
  /*
   * We can iterate the block list also while sweep is in progress but we
                (bl) = BLOCK_UNTAG ((bl));
  #define END_FOREACH_BLOCK_NO_LOCK     } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
  
+ #define FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK(bl,begin,end,index,hr) {   \
+       volatile gpointer *slot;                                        \
+       SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&allocated_blocks, begin, end, slot, index) { \
+               (bl) = (MSBlockInfo *) (*slot);                         \
+               if (!(bl))                                              \
+                       continue;                                       \
+               (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
+               (bl) = BLOCK_UNTAG ((bl)); /* Iterates allocated_blocks over the slot range given by begin/end.
+                * Slots that read as NULL are skipped; hr is taken from the raw slot
+                * value before bl is passed through BLOCK_UNTAG. The two scopes opened
+                * here are left open on purpose and must be closed by
+                * END_FOREACH_BLOCK_RANGE_NO_LOCK.
+                * NOTE(review): whether `end` is inclusive or exclusive is defined by
+                * SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE, which is not visible here - confirm.
+                */
+ #define END_FOREACH_BLOCK_RANGE_NO_LOCK       } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; }
  static volatile size_t num_major_sections = 0;
  /*
   * One free block list for each block object size.  We add and remove blocks from these
@@@ -1601,6 -1612,8 +1612,8 @@@ sweep_start (void
  
        sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists);
        sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists);
+       compact_blocks = TRUE;
  }
  
  static void sweep_finish (void);
@@@ -1974,6 -1987,18 +1987,18 @@@ major_start_nursery_collection (void
  #endif
  
        old_num_major_sections = num_major_sections;
+       /* Compact the block list if it hasn't been compacted in a while and nobody is using it */
+       if (compact_blocks && !sweep_in_progress () && !sweep_blocks_job && !sgen_concurrent_collection_in_progress ()) {
+               /*
+                * We support null elements in the array but do regular compaction to avoid
+                * excessive traversal of the array and to facilitate splitting into well
+                * balanced sections for parallel modes. We compact as soon as possible after
+                * sweep.
+                */
+               sgen_array_list_remove_nulls (&allocated_blocks);
+               compact_blocks = FALSE;
+       }
  }
  
  static void
@@@ -2137,7 -2162,7 +2162,7 @@@ major_free_swept_blocks (size_t section
  {
        SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
  
 -#ifdef TARGET_WIN32
 +#if defined(HOST_WIN32) || defined(HOST_ORBIS)
                /*
                 * sgen_free_os_memory () asserts in mono_vfree () because windows doesn't like freeing the middle of
                 * a VirtualAlloc ()-ed block.
@@@ -2603,10 -2628,24 +2628,24 @@@ scan_card_table_for_block (MSBlockInfo 
  }
  
  static void
- major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
+ major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count, int block_count)
  {
        MSBlockInfo *block;
        gboolean has_references, was_sweeping, skip_scan;
+       int first_block, last_block, index;
+       /*
+        * The last_block's index is at least (num_major_sections - 1) since we
+        * can have nulls in the allocated_blocks list. The last worker will
+        * scan the left-overs of the list. We expect few null entries in the
+        * allocated_blocks list, therefore using num_major_sections for computing
+        * block_count shouldn't affect work distribution.
+        */
+       first_block = block_count * job_index;
+       if (job_index == job_split_count - 1)
+               last_block = allocated_blocks.next_slot;
+       else
+               last_block = block_count * (job_index + 1);
  
        if (!concurrent_mark)
                g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);
        was_sweeping = sweep_in_progress ();
  
        binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
-       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
-               if (__index % job_split_count != job_index)
-                       continue;
+       FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK (block, first_block, last_block, index, has_references) {
  #ifdef PREFETCH_CARDS
-               int prefetch_index = __index + 6 * job_split_count;
+               int prefetch_index = index + 6;
                if (prefetch_index < allocated_blocks.next_slot) {
                        MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
                        PREFETCH_READ (prefetch_block);
                        }
                  }
  #endif
                if (!has_references)
                        continue;
                skip_scan = FALSE;
                                 * sweep start since we are in a nursery collection. Also avoid CAS-ing
                                 */
                                if (sweep_in_progress ()) {
-                                       skip_scan = !ensure_block_is_checked_for_sweeping (__index, TRUE, NULL);
+                                       skip_scan = !ensure_block_is_checked_for_sweeping (index, TRUE, NULL);
                                } else if (was_sweeping) {
                                        /* Recheck in case sweep finished after dereferencing the slot */
-                                       skip_scan = *sgen_array_list_get_slot (&allocated_blocks, __index) == 0;
+                                       skip_scan = *sgen_array_list_get_slot (&allocated_blocks, index) == 0;
                                }
                        }
                }
                if (!skip_scan)
                        scan_card_table_for_block (block, scan_type, ctx);
-       } END_FOREACH_BLOCK_NO_LOCK;
+       } END_FOREACH_BLOCK_RANGE_NO_LOCK;
        binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
  }