[sgen] Add explicit binary protocol stats for collections
author     Vlad Brezae <brezaevlad@gmail.com>
           Fri, 30 Jun 2017 22:47:46 +0000 (01:47 +0300)
committer  Vlad Brezae <brezaevlad@gmail.com>
           Thu, 20 Jul 2017 10:53:47 +0000 (13:53 +0300)
Using the cardtable scan and finish_gray_stack start/end events is cumbersome to parse and requires a lot of logging, especially with the parallel collector.
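With these entries each worker logs one record containing its accumulated scan times, so a consumer of the binary protocol can aggregate per-phase durations in a single pass over the log instead of matching start/end pairs per worker. A minimal sketch of such a consumer (StatsEntry, WORKER_FINISH_STATS and read_next_entry are hypothetical stand-ins for the decoding done by a tool like sgen-grep-binprot, not actual sgen API):

#include <stdio.h>

enum { WORKER_FINISH_STATS = 1 };  /* hypothetical record tag */

typedef struct {
	int kind;              /* record tag, e.g. WORKER_FINISH_STATS */
	int worker_index;      /* 1-based; 0 is logically the gc thread */
	long long major_scan;  /* ticks, already accumulated per worker */
	long long los_scan;
	long long work_time;
} StatsEntry;

/* Hypothetical decoder for a single record. */
int read_next_entry (FILE *log, StatsEntry *entry);

static void
sum_worker_scan_times (FILE *log, long long *major, long long *los)
{
	StatsEntry e;

	*major = *los = 0;
	while (read_next_entry (log, &e)) {
		if (e.kind == WORKER_FINISH_STATS) {
			*major += e.major_scan;
			*los += e.los_scan;
		}
	}
}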

mono/metadata/sgen-client-mono.h
mono/sgen/sgen-gc.c
mono/sgen/sgen-protocol-def.h
mono/sgen/sgen-workers.c
mono/sgen/sgen-workers.h

diff --git a/mono/metadata/sgen-client-mono.h b/mono/metadata/sgen-client-mono.h
index 78c0126396cb308433c982290abedba0ba81b93a..ab59e4f639d279bc496be1bc31c0f95d96e15ca6 100644
@@ -690,6 +690,16 @@ sgen_client_binary_protocol_pin_stats (int objects_pinned_in_nursery, size_t byt
 {
 }
 
+static void G_GNUC_UNUSED
+sgen_client_binary_protocol_worker_finish_stats (int worker_index, int generation, gboolean forced, long long major_scan, long long los_scan, long long work_time)
+{
+}
+
+static void G_GNUC_UNUSED
+sgen_client_binary_protocol_collection_end_stats (long long major_scan, long long los_scan, long long finish_stack)
+{
+}
+
 #define TLAB_ACCESS_INIT       SgenThreadInfo *__thread_info__ = (SgenThreadInfo*)mono_tls_get_sgen_thread_info ()
 #define IN_CRITICAL_REGION (__thread_info__->client_info.in_critical_region)
 
diff --git a/mono/sgen/sgen-gc.c b/mono/sgen/sgen-gc.c
index a58d236da1f4de1513aab6d52b64d6854294dfab..ea67897afca98cbae0e5ec460916cdab92894ee1 100644
@@ -1449,6 +1449,9 @@ job_scan_major_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
        major_collector.scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count);
        SGEN_TV_GETTIME (btv);
        time_minor_scan_major_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+       if (worker_data_untyped)
+               ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
 }
 
 static void
@@ -1463,6 +1466,9 @@ job_scan_los_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
        sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count);
        SGEN_TV_GETTIME (btv);
        time_minor_scan_los += SGEN_TV_ELAPSED (atv, btv);
+
+       if (worker_data_untyped)
+               ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
 }
 
 static void
@@ -1478,6 +1484,9 @@ job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJo
        major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count);
        SGEN_TV_GETTIME (btv);
        time_major_scan_mod_union_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+       if (worker_data_untyped)
+               ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
 }
 
 static void
@@ -1493,28 +1502,43 @@ job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob
        sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count);
        SGEN_TV_GETTIME (btv);
        time_major_scan_mod_union_los += SGEN_TV_ELAPSED (atv, btv);
+
+       if (worker_data_untyped)
+               ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
 }
 
 static void
 job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
+       SGEN_TV_DECLARE (atv);
+       SGEN_TV_DECLARE (btv);
        ParallelScanJob *job_data = (ParallelScanJob*)job;
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
 
        g_assert (concurrent_collection_in_progress);
-
+       SGEN_TV_GETTIME (atv);
        major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count);
+       SGEN_TV_GETTIME (btv);
+
+       g_assert (worker_data_untyped);
+       ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
 }
 
 static void
 job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
+       SGEN_TV_DECLARE (atv);
+       SGEN_TV_DECLARE (btv);
        ParallelScanJob *job_data = (ParallelScanJob*)job;
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
 
        g_assert (concurrent_collection_in_progress);
-
+       SGEN_TV_GETTIME (atv);
        sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count);
+       SGEN_TV_GETTIME (btv);
+
+       g_assert (worker_data_untyped);
+       ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
 }
 
 static void
@@ -1662,6 +1686,9 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        TV_DECLARE (btv);
        SGEN_TV_DECLARE (last_minor_collection_start_tv);
        SGEN_TV_DECLARE (last_minor_collection_end_tv);
+       guint64 major_scan_start = time_minor_scan_major_blocks;
+       guint64 los_scan_start = time_minor_scan_los;
+       guint64 finish_gray_start = time_minor_finish_gray_stack;
 
        if (disable_minor_collections)
                return TRUE;
@@ -1856,6 +1883,14 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        current_collection_generation = -1;
        objects_pinned = 0;
 
+       if (is_parallel)
+               binary_protocol_collection_end_stats (0, 0, time_minor_finish_gray_stack - finish_gray_start);
+       else
+               binary_protocol_collection_end_stats (
+                       time_minor_scan_major_blocks - major_scan_start,
+                       time_minor_scan_los - los_scan_start,
+                       time_minor_finish_gray_stack - finish_gray_start);
+
        binary_protocol_collection_end (gc_stats.minor_gc_count - 1, GENERATION_NURSERY, 0, 0);
 
        if (check_nursery_objects_pinned && !sgen_minor_collector.is_split)
@@ -2150,8 +2185,9 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason
        mword fragment_total;
        TV_DECLARE (atv);
        TV_DECLARE (btv);
-
-       TV_GETTIME (btv);
+       guint64 major_scan_start = time_major_scan_mod_union_blocks;
+       guint64 los_scan_start = time_major_scan_mod_union_los;
+       guint64 finish_gray_start = time_major_finish_gray_stack;
 
        if (concurrent_collection_in_progress) {
                SgenObjectOperations *object_ops_par = NULL;
@@ -2171,6 +2207,7 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason
 
        sgen_workers_assert_gray_queue_is_empty (GENERATION_OLD);
 
+       TV_GETTIME (btv);
        finish_gray_stack (GENERATION_OLD, CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, gc_thread_gray_queue));
        TV_GETTIME (atv);
        time_major_finish_gray_stack += TV_ELAPSED (btv, atv);
@@ -2272,6 +2309,13 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason
        binary_protocol_flush_buffers (FALSE);
 
        //consistency_check ();
+       if (major_collector.is_parallel)
+               binary_protocol_collection_end_stats (0, 0, time_major_finish_gray_stack - finish_gray_start);
+       else
+               binary_protocol_collection_end_stats (
+                       time_major_scan_mod_union_blocks - major_scan_start,
+                       time_major_scan_mod_union_los - los_scan_start,
+                       time_major_finish_gray_stack - finish_gray_start);
 
        binary_protocol_collection_end (gc_stats.major_gc_count - 1, GENERATION_OLD, counts.num_scanned_objects, counts.num_unique_scanned_objects);
 }
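The collection-level stats above follow a simple pattern: the existing cumulative counters (time_minor_scan_major_blocks, time_major_scan_mod_union_blocks and friends) are snapshotted when the collection starts and the delta is logged when it ends. The pattern in isolation, with an illustrative counter name:

/* Cumulative across all collections, like the time_* counters in sgen-gc.c. */
static long long time_scan_total;

static long long
scan_time_for_one_collection (void)
{
	long long scan_start = time_scan_total;  /* snapshot at collection start */

	/* ... scan jobs add their elapsed ticks to time_scan_total ... */

	/* the delta is what binary_protocol_collection_end_stats () receives */
	return time_scan_total - scan_start;
}

For the parallel collectors the major/los slots are logged as 0, since the per-worker scan times are reported separately through binary_protocol_worker_finish_stats.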
diff --git a/mono/sgen/sgen-protocol-def.h b/mono/sgen/sgen-protocol-def.h
index 08bfdfe890ee2d9ddf8a4e91f1f8158d66363a36..4842ce3b8ca841ea9b4da4568b63fba840b3d489 100644
@@ -457,6 +457,20 @@ MATCH_INDEX (BINARY_PROTOCOL_MATCH)
 IS_VTABLE_MATCH (FALSE)
 END_PROTOCOL_ENTRY
 
+BEGIN_PROTOCOL_ENTRY6 (binary_protocol_worker_finish_stats, TYPE_INT, worker_index, TYPE_INT, generation, TYPE_BOOL, forced, TYPE_LONGLONG, major_scan, TYPE_LONGLONG, los_scan, TYPE_LONGLONG, work_time)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (TRUE)
+MATCH_INDEX (BINARY_PROTOCOL_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY
+
+BEGIN_PROTOCOL_ENTRY3 (binary_protocol_collection_end_stats, TYPE_LONGLONG, major_scan, TYPE_LONGLONG, los_scan, TYPE_LONGLONG, finish_stack)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (TRUE)
+MATCH_INDEX (BINARY_PROTOCOL_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY
+
 #undef BEGIN_PROTOCOL_ENTRY0
 #undef BEGIN_PROTOCOL_ENTRY1
 #undef BEGIN_PROTOCOL_ENTRY2
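This def file is included several times with different definitions of the BEGIN_PROTOCOL_ENTRY* macros: one expansion generates the binary_protocol_* writer, another the struct that gets serialized, and the client header patched above has to supply a matching sgen_client_binary_protocol_* hook (empty in Mono's case) that the writer invokes. Roughly, and with simplified placeholder names (protocol_entry and the id constant stand in for the generated code in sgen-protocol.h/.c):

/* Illustrative expansion only, not the exact generated code. */
typedef struct {
	long long major_scan;
	long long los_scan;
	long long finish_stack;
} ProtocolCollectionEndStats;

enum { PROTOCOL_ID_COLLECTION_END_STATS = 42 };  /* placeholder id */

void protocol_entry (int type, void *data, int size);  /* low-level writer */
void sgen_client_binary_protocol_collection_end_stats (long long major_scan, long long los_scan, long long finish_stack);

void
binary_protocol_collection_end_stats (long long major_scan, long long los_scan, long long finish_stack)
{
	ProtocolCollectionEndStats entry = { major_scan, los_scan, finish_stack };

	/* serialize the record into the protocol buffers */
	protocol_entry (PROTOCOL_ID_COLLECTION_END_STATS, &entry, sizeof (entry));
	/* give the client a chance to react to the event */
	sgen_client_binary_protocol_collection_end_stats (major_scan, los_scan, finish_stack);
}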
diff --git a/mono/sgen/sgen-workers.c b/mono/sgen/sgen-workers.c
index f999a5bdb2ed7090d53db7d50f4504980b2bde91..7f926b4f149f81dff35d184d2c9f3ac7e641672a 100644
@@ -92,6 +92,9 @@ sgen_workers_ensure_awake (WorkerContext *context)
                                break;
 
                        did_set_state = set_state (&context->workers_data [i], old_state, STATE_WORK_ENQUEUED);
+
+                       if (did_set_state && old_state == STATE_NOT_WORKING)
+                               context->workers_data [i].last_start = sgen_timestamp ();
                } while (!did_set_state);
 
                if (!state_is_working_or_enqueued (old_state))
@@ -108,6 +111,7 @@ worker_try_finish (WorkerData *data)
        State old_state;
        int i, working = 0;
        WorkerContext *context = data->context;
+       gint64 last_start = data->last_start;
 
        ++stat_workers_num_finished;
 
@@ -130,6 +134,12 @@ worker_try_finish (WorkerData *data)
                        /* Make sure each worker has a chance of seeing the enqueued jobs */
                        sgen_workers_ensure_awake (context);
                        SGEN_ASSERT (0, data->state == STATE_WORK_ENQUEUED, "Why did we fail to set our own state to ENQUEUED");
+
+                       /*
+                        * Log so we can compute the duration of the normal concurrent M&S phase.
+                        * Worker indexes are 1-based, since index 0 is logically the GC thread.
+                        */
+                       binary_protocol_worker_finish_stats (data - &context->workers_data [0] + 1, context->generation, context->forced_stop, data->major_scan_time, data->los_scan_time, data->total_time + sgen_timestamp () - last_start);
                        goto work_available;
                }
        }
@@ -154,6 +164,9 @@ worker_try_finish (WorkerData *data)
        context->workers_finished = TRUE;
        mono_os_mutex_unlock (&context->finished_lock);
 
+       data->total_time += (sgen_timestamp () - last_start);
+       binary_protocol_worker_finish_stats (data - &context->workers_data [0] + 1, context->generation, context->forced_stop, data->major_scan_time, data->los_scan_time, data->total_time);
+
        binary_protocol_worker_finish (sgen_timestamp (), context->forced_stop);
 
        sgen_gray_object_queue_trim_free_list (&data->private_gray_queue);
@@ -454,6 +467,7 @@ void
 sgen_workers_start_all_workers (int generation, SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par, SgenWorkersFinishCallback callback)
 {
        WorkerContext *context = &worker_contexts [generation];
+       int i;
        SGEN_ASSERT (0, !context->started, "Why are we starting to work without finishing previous cycle");
 
        context->idle_func_object_ops_par = object_ops_par;
@@ -462,6 +476,13 @@ sgen_workers_start_all_workers (int generation, SgenObjectOperations *object_ops
        context->finish_callback = callback;
        context->worker_awakenings = 0;
        context->started = TRUE;
+
+       for (i = 0; i < context->active_workers_num; i++) {
+               context->workers_data [i].major_scan_time = 0;
+               context->workers_data [i].los_scan_time = 0;
+               context->workers_data [i].total_time = 0;
+               context->workers_data [i].last_start = 0;
+       }
        mono_memory_write_barrier ();
 
        /*
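The work_time a worker reports excludes the time it spent sleeping: last_start is stamped on the not-working to work-enqueued transition (first hunk above) and the awake interval is folded into total_time when the worker finishes. The accounting pattern in isolation, with sgen_timestamp stubbed by a monotonic clock (the real helper lives in sgen-gc.c):

#include <time.h>

typedef long long gint64;  /* stand-in for the glib type */

/* Stub for sgen's timestamp helper. */
static gint64
sgen_timestamp (void)
{
	struct timespec ts;
	clock_gettime (CLOCK_MONOTONIC, &ts);
	return (gint64)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

typedef struct {
	gint64 last_start;  /* stamped when the worker wakes up */
	gint64 total_time;  /* sum of awake intervals */
} Worker;

static void
worker_wake (Worker *w)
{
	w->last_start = sgen_timestamp ();
}

static void
worker_finish (Worker *w)
{
	/* only awake time counts; sleeping between phases is excluded */
	w->total_time += sgen_timestamp () - w->last_start;
}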
diff --git a/mono/sgen/sgen-workers.h b/mono/sgen/sgen-workers.h
index 42dc16ac4aef7c6eb18cd5fe0b73dd5f49b6497e..e3147f051af78a87304ce3ffb150f0daeb7f2b74 100644
@@ -32,6 +32,15 @@ struct _WorkerData {
         */
        gpointer free_block_lists;
        WorkerContext *context;
+
+       /* Work time distribution. Measured in ticks. */
+       gint64 major_scan_time, los_scan_time, total_time;
+       /*
+        * When the worker state changes from not working to work enqueued we
+        * record the timestamp, so we can compute how long the worker actually
+        * worked during the phase.
+        */
+       gint64 last_start;
 };
 
 struct _WorkerContext {