[sgen] Prefetch queue for optimized mark/scan loop.
author Mark Probst <mark.probst@gmail.com>
Thu, 28 Aug 2014 23:38:51 +0000 (16:38 -0700)
committer Mark Probst <mark.probst@gmail.com>
Wed, 26 Nov 2014 18:38:34 +0000 (10:38 -0800)
We put a short FIFO between the gray stack and the actual scanning, to
be able to prefetch objects ahead of time, with an adjustable delay.

mono/metadata/sgen-descriptor.h
mono/metadata/sgen-gray.c
mono/metadata/sgen-gray.h
mono/metadata/sgen-marksweep.c

index 4c0cc50259e6e84ff515061f54fa33142c7d5d8d..6419008189497b25b5e3d5e1f122e763462b8d95 100644 (file)
@@ -146,7 +146,7 @@ sgen_gc_descr_has_references (mword desc)
 
 /* helper macros to scan and traverse objects, macros because we resue them in many functions */
 #ifdef __GNUC__
-#define PREFETCH(addr) __builtin_prefetch ((addr))
+#define PREFETCH(addr) __builtin_prefetch ((addr), 0, 1) /* rw = 0: prefetch for reading; locality = 1: low temporal locality (GCC's default is 3) */
 #else
 #define PREFETCH(addr)
 #endif
index 3a1da29784f834a519812339bf28ade40ee1513e..048d623fe5e2d10c638433cdb748a74dc6f11257 100644 (file)
@@ -369,6 +369,31 @@ sgen_section_gray_queue_enqueue (SgenSectionGrayQueue *queue, GrayQueueSection *
        unlock_section_queue (queue);
 }
 
+/*
+ * Compacts the surviving (non-NULL) prefetch entries to the front, refills the freed
+ * slots from the gray queue and rewinds prefetch_cursor. Returns whether any entry remains.
+ */
+gboolean
+sgen_gray_object_fill_prefetch (SgenGrayQueue *queue)
+{
+       GrayQueueEntry *to = queue->prefetch;
+       GrayQueueEntry *from = queue->prefetch;
+       GrayQueueEntry *const end = queue->prefetch + SGEN_GRAY_QUEUE_PREFETCH_SIZE;
+       while (from < end) { /* pass 1: slide entries that still hold an object to the front */
+               if (from->obj)
+                       *to++ = *from;
+               ++from;
+       }
+       while (to < end) { /* pass 2: overwrite every slot behind the kept entries */
+               GRAY_OBJECT_DEQUEUE (queue, &to->obj, &to->desc);
+               /* The prefetch is not essential here, since this function is the slow path. */
+               PREFETCH (to->obj);
+               ++to;
+       }
+       queue->prefetch_cursor = queue->prefetch;
+       return queue->prefetch [0].obj != NULL; /* entries are packed from slot 0, so this slot decides */
+}
+
 void
 sgen_init_gray_queues (void)
 {
index a5dc772ad0b144e4399d46ba625cc8a858fc518e..536bf673568c987aad790d8fea9a7021ee831964 100644 (file)
@@ -98,6 +98,8 @@ typedef struct _SgenGrayQueue SgenGrayQueue;
 typedef void (*GrayQueueAllocPrepareFunc) (SgenGrayQueue*);
 typedef void (*GrayQueueEnqueueCheckFunc) (char*);
 
+#define SGEN_GRAY_QUEUE_PREFETCH_SIZE (2) /* number of slots in SgenGrayQueue.prefetch */
+
 struct _SgenGrayQueue {
        GrayQueueEntry *cursor;
        GrayQueueSection *first;
@@ -107,6 +109,8 @@ struct _SgenGrayQueue {
        GrayQueueEnqueueCheckFunc enqueue_check_func;
 #endif
        void *alloc_prepare_data;
+       GrayQueueEntry prefetch [SGEN_GRAY_QUEUE_PREFETCH_SIZE];
+       GrayQueueEntry *prefetch_cursor;
 };
 
 typedef struct _SgenSectionGrayQueue SgenSectionGrayQueue;
@@ -155,6 +159,8 @@ gboolean sgen_section_gray_queue_is_empty (SgenSectionGrayQueue *queue) MONO_INT
 GrayQueueSection* sgen_section_gray_queue_dequeue (SgenSectionGrayQueue *queue) MONO_INTERNAL;
 void sgen_section_gray_queue_enqueue (SgenSectionGrayQueue *queue, GrayQueueSection *section) MONO_INTERNAL;
 
+gboolean sgen_gray_object_fill_prefetch (SgenGrayQueue *queue) MONO_INTERNAL; /* TRUE if the prefetch FIFO holds an entry after refilling */
+
 static inline gboolean
 sgen_gray_object_queue_is_empty (SgenGrayQueue *queue)
 {
@@ -215,4 +221,18 @@ GRAY_OBJECT_DEQUEUE (SgenGrayQueue *queue, char** obj, mword *desc)
 #endif
 }
 
+static inline void /* Hands out the entry at prefetch_cursor and refills that slot from the gray queue. */
+sgen_gray_object_dequeue_fast (SgenGrayQueue *queue, char** obj, mword *desc) {
+       GrayQueueEntry *cursor = queue->prefetch_cursor;
+       GrayQueueEntry *const end = queue->prefetch + SGEN_GRAY_QUEUE_PREFETCH_SIZE;
+       *obj = cursor->obj; /* may be NULL; drain_gray_stack then calls sgen_gray_object_fill_prefetch */
+       *desc = cursor->desc;
+       GRAY_OBJECT_DEQUEUE (queue, &cursor->obj, &cursor->desc); /* reuse the slot for the next gray entry */
+       PREFETCH (cursor->obj); /* start fetching it now; it is returned by a later call */
+       ++cursor;
+       if (cursor == end) /* wrap around to the first prefetch slot */
+               cursor = queue->prefetch;
+       queue->prefetch_cursor = cursor;
+}
+
 #endif
index f40493234fb1accc3101207b10e5f76429959c08..065d404836cf7a9d3a2036a08f204a26c3b5dbcc 100644 (file)
@@ -1058,8 +1058,13 @@ static long long stat_optimized_nursery_not_copied;
 static long long stat_optimized_nursery_regular;
 static long long stat_optimized_major;
 static long long stat_optimized_major_forwarded;
-static long long stat_optimized_major_small;
+static long long stat_optimized_major_small_fast;
+static long long stat_optimized_major_small_slow;
 static long long stat_optimized_major_large;
+
+static long long stat_drain_prefetch_fills;
+static long long stat_drain_prefetch_fill_failures;
+static long long stat_drain_loops;
 #endif
 
 /* Returns whether the object is still in the nursery. */
@@ -1178,13 +1183,32 @@ drain_gray_stack (ScanCopyContext ctx)
 
        SGEN_ASSERT (0, ctx.scan_func == major_scan_object, "Wrong scan function");
 
+       HEAVY_STAT (++stat_drain_prefetch_fills);
+       if (!sgen_gray_object_fill_prefetch (queue)) {
+               HEAVY_STAT (++stat_drain_prefetch_fill_failures);
+               return TRUE;
+       }
+
        for (;;) {
                char *obj;
                mword desc;
                int type;
-               GRAY_OBJECT_DEQUEUE (queue, &obj, &desc);
-               if (!obj)
-                       return TRUE;
+
+               HEAVY_STAT (++stat_drain_loops);
+
+               sgen_gray_object_dequeue_fast (queue, &obj, &desc);
+               if (!obj) {
+                       HEAVY_STAT (++stat_drain_prefetch_fills);
+                       if (!sgen_gray_object_fill_prefetch (queue)) {
+                               HEAVY_STAT (++stat_drain_prefetch_fill_failures);
+                               return TRUE;
+                       }
+                       continue;
+               }
+
+#ifdef HEAVY_STATISTICS
+               sgen_descriptor_count_scanned_object (desc);
+#endif
                type = desc & 7;
                if (type == DESC_TYPE_SMALL_BITMAP) {
                        void **_objptr = (void**)(obj);
@@ -1198,7 +1222,6 @@ drain_gray_stack (ScanCopyContext ctx)
                                void *__old = *(_objptr);
                                if (__old) {
                                        gboolean still_in_nursery;
-                                       PREFETCH (__old);
                                        still_in_nursery = optimized_copy_or_mark_object (_objptr, __old, queue);
                                        if (G_UNLIKELY (still_in_nursery && !sgen_ptr_in_nursery ((_objptr)))) {
                                                void *__copy = *(_objptr);
@@ -2281,8 +2304,13 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        mono_counters_register ("Optimized nursery regular", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_nursery_regular);
        mono_counters_register ("Optimized major", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major);
        mono_counters_register ("Optimized major forwarded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_forwarded);
-       mono_counters_register ("Optimized major small", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_small);
+       mono_counters_register ("Optimized major small fast", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_small_fast);
+       mono_counters_register ("Optimized major small slow", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_small_slow);
        mono_counters_register ("Optimized major large", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_large);
+
+       mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_loops);
+       mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_prefetch_fills);
+       mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_prefetch_fill_failures);
 #endif
 
 #endif