Merge pull request #5714 from alexischr/update_bockbuild
[mono.git] / tools / sgen / gcpausevis.py
index 66eb3138369846ea9cc108f3aba673cd9fd1f1ed..27eb6276fadbd15d1166548a14c936d4b4928a4d 100755 (executable)
@@ -11,21 +11,25 @@ import subprocess
 
 parser = OptionParser (usage = "Usage: %prog [options] BINARY-PROTOCOL")
 parser.add_option ('--histogram', action = 'store_true', dest = 'histogram', help = "pause time histogram")
+parser.add_option ('--scatter', action = 'store_true', dest = 'scatter', help = "pause time scatterplot")
 parser.add_option ('--minor', action = 'store_true', dest = 'minor', help = "only show minor collections in histogram")
 parser.add_option ('--major', action = 'store_true', dest = 'major', help = "only show major collections in histogram")
 (options, files) = parser.parse_args ()
 
 show_histogram = False
+show_scatter = False
 show_minor = True
 show_major = True
 if options.minor:
-    show_histogram = True
     show_major = False
 if options.major:
-    show_histogram = True
     show_minor = False
 if options.histogram:
     show_histogram = True
+if options.scatter:
+    show_scatter = True
+if (options.minor or options.major) and not options.scatter:
+    show_histogram = True
 
 script_path = os.path.realpath (__file__)
 sgen_grep_path = os.path.join (os.path.dirname (script_path), 'sgen-grep-binprot')
@@ -39,80 +43,262 @@ if len (files) != 1:
     sys.exit (1)
 
 data = []
-minor_pausetimes = []
-major_pausetimes = []
+
+class Event:
+    def __init__(self, **kwargs):
+        self.minor_work = kwargs['minor_work']
+        self.major_work = kwargs['major_work']
+        self.start = kwargs['start']
+        self.stop = kwargs['stop']
+        self.gc_type = kwargs['gc_type']
+    def __repr__(self):
+        return 'Event(minor_work={}, major_work={}, start={}, stop={}, gc_type={})'.format(
+            self.minor_work,
+            self.major_work,
+            self.start,
+            self.stop,
+            self.gc_type,
+        )
 
 grep_input = open (files [0])
 proc = subprocess.Popen ([sgen_grep_path, '--pause-times'], stdin = grep_input, stdout = subprocess.PIPE)
 for line in iter (proc.stdout.readline, ''):
-    m = re.match ('^pause-time (\d+) (\d+) (\d+) (\d+)', line)
+    m = re.match ('^pause-time (\d+) (\d+) (\d+) (\d+) (\d+)', line)
     if m:
+        minor_work = major_work = False
         generation = int (m.group (1))
-        concurrent = int (m.group (2))
-        usecs = int (m.group (3))
-        start = int (m.group (4))
-        if generation == 0:
-            generation = "minor"
-            minor_pausetimes.append (usecs)
-        else:
-            generation = "major"
-            major_pausetimes.append (usecs)
-        if concurrent == 1:
+        concurrent = int (m.group (2)) != 0
+        finish = int (m.group (3)) != 0
+        msecs = int (m.group (4)) / 10.0 / 1000.0
+        start = int (m.group (5)) / 10.0 / 1000.0
+
+        if concurrent:
             kind = "CONC"
         else:
             kind = "SYNC"
-        rec = (generation, start, start + usecs, kind)
+
+        if generation == 0:
+            minor_work = True
+            if concurrent:
+                major_work = True
+                gc_type = "nursery+update"
+            else:
+                gc_type = "nursery"
+        else:
+            major_work = True
+            if concurrent:
+                if finish:
+                    minor_work = True
+                    gc_type = "nursery+finish"
+                else:
+                    gc_type = "start"
+            else:
+                gc_type = "full"
+
+        rec = Event(
+            minor_work=minor_work,
+            major_work=major_work,
+            start=start,
+            stop=start + msecs,
+            kind=kind,
+            gc_type=gc_type,
+        )
         print rec
         data.append (rec)
 
-if show_histogram:
-    pausetimes = []
-    if show_minor:
-        pausetimes += minor_pausetimes
-    if show_major:
-        pausetimes += major_pausetimes
-    plt.hist (pausetimes, 100)
+class MajorGCEventGroup:
+    pass
+
+class FullMajorGCEventGroup(MajorGCEventGroup):
+    def __init__(self, event):
+        self.event = event
+    def __repr__(self):
+        return 'FullMajorGCEventGroup({})'.format(
+            self.event,
+        )
+
+class ConcurrentMajorGCEventGroup(MajorGCEventGroup):
+    def __init__(self, start, updates, finish):
+        self.start = start
+        self.updates = updates
+        self.finish = finish
+    def __repr__(self):
+        return 'ConcurrentMajorEventGroup({}, {}, {})'.format(
+            self.start,
+            self.updates,
+            self.finish,
+        )
+
+# ([Event], int) -> (MajorGCEventGroup, int) | None
+def parse_next_major_gc(data, i):
+    assert i >= 0
+    # Find start or full event.
+    while i < len(data) and data[i].gc_type not in ['start', 'full', 'nursery+update']:
+        i += 1
+    if i == len(data):
+        return None
+    # If full event, done.
+    if data[i].gc_type == 'full':
+        return (FullMajorGCEventGroup(data[i]), i + 1)
+    start_event = data[i]
+    update_events = []
+    # Filter update events and find finish event.
+    while i < len(data) and data[i].gc_type != 'nursery+finish':
+        if data[i].gc_type == 'nursery+update':
+            update_events.append(data[i])
+        i += 1
+    if i == len(data):
+        return None
+    finish_event = data[i]
+    i += 1
+    return (ConcurrentMajorGCEventGroup(start_event, update_events, finish_event), i)
+
+# [Event] -> [MajorGCEventGroup]
+def parse_major_gcs(data):
+    major_gc_events = []
+    i = 0
+    while True:
+        maybe_event_group = parse_next_major_gc(data, i)
+        if maybe_event_group is None:
+            return major_gc_events
+        event_group, i = maybe_event_group
+        major_gc_events.append(event_group)
+
+if show_histogram or show_scatter:
+    bin_data_minor = []
+    bin_data_both = []
+    bin_data_major = []
+    bin_names = []
+
+    timeline_x = []
+    timeline_y = []
+    timeline_c = []
+
+    for rec in data:
+        pause = rec.stop - rec.start
+
+        color = None
+        if rec.major_work:
+            if rec.minor_work:
+                color = 'purple'
+            else:
+                color = 'red' if show_major else None
+        else:
+            color = 'blue' if show_minor else None
+
+        if color:
+            timeline_x.append(rec.start)
+            timeline_y.append(pause)
+            timeline_c.append(color)
+
+        for i in range(100):
+            time = (1.3)**(i+6)
+            prev_time = 0 if i==0 else (1.3)**(i+5)
+            if len(bin_names) <= i:
+                bin_data_minor.append(0)
+                bin_data_both.append(0)
+                bin_data_major.append(0)
+                bin_names.append('%d-%dms' % (int(prev_time), int(time)))
+            if pause <= time:
+                if rec.major_work:
+                    if rec.minor_work:
+                        bin_data_both[i] += pause
+                    else:
+                        bin_data_major[i] += pause
+                else:
+                    bin_data_minor[i] += pause
+                break
+
+    bin_data_minor=np.array(bin_data_minor)
+    bin_data_both=np.array(bin_data_both)
+    bin_data_major=np.array(bin_data_major)
+
+    if show_scatter:
+        plt.scatter(timeline_x, timeline_y, c=timeline_c)
+    else:
+        if show_minor:
+            plt.bar(range(len(bin_data_minor)), bin_data_minor, color='blue', label="minor")  #, align='center')
+            plt.bar(range(len(bin_data_both)), bin_data_both, bottom=bin_data_minor, color='purple', label="minor & major")
+            if show_major:
+                plt.bar(range(len(bin_data_major)), bin_data_major, bottom=(bin_data_minor+bin_data_both), color='red', label="only major")
+        else:
+            plt.bar(range(len(bin_data_both)), bin_data_both, color='purple', label="minor & major")
+            plt.bar(range(len(bin_data_major)), bin_data_major, bottom=bin_data_both, color='red')
+        plt.xticks(range(len(bin_names)), bin_names)
+        plt.ylabel('Cumulative time spent in GC pauses (ms)')
+        plt.xlabel('GC pause length')
+        plt.xticks(rotation=60)
+        plt.legend(loc='upper left')
 else:
-    data = np.array (data, dtype = [('caption', '|S20'), ('start', int), ('stop', int), ('kind', '|S20')])
-    cap, start, stop=data['caption'], data['start'], data['stop']
-
-    #Check the status, because we paint all lines with the same color
-    #together
-    is_sync= (data['kind']=='SYNC')
-    not_sync=np.logical_not(is_sync)
-
-    #Get unique captions and there indices and the inverse mapping
-    captions, unique_idx, caption_inv=np.unique(cap, 1,1)
-    print captions
-
-    #Build y values from the number of unique captions.
-    y=(caption_inv+1)/float(len(captions)+1)
-
-    #Plot function
-    def timelines(y, xstart, xstop,color='b'):
-        """Plot timelines at y from xstart to xstop with given color."""
-        plt.hlines(y,xstart,xstop,color,lw=4)
-        plt.vlines(xstart, y+0.03,y-0.03,color,lw=2)
-        plt.vlines(xstop, y+0.03,y-0.03,color,lw=2)
-
-    #Plot ok tl black
-    timelines(y[is_sync],start[is_sync],stop[is_sync],'r')
-    #Plot fail tl red
-    timelines(y[not_sync],start[not_sync],stop[not_sync],'k')
-
-    #Setup the plot
-    ax=plt.gca()
-    #ax.xaxis_date()
-    #myFmt = DateFormatter('%H:%M:%S')
-    #ax.xaxis.set_major_formatter(myFmt)
-    #ax.xaxis.set_major_locator(SecondLocator(0,interval=20))
-
-    #To adjust the xlimits a timedelta is needed.
-    delta=(stop.max()-start.min())/10
-
-    plt.yticks(y[unique_idx],captions)
-    plt.ylim(0,1)
-    plt.xlim(start.min()-delta, stop.max()+delta)
-    plt.xlabel('Time')
+    major_gc_event_groups = parse_major_gcs(data)
+
+    def bar(**kwargs):
+        indices = kwargs['indices']
+        pauses = kwargs['pauses']
+        color = kwargs['color']
+        if 'bottom' in kwargs:
+            bottom = kwargs['bottom']
+        else:
+            bottom = 0
+        plt.bar(
+            [index for index in indices if pauses[index] is not None],
+            np.array([pause for pause in pauses if pause is not None]),
+            color=color,
+            bottom=bottom,
+        )
+
+    indices = np.arange(len(major_gc_event_groups))
+    start_pauses = [
+        event_group.start.stop - event_group.start.start
+        if isinstance(event_group, ConcurrentMajorGCEventGroup) else None
+        for event_group in major_gc_event_groups
+    ]
+    bar(
+        indices=indices,
+        pauses=start_pauses,
+        color='red',
+    )
+    update_pauses = [
+        sum([
+            update_event.stop - update_event.start
+            for update_event in event_group.updates
+        ]) if isinstance(event_group, ConcurrentMajorGCEventGroup) else None
+        for event_group in major_gc_event_groups
+    ]
+    bar(
+        indices=indices,
+        pauses=update_pauses,
+        color='green',
+        bottom=[pause for pause in start_pauses if pause is not None],
+    )
+    finish_pauses = [
+        event_group.finish.stop - event_group.finish.start
+        if isinstance(event_group, ConcurrentMajorGCEventGroup) else None
+        for event_group in major_gc_event_groups
+    ]
+    start_update_pauses = [
+        a + b
+        for a, b in zip(start_pauses, update_pauses)
+        if a is not None and b is not None
+    ]
+    bar(
+        indices=indices,
+        pauses=finish_pauses,
+        color='blue',
+        bottom=start_update_pauses,
+    )
+    full_pauses = [
+        event_group.event.stop - event_group.event.start
+        if isinstance(event_group, FullMajorGCEventGroup) else None
+        for event_group in major_gc_event_groups
+    ]
+    bar(
+        indices=indices,
+        pauses=full_pauses,
+        color='black',
+    )
+
+    plt.ylabel("Pause Time (ms)")
+    plt.xlabel("Collection")
 
 plt.show()