Silence a bunch of unused result warnings in proflog.c.
[mono.git] / mono / profiler / decode.c
index 6cc08ff53c186d506e162c792e0e0ce0c0717380..eea1dbf5db5e27987c35b915dcc515999cdf9921 100644 (file)
 #include <mono/metadata/profiler.h>
 #include <mono/metadata/object.h>
 #include <mono/metadata/debug-helpers.h>
+#include <mono/utils/mono-counters.h>
 
 #define HASH_SIZE 9371
 #define SMALL_HASH_SIZE 31
 
+#if defined(__native_client__) || defined(__native_client_codegen__)
+volatile int __nacl_thread_suspension_needed = 0;
+void __nacl_suspend_thread_if_needed() {}
+#endif
+
 static int debug = 0;
 static int collect_traces = 0;
 static int show_traces = 0;
@@ -39,9 +45,18 @@ static uint64_t find_size = 0;
 static const char* find_name = NULL;
 static uint64_t time_from = 0;
 static uint64_t time_to = 0xffffffffffffffffULL;
+static int use_time_filter = 0;
 static uint64_t startup_time = 0;
 static FILE* outfile = NULL;
 
+static int32_t
+read_int16 (unsigned char *p)
+{
+       int32_t value = *p++;
+       value |= (*p++) << 8;
+       return value;
+}
+
 static int32_t
 read_int32 (unsigned char *p)
 {
@@ -75,6 +90,412 @@ pstrdup (const char *s)
        return p;
 }
 
+typedef struct _CounterValue CounterValue;
+struct _CounterValue {
+       uint64_t timestamp;
+       unsigned char *buffer;
+       CounterValue *next;
+};
+
+typedef struct _Counter Counter;
+struct _Counter {
+       int index;
+       int section;
+       const char *name;
+       int type;
+       int unit;
+       int variance;
+       CounterValue *values;
+       CounterValue *values_last;
+};
+
+typedef struct _CounterList CounterList;
+struct _CounterList {
+       Counter *counter;
+       CounterList *next;
+};
+
+typedef struct _CounterSection CounterSection;
+struct _CounterSection {
+       int value;
+       CounterList *counters;
+       CounterList *counters_last;
+       CounterSection *next;
+};
+
+typedef struct _CounterTimestamp CounterTimestamp;
+struct _CounterTimestamp {
+       uint64_t value;
+       CounterSection *sections;
+       CounterSection *sections_last;
+       CounterTimestamp *next;
+};
+
+static CounterList *counters = NULL;
+static CounterSection *counters_sections = NULL;
+static CounterTimestamp *counters_timestamps = NULL;
+
+enum {
+       COUNTERS_SORT_TIME,
+       COUNTERS_SORT_CATEGORY
+};
+
+static int counters_sort_mode = COUNTERS_SORT_TIME;
+
+static void
+add_counter_to_section (Counter *counter)
+{
+       CounterSection *csection, *s;
+       CounterList *clist;
+
+       clist = calloc (1, sizeof (CounterList));
+       clist->counter = counter;
+
+       for (csection = counters_sections; csection; csection = csection->next) {
+               if (csection->value == counter->section) {
+                       /* If section exist */
+                       if (!csection->counters)
+                               csection->counters = clist;
+                       else
+                               csection->counters_last->next = clist;
+                       csection->counters_last = clist;
+                       return;
+               }
+       }
+
+       /* If section does not exist */
+       csection = calloc (1, sizeof (CounterSection));
+       csection->value = counter->section;
+       csection->counters = clist;
+       csection->counters_last = clist;
+
+       if (!counters_sections) {
+               counters_sections = csection;
+       } else {
+               s = counters_sections;
+               while (s->next)
+                       s = s->next;
+               s->next = csection;
+       }
+}
+
+static void
+add_counter (int section, const char *name, int type, int unit, int variance, int index)
+{
+       CounterList *list, *l;
+       Counter *counter;
+
+       for (list = counters; list; list = list->next)
+               if (list->counter->index == index)
+                       return;
+
+       counter = calloc (1, sizeof (Counter));
+       counter->section = section;
+       counter->name = name;
+       counter->type = type;
+       counter->unit = unit;
+       counter->variance = variance;
+       counter->index = index;
+
+       list = calloc (1, sizeof (CounterList));
+       list->counter = counter;
+
+       if (!counters) {
+               counters = list;
+       } else {
+               l = counters;
+               while (l->next)
+                       l = l->next;
+               l->next = list;
+       }
+
+       if (counters_sort_mode == COUNTERS_SORT_CATEGORY || !verbose)
+               add_counter_to_section (counter);
+}
+
+static void
+add_counter_to_timestamp (uint64_t timestamp, Counter *counter)
+{
+       CounterTimestamp *ctimestamp, *t;
+       CounterSection *csection;
+       CounterList *clist;
+
+       clist = calloc (1, sizeof (CounterList));
+       clist->counter = counter;
+
+       for (ctimestamp = counters_timestamps; ctimestamp; ctimestamp = ctimestamp->next) {
+               if (ctimestamp->value == timestamp) {
+                       for (csection = ctimestamp->sections; csection; csection = csection->next) {
+                               if (csection->value == counter->section) {
+                                       /* if timestamp exist and section exist */
+                                       if (!csection->counters)
+                                               csection->counters = clist;
+                                       else
+                                               csection->counters_last->next = clist;
+                                       csection->counters_last = clist;
+                                       return;
+                               }
+                       }
+
+                       /* if timestamp exist and section does not exist */
+                       csection = calloc (1, sizeof (CounterSection));
+                       csection->value = counter->section;
+                       csection->counters = clist;
+                       csection->counters_last = clist;
+
+                       if (!ctimestamp->sections)
+                               ctimestamp->sections = csection;
+                       else
+                               ctimestamp->sections_last->next = csection;
+                       ctimestamp->sections_last = csection;
+                       return;
+               }
+       }
+
+       /* If timestamp do not exist and section does not exist */
+       csection = calloc (1, sizeof (CounterSection));
+       csection->value = counter->section;
+       csection->counters = clist;
+       csection->counters_last = clist;
+
+       ctimestamp = calloc (1, sizeof (CounterTimestamp));
+       ctimestamp->value = timestamp;
+       ctimestamp->sections = csection;
+       ctimestamp->sections_last = csection;
+
+       if (!counters_timestamps) {
+               counters_timestamps = ctimestamp;
+       } else {
+               t = counters_timestamps;
+               while (t->next)
+                       t = t->next;
+               t->next = ctimestamp;
+       }
+}
+
+static void
+add_counter_value (int index, CounterValue *value)
+{
+       CounterList *list;
+
+       for (list = counters; list; list = list->next) {
+               if (list->counter->index == index) {
+                       if (!list->counter->values)
+                               list->counter->values = value;
+                       else
+                               list->counter->values_last->next = value;
+                       list->counter->values_last = value;
+
+                       if (counters_sort_mode == COUNTERS_SORT_TIME)
+                               add_counter_to_timestamp (value->timestamp, list->counter);
+
+                       return;
+               }
+       }
+}
+
+static const char*
+section_name (int section)
+{
+       switch (section) {
+       case MONO_COUNTER_JIT: return "Mono JIT";
+       case MONO_COUNTER_GC: return "Mono GC";
+       case MONO_COUNTER_METADATA: return "Mono Metadata";
+       case MONO_COUNTER_GENERICS: return "Mono Generics";
+       case MONO_COUNTER_SECURITY: return "Mono Security";
+       case MONO_COUNTER_RUNTIME: return "Mono Runtime";
+       case MONO_COUNTER_SYSTEM: return "Mono System";
+       default: return "<unknown>";
+       }
+}
+
+static const char*
+type_name (int type)
+{
+       switch (type) {
+       case MONO_COUNTER_INT: return "Int";
+       case MONO_COUNTER_UINT: return "UInt";
+       case MONO_COUNTER_WORD: return "Word";
+       case MONO_COUNTER_LONG: return "Long";
+       case MONO_COUNTER_ULONG: return "ULong";
+       case MONO_COUNTER_DOUBLE: return "Double";
+       case MONO_COUNTER_STRING: return "String";
+       case MONO_COUNTER_TIME_INTERVAL: return "Time Interval";
+       default: return "<unknown>";
+       }
+}
+
+static const char*
+unit_name (int unit)
+{
+       switch (unit) {
+       case MONO_COUNTER_RAW: return "Raw";
+       case MONO_COUNTER_BYTES: return "Bytes";
+       case MONO_COUNTER_TIME: return "Time";
+       case MONO_COUNTER_COUNT: return "Count";
+       case MONO_COUNTER_PERCENTAGE: return "Percentage";
+       default: return "<unknown>";
+       }
+}
+
+static const char*
+variance_name (int variance)
+{
+       switch (variance) {
+       case MONO_COUNTER_MONOTONIC: return "Monotonic";
+       case MONO_COUNTER_CONSTANT: return "Constant";
+       case MONO_COUNTER_VARIABLE: return "Variable";
+       default: return "<unknown>";
+       }
+}
+
+static void
+dump_counters_value (Counter *counter, const char *key_format, const char *key, void *value)
+{
+       char format[32];
+
+       if (value == NULL) {
+               snprintf (format, sizeof (format), "%s : %%s\n", key_format);
+               fprintf (outfile, format, key, "<null>");
+       } else {
+               switch (counter->type) {
+               case MONO_COUNTER_INT:
+#if SIZEOF_VOID_P == 4
+               case MONO_COUNTER_WORD:
+#endif
+                       snprintf (format, sizeof (format), "%s : %%d\n", key_format);
+                       fprintf (outfile, format, key, *(int32_t*)value);
+                       break;
+               case MONO_COUNTER_UINT:
+                       snprintf (format, sizeof (format), "%s : %%u\n", key_format);
+                       fprintf (outfile, format, key, *(uint32_t*)value);
+                       break;
+               case MONO_COUNTER_LONG:
+#if SIZEOF_VOID_P == 8
+               case MONO_COUNTER_WORD:
+#endif
+               case MONO_COUNTER_TIME_INTERVAL:
+                       if (counter->type == MONO_COUNTER_LONG && counter->unit == MONO_COUNTER_TIME) {
+                               snprintf (format, sizeof (format), "%s : %%0.3fms\n", key_format);
+                               fprintf (outfile, format, key, (double)*(int64_t*)value / 10000.0);
+                       } else if (counter->type == MONO_COUNTER_TIME_INTERVAL) {
+                               snprintf (format, sizeof (format), "%s : %%0.3fms\n", key_format);
+                               fprintf (outfile, format, key, (double)*(int64_t*)value / 1000.0);
+                       } else {
+                               snprintf (format, sizeof (format), "%s : %%u\n", key_format);
+                               fprintf (outfile, format, key, *(int64_t*)value);
+                       }
+                       break;
+               case MONO_COUNTER_ULONG:
+                       snprintf (format, sizeof (format), "%s : %%llu\n", key_format);
+                       fprintf (outfile, format, key, *(uint64_t*)value);
+                       break;
+               case MONO_COUNTER_DOUBLE:
+                       snprintf (format, sizeof (format), "%s : %%f\n", key_format);
+                       fprintf (outfile, format, key, *(double*)value);
+                       break;
+               case MONO_COUNTER_STRING:
+                       snprintf (format, sizeof (format), "%s : %%s\n", key_format);
+                       fprintf (outfile, format, key, *(char*)value);
+                       break;
+               }
+       }
+}
+
+static void
+dump_counters (void)
+{
+       Counter *counter;
+       CounterValue *cvalue;
+       CounterTimestamp *ctimestamp;
+       CounterSection *csection;
+       CounterList *clist;
+       char strtimestamp[17];
+       int i, section_printed;
+
+       fprintf (outfile, "\nCounters:\n");
+
+       if (!verbose) {
+               char counters_to_print[][64] = {
+                       "Methods from AOT",
+                       "Methods JITted using mono JIT",
+                       "Methods JITted using LLVM",
+                       "Total time spent JITting (sec)",
+                       "User Time",
+                       "System Time",
+                       "Total Time",
+                       "Working Set",
+                       "Private Bytes",
+                       "Virtual Bytes",
+                       "Page Faults",
+                       "CPU Load Average - 1min",
+                       "CPU Load Average - 5min",
+                       "CPU Load Average - 15min",
+                       ""
+               };
+
+               for (csection = counters_sections; csection; csection = csection->next) {
+                       section_printed = 0;
+
+                       for (clist = csection->counters; clist; clist = clist->next) {
+                               counter = clist->counter;
+                               if (!counter->values_last)
+                                       continue;
+
+                               for (i = 0; counters_to_print [i][0] != 0; i++) {
+                                       if (strcmp (counters_to_print [i], counter->name) == 0) {
+                                               if (!section_printed) {
+                                                       fprintf (outfile, "\t%s:\n", section_name (csection->value));
+                                                       section_printed = 1;
+                                               }
+
+                                               dump_counters_value (counter, "\t\t%-30s", counter->name, counter->values_last->buffer);
+                                               break;
+                                       }
+                               }
+                       }
+               }
+       } else if (counters_sort_mode == COUNTERS_SORT_TIME) {
+               for (ctimestamp = counters_timestamps; ctimestamp; ctimestamp = ctimestamp->next) {
+                       fprintf (outfile, "\t%lld:%02lld:%02lld:%02lld.%03lld:\n", ctimestamp->value / 1000 / 60 / 60 / 24 % 1000,
+                               ctimestamp->value / 1000 / 60 / 60 % 24, ctimestamp->value / 1000 / 60 % 60,
+                               ctimestamp->value / 1000 % 60, ctimestamp->value % 1000);
+
+                       for (csection = ctimestamp->sections; csection; csection = csection->next) {
+                               fprintf (outfile, "\t\t%s:\n", section_name (csection->value));
+
+                               for (clist = csection->counters; clist; clist = clist->next) {
+                                       counter = clist->counter;
+                                       for (cvalue = counter->values; cvalue; cvalue = cvalue->next) {
+                                               if (cvalue->timestamp != ctimestamp->value)
+                                                       continue;
+
+                                               dump_counters_value (counter, "\t\t\t%-30s", counter->name, cvalue->buffer);
+                                       }
+                               }
+                       }
+               }
+       } else if (counters_sort_mode == COUNTERS_SORT_CATEGORY) {
+               for (csection = counters_sections; csection; csection = csection->next) {
+                       fprintf (outfile, "\t%s:\n", section_name (csection->value));
+
+                       for (clist = csection->counters; clist; clist = clist->next) {
+                               counter = clist->counter;
+                               fprintf (outfile, "\t\t%s: [type: %s, unit: %s, variance: %s]\n",
+                                       counter->name, type_name (counter->type), unit_name (counter->unit), variance_name (counter->variance));
+
+                               for (cvalue = counter->values; cvalue; cvalue = cvalue->next) {
+                                       snprintf (strtimestamp, sizeof (strtimestamp), "%lld:%02lld:%02lld:%02lld.%03lld", cvalue->timestamp / 1000 / 60 / 60 / 24 % 1000,
+                                               cvalue->timestamp / 1000 / 60 / 60 % 24, cvalue->timestamp / 1000 / 60 % 60,
+                                               cvalue->timestamp / 1000 % 60, cvalue->timestamp % 1000);
+
+                                       dump_counters_value (counter, "\t\t\t%s", strtimestamp, cvalue->buffer);
+                               }
+                       }
+               }
+       }
+}
+
 static int num_images;
 typedef struct _ImageDesc ImageDesc;
 struct _ImageDesc {
@@ -158,8 +579,11 @@ lookup_class (intptr_t klass)
        ClassDesc *cd = class_hash [slot];
        while (cd && cd->klass != klass)
                cd = cd->next;
-       if (!cd)
-               return add_class (klass, "unresolved class");
+       if (!cd) {
+               char buf [128];
+               snprintf (buf, sizeof (buf), "unresolved class %p", (void*)klass);
+               return add_class (klass, buf);
+       }
        return cd;
 }
 
@@ -171,6 +595,8 @@ struct _MethodDesc {
        intptr_t code;
        int len;
        int recurse_count;
+       int sample_hits;
+       int ignore_jit; /* when this is set, we collect the metadata but don't count this method fot jit time and code size, when filtering events */
        uint64_t calls;
        uint64_t total_time;
        uint64_t callee_time;
@@ -221,11 +647,315 @@ lookup_method (intptr_t method)
        MethodDesc *cd = method_hash [slot];
        while (cd && cd->method != method)
                cd = cd->next;
-       if (!cd)
-               return add_method (method, "unknown method", 0, 0);
+       if (!cd) {
+               char buf [128];
+               snprintf (buf, sizeof (buf), "unknown method %p", (void*)method);
+               return add_method (method, buf, 0, 0);
+       }
        return cd;
 }
 
+static int num_stat_samples = 0;
+static int size_stat_samples = 0;
+uintptr_t *stat_samples = NULL;
+int *stat_sample_desc = NULL;
+
+static void
+add_stat_sample (int type, uintptr_t ip) {
+       if (num_stat_samples == size_stat_samples) {
+               size_stat_samples *= 2;
+               if (!size_stat_samples)
+               size_stat_samples = 32;
+               stat_samples = realloc (stat_samples, size_stat_samples * sizeof (uintptr_t));
+               stat_sample_desc = realloc (stat_sample_desc, size_stat_samples * sizeof (int));
+       }
+       stat_samples [num_stat_samples] = ip;
+       stat_sample_desc [num_stat_samples++] = type;
+}
+
+static MethodDesc*
+lookup_method_by_ip (uintptr_t ip)
+{
+       int i;
+       MethodDesc* m;
+       /* dumb */
+       for (i = 0; i < HASH_SIZE; ++i) {
+               m = method_hash [i];
+               while (m) {
+                       //printf ("checking %p against %p-%p\n", (void*)ip, (void*)(m->code), (void*)(m->code + m->len));
+                       if (ip >= (uintptr_t)m->code && ip < (uintptr_t)m->code + m->len) {
+                               return m;
+                       }
+                       m = m->next;
+               }
+       }
+       return NULL;
+}
+
+static int
+compare_method_samples (const void *a, const void *b)
+{
+       MethodDesc *const*A = a;
+       MethodDesc *const*B = b;
+       if ((*A)->sample_hits == (*B)->sample_hits)
+               return 0;
+       if ((*B)->sample_hits < (*A)->sample_hits)
+               return -1;
+       return 1;
+}
+
+typedef struct _UnmanagedSymbol UnmanagedSymbol;
+struct _UnmanagedSymbol {
+       UnmanagedSymbol *parent;
+       char *name;
+       int is_binary;
+       uintptr_t addr;
+       uintptr_t size;
+       uintptr_t sample_hits;
+};
+
+static UnmanagedSymbol **usymbols = NULL;
+static int usymbols_size = 0;
+static int usymbols_num = 0;
+
+static int
+compare_usymbol_addr (const void *a, const void *b)
+{
+       UnmanagedSymbol *const*A = a;
+       UnmanagedSymbol *const*B = b;
+       if ((*B)->addr == (*A)->addr)
+               return 0;
+       if ((*B)->addr > (*A)->addr)
+               return -1;
+       return 1;
+}
+
+static int
+compare_usymbol_samples (const void *a, const void *b)
+{
+       UnmanagedSymbol *const*A = a;
+       UnmanagedSymbol *const*B = b;
+       if ((*B)->sample_hits == (*A)->sample_hits)
+               return 0;
+       if ((*B)->sample_hits < (*A)->sample_hits)
+               return -1;
+       return 1;
+}
+
+static void
+add_unmanaged_symbol (uintptr_t addr, char *name, uintptr_t size)
+{
+       UnmanagedSymbol *sym;
+       if (usymbols_num == usymbols_size) {
+               int new_size = usymbols_size * 2;
+               if (!new_size)
+                       new_size = 16;
+               usymbols = realloc (usymbols, sizeof (void*) * new_size);
+               usymbols_size = new_size;
+       }
+       sym = calloc (sizeof (UnmanagedSymbol), 1);
+       sym->addr = addr;
+       sym->name = name;
+       sym->size = size;
+       usymbols [usymbols_num++] = sym;
+}
+
+/* only valid after the symbols are sorted */
+static UnmanagedSymbol*
+lookup_unmanaged_symbol (uintptr_t addr)
+{
+       int r = usymbols_num - 1;
+       int l = 0;
+       UnmanagedSymbol *sym;
+       int last_best = -1;
+       while (r >= l) {
+               int m = (l + r) / 2;
+               sym = usymbols [m];
+               if (addr == sym->addr)
+                       return sym;
+               if (addr < sym->addr) {
+                       r = m - 1;
+               } else if (addr > sym->addr) {
+                       l = m + 1;
+                       last_best = m;
+               }
+       }
+       if (last_best >= 0 && (addr - usymbols [last_best]->addr) < 4096)
+               return usymbols [last_best];
+       return NULL;
+}
+
+/* we use the same structure for binaries */
+static UnmanagedSymbol **ubinaries = NULL;
+static int ubinaries_size = 0;
+static int ubinaries_num = 0;
+
+static void
+add_unmanaged_binary (uintptr_t addr, char *name, uintptr_t size)
+{
+       UnmanagedSymbol *sym;
+       if (ubinaries_num == ubinaries_size) {
+               int new_size = ubinaries_size * 2;
+               if (!new_size)
+                       new_size = 16;
+               ubinaries = realloc (ubinaries, sizeof (void*) * new_size);
+               ubinaries_size = new_size;
+       }
+       sym = calloc (sizeof (UnmanagedSymbol), 1);
+       sym->addr = addr;
+       sym->name = name;
+       sym->size = size;
+       sym->is_binary = 1;
+       ubinaries [ubinaries_num++] = sym;
+}
+
+static UnmanagedSymbol*
+lookup_unmanaged_binary (uintptr_t addr)
+{
+       int i;
+       for (i = 0; i < ubinaries_num; ++i) {
+               UnmanagedSymbol *ubin = ubinaries [i];
+               if (addr >= ubin->addr && addr < ubin->addr + ubin->size) {
+                       return ubin;
+               }
+       }
+       return NULL;
+}
+
+static const char*
+sample_type_name (int type)
+{
+       switch (type) {
+       case SAMPLE_CYCLES: return "cycles";
+       case SAMPLE_INSTRUCTIONS: return "instructions retired";
+       case SAMPLE_CACHE_MISSES: return "cache misses";
+       case SAMPLE_CACHE_REFS: return "cache references";
+       case SAMPLE_BRANCHES: return "executed branches";
+       case SAMPLE_BRANCH_MISSES: return "unpredicted branches";
+       }
+       return "unknown";
+}
+
+static void
+set_usym_parent (UnmanagedSymbol** cachedus, int count)
+{
+       int i;
+       for (i = 0; i < count; ++i) {
+               UnmanagedSymbol *ubin = lookup_unmanaged_binary (cachedus [i]->addr);
+               if (ubin == cachedus [i])
+                       continue;
+               cachedus [i]->parent = ubin;
+       }
+}
+
+static void
+print_usym (UnmanagedSymbol* um)
+{
+       if (um->parent)
+               fprintf (outfile, "\t%6d %6.2f %-36s in %s\n", um->sample_hits, um->sample_hits*100.0/num_stat_samples, um->name, um->parent->name);
+       else
+               fprintf (outfile, "\t%6d %6.2f %s\n", um->sample_hits, um->sample_hits*100.0/num_stat_samples, um->name);
+}
+
+static int
+sym_percent (uintptr_t sample_hits)
+{
+       double pc;
+       if (verbose)
+               return 1;
+       pc = sample_hits*100.0/num_stat_samples;
+       return pc >= 0.1;
+}
+
+static void
+dump_samples (void)
+{
+       int i, u;
+       int count = 0, msize = 0;
+       int unmanaged_hits = 0;
+       int unresolved_hits = 0;
+       MethodDesc** cachedm = NULL;
+       int ucount = 0, usize = 0;
+       UnmanagedSymbol** cachedus = NULL;
+       if (!num_stat_samples)
+               return;
+       qsort (usymbols, usymbols_num, sizeof (UnmanagedSymbol*), compare_usymbol_addr);
+       for (i = 0; i < num_stat_samples; ++i) {
+               MethodDesc *m = lookup_method_by_ip (stat_samples [i]);
+               if (m) {
+                       if (!m->sample_hits) {
+                               if (count == msize) {
+                                       msize *= 2;
+                                       if (!msize)
+                                               msize = 4;
+                                       cachedm = realloc (cachedm, sizeof (void*) * msize);
+                               }
+                               cachedm [count++] = m;
+                       }
+                       m->sample_hits++;
+               } else {
+                       UnmanagedSymbol *usym = lookup_unmanaged_symbol (stat_samples [i]);
+                       if (!usym) {
+                               unresolved_hits++;
+                               //printf ("unmanaged hit at %p\n", (void*)stat_samples [i]);
+                               usym = lookup_unmanaged_binary (stat_samples [i]);
+                       }
+                       if (usym) {
+                               if (!usym->sample_hits) {
+                                       if (ucount == usize) {
+                                               usize *= 2;
+                                               if (!usize)
+                                                       usize = 4;
+                                               cachedus = realloc (cachedus, sizeof (void*) * usize);
+                                       }
+                                       cachedus [ucount++] = usym;
+                               }
+                               usym->sample_hits++;
+                       }
+                       unmanaged_hits++;
+               }
+       }
+       qsort (cachedm, count, sizeof (MethodDesc*), compare_method_samples);
+       qsort (cachedus, ucount, sizeof (UnmanagedSymbol*), compare_usymbol_samples);
+       set_usym_parent (cachedus, ucount);
+       fprintf (outfile, "\nStatistical samples summary\n");
+       fprintf (outfile, "\tSample type: %s\n", sample_type_name (stat_sample_desc [0]));
+       fprintf (outfile, "\tUnmanaged hits:  %6d (%4.1f%%)\n", unmanaged_hits, (100.0*unmanaged_hits)/num_stat_samples);
+       fprintf (outfile, "\tManaged hits:    %6d (%4.1f%%)\n", num_stat_samples - unmanaged_hits, (100.0*(num_stat_samples-unmanaged_hits))/num_stat_samples);
+       fprintf (outfile, "\tUnresolved hits: %6d (%4.1f%%)\n", unresolved_hits, (100.0*unresolved_hits)/num_stat_samples);
+       fprintf (outfile, "\t%6s %6s %s\n", "Hits", "%", "Method name");
+       i = 0;
+       u = 0;
+       while (i < count || u < ucount) {
+               if (i < count) {
+                       MethodDesc *m = cachedm [i];
+                       if (u < ucount) {
+                               UnmanagedSymbol *um = cachedus [u];
+                               if (um->sample_hits > m->sample_hits) {
+                                       if (!sym_percent (um->sample_hits))
+                                               break;
+                                       print_usym (um);
+                                       u++;
+                                       continue;
+                               }
+                       }
+                       if (!sym_percent (m->sample_hits))
+                               break;
+                       fprintf (outfile, "\t%6d %6.2f %s\n", m->sample_hits, m->sample_hits*100.0/num_stat_samples, m->name);
+                       i++;
+                       continue;
+               }
+               if (u < ucount) {
+                       UnmanagedSymbol *um = cachedus [u];
+                       if (!sym_percent (um->sample_hits))
+                               break;
+                       print_usym (um);
+                       u++;
+                       continue;
+               }
+       }
+}
+
 typedef struct _HeapClassDesc HeapClassDesc;
 typedef struct {
        HeapClassDesc *klass;
@@ -239,6 +969,8 @@ struct _HeapClassDesc {
        HeapClassRevRef *rev_hash;
        int rev_hash_size;
        int rev_count;
+       uintptr_t pinned_references;
+       uintptr_t root_references;
 };
 
 static int
@@ -312,6 +1044,10 @@ struct _HeapShot {
        HeapObjectDesc **objects_hash;
        uintptr_t objects_count;
        uintptr_t objects_hash_size;
+       uintptr_t num_roots;
+       uintptr_t *roots;
+       uintptr_t *roots_extra;
+       int *roots_types;
 };
 
 static HeapShot *heap_shots = NULL;
@@ -441,7 +1177,7 @@ heap_shot_find_obj_slot (HeapShot *hs, uintptr_t objaddr)
                        i = 0;
        } while (i != start_pos);
        /* should not happen */
-       printf ("failed heap obj update\n");
+       //printf ("failed heap obj slot\n");
        return -1;
 }
 
@@ -526,6 +1262,81 @@ heap_shot_resolve_reverse_refs (HeapShot *hs)
        }
 }
 
+#define MARK_GRAY 1
+#define MARK_BLACK 2
+
+static void
+heap_shot_mark_objects (HeapShot *hs)
+{
+       uintptr_t i, oi, r;
+       unsigned char *marks;
+       HeapObjectDesc *obj, *ref;
+       int marked_some;
+       uintptr_t num_marked = 0, num_unmarked;
+       for (i = 0; i < hs->num_roots; ++i) {
+               HeapClassDesc *cd;
+               oi = heap_shot_find_obj_slot (hs, hs->roots [i]);
+               if (oi == -1) {
+                       continue;
+               }
+               obj = hs->objects_hash [oi];
+               cd = obj->hklass;
+               if (hs->roots_types [i] & MONO_PROFILE_GC_ROOT_PINNING)
+                       cd->pinned_references++;
+               cd->root_references++;
+       }
+       if (!debug)
+               return;
+       /* consistency checks: it seems not all the objects are walked in the heap in some cases */
+       marks = calloc (hs->objects_hash_size, 1);
+       if (!marks)
+               return;
+       for (i = 0; i < hs->num_roots; ++i) {
+               oi = heap_shot_find_obj_slot (hs, hs->roots [i]);
+               if (oi == -1) {
+                       fprintf (outfile, "root type 0x%x for obj %p (%s) not found in heap\n", hs->roots_types [i], (void*)hs->roots [i], lookup_class (hs->roots_extra [i])->name);
+                       continue;
+               }
+               obj = hs->objects_hash [oi];
+               if (!marks [oi]) {
+                       marks [oi] = obj->num_refs? MARK_GRAY: MARK_BLACK;
+                       num_marked++;
+               }
+       }
+       marked_some = 1;
+       while (marked_some) {
+               marked_some = 0;
+               for (i = 0; i < hs->objects_hash_size; ++i) {
+                       if (marks [i] != MARK_GRAY)
+                               continue;
+                       marks [i] = MARK_BLACK;
+                       obj = hs->objects_hash [i];
+                       for (r = 0; r < obj->num_refs; ++r) {
+                               oi = heap_shot_find_obj_slot (hs, obj->refs [r]);
+                               if (oi == -1) {
+                                       fprintf (outfile, "referenced obj %p not found in heap\n", (void*)obj->refs [r]);
+                                       continue;
+                               }
+                               ref = hs->objects_hash [oi];
+                               if (!marks [oi]) {
+                                       marks [oi] = ref->num_refs? MARK_GRAY: MARK_BLACK;
+                               }
+                       }
+                       marked_some++;
+               }
+       }
+
+       num_unmarked = 0;
+       for (i = 0; i < hs->objects_hash_size; ++i) {
+               if (hs->objects_hash [i] && !marks [i]) {
+                       num_unmarked++;
+                       fprintf (outfile, "object %p (%s) unmarked\n", (void*)hs->objects_hash [i], hs->objects_hash [i]->hklass->klass->name);
+               }
+       }
+       fprintf (outfile, "Total unmarked: %d/%d\n", num_unmarked, hs->objects_count);
+       free (marks);
+}
+
 static void
 heap_shot_free_objects (HeapShot *hs)
 {
@@ -542,6 +1353,7 @@ heap_shot_free_objects (HeapShot *hs)
        hs->objects_count = 0;
 }
 
+
 struct _BackTrace {
        BackTrace *next;
        unsigned int hash;
@@ -623,6 +1435,7 @@ typedef struct {
        int version_minor;
        int timer_overhead;
        int pid;
+       int port;
        uint64_t startup_time;
        ThreadContext *threads;
        ThreadContext *current;
@@ -642,6 +1455,12 @@ struct _ThreadContext {
        int stack_size;
        int stack_id;
        HeapShot *current_heap_shot;
+       uintptr_t num_roots;
+       uintptr_t size_roots;
+       uintptr_t *roots;
+       uintptr_t *roots_extra;
+       int *roots_types;
+       uint64_t gc_start_times [3];
 };
 
 static void
@@ -663,8 +1482,9 @@ load_data (ProfContext *ctx, int size)
                if (r == 0)
                        return size == 0? 1: 0;
                return r == size;
-       } else {
+       } else 
 #endif
+       {
                int r = fread (ctx->buf, size, 1, ctx->file);
                if (r == 0)
                        return size == 0? 1: 0;
@@ -793,6 +1613,23 @@ add_trace_methods (MethodDesc **methods, int count, TraceDesc *trace, uint64_t v
        return bt;
 }
 
+static void
+thread_add_root (ThreadContext *ctx, uintptr_t obj, int root_type, uintptr_t extra_info)
+{
+       if (ctx->num_roots == ctx->size_roots) {
+               int new_size = ctx->size_roots * 2;
+               if (!new_size)
+                       new_size = 4;
+               ctx->roots = realloc (ctx->roots, new_size * sizeof (uintptr_t));
+               ctx->roots_extra = realloc (ctx->roots_extra, new_size * sizeof (uintptr_t));
+               ctx->roots_types = realloc (ctx->roots_types, new_size * sizeof (int));
+               ctx->size_roots = new_size;
+       }
+       ctx->roots_types [ctx->num_roots] = root_type;
+       ctx->roots_extra [ctx->num_roots] = extra_info;
+       ctx->roots [ctx->num_roots++] = obj;
+}
+
 static int
 compare_callc (const void *a, const void *b)
 {
@@ -853,7 +1690,6 @@ pop_method (ThreadContext *thread, MethodDesc *method, uint64_t timestamp)
 }
 
 typedef struct {
-       uint64_t start_time; /* move this per-thread? */
        uint64_t total_time;
        uint64_t max_time;
        int count;
@@ -865,6 +1701,7 @@ static int gc_resizes;
 typedef struct {
        uint64_t created;
        uint64_t destroyed;
+       uint64_t live;
        uint64_t max_live;
        TraceDesc traces;
 } HandleInfo;
@@ -961,6 +1798,19 @@ get_handle_name (int htype)
        }
 }
 
+static const char*
+get_root_name (int rtype)
+{
+       switch (rtype & MONO_PROFILE_GC_ROOT_TYPEMASK) {
+       case MONO_PROFILE_GC_ROOT_STACK: return "stack";
+       case MONO_PROFILE_GC_ROOT_FINALIZER: return "finalizer";
+       case MONO_PROFILE_GC_ROOT_HANDLE: return "handle";
+       case MONO_PROFILE_GC_ROOT_OTHER: return "other";
+       case MONO_PROFILE_GC_ROOT_MISC: return "misc";
+       default: return "unknown";
+       }
+}
+
 static MethodDesc**
 decode_bt (MethodDesc** sframes, int *size, unsigned char *p, unsigned char **endp, intptr_t ptr_base)
 {
@@ -1085,7 +1935,7 @@ decode_buffer (ProfContext *ctx)
                return 0;
        if (!startup_time) {
                startup_time = time_base;
-               if (time_from) {
+               if (use_time_filter) {
                        time_from += startup_time;
                        time_to += startup_time;
                }
@@ -1114,16 +1964,16 @@ decode_buffer (ProfContext *ctx)
                                uint64_t ev = decode_uleb128 (p, &p);
                                int gen = decode_uleb128 (p, &p);
                                if (debug)
-                                       fprintf (outfile, "gc event for gen%d: %s at %llu\n", gen - 1, gc_event_name (ev), time_base);
+                                       fprintf (outfile, "gc event for gen%d: %s at %llu (thread: 0x%x)\n", gen, gc_event_name (ev), time_base, thread->thread_id);
                                if (gen > 2) {
                                        fprintf (outfile, "incorrect gc gen: %d\n", gen);
                                        break;
                                }
                                if (ev == MONO_GC_EVENT_START) {
-                                       gc_info [gen].start_time = time_base;
+                                       thread->gc_start_times [gen] = time_base;
                                        gc_info [gen].count++;
                                } else if (ev == MONO_GC_EVENT_END) {
-                                       tdiff = time_base - gc_info [gen].start_time;
+                                       tdiff = time_base - thread->gc_start_times [gen];
                                        gc_info [gen].total_time += tdiff;
                                        if (tdiff > gc_info [gen].max_time)
                                                gc_info [gen].max_time = tdiff;
@@ -1147,10 +1997,11 @@ decode_buffer (ProfContext *ctx)
                                if (htype > 3)
                                        return 0;
                                handle_info [htype].created++;
+                               handle_info [htype].live++;
                                add_trace_thread (thread, &handle_info [htype].traces, 1);
                                /* FIXME: we don't take into account timing here */
-                               if (handle_info [htype].created > handle_info [htype].max_live)
-                                       handle_info [htype].max_live = handle_info [htype].created;
+                               if (handle_info [htype].live > handle_info [htype].max_live)
+                                       handle_info [htype].max_live = handle_info [htype].live;
                                if (num_tracked_objects)
                                        track_handle (OBJ_ADDR (objdiff), htype, handle);
                                if (debug)
@@ -1160,7 +2011,8 @@ decode_buffer (ProfContext *ctx)
                                uint32_t handle = decode_uleb128 (p, &p);
                                if (htype > 3)
                                        return 0;
-                               handle_info [htype].created--;
+                               handle_info [htype].destroyed ++;
+                               handle_info [htype].live--;
                                if (debug)
                                        fprintf (outfile, "handle (%s) %u destroyed\n", get_handle_name (htype), handle);
                        }
@@ -1205,7 +2057,7 @@ decode_buffer (ProfContext *ctx)
                                        fprintf (outfile, "non-zero flags in thread\n");
                                        return 0;
                                }
-                               nt = get_thread (ctx, ptr_base * ptrdiff);
+                               nt = get_thread (ctx, ptr_base + ptrdiff);
                                nt->name = pstrdup ((char*)p);
                                if (debug)
                                        fprintf (outfile, "thread %p named: %s\n", (void*)(ptr_base + ptrdiff), p);
@@ -1268,15 +2120,20 @@ decode_buffer (ProfContext *ctx)
                        if (subtype == TYPE_JIT) {
                                intptr_t codediff = decode_sleb128 (p, &p);
                                int codelen = decode_uleb128 (p, &p);
+                               MethodDesc *jitted_method;
                                if (debug)
-                                       fprintf (outfile, "jitted method %p (%s), size: %d\n", (void*)(method_base), p, codelen);
-                               add_method (method_base, (char*)p, ptr_base + codediff, codelen);
+                                       fprintf (outfile, "jitted method %p (%s), size: %d, code: %p\n", (void*)(method_base), p, codelen, (void*)(ptr_base + codediff));
+                               jitted_method = add_method (method_base, (char*)p, ptr_base + codediff, codelen);
+                               if (!(time_base >= time_from && time_base < time_to))
+                                       jitted_method->ignore_jit = 1;
                                while (*p) p++;
                                p++;
                        } else {
                                MethodDesc *method;
                                if ((thread_filter && thread_filter != thread->thread_id))
                                        break;
+                               if (!(time_base >= time_from && time_base < time_to))
+                                       break;
                                method = lookup_method (method_base);
                                if (subtype == TYPE_ENTER) {
                                        add_trace_thread (thread, &method->traces, 1);
@@ -1326,6 +2183,20 @@ decode_buffer (ProfContext *ctx)
                                }
                                if (debug && size)
                                        fprintf (outfile, "traced object %p, size %llu (%s), refs: %d\n", (void*)OBJ_ADDR (objdiff), size, cd->name, num);
+                       } else if (subtype == TYPE_HEAP_ROOT) {
+                               uintptr_t num = decode_uleb128 (p + 1, &p);
+                               uintptr_t gc_num = decode_uleb128 (p, &p);
+                               int i;
+                               for (i = 0; i < num; ++i) {
+                                       intptr_t objdiff = decode_sleb128 (p, &p);
+                                       int root_type = decode_uleb128 (p, &p);
+                                       /* we just discard the extra info for now */
+                                       uintptr_t extra_info = decode_uleb128 (p, &p);
+                                       if (debug)
+                                               fprintf (outfile, "object %p is a %s root\n", (void*)OBJ_ADDR (objdiff), get_root_name (root_type));
+                                       if (collect_traces)
+                                               thread_add_root (thread, OBJ_ADDR (objdiff), root_type, extra_info);
+                               }
                        } else if (subtype == TYPE_HEAP_END) {
                                uint64_t tdiff = decode_uleb128 (p + 1, &p);
                                LOG_TIME (time_base, tdiff);
@@ -1333,8 +2204,26 @@ decode_buffer (ProfContext *ctx)
                                if (debug)
                                        fprintf (outfile, "heap shot end\n");
                                if (collect_traces) {
-                                       heap_shot_resolve_reverse_refs (thread->current_heap_shot);
-                                       heap_shot_free_objects (thread->current_heap_shot);
+                                       HeapShot *hs = thread->current_heap_shot;
+                                       if (hs && thread->num_roots) {
+                                               /* transfer the root ownershipt to the heapshot */
+                                               hs->num_roots = thread->num_roots;
+                                               hs->roots = thread->roots;
+                                               hs->roots_extra = thread->roots_extra;
+                                               hs->roots_types = thread->roots_types;
+                                       } else {
+                                               free (thread->roots);
+                                               free (thread->roots_extra);
+                                               free (thread->roots_types);
+                                       }
+                                       thread->num_roots = 0;
+                                       thread->size_roots = 0;
+                                       thread->roots = NULL;
+                                       thread->roots_extra = NULL;
+                                       thread->roots_types = NULL;
+                                       heap_shot_resolve_reverse_refs (hs);
+                                       heap_shot_mark_objects (hs);
+                                       heap_shot_free_objects (hs);
                                }
                                thread->current_heap_shot = NULL;
                        } else if (subtype == TYPE_HEAP_START) {
@@ -1359,6 +2248,8 @@ decode_buffer (ProfContext *ctx)
                        LOG_TIME (time_base, tdiff);
                        time_base += tdiff;
                        record = (!thread_filter || thread_filter == thread->thread_id);
+                       if (!(time_base >= time_from && time_base < time_to))
+                               record = 0;
                        if (event == MONO_PROFILER_MONITOR_CONTENTION) {
                                MonitorDesc *mdesc = lookup_monitor (OBJ_ADDR (objdiff));
                                if (record) {
@@ -1421,6 +2312,8 @@ decode_buffer (ProfContext *ctx)
                        LOG_TIME (time_base, tdiff);
                        time_base += tdiff;
                        record = (!thread_filter || thread_filter == thread->thread_id);
+                       if (!(time_base >= time_from && time_base < time_to))
+                               record = 0;
                        if (subtype == TYPE_CLAUSE) {
                                int clause_type = decode_uleb128 (p, &p);
                                int clause_num = decode_uleb128 (p, &p);
@@ -1454,6 +2347,145 @@ decode_buffer (ProfContext *ctx)
                        }
                        break;
                }
+               case TYPE_SAMPLE: {
+                       int subtype = *p & 0xf0;
+                       if (subtype == TYPE_SAMPLE_HIT) {
+                               int i;
+                               int sample_type = decode_uleb128 (p + 1, &p);
+                               uint64_t tstamp = decode_uleb128 (p, &p);
+                               int count = decode_uleb128 (p, &p);
+                               for (i = 0; i < count; ++i) {
+                                       uintptr_t ip = ptr_base + decode_sleb128 (p, &p);
+                                       if ((tstamp >= time_from && tstamp < time_to))
+                                               add_stat_sample (sample_type, ip);
+                                       if (debug)
+                                               fprintf (outfile, "sample hit, type: %d at %p\n", sample_type, (void*)ip);
+                               }
+                               if (ctx->data_version > 5) {
+                                       count = decode_uleb128 (p, &p);
+                                       for (i = 0; i < count; ++i) {
+                                               MethodDesc *method;
+                                               int64_t ptrdiff = decode_sleb128 (p, &p);
+                                               int il_offset = decode_sleb128 (p, &p);
+                                               int native_offset = decode_sleb128 (p, &p);
+                                               method_base += ptrdiff;
+                                               method = lookup_method (method_base);
+                                               if (debug)
+                                                       fprintf (outfile, "sample hit bt %d: %s at IL offset %d (native: %d)\n", i, method->name, il_offset, native_offset);
+                                       }
+                               }
+                       } else if (subtype == TYPE_SAMPLE_USYM) {
+                               /* un unmanaged symbol description */
+                               uintptr_t addr = ptr_base + decode_sleb128 (p + 1, &p);
+                               uintptr_t size = decode_uleb128 (p, &p);
+                               char *name;
+                               name = pstrdup ((char*)p);
+                               add_unmanaged_symbol (addr, name, size);
+                               if (debug)
+                                       fprintf (outfile, "unmanaged symbol %s at %p\n", name, (void*)addr);
+                               while (*p) p++;
+                               p++;
+                       } else if (subtype == TYPE_SAMPLE_UBIN) {
+                               /* un unmanaged binary loaded in memory */
+                               uint64_t tdiff = decode_uleb128 (p + 1, &p);
+                               uintptr_t addr = decode_sleb128 (p, &p);
+                               uint64_t offset = decode_uleb128 (p, &p);
+                               uintptr_t size = decode_uleb128 (p, &p);
+                               char *name;
+                               LOG_TIME (time_base, tdiff);
+                               time_base += tdiff;
+                               name = pstrdup ((char*)p);
+                               add_unmanaged_binary (addr, name, size);
+                               if (debug)
+                                       fprintf (outfile, "unmanaged binary %s at %p\n", name, (void*)addr);
+                               while (*p) p++;
+                               p++;
+                       } else if (subtype == TYPE_SAMPLE_COUNTERS_DESC) {
+                               uint64_t i, len = decode_uleb128 (p + 1, &p);
+                               for (i = 0; i < len; i++) {
+                                       uint64_t type, unit, variance, index;
+                                       uint64_t section = decode_uleb128 (p, &p);
+                                       char *name = pstrdup ((char*)p);
+                                       while (*p++);
+                                       type = decode_uleb128 (p, &p);
+                                       unit = decode_uleb128 (p, &p);
+                                       variance = decode_uleb128 (p, &p);
+                                       index = decode_uleb128 (p, &p);
+                                       add_counter ((int)section, name, (int)type, (int)unit, (int)variance, (int)index);
+                               }
+                       } else if (subtype == TYPE_SAMPLE_COUNTERS) {
+                               int i;
+                               CounterValue *value, *previous = NULL;
+                               CounterList *list;
+                               uint64_t timestamp = decode_uleb128 (p + 1, &p);
+                               uint64_t time_between = timestamp / 1000 * 1000 * 1000 * 1000 + startup_time;
+                               while (1) {
+                                       uint64_t type, index = decode_uleb128 (p, &p);
+                                       if (index == 0)
+                                               break;
+
+                                       for (list = counters; list; list = list->next) {
+                                               if (list->counter->index == (int)index) {
+                                                       previous = list->counter->values_last;
+                                                       break;
+                                               }
+                                       }
+
+                                       type = decode_uleb128 (p, &p);
+
+                                       value = calloc (1, sizeof (CounterValue));
+                                       value->timestamp = timestamp;
+
+                                       switch (type) {
+                                       case MONO_COUNTER_INT:
+#if SIZEOF_VOID_P == 4
+                                       case MONO_COUNTER_WORD:
+#endif
+                                               value->buffer = malloc (sizeof (int32_t));
+                                               *(int32_t*)value->buffer = (int32_t)decode_sleb128 (p, &p) + (previous ? (*(int32_t*)previous->buffer) : 0);
+                                               break;
+                                       case MONO_COUNTER_UINT:
+                                               value->buffer = malloc (sizeof (uint32_t));
+                                               *(uint32_t*)value->buffer = (uint32_t)decode_uleb128 (p, &p) + (previous ? (*(uint32_t*)previous->buffer) : 0);
+                                               break;
+                                       case MONO_COUNTER_LONG:
+#if SIZEOF_VOID_P == 8
+                                       case MONO_COUNTER_WORD:
+#endif
+                                       case MONO_COUNTER_TIME_INTERVAL:
+                                               value->buffer = malloc (sizeof (int64_t));
+                                               *(int64_t*)value->buffer = (int64_t)decode_sleb128 (p, &p) + (previous ? (*(int64_t*)previous->buffer) : 0);
+                                               break;
+                                       case MONO_COUNTER_ULONG:
+                                               value->buffer = malloc (sizeof (uint64_t));
+                                               *(uint64_t*)value->buffer = (uint64_t)decode_uleb128 (p, &p) + (previous ? (*(uint64_t*)previous->buffer) : 0);
+                                               break;
+                                       case MONO_COUNTER_DOUBLE:
+                                               value->buffer = malloc (sizeof (double));
+#if TARGET_BYTE_ORDER == G_LITTLE_ENDIAN
+                                               for (i = 0; i < sizeof (double); i++)
+#else
+                                               for (i = sizeof (double) - 1; i >= 0; i--)
+#endif
+                                                       value->buffer[i] = *p++;
+                                               break;
+                                       case MONO_COUNTER_STRING:
+                                               if (*p++ == 0) {
+                                                       value->buffer = NULL;
+                                               } else {
+                                                       value->buffer = (unsigned char*) pstrdup ((char*)p);
+                                                       while (*p++);
+                                               }
+                                               break;
+                                       }
+                                       if (time_between >= time_from && time_between <= time_to)
+                                               add_counter_value (index, value);
+                               }
+                       } else {
+                               return 0;
+                       }
+                       break;
+               }
                default:
                        fprintf (outfile, "unhandled profiler event: 0x%x at file offset: %llu + %d (len: %d\n)\n", *p, file_offset, p - ctx->buf, len);
                        exit (1);
@@ -1498,6 +2530,7 @@ load_file (char *name)
        ctx->startup_time = read_int64 (p + 8);
        ctx->timer_overhead = read_int32 (p + 16);
        ctx->pid = read_int32 (p + 24);
+       ctx->port = read_int16 (p + 28);
        return ctx;
 }
 
@@ -1539,6 +2572,8 @@ dump_header (ProfContext *ctx)
        fprintf (outfile, "\tProgram startup: %s", t);
        if (ctx->pid)
                fprintf (outfile, "\tProgram ID: %d\n", ctx->pid);
+       if (ctx->port)
+               fprintf (outfile, "\tServer listening on: %d\n", ctx->port);
 }
 
 static void
@@ -1651,6 +2686,27 @@ dump_gcs (void)
        }
 }
 
+static void
+dump_jit (void)
+{
+       int i;
+       int code_size = 0;
+       int compiled_methods = 0;
+       MethodDesc* m;
+       fprintf (outfile, "\nJIT summary\n");
+       for (i = 0; i < HASH_SIZE; ++i) {
+               m = method_hash [i];
+               for (m = method_hash [i]; m; m = m->next) {
+                       if (!m->code || m->ignore_jit)
+                               continue;
+                       compiled_methods++;
+                       code_size += m->len;
+               }
+       }
+       fprintf (outfile, "\tCompiled methods: %d\n", compiled_methods);
+       fprintf (outfile, "\tGenerated code size: %d\n", code_size);
+}
+
 static void
 dump_allocations (void)
 {
@@ -1842,8 +2898,8 @@ heap_shot_summary (HeapShot *hs, int hs_num, HeapShot *last_hs)
        }
        hs->sorted = sorted;
        qsort (sorted, ccount, sizeof (void*), compare_heap_class);
-       fprintf (outfile, "\n\tHeap shot %d at %.3f secs: size: %llu, object count: %llu, class count: %d\n",
-               hs_num, (hs->timestamp - startup_time)/1000000000.0, size, count, ccount);
+       fprintf (outfile, "\n\tHeap shot %d at %.3f secs: size: %llu, object count: %llu, class count: %d, roots: %d\n",
+               hs_num, (hs->timestamp - startup_time)/1000000000.0, size, count, ccount, hs->num_roots);
        if (!verbose && ccount > 30)
                ccount = 30;
        fprintf (outfile, "\t%10s %10s %8s Class name\n", "Bytes", "Count", "Average");
@@ -1872,6 +2928,8 @@ heap_shot_summary (HeapShot *hs, int hs_num, HeapShot *last_hs)
                }
                assert (cd->rev_count == k);
                qsort (rev_sorted, cd->rev_count, sizeof (HeapClassRevRef), compare_rev_class);
+               if (cd->root_references)
+                       fprintf (outfile, "\t\t%d root references (%d pinning)\n", cd->root_references, cd->pinned_references);
                dump_rev_claases (rev_sorted, cd->rev_count);
                free (rev_sorted);
        }
@@ -1926,7 +2984,7 @@ flush_context (ProfContext *ctx)
        }
 }
 
-static const char *reports = "header,gc,alloc,call,metadata,exception,monitor,thread,heapshot";
+static const char *reports = "header,jit,gc,sample,alloc,call,metadata,exception,monitor,thread,heapshot,counters";
 
 static const char*
 match_option (const char *p, const char *opt)
@@ -1961,6 +3019,11 @@ print_reports (ProfContext *ctx, const char *reps, int parse_only)
                                dump_gcs ();
                        continue;
                }
+               if ((opt = match_option (p, "jit")) != p) {
+                       if (!parse_only)
+                               dump_jit ();
+                       continue;
+               }
                if ((opt = match_option (p, "alloc")) != p) {
                        if (!parse_only)
                                dump_allocations ();
@@ -1991,6 +3054,16 @@ print_reports (ProfContext *ctx, const char *reps, int parse_only)
                                dump_heap_shots ();
                        continue;
                }
+               if ((opt = match_option (p, "sample")) != p) {
+                       if (!parse_only)
+                               dump_samples ();
+                       continue;
+               }
+               if ((opt = match_option (p, "counters")) != p) {
+                       if (!parse_only)
+                               dump_counters ();
+                       continue;
+               }
                return 0;
        }
        return 1;
@@ -2025,6 +3098,8 @@ usage (void)
        printf ("\t                     %s\n", reports);
        printf ("\t--method-sort=MODE   sort methods according to MODE: total, self, calls\n");
        printf ("\t--alloc-sort=MODE    sort allocations according to MODE: bytes, count\n");
+       printf ("\t--counters-sort=MODE sort counters according to MODE: time, category\n");
+       printf ("\t                     only accessible in verbose mode\n");
        printf ("\t--track=OB1[,OB2...] track what happens to objects OBJ1, O2 etc.\n");
        printf ("\t--find=FINDSPEC      find and track objects matching FINFSPEC, where FINDSPEC is:\n");
        printf ("\t                     S:minimum_size or T:partial_name\n");
@@ -2068,6 +3143,16 @@ main (int argc, char *argv[])
                                usage ();
                                return 1;
                        }
+               } else if (strncmp ("--counters-sort=", argv [i], 16) == 0) {
+                       const char *val = argv [i] + 16;
+                       if (strcmp (val, "time") == 0) {
+                               counters_sort_mode = COUNTERS_SORT_TIME;
+                       } else if (strcmp (val, "category") == 0) {
+                               counters_sort_mode = COUNTERS_SORT_CATEGORY;
+                       } else {
+                               usage ();
+                               return 1;
+                       }
                } else if (strncmp ("--reports=", argv [i], 10) == 0) {
                        const char *val = argv [i] + 10;
                        if (!print_reports (NULL, val, 1)) {
@@ -2127,6 +3212,7 @@ main (int argc, char *argv[])
                        }
                        time_from = from_secs * 1000000000;
                        time_to = to_secs * 1000000000;
+                       use_time_filter = 1;
                } else if (strcmp ("--verbose", argv [i]) == 0) {
                        verbose++;
                } else if (strcmp ("--traces", argv [i]) == 0) {