Tue Mar 16 11:20:14 CET 2010 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / profiler / mono-profiler-logging.c
index 832551d6cc9395a116b77c224f379a79cf666b77..9ed3cb8cde298fd011036322b3b09cf45922b004 100644 (file)
@@ -1,10 +1,21 @@
+/*
+ * mono-profiler-logging.c: Logging profiler for Mono.
+ *
+ * Author:
+ *   Massimiliano Mantione (massi@ximian.com)
+ *
+ * Copyright 2008-2009 Novell, Inc (http://www.novell.com)
+ */
 #include <config.h>
 #include <mono/metadata/profiler.h>
 #include <mono/metadata/class.h>
+#include <mono/metadata/metadata-internals.h>
+#include <mono/metadata/class-internals.h>
 #include <mono/metadata/assembly.h>
 #include <mono/metadata/loader.h>
 #include <mono/metadata/threads.h>
 #include <mono/metadata/debug-helpers.h>
+#include <mono/metadata/mono-gc.h>
 #include <mono/io-layer/atomic.h>
 #include <string.h>
 #include <stdio.h>
 
 #include <dlfcn.h>
 
+#include <sys/types.h> 
+#include <sys/socket.h>
+#include <netinet/in.h>
+
 #define HAS_OPROFILE 0
 
 #if (HAS_OPROFILE)
@@ -31,9 +46,23 @@ typedef enum {
        MONO_PROFILER_FILE_BLOCK_KIND_UNLOADED = 5,
        MONO_PROFILER_FILE_BLOCK_KIND_EVENTS = 6,
        MONO_PROFILER_FILE_BLOCK_KIND_STATISTICAL = 7,
-       MONO_PROFILER_FILE_BLOCK_KIND_HEAP = 8
+       MONO_PROFILER_FILE_BLOCK_KIND_HEAP_DATA = 8,
+       MONO_PROFILER_FILE_BLOCK_KIND_HEAP_SUMMARY = 9,
+       MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES = 10
 } MonoProfilerFileBlockKind;
 
+typedef enum { /* Directive codes. Added together with MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES; presumably they are the payload of that block kind -- TODO confirm against the writer. */
+       MONO_PROFILER_DIRECTIVE_END = 0, /* zero value; by its name a list terminator -- confirm */
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_CALLER = 1,
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_HAVE_STACK = 2,
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_ID = 3,
+       MONO_PROFILER_DIRECTIVE_LOADED_ELEMENTS_CARRY_ID = 4,
+       MONO_PROFILER_DIRECTIVE_CLASSES_CARRY_ASSEMBLY_ID = 5,
+       MONO_PROFILER_DIRECTIVE_METHODS_CARRY_WRAPPER_FLAG = 6,
+       MONO_PROFILER_DIRECTIVE_LAST /* implicit value 7: one past the highest explicit directive */
+} MonoProfilerDirectives;
+
+
 #define MONO_PROFILER_LOADED_EVENT_MODULE     1
 #define MONO_PROFILER_LOADED_EVENT_ASSEMBLY   2
 #define MONO_PROFILER_LOADED_EVENT_APPDOMAIN  4
@@ -52,25 +81,27 @@ typedef struct _ProfilerEventData {
                gsize number;
        } data;
        unsigned int data_type:2;
-       unsigned int code:3;
+       unsigned int code:4;
        unsigned int kind:1;
-       unsigned int value:26;
+       unsigned int value:25;
 } ProfilerEventData;
 
-#define EXTENDED_EVENT_VALUE_SHIFT (26)
-#define MAX_EVENT_VALUE ((1<<EXTENDED_EVENT_VALUE_SHIFT)-1)
-#define MAX_EXTENDED_EVENT_VALUE ((((guint64))MAX_EVENT_VALUE<<32)|((guint64)0xffffffff))
+#define EVENT_VALUE_BITS (25) /* same width as the ProfilerEventData.value bit-field (value:25) */
+#define MAX_EVENT_VALUE ((1<<EVENT_VALUE_BITS)-1) /* largest value that fits in EVENT_VALUE_BITS bits */
 
 typedef enum {
        MONO_PROFILER_EVENT_METHOD_JIT = 0,
        MONO_PROFILER_EVENT_METHOD_FREED = 1,
-       MONO_PROFILER_EVENT_METHOD_CALL = 2
+       MONO_PROFILER_EVENT_METHOD_CALL = 2,
+       MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER = 3, /* appended after CALL, so the existing codes 0-2 keep their values */
+       MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER = 4 /* presumably the ALLOCATION_CALLER variant for allocations made during JIT compilation -- TODO confirm at the use site */
 } MonoProfilerMethodEvents;
 typedef enum {
        MONO_PROFILER_EVENT_CLASS_LOAD = 0,
        MONO_PROFILER_EVENT_CLASS_UNLOAD = 1,
        MONO_PROFILER_EVENT_CLASS_EXCEPTION = 2,
-       MONO_PROFILER_EVENT_CLASS_ALLOCATION = 3
+       MONO_PROFILER_EVENT_CLASS_MONITOR = 3, /* takes over the value 3 */
+       MONO_PROFILER_EVENT_CLASS_ALLOCATION = 4 /* NOTE(review): value was 3 before MONITOR was inserted; if these codes reach the output file, readers of older files are affected -- confirm */
 } MonoProfilerClassEvents;
 typedef enum {
        MONO_PROFILER_EVENT_RESULT_SUCCESS = 0,
@@ -84,7 +115,11 @@ typedef enum {
        MONO_PROFILER_EVENT_GC_SWEEP = 4,
        MONO_PROFILER_EVENT_GC_RESIZE = 5,
        MONO_PROFILER_EVENT_GC_STOP_WORLD = 6,
-       MONO_PROFILER_EVENT_GC_START_WORLD = 7
+       MONO_PROFILER_EVENT_GC_START_WORLD = 7,
+       MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION = 8,
+       MONO_PROFILER_EVENT_STACK_SECTION = 9,
+       MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID = 10,
+       MONO_PROFILER_EVENT_OBJECT_MONITOR = 11
 } MonoProfilerEvents;
 typedef enum {
        MONO_PROFILER_EVENT_KIND_START = 0,
@@ -96,17 +131,130 @@ typedef enum {
        gettimeofday (&current_time, NULL);\
        (t) = (((guint64)current_time.tv_sec) * 1000000) + current_time.tv_usec;\
 } while (0)
-#if 1
-#define MONO_PROFILER_GET_CURRENT_COUNTER(c) MONO_PROFILER_GET_CURRENT_TIME ((c));
+
+static gboolean use_fast_timer = FALSE; /* set by detect_fast_timer (); when TRUE, MONO_PROFILER_GET_CURRENT_COUNTER reads rdtsc instead of the gettimeofday-based time */
+
+#if (defined(__i386__) || defined(__x86_64__)) && ! defined(HOST_WIN32)
+
+#if defined(__i386__)
+static const guchar cpuid_impl [] = { /* Pre-assembled i386 routine (disassembly on each line); cpuid () casts this array to CpuidFunc and calls it. It takes its five arguments from the stack. NOTE(review): the bytes live in a const data array -- confirm that this memory is executable on every target. */
+       0x55,                           /* push   %ebp */
+       0x89, 0xe5,                     /* mov    %esp,%ebp */
+       0x53,                           /* push   %ebx */
+       0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
+       0x0f, 0xa2,                     /* cpuid   */
+       0x50,                           /* push   %eax */
+       0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
+       0x89, 0x18,                     /* mov    %ebx,(%eax) */
+       0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
+       0x89, 0x08,                     /* mov    %ecx,(%eax) */
+       0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
+       0x89, 0x10,                     /* mov    %edx,(%eax) */
+       0x58,                           /* pop    %eax */
+       0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
+       0x89, 0x02,                     /* mov    %eax,(%edx) */
+       0x5b,                           /* pop    %ebx */
+       0xc9,                           /* leave   */
+       0xc3,                           /* ret     */
+};
+
+typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx); /* call signature of cpuid_impl: leaf number in, the four result registers out through the pointers */
+
+static int 
+cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx) { /* Runs CPUID leaf `id`. Returns 1 and fills the four outputs when the instruction is available; returns 0 and leaves the outputs untouched otherwise. Both #if branches below perform the same availability probe (GNU and MSVC syntax). */
+       int have_cpuid = 0;
+#ifndef _MSC_VER
+       __asm__  __volatile__ ( /* probe: flip bit 21 (0x200000) of EFLAGS, reload the flags, and keep only that bit of the difference */
+               "pushfl\n"
+               "popl %%eax\n"
+               "movl %%eax, %%edx\n"
+               "xorl $0x200000, %%eax\n"
+               "pushl %%eax\n"
+               "popfl\n"
+               "pushfl\n"
+               "popl %%eax\n"
+               "xorl %%edx, %%eax\n"
+               "andl $0x200000, %%eax\n"
+               "movl %%eax, %0"
+               : "=r" (have_cpuid)
+               :
+               : "%eax", "%edx"
+       );
 #else
+       __asm {
+               pushfd
+               pop eax
+               mov edx, eax
+               xor eax, 0x200000
+               push eax
+               popfd
+               pushfd
+               pop eax
+               xor eax, edx
+               and eax, 0x200000
+               mov have_cpuid, eax
+       }
+#endif
+       if (have_cpuid) { /* non-zero only when the flag bit could actually be toggled */
+               CpuidFunc func = (CpuidFunc) cpuid_impl; /* call the pre-assembled routine rather than inline asm; the reason is given in the comment below */
+               func (id, p_eax, p_ebx, p_ecx, p_edx);
+               /*
+                * We use this approach because of issues with gcc and pic code, see:
+                * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
+               __asm__ __volatile__ ("cpuid"
+                       : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
+                       : "a" (id));
+               */
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * detect_fast_timer (i386): enable the rdtsc based counter only when CPUID
+ * is available and leaf 1 reports bit 4 (0x10) of EDX; in every other case
+ * the gettimeofday based fallback stays selected.
+ */
+static void detect_fast_timer (void) {
+       int reg_a, reg_b, reg_c, reg_d;
+       gboolean tsc_present = FALSE;
+
+       if (cpuid (0x1, &reg_a, &reg_b, &reg_c, &reg_d) && (reg_d & 0x10))
+               tsc_present = TRUE;
+       use_fast_timer = tsc_present;
+}
+#endif
+
+#if defined(__x86_64__)
+/*
+ * detect_fast_timer (x86_64): query CPUID leaf 1 and enable the rdtsc based
+ * counter only when bit 4 (0x10) of EDX is set.
+ */
+static void detect_fast_timer (void) {
+       const guint32 leaf = 0x1;
+       guint32 reg_a, reg_b, reg_c, reg_d;
+       __asm__ __volatile__ ("cpuid" : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d) : "a"(leaf));
+       use_fast_timer = (reg_d & 0x10) ? TRUE : FALSE;
+}
+#endif
+
 static __inline__ guint64 rdtsc(void) {
        guint32 hi, lo;
        __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
        return ((guint64) lo) | (((guint64) hi) << 32);
 }
 #define MONO_PROFILER_GET_CURRENT_COUNTER(c) {\
-       (c) = rdtsc ();\
+       if (use_fast_timer) {\
+               (c) = rdtsc ();\
+       } else {\
+               MONO_PROFILER_GET_CURRENT_TIME ((c));\
+       }\
 } while (0)
+#else
+static void detect_fast_timer (void) { /* build that is not i386/x86_64, or is HOST_WIN32: there is no rdtsc path, so the fast timer is never enabled */
+       use_fast_timer = FALSE;
+}
+#define MONO_PROFILER_GET_CURRENT_COUNTER(c) MONO_PROFILER_GET_CURRENT_TIME ((c)) /* the counter is simply the gettimeofday-based time here */
 #endif
 
 
@@ -168,11 +316,48 @@ typedef struct _LoadedElement {
        guint64 load_end_counter;
        guint64 unload_start_counter;
        guint64 unload_end_counter;
+       guint32 id;
        guint8 loaded;
        guint8 load_written;
        guint8 unloaded;
        guint8 unload_written;
 } LoadedElement;
+struct _ProfilerCodeBufferArray;
+typedef struct _ProfilerCodeBuffer { /* One address range (start/end) with an attached payload describing what lives there. */
+       gpointer start;
+       gpointer end; /* NOTE(review): whether `end` is inclusive is not visible here -- confirm at the lookup code */
+       struct {
+               union { /* exactly one member is meaningful at a time; presumably selected by `type` below -- TODO confirm */
+                       MonoMethod *method;
+                       MonoClass *klass;
+                       void *data;
+                       struct _ProfilerCodeBufferArray *sub_buffers; /* link to a nested array of buffers */
+               } data;
+               guint16 value;
+               guint16 type;
+       } info;
+} ProfilerCodeBuffer;
+
+#define PROFILER_CODE_BUFFER_ARRAY_SIZE 64 /* fixed number of slots in every ProfilerCodeBufferArray */
+typedef struct _ProfilerCodeBufferArray { /* Fixed-size array of ProfilerCodeBuffer; arrays can nest through ProfilerCodeBuffer.info.data.sub_buffers. */
+       int level; /* presumably the nesting depth of this array -- TODO confirm */
+       int number_of_buffers; /* by its name, how many slots of `buffers` are in use -- confirm */
+       ProfilerCodeBuffer buffers [PROFILER_CODE_BUFFER_ARRAY_SIZE];
+} ProfilerCodeBufferArray;
+
+typedef struct _ProfilerCodeChunk { /* An address range (start/end) that owns a ProfilerCodeBufferArray. */
+       gpointer start;
+       gpointer end;
+       gboolean destroyed; /* presumably set once the underlying code memory is gone -- TODO confirm */
+       ProfilerCodeBufferArray *buffers;
+} ProfilerCodeChunk;
+
+typedef struct _ProfilerCodeChunks { /* Growable collection of ProfilerCodeChunk entries. */
+       int capacity; /* by its name, the allocated length of `chunks` -- confirm at the allocation site */
+       int number_of_chunks; /* by its name, the number of entries in use */
+       ProfilerCodeChunk *chunks;
+} ProfilerCodeChunks;
+
 
 #define PROFILER_HEAP_SHOT_OBJECT_BUFFER_SIZE 1024
 #define PROFILER_HEAP_SHOT_HEAP_BUFFER_SIZE 4096
@@ -207,6 +392,22 @@ typedef struct _ProfilerHeapShotWriteBuffer {
        gpointer buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE];
 } ProfilerHeapShotWriteBuffer;
 
+typedef struct _ProfilerHeapShotClassSummary { /* Instance and byte counters, kept separately for reachable and unreachable objects. */
+       struct {
+               guint32 instances;
+               guint32 bytes; /* 32-bit counter; NOTE(review): confirm totals cannot exceed 4 GiB */
+       } reachable;
+       struct {
+               guint32 instances;
+               guint32 bytes;
+       } unreachable;
+} ProfilerHeapShotClassSummary;
+
+typedef struct _ProfilerHeapShotCollectionSummary { /* Array of per-class summaries together with its capacity. */
+       ProfilerHeapShotClassSummary *per_class_data; /* presumably indexed by class id -- TODO confirm */
+       guint32 capacity; /* by its name, the number of entries allocated in per_class_data */
+} ProfilerHeapShotCollectionSummary;
+
 typedef struct _ProfilerHeapShotWriteJob {
        struct _ProfilerHeapShotWriteJob *next;
        struct _ProfilerHeapShotWriteJob *next_unwritten;
@@ -216,16 +417,30 @@ typedef struct _ProfilerHeapShotWriteJob {
        ProfilerHeapShotWriteBuffer *buffers;
        ProfilerHeapShotWriteBuffer **last_next;
        guint32 full_buffers;
-       gboolean heap_shot_was_signalled;
+       gboolean heap_shot_was_requested;
        guint64 start_counter;
        guint64 start_time;
        guint64 end_counter;
        guint64 end_time;
+       guint32 collection;
+       ProfilerHeapShotCollectionSummary summary;
+       gboolean dump_heap_data;
 } ProfilerHeapShotWriteJob;
 
+typedef struct _ProfilerThreadStack { /* Per-thread method stack; embedded in ProfilerPerThreadData as `stack`. thread_stack_initialize_empty () and thread_stack_free () set the counters to 0 and the arrays to NULL. */
+       guint32 capacity;
+       guint32 top;
+       guint32 last_saved_top;
+       guint32 last_written_frame;
+       MonoMethod **stack; /* released with g_free () by thread_stack_free () */
+       guint8 *method_is_jitted; /* released with g_free (); presumably one flag per entry of `stack` -- TODO confirm */
+       guint32 *written_frames; /* released with g_free () */
+} ProfilerThreadStack;
+
 typedef struct _ProfilerPerThreadData {
        ProfilerEventData *events;
        ProfilerEventData *next_free_event;
+       ProfilerEventData *next_unreserved_event;
        ProfilerEventData *end_event;
        ProfilerEventData *first_unwritten_event;
        ProfilerEventData *first_unmapped_event;
@@ -233,16 +448,32 @@ typedef struct _ProfilerPerThreadData {
        guint64 last_event_counter;
        gsize thread_id;
        ProfilerHeapShotObjectBuffer *heap_shot_object_buffers;
+       ProfilerThreadStack stack;
        struct _ProfilerPerThreadData* next;
 } ProfilerPerThreadData;
 
+typedef struct _ProfilerStatisticalHit { /* One statistical sample: an address paired with a domain. */
+       gpointer *address;
+       MonoDomain *domain; /* presumably the domain that was current when the sample was taken -- TODO confirm */
+} ProfilerStatisticalHit;
+
 typedef struct _ProfilerStatisticalData {
-       gpointer *addresses;
-       int next_free_index;
-       int end_index;
-       int first_unwritten_index;
+       ProfilerStatisticalHit *hits; /* replaces the bare address array: each sample is now a ProfilerStatisticalHit */
+       unsigned int next_free_index; /* the three indexes are unsigned now (they were int) */
+       unsigned int end_index;
+       unsigned int first_unwritten_index;
 } ProfilerStatisticalData;
 
+typedef struct _ProfilerUnmanagedSymbol { /* Entry type of ProfilerExecutableMemoryRegionData.symbols. */
+       guint32 offset; /* NOTE(review): what the offset is relative to is not visible here -- confirm */
+       guint32 size;
+       guint32 id;
+       guint32 index; /* presumably an index into the ELF symbol table of the owning file -- TODO confirm */
+} ProfilerUnmanagedSymbol;
+
+struct _ProfilerExecutableFile;
+struct _ProfilerExecutableFileSectionRegion;
+
 typedef struct _ProfilerExecutableMemoryRegionData {
        gpointer start;
        gpointer end;
@@ -250,6 +481,12 @@ typedef struct _ProfilerExecutableMemoryRegionData {
        char *file_name;
        guint32 id;
        gboolean is_new;
+       
+       struct _ProfilerExecutableFile *file;
+       struct _ProfilerExecutableFileSectionRegion *file_region_reference;
+       guint32 symbols_count;
+       guint32 symbols_capacity;
+       ProfilerUnmanagedSymbol *symbols;
 } ProfilerExecutableMemoryRegionData;
 
 typedef struct _ProfilerExecutableMemoryRegions {
@@ -257,24 +494,188 @@ typedef struct _ProfilerExecutableMemoryRegions {
        guint32 regions_capacity;
        guint32 regions_count;
        guint32 next_id;
+       guint32 next_unmanaged_function_id;
 } ProfilerExecutableMemoryRegions;
 
-typedef struct _ProfilerUnmanagedFunction {
-       guint32 id;
-       guint32 hits;
-       char *name;
-       struct _ProfilerUnmanagedFunction *next_unwritten;
-} ProfilerUnmanagedFunction;
+/* Start of ELF definitions */
+#define EI_NIDENT 16 /* length of ElfHeader.e_ident */
+typedef guint16 ElfHalf;
+typedef guint32 ElfWord;
+typedef gsize ElfAddr; /* gsize: follows the host word size */
+typedef gsize ElfOff; /* gsize: follows the host word size */
+
+typedef struct { /* ELF file header. ElfAddr and ElfOff are gsize, so the field widths follow the host word size. */
+       unsigned char e_ident[EI_NIDENT]; /* see ElfIdentFields for the byte indexes declared in this file */
+       ElfHalf e_type;
+       ElfHalf e_machine;
+       ElfWord e_version;
+       ElfAddr e_entry;
+       ElfOff  e_phoff;
+       ElfOff  e_shoff; // Section header table
+       ElfWord e_flags;
+       ElfHalf e_ehsize; // Header size
+       ElfHalf e_phentsize;
+       ElfHalf e_phnum;
+       ElfHalf e_shentsize; // Section header entry size
+       ElfHalf e_shnum; // Section header entries number
+       ElfHalf e_shstrndx; // String table index
+} ElfHeader;
+
+#if (SIZEOF_VOID_P == 4)
+typedef struct { /* section header, layout used when pointers are 4 bytes */
+       ElfWord sh_name;
+       ElfWord sh_type;
+       ElfWord sh_flags;
+       ElfAddr sh_addr; // Address in memory
+       ElfOff  sh_offset; // Offset in file
+       ElfWord sh_size;
+       ElfWord sh_link;
+       ElfWord sh_info;
+       ElfWord sh_addralign;
+       ElfWord sh_entsize;
+} ElfSection;
+typedef struct { /* symbol table entry, layout used when pointers are 4 bytes */
+       ElfWord       st_name;
+       ElfAddr       st_value;
+       ElfWord       st_size;
+       unsigned char st_info; // Use ELF_ST_TYPE to get symbol type
+       unsigned char st_other;
+       ElfHalf       st_shndx; // Or one of SHN_ABS, SHN_COMMON or SHN_UNDEF.
+} ElfSymbol;
+#elif (SIZEOF_VOID_P == 8)
+typedef struct { /* section header, layout used when pointers are 8 bytes */
+       ElfWord sh_name;
+       ElfWord sh_type;
+       ElfOff sh_flags;
+       ElfAddr sh_addr; // Address in memory
+       ElfOff  sh_offset; // Offset in file
+       ElfOff sh_size;
+       ElfWord sh_link;
+       ElfWord sh_info;
+       ElfOff sh_addralign;
+       ElfOff sh_entsize;
+} ElfSection;
+typedef struct { /* symbol table entry, layout used when pointers are 8 bytes; note the different field order */
+       ElfWord       st_name;
+       unsigned char st_info; // Use ELF_ST_TYPE to get symbol type
+       unsigned char st_other;
+       ElfHalf       st_shndx; // Or one of SHN_ABS, SHN_COMMON or SHN_UNDEF.
+       ElfAddr       st_value;
+       ElfAddr       st_size;
+} ElfSymbol;
+#else
+#error Bad size of void pointer
+#endif
+
+
+#define ELF_ST_BIND(i)   ((i)>>4) /* upper bits of st_info (everything above the low 4 bits) */
+#define ELF_ST_TYPE(i)   ((i)&0xf) /* low 4 bits of st_info; compare against ElfSymbolType */
+
+
+typedef enum { /* byte indexes into ElfHeader.e_ident */
+       EI_MAG0 = 0,
+       EI_MAG1 = 1,
+       EI_MAG2 = 2,
+       EI_MAG3 = 3,
+       EI_CLASS = 4,
+       EI_DATA = 5
+} ElfIdentFields;
+
+typedef enum { /* by their names, values of ElfHeader.e_type -- confirm at the use site */
+       ELF_FILE_TYPE_NONE = 0,
+       ELF_FILE_TYPE_REL = 1,
+       ELF_FILE_TYPE_EXEC = 2,
+       ELF_FILE_TYPE_DYN = 3,
+       ELF_FILE_TYPE_CORE = 4
+} ElfFileType;
+
+typedef enum { /* by their names, values of e_ident [EI_CLASS] -- confirm at the use site */
+       ELF_CLASS_NONE = 0,
+       ELF_CLASS_32 = 1,
+       ELF_CLASS_64 = 2
+} ElfIdentClass;
+
+typedef enum { /* by their names, values of e_ident [EI_DATA] -- confirm at the use site */
+       ELF_DATA_NONE = 0,
+       ELF_DATA_LSB = 1,
+       ELF_DATA_MSB = 2
+} ElfIdentData;
+
+typedef enum { /* by their names, values of ElfSection.sh_type -- confirm at the use site */
+       ELF_SHT_NULL = 0,
+       ELF_SHT_PROGBITS = 1,
+       ELF_SHT_SYMTAB = 2,
+       ELF_SHT_STRTAB = 3,
+       ELF_SHT_RELA = 4,
+       ELF_SHT_HASH = 5,
+       ELF_SHT_DYNAMIC = 6,
+       ELF_SHT_NOTE = 7,
+       ELF_SHT_NOBITS = 8,
+       ELF_SHT_REL = 9,
+       ELF_SHT_SHLIB = 10,
+       ELF_SHT_DYNSYM = 11
+} ElfSectionType;
+
+typedef enum { /* symbol types, i.e. the values produced by ELF_ST_TYPE (st_info) */
+       ELF_STT_NOTYPE = 0,
+       ELF_STT_OBJECT = 1,
+       ELF_STT_FUNC = 2,
+       ELF_STT_SECTION = 3,
+       ELF_STT_FILE = 4
+} ElfSymbolType;
+
+typedef enum { /* single-bit masks (1, 2, 4); by their names, tested against ElfSection.sh_flags */
+       ELF_SHF_WRITE = 1,
+       ELF_SHF_ALLOC = 2,
+       ELF_SHF_EXECINSTR = 4,
+} ElfSectionFlags;
+
+#define ELF_SHN_UNDEF       0 /* ELF_SHN_*: special section index values; presumably compared against ElfSymbol.st_shndx -- confirm at the use site */
+#define ELF_SHN_LORESERVE   0xff00
+#define ELF_SHN_LOPROC      0xff00
+#define ELF_SHN_HIPROC      0xff1f
+#define ELF_SHN_ABS         0xfff1
+#define ELF_SHN_COMMON      0xfff2
+#define ELF_SHN_HIRESERVE   0xffff
+/* End of ELF definitions */
+
+typedef struct _ProfilerExecutableFileSectionRegion { /* Links one section of an executable file to a memory region; ProfilerExecutableMemoryRegionData.file_region_reference points back at an entry of this type. */
+       ProfilerExecutableMemoryRegionData *region;
+       guint8 *section_address;
+       gsize section_offset;
+} ProfilerExecutableFileSectionRegion;
+
+typedef struct _ProfilerExecutableFile { /* One executable file and the ELF data located inside it. */
+       guint32 reference_count;
+       
+       /* Used for mmap and munmap */
+       int fd;
+       guint8 *data;
+       size_t length;
+       
+       /* File data */
+       ElfHeader *header;
+       guint8 *symbols_start;
+       guint32 symbols_count;
+       guint32 symbol_size;
+       const char *symbols_string_table;
+       const char *main_string_table;
+       
+       ProfilerExecutableFileSectionRegion *section_regions; /* presumably one entry per section header -- TODO confirm at the allocation site */
+       
+       struct _ProfilerExecutableFile *next_new_file; /* singly linked list link; the list head is ProfilerExecutableFiles.new_files */
+} ProfilerExecutableFile;
 
-typedef struct _ProfilerUnmanagedFunctions {
+typedef struct _ProfilerExecutableFiles { /* All known executable files: a hash table plus a list of new ones. */
        GHashTable *table;
-       ProfilerUnmanagedFunction *unwritten_queue;
-       ProfilerUnmanagedFunction *unwritten_queue_end;
-       guint32 next_id;
-       ProfilerUnmanagedFunction actual_unwritten_queue_end;
-} ProfilerUnmanagedFunctions;
+       ProfilerExecutableFile *new_files; /* head of the list chained through ProfilerExecutableFile.next_new_file */
+} ProfilerExecutableFiles;
 
-#ifndef PLATFORM_WIN32
+
+#define CLEANUP_WRITER_THREAD() do {profiler->writer_thread_terminated = TRUE;} while (0) /* records that the writer thread has terminated */
+#define CHECK_WRITER_THREAD() (! profiler->writer_thread_terminated) /* true while the writer thread has not been marked terminated; guards the join/wait macros */
+
+#ifndef HOST_WIN32
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/stat.h>
@@ -283,6 +684,12 @@ typedef struct _ProfilerUnmanagedFunctions {
 #include <pthread.h>
 #include <semaphore.h>
 
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
 #define MUTEX_TYPE pthread_mutex_t
 #define INITIALIZE_PROFILER_MUTEX() pthread_mutex_init (&(profiler->mutex), NULL)
 #define DELETE_PROFILER_MUTEX() pthread_mutex_destroy (&(profiler->mutex))
@@ -291,8 +698,13 @@ typedef struct _ProfilerUnmanagedFunctions {
 
 #define THREAD_TYPE pthread_t
 #define CREATE_WRITER_THREAD(f) pthread_create (&(profiler->data_writer_thread), NULL, ((void*(*)(void*))f), NULL)
+#define CREATE_USER_THREAD(f) pthread_create (&(profiler->user_thread), NULL, ((void*(*)(void*))f), NULL) /* starts f with a NULL argument and default attributes; the handle is stored in profiler->user_thread */
 #define EXIT_THREAD() pthread_exit (NULL);
-#define WAIT_WRITER_THREAD() pthread_join (profiler->data_writer_thread, NULL)
+#define WAIT_WRITER_THREAD() do { /* join the writer thread, but only while it has not been marked terminated */ \
+       if (CHECK_WRITER_THREAD ()) {\
+               pthread_join (profiler->data_writer_thread, NULL);\
+       }\
+} while (0)
 #define CURRENT_THREAD_ID() (gsize) pthread_self ()
 
 #ifndef HAVE_KW_THREAD
@@ -309,23 +721,39 @@ make_pthread_profiler_key (void) {
 #endif
 
 #define EVENT_TYPE sem_t
-#define WRITER_EVENT_INIT() (void) sem_init (&(profiler->statistical_data_writer_event), 0, 0)
-#define WRITER_EVENT_DESTROY() (void) sem_destroy (&(profiler->statistical_data_writer_event))
-#define WRITER_EVENT_WAIT() (void) sem_wait (&(profiler->statistical_data_writer_event))
-#define WRITER_EVENT_RAISE() (void) sem_post (&(profiler->statistical_data_writer_event))
+#define WRITER_EVENT_INIT() do { /* create the three writer semaphores, each with pshared 0 and initial count 0 */ \
+       sem_init (&(profiler->enable_data_writer_event), 0, 0);\
+       sem_init (&(profiler->wake_data_writer_event), 0, 0);\
+       sem_init (&(profiler->done_data_writer_event), 0, 0);\
+} while (0)
+#define WRITER_EVENT_DESTROY() do { /* destroy the same three semaphores */ \
+       sem_destroy (&(profiler->enable_data_writer_event));\
+       sem_destroy (&(profiler->wake_data_writer_event));\
+       sem_destroy (&(profiler->done_data_writer_event));\
+} while (0)
+#define WRITER_EVENT_WAIT() (void) sem_wait (&(profiler->wake_data_writer_event)) /* block on the "wake" semaphore */
+#define WRITER_EVENT_RAISE() (void) sem_post (&(profiler->wake_data_writer_event)) /* post the "wake" semaphore */
+#define WRITER_EVENT_ENABLE_WAIT() (void) sem_wait (&(profiler->enable_data_writer_event)) /* block on the "enable" semaphore */
+#define WRITER_EVENT_ENABLE_RAISE() (void) sem_post (&(profiler->enable_data_writer_event)) /* post the "enable" semaphore */
+#define WRITER_EVENT_DONE_WAIT() do { /* block on the "done" semaphore, skipped once the writer thread is marked terminated */ \
+       if (CHECK_WRITER_THREAD ()) {\
+               (void) sem_wait (&(profiler->done_data_writer_event));\
+       }\
+} while (0)
+#define WRITER_EVENT_DONE_RAISE() (void) sem_post (&(profiler->done_data_writer_event)) /* post the "done" semaphore */
 
 #if 0
 #define FILE_HANDLE_TYPE FILE*
 #define OPEN_FILE() profiler->file = fopen (profiler->file_name, "wb");
 #define WRITE_BUFFER(b,s) fwrite ((b), 1, (s), profiler->file)
 #define FLUSH_FILE() fflush (profiler->file)
-#define CLOSE_FILE() fclose (profiler->file);
+#define CLOSE_FILE() fclose (profiler->file) /* stdio variant (inside the disabled #if 0 branch); no trailing semicolon, the caller supplies it */
 #else
 #define FILE_HANDLE_TYPE int
 #define OPEN_FILE() profiler->file = open (profiler->file_name, O_WRONLY|O_CREAT|O_TRUNC, 0664);
 #define WRITE_BUFFER(b,s) write (profiler->file, (b), (s))
-#define FLUSH_FILE()
-#define CLOSE_FILE() close (profiler->file);
+#define FLUSH_FILE() fsync (profiler->file) /* file-descriptor variant: flushing is now an fsync () of the output descriptor (it used to expand to nothing) */
+#define CLOSE_FILE() close (profiler->file) /* no trailing semicolon, the caller supplies it */
 #endif
 
 #else
@@ -341,7 +769,11 @@ make_pthread_profiler_key (void) {
 #define THREAD_TYPE HANDLE
 #define CREATE_WRITER_THREAD(f) CreateThread (NULL, (1*1024*1024), (f), NULL, 0, NULL);
 #define EXIT_THREAD() ExitThread (0);
-#define WAIT_WRITER_THREAD() WaitForSingleObject (profiler->data_writer_thread, INFINITE)
+#define WAIT_WRITER_THREAD() do { /* wait without timeout on the writer thread handle, but only while it has not been marked terminated */ \
+       if (CHECK_WRITER_THREAD ()) {\
+                WaitForSingleObject (profiler->data_writer_thread, INFINITE);\
+       }\
+} while (0)
 #define CURRENT_THREAD_ID() (gsize) GetCurrentThreadId ()
 
 #ifndef HAVE_KW_THREAD
@@ -353,10 +785,27 @@ static guint32 profiler_thread_id = -1;
 #endif
 
 #define EVENT_TYPE HANDLE
-#define WRITER_EVENT_INIT() profiler->statistical_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL)
+/* Win32: create, then close, the three writer events (auto-reset, initially non-signalled, unnamed). */
+#define WRITER_EVENT_INIT() do {\
+       profiler->enable_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+       profiler->wake_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+       profiler->done_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+} while (0)
-#define WRITER_EVENT_DESTROY() CloseHandle (profiler->statistical_data_writer_event)
+#define WRITER_EVENT_DESTROY() do {\
+       CloseHandle (profiler->enable_data_writer_event);\
+       CloseHandle (profiler->wake_data_writer_event);\
+       CloseHandle (profiler->done_data_writer_event);\
+} while (0)
+#define WRITER_EVENT_WAIT() WaitForSingleObject (profiler->wake_data_writer_event, INFINITE) /* block on the "wake" event, no timeout */
+#define WRITER_EVENT_RAISE() SetEvent (profiler->wake_data_writer_event) /* signal the "wake" event */
+#define WRITER_EVENT_ENABLE_WAIT() WaitForSingleObject (profiler->enable_data_writer_event, INFINITE) /* block on the "enable" event, no timeout */
+#define WRITER_EVENT_ENABLE_RAISE() SetEvent (profiler->enable_data_writer_event) /* signal the "enable" event */
+#define WRITER_EVENT_DONE_WAIT() do { /* block on the "done" event, skipped once the writer thread is marked terminated */ \
+       if (CHECK_WRITER_THREAD ()) {\
+               WaitForSingleObject (profiler->done_data_writer_event, INFINITE);\
+       }\
+} while (0)
+#define WRITER_EVENT_DONE_RAISE() SetEvent (profiler->done_data_writer_event) /* signal the "done" event */
 
 #define FILE_HANDLE_TYPE FILE*
 #define OPEN_FILE() profiler->file = fopen (profiler->file_name, "wb");
@@ -390,14 +839,20 @@ static __thread ProfilerPerThreadData * tls_profiler_per_thread_data;
 #define PROFILER_FILE_WRITE_BUFFER_SIZE (profiler->write_buffer_size)
 typedef struct _ProfilerFileWriteBuffer {
        struct _ProfilerFileWriteBuffer *next;
-       guint8 buffer [];
+       guint8 buffer [MONO_ZERO_LEN_ARRAY]; /* trailing payload; MONO_ZERO_LEN_ARRAY replaces the flexible array member `[]`, presumably for compilers that lack it -- TODO confirm */
 } ProfilerFileWriteBuffer;
 
+#define CHECK_PROFILER_ENABLED() do { /* returns from the CALLING function when profiler->profiler_enabled is false; it expands to a bare `return;`, so it only fits functions returning void */ \
+       if (! profiler->profiler_enabled)\
+               return;\
+} while (0)
 struct _MonoProfiler {
        MUTEX_TYPE mutex;
        
        MonoProfileFlags flags;
+       gboolean profiler_enabled;
        char *file_name;
+       char *file_name_suffix;
        FILE_HANDLE_TYPE file;
        
        guint64 start_time;
@@ -405,9 +860,12 @@ struct _MonoProfiler {
        guint64 end_time;
        guint64 end_counter;
        
+       guint64 last_header_counter;
+       
        MethodIdMapping *methods;
        ClassIdMapping *classes;
        
+       guint32 loaded_element_next_free_id;
        GHashTable *loaded_assemblies;
        GHashTable *loaded_modules;
        GHashTable *loaded_appdomains;
@@ -418,11 +876,18 @@ struct _MonoProfiler {
        ProfilerStatisticalData *statistical_data;
        ProfilerStatisticalData *statistical_data_ready;
        ProfilerStatisticalData *statistical_data_second_buffer;
-       ProfilerUnmanagedFunctions unmanaged_functions;
+       int statistical_call_chain_depth;
+       MonoProfilerCallChainStrategy statistical_call_chain_strategy;
+       
+       ProfilerCodeChunks code_chunks;
+       
        THREAD_TYPE data_writer_thread;
-       EVENT_TYPE statistical_data_writer_event;
+       THREAD_TYPE user_thread;
+       EVENT_TYPE enable_data_writer_event;
+       EVENT_TYPE wake_data_writer_event;
+       EVENT_TYPE done_data_writer_event;
        gboolean terminate_writer_thread;
-       gboolean detach_writer_thread;
+       gboolean writer_thread_terminated;
        
        ProfilerFileWriteBuffer *write_buffers;
        ProfilerFileWriteBuffer *current_write_buffer;
@@ -433,12 +898,14 @@ struct _MonoProfiler {
        ProfilerHeapShotWriteJob *heap_shot_write_jobs;
        ProfilerHeapShotHeapBuffers heap;
        
-       char *heap_shot_command_file_name;
+       int command_port;
+       
        int dump_next_heap_snapshots;
-       guint64 heap_shot_command_file_access_time;
-       gboolean heap_shot_was_signalled;
+       gboolean heap_shot_was_requested;
+       guint32 garbage_collection_counter;
        
        ProfilerExecutableMemoryRegions *executable_regions;
+       ProfilerExecutableFiles executable_files;
        
        struct {
 #if (HAS_OPROFILE)
@@ -446,11 +913,36 @@ struct _MonoProfiler {
 #endif
                gboolean jit_time;
                gboolean unreachable_objects;
+               gboolean collection_summary;
+               gboolean report_gc_events;
                gboolean heap_shot;
+               gboolean track_stack;
+               gboolean track_calls;
+               gboolean save_allocation_caller;
+               gboolean save_allocation_stack;
+               gboolean allocations_carry_id;
        } action_flags;
 };
 static MonoProfiler *profiler;
 
+static void
+enable_profiler (void) { /* sets profiler_enabled, the flag tested by CHECK_PROFILER_ENABLED () */
+       profiler->profiler_enabled = TRUE;
+}
+
+static void flush_everything (void);
+
+static void
+disable_profiler (void) { /* clears profiler_enabled first, then calls flush_everything () */
+       profiler->profiler_enabled = FALSE;
+       flush_everything ();
+}
+
+static void
+request_heap_snapshot (void) { /* marks a heap shot as requested, then triggers a collection */
+       profiler->heap_shot_was_requested = TRUE; /* set before collecting; presumably read by the profiler's GC callbacks -- TODO confirm */
+       mono_gc_collect (mono_gc_max_generation ()); /* collect using the highest generation number the GC reports */
+}
 
 #define DEBUG_LOAD_EVENTS 0
 #define DEBUG_MAPPING_EVENTS 0
@@ -459,17 +951,184 @@ static MonoProfiler *profiler;
 #define DEBUG_CLASS_BITMAPS 0
 #define DEBUG_STATISTICAL_PROFILER 0
 #define DEBUG_WRITER_THREAD 0
-#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_WRITER_THREAD)
+#define DEBUG_USER_THREAD 0
+#define DEBUG_FILE_WRITES 0
+#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_WRITER_THREAD || DEBUG_FILE_WRITES)
 #define LOG_WRITER_THREAD(m) printf ("WRITER-THREAD-LOG %s\n", m)
 #else
 #define LOG_WRITER_THREAD(m)
 #endif
+/*
+ * LOG_USER_THREAD (m) prints the string m prefixed with "USER-THREAD-LOG"
+ * when any of the debug switches tested below is non-zero; otherwise it
+ * expands to nothing.
+ */
+#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_USER_THREAD || DEBUG_FILE_WRITES)
+#define LOG_USER_THREAD(m) printf ("USER-THREAD-LOG %s\n", m)
+#else
+#define LOG_USER_THREAD(m)
+#endif
 
 #if DEBUG_LOGGING_PROFILER
 static int event_counter = 0;
 #define EVENT_MARK() printf ("[EVENT:%d]", ++ event_counter)
 #endif
 
+/*
+ * Puts a stack into the "no storage" state: all three arrays are NULL and
+ * every counter is zero. Nothing is allocated and nothing is freed.
+ */
+static void
+thread_stack_initialize_empty (ProfilerThreadStack *stack) {
+       /* No backing arrays. */
+       stack->stack = NULL;
+       stack->method_is_jitted = NULL;
+       stack->written_frames = NULL;
+
+       /* No frames and no room for any. */
+       stack->capacity = 0;
+       stack->top = 0;
+       stack->last_saved_top = 0;
+       stack->last_written_frame = 0;
+}
+
+/*
+ * Releases the three arrays owned by a stack and resets it to the same
+ * state thread_stack_initialize_empty () produces.
+ * g_free () accepts NULL, so the arrays are freed unconditionally.
+ */
+static void
+thread_stack_free (ProfilerThreadStack *stack) {
+       g_free (stack->stack);
+       stack->stack = NULL;
+
+       g_free (stack->method_is_jitted);
+       stack->method_is_jitted = NULL;
+
+       g_free (stack->written_frames);
+       stack->written_frames = NULL;
+
+       stack->capacity = 0;
+       stack->top = 0;
+       stack->last_saved_top = 0;
+       stack->last_written_frame = 0;
+}
+
+/*
+ * Allocates zero-filled storage for capacity frames (method pointers,
+ * jitted flags and written-frame ids) and resets all counters.
+ * Any arrays the stack previously pointed to are NOT freed here.
+ */
+static void
+thread_stack_initialize (ProfilerThreadStack *stack, guint32 capacity) {
+       stack->stack = g_new0 (MonoMethod*, capacity);
+       stack->method_is_jitted = g_new0 (guint8, capacity);
+       stack->written_frames = g_new0 (guint32, capacity);
+
+       stack->capacity = capacity;
+       stack->top = 0;
+       stack->last_saved_top = 0;
+       stack->last_written_frame = 0;
+}
+
+/*
+ * Pushes method on the stack together with its "jitted" flag.
+ *
+ * When the stack is full, its three parallel arrays are reallocated with
+ * twice the capacity and the existing frames are copied over; top,
+ * last_saved_top and last_written_frame survive the reallocation.
+ * A stack with zero capacity (as left by thread_stack_initialize_empty ())
+ * gets a first allocation of 64 frames: doubling zero would allocate
+ * nothing and the store below would go through a NULL array.
+ */
+static void
+thread_stack_push_jitted (ProfilerThreadStack *stack, MonoMethod* method, gboolean method_is_jitted) {
+       if (stack->top >= stack->capacity) {
+               MonoMethod **old_stack = stack->stack;
+               guint8 *old_method_is_jitted = stack->method_is_jitted;
+               guint32 *old_written_frames = stack->written_frames;
+               guint32 top = stack->top;
+               guint32 last_saved_top = stack->last_saved_top;
+               guint32 last_written_frame = stack->last_written_frame;
+               guint32 new_capacity = (stack->capacity > 0) ? (stack->capacity * 2) : 64;
+
+               /* This resets the counters, they are restored below. */
+               thread_stack_initialize (stack, new_capacity);
+               if (top > 0) {
+                       /* With no frames the old arrays may be NULL, which memcpy must not see. */
+                       memcpy (stack->stack, old_stack, top * sizeof (MonoMethod*));
+                       memcpy (stack->method_is_jitted, old_method_is_jitted, top * sizeof (guint8));
+                       memcpy (stack->written_frames, old_written_frames, top * sizeof (guint32));
+               }
+               g_free (old_stack);
+               g_free (old_method_is_jitted);
+               g_free (old_written_frames);
+               stack->top = top;
+               stack->last_saved_top = last_saved_top;
+               stack->last_written_frame = last_written_frame;
+       }
+       stack->stack [stack->top] = method;
+       stack->method_is_jitted [stack->top] = method_is_jitted;
+       stack->top ++;
+}
+
+/* Pushes method on the stack with its "jitted" flag cleared. */
+static inline void
+thread_stack_push (ProfilerThreadStack *stack, MonoMethod* method) {
+       thread_stack_push_jitted (stack, method, FALSE);
+}
+
+/*
+ * Removes and returns the topmost method, or returns NULL when the stack
+ * has no frames. last_saved_top is lowered so that it never exceeds the
+ * new top.
+ */
+static MonoMethod*
+thread_stack_pop (ProfilerThreadStack *stack) {
+       if (stack->top <= 0) {
+               return NULL;
+       }
+
+       stack->top --;
+       if (stack->last_saved_top > stack->top) {
+               stack->last_saved_top = stack->top;
+       }
+       return stack->stack [stack->top];
+}
+
+/* Returns the topmost method without removing it, or NULL when there are no frames. */
+static MonoMethod*
+thread_stack_top (ProfilerThreadStack *stack) {
+       return (stack->top > 0) ? stack->stack [stack->top - 1] : NULL;
+}
+
+/* Returns the "jitted" flag of the topmost frame, or FALSE when there are no frames. */
+static gboolean
+thread_stack_top_is_jitted (ProfilerThreadStack *stack) {
+       if (stack->top <= 0) {
+               return FALSE;
+       }
+       return stack->method_is_jitted [stack->top - 1];
+}
+
+/*
+ * Returns the method index frames below the top (0 is the topmost frame),
+ * or NULL when the stack does not hold more than index frames.
+ */
+static MonoMethod*
+thread_stack_index_from_top (ProfilerThreadStack *stack, int index) {
+       if (stack->top <= index) {
+               return NULL;
+       }
+       return stack->stack [stack->top - (index + 1)];
+}
+
+/*
+ * Returns the "jitted" flag of the frame index positions below the top
+ * (0 is the topmost frame), or FALSE when the stack does not hold more
+ * than index frames.
+ */
+static gboolean
+thread_stack_index_from_top_is_jitted (ProfilerThreadStack *stack, int index) {
+       if (stack->top <= index) {
+               return FALSE;
+       }
+       return stack->method_is_jitted [stack->top - (index + 1)];
+}
+
+/* Like thread_stack_push (), but does nothing when the stack has no storage (stack->stack is NULL). */
+static inline void
+thread_stack_push_safely (ProfilerThreadStack *stack, MonoMethod* method) {
+       if (stack->stack == NULL) {
+               return;
+       }
+       thread_stack_push (stack, method);
+}
+
+/* Like thread_stack_push_jitted (), but does nothing when the stack has no storage (stack->stack is NULL). */
+static inline void
+thread_stack_push_jitted_safely (ProfilerThreadStack *stack, MonoMethod* method, gboolean method_is_jitted) {
+       if (stack->stack == NULL) {
+               return;
+       }
+       thread_stack_push_jitted (stack, method, method_is_jitted);
+}
+
+/* Returns how many frames lie above last_saved_top, never less than zero. */
+static inline int
+thread_stack_count_unsaved_frames (ProfilerThreadStack *stack) {
+       int unsaved = stack->top - stack->last_saved_top;
+       if (unsaved > 0) {
+               return unsaved;
+       }
+       return 0;
+}
+
+/* Returns the stack's last_written_frame value. */
+static inline int
+thread_stack_get_last_written_frame (ProfilerThreadStack *stack) {
+       return stack->last_written_frame;
+}
+
+/* Stores last_written_frame in the stack. */
+static inline void
+thread_stack_set_last_written_frame (ProfilerThreadStack *stack, int last_written_frame) {
+       stack->last_written_frame = last_written_frame;
+}
+
+/* Returns the written_frames entry at index. The index is not range checked. */
+static inline guint32
+thread_stack_written_frame_at_index (ProfilerThreadStack *stack, int index) {
+       return stack->written_frames [index];
+}
+
+/* Stores method_id_and_is_jitted in the written_frames entry at index. The index is not range checked. */
+static inline void
+thread_stack_write_frame_at_index (ProfilerThreadStack *stack, int index, guint32 method_id_and_is_jitted) {
+       stack->written_frames [index] = method_id_and_is_jitted;
+}
 
 static ClassIdMappingElement*
 class_id_mapping_element_get (MonoClass *klass) {
@@ -491,7 +1150,7 @@ static ClassIdMappingElement*
 class_id_mapping_element_new (MonoClass *klass) {
        ClassIdMappingElement *result = g_new (ClassIdMappingElement, 1);
        
-       result->name = g_strdup_printf ("%s.%s", mono_class_get_namespace (klass), mono_class_get_name (klass));
+       result->name = mono_type_full_name (mono_class_get_type (klass));
        result->klass = klass;
        result->next_unwritten = profiler->classes->unwritten;
        profiler->classes->unwritten = result;
@@ -831,72 +1490,6 @@ class_id_mapping_destroy (ClassIdMapping *map) {
        g_free (map);
 }
 
-static void
-unmanaged_function_new (ProfilerUnmanagedFunctions *functions, Dl_info *dl_info) {
-       ProfilerUnmanagedFunction *function = g_new (ProfilerUnmanagedFunction, 1);
-       function->id = functions->next_id;
-       functions->next_id ++;
-       function->hits = 1;
-       function->next_unwritten = functions->unwritten_queue;
-       functions->unwritten_queue = function;
-       function->name = g_strdup_printf ("[%s]:%s", dl_info->dli_fname, dl_info->dli_sname);
-       g_hash_table_insert (functions->table, dl_info->dli_saddr, function);
-}
-
-static void
-unmanaged_function_destroy (gpointer element) {
-       ProfilerUnmanagedFunction *function = (ProfilerUnmanagedFunction*) element;
-       if (function->name) {
-               g_free (function->name);
-               function->name = NULL;
-       }
-       g_free (function);
-}
-
-static gboolean
-unmanaged_function_hit (ProfilerUnmanagedFunctions *functions, gpointer address) {
-       Dl_info dl_info;
-       if (dladdr (address, &dl_info) && (dl_info.dli_saddr != NULL) && (dl_info.dli_fname != NULL)) {
-               ProfilerUnmanagedFunction *function = g_hash_table_lookup (functions->table, dl_info.dli_saddr);
-               
-               if (function != NULL) {
-                       if (function->next_unwritten != NULL) {
-                               function->hits ++;
-                       } else {
-                               function->hits = 1;
-                               function->next_unwritten = functions->unwritten_queue;
-                               functions->unwritten_queue = function;
-                       }
-               } else {
-                       unmanaged_function_new (functions, &dl_info);
-               }
-               
-               return TRUE;
-       } else {
-               return FALSE;
-       }
-}
-
-static void
-unmanaged_functions_init (ProfilerUnmanagedFunctions *functions) {
-       functions->next_id = 1;
-       functions->table = g_hash_table_new_full (g_direct_hash, NULL, NULL, unmanaged_function_destroy);
-       functions->unwritten_queue_end = &(functions->actual_unwritten_queue_end);
-       functions->unwritten_queue = functions->unwritten_queue_end;
-       functions->actual_unwritten_queue_end.hits = 0;
-       functions->actual_unwritten_queue_end.id = 0;
-       functions->actual_unwritten_queue_end.name = NULL;
-       functions->actual_unwritten_queue_end.next_unwritten = NULL;
-}
-
-static void
-unmanaged_functions_dispose (ProfilerUnmanagedFunctions *functions) {
-       functions->next_id = 0;
-       g_hash_table_destroy (functions->table);
-       functions->table = NULL;
-       functions->unwritten_queue = NULL;
-}
-
 #if (DEBUG_LOAD_EVENTS)
 static void
 print_load_event (const char *event_name, GHashTable *table, gpointer item, LoadedElement *element);
@@ -905,6 +1498,8 @@ print_load_event (const char *event_name, GHashTable *table, gpointer item, Load
 static LoadedElement*
 loaded_element_load_start (GHashTable *table, gpointer item) {
        LoadedElement *element = g_new0 (LoadedElement, 1);
+       element->id = profiler->loaded_element_next_free_id;
+       profiler->loaded_element_next_free_id ++;
 #if (DEBUG_LOAD_EVENTS)
        print_load_event ("LOAD START", table, item, element);
 #endif
@@ -949,6 +1544,21 @@ loaded_element_unload_end (GHashTable *table, gpointer item) {
        return element;
 }
 
+/* Returns the LoadedElement stored in table under the key item, or NULL when there is none. */
+static LoadedElement*
+loaded_element_find (GHashTable *table, gpointer item) {
+       return g_hash_table_lookup (table, item);
+}
+
+/* Returns the id of the element stored in table under the key item, or 0 when the key is not present. */
+static guint32
+loaded_element_get_id (GHashTable *table, gpointer item) {
+       LoadedElement *found = loaded_element_find (table, item);
+       if (found == NULL) {
+               return 0;
+       }
+       return found->id;
+}
 
 static void
 loaded_element_destroy (gpointer element) {
@@ -982,7 +1592,7 @@ print_load_event (const char *event_name, GHashTable *table, gpointer item, Load
                item_name = "<NULL>";
        }
        
-       printf ("%s EVENT for %s (%s)\n", event_name, item_info, item_name);
+       printf ("%s EVENT for %s (%s [id %d])\n", event_name, item_info, item_name, element->id);
        g_free (item_info);
 }
 #endif
@@ -1023,24 +1633,49 @@ profiler_heap_shot_object_buffer_new (ProfilerPerThreadData *data) {
 }
 
+/*
+ * Allocates and initializes a heap shot write job.
+ *
+ * A first write buffer is allocated only when the unreachable_objects
+ * action flag is set or dump_heap_data is TRUE; otherwise every buffer
+ * pointer of the job is NULL.
+ * When the collection_summary action flag is set, a zeroed per class
+ * summary array with profiler->classes->next_id entries is allocated;
+ * otherwise the summary is empty.
+ * The three arguments are stored in the job as they are.
+ */
 static ProfilerHeapShotWriteJob*
-profiler_heap_shot_write_job_new (gboolean heap_shot_was_signalled) {
+profiler_heap_shot_write_job_new (gboolean heap_shot_was_requested, gboolean dump_heap_data, guint32 collection) {
        ProfilerHeapShotWriteJob *job = g_new (ProfilerHeapShotWriteJob, 1);
        job->next = NULL;
        job->next_unwritten = NULL;
-       job->buffers = g_new (ProfilerHeapShotWriteBuffer, 1);
-       job->buffers->next = NULL;
-       job->last_next = & (job->buffers->next);
-       job->start = & (job->buffers->buffer [0]);
-       job->cursor = job->start;
-       job->end = & (job->buffers->buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE]);
+       
+       if (profiler->action_flags.unreachable_objects || dump_heap_data) {
+               job->buffers = g_new (ProfilerHeapShotWriteBuffer, 1);
+               job->buffers->next = NULL;
+               job->last_next = & (job->buffers->next);
+               job->start = & (job->buffers->buffer [0]);
+               job->cursor = job->start;
+               job->end = & (job->buffers->buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE]);
+       } else {
+               job->buffers = NULL;
+               job->last_next = NULL;
+               job->start = NULL;
+               job->cursor = NULL;
+               job->end = NULL;
+       }
        job->full_buffers = 0;
-       job->heap_shot_was_signalled = heap_shot_was_signalled;
+       
+       if (profiler->action_flags.collection_summary) {
+               job->summary.capacity = profiler->classes->next_id;
+               job->summary.per_class_data = g_new0 (ProfilerHeapShotClassSummary, job->summary.capacity);
+       } else {
+               job->summary.capacity = 0;
+               job->summary.per_class_data = NULL;
+       }
+
+       job->heap_shot_was_requested = heap_shot_was_requested;
+       job->collection = collection;
+       job->dump_heap_data = dump_heap_data;
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_job_new: created job %p with buffer %p(%p-%p)\n", job, job->buffers, job->start, job->end);
+       printf ("profiler_heap_shot_write_job_new: created job %p with buffer %p(%p-%p) (collection %d, dump %d)\n", job, job->buffers, job->start, job->end, collection, dump_heap_data);
 #endif
        return job;
 }
 
+/* Tells whether a job still owns write buffers or has a non empty per class summary. */
+static gboolean
+profiler_heap_shot_write_job_has_data (ProfilerHeapShotWriteJob *job) {
+       if (job->buffers != NULL) {
+               return TRUE;
+       }
+       return (job->summary.capacity > 0);
+}
+
 static void
 profiler_heap_shot_write_job_add_buffer (ProfilerHeapShotWriteJob *job, gpointer value) {
        ProfilerHeapShotWriteBuffer *buffer = g_new (ProfilerHeapShotWriteBuffer, 1);
@@ -1077,6 +1712,12 @@ profiler_heap_shot_write_job_free_buffers (ProfilerHeapShotWriteJob *job) {
        }
        
        job->buffers = NULL;
+       
+       if (job->summary.per_class_data != NULL) {
+               g_free (job->summary.per_class_data);
+               job->summary.per_class_data = NULL;
+       }
+       job->summary.capacity = 0;
 }
 
 static void
@@ -1096,15 +1737,15 @@ profiler_process_heap_shot_write_jobs (void) {
                        next_job = current_job->next_unwritten;
                        
                        if (next_job != NULL) {
-                               if (current_job->buffers != NULL) {
+                               if (profiler_heap_shot_write_job_has_data (current_job)) {
                                        done = FALSE;
                                }
-                               if (next_job->buffers == NULL) {
+                               if (! profiler_heap_shot_write_job_has_data (next_job)) {
                                        current_job->next_unwritten = NULL;
                                        next_job = NULL;
                                }
                        } else {
-                               if (current_job->buffers != NULL) {
+                               if (profiler_heap_shot_write_job_has_data (current_job)) {
                                        LOG_WRITER_THREAD ("profiler_process_heap_shot_write_jobs: writing...");
                                        profiler_heap_shot_write_block (current_job);
                                        LOG_WRITER_THREAD ("profiler_process_heap_shot_write_jobs: done");
@@ -1142,6 +1783,7 @@ profiler_free_heap_shot_write_jobs (void) {
                        printf ("profiler_free_heap_shot_write_jobs: job %p will be freed\n", current_job);
 #endif
                        next_job = current_job->next;
+                       profiler_heap_shot_write_job_free_buffers (current_job);
                        g_free (current_job);
                        current_job = next_job;
                }
@@ -1198,6 +1840,7 @@ profiler_per_thread_data_new (guint32 buffer_size)
 
        data->events = g_new0 (ProfilerEventData, buffer_size);
        data->next_free_event = data->events;
+       data->next_unreserved_event = data->events;
        data->end_event = data->events + (buffer_size - 1);
        data->first_unwritten_event = data->events;
        data->first_unmapped_event = data->events;
@@ -1205,9 +1848,16 @@ profiler_per_thread_data_new (guint32 buffer_size)
        data->last_event_counter = data->start_event_counter;
        data->thread_id = CURRENT_THREAD_ID ();
        data->heap_shot_object_buffers = NULL;
-       if ((profiler->action_flags.unreachable_objects == TRUE) || (profiler->action_flags.heap_shot == TRUE)) {
+       if ((profiler->action_flags.unreachable_objects == TRUE) ||
+                       (profiler->action_flags.heap_shot == TRUE) ||
+                       (profiler->action_flags.collection_summary == TRUE)) {
                profiler_heap_shot_object_buffer_new (data);
        }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_initialize (&(data->stack), 64);
+       } else {
+               thread_stack_initialize_empty (&(data->stack));
+       }
        return data;
 }
 
@@ -1215,17 +1865,18 @@ static void
 profiler_per_thread_data_destroy (ProfilerPerThreadData *data) {
        g_free (data->events);
        profiler_heap_shot_object_buffers_destroy (data->heap_shot_object_buffers);
+       thread_stack_free (&(data->stack));
        g_free (data);
 }
 
 static ProfilerStatisticalData*
-profiler_statistical_data_new (guint32 buffer_size)
-{
+profiler_statistical_data_new (MonoProfiler *profiler) {
+       int buffer_size = profiler->statistical_buffer_size * (profiler->statistical_call_chain_depth + 1);
        ProfilerStatisticalData *data = g_new (ProfilerStatisticalData, 1);
 
-       data->addresses = g_new0 (gpointer, buffer_size);
+       data->hits = g_new0 (ProfilerStatisticalHit, buffer_size);
        data->next_free_index = 0;
-       data->end_index = buffer_size;
+       data->end_index = profiler->statistical_buffer_size;
        data->first_unwritten_index = 0;
        
        return data;
@@ -1233,51 +1884,344 @@ profiler_statistical_data_new (guint32 buffer_size)
 
+/* Frees the hits array of a statistical data block and then the block itself. */
 static void
 profiler_statistical_data_destroy (ProfilerStatisticalData *data) {
-       g_free (data->addresses);
+       g_free (data->hits);
        g_free (data);
 }
 
-static void
-profiler_add_write_buffer (void) {
-       if (profiler->current_write_buffer->next == NULL) {
-               profiler->current_write_buffer->next = g_malloc (sizeof (ProfilerFileWriteBuffer) + PROFILER_FILE_WRITE_BUFFER_SIZE);
-               profiler->current_write_buffer->next->next = NULL;
-               
-               //printf ("Added next buffer %p, to buffer %p\n", profiler->current_write_buffer->next, profiler->current_write_buffer);
-               
+/*
+ * Allocates a zeroed code buffer array.
+ * With a NULL child the result is a level 0 array with no entries.
+ * Otherwise the result sits one level above child and has a single entry
+ * that points to child and spans from the start of child's first buffer
+ * to the end of its last one.
+ * NOTE(review): a non NULL child is read at index number_of_buffers - 1,
+ * so it presumably must hold at least one buffer; confirm with callers.
+ */
+static ProfilerCodeBufferArray*
+profiler_code_buffer_array_new (ProfilerCodeBufferArray *child) {
+       ProfilerCodeBufferArray *result = g_new0 (ProfilerCodeBufferArray, 1);
+       if (child == NULL) {
+               result->level = 0;
+       } else {
+               result->level = child->level + 1;
+               result->number_of_buffers = 1;
+               result->buffers [0].info.data.sub_buffers = child;
+               result->buffers [0].start = child->buffers [0].start;
+               result->buffers [0].end = child->buffers [child->number_of_buffers - 1].end;
        }
-       profiler->current_write_buffer = profiler->current_write_buffer->next;
-       profiler->current_write_position = 0;
-       profiler->full_write_buffers ++;
+       return result;
 }
 
+/*
+ * Frees a code buffer array. For arrays above level 0 every sub array
+ * referenced by an entry is destroyed recursively first.
+ */
 static void
-profiler_free_write_buffers (void) {
-       ProfilerFileWriteBuffer *current_buffer = profiler->write_buffers;
-       while (current_buffer != NULL) {
-               ProfilerFileWriteBuffer *next_buffer = current_buffer->next;
-               
-               //printf ("Freeing write buffer %p, next is %p\n", current_buffer, next_buffer);
-               
-               g_free (current_buffer);
-               current_buffer = next_buffer;
+profiler_code_buffer_array_destroy (ProfilerCodeBufferArray *buffers) {
+       if (buffers->level > 0) {
+               int i;
+               for (i = 0; i < buffers->number_of_buffers; i++) {
+                       ProfilerCodeBufferArray *sub_buffers = buffers->buffers [i].info.data.sub_buffers;
+                       profiler_code_buffer_array_destroy (sub_buffers);
+               }
        }
+       g_free (buffers);
 }
 
-#define WRITE_BYTE(b) do {\
-       if (profiler->current_write_position >= PROFILER_FILE_WRITE_BUFFER_SIZE) {\
-               profiler_add_write_buffer ();\
-       }\
-       profiler->current_write_buffer->buffer [profiler->current_write_position] = (b);\
-       profiler->current_write_position ++;\
-} while (0)
+/*
+ * Tells whether no further buffer can be added below this array without
+ * adding a new level on top of it.
+ * Walks down through the last entry of each level: the walk answers FALSE
+ * as soon as a level has free entries or its last sub array is more than
+ * one level shallower, otherwise the level 0 array at the bottom decides.
+ */
+static gboolean
+profiler_code_buffer_array_is_full (ProfilerCodeBufferArray *buffers) {
+       ProfilerCodeBufferArray *current = buffers;
+
+       for (;;) {
+               ProfilerCodeBufferArray *last_child;
+
+               if (! (current->level > 0)) {
+                       return (current->number_of_buffers == PROFILER_CODE_BUFFER_ARRAY_SIZE);
+               }
+               if (current->number_of_buffers < PROFILER_CODE_BUFFER_ARRAY_SIZE) {
+                       return FALSE;
+               }
+
+               last_child = current->buffers [PROFILER_CODE_BUFFER_ARRAY_SIZE - 1].info.data.sub_buffers;
+               if (last_child->level < (current->level - 1)) {
+                       return FALSE;
+               }
+               current = last_child;
+       }
+}
+
+/*
+ * Appends the code buffer [buffer, buffer + size) to a code buffer array
+ * tree and returns the array the caller must use from now on in place of
+ * buffers (a new root may have been created).
+ *
+ * buffers: the current root, or NULL to start a new level 0 array.
+ * type, data: stored in the new entry; data is kept as the method only
+ *   for MONO_PROFILER_CODE_BUFFER_METHOD, any other type is stored as
+ *   MONO_PROFILER_CODE_BUFFER_UNKNOWN with NULL data.
+ *
+ * New entries are always appended after the existing ones; nothing here
+ * sorts them.
+ */
+static ProfilerCodeBufferArray*
+profiler_code_buffer_add (ProfilerCodeBufferArray *buffers, gpointer *buffer, int size, MonoProfilerCodeBufferType type, void *data) {
+       if (buffers == NULL) {
+               buffers = profiler_code_buffer_array_new (NULL);
+       }
+       
+       if (profiler_code_buffer_array_is_full (buffers)) {
+               /* No room left: put the buffer in a fresh level 0 array, then make a new root holding the old tree and the fresh array. */
+               ProfilerCodeBufferArray *new_slot = profiler_code_buffer_add (NULL, buffer, size, type, data);
+               buffers = profiler_code_buffer_array_new (buffers);
+               buffers->buffers [buffers->number_of_buffers].info.data.sub_buffers = new_slot;
+               buffers->buffers [buffers->number_of_buffers].start = new_slot->buffers [0].start;
+               buffers->buffers [buffers->number_of_buffers].end = new_slot->buffers [new_slot->number_of_buffers - 1].end;
+               buffers->number_of_buffers ++;
+       } else if (buffers->level > 0) {
+               /* Recurse into the last sub array; it may come back as a different array, so the entry and its range are refreshed. */
+               ProfilerCodeBufferArray *new_slot = profiler_code_buffer_add (buffers->buffers [buffers->number_of_buffers - 1].info.data.sub_buffers, buffer, size, type, data);
+               buffers->buffers [buffers->number_of_buffers - 1].info.data.sub_buffers = new_slot;
+               buffers->buffers [buffers->number_of_buffers - 1].start = new_slot->buffers [0].start;
+               buffers->buffers [buffers->number_of_buffers - 1].end = new_slot->buffers [new_slot->number_of_buffers - 1].end;
+       } else {
+               /* Level 0 array with a free entry: store the buffer itself. */
+               buffers->buffers [buffers->number_of_buffers].start = buffer;
+               buffers->buffers [buffers->number_of_buffers].end = (((guint8*) buffer) + size);
+               buffers->buffers [buffers->number_of_buffers].info.type = type;
+               switch (type) {
+               case MONO_PROFILER_CODE_BUFFER_UNKNOWN:
+                       buffers->buffers [buffers->number_of_buffers].info.data.data = NULL;
+                       break;
+               case MONO_PROFILER_CODE_BUFFER_METHOD:
+                       buffers->buffers [buffers->number_of_buffers].info.data.method = data;
+                       break;
+               default:
+                       buffers->buffers [buffers->number_of_buffers].info.type = MONO_PROFILER_CODE_BUFFER_UNKNOWN;
+                       buffers->buffers [buffers->number_of_buffers].info.data.data = NULL;
+               }
+               buffers->number_of_buffers ++;
+       }
+       return buffers;
+}
+
+/*
+ * Finds the code buffer whose [start, end) range contains address.
+ *
+ * Each level of the array tree is searched with a binary search over the
+ * entry ranges; on levels above 0 the search continues in the sub array
+ * of the matching entry. Returns the level 0 entry found, or NULL when
+ * buffers is NULL, an array on the way down has no entries, or no entry
+ * contains address.
+ */
+static ProfilerCodeBuffer*
+profiler_code_buffer_find (ProfilerCodeBufferArray *buffers, gpointer *address) {
+       if (buffers != NULL) {
+               ProfilerCodeBuffer *result = NULL;
+               do {
+                       int low = 0;
+                       int high = buffers->number_of_buffers - 1;
+
+                       /* An array without entries (like the one of a new chunk) would be read at index -1 below. */
+                       if (high < 0) {
+                               return NULL;
+                       }
+
+                       while (high != low) {
+                               int middle = low + ((high - low) >> 1);
+
+                               /* Outside of the range still under examination: give up. */
+                               if ((guint8*) address < (guint8*) buffers->buffers [low].start) {
+                                       return NULL;
+                               }
+                               if ((guint8*) address >= (guint8*) buffers->buffers [high].end) {
+                                       return NULL;
+                               }
+
+                               if ((guint8*) address < (guint8*) buffers->buffers [middle].start) {
+                                       high = middle - 1;
+                                       if (high < low) {
+                                               high = low;
+                                       }
+                               } else if ((guint8*) address >= (guint8*) buffers->buffers [middle].end) {
+                                       low = middle + 1;
+                                       if (low > high) {
+                                               low = high;
+                                       }
+                               } else {
+                                       high = middle;
+                                       low = middle;
+                               }
+                       }
+
+                       if (((guint8*) address >= (guint8*) buffers->buffers [low].start) && ((guint8*) address < (guint8*) buffers->buffers [low].end)) {
+                               if (buffers->level == 0) {
+                                       result = & (buffers->buffers [low]);
+                               } else {
+                                       buffers = buffers->buffers [low].info.data.sub_buffers;
+                               }
+                       } else {
+                               return NULL;
+                       }
+               } while (result == NULL);
+               return result;
+       } else {
+               return NULL;
+       }
+}
+
+/*
+ * Sets up a chunk covering size bytes starting at memory, with a new and
+ * empty code buffer array and the destroyed flag cleared.
+ */
+static void
+profiler_code_chunk_initialize (ProfilerCodeChunk *chunk, gpointer memory, gsize size) {
+       guint8 *first_byte = (guint8*) memory;
+
+       chunk->start = memory;
+       chunk->end = first_byte + size;
+       chunk->destroyed = FALSE;
+       chunk->buffers = profiler_code_buffer_array_new (NULL);
+}
+
+/* Destroys the code buffer array owned by a chunk (if any) and clears its address range. */
+static void
+profiler_code_chunk_cleanup (ProfilerCodeChunk *chunk) {
+       ProfilerCodeBufferArray *owned_buffers = chunk->buffers;
+
+       chunk->buffers = NULL;
+       chunk->start = NULL;
+       chunk->end = NULL;
+
+       if (owned_buffers != NULL) {
+               profiler_code_buffer_array_destroy (owned_buffers);
+       }
+}
+
+/* Allocates a zeroed array with room for 32 chunks and marks it as holding none. */
+static void
+profiler_code_chunks_initialize (ProfilerCodeChunks *chunks) {
+       chunks->number_of_chunks = 0;
+       chunks->capacity = 32;
+       chunks->chunks = g_new0 (ProfilerCodeChunk, 32);
+}
+
+/*
+ * Cleans up every chunk in use, frees the chunk array and leaves the
+ * container empty with a NULL array.
+ */
+static void
+profiler_code_chunks_cleanup (ProfilerCodeChunks *chunks) {
+       int index = 0;
+
+       while (index < chunks->number_of_chunks) {
+               profiler_code_chunk_cleanup (chunks->chunks + index);
+               index ++;
+       }
+
+       g_free (chunks->chunks);
+       chunks->chunks = NULL;
+       chunks->number_of_chunks = 0;
+       chunks->capacity = 0;
+}
+
+/*
+ * qsort () comparator that orders code chunks by address.
+ * A chunk covers the half open range [start, end), as in
+ * compare_address_and_code_chunk (), so a chunk ending exactly where the
+ * other one starts lies entirely before it. Comparing the end with "<="
+ * keeps the result consistent when the arguments are swapped, which
+ * qsort () requires. Returns 0 only when the two ranges overlap.
+ */
+static int
+compare_code_chunks (const void* c1, const void* c2) {
+       const ProfilerCodeChunk *chunk1 = (const ProfilerCodeChunk*) c1;
+       const ProfilerCodeChunk *chunk2 = (const ProfilerCodeChunk*) c2;
+       return ((guint8*) chunk1->end <= (guint8*) chunk2->start) ? -1 : (((guint8*) chunk1->start >= (guint8*) chunk2->end) ? 1 : 0);
+}
+
+/*
+ * bsearch () comparator between an address (the key, passed as the
+ * pointer value itself) and a code chunk: -1 when the address is below
+ * the chunk, 1 when it is at or beyond its end, 0 when it is inside.
+ */
+static int
+compare_address_and_code_chunk (const void* a, const void* c) {
+       guint8 *wanted = (guint8*) a;
+       ProfilerCodeChunk *candidate = (ProfilerCodeChunk*) c;
+
+       if (wanted < (guint8*) candidate->start) {
+               return -1;
+       }
+       if (wanted >= (guint8*) candidate->end) {
+               return 1;
+       }
+       return 0;
+}
+
+/* Sorts the chunks in use with compare_code_chunks (). */
+static void
+profiler_code_chunks_sort (ProfilerCodeChunks *chunks) {
+       qsort (chunks->chunks, chunks->number_of_chunks, sizeof (ProfilerCodeChunk), compare_code_chunks);
+}
+
+/*
+ * Binary searches the chunks in use for the one containing address.
+ * The address itself is handed to bsearch () as the key. Returns NULL
+ * when no chunk contains it.
+ */
+static ProfilerCodeChunk*
+profiler_code_chunk_find (ProfilerCodeChunks *chunks, gpointer address) {
+       return bsearch (address, chunks->chunks, chunks->number_of_chunks, sizeof (ProfilerCodeChunk), compare_address_and_code_chunk);
+}
+
+/*
+ * Adds a chunk covering size bytes starting at memory and re-sorts the
+ * chunk array. The array doubles its capacity when it is full.
+ *
+ * Returns the new chunk. Sorting can move it away from the slot it was
+ * created in, so it is located again afterwards through its code buffer
+ * array, which is allocated by profiler_code_chunk_initialize () and
+ * therefore belongs to this chunk only.
+ */
+static ProfilerCodeChunk*
+profiler_code_chunk_new (ProfilerCodeChunks *chunks, gpointer memory, gsize size) {
+       ProfilerCodeChunk *result;
+       ProfilerCodeBufferArray *new_buffers;
+       int i;
+
+       if (chunks->number_of_chunks == chunks->capacity) {
+               ProfilerCodeChunk *new_chunks = g_new0 (ProfilerCodeChunk, chunks->capacity * 2);
+               memcpy (new_chunks, chunks->chunks, chunks->capacity * sizeof (ProfilerCodeChunk));
+               chunks->capacity *= 2;
+               g_free (chunks->chunks);
+               chunks->chunks = new_chunks;
+       }
+
+       result = & (chunks->chunks [chunks->number_of_chunks]);
+       chunks->number_of_chunks ++;
+       profiler_code_chunk_initialize (result, memory, size);
+       new_buffers = result->buffers;
+       profiler_code_chunks_sort (chunks);
+
+       /* The slot taken before sorting may now hold another chunk. */
+       for (i = 0; i < chunks->number_of_chunks; i++) {
+               if (chunks->chunks [i].buffers == new_buffers) {
+                       return & (chunks->chunks [i]);
+               }
+       }
+       return NULL;
+}
+
+/* Returns the position of chunk inside the chunk array, computed by pointer difference. */
+static int
+profiler_code_chunk_to_index (ProfilerCodeChunks *chunks, ProfilerCodeChunk *chunk) {
+       return (int) (chunk - chunks->chunks);
+}
+
+/*
+ * Removes chunk from the chunk array: its resources are released, the
+ * chunks that follow it are shifted down by one position (which keeps
+ * the array sorted) and the number of chunks in use is decremented.
+ * The slot vacated at the end is zeroed, like a never used one.
+ * Only the chunks that actually follow are moved, so nothing past the
+ * used part of the array is read.
+ */
+static void
+profiler_code_chunk_remove (ProfilerCodeChunks *chunks, ProfilerCodeChunk *chunk) {
+       int index = profiler_code_chunk_to_index (chunks, chunk);
+
+       profiler_code_chunk_cleanup (chunk);
+       if ((index >= 0) && (index < chunks->number_of_chunks)) {
+               int following_chunks = chunks->number_of_chunks - (index + 1);
+
+               if (following_chunks > 0) {
+                       memmove (chunk, chunk + 1, following_chunks * sizeof (ProfilerCodeChunk));
+               }
+               chunks->number_of_chunks --;
+               memset (& (chunks->chunks [chunks->number_of_chunks]), 0, sizeof (ProfilerCodeChunk));
+       }
+}
+
+/*
+ * Returns the code buffer containing address, or NULL when no chunk or
+ * no buffer contains it.
+ * This assumes the profiler lock is held.
+ */
+static ProfilerCodeBuffer*
+profiler_code_buffer_from_address (MonoProfiler *prof, gpointer address) {
+       ProfilerCodeChunk *owner = profiler_code_chunk_find (& (prof->code_chunks), address);
+
+       if (owner == NULL) {
+               return NULL;
+       }
+       return profiler_code_buffer_find (owner->buffers, address);
+}
+
+/*
+ * Registers a new code chunk under the profiler lock.
+ * Does nothing when the chunk array is not allocated.
+ */
+static void
+profiler_code_chunk_new_callback (MonoProfiler *prof, gpointer address, int size) {
+       if (prof->code_chunks.chunks == NULL) {
+               return;
+       }
+
+       LOCK_PROFILER ();
+       profiler_code_chunk_new (& (prof->code_chunks), address, size);
+       UNLOCK_PROFILER ();
+}
+
+/*
+ * Removes the code chunk containing address, if there is one, under the
+ * profiler lock. Does nothing when the chunk array is not allocated.
+ */
+static void
+profiler_code_chunk_destroy_callback  (MonoProfiler *prof, gpointer address) {
+       ProfilerCodeChunks *all_chunks = & (prof->code_chunks);
+       ProfilerCodeChunk *doomed;
+
+       if (prof->code_chunks.chunks == NULL) {
+               return;
+       }
+
+       LOCK_PROFILER ();
+       doomed = profiler_code_chunk_find (all_chunks, address);
+       if (doomed != NULL) {
+               profiler_code_chunk_remove (all_chunks, doomed);
+       }
+       UNLOCK_PROFILER ();
+}
+
+/*
+ * Records a new code buffer in the chunk containing address, under the
+ * profiler lock. The buffer is dropped when no chunk contains address,
+ * and nothing happens when the chunk array is not allocated.
+ */
+static void
+profiler_code_buffer_new_callback  (MonoProfiler *prof, gpointer address, int size, MonoProfilerCodeBufferType type, void *data) {
+       ProfilerCodeChunk *owner;
+
+       if (prof->code_chunks.chunks == NULL) {
+               return;
+       }
+
+       LOCK_PROFILER ();
+       owner = profiler_code_chunk_find (& (prof->code_chunks), address);
+       if (owner != NULL) {
+               /* Adding can create a new root array, so the chunk keeps the returned one. */
+               owner->buffers = profiler_code_buffer_add (owner->buffers, address, size, type, data);
+       }
+       UNLOCK_PROFILER ();
+}
+
+/*
+ * Moves the write cursor to the buffer following the current one.
+ * A new buffer is allocated and linked only when the current buffer has
+ * no successor. The write position restarts from zero and the count of
+ * full buffers grows by one.
+ */
+static void
+profiler_add_write_buffer (void) {
+       ProfilerFileWriteBuffer *filled = profiler->current_write_buffer;
+
+       if (filled->next == NULL) {
+               ProfilerFileWriteBuffer *fresh = g_malloc (sizeof (ProfilerFileWriteBuffer) + PROFILER_FILE_WRITE_BUFFER_SIZE);
+               fresh->next = NULL;
+               filled->next = fresh;
+       }
+
+       profiler->current_write_buffer = filled->next;
+       profiler->current_write_position = 0;
+       profiler->full_write_buffers ++;
+}
 
+/*
+ * Frees every buffer of the write buffer list.
+ * The list head stored in the profiler is left untouched.
+ */
+static void
+profiler_free_write_buffers (void) {
+       ProfilerFileWriteBuffer *buffer;
+       ProfilerFileWriteBuffer *following;
+
+       for (buffer = profiler->write_buffers; buffer != NULL; buffer = following) {
+               /* Read the link before the buffer goes away. */
+               following = buffer->next;
+               g_free (buffer);
+       }
+}
+
+/*
+ * WRITE_BYTE (b) appends the byte b to the current write buffer, moving
+ * to the next buffer first (see profiler_add_write_buffer) when the
+ * current one is full.
+ */
+#define WRITE_BYTE(b) do {\
+       if (profiler->current_write_position >= PROFILER_FILE_WRITE_BUFFER_SIZE) {\
+               profiler_add_write_buffer ();\
+       }\
+       profiler->current_write_buffer->buffer [profiler->current_write_position] = (b);\
+       profiler->current_write_position ++;\
+} while (0)
+
+/* Debug only running total of the bytes written, updated by write_current_block (). */
+#if (DEBUG_FILE_WRITES)
+static int bytes_written = 0;
+#endif
 
 static void
 write_current_block (guint16 code) {
        guint32 size = (profiler->full_write_buffers * PROFILER_FILE_WRITE_BUFFER_SIZE) + profiler->current_write_position;
        ProfilerFileWriteBuffer *current_buffer = profiler->write_buffers;
-       guint8 header [6];
+       guint64 current_counter;
+       guint32 counter_delta;
+       guint8 header [10];
+       
+       MONO_PROFILER_GET_CURRENT_COUNTER (current_counter);
+       if (profiler->last_header_counter != 0) {
+               counter_delta = current_counter - profiler->last_header_counter;
+       } else {
+               counter_delta = 0;
+       }
+       profiler->last_header_counter = current_counter;
        
        header [0] = code & 0xff;
        header [1] = (code >> 8) & 0xff;
@@ -1285,18 +2229,37 @@ write_current_block (guint16 code) {
        header [3] = (size >> 8) & 0xff;
        header [4] = (size >> 16) & 0xff;
        header [5] = (size >> 24) & 0xff;
-       
-       WRITE_BUFFER (& (header [0]), 6);
+       header [6] = counter_delta & 0xff;
+       header [7] = (counter_delta >> 8) & 0xff;
+       header [8] = (counter_delta >> 16) & 0xff;
+       header [9] = (counter_delta >> 24) & 0xff;
+       
+#if (DEBUG_FILE_WRITES)
+       printf ("write_current_block: writing header (code %d) at offset %d\n", code, bytes_written);
+       bytes_written += 10;
+#endif
+       WRITE_BUFFER (& (header [0]), 10);
        
        while ((current_buffer != NULL) && (profiler->full_write_buffers > 0)) {
+#if (DEBUG_FILE_WRITES)
+               printf ("write_current_block: writing buffer (size %d)\n", PROFILER_FILE_WRITE_BUFFER_SIZE);
+               bytes_written += PROFILER_FILE_WRITE_BUFFER_SIZE;
+#endif
                WRITE_BUFFER (& (current_buffer->buffer [0]), PROFILER_FILE_WRITE_BUFFER_SIZE);
                profiler->full_write_buffers --;
                current_buffer = current_buffer->next;
        }
        if (profiler->current_write_position > 0) {
+#if (DEBUG_FILE_WRITES)
+               printf ("write_current_block: writing last buffer (size %d)\n", profiler->current_write_position);
+               bytes_written += profiler->current_write_position;
+#endif
                WRITE_BUFFER (& (current_buffer->buffer [0]), profiler->current_write_position);
        }
        FLUSH_FILE ();
+#if (DEBUG_FILE_WRITES)
+       printf ("write_current_block: buffers flushed (file size %d)\n", bytes_written);
+#endif
        
        profiler->current_write_buffer = profiler->write_buffers;
        profiler->current_write_position = 0;
@@ -1332,6 +2295,31 @@ write_string (const char *string) {
        WRITE_BYTE (0);
 }
 
+static void write_clock_data (void);
+
+/*
+ * Write a DIRECTIVES block: clock data, a list of directive codes closed
+ * by MONO_PROFILER_DIRECTIVE_END, clock data again.
+ * The directive codes are only listed when start is TRUE; otherwise the
+ * list consists of the END marker alone.
+ */
+static void
+write_directives_block (gboolean start) {
+       /* At most one slot per directive code, END included. */
+       guint32 directives [MONO_PROFILER_DIRECTIVE_LAST];
+       int directive_count = 0;
+       int index;
+       
+       write_clock_data ();
+       
+       if (start) {
+               /* Directives that depend on the options in profiler->action_flags. */
+               if (profiler->action_flags.save_allocation_caller) {
+                       directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_CALLER;
+               }
+               if (profiler->action_flags.save_allocation_stack || profiler->action_flags.track_calls) {
+                       directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_ALLOCATIONS_HAVE_STACK;
+               }
+               if (profiler->action_flags.allocations_carry_id) {
+                       directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_ID;
+               }
+               /* Directives that are always announced at start. */
+               directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_LOADED_ELEMENTS_CARRY_ID;
+               directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_CLASSES_CARRY_ASSEMBLY_ID;
+               directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_METHODS_CARRY_WRAPPER_FLAG;
+       }
+       directives [directive_count ++] = MONO_PROFILER_DIRECTIVE_END;
+       
+       for (index = 0; index < directive_count; index ++) {
+               write_uint32 (directives [index]);
+       }
+       
+       write_clock_data ();
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES);
+}
+
 #if DEBUG_HEAP_PROFILER
 #define WRITE_HEAP_SHOT_JOB_VALUE_MESSAGE(v,c) printf ("WRITE_HEAP_SHOT_JOB_VALUE: writing value %p at cursor %p\n", (v), (c))
 #else
@@ -1347,10 +2335,18 @@ write_string (const char *string) {
        }\
 } while (0)
 
+
+/*
+ * Replace any earlier definitions of the pointer/integer conversion
+ * macros with versions that cast through an unsigned integer of exactly
+ * pointer width (guint32 or guint64, selected by SIZEOF_VOID_P).
+ * Any other pointer size is rejected at compile time.
+ */
 #undef GUINT_TO_POINTER
-#define GUINT_TO_POINTER(u) ((void*)(guint64)(u))
 #undef GPOINTER_TO_UINT
+#if (SIZEOF_VOID_P == 4)
+#define GUINT_TO_POINTER(u) ((void*)(guint32)(u))
+#define GPOINTER_TO_UINT(p) ((guint32)(void*)(p))
+#elif (SIZEOF_VOID_P == 8)
+#define GUINT_TO_POINTER(u) ((void*)(guint64)(u))
 #define GPOINTER_TO_UINT(p) ((guint64)(void*)(p))
+#else
+#error Bad size of void pointer
+#endif
 
 #define WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE(j,v,c) WRITE_HEAP_SHOT_JOB_VALUE (j, GUINT_TO_POINTER (GPOINTER_TO_UINT (v)|(c)))
 
@@ -1378,7 +2374,7 @@ write_string (const char *string) {
 } while (0)
 
 static void
-profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
+profiler_heap_shot_write_data_block (ProfilerHeapShotWriteJob *job) {
        ProfilerHeapShotWriteBuffer *buffer;
        gpointer* cursor;
        gpointer* end;
@@ -1391,13 +2387,13 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
        write_uint64 (job->start_time);
        write_uint64 (job->end_counter);
        write_uint64 (job->end_time);
-       
+       write_uint32 (job->collection);
        MONO_PROFILER_GET_CURRENT_COUNTER (start_counter);
        MONO_PROFILER_GET_CURRENT_TIME (start_time);
        write_uint64 (start_counter);
        write_uint64 (start_time);
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_block: working on job %p...\n", job);
+       printf ("profiler_heap_shot_write_data_block: start writing job %p (start %p, end %p)...\n", job, & (job->buffers->buffer [0]), job->cursor);
 #endif
        buffer = job->buffers;
        cursor = & (buffer->buffer [0]);
@@ -1410,13 +2406,13 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                cursor = NULL;
        }
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_block: in job %p, starting at buffer %p and cursor %p\n", job, buffer, cursor);
+       printf ("profiler_heap_shot_write_data_block: in job %p, starting at buffer %p and cursor %p\n", job, buffer, cursor);
 #endif
        while (cursor != NULL) {
                gpointer value = *cursor;
                HeapProfilerJobValueCode code = GPOINTER_TO_UINT (value) & HEAP_CODE_MASK;
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_shot_write_block: got value %p and code %d\n", value, code);
+               printf ("profiler_heap_shot_write_data_block: got value %p and code %d\n", value, code);
 #endif
                
                UPDATE_JOB_BUFFER_CURSOR ();
@@ -1428,7 +2424,7 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        
                        class_id = class_id_mapping_element_get (klass);
                        if (class_id == NULL) {
-                               printf ("profiler_heap_shot_write_block: unknown class %p", klass);
+                               printf ("profiler_heap_shot_write_data_block: unknown class %p", klass);
                        }
                        g_assert (class_id != NULL);
                        write_uint32 ((class_id->id << 2) | HEAP_CODE_FREE_OBJECT_CLASS);
@@ -1437,7 +2433,7 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        UPDATE_JOB_BUFFER_CURSOR ();
                        write_uint32 (size);
 #if DEBUG_HEAP_PROFILER
-                       printf ("profiler_heap_shot_write_block: wrote unreachable object of class %p (id %d, size %d)\n", klass, class_id->id, size);
+                       printf ("profiler_heap_shot_write_data_block: wrote unreachable object of class %p (id %d, size %d)\n", klass, class_id->id, size);
 #endif
                } else if (code == HEAP_CODE_OBJECT) {
                        MonoObject *object = GUINT_TO_POINTER (GPOINTER_TO_UINT (value) & (~ (guint64) HEAP_CODE_MASK));
@@ -1448,16 +2444,16 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        UPDATE_JOB_BUFFER_CURSOR ();
                        
                        if (class_id == NULL) {
-                               printf ("profiler_heap_shot_write_block: unknown class %p", klass);
+                               printf ("profiler_heap_shot_write_data_block: unknown class %p", klass);
                        }
                        g_assert (class_id != NULL);
                        
-                       write_uint64 (GPOINTER_TO_UINT (object));
+                       write_uint64 (GPOINTER_TO_UINT (value));
                        write_uint32 (class_id->id);
                        write_uint32 (size);
                        write_uint32 (references);
 #if DEBUG_HEAP_PROFILER
-                       printf ("profiler_heap_shot_write_block: writing object %p (references %d)\n", value, references);
+                       printf ("profiler_heap_shot_write_data_block: writing object %p (references %d)\n", value, references);
 #endif
                        
                        while (references > 0) {
@@ -1466,12 +2462,12 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                                UPDATE_JOB_BUFFER_CURSOR ();
                                references --;
 #if DEBUG_HEAP_PROFILER
-                               printf ("profiler_heap_shot_write_block:   inside object %p, wrote reference %p)\n", value, reference);
+                               printf ("profiler_heap_shot_write_data_block:   inside object %p, wrote reference %p)\n", value, reference);
 #endif
                        }
                } else {
 #if DEBUG_HEAP_PROFILER
-                       printf ("profiler_heap_shot_write_block: unknown code %d in value %p\n", code, value);
+                       printf ("profiler_heap_shot_write_data_block: unknown code %d in value %p\n", code, value);
 #endif
                        g_assert_not_reached ();
                }
@@ -1483,7 +2479,64 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
        write_uint64 (end_counter);
        write_uint64 (end_time);
        
-       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_HEAP);
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_HEAP_DATA);
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_data_block: writing job %p done.\n", job);
+#endif
+}
+/*
+ * Write a HEAP_SUMMARY block for the given job.
+ * Layout: start counter and time, the job's collection number, then for
+ * every class slot with at least one reachable or unreachable instance
+ * its index followed by the reachable and unreachable instance/byte
+ * counts, a zero terminator, and finally the end counter and time.
+ * NOTE(review): a zero index doubles as the terminator, so presumably
+ * slot 0 is never a real class -- confirm against the id allocator.
+ */
+static void
+profiler_heap_shot_write_summary_block (ProfilerHeapShotWriteJob *job) {
+       guint64 counter_at_start;
+       guint64 time_at_start;
+       guint64 counter_at_end;
+       guint64 time_at_end;
+       int class_index;
+       
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_summary_block: start writing job %p...\n", job);
+#endif
+       MONO_PROFILER_GET_CURRENT_COUNTER (counter_at_start);
+       MONO_PROFILER_GET_CURRENT_TIME (time_at_start);
+       write_uint64 (counter_at_start);
+       write_uint64 (time_at_start);
+       
+       write_uint32 (job->collection);
+       
+       for (class_index = 0; class_index < job->summary.capacity; class_index ++) {
+               /* Slots without any instance are left out of the block. */
+               if (! ((job->summary.per_class_data [class_index].reachable.instances > 0) || (job->summary.per_class_data [class_index].unreachable.instances > 0))) {
+                       continue;
+               }
+               
+               write_uint32 (class_index);
+               write_uint32 (job->summary.per_class_data [class_index].reachable.instances);
+               write_uint32 (job->summary.per_class_data [class_index].reachable.bytes);
+               write_uint32 (job->summary.per_class_data [class_index].unreachable.instances);
+               write_uint32 (job->summary.per_class_data [class_index].unreachable.bytes);
+       }
+       /* End of the per class records. */
+       write_uint32 (0);
+       
+       MONO_PROFILER_GET_CURRENT_COUNTER (counter_at_end);
+       MONO_PROFILER_GET_CURRENT_TIME (time_at_end);
+       write_uint64 (counter_at_end);
+       write_uint64 (time_at_end);
+       
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_HEAP_SUMMARY);
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_summary_block: writing job %p done.\n", job);
+#endif
+}
+
+static void
+profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_block: working on job %p...\n", job);
+#endif
+       
+       if (profiler->action_flags.collection_summary == TRUE) {
+               profiler_heap_shot_write_summary_block (job);
+       }
+       
+       if ((profiler->action_flags.unreachable_objects == TRUE) || (profiler->action_flags.heap_shot == TRUE)) {
+               profiler_heap_shot_write_data_block (job);
+       }
        
        profiler_heap_shot_write_job_free_buffers (job);
 #if DEBUG_HEAP_PROFILER
@@ -1492,12 +2545,37 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
 }
 
+/*
+ * Write a LOADED block for one element: the kind byte, the load start
+ * and end counters, the thread id, the element id and the element name.
+ * When kind has the MONO_PROFILER_LOADED_EVENT_ASSEMBLY bit set, item is
+ * used as a MonoAssembly* and the assembly name details are appended.
+ * The element is flagged as written once the block has been emitted.
+ */
 static void
-write_element_load_block (LoadedElement *element, guint8 kind, gsize thread_id) {
+write_element_load_block (LoadedElement *element, guint8 kind, gsize thread_id, gpointer item) {
        WRITE_BYTE (kind);
        write_uint64 (element->load_start_counter);
        write_uint64 (element->load_end_counter);
        write_uint64 (thread_id);
+       write_uint32 (element->id);
        write_string (element->name);
+       /* Assembly records carry: name, four version numbers, culture, */
+       /* public key token and a retargetable flag */
+       if (kind & MONO_PROFILER_LOADED_EVENT_ASSEMBLY) {
+               MonoImage *image = mono_assembly_get_image ((MonoAssembly*) item);
+               MonoAssemblyName aname;
+               if (mono_assembly_fill_assembly_name (image, &aname)) {
+                       write_string (aname.name);
+                       write_uint32 (aname.major);
+                       write_uint32 (aname.minor);
+                       write_uint32 (aname.build);
+                       write_uint32 (aname.revision);
+                       write_string (aname.culture && *aname.culture? aname.culture: "neutral");
+                       write_string (aname.public_key_token [0] ? (char *)aname.public_key_token : "null");
+                       /* Retargetable flag */
+                       write_uint32 ((aname.flags & 0x00000100) ? 1 : 0);
+               } else {
+                       /* The name could not be filled in: write placeholders */
+                       /* in the same field order as above */
+                       write_string ("UNKNOWN");
+                       write_uint32 (0);
+                       write_uint32 (0);
+                       write_uint32 (0);
+                       write_uint32 (0);
+                       write_string ("neutral");
+                       write_string ("null");
+                       write_uint32 (0);
+               }
+       }
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_LOADED);
        element->load_written = TRUE;
 }
@@ -1508,6 +2586,7 @@ write_element_unload_block (LoadedElement *element, guint8 kind, gsize thread_id
        write_uint64 (element->unload_start_counter);
        write_uint64 (element->unload_end_counter);
        write_uint64 (thread_id);
+       write_uint32 (element->id);
        write_string (element->name);
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_UNLOADED);
        element->unload_written = TRUE;
@@ -1533,7 +2612,7 @@ write_mapping_block (gsize thread_id) {
        if ((profiler->classes->unwritten == NULL) && (profiler->methods->unwritten == NULL))
                return;
        
-#if (DEBUG_MAPPING_EVENTS)
+#if (DEBUG_MAPPING_EVENTS || DEBUG_FILE_WRITES)
        printf ("[write_mapping_block][TID %ld] START\n", thread_id);
 #endif
        
@@ -1541,7 +2620,11 @@ write_mapping_block (gsize thread_id) {
        write_uint64 (thread_id);
        
        for (current_class = profiler->classes->unwritten; current_class != NULL; current_class = current_class->next_unwritten) {
+               MonoImage *image = mono_class_get_image (current_class->klass);
+               MonoAssembly *assembly = mono_image_get_assembly (image);
+               guint32 assembly_id = loaded_element_get_id (profiler->loaded_assemblies, assembly);
                write_uint32 (current_class->id);
+               write_uint32 (assembly_id);
                write_string (current_class->name);
 #if (DEBUG_MAPPING_EVENTS)
                printf ("mapping CLASS (%d => %s)\n", current_class->id, current_class->name);
@@ -1559,6 +2642,11 @@ write_mapping_block (gsize thread_id) {
                g_assert (class_element != NULL);
                write_uint32 (current_method->id);
                write_uint32 (class_element->id);
+               if (method->wrapper_type != 0) {
+                       write_uint32 (1);
+               } else {
+                       write_uint32 (0);
+               }
                write_string (current_method->name);
 #if (DEBUG_MAPPING_EVENTS)
                printf ("mapping METHOD ([%d]%d => %s)\n", class_element?class_element->id:1, current_method->id, current_method->name);
@@ -1572,18 +2660,11 @@ write_mapping_block (gsize thread_id) {
        write_clock_data ();
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_MAPPING);
        
-#if (DEBUG_MAPPING_EVENTS)
+#if (DEBUG_MAPPING_EVENTS || DEBUG_FILE_WRITES)
        printf ("[write_mapping_block][TID %ld] END\n", thread_id);
 #endif
 }
 
-static guint64
-get_extended_event_value (ProfilerEventData *event, ProfilerEventData *next) {
-       guint64 result = next->data.number;
-       result |= (((guint64) event->value) << 32);
-       return result;
-}
-
 typedef enum {
        MONO_PROFILER_PACKED_EVENT_CODE_METHOD_ENTER = 1,
        MONO_PROFILER_PACKED_EVENT_CODE_METHOD_EXIT_IMPLICIT = 2,
@@ -1605,17 +2686,78 @@ typedef enum {
        result = ((base)|((((kind)<<4) | (code)) << MONO_PROFILER_PACKED_EVENT_CODE_BITS));\
 } while (0)
 
+/*
+ * Emit a STACK_SECTION event that repeats the frames already recorded as
+ * written for this stack: a zero, the frame count, then the frame values
+ * from the highest index down to index 0.
+ */
+static void
+rewrite_last_written_stack (ProfilerThreadStack *stack) {
+       int frame_count = thread_stack_get_last_written_frame (stack);
+       int index;
+       guint8 event_code;
+       
+       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_STACK_SECTION, 0, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+       WRITE_BYTE (event_code);
+       write_uint32 (0);
+       write_uint32 (frame_count);
+       
+       for (index = frame_count - 1; index >= 0; index--) {
+               write_uint32 (thread_stack_written_frame_at_index (stack, index));
+       }
+}
+
+
+/*
+ * Write a STACK_SECTION event and the caller frames that follow it.
+ * events points at the section header: data.number is the index of the
+ * last frame that was already saved and value is the number of frame
+ * events that come right after the header.
+ * Each frame is written as (method id << 1) | jit flag and is mirrored
+ * into data->stack, so the section can be replayed later on.
+ * Returns the first event after the frames that were consumed.
+ */
 static ProfilerEventData*
-write_event (ProfilerEventData *event) {
+write_stack_section_event (ProfilerEventData *events, ProfilerPerThreadData *data) {
+       int first_frame = events->data.number;
+       int frame_count = events->value;
+       int top_frame = first_frame + frame_count;
+       ProfilerEventData *frame = events + 1;
+       guint8 event_code;
+       int written;
+       
+       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_STACK_SECTION, 0, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+       WRITE_BYTE (event_code);
+       write_uint32 (first_frame);
+       write_uint32 (frame_count);
+       thread_stack_set_last_written_frame (&(data->stack), top_frame);
+       
+       for (written = 0; written < frame_count; written++, frame++) {
+               guint8 code = frame->code;
+               guint32 jit_flag;
+               guint32 frame_value;
+               MethodIdMappingElement *method;
+               
+               /* Only the two allocation caller codes may appear in a section. */
+               if ((code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER) && (code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER)) {
+                       g_assert_not_reached ();
+               }
+               jit_flag = (code == MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER) ? 1 : 0;
+               
+               method = method_id_mapping_element_get (frame->data.address);
+               g_assert (method != NULL);
+               frame_value = (method->id << 1) | jit_flag;
+               write_uint32 (frame_value);
+               /* Frame events are stored from the highest stack index downwards. */
+               thread_stack_write_frame_at_index (&(data->stack), top_frame - (1 + written), frame_value);
+       }
+       
+       return frame;
+}
+
+static ProfilerEventData*
+write_event (ProfilerEventData *event, ProfilerPerThreadData *data) {
        ProfilerEventData *next = event + 1;
        gboolean write_event_value = TRUE;
        guint8 event_code;
        guint64 event_data;
        guint64 event_value;
+       gboolean write_event_value_extension_1 = FALSE;
+       guint64 event_value_extension_1 = 0;
+       gboolean write_event_value_extension_2 = FALSE;
+       guint64 event_value_extension_2 = 0;
 
        event_value = event->value;
-       if (event_value > MAX_EVENT_VALUE) {
-               event_value = get_extended_event_value (event, next);
+       if (event_value == MAX_EVENT_VALUE) {
+               event_value = *((guint64*)next);
                next ++;
        }
        
@@ -1639,13 +2781,63 @@ write_event (ProfilerEventData *event) {
                event_data = element->id;
                
                if (event->code == MONO_PROFILER_EVENT_CLASS_ALLOCATION) {
-                       MONO_PROFILER_EVENT_MAKE_PACKED_CODE (event_code, event_data, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_ALLOCATION);
+                       if ((! profiler->action_flags.save_allocation_caller) || (! (next->code == MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER))) {
+                               MONO_PROFILER_EVENT_MAKE_PACKED_CODE (event_code, event_data, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_ALLOCATION);
+                       } else {
+                               MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+                       }
+                       
+                       if (profiler->action_flags.save_allocation_caller) {
+                               MonoMethod *caller_method = next->data.address;
+                               
+                               if ((next->code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER) && (next->code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER)) {
+                                       g_assert_not_reached ();
+                               }
+                               
+                               if (caller_method != NULL) {
+                                       MethodIdMappingElement *caller = method_id_mapping_element_get (caller_method);
+                                       g_assert (caller != NULL);
+                                       event_value_extension_1 = caller->id;
+                               }
+
+                               write_event_value_extension_1 = TRUE;
+                               next ++;
+                       }
+                       
+                       if (profiler->action_flags.allocations_carry_id) {
+                               event_value_extension_2  = GPOINTER_TO_UINT (next->data.address);
+                               
+                               if (next->code != MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID) {
+                                       g_assert_not_reached ();
+                               }
+                               
+                               write_event_value_extension_2 = TRUE;
+                               next ++;
+                       }
+               } else if (event->code == MONO_PROFILER_EVENT_CLASS_MONITOR) {
+                       g_assert (next->code == MONO_PROFILER_EVENT_OBJECT_MONITOR);
+                       
+                       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_EVENT);
+                       event_value_extension_1 = next->value;
+                       write_event_value_extension_1 = TRUE;
+                       event_value_extension_2  = GPOINTER_TO_UINT (next->data.address);
+                       write_event_value_extension_2 = TRUE;
+                       next ++;
                } else {
                        MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_EVENT);
                }
        } else {
-               event_data = event->data.number;
-               MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+               if (event->code == MONO_PROFILER_EVENT_STACK_SECTION) {
+                       return write_stack_section_event (event, data);
+               } else {
+                       event_data = event->data.number;
+                       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+               }
+       }
+       
+       /* Skip writing JIT events if the user did not ask for them */
+       if ((event->code == MONO_PROFILER_EVENT_METHOD_JIT) && ! profiler->action_flags.jit_time) {
+               return next;
        }
        
 #if (DEBUG_LOGGING_PROFILER)
@@ -1659,6 +2851,12 @@ write_event (ProfilerEventData *event) {
        write_uint64 (event_data);
        if (write_event_value) {
                write_uint64 (event_value);
+               if (write_event_value_extension_1) {
+                       write_uint64 (event_value_extension_1);
+               }
+               if (write_event_value_extension_2) {
+                       write_uint64 (event_value_extension_2);
+               }
        }
        
        return next;
@@ -1671,20 +2869,31 @@ write_thread_data_block (ProfilerPerThreadData *data) {
        
        if (start == end)
                return;
-       
+#if (DEBUG_FILE_WRITES)
+       printf ("write_thread_data_block: preparing buffer for thread %ld\n", (guint64) data->thread_id);
+#endif
        write_clock_data ();
        write_uint64 (data->thread_id);
        
        write_uint64 (data->start_event_counter);
        
+       /* If we are tracking the stack, make sure that stack sections */
+       /* can be fully reconstructed even reading only one block */
+       if (profiler->action_flags.track_stack) {
+               rewrite_last_written_stack (&(data->stack));
+       }
+       
        while (start < end) {
-               start = write_event (start);
+               start = write_event (start, data);
        }
        WRITE_BYTE (0);
        data->first_unwritten_event = end;
        
        write_clock_data ();
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_EVENTS);
+#if (DEBUG_FILE_WRITES)
+       printf ("write_thread_data_block: buffer for thread %ld written\n", (guint64) data->thread_id);
+#endif
 }
 
 static ProfilerExecutableMemoryRegionData*
@@ -1696,24 +2905,44 @@ profiler_executable_memory_region_new (gpointer *start, gpointer *end, guint32 f
        result->file_name = g_strdup (file_name);
        result->id = id;
        result->is_new = TRUE;
+       
+       result->file = NULL;
+       result->file_region_reference = NULL;
+       result->symbols_capacity = id;
+       result->symbols_count = id;
+       result->symbols = NULL;
+       
        return result;
 }
 
+static void
+executable_file_close (ProfilerExecutableMemoryRegionData *region);
+
+/*
+ * Release everything a region owns, then the region itself: the file is
+ * closed through executable_file_close () when one is attached, and the
+ * symbols array and the file name copy are freed.
+ */
 static void
 profiler_executable_memory_region_destroy (ProfilerExecutableMemoryRegionData *data) {
+       if (data->file != NULL) {
+               executable_file_close (data);
+               data->file = NULL;
+       }
+       if (data->symbols != NULL) {
+               g_free (data->symbols);
+               data->symbols = NULL;
+       }
        if (data->file_name != NULL) {
                g_free (data->file_name);
+               data->file_name = NULL;
        }
        g_free (data);
 }
 
+/*
+ * Allocate an empty region table with room for 32 region pointers (the
+ * pointer array is zero filled) and seed both id counters from the
+ * arguments.
+ * NOTE(review): the struct itself comes from g_new, which does not clear
+ * memory; confirm that every field is assigned here.
+ */
 static ProfilerExecutableMemoryRegions*
-profiler_executable_memory_regions_new (int next_id) {
+profiler_executable_memory_regions_new (int next_id, int next_unmanaged_function_id) {
        ProfilerExecutableMemoryRegions *result = g_new (ProfilerExecutableMemoryRegions, 1);
        result->regions = g_new0 (ProfilerExecutableMemoryRegionData*, 32);
        result->regions_capacity = 32;
        result->regions_count = 0;
        result->next_id = next_id;
+       result->next_unmanaged_function_id = next_unmanaged_function_id;
        return result;
 }
 
@@ -1764,62 +2993,488 @@ find_address_region (ProfilerExecutableMemoryRegions *regions, gpointer address)
                }
        }
        
-       if ((middle_region == NULL) || (middle_region->start > address) || (middle_region->end < address)) {
-               return NULL;
-       } else {
-               return middle_region;
+       if ((middle_region == NULL) || (middle_region->start > address) || (middle_region->end < address)) {
+               return NULL;
+       } else {
+               return middle_region;
+       }
+}
+
+/*
+ * Appends a newly created region record to "regions". When the pointer
+ * array is full it is replaced by a zero-filled array of twice the size
+ * holding the old entries. The record is created with the collection's
+ * current next_id, which is then advanced.
+ */
+static void
+append_region (ProfilerExecutableMemoryRegions *regions, gpointer *start, gpointer *end, guint32 file_offset, char *file_name) {
+       ProfilerExecutableMemoryRegionData *created;
+
+       if (regions->regions_count >= regions->regions_capacity) {
+               ProfilerExecutableMemoryRegionData **grown = g_new0 (ProfilerExecutableMemoryRegionData*, regions->regions_capacity * 2);
+               memcpy (grown, regions->regions, regions->regions_capacity * sizeof (ProfilerExecutableMemoryRegionData*));
+               g_free (regions->regions);
+               regions->regions = grown;
+               regions->regions_capacity *= 2;
+       }
+
+       created = profiler_executable_memory_region_new (start, end, file_offset, file_name, regions->next_id);
+       regions->next_id ++;
+       regions->regions [regions->regions_count] = created;
+       regions->regions_count ++;
+}
+
+/*
+ * Two region records are equivalent when they cover the same address
+ * range, start at the same file offset and name the same file.
+ * The cheap field comparisons run before the string comparison.
+ */
+static gboolean
+regions_are_equivalent (ProfilerExecutableMemoryRegionData *region1, ProfilerExecutableMemoryRegionData *region2) {
+       if (region1->start != region2->start) {
+               return FALSE;
+       }
+       if (region1->end != region2->end) {
+               return FALSE;
+       }
+       if (region1->file_offset != region2->file_offset) {
+               return FALSE;
+       }
+       return (strcmp (region1->file_name, region2->file_name) == 0) ? TRUE : FALSE;
+}
+
+/*
+ * qsort () comparator for an array of region pointers: orders the
+ * records by ascending start address.
+ */
+static int
+compare_regions (const void *a1, const void *a2) {
+       ProfilerExecutableMemoryRegionData *first = * (ProfilerExecutableMemoryRegionData**) a1;
+       ProfilerExecutableMemoryRegionData *second = * (ProfilerExecutableMemoryRegionData**) a2;
+
+       if (first->start < second->start) {
+               return -1;
+       }
+       if (first->start > second->start) {
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * For every region in new_regions that is equivalent to a region in
+ * old_regions, swaps the two records between the collections: the new
+ * collection keeps the old record (with whatever state it carries) and
+ * the old collection receives the freshly scanned duplicate.
+ *
+ * The search for a given new slot stops at the first match. Continuing
+ * would let a second equivalent old record be swapped into the same slot,
+ * dropping the first one from both arrays and storing the new record twice
+ * in old_regions.
+ */
+static void
+restore_old_regions (ProfilerExecutableMemoryRegions *old_regions, ProfilerExecutableMemoryRegions *new_regions) {
+       int old_i;
+       int new_i;
+
+       for (new_i = 0; new_i < new_regions->regions_count; new_i++) {
+               ProfilerExecutableMemoryRegionData *new_region = new_regions->regions [new_i];
+               for (old_i = 0; old_i < old_regions->regions_count; old_i++) {
+                       ProfilerExecutableMemoryRegionData *old_region = old_regions->regions [old_i];
+                       if (regions_are_equivalent (old_region, new_region)) {
+                               new_regions->regions [new_i] = old_region;
+                               old_regions->regions [old_i] = new_region;
+
+                               // FIXME (sanity check)
+                               g_assert (new_region->is_new && ! old_region->is_new);
+
+                               /* This slot is settled; do not swap it again. */
+                               break;
+                       }
+               }
+       }
+}
+
+/*
+ * Sorts the region pointers by start address and then removes adjacent
+ * equivalent records. Of two equivalent neighbours the one that is not
+ * flagged is_new is kept when the later one is not new; otherwise the
+ * earlier one is kept. The discarded record is destroyed and the array
+ * tail is shifted down by one.
+ */
+static void
+sort_regions (ProfilerExecutableMemoryRegions *regions) {
+       int i;
+
+       if (regions->regions_count <= 1) {
+               return;
+       }
+
+       qsort (regions->regions, regions->regions_count, sizeof (ProfilerExecutableMemoryRegionData *), compare_regions);
+
+       i = 1;
+       while (i < regions->regions_count) {
+               ProfilerExecutableMemoryRegionData *kept = regions->regions [i - 1];
+               ProfilerExecutableMemoryRegionData *candidate = regions->regions [i];
+               int j;
+
+               if (! regions_are_equivalent (kept, candidate)) {
+                       i++;
+                       continue;
+               }
+
+               if (candidate->is_new) {
+                       profiler_executable_memory_region_destroy (candidate);
+               } else {
+                       profiler_executable_memory_region_destroy (kept);
+                       regions->regions [i - 1] = candidate;
+               }
+
+               /* Close the gap left at index i; i itself is examined again. */
+               for (j = i; j + 1 < regions->regions_count; j++) {
+                       regions->regions [j] = regions->regions [j + 1];
+               }
+               regions->regions_count --;
+       }
+}
+
+/*
+ * Makes every section entry that a region remembers (through
+ * file_region_reference) point back at that region record.
+ */
+static void
+fix_region_references (ProfilerExecutableMemoryRegions *regions) {
+       int index;
+
+       for (index = 0; index < regions->regions_count; index++) {
+               ProfilerExecutableMemoryRegionData *current = regions->regions [index];
+
+               if (current->file_region_reference == NULL) {
+                       continue;
+               }
+               current->file_region_reference->region = current;
+       }
+}
+
+/*
+ * Binds "region" to the sections of "file" it fully contains. Only
+ * sections with a non-zero address and the ELF_SHF_EXECINSTR flag are
+ * considered, and section 0 is never examined. Each matching section
+ * entry records the region, the section address and the section file
+ * offset; the region remembers the last matching entry.
+ */
+static void
+executable_file_add_region_reference (ProfilerExecutableFile *file, ProfilerExecutableMemoryRegionData *region) {
+       guint8 *section_table = file->data + file->header->e_shoff;
+       int section_index;
+
+       for (section_index = 1; section_index < file->header->e_shnum; section_index ++) {
+               ElfSection *section_header = (ElfSection*) (section_table + (file->header->e_shentsize * section_index));
+               ProfilerExecutableFileSectionRegion *section_region;
+
+               if ((section_header->sh_addr == 0) || ! (section_header->sh_flags & ELF_SHF_EXECINSTR)) {
+                       continue;
+               }
+               /* The section must start at or after the region's file offset... */
+               if (region->file_offset > section_header->sh_offset) {
+                       continue;
+               }
+               /* ...and end at or before the end of the mapped range. */
+               if (region->file_offset + (((guint8*)region->end)-((guint8*)region->start)) < (section_header->sh_offset + section_header->sh_size)) {
+                       continue;
+               }
+
+               section_region = & (file->section_regions [section_index]);
+               section_region->region = region;
+               section_region->section_address = (gpointer) section_header->sh_addr;
+               section_region->section_offset = section_header->sh_offset;
+               region->file_region_reference = section_region;
+       }
+}
+
+/*
+ * Returns TRUE when "header" starts with the ELF magic and its class and
+ * data encoding match this process: the class is checked against
+ * sizeof (gsize) and the encoding against the byte order observed by
+ * reading the first byte of a 16 bit probe value. Mismatches after the
+ * magic check are reported with g_warning ().
+ */
+static gboolean check_elf_header (ElfHeader* header) {
+       guint16 probe = 0x0102;
+       guint8 first_probe_byte = *(guint8*)(&probe);
+
+       if (header->e_ident [EI_MAG0] != 0x7f) {
+               return FALSE;
+       }
+       if (header->e_ident [EI_MAG1] != 'E') {
+               return FALSE;
+       }
+       if (header->e_ident [EI_MAG2] != 'L') {
+               return FALSE;
+       }
+       if (header->e_ident [EI_MAG3] != 'F') {
+               return FALSE;
+       }
+
+       switch (sizeof (gsize)) {
+       case 4:
+               if (header->e_ident [EI_CLASS] != ELF_CLASS_32) {
+                       g_warning ("Class is not ELF_CLASS_32 with gsize size %d", (int) sizeof (gsize));
+                       return FALSE;
+               }
+               break;
+       case 8:
+               if (header->e_ident [EI_CLASS] != ELF_CLASS_64) {
+                       g_warning ("Class is not ELF_CLASS_64 with gsize size %d", (int) sizeof (gsize));
+                       return FALSE;
+               }
+               break;
+       default:
+               g_warning ("Absurd gsize size %d", (int) sizeof (gsize));
+               return FALSE;
+       }
+
+       switch (first_probe_byte) {
+       case 0x01:
+               /* Most significant byte stored first. */
+               if (header->e_ident [EI_DATA] != ELF_DATA_MSB) {
+                       g_warning ("Data is not ELF_DATA_MSB with first test byte 0x01");
+                       return FALSE;
+               }
+               break;
+       case 0x02:
+               /* Least significant byte stored first. */
+               if (header->e_ident [EI_DATA] != ELF_DATA_LSB) {
+                       g_warning ("Data is not ELF_DATA_LSB with first test byte 0x02");
+                       return FALSE;
+               }
+               break;
+       default:
+               g_warning ("Absurd test byte value");
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/*
+ * Reads sizeof (ElfHeader) bytes from "fd" at its current position and
+ * validates them with check_elf_header (). A short or failed read yields
+ * FALSE. The descriptor's file position is advanced by the read.
+ */
+static gboolean check_elf_file (int fd) {
+       ElfHeader header;
+
+       if (read (fd, &header, sizeof (ElfHeader)) != sizeof (ElfHeader)) {
+               return FALSE;
+       }
+
+       return check_elf_header (&header);
+}
+
+/*
+ * Binds "region" to the ProfilerExecutableFile describing its backing
+ * file and returns that file record (never NULL).
+ *
+ * If the region has no file yet, the file table is searched by file name.
+ * A table hit just takes another reference. On a miss a new record is
+ * created, registered in the table, pushed on the new_files list and then
+ * parsed: the file is opened, checked to be an ELF image matching this
+ * process, and either aliased to the region's own mapping (when the file
+ * length equals the region length) or mapped read-only. The section
+ * headers are scanned for a symbol table, preferring .symtab/.strtab over
+ * .dynsym/.dynstr.
+ *
+ * When the file cannot be opened, inspected, recognised or mapped the
+ * record is still returned and stays registered, but its header is NULL
+ * so no section is ever bound to a region.
+ */
+static ProfilerExecutableFile*
+executable_file_open (ProfilerExecutableMemoryRegionData *region) {
+       ProfilerExecutableFiles *files = & (profiler->executable_files);
+       ProfilerExecutableFile *file = region->file;
+
+       if (file == NULL) {
+               file = (ProfilerExecutableFile*) g_hash_table_lookup (files->table, region->file_name);
+
+               if (file == NULL) {
+                       struct stat stat_buffer;
+                       int symtab_index = 0;
+                       int strtab_index = 0;
+                       int dynsym_index = 0;
+                       int dynstr_index = 0;
+                       ElfHeader *header;
+                       guint8 *section_headers;
+                       int section_index;
+                       int strings_index;
+                       size_t region_length;
+
+                       file = g_new0 (ProfilerExecutableFile, 1);
+                       region->file = file;
+                       /*
+                        * NOTE(review): the table key is this region's own file_name
+                        * string, which profiler_executable_memory_region_destroy ()
+                        * frees. If another region still references the file at that
+                        * point the key looks like it would dangle -- confirm how
+                        * files->table is created before changing key ownership.
+                        */
+                       g_hash_table_insert (files->table, region->file_name, file);
+                       file->reference_count ++;
+                       file->next_new_file = files->new_files;
+                       files->new_files = file;
+
+                       file->fd = open (region->file_name, O_RDONLY);
+                       if (file->fd == -1) {
+                               /* Cannot open the file: keep an empty record. */
+                               return file;
+                       }
+                       if (fstat (file->fd, &stat_buffer) != 0) {
+                               /* Cannot stat the file: keep an empty record. */
+                               return file;
+                       }
+                       if (! check_elf_file (file->fd)) {
+                               return file;
+                       }
+
+                       region_length = ((guint8*)region->end) - ((guint8*)region->start);
+                       file->length = stat_buffer.st_size;
+
+                       if (file->length == region_length) {
+                               /*
+                                * The region spans the whole file: read it through the
+                                * existing mapping. fd == -1 tells executable_file_free ()
+                                * that there is nothing to close or unmap.
+                                */
+                               file->data = region->start;
+                               close (file->fd);
+                               file->fd = -1;
+                       } else {
+                               file->data = mmap (NULL, file->length, PROT_READ, MAP_PRIVATE, file->fd, 0);
+
+                               if (file->data == MAP_FAILED) {
+                                       close (file->fd);
+                                       /*
+                                        * Forget the descriptor: executable_file_free () closes
+                                        * any fd other than -1, and the number may have been
+                                        * reused by then.
+                                        */
+                                       file->fd = -1;
+                                       file->data = NULL;
+                                       return file;
+                               }
+                       }
+
+                       /* OK, this is a usable elf file, and we mmapped it... */
+                       header = (ElfHeader*) file->data;
+                       file->header = header;
+                       section_headers = file->data + file->header->e_shoff;
+                       file->main_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * header->e_shstrndx)))->sh_offset);
+
+                       for (section_index = 0; section_index < header->e_shnum; section_index ++) {
+                               ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
+
+                               if (section_header->sh_type == ELF_SHT_SYMTAB) {
+                                       symtab_index = section_index;
+                               } else if (section_header->sh_type == ELF_SHT_DYNSYM) {
+                                       dynsym_index = section_index;
+                               } else if (section_header->sh_type == ELF_SHT_STRTAB) {
+                                       if (! strcmp (file->main_string_table + section_header->sh_name, ".strtab")) {
+                                               strtab_index = section_index;
+                                       } else if (! strcmp (file->main_string_table + section_header->sh_name, ".dynstr")) {
+                                               dynstr_index = section_index;
+                                       }
+                               }
+                       }
+
+                       /* Index 0 doubles as "not found" for all four tables. */
+                       if ((symtab_index != 0) && (strtab_index != 0)) {
+                               section_index = symtab_index;
+                               strings_index = strtab_index;
+                       } else if ((dynsym_index != 0) && (dynstr_index != 0)) {
+                               section_index = dynsym_index;
+                               strings_index = dynstr_index;
+                       } else {
+                               section_index = 0;
+                               strings_index = 0;
+                       }
+
+                       if (section_index != 0) {
+                               ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
+
+                               /*
+                                * A zero entry size would make the division below trap.
+                                * Such a table is ignored; symbols_count stays 0 from
+                                * g_new0 ().
+                                */
+                               if (section_header->sh_entsize != 0) {
+                                       file->symbol_size = section_header->sh_entsize;
+                                       file->symbols_count = (guint32) (section_header->sh_size / section_header->sh_entsize);
+                                       file->symbols_start = file->data + section_header->sh_offset;
+                                       file->symbols_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * strings_index)))->sh_offset);
+                               }
+                       }
+
+                       file->section_regions = g_new0 (ProfilerExecutableFileSectionRegion, file->header->e_shnum);
+               } else {
+                       region->file = file;
+                       file->reference_count ++;
+               }
+       }
+
+       if (file->header != NULL) {
+               executable_file_add_region_reference (file, region);
+       }
+
+       return file;
+}
+
+/*
+ * Releases a file record. A descriptor other than -1 is closed and, when
+ * data is set, the mapping of file->length bytes is removed; failures of
+ * either call are only reported with g_warning (). With fd == -1 the
+ * data pointer is left alone. The section table and the record itself
+ * are then freed.
+ */
+static void
+executable_file_free (ProfilerExecutableFile* file) {
+       if (file->fd != -1) {
+               if (close (file->fd) != 0) {
+                       g_warning ("Cannot close file: '%s'", strerror (errno));
+               }
+               if ((file->data != NULL) && (munmap (file->data, file->length) != 0)) {
+                       g_warning ("Cannot unmap file: '%s'", strerror (errno));
+               }
+       }
+
+       /* g_free () accepts NULL, so no guard is needed here. */
+       g_free (file->section_regions);
+       file->section_regions = NULL;
+
+       g_free (file);
+}
+
+/*
+ * Drops the reference "region" holds on its executable file. The caller
+ * must ensure region->file is not NULL; it is dereferenced unconditionally.
+ * region->file is reset to NULL only when the last reference goes away.
+ */
+static void
+executable_file_close (ProfilerExecutableMemoryRegionData *region) {
+       region->file->reference_count --;
+       
+       /*
+        * Unbind the section entry this region remembers, if it still points
+        * back here.
+        * NOTE(review): executable_file_add_region_reference () can bind
+        * several sections to one region but only the last is remembered, so
+        * the other entries appear to keep pointing at the region -- confirm.
+        */
+       if ((region->file_region_reference != NULL) && (region->file_region_reference->region == region)) {
+               region->file_region_reference->region = NULL;
+               region->file_region_reference->section_address = 0;
+               region->file_region_reference->section_offset = 0;
+       }
+       
+       /* Last reference: unregister the file (keyed by file name) and free it. */
+       if (region->file->reference_count <= 0) {
+               ProfilerExecutableFiles *files = & (profiler->executable_files);
+               g_hash_table_remove (files->table, region->file_name);
+               executable_file_free (region->file);
+               region->file = NULL;
        }
 }
 
+/*
+ * Counting pass of symbol table construction. Walks the file's symbol
+ * entries and, for each function symbol whose section index lies between
+ * 1 and e_shnum - 1, increments symbols_count of the region bound to
+ * that section, provided the region has no symbol array yet.
+ */
 static void
-append_region (ProfilerExecutableMemoryRegions *regions, gpointer *start, gpointer *end, guint32 file_offset, char *file_name) {
-       if (regions->regions_count >= regions->regions_capacity) {
-               ProfilerExecutableMemoryRegionData **new_regions = g_new0 (ProfilerExecutableMemoryRegionData*, regions->regions_capacity * 2);
-               memcpy (new_regions, regions->regions, regions->regions_capacity * sizeof (ProfilerExecutableMemoryRegionData*));
-               g_free (regions->regions);
-               regions->regions = new_regions;
-               regions->regions_capacity = regions->regions_capacity * 2;
+executable_file_count_symbols (ProfilerExecutableFile *file) {
+       int symbol_index;
+       
+       for (symbol_index = 0; symbol_index < file->symbols_count; symbol_index ++) {
+               ElfSymbol *symbol = (ElfSymbol*) (file->symbols_start + (symbol_index * file->symbol_size));
+               
+               if ((ELF_ST_TYPE (symbol->st_info) == ELF_STT_FUNC) &&
+                               (symbol->st_shndx > 0) &&
+                               (symbol->st_shndx < file->header->e_shnum)) {
+                       int symbol_section_index = symbol->st_shndx;
+                       ProfilerExecutableMemoryRegionData *region = file->section_regions [symbol_section_index].region;
+                       /* Regions that already own a symbol array are not counted again. */
+                       if ((region != NULL) && (region->symbols == NULL)) {
+                               region->symbols_count ++;
+                       }
+               }
        }
-       regions->regions [regions->regions_count] = profiler_executable_memory_region_new (start, end, file_offset, file_name, regions->next_id);
-       regions->regions_count ++;
-       regions->next_id ++;
 }
 
+/*
+ * Allocation pass of symbol table construction. Every region with a
+ * positive symbols_count and no symbol array gets an uninitialised array
+ * of that many entries; the count becomes the capacity and symbols_count
+ * is reset to 0, which executable_file_build_symbol_tables () then uses
+ * as its insertion index.
+ */
 static void
-restore_region_ids (ProfilerExecutableMemoryRegions *old_regions, ProfilerExecutableMemoryRegions *new_regions) {
-       int old_i;
-       int new_i;
+executable_memory_regions_prepare_symbol_tables (ProfilerExecutableMemoryRegions *regions) {
+       int i;
+       for (i = 0; i < regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = regions->regions [i];
+               if ((region->symbols_count > 0) && (region->symbols == NULL)) {
+                       region->symbols = g_new (ProfilerUnmanagedSymbol, region->symbols_count);
+                       region->symbols_capacity = region->symbols_count;
+                       region->symbols_count = 0;
+               }
+       }
+}
+
+/*
+ * Returns the name of "symbol": its index selects an entry in the symbol
+ * table of the region's file, and that entry's st_name is an offset into
+ * the file's symbol string table. The returned pointer refers to the
+ * file's data; nothing is copied.
+ */
+static const char*
+executable_region_symbol_get_name (ProfilerExecutableMemoryRegionData *region, ProfilerUnmanagedSymbol *symbol) {
+       ProfilerExecutableFile *owner = region->file;
+       ElfSymbol *entry = (ElfSymbol*) (owner->symbols_start + (symbol->index * owner->symbol_size));
+
+       return owner->symbols_string_table + entry->st_name;
+}
+
+/*
+ * Fill pass of symbol table construction. Applies the same symbol filter
+ * as executable_file_count_symbols () and appends one entry per accepted
+ * symbol to the array of the region bound to the symbol's section,
+ * recording the symbol table index, the size and the offset of the
+ * symbol relative to the start of the region.
+ *
+ * NOTE(review): the append is not checked against symbols_capacity nor
+ * against region->symbols being NULL; it relies on the counting and
+ * allocation passes having sized the array -- confirm for regions that
+ * already owned a symbol array.
+ */
+static void
+executable_file_build_symbol_tables (ProfilerExecutableFile *file) {
+       int symbol_index;
        
-       for (old_i = 0; old_i < old_regions->regions_count; old_i++) {
-               ProfilerExecutableMemoryRegionData *old_region = old_regions->regions [old_i];
-               for (new_i = 0; new_i < new_regions->regions_count; new_i++) {
-                       ProfilerExecutableMemoryRegionData *new_region = new_regions->regions [new_i];
-                       if ((old_region->start == new_region->start) &&
-                                       (old_region->end == new_region->end) &&
-                                       (old_region->file_offset == new_region->file_offset) &&
-                                       ! strcmp (old_region->file_name, new_region->file_name)) {
-                               new_region->is_new = FALSE;
-                               new_region->id = old_region->id;
-                               old_region->is_new = TRUE;
+       for (symbol_index = 0; symbol_index < file->symbols_count; symbol_index ++) {
+               ElfSymbol *symbol = (ElfSymbol*) (file->symbols_start + (symbol_index * file->symbol_size));
+               
+               if ((ELF_ST_TYPE (symbol->st_info) == ELF_STT_FUNC) &&
+                               (symbol->st_shndx > 0) &&
+                               (symbol->st_shndx < file->header->e_shnum)) {
+                       int symbol_section_index = symbol->st_shndx;
+                       ProfilerExecutableFileSectionRegion *section_region = & (file->section_regions [symbol_section_index]);
+                       ProfilerExecutableMemoryRegionData *region = section_region->region;
+                       
+                       if (region != NULL) {
+                               ProfilerUnmanagedSymbol *new_symbol = & (region->symbols [region->symbols_count]);
+                               region->symbols_count ++;
+                               
+                               new_symbol->id = 0;
+                               new_symbol->index = symbol_index;
+                               new_symbol->size = symbol->st_size;
+                               /* Offset within the section, minus how far the region starts into the file past the section start. */
+                               new_symbol->offset = (((guint8*) symbol->st_value) - section_region->section_address) - (region->file_offset - section_region->section_offset);
                        }
                }
        }
 }
 
+/*
+ * qsort () comparator for an array of ProfilerUnmanagedSymbol values:
+ * orders the symbols by ascending offset.
+ */
 static int
-compare_regions (const void *a1, const void *a2) {
-       ProfilerExecutableMemoryRegionData *r1 = * (ProfilerExecutableMemoryRegionData**) a1;
-       ProfilerExecutableMemoryRegionData *r2 = * (ProfilerExecutableMemoryRegionData**) a2;
-       return (r1->start < r2->start)? -1 : ((r1->start > r2->start)? 1 : 0);
+compare_region_symbols (const void *p1, const void *p2) {
+       const ProfilerUnmanagedSymbol *s1 = p1;
+       const ProfilerUnmanagedSymbol *s2 = p2;
+       return (s1->offset < s2->offset)? -1 : ((s1->offset > s2->offset)? 1 : 0);
 }
 
+/*
+ * Sorts, by ascending symbol offset, the symbol array of every region
+ * that is flagged is_new and owns a symbol array. Other regions are left
+ * untouched.
+ */
 static void
-sort_regions (ProfilerExecutableMemoryRegions *regions) {
-       qsort (regions->regions, regions->regions_count, sizeof (ProfilerExecutableMemoryRegionData *), compare_regions);
+executable_memory_regions_sort_symbol_tables (ProfilerExecutableMemoryRegions *regions) {
+       int i;
+       for (i = 0; i < regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = regions->regions [i];
+               if ((region->is_new) && (region->symbols != NULL)) {
+                       qsort (region->symbols, region->symbols_count, sizeof (ProfilerUnmanagedSymbol), compare_region_symbols);
+               }
+       }
+}
+
+/*
+ * Builds the symbol tables for a freshly scanned region collection:
+ *   1. open the backing file of every new region that has none yet;
+ *   2. count the symbols of every file on the new_files list;
+ *   3. allocate the per-region symbol arrays;
+ *   4. fill them from the same files;
+ *   5. sort them by offset;
+ *   6. empty the new_files list, clearing each file's link.
+ */
+static void
+build_symbol_tables (ProfilerExecutableMemoryRegions *regions, ProfilerExecutableFiles *files) {
+       ProfilerExecutableFile *file;
+       int i;
+
+       for (i = 0; i < regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = regions->regions [i];
+
+               if (! region->is_new) {
+                       continue;
+               }
+               if (region->file != NULL) {
+                       continue;
+               }
+               executable_file_open (region);
+       }
+
+       file = files->new_files;
+       while (file != NULL) {
+               executable_file_count_symbols (file);
+               file = file->next_new_file;
+       }
+
+       executable_memory_regions_prepare_symbol_tables (regions);
+
+       file = files->new_files;
+       while (file != NULL) {
+               executable_file_build_symbol_tables (file);
+               file = file->next_new_file;
+       }
+
+       executable_memory_regions_sort_symbol_tables (regions);
+
+       /* Pop every file off the list so no stale link survives. */
+       while (files->new_files != NULL) {
+               file = files->new_files;
+               files->new_files = file->next_new_file;
+               file->next_new_file = NULL;
+       }
+}
+
+/*
+ * Looks up the symbol of "region" whose range [offset, offset + size)
+ * contains the given region-relative offset. Returns NULL when the
+ * region has no symbols or none of them covers the offset.
+ *
+ * The symbol array is expected to be sorted by ascending offset (see
+ * executable_memory_regions_sort_symbol_tables ()). The search is a
+ * binary search over the half-open index range [low, high), so every
+ * entry can be reached, including the first one of a two-element table.
+ */
+static ProfilerUnmanagedSymbol*
+executable_memory_region_find_symbol (ProfilerExecutableMemoryRegionData *region, guint32 offset) {
+       int low = 0;
+       int high = region->symbols_count;
+
+       if (region->symbols == NULL) {
+               return NULL;
+       }
+
+       while (low < high) {
+               int middle = low + ((high - low) >> 1);
+               ProfilerUnmanagedSymbol *current = region->symbols + middle;
+
+               if (offset < current->offset) {
+                       /* Target lies before this symbol. */
+                       high = middle;
+               } else if (offset >= (current->offset + current->size)) {
+                       /* Target lies at or past the end of this symbol. */
+                       low = middle + 1;
+               } else {
+                       return current;
+               }
+       }
+
+       return NULL;
 }
 
 //FIXME: make also Win32 and BSD variants
 #define MAPS_BUFFER_SIZE 4096
+#define MAPS_FILENAME_SIZE 2048
 
 static gboolean
 update_regions_buffer (int fd, char *buffer) {
@@ -1848,9 +3503,9 @@ static int hex_digit_value (char c) {
        if ((c >= '0') && (c <= '9')) {
                return c - '0';
        } else if ((c >= 'a') && (c <= 'f')) {
-               return c - 'a';
+               return c - 'a' + 10;
        } else if ((c >= 'A') && (c <= 'F')) {
-               return c - 'A';
+               return c - 'A' + 10;
        } else {
                return 0;
        }
@@ -1898,13 +3553,12 @@ const char *map_line_parser_state [] = {
 };
 
 static char*
-parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer, char *current) {
+parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer, char *filename, char *current) {
        MapLineParserState state = MAP_LINE_PARSER_STATE_START_ADDRESS;
        gsize start_address = 0;
        gsize end_address = 0;
        guint32 offset = 0;
-       char *start_filename = NULL;
-       char *end_filename = NULL;
+       int filename_index = 0;
        gboolean is_executable = FALSE;
        gboolean done = FALSE;
        
@@ -1966,24 +3620,33 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
                        }
                        break;
                case MAP_LINE_PARSER_STATE_BLANK_BEFORE_FILENAME:
-                       if (c == '/') {
+                       if ((c == '/') || (c == '[')) {
                                state = MAP_LINE_PARSER_STATE_FILENAME;
-                               start_filename = current;
+                               filename [filename_index] = *current;
+                               filename_index ++;
                        } else if (! isblank (c)) {
                                state = MAP_LINE_PARSER_STATE_INVALID;
                        }
                        break;
                case MAP_LINE_PARSER_STATE_FILENAME:
-                       if (c == '\n') {
-                               state = MAP_LINE_PARSER_STATE_DONE;
-                               done = TRUE;
-                               end_filename = current;
+                       if (filename_index < MAPS_FILENAME_SIZE) {
+                               if (c == '\n') {
+                                       state = MAP_LINE_PARSER_STATE_DONE;
+                                       done = TRUE;
+                                       filename [filename_index] = 0;
+                               } else {
+                                       filename [filename_index] = *current;
+                                       filename_index ++;
+                               }
+                       } else {
+                               filename [filename_index] = 0;
+                               g_warning ("ELF filename too long: \"%s\"...\n", filename);
                        }
                        break;
                case MAP_LINE_PARSER_STATE_DONE:
                        if (done && is_executable) {
-                               *end_filename = 0;
-                               append_region (regions, (gpointer) start_address, (gpointer) end_address, offset, start_filename);
+                               filename [filename_index] = 0;
+                               append_region (regions, (gpointer) start_address, (gpointer) end_address, offset, filename);
                        }
                        return current;
                case MAP_LINE_PARSER_STATE_INVALID:
@@ -1993,9 +3656,10 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
                        break;
                }
                
-               
                if (c == 0) {
                        return NULL;
+               } else if (c == '\n') {
+                       state = MAP_LINE_PARSER_STATE_DONE;
                }
                
                GOTO_NEXT_CHAR(current, buffer, fd);
@@ -2006,6 +3670,7 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
 static gboolean
 scan_process_regions (ProfilerExecutableMemoryRegions *regions) {
        char *buffer;
+       char *filename;
        char *current;
        int fd;
        
@@ -2015,13 +3680,15 @@ scan_process_regions (ProfilerExecutableMemoryRegions *regions) {
        }
        
        buffer = malloc (MAPS_BUFFER_SIZE);
+       filename = malloc (MAPS_FILENAME_SIZE);
        update_regions_buffer (fd, buffer);
        current = buffer;
        while (current != NULL) {
-               current = parse_map_line (regions, fd, buffer, current);
+               current = parse_map_line (regions, fd, buffer, filename, current);
        }
        
        free (buffer);
+       free (filename);
        
        close (fd);
        return TRUE;
@@ -2032,22 +3699,55 @@ typedef enum {
        MONO_PROFILER_STATISTICAL_CODE_END = 0,
        MONO_PROFILER_STATISTICAL_CODE_METHOD = 1,
        MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_ID = 2,
-       MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_IN_REGION = 3,
+       MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_NEW_ID = 3,
+       MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_OFFSET_IN_REGION = 4,
+       MONO_PROFILER_STATISTICAL_CODE_CALL_CHAIN = 5,
        MONO_PROFILER_STATISTICAL_CODE_REGIONS = 7
 } MonoProfilerStatisticalCode;
 
 static void
 refresh_memory_regions (void) {
        ProfilerExecutableMemoryRegions *old_regions = profiler->executable_regions;
-       ProfilerExecutableMemoryRegions *new_regions = profiler_executable_memory_regions_new (old_regions->next_id);
+       ProfilerExecutableMemoryRegions *new_regions = profiler_executable_memory_regions_new (old_regions->next_id, old_regions->next_unmanaged_function_id);
        int i;
        
        LOG_WRITER_THREAD ("Refreshing memory regions...");
        scan_process_regions (new_regions);
-       restore_region_ids (old_regions, new_regions);
        sort_regions (new_regions);
+       restore_old_regions (old_regions, new_regions);
+       fix_region_references (new_regions);
        LOG_WRITER_THREAD ("Refreshed memory regions.");
        
+       LOG_WRITER_THREAD ("Building symbol tables...");
+       build_symbol_tables (new_regions, & (profiler->executable_files));
+#if 0
+       printf ("Symbol tables done!\n");
+       printf ("Region summary...\n");
+       for (i = 0; i < new_regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = new_regions->regions [i];
+               printf ("Region %d[%d][NEW:%d] (%p-%p) at %d in file %s\n", i, region->id, region->is_new,
+                               region->start, region->end, region->file_offset, region->file_name);
+       }
+       printf ("New symbol tables dump...\n");
+       for (i = 0; i < new_regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = new_regions->regions [i];
+               
+               if (region->is_new) {
+                       int symbol_index;
+                       
+                       printf ("Region %d[%d][NEW:%d] (%p-%p) at %d in file %s\n", i, region->id, region->is_new,
+                                       region->start, region->end, region->file_offset, region->file_name);
+                       for (symbol_index = 0; symbol_index < region->symbols_count; symbol_index ++) {
+                               ProfilerUnmanagedSymbol *symbol = & (region->symbols [symbol_index]);
+                               printf ("  [%d] Symbol %s (offset %d, size %d)\n", symbol_index,
+                                               executable_region_symbol_get_name (region, symbol),
+                                               symbol->offset, symbol->size);
+                       }
+               }
+       }
+#endif
+       LOG_WRITER_THREAD ("Built symbol tables.");
+       
        // This marks the region "sub-block"
        write_uint32 (MONO_PROFILER_STATISTICAL_CODE_REGIONS);
        
@@ -2073,8 +3773,8 @@ refresh_memory_regions (void) {
                        printf ("[refresh_memory_regions] Wrote region %d (%p-%p[%d] '%s')\n", region->id, region->start, region->end, region->file_offset, region->file_name);
 #endif
                        write_uint32 (region->id);
-                       write_uint64 (GPOINTER_TO_INT (region->start));
-                       write_uint32 (GPOINTER_TO_INT (region->end) - GPOINTER_TO_INT (region->start));
+                       write_uint64 (GPOINTER_TO_UINT (region->start));
+                       write_uint32 (GPOINTER_TO_UINT (region->end) - GPOINTER_TO_UINT (region->start));
                        write_uint32 (region->file_offset);
                        write_string (region->file_name);
                }
@@ -2086,6 +3786,78 @@ refresh_memory_regions (void) {
        profiler->executable_regions = new_regions;
 }
 
+/*
+ * Writes one statistical sample for "address": a managed method id, an unmanaged
+ * symbol id (sent with its name on first use) or an offset inside a memory region.
+ * Returns "regions_refreshed", set to TRUE if the regions were refreshed here.
+ */
+static gboolean
+write_statistical_hit (gpointer address, gboolean regions_refreshed) {
+       ProfilerCodeBuffer *code_buffer = profiler_code_buffer_from_address (profiler, address);
+       if ((code_buffer != NULL) && (code_buffer->info.type == MONO_PROFILER_CODE_BUFFER_METHOD)) {
+               MonoMethod *method = code_buffer->info.data.method;
+               MethodIdMappingElement *element = method_id_mapping_element_get (method);
+               if (element != NULL) {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Wrote method %d\n", element->id);
+#endif
+                       write_uint32 ((element->id << 3) | MONO_PROFILER_STATISTICAL_CODE_METHOD);
+               } else {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Wrote unknown method %p\n", method);
+#endif
+                       write_uint32 (MONO_PROFILER_STATISTICAL_CODE_METHOD);
+               }
+       } else {
+               ProfilerExecutableMemoryRegionData *region = find_address_region (profiler->executable_regions, address);
+               if (region == NULL && ! regions_refreshed) {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Cannot find region for address %p, refreshing...\n", address);
+#endif
+                       refresh_memory_regions ();
+                       regions_refreshed = TRUE;
+                       region = find_address_region (profiler->executable_regions, address);
+               }
+               
+               if (region != NULL) {
+                       guint32 offset = ((guint8*)address) - ((guint8*)region->start);
+                       ProfilerUnmanagedSymbol *symbol = executable_memory_region_find_symbol (region, offset);
+                       if (symbol != NULL) {
+                               if (symbol->id > 0) {
+#if DEBUG_STATISTICAL_PROFILER
+                                       printf ("[write_statistical_hit] Wrote unmanaged symbol %d\n", symbol->id);
+#endif
+                                       write_uint32 ((symbol->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_ID);
+                               } else {
+                                       ProfilerExecutableMemoryRegions *regions = profiler->executable_regions;
+                                       const char *symbol_name = executable_region_symbol_get_name (region, symbol);
+                                       symbol->id = regions->next_unmanaged_function_id ++;
+#if DEBUG_STATISTICAL_PROFILER
+                                       printf ("[write_statistical_hit] Wrote new unmanaged symbol in region %d[%d]\n", region->id, offset);
+#endif
+                                       write_uint32 ((region->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_NEW_ID);
+                                       write_uint32 (symbol->id);
+                                       write_string (symbol_name);
+                               }
+                       } else {
+#if DEBUG_STATISTICAL_PROFILER
+                               printf ("[write_statistical_hit] Wrote unknown unmanaged hit in region %d[%d] (address %p)\n", region->id, offset, address);
+#endif
+                               write_uint32 ((region->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_OFFSET_IN_REGION);
+                               write_uint32 (offset);
+                       }
+               } else {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Wrote unknown unmanaged hit %p\n", address);
+#endif
+                       write_uint32 (MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_OFFSET_IN_REGION);
+                       write_uint64 ((guint64) (gsize) address); /* GPOINTER_TO_UINT casts through guint and would drop the upper 32 bits of a 64 bit address */
+               }
+       }
+       
+       return regions_refreshed;
+}
+
 static void
 flush_all_mappings (void);
 
@@ -2094,8 +3866,8 @@ write_statistical_data_block (ProfilerStatisticalData *data) {
        int start_index = data->first_unwritten_index;
        int end_index = data->next_free_index;
        gboolean regions_refreshed = FALSE;
+       int call_chain_depth = profiler->statistical_call_chain_depth;
        int index;
-       ProfilerUnmanagedFunctions *functions = &(profiler->unmanaged_functions);
        
        if (end_index > data->end_index)
                end_index = data->end_index;
@@ -2107,75 +3879,43 @@ write_statistical_data_block (ProfilerStatisticalData *data) {
        
        write_clock_data ();
        
-       for (index = start_index; index < end_index; index ++) {
-               gpointer address = data->addresses [index];
-               MonoJitInfo *ji = mono_jit_info_table_find (mono_domain_get (), (char*) address);
-               
-               if (ji != NULL) {
-                       MonoMethod *method = mono_jit_info_get_method (ji);
-                       MethodIdMappingElement *element = method_id_mapping_element_get (method);
-                       
-                       if (element != NULL) {
-#if DEBUG_STATISTICAL_PROFILER
-                               printf ("[write_statistical_data_block] Wrote method %d\n", element->id);
-#endif
-                               write_uint32 ((element->id << 3) | MONO_PROFILER_STATISTICAL_CODE_METHOD);
-                       } else {
-#if DEBUG_STATISTICAL_PROFILER
-                               printf ("[write_statistical_data_block] Wrote unknown method %p\n", method);
-#endif
-                               write_uint32 (MONO_PROFILER_STATISTICAL_CODE_METHOD);
-                       }
-               } else {
-                       if (! unmanaged_function_hit (functions, address)) {
-                               ProfilerExecutableMemoryRegionData *region = find_address_region (profiler->executable_regions, address);
-                               
-                               if (region == NULL && ! regions_refreshed) {
-                                       refresh_memory_regions ();
-                                       regions_refreshed = TRUE;
-                                       region = find_address_region (profiler->executable_regions, address);
-                               }
-                               
-                               if (region != NULL) {
-#if DEBUG_STATISTICAL_PROFILER
-                                       printf ("[write_statistical_data_block] Wrote unmanaged hit %d[%d]\n", region->id, GPOINTER_TO_INT (address) - GPOINTER_TO_INT (region->start));
-#endif
-                                       write_uint32 ((region->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_IN_REGION);
-                                       write_uint32 (GPOINTER_TO_INT (address) - GPOINTER_TO_INT (region->start));
-                               } else {
 #if DEBUG_STATISTICAL_PROFILER
-                                       printf ("[write_statistical_data_block] Wrote unknown unmanaged hit %p\n", address);
+       printf ("[write_statistical_data_block] Starting loop at index %d\n", start_index);
 #endif
-                                       write_uint32 (MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_IN_REGION);
-                                       write_uint64 (GPOINTER_TO_INT (address));
-                               }
+       
+       for (index = start_index; index < end_index; index ++) {
+               int base_index = index * (call_chain_depth + 1);
+               ProfilerStatisticalHit hit = data->hits [base_index];
+               int callers_count;
+               
+               regions_refreshed = write_statistical_hit (hit.address, regions_refreshed);
+               base_index ++;
+               
+               for (callers_count = 0; callers_count < call_chain_depth; callers_count ++) {
+                       hit = data->hits [base_index + callers_count];
+                       if (hit.address == NULL) {
+                               break;
                        }
                }
-       }
-       if (functions->unwritten_queue != functions->unwritten_queue_end) {
-               ProfilerUnmanagedFunction *end = functions->unwritten_queue_end;
-               ProfilerUnmanagedFunction *function = functions->unwritten_queue;
-               functions->unwritten_queue = functions->unwritten_queue_end;
                
-               while (function != end) {
-                       ProfilerUnmanagedFunction *next = function->next_unwritten;
-                       
-                       write_uint32 ((function->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_ID);
-                       if (function->name != NULL) {
-                               write_uint32 (0);
-                               write_string (function->name);
-                               g_free (function->name);
-                               function->name = NULL;
-                       }
-                       write_uint32 (function->hits);
-                       function->hits = 0;
+               if (callers_count > 0) {
+                       write_uint32 ((callers_count << 3) | MONO_PROFILER_STATISTICAL_CODE_CALL_CHAIN);
                        
-                       function->next_unwritten = NULL;
-                       function = next;
+                       for (callers_count = 0; callers_count < call_chain_depth; callers_count ++) {
+                               hit = data->hits [base_index + callers_count];
+                               if (hit.address != NULL) {
+                                       regions_refreshed = write_statistical_hit (hit.address, regions_refreshed);
+                               } else {
+                                       break;
+                               }
+                       }
                }
        }
        write_uint32 (MONO_PROFILER_STATISTICAL_CODE_END);
        
+#if DEBUG_STATISTICAL_PROFILER
+       printf ("[write_statistical_data_block] Ending loop at index %d\n", end_index);
+#endif
        write_clock_data ();
        
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_STATISTICAL);
@@ -2222,10 +3962,15 @@ update_mapping (ProfilerPerThreadData *data) {
                        MethodIdMappingElement *element = method_id_mapping_element_get (start->data.address);
                        if (element == NULL) {
                                MonoMethod *method = start->data.address;
-                               method_id_mapping_element_new (method);
+                               if (method != NULL) {
+                                       method_id_mapping_element_new (method);
+                               }
                        }
                }
                
+               if (start->value == MAX_EVENT_VALUE) {
+                       start ++;
+               }
                start ++;
        }
 #if (DEBUG_LOGGING_PROFILER)
@@ -2252,11 +3997,12 @@ flush_full_event_data_buffer (ProfilerPerThreadData *data) {
        // We flush all mappings because some id definitions could come
        // from other threads
        flush_all_mappings ();
-       g_assert (data->first_unmapped_event == data->end_event);
+       g_assert (data->first_unmapped_event >= data->next_free_event);
        
        write_thread_data_block (data);
        
        data->next_free_event = data->events;
+       data->next_unreserved_event = data->events;
        data->first_unwritten_event = data->events;
        data->first_unmapped_event = data->events;
        MONO_PROFILER_GET_CURRENT_COUNTER (data->start_event_counter);
@@ -2265,12 +4011,17 @@ flush_full_event_data_buffer (ProfilerPerThreadData *data) {
        UNLOCK_PROFILER ();
 }
 
-#define GET_NEXT_FREE_EVENT(d,e) {\
-       if ((d)->next_free_event >= (d)->end_event) {\
+/* The ">=" operator is intentional, to leave one spare slot for "extended values" */
+#define RESERVE_EVENTS(d,e,count) do {\
+       if ((d)->next_unreserved_event >= ((d)->end_event - (count))) {\
                flush_full_event_data_buffer (d);\
        }\
-       (e) = (d)->next_free_event;\
-       (d)->next_free_event ++;\
+       (e) = (d)->next_unreserved_event;\
+       (d)->next_unreserved_event += (count);\
+} while (0)
+#define GET_NEXT_FREE_EVENT(d,e) RESERVE_EVENTS ((d),(e),1)
+#define COMMIT_RESERVED_EVENTS(d) do { /* commit: advance next_free_event of (d) up to its next_unreserved_event */\
+       (d)->next_free_event = (d)->next_unreserved_event;\
 } while (0)
 
 static void
@@ -2300,7 +4051,7 @@ appdomain_end_load (MonoProfiler *profiler, MonoDomain *domain, int result) {
        name = g_strdup_printf ("%d", mono_domain_get_id (domain));
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_appdomains, domain, name);
-       write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_APPDOMAIN | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
+       write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_APPDOMAIN | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID (), domain);
        UNLOCK_PROFILER ();
 }
 
@@ -2335,11 +4086,14 @@ module_end_load (MonoProfiler *profiler, MonoImage *module, int result) {
        MonoAssemblyName aname;
        LoadedElement *element;
        
-       mono_assembly_fill_assembly_name (module, &aname);
-       name = mono_stringify_assembly_name (&aname);
+       if (mono_assembly_fill_assembly_name (module, &aname)) {
+               name = mono_stringify_assembly_name (&aname);
+       } else {
+               name = g_strdup_printf ("Dynamic module \"%p\"", module);
+       }
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_modules, module, name);
-       write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_MODULE | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
+       write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_MODULE | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID (), module);
        UNLOCK_PROFILER ();
 }
 
@@ -2374,11 +4128,14 @@ assembly_end_load (MonoProfiler *profiler, MonoAssembly *assembly, int result) {
        MonoAssemblyName aname;
        LoadedElement *element;
        
-       mono_assembly_fill_assembly_name (mono_assembly_get_image (assembly), &aname);
-       name = mono_stringify_assembly_name (&aname);
+       if (mono_assembly_fill_assembly_name (mono_assembly_get_image (assembly), &aname)) {
+               name = mono_stringify_assembly_name (&aname);
+       } else {
+               name = g_strdup_printf ("Dynamic assembly \"%p\"", assembly);
+       }
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_assemblies, assembly, name);
-       write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_ASSEMBLY | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
+       write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_ASSEMBLY | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID (), assembly);
        UNLOCK_PROFILER ();
 }
 
@@ -2411,11 +4168,14 @@ class_event_code_to_string (MonoProfilerClassEvents code) {
        }
 }
 static const char*
-method_event_code_to_string (MonoProfilerClassEvents code) {
+method_event_code_to_string (MonoProfilerMethodEvents code) {
        switch (code) {
        case MONO_PROFILER_EVENT_METHOD_CALL: return "CALL";
        case MONO_PROFILER_EVENT_METHOD_JIT: return "JIT";
        case MONO_PROFILER_EVENT_METHOD_FREED: return "FREED";
+       case MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER: return "ALLOCATION_CALLER";
+       case MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER: return "ALLOCATION_JIT_TIME_CALLER";
+       case MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID: return "ALLOCATION_OBJECT_ID";
        default: g_assert_not_reached (); return "";
        }
 }
@@ -2429,6 +4189,9 @@ number_event_code_to_string (MonoProfilerEvents code) {
        case MONO_PROFILER_EVENT_GC_RESIZE: return "GC_RESIZE";
        case MONO_PROFILER_EVENT_GC_STOP_WORLD: return "GC_STOP_WORLD";
        case MONO_PROFILER_EVENT_GC_START_WORLD: return "GC_START_WORLD";
+       case MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION: return "JIT_TIME_ALLOCATION";
+       case MONO_PROFILER_EVENT_STACK_SECTION: return "STACK_SECTION";
+       case MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID: return "ALLOCATION_OBJECT_ID";
        default: g_assert_not_reached (); return "";
        }
 }
@@ -2449,12 +4212,12 @@ event_kind_to_string (MonoProfilerEventKind code) {
        }
 }
 static void
-print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
+print_event_data (ProfilerPerThreadData *data, ProfilerEventData *event, guint64 value) {
        if (event->data_type == MONO_PROFILER_EVENT_DATA_TYPE_CLASS) {
-               printf ("[TID %ld] CLASS[%p] event [%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s)\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld] CLASS[%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s)\n",
+                               data->thread_id,
+                               event - data->events,
                                event->data.address,
-                               event,
                                class_event_code_to_string (event->code & ~MONO_PROFILER_EVENT_RESULT_MASK),
                                event_result_to_string (event->code & MONO_PROFILER_EVENT_RESULT_MASK),
                                event_kind_to_string (event->kind),
@@ -2465,10 +4228,10 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                mono_class_get_namespace ((MonoClass*) event->data.address),
                                mono_class_get_name ((MonoClass*) event->data.address));
        } else if (event->data_type == MONO_PROFILER_EVENT_DATA_TYPE_METHOD) {
-               printf ("[TID %ld] METHOD[%p] event [%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s:%s (?))\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld]  METHOD[%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s:%s (?))\n",
+                               data->thread_id,
+                               event - data->events,
                                event->data.address,
-                               event,
                                method_event_code_to_string (event->code & ~MONO_PROFILER_EVENT_RESULT_MASK),
                                event_result_to_string (event->code & MONO_PROFILER_EVENT_RESULT_MASK),
                                event_kind_to_string (event->kind),
@@ -2476,14 +4239,14 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                event->kind,
                                event->code,
                                value,
-                               mono_class_get_namespace (mono_method_get_class ((MonoMethod*) event->data.address)),
-                               mono_class_get_name (mono_method_get_class ((MonoMethod*) event->data.address)),
-                               mono_method_get_name ((MonoMethod*) event->data.address));
+                               (event->data.address != NULL) ? mono_class_get_namespace (mono_method_get_class ((MonoMethod*) event->data.address)) : "<NULL>",
+                               (event->data.address != NULL) ? mono_class_get_name (mono_method_get_class ((MonoMethod*) event->data.address)) : "<NULL>",
+                               (event->data.address != NULL) ? mono_method_get_name ((MonoMethod*) event->data.address) : "<NULL>");
        } else {
-               printf ("[TID %ld] NUMBER[%ld] event [%p] %s:%s[%d-%d-%d] %ld\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld]  NUMBER[%ld] %s:%s[%d-%d-%d] %ld\n",
+                               data->thread_id,
+                               event - data->events,
                                (guint64) event->data.number,
-                               event,
                                number_event_code_to_string (event->code),
                                event_kind_to_string (event->kind),
                                event->data_type,
@@ -2492,129 +4255,147 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                value);
        }
 }
-#define LOG_EVENT(tid,ev,val) print_event_data ((tid),(ev),(val))
+#define LOG_EVENT(data,ev,val) print_event_data ((data),(ev),(val))
 #else
-#define LOG_EVENT(tid,ev,val)
+#define LOG_EVENT(data,ev,val)
 #endif
 
 #define RESULT_TO_EVENT_CODE(r) (((r)==MONO_PROFILE_OK)?MONO_PROFILER_EVENT_RESULT_SUCCESS:MONO_PROFILER_EVENT_RESULT_FAILURE)
 
-#define STORE_EVENT_ITEM_COUNTER(p,i,dt,c,k) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
+#define STORE_EVENT_ITEM_COUNTER(event,p,i,dt,c,k) do {\
        guint64 counter;\
        guint64 delta;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
        MONO_PROFILER_GET_CURRENT_COUNTER (counter);\
-       event->data.address = (i);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+       (event)->data.address = (i);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        delta = counter - data->last_event_counter;\
        if (delta < MAX_EVENT_VALUE) {\
-               event->value = delta;\
+               (event)->value = delta;\
        } else {\
-               ProfilerEventData *extension = data->next_free_event;\
-               data->next_free_event ++;\
-               event->value = delta >> 32;\
-               extension->data.number = delta & 0xffffffff;\
+               ProfilerEventData *extension = data->next_unreserved_event;\
+               data->next_unreserved_event ++;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = delta;\
        }\
        data->last_event_counter = counter;\
-       LOG_EVENT (data->thread_id, event, delta);\
+       LOG_EVENT (data, (event), delta);\
 } while (0);
-#define STORE_EVENT_ITEM_VALUE(p,i,dt,c,k,v) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
-       event->data.address = (i);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+#define STORE_EVENT_ITEM_VALUE(event,p,i,dt,c,k,v) do {\
+       (event)->data.address = (i);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        if ((v) < MAX_EVENT_VALUE) {\
-               event->value = (v);\
+               (event)->value = (v);\
        } else {\
-               ProfilerEventData *extension = data->next_free_event;\
-               data->next_free_event ++;\
-               event->value = (v) >> 32;\
-               extension->data.number = (v) & 0xffffffff;\
+               ProfilerEventData *extension = data->next_unreserved_event;\
+               data->next_unreserved_event ++;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = (v);\
        }\
-       LOG_EVENT (data->thread_id, event, (v));\
+       LOG_EVENT (data, (event), (v));\
 }while (0);
-#define STORE_EVENT_NUMBER_COUNTER(p,n,dt,c,k) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
+#define STORE_EVENT_NUMBER_COUNTER(event,p,n,dt,c,k) do {\
        guint64 counter;\
        guint64 delta;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
        MONO_PROFILER_GET_CURRENT_COUNTER (counter);\
-       event->data.number = (n);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+       (event)->data.number = (n);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        delta = counter - data->last_event_counter;\
        if (delta < MAX_EVENT_VALUE) {\
-               event->value = delta;\
+               (event)->value = delta;\
        } else {\
-               ProfilerEventData *extension = data->next_free_event;\
-               data->next_free_event ++;\
-               event->value = delta >> 32;\
-               extension->data.number = delta & 0xffffffff;\
+               ProfilerEventData *extension = data->next_unreserved_event;\
+               data->next_unreserved_event ++;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = delta;\
        }\
        data->last_event_counter = counter;\
-       LOG_EVENT (data->thread_id, event, delta);\
+       LOG_EVENT (data, (event), delta);\
 }while (0);
-#define STORE_EVENT_NUMBER_VALUE(p,n,dt,c,k,v) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
-       event->data.number = (n);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+#define STORE_EVENT_NUMBER_VALUE(event,p,n,dt,c,k,v) do {\
+       (event)->data.number = (n);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        if ((v) < MAX_EVENT_VALUE) {\
-               event->value = (v);\
+               (event)->value = (v);\
        } else {\
-               ProfilerEventData *extension = data->next_free_event;\
-               data->next_free_event ++;\
-               event->value = (v) >> 32;\
-               extension->data.number = (v) & 0xffffffff;\
+               ProfilerEventData *extension = data->next_unreserved_event;\
+               data->next_unreserved_event ++;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = (v);\
+       }\
+       LOG_EVENT (data, (event), (v));\
+}while (0);
+#define INCREMENT_EVENT(event) do {\
+       if ((event)->value != MAX_EVENT_VALUE) {\
+               (event) ++;\
+       } else {\
+               (event) += 2;\
        }\
-       LOG_EVENT (data->thread_id, event, (v));\
 }while (0);
-
 
 static void
 class_start_load (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD, MONO_PROFILER_EVENT_KIND_START);
+       COMMIT_RESERVED_EVENTS (data);
 }
 static void
 class_end_load (MonoProfiler *profiler, MonoClass *klass, int result) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       COMMIT_RESERVED_EVENTS (data);
 }
 static void
 class_start_unload (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_START);
+       COMMIT_RESERVED_EVENTS (data);
 }
 static void
 class_end_unload (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_END);
+       COMMIT_RESERVED_EVENTS (data);
 }
 
 static void
 method_start_jit (MonoProfiler *profiler, MonoMethod *method) {
-       if (profiler->action_flags.jit_time) {
-               STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT, MONO_PROFILER_EVENT_KIND_START);
-       }
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       thread_stack_push_jitted_safely (&(data->stack), method, TRUE);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT, MONO_PROFILER_EVENT_KIND_START);
+       COMMIT_RESERVED_EVENTS (data);
 }
 static void
 method_end_jit (MonoProfiler *profiler, MonoMethod *method, int result) {
-       if (profiler->action_flags.jit_time) {
-               STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
-       }
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       thread_stack_pop (&(data->stack));
+       COMMIT_RESERVED_EVENTS (data);
 }
 
 #if (HAS_OPROFILE)
@@ -2640,50 +4421,249 @@ method_jit_result (MonoProfiler *prof, MonoMethod *method, MonoJitInfo* jinfo, i
 
 static void
 method_enter (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       
+       CHECK_PROFILER_ENABLED ();
+       GET_PROFILER_THREAD_DATA (data);
+       if (profiler->action_flags.track_calls) {
+               ProfilerEventData *event;
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_START);
+               COMMIT_RESERVED_EVENTS (data);
+       }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_push_safely (&(data->stack), method);
+       }
 }
 static void
 method_leave (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       
+       CHECK_PROFILER_ENABLED ();
+       GET_PROFILER_THREAD_DATA (data);
+       if (profiler->action_flags.track_calls) {
+               ProfilerEventData *event;
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_END);
+               COMMIT_RESERVED_EVENTS (data);
+       }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_pop (&(data->stack));
+       }
 }
 
 static void
 method_free (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_FREED, 0);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_FREED, 0);
+       COMMIT_RESERVED_EVENTS (data);
 }
 
 static void
-thread_start (MonoProfiler *profiler, gsize tid) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_START);
+thread_start (MonoProfiler *profiler, intptr_t tid) {
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_START);
+       COMMIT_RESERVED_EVENTS (data);
 }
 static void
-thread_end (MonoProfiler *profiler, gsize tid) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_END);
+thread_end (MonoProfiler *profiler, intptr_t tid) {
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_END);
+       COMMIT_RESERVED_EVENTS (data);
+}
+
+static ProfilerEventData*
+save_stack_delta (MonoProfiler *profiler, ProfilerPerThreadData *data, ProfilerEventData *events, int unsaved_frames) {
+       int i;
+       
+       /* In this loop it is safe to simply increment "events" because MAX_EVENT_VALUE cannot be reached. */
+       STORE_EVENT_NUMBER_VALUE (events, profiler, data->stack.last_saved_top, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_STACK_SECTION, 0, unsaved_frames);
+       events++;
+       for (i = 0; i < unsaved_frames; i++) {
+               if (! thread_stack_index_from_top_is_jitted (&(data->stack), i)) {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, thread_stack_index_from_top (&(data->stack), i), MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER, 0, 0);
+               } else {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, thread_stack_index_from_top (&(data->stack), i), MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER, 0, 0);
+               }
+               events ++;
+       }
+       
+       data->stack.last_saved_top = data->stack.top;
+       
+       return events;
 }
 
 static void
 object_allocated (MonoProfiler *profiler, MonoObject *obj, MonoClass *klass) {
-       ProfilerPerThreadData *thread_data;
+       ProfilerPerThreadData *data;
+       ProfilerEventData *events;
+       int unsaved_frames;
+       int event_slot_count;
+       
+       GET_PROFILER_THREAD_DATA (data);
+       event_slot_count = 1;
+       if (profiler->action_flags.save_allocation_caller) {
+               event_slot_count ++;
+       }
+       if (profiler->action_flags.allocations_carry_id) {
+               event_slot_count ++;
+       }
+       if (profiler->action_flags.save_allocation_stack) {
+               unsaved_frames = thread_stack_count_unsaved_frames (&(data->stack));
+               event_slot_count += (unsaved_frames + 1);
+       } else {
+               unsaved_frames = 0;
+       }
+       RESERVE_EVENTS (data, events, event_slot_count);
+       
+       if (profiler->action_flags.save_allocation_stack) {
+               events = save_stack_delta (profiler, data, events, unsaved_frames);
+       }
+       
+       STORE_EVENT_ITEM_VALUE (events, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_ALLOCATION, 0, (guint64) mono_object_get_size (obj));
+       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary) {
+               STORE_ALLOCATED_OBJECT (data, obj);
+       }
+       
+       if (profiler->action_flags.save_allocation_caller) {
+               MonoMethod *caller = thread_stack_top (&(data->stack));
+               gboolean caller_is_jitted = thread_stack_top_is_jitted (&(data->stack));
+               int index = 1;
+               /* Here it is safe to simply increment "events" because MAX_EVENT_VALUE cannot be reached. */
+               events ++;
+               
+               while ((caller != NULL) && (caller->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)) {
+                       caller = thread_stack_index_from_top (&(data->stack), index);
+                       caller_is_jitted = thread_stack_index_from_top_is_jitted (&(data->stack), index);
+                       index ++;
+               }
+               if (! caller_is_jitted) {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, caller, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER, 0, 0);
+               } else {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, caller, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER, 0, 0);
+               }
+       }
+       if (profiler->action_flags.allocations_carry_id) {
+               events ++;
+               STORE_EVENT_ITEM_VALUE (events, profiler, obj, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID, 0, 0);
+       }
+       
+       COMMIT_RESERVED_EVENTS (data);
+}
+
+static void
+monitor_event (MonoProfiler *profiler, MonoObject *obj, MonoProfilerMonitorEvent event) {
+       ProfilerPerThreadData *data;
+       ProfilerEventData *events;
+       MonoClass *klass;
+       int unsaved_frames;
+       int event_slot_count;
        
-       STORE_EVENT_ITEM_VALUE (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_ALLOCATION, 0, (guint64) mono_object_get_size (obj));
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
-               GET_PROFILER_THREAD_DATA (thread_data);
-               STORE_ALLOCATED_OBJECT (thread_data, obj);
+       CHECK_PROFILER_ENABLED ();
+       
+       GET_PROFILER_THREAD_DATA (data);
+       klass = mono_object_get_class (obj);
+       
+       unsaved_frames = thread_stack_count_unsaved_frames (&(data->stack));
+       if (unsaved_frames > 0) {
+               event_slot_count = unsaved_frames + 3;
+       } else {
+               event_slot_count = 2;
+       }
+       
+       RESERVE_EVENTS (data, events, event_slot_count);
+       if (unsaved_frames > 0) {
+               events = save_stack_delta (profiler, data, events, unsaved_frames);
        }
+       STORE_EVENT_ITEM_COUNTER (events, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_MONITOR, MONO_PROFILER_EVENT_KIND_START);
+       INCREMENT_EVENT (events);
+       STORE_EVENT_ITEM_VALUE (events, profiler, obj, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_OBJECT_MONITOR, 0, event);
+       COMMIT_RESERVED_EVENTS (data);
+}
+
+static void
+statistical_call_chain (MonoProfiler *profiler, int call_chain_depth, guchar **ips, void *context) {
+       MonoDomain *domain = mono_domain_get ();
+       ProfilerStatisticalData *data;
+       unsigned int index;
+       
+       CHECK_PROFILER_ENABLED ();
+       do {
+               data = profiler->statistical_data;
+               index = InterlockedIncrement ((int*) &data->next_free_index);
+               
+               if (index <= data->end_index) {
+                       unsigned int base_index = (index - 1) * (profiler->statistical_call_chain_depth + 1);
+                       unsigned int call_chain_index = 0;
+                       
+                       //printf ("[statistical_call_chain] (%d)\n", call_chain_depth);
+                       while (call_chain_index < call_chain_depth) {
+                               ProfilerStatisticalHit *hit = & (data->hits [base_index + call_chain_index]);
+                               //printf ("[statistical_call_chain] [%d] = %p\n", base_index + call_chain_index, ips [call_chain_index]);
+                               hit->address = (gpointer) ips [call_chain_index];
+                               hit->domain = domain;
+                               call_chain_index ++;
+                       }
+                       while (call_chain_index <= profiler->statistical_call_chain_depth) {
+                               ProfilerStatisticalHit *hit = & (data->hits [base_index + call_chain_index]);
+                               //printf ("[statistical_call_chain] [%d] = NULL\n", base_index + call_chain_index);
+                               hit->address = NULL;
+                               hit->domain = NULL;
+                               call_chain_index ++;
+                       }
+               } else {
+                       /* Check if we are the one that must swap the buffers */
+                       if (index == data->end_index + 1) {
+                               ProfilerStatisticalData *new_data;
+
+                               /* In the *impossible* case that the writer thread has not finished yet, */
+                               /* loop waiting for it and meanwhile lose all statistical events... */
+                               do {
+                                       /* First, wait until it has consumed the ready buffer */
+                                       while (profiler->statistical_data_ready != NULL);
+                                       /* Then, wait until it has produced the free buffer */
+                                       new_data = profiler->statistical_data_second_buffer;
+                               } while (new_data == NULL);
+
+                               profiler->statistical_data_ready = data;
+                               profiler->statistical_data = new_data;
+                               profiler->statistical_data_second_buffer = NULL;
+                               WRITER_EVENT_RAISE ();
+                               /* Otherwise exit from the handler and drop the event... */
+                       } else {
+                               break;
+                       }
+                       
+                       /* Loop again, hoping to acquire a free slot this time (otherwise the event will be dropped) */
+                       data = NULL;
+               }
+       } while (data == NULL);
 }
 
-
 static void
 statistical_hit (MonoProfiler *profiler, guchar *ip, void *context) {
+       MonoDomain *domain = mono_domain_get ();
        ProfilerStatisticalData *data;
-       int index;
+       unsigned int index;
        
+       CHECK_PROFILER_ENABLED ();
        do {
                data = profiler->statistical_data;
-               index = InterlockedIncrement (&data->next_free_index);
+               index = InterlockedIncrement ((int*) &data->next_free_index);
                
                if (index <= data->end_index) {
-                       data->addresses [index - 1] = (gpointer) ip;
+                       ProfilerStatisticalHit *hit = & (data->hits [index - 1]);
+                       hit->address = (gpointer) ip;
+                       hit->domain = domain;
                } else {
                        /* Check if we are the one that must swap the buffers */
                        if (index == data->end_index + 1) {
@@ -2697,7 +4677,7 @@ statistical_hit (MonoProfiler *profiler, guchar *ip, void *context) {
                                        /* Then, wait that it produced the free buffer */
                                        new_data = profiler->statistical_data_second_buffer;
                                } while (new_data == NULL);
-
+                               
                                profiler->statistical_data_ready = data;
                                profiler->statistical_data = new_data;
                                profiler->statistical_data_second_buffer = NULL;
@@ -2755,53 +4735,24 @@ gc_event_kind_from_profiler_event (MonoGCEvent event) {
        }
 }
 
-#define HEAP_SHOT_COMMAND_FILE_MAX_LENGTH 64
-static void
-profiler_heap_shot_process_command_file (void) {
-       //FIXME: Port to Windows as well
-       struct stat stat_buf;
-       int fd;
-       char buffer [HEAP_SHOT_COMMAND_FILE_MAX_LENGTH + 1];
-       
-       if (profiler->heap_shot_command_file_name == NULL)
-               return;
-       if (stat (profiler->heap_shot_command_file_name, &stat_buf) != 0)
-               return;
-       if (stat_buf.st_size > HEAP_SHOT_COMMAND_FILE_MAX_LENGTH)
-               return;
-       if ((stat_buf.st_mtim.tv_sec * 1000000) < profiler->heap_shot_command_file_access_time)
-               return;
-       
-       fd = open (profiler->heap_shot_command_file_name, O_RDONLY);
-       if (fd < 0) {
-               return;
-       } else {
-               if (read (fd, &(buffer [0]), stat_buf.st_size) != stat_buf.st_size) {
-                       return;
-               } else {
-                       buffer [stat_buf.st_size] = 0;
-                       profiler->dump_next_heap_snapshots = atoi (buffer);
-                       MONO_PROFILER_GET_CURRENT_TIME (profiler->heap_shot_command_file_access_time);
-               }
-               close (fd);
-       }
-}
-
 static gboolean
 dump_current_heap_snapshot (void) {
        gboolean result;
        
-       profiler_heap_shot_process_command_file ();
-       if (profiler->dump_next_heap_snapshots > 0) {
-               profiler->dump_next_heap_snapshots--;
-               result = TRUE;
-       } else if (profiler->dump_next_heap_snapshots < 0) {
+       if (profiler->heap_shot_was_requested) {
                result = TRUE;
        } else {
-               result = FALSE;
+               if (profiler->dump_next_heap_snapshots > 0) {
+                       profiler->dump_next_heap_snapshots--;
+                       result = TRUE;
+               } else if (profiler->dump_next_heap_snapshots < 0) {
+                       result = TRUE;
+               } else {
+                       result = FALSE;
+               }
        }
        
-       return (result || (profiler->heap_shot_was_signalled));
+       return result;
 }
 
 static void
@@ -2869,64 +4820,74 @@ report_object_references (gpointer *start, ClassIdMappingElement *layout, Profil
 
 static void
 profiler_heap_report_object_reachable (ProfilerHeapShotWriteJob *job, MonoObject *obj) {
-       if (profiler->action_flags.heap_shot && (job != NULL)) {
+       if (job != NULL) {
                MonoClass *klass = mono_object_get_class (obj);
-               int reference_counter = 0;
-               gpointer *reference_counter_location;
+               ClassIdMappingElement *class_id = class_id_mapping_element_get (klass);
+               if (class_id == NULL) {
+                       printf ("profiler_heap_report_object_reachable: class %p (%s.%s) has no id\n", klass, mono_class_get_namespace (klass), mono_class_get_name (klass));
+               }
+               g_assert (class_id != NULL);
                
-               WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, obj, HEAP_CODE_OBJECT);
+               if (job->summary.capacity > 0) {
+                       guint32 id = class_id->id;
+                       g_assert (id < job->summary.capacity);
+                       
+                       job->summary.per_class_data [id].reachable.instances ++;
+                       job->summary.per_class_data [id].reachable.bytes += mono_object_get_size (obj);
+               }
+               if (profiler->action_flags.heap_shot && job->dump_heap_data) {
+                       int reference_counter = 0;
+                       gpointer *reference_counter_location;
+                       
+                       WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, obj, HEAP_CODE_OBJECT);
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_reachable: reported object %p at cursor %p\n", obj, (job->cursor - 1));
+                       printf ("profiler_heap_report_object_reachable: reported object %p at cursor %p\n", obj, (job->cursor - 1));
 #endif
-               WRITE_HEAP_SHOT_JOB_VALUE (job, NULL);
-               reference_counter_location = job->cursor - 1;
-               
-               if (mono_class_get_rank (klass)) {
-                       MonoArray *array = (MonoArray *) obj;
-                       MonoClass *element_class = mono_class_get_element_class (klass);
-                       ClassIdMappingElement *element_id = class_id_mapping_element_get (element_class);
+                       WRITE_HEAP_SHOT_JOB_VALUE (job, NULL);
+                       reference_counter_location = job->cursor - 1;
                        
-                       g_assert (element_id != NULL);
-                       if (element_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
-                               class_id_mapping_element_build_layout_bitmap (element_class, element_id);
-                       }
-                       if (! mono_class_is_valuetype (element_class)) {
-                               int length = mono_array_length (array);
-                               int i;
-                               for (i = 0; i < length; i++) {
-                                       MonoObject *array_element = mono_array_get (array, MonoObject*, i);
-                                       if ((array_element != NULL) && mono_object_is_alive (array_element)) {
-                                               reference_counter ++;
-                                               WRITE_HEAP_SHOT_JOB_VALUE (job, array_element);
+                       if (mono_class_get_rank (klass)) {
+                               MonoArray *array = (MonoArray *) obj;
+                               MonoClass *element_class = mono_class_get_element_class (klass);
+                               ClassIdMappingElement *element_id = class_id_mapping_element_get (element_class);
+                               
+                               g_assert (element_id != NULL);
+                               if (element_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
+                                       class_id_mapping_element_build_layout_bitmap (element_class, element_id);
+                               }
+                               if (! mono_class_is_valuetype (element_class)) {
+                                       int length = mono_array_length (array);
+                                       int i;
+                                       for (i = 0; i < length; i++) {
+                                               MonoObject *array_element = mono_array_get (array, MonoObject*, i);
+                                               if ((array_element != NULL) && mono_object_is_alive (array_element)) {
+                                                       reference_counter ++;
+                                                       WRITE_HEAP_SHOT_JOB_VALUE (job, array_element);
+                                               }
+                                       }
+                               } else if (element_id->data.layout.references > 0) {
+                                       int length = mono_array_length (array);
+                                       int array_element_size = mono_array_element_size (klass);
+                                       int i;
+                                       for (i = 0; i < length; i++) {
+                                               gpointer array_element_address = mono_array_addr_with_size (array, array_element_size, i);
+                                               reference_counter += report_object_references (array_element_address, element_id, job);
                                        }
                                }
-                       } else if (element_id->data.layout.references > 0) {
-                               int length = mono_array_length (array);
-                               int array_element_size = mono_array_element_size (klass);
-                               int i;
-                               for (i = 0; i < length; i++) {
-                                       gpointer array_element_address = mono_array_addr_with_size (array, array_element_size, i);
-                                       reference_counter += report_object_references (array_element_address, element_id, job);
+                       } else {
+                               if (class_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
+                                       class_id_mapping_element_build_layout_bitmap (klass, class_id);
+                               }
+                               if (class_id->data.layout.references > 0) {
+                                       reference_counter += report_object_references ((gpointer)(((char*)obj) + sizeof (MonoObject)), class_id, job);
                                }
                        }
-               } else {
-                       ClassIdMappingElement *class_id = class_id_mapping_element_get (klass);
-                       if (class_id == NULL) {
-                               printf ("profiler_heap_report_object_reachable: class %p (%s.%s) has no id\n", klass, mono_class_get_namespace (klass), mono_class_get_name (klass));
-                       }
-                       g_assert (class_id != NULL);
-                       if (class_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
-                               class_id_mapping_element_build_layout_bitmap (klass, class_id);
-                       }
-                       if (class_id->data.layout.references > 0) {
-                               reference_counter += report_object_references ((gpointer)(((char*)obj) + sizeof (MonoObject)), class_id, job);
-                       }
-               }
-               
-               *reference_counter_location = GINT_TO_POINTER (reference_counter);
+                       
+                       *reference_counter_location = GINT_TO_POINTER (reference_counter);
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_reachable: updated reference_counter_location %p with value %d\n", reference_counter_location, reference_counter);
+                       printf ("profiler_heap_report_object_reachable: updated reference_counter_location %p with value %d\n", reference_counter_location, reference_counter);
 #endif
+               }
        }
 }
 static void
@@ -2935,15 +4896,31 @@ profiler_heap_report_object_unreachable (ProfilerHeapShotWriteJob *job, MonoObje
                MonoClass *klass = mono_object_get_class (obj);
                guint32 size = mono_object_get_size (obj);
                
+               if (job->summary.capacity > 0) {
+                       ClassIdMappingElement *class_id = class_id_mapping_element_get (klass);
+                       guint32 id;
+                       
+                       if (class_id == NULL) {
+                               printf ("profiler_heap_report_object_reachable: class %p (%s.%s) has no id\n", klass, mono_class_get_namespace (klass), mono_class_get_name (klass));
+                       }
+                       g_assert (class_id != NULL);
+                       id = class_id->id;
+                       g_assert (id < job->summary.capacity);
+                       
+                       job->summary.per_class_data [id].unreachable.instances ++;
+                       job->summary.per_class_data [id].unreachable.bytes += size;
+               }
+               if (profiler->action_flags.unreachable_objects && job->dump_heap_data) {
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_unreachable: at job %p writing klass %p\n", job, klass);
+                       printf ("profiler_heap_report_object_unreachable: at job %p writing klass %p\n", job, klass);
 #endif
-               WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, klass, HEAP_CODE_FREE_OBJECT_CLASS);
+                       WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, klass, HEAP_CODE_FREE_OBJECT_CLASS);
        
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_unreachable: at job %p writing size %p\n", job, GUINT_TO_POINTER (size));
+                       printf ("profiler_heap_report_object_unreachable: at job %p writing size %p\n", job, GUINT_TO_POINTER (size));
 #endif
-               WRITE_HEAP_SHOT_JOB_VALUE (job, GUINT_TO_POINTER (size));
+                       WRITE_HEAP_SHOT_JOB_VALUE (job, GUINT_TO_POINTER (size));
+               }
        }
 }
 
@@ -3014,79 +4991,96 @@ profiler_heap_scan (ProfilerHeapShotHeapBuffers *heap, ProfilerHeapShotWriteJob
                        
                        if (current_slot == current_buffer->end_slot) {
                                current_buffer = current_buffer->next;
-                               //g_assert (current_buffer != NULL);
-                               if (current_buffer == NULL) {
-                                       printf ("KO\n");
-                                       G_BREAKPOINT ();
-                                       g_assert_not_reached ();
-                               }
+                               g_assert (current_buffer != NULL);
                                current_slot = current_buffer->start_slot;
                        }
                }
        }
 }
 
+static inline gboolean
+heap_shot_write_job_should_be_created (gboolean dump_heap_data) {
+       return dump_heap_data || profiler->action_flags.unreachable_objects || profiler->action_flags.collection_summary;
+}
+
 static void
-handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
+process_gc_event (MonoProfiler *profiler, gboolean do_heap_profiling, MonoGCEvent ev) {
+       static gboolean dump_heap_data;
+       
        switch (ev) {
        case MONO_GC_EVENT_PRE_STOP_WORLD:
                // Get the lock, so we are sure nobody is flushing events during the collection,
                // and we can update all mappings (building the class descriptors).
+               // This is also necessary during lock profiling (even if do_heap_profiling is FALSE).
                LOCK_PROFILER ();
                break;
        case MONO_GC_EVENT_POST_STOP_WORLD:
-               // Update all mappings, so that we have built all the class descriptors.
-               flush_all_mappings ();
+               if (do_heap_profiling) {
+                       dump_heap_data = dump_current_heap_snapshot ();
+                       if (heap_shot_write_job_should_be_created (dump_heap_data)) {
+                               ProfilerPerThreadData *data;
+                               // Update all mappings, so that we have built all the class descriptors.
+                               flush_all_mappings ();
+                               // Also write all event buffers, so that allocations are recorded.
+                               for (data = profiler->per_thread_data; data != NULL; data = data->next) {
+                                       write_thread_data_block (data);
+                               }
+                       }
+               } else {
+                       dump_heap_data = FALSE;
+               }
                // Release lock...
                UNLOCK_PROFILER ();
                break;
        case MONO_GC_EVENT_MARK_END: {
-               ProfilerHeapShotWriteJob *job;
-               ProfilerPerThreadData *data;
-               
-               if (dump_current_heap_snapshot ()) {
-                       job = profiler_heap_shot_write_job_new (profiler->heap_shot_was_signalled);
-                       profiler->heap_shot_was_signalled = FALSE;
-                       MONO_PROFILER_GET_CURRENT_COUNTER (job->start_counter);
-                       MONO_PROFILER_GET_CURRENT_TIME (job->start_time);
-               } else {
-                       job = NULL;
-               }
-               
-               profiler_heap_scan (&(profiler->heap), job);
-               
-               for (data = profiler->per_thread_data; data != NULL; data = data->next) {
-                       ProfilerHeapShotObjectBuffer *buffer;
-                       for (buffer = data->heap_shot_object_buffers; buffer != NULL; buffer = buffer->next) {
-                               MonoObject **cursor;
-                               for (cursor = buffer->first_unprocessed_slot; cursor < buffer->next_free_slot; cursor ++) {
-                                       MonoObject *obj = *cursor;
+               if (do_heap_profiling) {
+                       ProfilerHeapShotWriteJob *job;
+                       ProfilerPerThreadData *data;
+                       
+                       if (heap_shot_write_job_should_be_created (dump_heap_data)) {
+                               job = profiler_heap_shot_write_job_new (profiler->heap_shot_was_requested, dump_heap_data, profiler->garbage_collection_counter);
+                               profiler->heap_shot_was_requested = FALSE;
+                               MONO_PROFILER_GET_CURRENT_COUNTER (job->start_counter);
+                               MONO_PROFILER_GET_CURRENT_TIME (job->start_time);
+                       } else {
+                               job = NULL;
+                       }
+                       
+                       profiler_heap_scan (&(profiler->heap), job);
+                       
+                       for (data = profiler->per_thread_data; data != NULL; data = data->next) {
+                               ProfilerHeapShotObjectBuffer *buffer;
+                               for (buffer = data->heap_shot_object_buffers; buffer != NULL; buffer = buffer->next) {
+                                       MonoObject **cursor;
+                                       for (cursor = buffer->first_unprocessed_slot; cursor < buffer->next_free_slot; cursor ++) {
+                                               MonoObject *obj = *cursor;
 #if DEBUG_HEAP_PROFILER
-                                       printf ("gc_event: in object buffer %p(%p-%p) cursor at %p has object %p ", buffer, &(buffer->buffer [0]), buffer->end, cursor, obj);
+                                               printf ("gc_event: in object buffer %p(%p-%p) cursor at %p has object %p ", buffer, &(buffer->buffer [0]), buffer->end, cursor, obj);
 #endif
-                                       if (mono_object_is_alive (obj)) {
+                                               if (mono_object_is_alive (obj)) {
 #if DEBUG_HEAP_PROFILER
-                                               printf ("(object is alive, adding to heap)\n");
+                                                       printf ("(object is alive, adding to heap)\n");
 #endif
-                                               profiler_heap_add_object (&(profiler->heap), job, obj);
-                                       } else {
+                                                       profiler_heap_add_object (&(profiler->heap), job, obj);
+                                               } else {
 #if DEBUG_HEAP_PROFILER
-                                               printf ("(object is unreachable, reporting in job)\n");
+                                                       printf ("(object is unreachable, reporting in job)\n");
 #endif
-                                               profiler_heap_report_object_unreachable (job, obj);
+                                                       profiler_heap_report_object_unreachable (job, obj);
+                                               }
                                        }
+                                       buffer->first_unprocessed_slot = cursor;
                                }
-                               buffer->first_unprocessed_slot = cursor;
                        }
-               }
-               
-               if (job != NULL) {
-                       MONO_PROFILER_GET_CURRENT_COUNTER (job->end_counter);
-                       MONO_PROFILER_GET_CURRENT_TIME (job->end_time);
                        
-                       profiler_add_heap_shot_write_job (job);
-                       profiler_free_heap_shot_write_jobs ();
-                       WRITER_EVENT_RAISE ();
+                       if (job != NULL) {
+                               MONO_PROFILER_GET_CURRENT_COUNTER (job->end_counter);
+                               MONO_PROFILER_GET_CURRENT_TIME (job->end_time);
+                               
+                               profiler_add_heap_shot_write_job (job);
+                               profiler_free_heap_shot_write_jobs ();
+                               WRITER_EVENT_RAISE ();
+                       }
                }
                break;
        }
@@ -3097,25 +5091,234 @@ handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
 
 static void
 gc_event (MonoProfiler *profiler, MonoGCEvent ev, int generation) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, generation, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, gc_event_code_from_profiler_event (ev), gc_event_kind_from_profiler_event (ev));
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
-               handle_heap_profiling (profiler, ev);
+       ProfilerPerThreadData *data; /* gc_event: counts collections, optionally stores the GC event and calls process_gc_event */
+       ProfilerEventData *event;
+       gboolean do_heap_profiling = profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary; /* any of these three action flags enables it */
+       guint32 event_value;
+       
+       if (ev == MONO_GC_EVENT_START) {
+               profiler->garbage_collection_counter ++; /* bumped once per MONO_GC_EVENT_START */
+       }
+       
+       event_value = (profiler->garbage_collection_counter << 8) | generation; /* counter shifted left by 8 bits, OR-ed with the generation */
+       
+       if (ev == MONO_GC_EVENT_POST_STOP_WORLD) { /* for this event process_gc_event runs BEFORE the event is stored */
+               process_gc_event (profiler, do_heap_profiling, ev);
+       }
+       
+       /* Check if the gc event should be recorded. */
+       if (profiler->action_flags.report_gc_events || do_heap_profiling) {
+               GET_PROFILER_THREAD_DATA (data);
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_NUMBER_COUNTER (event, profiler, event_value, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, gc_event_code_from_profiler_event (ev), gc_event_kind_from_profiler_event (ev));
+               COMMIT_RESERVED_EVENTS (data);
+       }
+       
+       if (ev != MONO_GC_EVENT_POST_STOP_WORLD) { /* for every other event process_gc_event runs AFTER the event is stored */
+               process_gc_event (profiler, do_heap_profiling, ev);
        }
 }
 
 static void
 gc_resize (MonoProfiler *profiler, gint64 new_size) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, new_size, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_GC_RESIZE, 0);
+       ProfilerPerThreadData *data; /* gc_resize: stores a MONO_PROFILER_EVENT_GC_RESIZE event carrying new_size */
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       profiler->garbage_collection_counter ++; /* NOTE(review): a resize bumps the same counter gc_event bumps on GC start -- confirm this is intended */
+       STORE_EVENT_NUMBER_VALUE (event, profiler, new_size, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_GC_RESIZE, 0, profiler->garbage_collection_counter); /* new_size and the just-incremented counter are both handed to the macro */
+       COMMIT_RESERVED_EVENTS (data);
+}
+
+static void
+runtime_initialized (MonoProfiler *profiler) { /* registers the three Mono.Profiler.RuntimeControls internal calls */
+       LOG_WRITER_THREAD ("runtime_initialized: initializing internal calls.\n");
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::EnableProfiler", enable_profiler); /* managed name -> native function */
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::DisableProfiler", disable_profiler);
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::TakeHeapSnapshot", request_heap_snapshot);
+       LOG_WRITER_THREAD ("runtime_initialized: initialized internal calls.\n");
+}
+
+
+#define MAX_COMMAND_LENGTH (1024) /* size in bytes of the receive buffer allocated by process_user_commands */
+static int server_socket; /* listening socket created in user_thread */
+static int command_socket; /* connection accepted in user_thread; commands are received from it and responses sent to it */
+
+static void
+write_user_response (const char *response) { /* logs the NUL-terminated response, then sends it on command_socket */
+       LOG_USER_THREAD ("write_user_response: writing response:");
+       LOG_USER_THREAD (response); /* NOTE(review): non-literal text is the macro's only argument; if the macro uses it as a format string a '%' would be misread -- confirm */
+       send (command_socket, response, strlen (response), 0); /* the terminating NUL is not sent; the return value of send is ignored */
+}
+
+static void
+execute_user_command (char *command) { /* runs one NUL-terminated command line (modified in place) and answers "DONE\n" or "ERROR\n" */
+       char *line_feed;
+       
+       LOG_USER_THREAD ("execute_user_command: executing command:");
+       LOG_USER_THREAD (command);
+       
+       /* Ignore leading and trailing '\r' */
+       line_feed = strchr (command, '\r');
+       if (line_feed == command) {
+               command ++;
+               line_feed = strchr (command, '\r');
+       }
+       if ((line_feed != NULL) && (* (line_feed + 1) == 0)) { /* only a '\r' that is the very last character is cut off */
+               *line_feed = 0;
+       }
+       
+       if (strcmp (command, "enable") == 0) {
+               LOG_USER_THREAD ("execute_user_command: enabling profiler");
+               enable_profiler ();
+               write_user_response ("DONE\n");
+       } else if (strcmp (command, "disable") == 0) {
+               LOG_USER_THREAD ("execute_user_command: disabling profiler");
+               disable_profiler ();
+               write_user_response ("DONE\n");
+       } else if (strcmp (command, "heap-snapshot") == 0) {
+               LOG_USER_THREAD ("execute_user_command: taking heap snapshot");
+               profiler->heap_shot_was_requested = TRUE; /* flag is set first, then the writer event is raised */
+               WRITER_EVENT_RAISE ();
+               write_user_response ("DONE\n");
+       } else if (strstr (command, "heap-snapshot-counter") != NULL) { /* strstr yields NULL when the text is absent, so only real counter commands enter here */
+               char *equals; 
+               LOG_USER_THREAD ("execute_user_command: changing heap counter");
+               equals = strstr (command, "="); /* the value is whatever follows the first '=' */
+               if (equals != NULL) {
+                       equals ++;
+                       if (strcmp (equals, "all") == 0) {
+                               LOG_USER_THREAD ("execute_user_command: heap counter is \"all\"");
+                               profiler->garbage_collection_counter = -1;
+                       } else if (strcmp (equals, "none") == 0) {
+                               LOG_USER_THREAD ("execute_user_command: heap counter is \"none\"");
+                               profiler->garbage_collection_counter = 0;
+                       } else {
+                               profiler->garbage_collection_counter = atoi (equals); /* atoi gives 0 for non-numeric text */
+                       }
+                       write_user_response ("DONE\n");
+               } else {
+                       write_user_response ("ERROR\n"); /* command had no '=' and therefore no value */
+               }
+               profiler->heap_shot_was_requested = TRUE; /* NOTE(review): set on both the DONE and ERROR paths, without WRITER_EVENT_RAISE -- confirm */
+       } else {
+               LOG_USER_THREAD ("execute_user_command: command not recognized");
+               write_user_response ("ERROR\n");
+       }
+}
+
+static gboolean
+process_user_commands (void) { /* reads command_socket until recv stops, executing each '\n'-terminated command; FALSE only on a recv error */
+       char *command_buffer = malloc (MAX_COMMAND_LENGTH); /* NOTE(review): the malloc result is used without a NULL check */
+       int command_buffer_current_index = 0; /* bytes of an incomplete command kept from earlier recv calls */
+       gboolean loop = TRUE;
+       gboolean result = TRUE;
+       
+       while (loop) {
+               int unprocessed_characters;
+               
+               LOG_USER_THREAD ("process_user_commands: reading from socket...");
+               unprocessed_characters = recv (command_socket, command_buffer + command_buffer_current_index, MAX_COMMAND_LENGTH - command_buffer_current_index, 0); /* NOTE(review): with a full buffer and no '\n' the length passed is 0 -- confirm recv behaviour */
+               
+               if (unprocessed_characters > 0) {
+                       char *command_end = NULL;
+                       
+                       LOG_USER_THREAD ("process_user_commands: received characters.");
+                       
+                       do { /* the first pass only searches; later passes execute the command found by the previous search */
+                               if (command_end != NULL) {
+                                       *command_end = 0; /* replace the '\n' with a terminator */
+                                       execute_user_command (command_buffer);
+                                       unprocessed_characters -= (((command_end - command_buffer) - command_buffer_current_index) + 1); /* newly received bytes used by this command, '\n' included */
+                                       
+                                       if (unprocessed_characters > 0) {
+                                               memmove (command_buffer, command_end + 1, unprocessed_characters); /* move the remaining bytes to the buffer start */
+                                       }
+                                       command_buffer_current_index = 0;
+                               }
+                               
+                               command_end = memchr (command_buffer, '\n', command_buffer_current_index + unprocessed_characters); /* search only the valid bytes */
+                       } while (command_end != NULL);
+                       
+                       command_buffer_current_index += unprocessed_characters; /* what is left is an incomplete command; keep it for the next recv */
+                       
+               } else if (unprocessed_characters == 0) { /* recv returned 0: stop, reported as success */
+                       LOG_USER_THREAD ("process_user_commands: received no character.");
+                       result = TRUE;
+                       loop = FALSE;
+               } else { /* recv returned a negative value: stop, reported as failure */
+                       LOG_USER_THREAD ("process_user_commands: received error.");
+                       result = FALSE;
+                       loop = FALSE;
+               }
+       }
+       
+       free (command_buffer);
+       return result;
+}
+
+static guint32
+user_thread (gpointer nothing) { /* thread body: accepts one TCP connection on profiler->command_port and serves its commands; always returns 0 */
+       struct sockaddr_in server_address;
+       
+       server_socket = -1;
+       command_socket = -1;
+       
+       LOG_USER_THREAD ("user_thread: starting up...");
+       
+       if ((profiler->command_port < 1023) || (profiler->command_port > 65535)) { /* checked before socket() so this early return has no descriptor to leak */
+               LOG_USER_THREAD ("user_thread: invalid port number.");
+               return 0;
+       }
+       server_socket = socket (AF_INET, SOCK_STREAM, 0);
+       if (server_socket < 0) {
+               LOG_USER_THREAD ("user_thread: error creating socket.");
+               return 0;
+       }
+       memset (& server_address, 0, sizeof (server_address));
+       
+       server_address.sin_family = AF_INET;
+       server_address.sin_addr.s_addr = INADDR_ANY; /* binds on every local address */
+       server_address.sin_port = htons (profiler->command_port);
+       
+       if (bind (server_socket, (struct sockaddr *) &server_address, sizeof(server_address)) < 0) {
+               LOG_USER_THREAD ("user_thread: error binding socket.");
+               close (server_socket);
+               return 0;
+       }
+       
+       LOG_USER_THREAD ("user_thread: listening...\n");
+       listen (server_socket, 1); /* the return value of listen is not checked; a failure surfaces in accept below */
+       command_socket = accept (server_socket, NULL, NULL); /* blocks until a single client connects */
+       if (command_socket < 0) {
+               LOG_USER_THREAD ("user_thread: error accepting socket.");
+               close (server_socket);
+               return 0;
+       }
+       
+       LOG_USER_THREAD ("user_thread: processing user commands...");
+       process_user_commands (); /* its gboolean result is ignored: both outcomes end the thread */
+       
+       LOG_USER_THREAD ("user_thread: exiting cleanly.");
+       close (server_socket);
+       close (command_socket);
+       return 0;
 }
 
+
 /* called at the end of the program */
 static void
 profiler_shutdown (MonoProfiler *prof)
 {
        ProfilerPerThreadData* current_thread_data;
+       ProfilerPerThreadData* next_thread_data;
        
        LOG_WRITER_THREAD ("profiler_shutdown: zeroing relevant flags");
        mono_profiler_set_events (0);
+       /* During shutdown searching for MonoJitInfo is not possible... */
+       if (profiler->statistical_call_chain_strategy == MONO_PROFILER_CALL_CHAIN_MANAGED) {
+               mono_profiler_install_statistical_call_chain (NULL, 0, MONO_PROFILER_CALL_CHAIN_NONE);
+       }
        //profiler->flags = 0;
        //profiler->action_flags.unreachable_objects = FALSE;
        //profiler->action_flags.heap_shot = FALSE;
@@ -3129,16 +5332,22 @@ profiler_shutdown (MonoProfiler *prof)
        WRITER_EVENT_DESTROY ();
        
        LOCK_PROFILER ();
-       
+       flush_everything ();
        MONO_PROFILER_GET_CURRENT_TIME (profiler->end_time);
        MONO_PROFILER_GET_CURRENT_COUNTER (profiler->end_counter);
-       
-       flush_everything ();
        write_end_block ();
        FLUSH_FILE ();
        CLOSE_FILE();
+       mono_profiler_install_code_chunk_new (NULL);
+       mono_profiler_install_code_chunk_destroy (NULL);
+       mono_profiler_install_code_buffer_new (NULL);
+       profiler_code_chunks_cleanup (& (profiler->code_chunks));
        UNLOCK_PROFILER ();
+       
        g_free (profiler->file_name);
+       if (profiler->file_name_suffix != NULL) {
+               g_free (profiler->file_name_suffix);
+       }
        
        method_id_mapping_destroy (profiler->methods);
        class_id_mapping_destroy (profiler->classes);
@@ -3148,7 +5357,8 @@ profiler_shutdown (MonoProfiler *prof)
        
        FREE_PROFILER_THREAD_DATA ();
        
-       for (current_thread_data = profiler->per_thread_data; current_thread_data != NULL; current_thread_data = current_thread_data->next) {
+       for (current_thread_data = profiler->per_thread_data; current_thread_data != NULL; current_thread_data = next_thread_data) {
+               next_thread_data = current_thread_data->next;
                profiler_per_thread_data_destroy (current_thread_data);
        }
        if (profiler->statistical_data != NULL) {
@@ -3163,12 +5373,8 @@ profiler_shutdown (MonoProfiler *prof)
        if (profiler->executable_regions != NULL) {
                profiler_executable_memory_regions_destroy (profiler->executable_regions);
        }
-       unmanaged_functions_dispose (&(profiler->unmanaged_functions));
        
        profiler_heap_buffers_free (&(profiler->heap));
-       if (profiler->heap_shot_command_file_name != NULL) {
-               g_free (profiler->heap_shot_command_file_name);
-       }
        
        profiler_free_write_buffers ();
        profiler_destroy_heap_shot_write_jobs ();
@@ -3185,24 +5391,43 @@ profiler_shutdown (MonoProfiler *prof)
        profiler = NULL;
 }
 
+#define FAIL_ARGUMENT_CHECK(message) do { /* store message in the local failure_message, then jump to the failure_handling label */\
+       failure_message = (message);\
+       goto failure_handling;\
+} while (0)
+#define FAIL_PARSING_VALUED_ARGUMENT FAIL_ARGUMENT_CHECK("cannot parse valued argument %s") /* the %s is filled with the argument text by the g_warning call at failure_handling */
+#define FAIL_PARSING_FLAG_ARGUMENT FAIL_ARGUMENT_CHECK("cannot parse flag argument %s")
+#define CHECK_CONDITION(condition,message) do { /* fails the argument when condition is TRUE */\
+       gboolean result = (condition);\
+       if (result) {\
+               FAIL_ARGUMENT_CHECK (message);\
+       }\
+} while (0)
+#define FAIL_IF_HAS_MINUS CHECK_CONDITION(has_minus,"minus ('-') modifier not allowed for argument %s") /* reads the local has_minus of setup_user_options */
+#define TRUE_IF_NOT_MINUS ((!has_minus)?TRUE:FALSE) /* TRUE unless the argument carried a '-' prefix */
+
 #define DEFAULT_ARGUMENTS "s"
 static void
 setup_user_options (const char *arguments) {
        gchar **arguments_array, **current_argument;
+       detect_fast_timer ();
        
        profiler->file_name = NULL;
+       profiler->file_name_suffix = NULL;
        profiler->per_thread_buffer_size = 10000;
        profiler->statistical_buffer_size = 10000;
+       profiler->statistical_call_chain_depth = 0;
+       profiler->statistical_call_chain_strategy = MONO_PROFILER_CALL_CHAIN_NATIVE;
        profiler->write_buffer_size = 1024;
-       profiler->heap_shot_command_file_name = NULL;
        profiler->dump_next_heap_snapshots = 0;
-       profiler->heap_shot_command_file_access_time = 0;
-       profiler->heap_shot_was_signalled = FALSE;
+       profiler->heap_shot_was_requested = FALSE;
        profiler->flags = MONO_PROFILE_APPDOMAIN_EVENTS|
                        MONO_PROFILE_ASSEMBLY_EVENTS|
                        MONO_PROFILE_MODULE_EVENTS|
                        MONO_PROFILE_CLASS_EVENTS|
-                       MONO_PROFILE_METHOD_EVENTS;
+                       MONO_PROFILE_METHOD_EVENTS|
+                       MONO_PROFILE_JIT_COMPILATION;
+       profiler->profiler_enabled = TRUE;
        
        if (arguments == NULL) {
                arguments = DEFAULT_ARGUMENTS;
@@ -3218,98 +5443,274 @@ setup_user_options (const char *arguments) {
        for (current_argument = arguments_array; ((current_argument != NULL) && (current_argument [0] != 0)); current_argument ++) {
                char *argument = *current_argument;
                char *equals = strstr (argument, "=");
+               const char *failure_message = NULL;
+               gboolean has_plus;
+               gboolean has_minus;
+               
+               if (*argument == '+') {
+                       has_plus = TRUE;
+                       has_minus = FALSE;
+                       argument ++;
+               } else if (*argument == '-') {
+                       has_plus = FALSE;
+                       has_minus = TRUE;
+                       argument ++;
+               } else {
+                       has_plus = FALSE;
+                       has_minus = FALSE;
+               }
                
                if (equals != NULL) {
                        int equals_position = equals - argument;
                        
                        if (! (strncmp (argument, "per-thread-buffer-size", equals_position) && strncmp (argument, "tbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->per_thread_buffer_size = value;
                                }
+                       } else if (! (strncmp (argument, "statistical", equals_position) && strncmp (argument, "stat", equals_position) && strncmp (argument, "s", equals_position))) {
+                               int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
+                               if (value > 0) {
+                                       if (value > MONO_PROFILER_MAX_STAT_CALL_CHAIN_DEPTH) {
+                                               value = MONO_PROFILER_MAX_STAT_CALL_CHAIN_DEPTH;
+                                       }
+                                       profiler->statistical_call_chain_depth = value;
+                                       profiler->flags |= MONO_PROFILE_STATISTICAL;
+                               }
+                       } else if (! (strncmp (argument, "call-chain-strategy", equals_position) && strncmp (argument, "ccs", equals_position))) {
+                               char *parameter = equals + 1;
+                               FAIL_IF_HAS_MINUS;
+                               if (! strcmp (parameter, "native")) {
+                                       profiler->statistical_call_chain_strategy = MONO_PROFILER_CALL_CHAIN_NATIVE;
+                               } else if (! strcmp (parameter, "glibc")) {
+                                       profiler->statistical_call_chain_strategy = MONO_PROFILER_CALL_CHAIN_GLIBC;
+                               } else if (! strcmp (parameter, "managed")) {
+                                       profiler->statistical_call_chain_strategy = MONO_PROFILER_CALL_CHAIN_MANAGED;
+                               } else {
+                                       failure_message = "invalid call chain strategy in argument %s";
+                                       goto failure_handling;
+                               }
                        } else if (! (strncmp (argument, "statistical-thread-buffer-size", equals_position) && strncmp (argument, "sbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->statistical_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "write-buffer-size", equals_position) && strncmp (argument, "wbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->write_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "output", equals_position) && strncmp (argument, "out", equals_position) && strncmp (argument, "o", equals_position) && strncmp (argument, "O", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->file_name = g_strdup (equals + 1);
                                }
-                       } else if (! (strncmp (argument, "gc-commands", equals_position) && strncmp (argument, "gc-c", equals_position) && strncmp (argument, "gcc", equals_position))) {
+                       } else if (! (strncmp (argument, "output-suffix", equals_position) && strncmp (argument, "suffix", equals_position) && strncmp (argument, "os", equals_position) && strncmp (argument, "OS", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
-                                       profiler->heap_shot_command_file_name = g_strdup (equals + 1);
+                                       profiler->file_name_suffix = g_strdup (equals + 1);
+                               }
+                       } else if (! (strncmp (argument, "heap-shot", equals_position) && strncmp (argument, "heap", equals_position) && strncmp (argument, "h", equals_position))) {
+                               char *parameter = equals + 1;
+                               if (! strcmp (parameter, "all")) {
+                                       profiler->dump_next_heap_snapshots = -1;
+                               } else {
+                                       profiler->dump_next_heap_snapshots = atoi (parameter);
+                               }
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                                       profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
                                }
+                               profiler->action_flags.heap_shot = TRUE_IF_NOT_MINUS;
                        } else if (! (strncmp (argument, "gc-dumps", equals_position) && strncmp (argument, "gc-d", equals_position) && strncmp (argument, "gcd", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->dump_next_heap_snapshots = atoi (equals + 1);
                                }
+                       } else if (! (strncmp (argument, "command-port", equals_position) && strncmp (argument, "cp", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
+                               if (strlen (equals + 1) > 0) {
+                                       profiler->command_port = atoi (equals + 1);
+                               }
                        } else {
-                               g_warning ("Cannot parse valued argument %s\n", argument);
+                               FAIL_PARSING_VALUED_ARGUMENT;
                        }
                } else {
                        if (! (strcmp (argument, "jit") && strcmp (argument, "j"))) {
-                               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
-                               profiler->action_flags.jit_time = TRUE;
+                               profiler->action_flags.jit_time = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "allocations") && strcmp (argument, "alloc") && strcmp (argument, "a"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                               }
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_ALLOCATIONS;
+                               }
+                       } else if (! (strcmp (argument, "monitor") && strcmp (argument, "locks") && strcmp (argument, "lock"))) {
+                               FAIL_IF_HAS_MINUS;
+                               profiler->action_flags.track_stack = TRUE;
+                               profiler->flags |= MONO_PROFILE_MONITOR_EVENTS;
+                               profiler->flags |= MONO_PROFILE_GC;
                        } else if (! (strcmp (argument, "gc") && strcmp (argument, "g"))) {
+                               FAIL_IF_HAS_MINUS;
+                               profiler->action_flags.report_gc_events = TRUE;
                                profiler->flags |= MONO_PROFILE_GC;
+                       } else if (! (strcmp (argument, "allocations-summary") && strcmp (argument, "as"))) {
+                               profiler->action_flags.collection_summary = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "heap-shot") && strcmp (argument, "heap") && strcmp (argument, "h"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.unreachable_objects = TRUE;
-                               profiler->action_flags.heap_shot = TRUE;
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                                       profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                               }
+                               profiler->action_flags.heap_shot = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "unreachable") && strcmp (argument, "free") && strcmp (argument, "f"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.unreachable_objects = TRUE;
+                               profiler->action_flags.unreachable_objects = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "threads") && strcmp (argument, "t"))) {
-                               profiler->flags |= MONO_PROFILE_THREADS;
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_THREADS;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_THREADS;
+                               }
                        } else if (! (strcmp (argument, "enter-leave") && strcmp (argument, "calls") && strcmp (argument, "c"))) {
-                               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+                               profiler->action_flags.track_calls = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "statistical") && strcmp (argument, "stat") && strcmp (argument, "s"))) {
-                               profiler->flags |= MONO_PROFILE_STATISTICAL|MONO_PROFILE_JIT_COMPILATION;
-                               profiler->action_flags.jit_time = TRUE;
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_STATISTICAL;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_STATISTICAL;
+                               }
+                       } else if (! (strcmp (argument, "save-allocation-caller") && strcmp (argument, "sac"))) {
+                               profiler->action_flags.save_allocation_caller = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "save-allocation-stack") && strcmp (argument, "sas"))) {
+                               profiler->action_flags.save_allocation_stack = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "allocations-carry-id") && strcmp (argument, "aci"))) {
+                               profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "start-enabled") && strcmp (argument, "se"))) {
+                               profiler->profiler_enabled = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "start-disabled") && strcmp (argument, "sd"))) {
+                               profiler->profiler_enabled = ! TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "force-accurate-timer") && strcmp (argument, "fac"))) {
+                               use_fast_timer = TRUE_IF_NOT_MINUS;
 #if (HAS_OPROFILE)
                        } else if (! (strcmp (argument, "oprofile") && strcmp (argument, "oprof"))) {
                                profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
                                profiler->action_flags.oprofile = TRUE;
                                if (op_open_agent ()) {
-                                       g_warning ("Problem calling op_open_agent\n");
+                                       FAIL_ARGUMENT_CHECK ("problem calling op_open_agent");
                                }
 #endif
                        } else if (strcmp (argument, "logging")) {
-                               g_warning ("Cannot parse flag argument %s\n", argument);
+                               FAIL_PARSING_FLAG_ARGUMENT;
                        }
                }
+               
+failure_handling:
+               if (failure_message != NULL) {
+                       g_warning (failure_message, argument);
+                       failure_message = NULL;
+               }
        }
        
        g_free (arguments_array);
        
-       if (profiler->file_name == NULL) {
-               profiler->file_name = g_strdup ("profiler-log.prof");
+       /* Ensure that the profiler flags needed to support required action flags are active */
+       if (profiler->action_flags.jit_time) {
+               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
+       }
+       if (profiler->action_flags.save_allocation_caller || profiler->action_flags.save_allocation_stack || profiler->action_flags.allocations_carry_id) {
+               profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+       }
+       if (profiler->action_flags.collection_summary || profiler->action_flags.heap_shot || profiler->action_flags.unreachable_objects) {
+               profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+               profiler->action_flags.report_gc_events = TRUE;
+       }
+       if (profiler->action_flags.track_calls) {
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+               profiler->action_flags.jit_time = TRUE;
+       }
+       if (profiler->action_flags.save_allocation_caller || profiler->action_flags.save_allocation_stack) {
+               profiler->action_flags.track_stack = TRUE;
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+       }
+       if (profiler->action_flags.track_stack) {
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+       }
+       
+       /* Tracking call stacks is useless if we already emit all enter-exit events... */
+       if (profiler->action_flags.track_calls) {
+               profiler->action_flags.track_stack = FALSE;
+               profiler->action_flags.save_allocation_caller = FALSE;
+               profiler->action_flags.save_allocation_stack = FALSE;
+       }
+       
+       /* Without JIT events the stat profiler will not find method IDs... */
+       if (profiler->flags & MONO_PROFILE_STATISTICAL) { /* '&' tests the bit; '|' would be non-zero for any flags value */
+               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
+       }
+       /* Profiling allocations without knowing which gc we are doing is not nice... */
+       if (profiler->flags & MONO_PROFILE_ALLOCATIONS) { /* GC events are forced on only when allocation profiling is enabled */
+               profiler->flags |= MONO_PROFILE_GC;
+               profiler->action_flags.report_gc_events = TRUE;
        }
-}
 
-static gboolean
-thread_detach_callback (MonoThread *thread) {
-       LOG_WRITER_THREAD ("thread_detach_callback: asking writer thread to detach");
-       profiler->detach_writer_thread = TRUE;
-       WRITER_EVENT_RAISE ();
-       LOG_WRITER_THREAD ("thread_detach_callback: done");
-       return FALSE;
+       
+       if (profiler->file_name == NULL) {
+               char *program_name = g_get_prgname ();
+               
+               if (program_name != NULL) {
+                       char *name_buffer = g_strdup (program_name);
+                       char *name_start = name_buffer;
+                       char *cursor;
+                       
+                       /* Jump over the last '/' */
+                       cursor = strrchr (name_buffer, '/');
+                       if (cursor == NULL) {
+                               cursor = name_buffer;
+                       } else {
+                               cursor ++;
+                       }
+                       name_start = cursor;
+                       
+                       /* Then jump over the last '\\' */
+                       cursor = strrchr (name_start, '\\');
+                       if (cursor == NULL) {
+                               cursor = name_start;
+                       } else {
+                               cursor ++;
+                       }
+                       name_start = cursor;
+                       
+                       /* Finally, find the last '.' */
+                       cursor = strrchr (name_start, '.');
+                       if (cursor != NULL) {
+                               *cursor = 0;
+                       }
+                       
+                       if (profiler->file_name_suffix == NULL) {
+                               profiler->file_name = g_strdup_printf ("%s.mprof", name_start);
+                       } else {
+                               profiler->file_name = g_strdup_printf ("%s-%s.mprof", name_start, profiler->file_name_suffix);
+                       }
+                       g_free (name_buffer);
+               } else {
+                       profiler->file_name = g_strdup_printf ("%s.mprof", "profiler-log");
+               }
+       }
 }
 
 static guint32
 data_writer_thread (gpointer nothing) {
-       static gboolean thread_attached = FALSE;
-       static gboolean thread_detached = FALSE;
-       static MonoThread *this_thread = NULL;
-       
        for (;;) {
                ProfilerStatisticalData *statistical_data;
                gboolean done;
@@ -3318,22 +5719,24 @@ data_writer_thread (gpointer nothing) {
                WRITER_EVENT_WAIT ();
                LOG_WRITER_THREAD ("data_writer_thread: just woke up");
                
-               if (! thread_attached) {
-                       if (! profiler->terminate_writer_thread) {
-                               MonoDomain * root_domain = mono_get_root_domain ();
-                               if (root_domain != NULL) {
-                                       LOG_WRITER_THREAD ("data_writer_thread: attaching thread");
-                                       this_thread = mono_thread_attach (root_domain);
-                                       mono_thread_set_manage_callback (this_thread, thread_detach_callback);
-                                       thread_attached = TRUE;
-                               } else {
-                                       g_error ("Cannot get root domain\n");
-                               }
+               if (profiler->heap_shot_was_requested) { /* A collection was requested: run it from this thread. NOTE(review): the flag is not reset in the visible lines; confirm where it is cleared. */
+                       MonoDomain * root_domain = mono_get_root_domain ();
+                       
+                       if (root_domain != NULL) {
+                               MonoThread *this_thread;
+                               LOG_WRITER_THREAD ("data_writer_thread: attaching thread");
+                               this_thread = mono_thread_attach (root_domain); /* Attached only around the collection; detached again below. */
+                       LOG_WRITER_THREAD ("data_writer_thread: starting requested collection");
+                       mono_gc_collect (mono_gc_max_generation ()); /* Collect up to the highest generation reported by the runtime. */
+                       LOG_WRITER_THREAD ("data_writer_thread: requested collection done");
+                               LOG_WRITER_THREAD ("data_writer_thread: detaching thread");
+                               mono_thread_detach (this_thread);
+                               this_thread = NULL;
+                               LOG_WRITER_THREAD ("data_writer_thread: collection sequence completed");
                        } else {
-                               /* Execution was too short, pretend we attached and detached. */
-                               thread_attached = TRUE;
-                               thread_detached = TRUE;
+                               LOG_WRITER_THREAD ("data_writer_thread: cannot get root domain, collection sequence skipped"); /* Without a root domain nothing is attached and no collection runs. */
                        }
+                       
                }
                
                statistical_data = profiler->statistical_data_ready;
@@ -3348,7 +5751,7 @@ data_writer_thread (gpointer nothing) {
                        flush_all_mappings ();
                        LOG_WRITER_THREAD ("data_writer_thread: wrote mapping");
                        
-                       if ((statistical_data != NULL) && ! thread_detached) {
+                       if (statistical_data != NULL) { /* A ready statistical buffer is taken out of the ready slot and written as a block. */
                                LOG_WRITER_THREAD ("data_writer_thread: writing statistical data...");
                                profiler->statistical_data_ready = NULL;
                                write_statistical_data_block (statistical_data);
@@ -3362,22 +5765,18 @@ data_writer_thread (gpointer nothing) {
                        
                        UNLOCK_PROFILER ();
                        LOG_WRITER_THREAD ("data_writer_thread: wrote data and released lock");
-               }
-               
-               if (profiler->detach_writer_thread) {
-                       if (this_thread != NULL) {
-                               LOG_WRITER_THREAD ("data_writer_thread: detaching thread");
-                               mono_thread_detach (this_thread);
-                               this_thread = NULL;
-                               profiler->detach_writer_thread = FALSE;
-                               thread_detached = TRUE;
-                       } else {
-                               LOG_WRITER_THREAD ("data_writer_thread: warning: thread has already been detached");
-                       }
+               } else { /* NOTE(review): the matching "if" lies outside the visible hunk; confirm which condition selects this branch. */
+                       LOG_WRITER_THREAD ("data_writer_thread: acquiring lock and flushing buffers");
+                       LOCK_PROFILER ();
+                       LOG_WRITER_THREAD ("data_writer_thread: lock acquired, flushing buffers");
+                       flush_everything (); /* Called between LOCK_PROFILER and UNLOCK_PROFILER. */
+                       UNLOCK_PROFILER ();
+                       LOG_WRITER_THREAD ("data_writer_thread: flushed buffers and released lock");
                }
                
                if (profiler->terminate_writer_thread) {
                LOG_WRITER_THREAD ("data_writer_thread: exiting thread");
+                       CLEANUP_WRITER_THREAD (); /* Invoked immediately before EXIT_THREAD; the macro is defined outside this view. */
                        EXIT_THREAD ();
                }
        }
@@ -3393,47 +5792,61 @@ mono_profiler_startup (const char *desc)
 {
        profiler = g_new0 (MonoProfiler, 1);
        
-       setup_user_options ((desc != NULL) ? desc : "");
+       setup_user_options ((desc != NULL) ? desc : DEFAULT_ARGUMENTS); /* With no description, DEFAULT_ARGUMENTS is parsed instead. */
        
        INITIALIZE_PROFILER_MUTEX ();
        MONO_PROFILER_GET_CURRENT_TIME (profiler->start_time);
        MONO_PROFILER_GET_CURRENT_COUNTER (profiler->start_counter);
+       profiler->last_header_counter = 0;
        
        profiler->methods = method_id_mapping_new ();
        profiler->classes = class_id_mapping_new ();
+       profiler->loaded_element_next_free_id = 1; /* Id counter starts at 1. NOTE(review): presumably 0 is kept as "no id" - confirm. */
        profiler->loaded_assemblies = g_hash_table_new_full (g_direct_hash, NULL, NULL, loaded_element_destroy);
        profiler->loaded_modules = g_hash_table_new_full (g_direct_hash, NULL, NULL, loaded_element_destroy);
        profiler->loaded_appdomains = g_hash_table_new_full (g_direct_hash, NULL, NULL, loaded_element_destroy);
        
-       profiler->statistical_data = profiler_statistical_data_new (profiler->statistical_buffer_size);
-       profiler->statistical_data_second_buffer = profiler_statistical_data_new (profiler->statistical_buffer_size);
-       unmanaged_functions_init (&(profiler->unmanaged_functions));
+       profiler->statistical_data = profiler_statistical_data_new (profiler); /* The constructor now receives the whole profiler rather than a buffer size. */
+       profiler->statistical_data_second_buffer = profiler_statistical_data_new (profiler);
        
        profiler->write_buffers = g_malloc (sizeof (ProfilerFileWriteBuffer) + PROFILER_FILE_WRITE_BUFFER_SIZE);
        profiler->write_buffers->next = NULL;
        profiler->current_write_buffer = profiler->write_buffers;
        profiler->current_write_position = 0;
        profiler->full_write_buffers = 0;
+       profiler_code_chunks_initialize (& (profiler->code_chunks));
        
-       profiler->executable_regions = profiler_executable_memory_regions_new (1);
+       profiler->executable_regions = profiler_executable_memory_regions_new (1, 1);
+       
+       profiler->executable_files.table = g_hash_table_new (g_str_hash, g_str_equal); 
+       profiler->executable_files.new_files = NULL; 
        
        profiler->heap_shot_write_jobs = NULL;
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
+       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary) { /* collection_summary also triggers heap buffer setup. */
                profiler_heap_buffers_setup (&(profiler->heap));
        } else {
                profiler_heap_buffers_clear (&(profiler->heap));
        }
+       profiler->garbage_collection_counter = 0;
        
        WRITER_EVENT_INIT ();
        LOG_WRITER_THREAD ("mono_profiler_startup: creating writer thread");
        CREATE_WRITER_THREAD (data_writer_thread);
        LOG_WRITER_THREAD ("mono_profiler_startup: created writer thread");
+       if ((profiler->command_port >= 1024) && (profiler->command_port <= 65535)) { /* The user thread is started only for a command port within 1024-65535. */
+               LOG_USER_THREAD ("mono_profiler_startup: creating user thread");
+               CREATE_USER_THREAD (user_thread);
+               LOG_USER_THREAD ("mono_profiler_startup: created user thread");
+       } else {
+               LOG_USER_THREAD ("mono_profiler_startup: skipping user thread creation");
+       }
 
        ALLOCATE_PROFILER_THREAD_DATA ();
        
        OPEN_FILE ();
        
        write_intro_block ();
+       write_directives_block (TRUE); /* Emitted right after the intro block, before any callback is installed. */
        
        mono_profiler_install (profiler, profiler_shutdown);
        
@@ -3450,11 +5863,19 @@ mono_profiler_startup (const char *desc)
        mono_profiler_install_method_free (method_free);
        mono_profiler_install_thread (thread_start, thread_end);
        mono_profiler_install_allocation (object_allocated);
+       mono_profiler_install_monitor (monitor_event);
        mono_profiler_install_statistical (statistical_hit);
+       mono_profiler_install_statistical_call_chain (statistical_call_chain, profiler->statistical_call_chain_depth, profiler->statistical_call_chain_strategy);
        mono_profiler_install_gc (gc_event, gc_resize);
+       mono_profiler_install_runtime_initialized (runtime_initialized);
 #if (HAS_OPROFILE)
        mono_profiler_install_jit_end (method_jit_result);
 #endif
+       if (profiler->flags & MONO_PROFILE_STATISTICAL) { /* Test the flag with '&': the previous '|' was non-zero regardless of profiler->flags, so these callbacks were always installed. */
+               mono_profiler_install_code_chunk_new (profiler_code_chunk_new_callback);
+               mono_profiler_install_code_chunk_destroy (profiler_code_chunk_destroy_callback);
+               mono_profiler_install_code_buffer_new (profiler_code_buffer_new_callback);
+       }
        
        mono_profiler_set_events (profiler->flags);
 }